diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 0f3b3838de1b2..1768e682b3db4 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -31,83 +31,62 @@ def time_maybe_convert_objects(self):
class Factorize:
- params = [[True, False], ["int", "uint", "float", "string"]]
- param_names = ["sort", "dtype"]
-
- def setup(self, sort, dtype):
- N = 10 ** 5
- data = {
- "int": pd.Int64Index(np.arange(N).repeat(5)),
- "uint": pd.UInt64Index(np.arange(N).repeat(5)),
- "float": pd.Float64Index(np.random.randn(N).repeat(5)),
- "string": tm.makeStringIndex(N).repeat(5),
- }
- self.idx = data[dtype]
-
- def time_factorize(self, sort, dtype):
- self.idx.factorize(sort=sort)
-
-
-class FactorizeUnique:
-
- params = [[True, False], ["int", "uint", "float", "string"]]
- param_names = ["sort", "dtype"]
+ params = [
+ [True, False],
+ [True, False],
+ ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+ ]
+ param_names = ["unique", "sort", "dtype"]
- def setup(self, sort, dtype):
+ def setup(self, unique, sort, dtype):
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
- "float": pd.Float64Index(np.arange(N)),
+ "float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
- }
- self.idx = data[dtype]
- assert self.idx.is_unique
-
- def time_factorize(self, sort, dtype):
+ "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+ "datetime64[ns, tz]": pd.date_range(
+ "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+ ),
+ }[dtype]
+ if not unique:
+ data = data.repeat(5)
+ self.idx = data
+
+ def time_factorize(self, unique, sort, dtype):
self.idx.factorize(sort=sort)
class Duplicated:
- params = [["first", "last", False], ["int", "uint", "float", "string"]]
- param_names = ["keep", "dtype"]
-
- def setup(self, keep, dtype):
- N = 10 ** 5
- data = {
- "int": pd.Int64Index(np.arange(N).repeat(5)),
- "uint": pd.UInt64Index(np.arange(N).repeat(5)),
- "float": pd.Float64Index(np.random.randn(N).repeat(5)),
- "string": tm.makeStringIndex(N).repeat(5),
- }
- self.idx = data[dtype]
- # cache is_unique
- self.idx.is_unique
-
- def time_duplicated(self, keep, dtype):
- self.idx.duplicated(keep=keep)
-
-
-class DuplicatedUniqueIndex:
-
- params = ["int", "uint", "float", "string"]
- param_names = ["dtype"]
+ params = [
+ [True, False],
+ ["first", "last", False],
+ ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+ ]
+ param_names = ["unique", "keep", "dtype"]
- def setup(self, dtype):
+ def setup(self, unique, keep, dtype):
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
- }
- self.idx = data[dtype]
+ "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+ "datetime64[ns, tz]": pd.date_range(
+ "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+ ),
+ }[dtype]
+ if not unique:
+ data = data.repeat(5)
+ self.idx = data
# cache is_unique
self.idx.is_unique
- def time_duplicated_unique(self, dtype):
- self.idx.duplicated()
+ def time_duplicated(self, unique, keep, dtype):
+ self.idx.duplicated(keep=keep)
class Hashing:
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 6f43a6fd3fc9b..107b9b9edcd5d 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -258,9 +258,6 @@ def setup(self):
def time_get_loc(self):
self.index.get_loc(self.category)
- def time_shape(self):
- self.index.shape
-
def time_shallow_copy(self):
self.index._shallow_copy()
diff --git a/asv_bench/benchmarks/index_cached_properties.py b/asv_bench/benchmarks/index_cached_properties.py
index 13b33855569c9..16fbc741775e4 100644
--- a/asv_bench/benchmarks/index_cached_properties.py
+++ b/asv_bench/benchmarks/index_cached_properties.py
@@ -7,6 +7,7 @@ class IndexCache:
params = [
[
+ "CategoricalIndex",
"DatetimeIndex",
"Float64Index",
"IntervalIndex",
@@ -42,6 +43,8 @@ def setup(self, index_type):
self.idx = pd.Float64Index(range(N))
elif index_type == "UInt64Index":
self.idx = pd.UInt64Index(range(N))
+ elif index_type == "CategoricalIndex":
+ self.idx = pd.CategoricalIndex(range(N), range(N))
else:
raise ValueError
assert len(self.idx) == N
diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py
index cf51a4d35f805..b242de6a17208 100644
--- a/asv_bench/benchmarks/index_object.py
+++ b/asv_bench/benchmarks/index_object.py
@@ -55,14 +55,6 @@ def time_datetime_difference_disjoint(self):
self.datetime_left.difference(self.datetime_right)
-class Datetime:
- def setup(self):
- self.dr = date_range("20000101", freq="D", periods=10000)
-
- def time_is_dates_only(self):
- self.dr._is_dates_only
-
-
class Range:
def setup(self):
self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index 087fe3916845b..e98d2948e76ea 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -1,3 +1,8 @@
+"""
+These benchmarks are for Series and DataFrame indexing methods. For the
+lower-level methods directly on Index and subclasses, see index_object.py,
+indexing_engine.py, and index_cached_properties.py
+"""
import warnings
import numpy as np
diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
index b52aa2e55af35..e15d4c66e4fc0 100644
--- a/asv_bench/benchmarks/period.py
+++ b/asv_bench/benchmarks/period.py
@@ -85,9 +85,6 @@ def setup(self):
def time_get_loc(self):
self.index.get_loc(self.period)
- def time_shape(self):
- self.index.shape
-
def time_shallow_copy(self):
self.index._shallow_copy()
diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py
index 208c8f9d14a5e..cfe05c3e257b1 100644
--- a/asv_bench/benchmarks/timedelta.py
+++ b/asv_bench/benchmarks/timedelta.py
@@ -73,9 +73,6 @@ def setup(self):
def time_get_loc(self):
self.index.get_loc(self.timedelta)
- def time_shape(self):
- self.index.shape
-
def time_shallow_copy(self):
self.index._shallow_copy()
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index 2f7ea8b9c0873..6c9f8ee77e5ad 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -57,6 +57,9 @@ def time_to_date(self, index_type):
def time_to_pydatetime(self, index_type):
self.index.to_pydatetime()
+ def time_is_dates_only(self, index_type):
+ self.index._is_dates_only
+
class TzLocalize:
@@ -91,20 +94,6 @@ def time_reest_datetimeindex(self, tz):
self.df.reset_index()
-class Factorize:
-
- params = [None, "Asia/Tokyo"]
- param_names = "tz"
-
- def setup(self, tz):
- N = 100000
- self.dti = date_range("2011-01-01", freq="H", periods=N, tz=tz)
- self.dti = self.dti.repeat(5)
-
- def time_factorize(self, tz):
- self.dti.factorize()
-
-
class InferFreq:
params = [None, "D", "B"]
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index bb7d8a388e6e2..e2dc543360a62 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -269,7 +269,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
MSG='Doctests generic.py' ; echo $MSG
pytest -q --doctest-modules pandas/core/generic.py \
- -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
+ -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
RET=$(($RET + $?)) ; echo $MSG "DONE"
MSG='Doctests groupby.py' ; echo $MSG
diff --git a/ci/setup_env.sh b/ci/setup_env.sh
index e5bee09fe2f79..ae39b0dda5d09 100755
--- a/ci/setup_env.sh
+++ b/ci/setup_env.sh
@@ -50,7 +50,7 @@ echo
echo "update conda"
conda config --set ssl_verify false
conda config --set quiet true --set always_yes true --set changeps1 false
-conda install pip # create conda to create a historical artifact for pip & setuptools
+conda install pip conda  # install pip and conda to create a historical artifact for pip & setuptools
conda update -n base conda
echo "conda info -a"
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index fb06ee122ae88..b7e53b84f0e02 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -56,6 +56,11 @@ joining paths, replacing file extensions, and checking if files exist are also a
Statistics and machine learning
-------------------------------
+`pandas-tfrecords <https://pypi.org/project/pandas-tfrecords/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Easy saving of pandas DataFrames to the TensorFlow TFRecords format, and reading TFRecords back into pandas.
+
`Statsmodels <https://www.statsmodels.org/>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
index 277080006cb3c..c6d9a48fcf8ed 100644
--- a/doc/source/getting_started/basics.rst
+++ b/doc/source/getting_started/basics.rst
@@ -689,6 +689,17 @@ of a 1D array of values. It can also be used as a function on regular arrays:
s.value_counts()
pd.value_counts(data)
+.. versionadded:: 1.1.0
+
+The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
+By default all columns are used but a subset can be selected using the ``subset`` argument.
+
+.. ipython:: python
+
+ data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]}
+ frame = pd.DataFrame(data)
+ frame.value_counts()
+
Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame:
.. ipython:: python
diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index c7b1cc1c832be..b326bbb5a465e 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -170,6 +170,7 @@ Computations / descriptive stats
DataFrame.std
DataFrame.var
DataFrame.nunique
+ DataFrame.value_counts
Reindexing / selection / label manipulation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a4c991dcc166c..888b7d23aeb35 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -43,7 +43,7 @@ Other enhancements
- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
--
+- ``OptionError`` is now exposed in ``pandas.errors`` (:issue:`27553`)
-
.. ---------------------------------------------------------------------------
@@ -55,6 +55,7 @@ Other API changes
- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
+- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
-
@@ -114,6 +115,7 @@ Datetimelike
- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`)
- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`)
- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`)
+- Bug in :class:`Timestamp` raising a confusing error message when year, month or day is missing (:issue:`31200`)
Timedelta
^^^^^^^^^
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index b8c462abe35f1..9f3b4a8a554b5 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -411,10 +411,25 @@ class Timestamp(_Timestamp):
)
elif ts_input is _no_input:
- # User passed keyword arguments.
- ts_input = datetime(year, month, day, hour or 0,
- minute or 0, second or 0,
- microsecond or 0)
+ # GH 31200
+ # When year, month or day is not given, we call the datetime
+ # constructor to make sure we get the same error message
+ # since Timestamp inherits datetime
+ datetime_kwargs = {
+ "hour": hour or 0,
+ "minute": minute or 0,
+ "second": second or 0,
+ "microsecond": microsecond or 0
+ }
+ if year is not None:
+ datetime_kwargs["year"] = year
+ if month is not None:
+ datetime_kwargs["month"] = month
+ if day is not None:
+ datetime_kwargs["day"] = day
+
+ ts_input = datetime(**datetime_kwargs)
+
elif is_integer_object(freq):
# User passed positional arguments:
# Timestamp(year, month, day[, hour[, minute[, second[,
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 854075eaa8d09..f637e16caa4c6 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -777,8 +777,10 @@ def searchsorted(self, value, side="left", sorter=None):
if isinstance(value, str):
try:
value = self._scalar_from_string(value)
- except ValueError:
- raise TypeError("searchsorted requires compatible dtype or scalar")
+ except ValueError as e:
+ raise TypeError(
+ "searchsorted requires compatible dtype or scalar"
+ ) from e
elif is_valid_nat_for_dtype(value, self.dtype):
value = NaT
@@ -1041,7 +1043,7 @@ def _validate_frequency(cls, index, freq, **kwargs):
raise ValueError(
f"Inferred frequency {inferred} from passed values "
f"does not conform to passed frequency {freq.freqstr}"
- )
+ ) from e
# monotonicity/uniqueness properties are called via frequencies.infer_freq,
# see GH#23789
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index b11736248c12a..f5167f470b056 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -725,45 +725,18 @@ def _concat_same_type(cls, to_concat):
right = np.concatenate([interval.right for interval in to_concat])
return cls._simple_new(left, right, closed=closed, copy=False)
- def _shallow_copy(self, left=None, right=None, closed=None):
+ def _shallow_copy(self, left, right):
"""
Return a new IntervalArray with the replacement attributes
Parameters
----------
- left : array-like
+ left : Index
Values to be used for the left-side of the intervals.
- If None, the existing left and right values will be used.
-
- right : array-like
+ right : Index
Values to be used for the right-side of the intervals.
- If None and left is IntervalArray-like, the left and right
- of the IntervalArray-like will be used.
-
- closed : {'left', 'right', 'both', 'neither'}, optional
- Whether the intervals are closed on the left-side, right-side, both
- or neither. If None, the existing closed will be used.
"""
- if left is None:
-
- # no values passed
- left, right = self.left, self.right
-
- elif right is None:
-
- # only single value passed, could be an IntervalArray
- # or array of Intervals
- if not isinstance(left, (type(self), ABCIntervalIndex)):
- left = type(self)(left)
-
- left, right = left.left, left.right
- else:
-
- # both left and right are values
- pass
-
- closed = closed or self.closed
- return self._simple_new(left, right, closed=closed, verify_integrity=False)
+ return self._simple_new(left, right, closed=self.closed, verify_integrity=False)
def copy(self):
"""
@@ -1035,7 +1008,9 @@ def set_closed(self, closed):
msg = f"invalid option for 'closed': {closed}"
raise ValueError(msg)
- return self._shallow_copy(closed=closed)
+ return type(self)._simple_new(
+ left=self.left, right=self.right, closed=closed, verify_integrity=False
+ )
@property
def length(self):
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 56d3596f71813..85424e35fa0e0 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1196,6 +1196,7 @@ def value_counts(
--------
Series.count: Number of non-NA elements in a Series.
DataFrame.count: Number of non-NA elements in a DataFrame.
+ DataFrame.value_counts: Equivalent method on DataFrames.
Examples
--------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7efb4fbb878d6..b6b6a4fe74ed5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -111,7 +111,7 @@
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.core.indexes.multi import maybe_droplevels
+from pandas.core.indexes.multi import MultiIndex, maybe_droplevels
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
from pandas.core.internals import BlockManager
@@ -4569,6 +4569,10 @@ def drop_duplicates(
-------
DataFrame
DataFrame with duplicates removed or None if ``inplace=True``.
+
+ See Also
+ --------
+ DataFrame.value_counts: Count unique combinations of columns.
"""
if self.empty:
return self.copy()
@@ -4814,6 +4818,102 @@ def sort_index(
else:
return self._constructor(new_data).__finalize__(self)
+ def value_counts(
+ self,
+ subset: Optional[Sequence[Label]] = None,
+ normalize: bool = False,
+ sort: bool = True,
+ ascending: bool = False,
+ ):
+ """
+ Return a Series containing counts of unique rows in the DataFrame.
+
+ .. versionadded:: 1.1.0
+
+ Parameters
+ ----------
+ subset : list-like, optional
+ Columns to use when counting unique combinations.
+ normalize : bool, default False
+ Return proportions rather than frequencies.
+ sort : bool, default True
+ Sort by frequencies.
+ ascending : bool, default False
+ Sort in ascending order.
+
+ Returns
+ -------
+ Series
+
+ See Also
+ --------
+ Series.value_counts: Equivalent method on Series.
+
+ Notes
+ -----
+ The returned Series will have a MultiIndex with one level per input
+ column. By default, rows that contain any NA values are omitted from
+ the result. By default, the resulting Series will be in descending
+ order so that the first element is the most frequently-occurring row.
+
+ Examples
+ --------
+ >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6],
+ ... 'num_wings': [2, 0, 0, 0]},
+ ... index=['falcon', 'dog', 'cat', 'ant'])
+ >>> df
+ num_legs num_wings
+ falcon 2 2
+ dog 4 0
+ cat 4 0
+ ant 6 0
+
+ >>> df.value_counts()
+ num_legs num_wings
+ 4 0 2
+ 6 0 1
+ 2 2 1
+ dtype: int64
+
+ >>> df.value_counts(sort=False)
+ num_legs num_wings
+ 2 2 1
+ 4 0 2
+ 6 0 1
+ dtype: int64
+
+ >>> df.value_counts(ascending=True)
+ num_legs num_wings
+ 2 2 1
+ 6 0 1
+ 4 0 2
+ dtype: int64
+
+ >>> df.value_counts(normalize=True)
+ num_legs num_wings
+ 4 0 0.50
+ 6 0 0.25
+ 2 2 0.25
+ dtype: float64
+ """
+ if subset is None:
+ subset = self.columns.tolist()
+
+ counts = self.groupby(subset).size()
+
+ if sort:
+ counts = counts.sort_values(ascending=ascending)
+ if normalize:
+ counts /= counts.sum()
+
+ # Force MultiIndex for single column
+ if len(subset) == 1:
+ counts.index = MultiIndex.from_arrays(
+ [counts.index], names=[counts.index.name]
+ )
+
+ return counts
+
def nlargest(self, n, columns, keep="first") -> "DataFrame":
"""
Return the first `n` rows ordered by `columns` in descending order.
@@ -8346,9 +8446,8 @@ def isin(self, values) -> "DataFrame":
def _from_nested_dict(data):
# TODO: this should be seriously cythonized
- new_data = {}
+ new_data = collections.defaultdict(dict)
for index, s in data.items():
for col, v in s.items():
- new_data[col] = new_data.get(col, {})
new_data[col][index] = v
return new_data
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a6ab0d4034ddb..ff7c481d550d4 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -602,6 +602,10 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
of levels.
axis : {0 or 'index', 1 or 'columns'}, default 0
+ Axis along which the level(s) is removed:
+
+    * 0 or 'index': remove the requested level(s) from the row index.
+    * 1 or 'columns': remove the requested level(s) from the column index.
Returns
-------
@@ -617,7 +621,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
... ]).set_index([0, 1]).rename_axis(['a', 'b'])
>>> df.columns = pd.MultiIndex.from_tuples([
- ... ('c', 'e'), ('d', 'f')
+ ... ('c', 'e'), ('d', 'f')
... ], names=['level_1', 'level_2'])
>>> df
@@ -636,7 +640,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
6 7 8
10 11 12
- >>> df.droplevel('level2', axis=1)
+ >>> df.droplevel('level_2', axis=1)
level_1 c d
a b
1 2 3 4
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index aa22527d8c2d7..67f2f05c8af1e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -8,7 +8,7 @@
from pandas._libs import algos as libalgos, index as libindex, lib
import pandas._libs.join as libjoin
-from pandas._libs.lib import is_datetime_array
+from pandas._libs.lib import is_datetime_array, no_default
from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp
from pandas._libs.tslibs.period import IncompatibleFrequency
from pandas._libs.tslibs.timezones import tz_compare
@@ -485,7 +485,7 @@ def _get_attributes_dict(self):
"""
return {k: getattr(self, k, None) for k in self._attributes}
- def _shallow_copy(self, values=None, **kwargs):
+ def _shallow_copy(self, values=None, name: Label = no_default):
"""
Create a new Index with the same class as the caller, don't copy the
data, use the same object attributes with passed in attributes taking
@@ -496,16 +496,14 @@ def _shallow_copy(self, values=None, **kwargs):
Parameters
----------
values : the values to create the new Index, optional
- kwargs : updates the default attributes for this Index
+ name : Label, defaults to self.name
"""
+ name = self.name if name is no_default else name
+
if values is None:
values = self.values
- attributes = self._get_attributes_dict()
-
- attributes.update(kwargs)
-
- return self._simple_new(values, **attributes)
+ return self._simple_new(values, name=name)
def _shallow_copy_with_infer(self, values, **kwargs):
"""
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index caa6a9a93141f..67bed7bd77c7f 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -7,6 +7,8 @@
from pandas._libs import index as libindex
from pandas._libs.hashtable import duplicated_int64
+from pandas._libs.lib import no_default
+from pandas._typing import Label
from pandas.util._decorators import Appender, cache_readonly
from pandas.core.dtypes.common import (
@@ -17,7 +19,6 @@
is_scalar,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
-from pandas.core.dtypes.generic import ABCCategorical, ABCSeries
from pandas.core.dtypes.missing import isna
from pandas.core import accessor
@@ -193,7 +194,9 @@ def __new__(
raise cls._scalar_data_error(data)
data = []
- data = cls._create_categorical(data, dtype=dtype)
+ assert isinstance(dtype, CategoricalDtype), dtype
+ if not isinstance(data, Categorical) or data.dtype != dtype:
+ data = Categorical(data, dtype=dtype)
data = data.copy() if copy else data
@@ -223,37 +226,11 @@ def _create_from_codes(self, codes, dtype=None, name=None):
return CategoricalIndex(cat, name=name)
@classmethod
- def _create_categorical(cls, data, dtype=None):
- """
- *this is an internal non-public method*
-
- create the correct categorical from data and the properties
-
- Parameters
- ----------
- data : data for new Categorical
- dtype : CategoricalDtype, defaults to existing
-
- Returns
- -------
- Categorical
- """
- if isinstance(data, (cls, ABCSeries)) and is_categorical_dtype(data):
- data = data.values
-
- if not isinstance(data, ABCCategorical):
- return Categorical(data, dtype=dtype)
-
- if isinstance(dtype, CategoricalDtype) and dtype != data.dtype:
- # we want to silently ignore dtype='category'
- data = data._set_dtype(dtype)
- return data
-
- @classmethod
- def _simple_new(cls, values, name=None, dtype=None):
+ def _simple_new(cls, values: Categorical, name=None, dtype=None):
+ # GH#32204 dtype is included for compat with Index._simple_new
+ assert isinstance(values, Categorical), type(values)
result = object.__new__(cls)
- values = cls._create_categorical(values, dtype=dtype)
result._data = values
result.name = name
@@ -264,13 +241,14 @@ def _simple_new(cls, values, name=None, dtype=None):
# --------------------------------------------------------------------
@Appender(Index._shallow_copy.__doc__)
- def _shallow_copy(self, values=None, **kwargs):
+ def _shallow_copy(self, values=None, name: Label = no_default):
+ name = self.name if name is no_default else name
+
if values is None:
values = self.values
cat = Categorical(values, dtype=self.dtype)
- name = kwargs.get("name", self.name)
return type(self)._simple_new(cat, name=name)
def _is_dtype_compat(self, other) -> bool:
@@ -295,7 +273,8 @@ def _is_dtype_compat(self, other) -> bool:
values = other
if not is_list_like(values):
values = [values]
- other = CategoricalIndex(self._create_categorical(other, dtype=self.dtype))
+ cat = Categorical(other, dtype=self.dtype)
+ other = CategoricalIndex(cat)
if not other.isin(values).all():
raise TypeError(
"cannot append a non-category item to a CategoricalIndex"
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 1b3b6934aa53a..349b582de4358 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -8,6 +8,7 @@
from pandas._libs import NaT, iNaT, join as libjoin, lib
from pandas._libs.tslibs import timezones
+from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, cache_readonly
@@ -551,14 +552,6 @@ def _summary(self, name=None) -> str:
result = result.replace("'", "")
return result
- def _concat_same_dtype(self, to_concat, name):
- """
- Concatenate to_concat which has the same class.
- """
- new_data = type(self._data)._concat_same_type(to_concat)
-
- return self._simple_new(new_data, name=name)
-
def shift(self, periods=1, freq=None):
"""
Shift index by desired number of time frequency increments.
@@ -649,7 +642,9 @@ def _set_freq(self, freq):
self._data._freq = freq
- def _shallow_copy(self, values=None, **kwargs):
+ def _shallow_copy(self, values=None, name: Label = lib.no_default):
+ name = self.name if name is lib.no_default else name
+
if values is None:
values = self._data
@@ -657,18 +652,16 @@ def _shallow_copy(self, values=None, **kwargs):
values = values._data
if isinstance(values, np.ndarray):
# TODO: We would rather not get here
- if kwargs.get("freq") is not None:
- raise ValueError(kwargs)
values = type(self._data)(values, dtype=self.dtype)
attributes = self._get_attributes_dict()
- if "freq" not in kwargs and self.freq is not None:
+ if self.freq is not None:
if isinstance(values, (DatetimeArray, TimedeltaArray)):
if values.freq is None:
del attributes["freq"]
- attributes.update(kwargs)
+ attributes["name"] = name
return type(self)._simple_new(values, **attributes)
# --------------------------------------------------------------------
@@ -738,9 +731,7 @@ def intersection(self, other, sort=False):
# this point, depending on the values.
result._set_freq(None)
- result = self._shallow_copy(
- result._data, name=result.name, dtype=result.dtype, freq=None
- )
+ result = self._shallow_copy(result._data, name=result.name)
if result.freq is None:
result._set_freq("infer")
return result
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 6ea4250e4acf4..b3923a1298859 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -333,11 +333,12 @@ def from_tuples(
# --------------------------------------------------------------------
@Appender(Index._shallow_copy.__doc__)
- def _shallow_copy(self, left=None, right=None, **kwargs):
- result = self._data._shallow_copy(left=left, right=right)
+ def _shallow_copy(self, values=None, **kwargs):
+ if values is None:
+ values = self._data
attributes = self._get_attributes_dict()
attributes.update(kwargs)
- return self._simple_new(result, **attributes)
+ return self._simple_new(values, **attributes)
@cache_readonly
def _isnan(self):
@@ -407,7 +408,7 @@ def astype(self, dtype, copy=True):
with rewrite_exception("IntervalArray", type(self).__name__):
new_values = self.values.astype(dtype, copy=copy)
if is_interval_dtype(new_values):
- return self._shallow_copy(new_values.left, new_values.right)
+ return self._shallow_copy(new_values)
return Index.astype(self, dtype, copy=copy)
@property
@@ -881,7 +882,8 @@ def where(self, cond, other=None):
if other is None:
other = self._na_value
values = np.where(cond, self.values, other)
- return self._shallow_copy(values)
+ result = IntervalArray(values)
+ return self._shallow_copy(result)
def delete(self, loc):
"""
@@ -893,7 +895,8 @@ def delete(self, loc):
"""
new_left = self.left.delete(loc)
new_right = self.right.delete(loc)
- return self._shallow_copy(new_left, new_right)
+ result = self._data._shallow_copy(new_left, new_right)
+ return self._shallow_copy(result)
def insert(self, loc, item):
"""
@@ -927,7 +930,8 @@ def insert(self, loc, item):
new_left = self.left.insert(loc, left_insert)
new_right = self.right.insert(loc, right_insert)
- return self._shallow_copy(new_left, new_right)
+ result = self._data._shallow_copy(new_left, new_right)
+ return self._shallow_copy(result)
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index 367870f0ee467..06a26cc90555e 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -3,7 +3,7 @@
import numpy as np
from pandas._libs import index as libindex, lib
-from pandas._typing import Dtype
+from pandas._typing import Dtype, Label
from pandas.util._decorators import Appender, cache_readonly
from pandas.core.dtypes.cast import astype_nansafe
@@ -103,7 +103,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
return self._maybe_cast_indexer(label)
@Appender(Index._shallow_copy.__doc__)
- def _shallow_copy(self, values=None, name=lib.no_default):
+ def _shallow_copy(self, values=None, name: Label = lib.no_default):
name = name if name is not lib.no_default else self.name
if values is not None and not self._can_hold_na and values.dtype.kind == "f":
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 0b85433b699a8..c7c11c60185b3 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -5,9 +5,11 @@
import numpy as np
from pandas._libs import index as libindex
+from pandas._libs.lib import no_default
from pandas._libs.tslibs import frequencies as libfrequencies, resolution
from pandas._libs.tslibs.parsing import parse_time_string
from pandas._libs.tslibs.period import Period
+from pandas._typing import Label
from pandas.util._decorators import Appender, cache_readonly
from pandas.core.dtypes.common import (
@@ -248,8 +250,10 @@ def _has_complex_internals(self):
# used to avoid libreduction code paths, which raise or require conversion
return True
- def _shallow_copy(self, values=None, **kwargs):
+ def _shallow_copy(self, values=None, name: Label = no_default):
# TODO: simplify, figure out type of values
+ name = name if name is not no_default else self.name
+
if values is None:
values = self._data
@@ -263,18 +267,7 @@ def _shallow_copy(self, values=None, **kwargs):
# GH#30713 this should never be reached
raise TypeError(type(values), getattr(values, "dtype", None))
- # We don't allow changing `freq` in _shallow_copy.
- validate_dtype_freq(self.dtype, kwargs.get("freq"))
- attributes = self._get_attributes_dict()
-
- attributes.update(kwargs)
- if not len(values) and "dtype" not in kwargs:
- attributes["dtype"] = self.dtype
- return self._simple_new(values, **attributes)
-
- def _shallow_copy_with_infer(self, values=None, **kwargs):
- """ we always want to return a PeriodIndex """
- return self._shallow_copy(values=values, **kwargs)
+ return self._simple_new(values, name=name)
def _maybe_convert_timedelta(self, other):
"""
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index d6752da6bc58f..fa8551bc646a6 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -7,6 +7,8 @@
import numpy as np
from pandas._libs import index as libindex
+from pandas._libs.lib import no_default
+from pandas._typing import Label
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, cache_readonly
@@ -385,13 +387,13 @@ def tolist(self):
return list(self._range)
@Appender(Int64Index._shallow_copy.__doc__)
- def _shallow_copy(self, values=None, **kwargs):
+ def _shallow_copy(self, values=None, name: Label = no_default):
+ name = self.name if name is no_default else name
+
if values is None:
- name = kwargs.get("name", self.name)
return self._simple_new(self._range, name=name)
else:
- kwargs.setdefault("name", self.name)
- return self._int64index._shallow_copy(values, **kwargs)
+ return Int64Index._simple_new(values, name=name)
@Appender(Int64Index.copy.__doc__)
def copy(self, name=None, deep=False, dtype=None, **kwargs):
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index d4f9c15a9f73f..329bfdf543c62 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -141,7 +141,7 @@ def __init__(
if do_integrity_check:
self._verify_integrity()
- self._consolidate_check()
+ self._known_consolidated = False
self._rebuild_blknos_and_blklocs()
@@ -726,7 +726,6 @@ def get_slice(self, slobj: slice, axis: int = 0):
new_axes[axis] = new_axes[axis][slobj]
bm = type(self)(new_blocks, new_axes, do_integrity_check=False)
- bm._consolidate_inplace()
return bm
def __contains__(self, item) -> bool:
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index ebe9a3d5bf472..29e69cc5fe509 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -4,6 +4,8 @@
Expose public exceptions & warnings
"""
+from pandas._config.config import OptionError
+
from pandas._libs.tslibs import NullFrequencyError, OutOfBoundsDatetime
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index d5537359d6948..c6b4c4904735c 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -353,9 +353,9 @@ def test_constructor_from_index_series_period(self):
result = Categorical(Series(idx))
tm.assert_index_equal(result.categories, idx)
- def test_constructor_invariant(self):
- # GH 14190
- vals = [
+ @pytest.mark.parametrize(
+ "values",
+ [
np.array([1.0, 1.2, 1.8, np.nan]),
np.array([1, 2, 3], dtype="int64"),
["a", "b", "c", np.nan],
@@ -366,11 +366,13 @@ def test_constructor_invariant(self):
Timestamp("2014-01-02", tz="US/Eastern"),
NaT,
],
- ]
- for val in vals:
- c = Categorical(val)
- c2 = Categorical(c)
- tm.assert_categorical_equal(c, c2)
+ ],
+ )
+ def test_constructor_invariant(self, values):
+ # GH 14190
+ c = Categorical(values)
+ c2 = Categorical(c)
+ tm.assert_categorical_equal(c, c2)
@pytest.mark.parametrize("ordered", [True, False])
def test_constructor_with_dtype(self, ordered):
@@ -470,9 +472,14 @@ def test_construction_with_null(self, klass, nulls_fixture):
tm.assert_categorical_equal(result, expected)
- def test_from_codes(self):
+ def test_from_codes_empty(self):
+ cat = ["a", "b", "c"]
+ result = Categorical.from_codes([], categories=cat)
+ expected = Categorical([], categories=cat)
- # too few categories
+ tm.assert_categorical_equal(result, expected)
+
+ def test_from_codes_too_few_categories(self):
dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be between "
with pytest.raises(ValueError, match=msg):
@@ -480,22 +487,23 @@ def test_from_codes(self):
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], dtype=dtype)
- # no int codes
+ def test_from_codes_non_int_codes(self):
+ dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be array-like integers"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], dtype=dtype)
- # no unique categories
+ def test_from_codes_non_unique_categories(self):
with pytest.raises(ValueError, match="Categorical categories must be unique"):
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
- # NaN categories included
+ def test_from_codes_nan_cat_included(self):
with pytest.raises(ValueError, match="Categorial categories cannot be null"):
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
- # too negative
+ def test_from_codes_too_negative(self):
dtype = CategoricalDtype(categories=["a", "b", "c"])
msg = r"codes need to be between -1 and len\(categories\)-1"
with pytest.raises(ValueError, match=msg):
@@ -503,6 +511,8 @@ def test_from_codes(self):
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], dtype=dtype)
+ def test_from_codes(self):
+ dtype = CategoricalDtype(categories=["a", "b", "c"])
exp = Categorical(["a", "b", "c"], ordered=False)
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
tm.assert_categorical_equal(exp, res)
@@ -510,21 +520,18 @@ def test_from_codes(self):
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
tm.assert_categorical_equal(exp, res)
- def test_from_codes_with_categorical_categories(self):
+ @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
+ def test_from_codes_with_categorical_categories(self, klass):
# GH17884
expected = Categorical(["a", "b"], categories=["a", "b", "c"])
- result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"]))
+ result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"]))
tm.assert_categorical_equal(result, expected)
- result = Categorical.from_codes(
- [0, 1], categories=CategoricalIndex(["a", "b", "c"])
- )
- tm.assert_categorical_equal(result, expected)
-
- # non-unique Categorical still raises
+ @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
+ def test_from_codes_with_non_unique_categorical_categories(self, klass):
with pytest.raises(ValueError, match="Categorical categories must be unique"):
- Categorical.from_codes([0, 1], Categorical(["a", "b", "a"]))
+ Categorical.from_codes([0, 1], klass(["a", "b", "a"]))
def test_from_codes_with_nan_code(self):
# GH21767
@@ -535,24 +542,16 @@ def test_from_codes_with_nan_code(self):
with pytest.raises(ValueError, match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)
- def test_from_codes_with_float(self):
+ @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]])
+ def test_from_codes_with_float(self, codes):
# GH21767
- codes = [1.0, 2.0, 0] # integer, but in float dtype
+ # float codes should raise even if values are equal to integers
dtype = CategoricalDtype(categories=["a", "b", "c"])
- # empty codes should not raise for floats
- Categorical.from_codes([], dtype.categories)
-
- with pytest.raises(ValueError, match="codes need to be array-like integers"):
- Categorical.from_codes(codes, dtype.categories)
-
- with pytest.raises(ValueError, match="codes need to be array-like integers"):
- Categorical.from_codes(codes, dtype=dtype)
-
- codes = [1.1, 2.0, 0] # non-integer
- with pytest.raises(ValueError, match="codes need to be array-like integers"):
+ msg = "codes need to be array-like integers"
+ with pytest.raises(ValueError, match=msg):
Categorical.from_codes(codes, dtype.categories)
- with pytest.raises(ValueError, match="codes need to be array-like integers"):
+ with pytest.raises(ValueError, match=msg):
Categorical.from_codes(codes, dtype=dtype)
def test_from_codes_with_dtype_raises(self):
diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
index 625d559001e72..f85d823cb2fac 100644
--- a/pandas/tests/base/test_ops.py
+++ b/pandas/tests/base/test_ops.py
@@ -277,6 +277,12 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj):
pytest.skip(f"values of {klass} cannot be changed")
elif isinstance(orig, pd.MultiIndex):
pytest.skip("MultiIndex doesn't support isna")
+ elif orig.duplicated().any():
+ pytest.xfail(
+ "The test implementation isn't flexible enough to deal"
+ " with duplicated values. This isn't a bug in the"
+ " application code, but in the test code."
+ )
# special assign to the numpy array
if is_datetime64tz_dtype(obj):
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
index 774eb443c45fe..03598b6bb5eca 100644
--- a/pandas/tests/frame/conftest.py
+++ b/pandas/tests/frame/conftest.py
@@ -1,3 +1,5 @@
+from itertools import product
+
import numpy as np
import pytest
@@ -5,6 +7,11 @@
import pandas._testing as tm
+@pytest.fixture(params=product([True, False], [True, False]))
+def close_open_fixture(request):
+ return request.param
+
+
@pytest.fixture
def float_frame_with_na():
"""
diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py
new file mode 100644
index 0000000000000..40b0ec0c0d811
--- /dev/null
+++ b/pandas/tests/frame/methods/test_asfreq.py
@@ -0,0 +1,58 @@
+from datetime import datetime
+
+import numpy as np
+
+from pandas import DataFrame, DatetimeIndex, Series, date_range
+import pandas._testing as tm
+
+from pandas.tseries import offsets
+
+
+class TestAsFreq:
+ def test_asfreq(self, datetime_frame):
+ offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd())
+ rule_monthly = datetime_frame.asfreq("BM")
+
+ tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"])
+
+ filled = rule_monthly.asfreq("B", method="pad") # noqa
+ # TODO: actually check that this worked.
+
+ # don't forget!
+ filled_dep = rule_monthly.asfreq("B", method="pad") # noqa
+
+ # test does not blow up on length-0 DataFrame
+ zero_length = datetime_frame.reindex([])
+ result = zero_length.asfreq("BM")
+ assert result is not zero_length
+
+ def test_asfreq_datetimeindex(self):
+ df = DataFrame(
+ {"A": [1, 2, 3]},
+ index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)],
+ )
+ df = df.asfreq("B")
+ assert isinstance(df.index, DatetimeIndex)
+
+ ts = df["A"].asfreq("B")
+ assert isinstance(ts.index, DatetimeIndex)
+
+ def test_asfreq_fillvalue(self):
+ # test for fill value during upsampling, related to issue 3715
+
+ # setup
+ rng = date_range("1/1/2016", periods=10, freq="2S")
+ ts = Series(np.arange(len(rng)), index=rng)
+ df = DataFrame({"one": ts})
+
+ # insert pre-existing missing value
+ df.loc["2016-01-01 00:00:08", "one"] = None
+
+ actual_df = df.asfreq(freq="1S", fill_value=9.0)
+ expected_df = df.asfreq(freq="1S").fillna(9.0)
+ expected_df.loc["2016-01-01 00:00:08", "one"] = None
+ tm.assert_frame_equal(expected_df, actual_df)
+
+ expected_series = ts.asfreq(freq="1S").fillna(9.0)
+ actual_series = ts.asfreq(freq="1S", fill_value=9.0)
+ tm.assert_series_equal(expected_series, actual_series)
diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py
new file mode 100644
index 0000000000000..108bbbfa183c4
--- /dev/null
+++ b/pandas/tests/frame/methods/test_at_time.py
@@ -0,0 +1,86 @@
+from datetime import time
+
+import numpy as np
+import pytest
+import pytz
+
+from pandas import DataFrame, date_range
+import pandas._testing as tm
+
+
+class TestAtTime:
+ def test_at_time(self):
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+ rs = ts.at_time(rng[1])
+ assert (rs.index.hour == rng[1].hour).all()
+ assert (rs.index.minute == rng[1].minute).all()
+ assert (rs.index.second == rng[1].second).all()
+
+ result = ts.at_time("9:30")
+ expected = ts.at_time(time(9, 30))
+ tm.assert_frame_equal(result, expected)
+
+ result = ts.loc[time(9, 30)]
+ expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
+
+ tm.assert_frame_equal(result, expected)
+
+ # midnight, everything
+ rng = date_range("1/1/2000", "1/31/2000")
+ ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
+
+ result = ts.at_time(time(0, 0))
+ tm.assert_frame_equal(result, ts)
+
+ # time doesn't exist
+ rng = date_range("1/1/2012", freq="23Min", periods=384)
+ ts = DataFrame(np.random.randn(len(rng), 2), rng)
+ rs = ts.at_time("16:00")
+ assert len(rs) == 0
+
+ @pytest.mark.parametrize(
+ "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)]
+ )
+ def test_at_time_errors(self, hour):
+ # GH#24043
+ dti = date_range("2018", periods=3, freq="H")
+ df = DataFrame(list(range(len(dti))), index=dti)
+ if getattr(hour, "tzinfo", None) is None:
+ result = df.at_time(hour)
+ expected = df.iloc[1:2]
+ tm.assert_frame_equal(result, expected)
+ else:
+ with pytest.raises(ValueError, match="Index must be timezone"):
+ df.at_time(hour)
+
+ def test_at_time_tz(self):
+ # GH#24043
+ dti = date_range("2018", periods=3, freq="H", tz="US/Pacific")
+ df = DataFrame(list(range(len(dti))), index=dti)
+ result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern")))
+ expected = df.iloc[1:2]
+ tm.assert_frame_equal(result, expected)
+
+ def test_at_time_raises(self):
+ # GH#20725
+ df = DataFrame([[1, 2, 3], [4, 5, 6]])
+ with pytest.raises(TypeError): # index is not a DatetimeIndex
+ df.at_time("00:00")
+
+ @pytest.mark.parametrize("axis", ["index", "columns", 0, 1])
+ def test_at_time_axis(self, axis):
+ # issue 8839
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = DataFrame(np.random.randn(len(rng), len(rng)))
+ ts.index, ts.columns = rng, rng
+
+ indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)]
+
+ if axis in ["index", 0]:
+ expected = ts.loc[indices, :]
+ elif axis in ["columns", 1]:
+ expected = ts.loc[:, indices]
+
+ result = ts.at_time("9:30", axis=axis)
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py
new file mode 100644
index 0000000000000..b40604b4f4a16
--- /dev/null
+++ b/pandas/tests/frame/methods/test_between_time.py
@@ -0,0 +1,110 @@
+from datetime import time
+
+import numpy as np
+import pytest
+
+from pandas import DataFrame, date_range
+import pandas._testing as tm
+
+
+class TestBetweenTime:
+ def test_between_time(self, close_open_fixture):
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+ stime = time(0, 0)
+ etime = time(1, 0)
+ inc_start, inc_end = close_open_fixture
+
+ filtered = ts.between_time(stime, etime, inc_start, inc_end)
+ exp_len = 13 * 4 + 1
+ if not inc_start:
+ exp_len -= 5
+ if not inc_end:
+ exp_len -= 4
+
+ assert len(filtered) == exp_len
+ for rs in filtered.index:
+ t = rs.time()
+ if inc_start:
+ assert t >= stime
+ else:
+ assert t > stime
+
+ if inc_end:
+ assert t <= etime
+ else:
+ assert t < etime
+
+ result = ts.between_time("00:00", "01:00")
+ expected = ts.between_time(stime, etime)
+ tm.assert_frame_equal(result, expected)
+
+ # across midnight
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+ stime = time(22, 0)
+ etime = time(9, 0)
+
+ filtered = ts.between_time(stime, etime, inc_start, inc_end)
+ exp_len = (12 * 11 + 1) * 4 + 1
+ if not inc_start:
+ exp_len -= 4
+ if not inc_end:
+ exp_len -= 4
+
+ assert len(filtered) == exp_len
+ for rs in filtered.index:
+ t = rs.time()
+ if inc_start:
+ assert (t >= stime) or (t <= etime)
+ else:
+ assert (t > stime) or (t <= etime)
+
+ if inc_end:
+ assert (t <= etime) or (t >= stime)
+ else:
+ assert (t < etime) or (t >= stime)
+
+ def test_between_time_raises(self):
+ # GH#20725
+ df = DataFrame([[1, 2, 3], [4, 5, 6]])
+ with pytest.raises(TypeError): # index is not a DatetimeIndex
+ df.between_time(start_time="00:00", end_time="12:00")
+
+ def test_between_time_axis(self, axis):
+ # GH#8839
+ rng = date_range("1/1/2000", periods=100, freq="10min")
+ ts = DataFrame(np.random.randn(len(rng), len(rng)))
+ stime, etime = ("08:00:00", "09:00:00")
+ exp_len = 7
+
+ if axis in ["index", 0]:
+ ts.index = rng
+ assert len(ts.between_time(stime, etime)) == exp_len
+ assert len(ts.between_time(stime, etime, axis=0)) == exp_len
+
+ if axis in ["columns", 1]:
+ ts.columns = rng
+ selected = ts.between_time(stime, etime, axis=1).columns
+ assert len(selected) == exp_len
+
+ def test_between_time_axis_raises(self, axis):
+ # issue 8839
+ rng = date_range("1/1/2000", periods=100, freq="10min")
+ mask = np.arange(0, len(rng))
+ rand_data = np.random.randn(len(rng), len(rng))
+ ts = DataFrame(rand_data, index=rng, columns=rng)
+ stime, etime = ("08:00:00", "09:00:00")
+
+ msg = "Index must be DatetimeIndex"
+ if axis in ["columns", 1]:
+ ts.index = mask
+ with pytest.raises(TypeError, match=msg):
+ ts.between_time(stime, etime)
+ with pytest.raises(TypeError, match=msg):
+ ts.between_time(stime, etime, axis=0)
+
+ if axis in ["index", 0]:
+ ts.columns = mask
+ with pytest.raises(TypeError, match=msg):
+ ts.between_time(stime, etime, axis=1)
diff --git a/pandas/tests/frame/methods/test_combine.py b/pandas/tests/frame/methods/test_combine.py
new file mode 100644
index 0000000000000..bc6a67e4e1f32
--- /dev/null
+++ b/pandas/tests/frame/methods/test_combine.py
@@ -0,0 +1,47 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+class TestCombine:
+ @pytest.mark.parametrize(
+ "data",
+ [
+ pd.date_range("2000", periods=4),
+ pd.date_range("2000", periods=4, tz="US/Central"),
+ pd.period_range("2000", periods=4),
+ pd.timedelta_range(0, periods=4),
+ ],
+ )
+ def test_combine_datetlike_udf(self, data):
+ # GH#23079
+ df = pd.DataFrame({"A": data})
+ other = df.copy()
+ df.iloc[1, 0] = None
+
+ def combiner(a, b):
+ return b
+
+ result = df.combine(other, combiner)
+ tm.assert_frame_equal(result, other)
+
+ def test_combine_generic(self, float_frame):
+ df1 = float_frame
+ df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]]
+
+ combined = df1.combine(df2, np.add)
+ combined2 = df2.combine(df1, np.add)
+ assert combined["D"].isna().all()
+ assert combined2["D"].isna().all()
+
+ chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]]
+ chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]]
+
+ exp = (
+ float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk)
+ * 2
+ )
+ tm.assert_frame_equal(chunk, exp)
+ tm.assert_frame_equal(chunk2, exp)
diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py
new file mode 100644
index 0000000000000..e69a562f8214d
--- /dev/null
+++ b/pandas/tests/frame/methods/test_rename.py
@@ -0,0 +1,353 @@
+from collections import ChainMap
+
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Index, MultiIndex
+import pandas._testing as tm
+
+
+class TestRename:
+ def test_rename(self, float_frame):
+ mapping = {"A": "a", "B": "b", "C": "c", "D": "d"}
+
+ renamed = float_frame.rename(columns=mapping)
+ renamed2 = float_frame.rename(columns=str.lower)
+
+ tm.assert_frame_equal(renamed, renamed2)
+ tm.assert_frame_equal(
+ renamed2.rename(columns=str.upper), float_frame, check_names=False
+ )
+
+ # index
+ data = {"A": {"foo": 0, "bar": 1}}
+
+ # gets sorted alphabetical
+ df = DataFrame(data)
+ renamed = df.rename(index={"foo": "bar", "bar": "foo"})
+ tm.assert_index_equal(renamed.index, Index(["foo", "bar"]))
+
+ renamed = df.rename(index=str.upper)
+ tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"]))
+
+ # have to pass something
+ with pytest.raises(TypeError, match="must pass an index to rename"):
+ float_frame.rename()
+
+ # partial columns
+ renamed = float_frame.rename(columns={"C": "foo", "D": "bar"})
+ tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"]))
+
+ # other axis
+ renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"})
+ tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"]))
+
+ # index with name
+ index = Index(["foo", "bar"], name="name")
+ renamer = DataFrame(data, index=index)
+ renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
+ tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name"))
+ assert renamed.index.name == renamer.index.name
+
+ @pytest.mark.parametrize(
+ "args,kwargs",
+ [
+ ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")),
+ ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))),
+ ],
+ )
+ def test_rename_chainmap(self, args, kwargs):
+ # see gh-23859
+ colAData = range(1, 11)
+ colBdata = np.random.randn(10)
+
+ df = DataFrame({"A": colAData, "B": colBdata})
+ result = df.rename(*args, **kwargs)
+
+ expected = DataFrame({"a": colAData, "b": colBdata})
+ tm.assert_frame_equal(result, expected)
+
+ @pytest.mark.parametrize(
+ "kwargs, rename_index, rename_columns",
+ [
+ ({"mapper": None, "axis": 0}, True, False),
+ ({"mapper": None, "axis": 1}, False, True),
+ ({"index": None}, True, False),
+ ({"columns": None}, False, True),
+ ({"index": None, "columns": None}, True, True),
+ ({}, False, False),
+ ],
+ )
+ def test_rename_axis_none(self, kwargs, rename_index, rename_columns):
+ # GH 25034
+ index = Index(list("abc"), name="foo")
+ columns = Index(["col1", "col2"], name="bar")
+ data = np.arange(6).reshape(3, 2)
+ df = DataFrame(data, index, columns)
+
+ result = df.rename_axis(**kwargs)
+ expected_index = index.rename(None) if rename_index else index
+ expected_columns = columns.rename(None) if rename_columns else columns
+ expected = DataFrame(data, expected_index, expected_columns)
+ tm.assert_frame_equal(result, expected)
+
+ def test_rename_multiindex(self):
+
+ tuples_index = [("foo1", "bar1"), ("foo2", "bar2")]
+ tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")]
+ index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"])
+ columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"])
+ df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
+
+ #
+ # without specifying level -> across all levels
+
+ renamed = df.rename(
+ index={"foo1": "foo3", "bar2": "bar3"},
+ columns={"fizz1": "fizz3", "buzz2": "buzz3"},
+ )
+ new_index = MultiIndex.from_tuples(
+ [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"]
+ )
+ new_columns = MultiIndex.from_tuples(
+ [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
+ )
+ tm.assert_index_equal(renamed.index, new_index)
+ tm.assert_index_equal(renamed.columns, new_columns)
+ assert renamed.index.names == df.index.names
+ assert renamed.columns.names == df.columns.names
+
+ #
+ # with specifying a level (GH13766)
+
+ # dict
+ new_columns = MultiIndex.from_tuples(
+ [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"]
+ )
+ renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0)
+ tm.assert_index_equal(renamed.columns, new_columns)
+ renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz")
+ tm.assert_index_equal(renamed.columns, new_columns)
+
+ new_columns = MultiIndex.from_tuples(
+ [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
+ )
+ renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1)
+ tm.assert_index_equal(renamed.columns, new_columns)
+ renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz")
+ tm.assert_index_equal(renamed.columns, new_columns)
+
+ # function
+ func = str.upper
+ new_columns = MultiIndex.from_tuples(
+ [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"]
+ )
+ renamed = df.rename(columns=func, level=0)
+ tm.assert_index_equal(renamed.columns, new_columns)
+ renamed = df.rename(columns=func, level="fizz")
+ tm.assert_index_equal(renamed.columns, new_columns)
+
+ new_columns = MultiIndex.from_tuples(
+ [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"]
+ )
+ renamed = df.rename(columns=func, level=1)
+ tm.assert_index_equal(renamed.columns, new_columns)
+ renamed = df.rename(columns=func, level="buzz")
+ tm.assert_index_equal(renamed.columns, new_columns)
+
+ # index
+ new_index = MultiIndex.from_tuples(
+ [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"]
+ )
+ renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
+ tm.assert_index_equal(renamed.index, new_index)
+
+ def test_rename_nocopy(self, float_frame):
+ renamed = float_frame.rename(columns={"C": "foo"}, copy=False)
+ renamed["foo"] = 1.0
+ assert (float_frame["C"] == 1.0).all()
+
+ def test_rename_inplace(self, float_frame):
+ float_frame.rename(columns={"C": "foo"})
+ assert "C" in float_frame
+ assert "foo" not in float_frame
+
+ c_id = id(float_frame["C"])
+ float_frame = float_frame.copy()
+ float_frame.rename(columns={"C": "foo"}, inplace=True)
+
+ assert "C" not in float_frame
+ assert "foo" in float_frame
+ assert id(float_frame["foo"]) != c_id
+
+ def test_rename_bug(self):
+ # GH 5344
+ # rename set ref_locs, and set_index was not resetting
+ df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]})
+ df = df.rename(columns={0: "a"})
+ df = df.rename(columns={1: "b"})
+ df = df.set_index(["a", "b"])
+ df.columns = ["2001-01-01"]
+ expected = DataFrame(
+ [[1], [2]],
+ index=MultiIndex.from_tuples(
+ [("foo", "bah"), ("bar", "bas")], names=["a", "b"]
+ ),
+ columns=["2001-01-01"],
+ )
+ tm.assert_frame_equal(df, expected)
+
+ def test_rename_bug2(self):
+ # GH 19497
+ # rename was changing Index to MultiIndex if Index contained tuples
+
+ df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"])
+ df = df.rename({(1, 1): (5, 4)}, axis="index")
+ expected = DataFrame(
+ data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"]
+ )
+ tm.assert_frame_equal(df, expected)
+
+ def test_rename_errors_raises(self):
+ df = DataFrame(columns=["A", "B", "C", "D"])
+ with pytest.raises(KeyError, match="'E'] not found in axis"):
+ df.rename(columns={"A": "a", "E": "e"}, errors="raise")
+
+ @pytest.mark.parametrize(
+ "mapper, errors, expected_columns",
+ [
+ ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]),
+ ({"A": "a"}, "raise", ["a", "B", "C", "D"]),
+ (str.lower, "raise", ["a", "b", "c", "d"]),
+ ],
+ )
+ def test_rename_errors(self, mapper, errors, expected_columns):
+ # GH 13473
+ # rename now works with errors parameter
+ df = DataFrame(columns=["A", "B", "C", "D"])
+ result = df.rename(columns=mapper, errors=errors)
+ expected = DataFrame(columns=expected_columns)
+ tm.assert_frame_equal(result, expected)
+
+ def test_rename_objects(self, float_string_frame):
+ renamed = float_string_frame.rename(columns=str.upper)
+
+ assert "FOO" in renamed
+ assert "foo" not in renamed
+
+ def test_rename_axis_style(self):
+ # https://github.com/pandas-dev/pandas/issues/12392
+ df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"])
+ expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
+
+ result = df.rename(str.lower, axis=1)
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename(str.lower, axis="columns")
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename({"A": "a", "B": "b"}, axis=1)
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename({"A": "a", "B": "b"}, axis="columns")
+ tm.assert_frame_equal(result, expected)
+
+ # Index
+ expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
+ result = df.rename(str.lower, axis=0)
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename(str.lower, axis="index")
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename({"X": "x", "Y": "y"}, axis=0)
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename({"X": "x", "Y": "y"}, axis="index")
+ tm.assert_frame_equal(result, expected)
+
+ result = df.rename(mapper=str.lower, axis="index")
+ tm.assert_frame_equal(result, expected)
+
+ def test_rename_mapper_multi(self):
+ df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index(
+ ["A", "B"]
+ )
+ result = df.rename(str.upper)
+ expected = df.rename(index=str.upper)
+ tm.assert_frame_equal(result, expected)
+
+ def test_rename_positional_named(self):
+ # https://github.com/pandas-dev/pandas/issues/12392
+ df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
+ result = df.rename(index=str.lower, columns=str.upper)
+ expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
+ tm.assert_frame_equal(result, expected)
+
+ def test_rename_axis_style_raises(self):
+ # see gh-12392
+ df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"])
+
+ # Named target and axis
+ over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
+ with pytest.raises(TypeError, match=over_spec_msg):
+ df.rename(index=str.lower, axis=1)
+
+ with pytest.raises(TypeError, match=over_spec_msg):
+ df.rename(index=str.lower, axis="columns")
+
+ with pytest.raises(TypeError, match=over_spec_msg):
+ df.rename(columns=str.lower, axis="columns")
+
+ with pytest.raises(TypeError, match=over_spec_msg):
+ df.rename(index=str.lower, axis=0)
+
+ # Multiple targets and axis
+ with pytest.raises(TypeError, match=over_spec_msg):
+ df.rename(str.lower, index=str.lower, axis="columns")
+
+ # Too many targets
+ over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
+ with pytest.raises(TypeError, match=over_spec_msg):
+ df.rename(str.lower, index=str.lower, columns=str.lower)
+
+ # Duplicates
+ with pytest.raises(TypeError, match="multiple values"):
+ df.rename(id, mapper=id)
+
+ def test_rename_positional_raises(self):
+ # GH 29136
+ df = DataFrame(columns=["A", "B"])
+ msg = r"rename\(\) takes from 1 to 2 positional arguments"
+
+ with pytest.raises(TypeError, match=msg):
+ df.rename(None, str.lower)
+
+ def test_rename_no_mappings_raises(self):
+ # GH 29136
+ df = DataFrame([[1]])
+ msg = "must pass an index to rename"
+ with pytest.raises(TypeError, match=msg):
+ df.rename()
+
+ with pytest.raises(TypeError, match=msg):
+ df.rename(None, index=None)
+
+ with pytest.raises(TypeError, match=msg):
+ df.rename(None, columns=None)
+
+ with pytest.raises(TypeError, match=msg):
+ df.rename(None, columns=None, index=None)
+
+ def test_rename_mapper_and_positional_arguments_raises(self):
+ # GH 29136
+ df = DataFrame([[1]])
+ msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
+ with pytest.raises(TypeError, match=msg):
+ df.rename({}, index={})
+
+ with pytest.raises(TypeError, match=msg):
+ df.rename({}, columns={})
+
+ with pytest.raises(TypeError, match=msg):
+ df.rename({}, columns={}, index={})
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
new file mode 100644
index 0000000000000..6586c19af2539
--- /dev/null
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -0,0 +1,299 @@
+from datetime import datetime
+
+import numpy as np
+import pytest
+
+from pandas import (
+ DataFrame,
+ Index,
+ IntervalIndex,
+ MultiIndex,
+ RangeIndex,
+ Series,
+ Timestamp,
+ date_range,
+)
+import pandas._testing as tm
+
+
+class TestResetIndex:
+ def test_reset_index_tz(self, tz_aware_fixture):
+ # GH 3950
+ # reset_index with single level
+ tz = tz_aware_fixture
+ idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx")
+ df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx)
+
+ expected = DataFrame(
+ {
+ "idx": [
+ datetime(2011, 1, 1),
+ datetime(2011, 1, 2),
+ datetime(2011, 1, 3),
+ datetime(2011, 1, 4),
+ datetime(2011, 1, 5),
+ ],
+ "a": range(5),
+ "b": ["A", "B", "C", "D", "E"],
+ },
+ columns=["idx", "a", "b"],
+ )
+ expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz))
+ tm.assert_frame_equal(df.reset_index(), expected)
+
+ def test_reset_index_with_intervals(self):
+ idx = IntervalIndex.from_breaks(np.arange(11), name="x")
+ original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]]
+
+ result = original.set_index("x")
+ expected = DataFrame({"y": np.arange(10)}, index=idx)
+ tm.assert_frame_equal(result, expected)
+
+ result2 = result.reset_index()
+ tm.assert_frame_equal(result2, original)
+
+ def test_reset_index(self, float_frame):
+ stacked = float_frame.stack()[::2]
+ stacked = DataFrame({"foo": stacked, "bar": stacked})
+
+ names = ["first", "second"]
+ stacked.index.names = names
+ deleveled = stacked.reset_index()
+ for i, (lev, level_codes) in enumerate(
+ zip(stacked.index.levels, stacked.index.codes)
+ ):
+ values = lev.take(level_codes)
+ name = names[i]
+ tm.assert_index_equal(values, Index(deleveled[name]))
+
+ stacked.index.names = [None, None]
+ deleveled2 = stacked.reset_index()
+ tm.assert_series_equal(
+ deleveled["first"], deleveled2["level_0"], check_names=False
+ )
+ tm.assert_series_equal(
+ deleveled["second"], deleveled2["level_1"], check_names=False
+ )
+
+ # default name assigned
+ rdf = float_frame.reset_index()
+ exp = Series(float_frame.index.values, name="index")
+ tm.assert_series_equal(rdf["index"], exp)
+
+ # default name assigned, corner case
+ df = float_frame.copy()
+ df["index"] = "foo"
+ rdf = df.reset_index()
+ exp = Series(float_frame.index.values, name="level_0")
+ tm.assert_series_equal(rdf["level_0"], exp)
+
+ # but this is ok
+ float_frame.index.name = "index"
+ deleveled = float_frame.reset_index()
+ tm.assert_series_equal(deleveled["index"], Series(float_frame.index))
+ tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled))))
+
+ # preserve column names
+ float_frame.columns.name = "columns"
+ resetted = float_frame.reset_index()
+ assert resetted.columns.name == "columns"
+
+ # only remove certain columns
+ df = float_frame.reset_index().set_index(["index", "A", "B"])
+ rs = df.reset_index(["A", "B"])
+
+        # TODO: should reset_index compare index/column names (check_names)?
+ tm.assert_frame_equal(rs, float_frame, check_names=False)
+
+ rs = df.reset_index(["index", "A", "B"])
+ tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False)
+
+ rs = df.reset_index(["index", "A", "B"])
+ tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False)
+
+ rs = df.reset_index("A")
+ xp = float_frame.reset_index().set_index(["index", "B"])
+ tm.assert_frame_equal(rs, xp, check_names=False)
+
+ # test resetting in place
+ df = float_frame.copy()
+ resetted = float_frame.reset_index()
+ df.reset_index(inplace=True)
+ tm.assert_frame_equal(df, resetted, check_names=False)
+
+ df = float_frame.reset_index().set_index(["index", "A", "B"])
+ rs = df.reset_index("A", drop=True)
+ xp = float_frame.copy()
+ del xp["A"]
+ xp = xp.set_index(["B"], append=True)
+ tm.assert_frame_equal(rs, xp, check_names=False)
+
+ def test_reset_index_name(self):
+ df = DataFrame(
+ [[1, 2, 3, 4], [5, 6, 7, 8]],
+ columns=["A", "B", "C", "D"],
+ index=Index(range(2), name="x"),
+ )
+ assert df.reset_index().index.name is None
+ assert df.reset_index(drop=True).index.name is None
+ df.reset_index(inplace=True)
+ assert df.index.name is None
+
+ def test_reset_index_level(self):
+ df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"])
+
+ for levels in ["A", "B"], [0, 1]:
+ # With MultiIndex
+ result = df.set_index(["A", "B"]).reset_index(level=levels[0])
+ tm.assert_frame_equal(result, df.set_index("B"))
+
+ result = df.set_index(["A", "B"]).reset_index(level=levels[:1])
+ tm.assert_frame_equal(result, df.set_index("B"))
+
+ result = df.set_index(["A", "B"]).reset_index(level=levels)
+ tm.assert_frame_equal(result, df)
+
+ result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
+ tm.assert_frame_equal(result, df[["C", "D"]])
+
+ # With single-level Index (GH 16263)
+ result = df.set_index("A").reset_index(level=levels[0])
+ tm.assert_frame_equal(result, df)
+
+ result = df.set_index("A").reset_index(level=levels[:1])
+ tm.assert_frame_equal(result, df)
+
+ result = df.set_index(["A"]).reset_index(level=levels[0], drop=True)
+ tm.assert_frame_equal(result, df[["B", "C", "D"]])
+
+ # Missing levels - for both MultiIndex and single-level Index:
+ for idx_lev in ["A", "B"], ["A"]:
+ with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"):
+ df.set_index(idx_lev).reset_index(level=["A", "E"])
+ with pytest.raises(IndexError, match="Too many levels"):
+ df.set_index(idx_lev).reset_index(level=[0, 1, 2])
+
+ def test_reset_index_right_dtype(self):
+ time = np.arange(0.0, 10, np.sqrt(2) / 2)
+ s1 = Series(
+ (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed"
+ )
+ df = DataFrame(s1)
+
+ resetted = s1.reset_index()
+ assert resetted["time"].dtype == np.float64
+
+ resetted = df.reset_index()
+ assert resetted["time"].dtype == np.float64
+
+ def test_reset_index_multiindex_col(self):
+ vals = np.random.randn(3, 3).astype(object)
+ idx = ["x", "y", "z"]
+ full = np.hstack(([[x] for x in idx], vals))
+ df = DataFrame(
+ vals,
+ Index(idx, name="a"),
+ columns=[["b", "b", "c"], ["mean", "median", "mean"]],
+ )
+ rs = df.reset_index()
+ xp = DataFrame(
+ full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]]
+ )
+ tm.assert_frame_equal(rs, xp)
+
+ rs = df.reset_index(col_fill=None)
+ xp = DataFrame(
+ full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]]
+ )
+ tm.assert_frame_equal(rs, xp)
+
+ rs = df.reset_index(col_level=1, col_fill="blah")
+ xp = DataFrame(
+ full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]]
+ )
+ tm.assert_frame_equal(rs, xp)
+
+ df = DataFrame(
+ vals,
+ MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]),
+ columns=[["b", "b", "c"], ["mean", "median", "mean"]],
+ )
+ rs = df.reset_index("a")
+ xp = DataFrame(
+ full,
+ Index([0, 1, 2], name="d"),
+ columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]],
+ )
+ tm.assert_frame_equal(rs, xp)
+
+ rs = df.reset_index("a", col_fill=None)
+ xp = DataFrame(
+ full,
+ Index(range(3), name="d"),
+ columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]],
+ )
+ tm.assert_frame_equal(rs, xp)
+
+ rs = df.reset_index("a", col_fill="blah", col_level=1)
+ xp = DataFrame(
+ full,
+ Index(range(3), name="d"),
+ columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]],
+ )
+ tm.assert_frame_equal(rs, xp)
+
+ def test_reset_index_multiindex_nan(self):
+ # GH#6322, testing reset_index on MultiIndexes
+ # when we have a nan or all nan
+ df = DataFrame(
+ {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)}
+ )
+ rs = df.set_index(["A", "B"]).reset_index()
+ tm.assert_frame_equal(rs, df)
+
+ df = DataFrame(
+ {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)}
+ )
+ rs = df.set_index(["A", "B"]).reset_index()
+ tm.assert_frame_equal(rs, df)
+
+ df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]})
+ rs = df.set_index(["A", "B"]).reset_index()
+ tm.assert_frame_equal(rs, df)
+
+ df = DataFrame(
+ {
+ "A": ["a", "b", "c"],
+ "B": [np.nan, np.nan, np.nan],
+ "C": np.random.rand(3),
+ }
+ )
+ rs = df.set_index(["A", "B"]).reset_index()
+ tm.assert_frame_equal(rs, df)
+
+ def test_reset_index_with_datetimeindex_cols(self):
+ # GH#5818
+ df = DataFrame(
+ [[1, 2], [3, 4]],
+ columns=date_range("1/1/2013", "1/2/2013"),
+ index=["A", "B"],
+ )
+
+ result = df.reset_index()
+ expected = DataFrame(
+ [["A", 1, 2], ["B", 3, 4]],
+ columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)],
+ )
+ tm.assert_frame_equal(result, expected)
+
+ def test_reset_index_range(self):
+ # GH#12071
+ df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2))
+ result = df.reset_index()
+ assert isinstance(result.index, RangeIndex)
+ expected = DataFrame(
+ [[0, 0, 0], [1, 1, 1]],
+ columns=["index", "A", "B"],
+ index=RangeIndex(stop=2),
+ )
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
new file mode 100644
index 0000000000000..fe7baebcf0cf7
--- /dev/null
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -0,0 +1,329 @@
+from collections import OrderedDict
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import DataFrame, Timestamp
+import pandas._testing as tm
+
+
+class TestSelectDtypes:
+ def test_select_dtypes_include_using_list_like(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.Categorical(list("abc")),
+ "g": pd.date_range("20130101", periods=3),
+ "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
+ "i": pd.date_range("20130101", periods=3, tz="CET"),
+ "j": pd.period_range("2013-01", periods=3, freq="M"),
+ "k": pd.timedelta_range("1 day", periods=3),
+ }
+ )
+
+ ri = df.select_dtypes(include=[np.number])
+ ei = df[["b", "c", "d", "k"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include=[np.number], exclude=["timedelta"])
+ ei = df[["b", "c", "d"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"])
+ ei = df[["b", "c", "d", "f"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include=["datetime"])
+ ei = df[["g"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include=["datetime64"])
+ ei = df[["g"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include=["datetimetz"])
+ ei = df[["h", "i"]]
+ tm.assert_frame_equal(ri, ei)
+
+ with pytest.raises(NotImplementedError, match=r"^$"):
+ df.select_dtypes(include=["period"])
+
+ def test_select_dtypes_exclude_using_list_like(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ }
+ )
+ re = df.select_dtypes(exclude=[np.number])
+ ee = df[["a", "e"]]
+ tm.assert_frame_equal(re, ee)
+
+ def test_select_dtypes_exclude_include_using_list_like(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.date_range("now", periods=3).values,
+ }
+ )
+ exclude = (np.datetime64,)
+ include = np.bool_, "integer"
+ r = df.select_dtypes(include=include, exclude=exclude)
+ e = df[["b", "c", "e"]]
+ tm.assert_frame_equal(r, e)
+
+ exclude = ("datetime",)
+ include = "bool", "int64", "int32"
+ r = df.select_dtypes(include=include, exclude=exclude)
+ e = df[["b", "e"]]
+ tm.assert_frame_equal(r, e)
+
+ def test_select_dtypes_include_using_scalars(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.Categorical(list("abc")),
+ "g": pd.date_range("20130101", periods=3),
+ "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
+ "i": pd.date_range("20130101", periods=3, tz="CET"),
+ "j": pd.period_range("2013-01", periods=3, freq="M"),
+ "k": pd.timedelta_range("1 day", periods=3),
+ }
+ )
+
+ ri = df.select_dtypes(include=np.number)
+ ei = df[["b", "c", "d", "k"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include="datetime")
+ ei = df[["g"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include="datetime64")
+ ei = df[["g"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include="category")
+ ei = df[["f"]]
+ tm.assert_frame_equal(ri, ei)
+
+ with pytest.raises(NotImplementedError, match=r"^$"):
+ df.select_dtypes(include="period")
+
+ def test_select_dtypes_exclude_using_scalars(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.Categorical(list("abc")),
+ "g": pd.date_range("20130101", periods=3),
+ "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
+ "i": pd.date_range("20130101", periods=3, tz="CET"),
+ "j": pd.period_range("2013-01", periods=3, freq="M"),
+ "k": pd.timedelta_range("1 day", periods=3),
+ }
+ )
+
+ ri = df.select_dtypes(exclude=np.number)
+ ei = df[["a", "e", "f", "g", "h", "i", "j"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(exclude="category")
+ ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]]
+ tm.assert_frame_equal(ri, ei)
+
+ with pytest.raises(NotImplementedError, match=r"^$"):
+ df.select_dtypes(exclude="period")
+
+ def test_select_dtypes_include_exclude_using_scalars(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.Categorical(list("abc")),
+ "g": pd.date_range("20130101", periods=3),
+ "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
+ "i": pd.date_range("20130101", periods=3, tz="CET"),
+ "j": pd.period_range("2013-01", periods=3, freq="M"),
+ "k": pd.timedelta_range("1 day", periods=3),
+ }
+ )
+
+ ri = df.select_dtypes(include=np.number, exclude="floating")
+ ei = df[["b", "c", "k"]]
+ tm.assert_frame_equal(ri, ei)
+
+ def test_select_dtypes_include_exclude_mixed_scalars_lists(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.Categorical(list("abc")),
+ "g": pd.date_range("20130101", periods=3),
+ "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
+ "i": pd.date_range("20130101", periods=3, tz="CET"),
+ "j": pd.period_range("2013-01", periods=3, freq="M"),
+ "k": pd.timedelta_range("1 day", periods=3),
+ }
+ )
+
+ ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"])
+ ei = df[["b", "c"]]
+ tm.assert_frame_equal(ri, ei)
+
+ ri = df.select_dtypes(include=[np.number, "category"], exclude="floating")
+ ei = df[["b", "c", "f", "k"]]
+ tm.assert_frame_equal(ri, ei)
+
+ def test_select_dtypes_duplicate_columns(self):
+ # GH20839
+ odict = OrderedDict
+ df = DataFrame(
+ odict(
+ [
+ ("a", list("abc")),
+ ("b", list(range(1, 4))),
+ ("c", np.arange(3, 6).astype("u1")),
+ ("d", np.arange(4.0, 7.0, dtype="float64")),
+ ("e", [True, False, True]),
+ ("f", pd.date_range("now", periods=3).values),
+ ]
+ )
+ )
+ df.columns = ["a", "a", "b", "b", "b", "c"]
+
+ expected = DataFrame(
+ {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")}
+ )
+
+ result = df.select_dtypes(include=[np.number], exclude=["floating"])
+ tm.assert_frame_equal(result, expected)
+
+ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.date_range("now", periods=3).values,
+ }
+ )
+ df["g"] = df.f.diff()
+ assert not hasattr(np, "u8")
+ r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
+ e = df[["a", "b"]]
+ tm.assert_frame_equal(r, e)
+
+ r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
+ e = df[["a", "b", "g"]]
+ tm.assert_frame_equal(r, e)
+
+ def test_select_dtypes_empty(self):
+ df = DataFrame({"a": list("abc"), "b": list(range(1, 4))})
+ msg = "at least one of include or exclude must be nonempty"
+ with pytest.raises(ValueError, match=msg):
+ df.select_dtypes()
+
+ def test_select_dtypes_bad_datetime64(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.date_range("now", periods=3).values,
+ }
+ )
+ with pytest.raises(ValueError, match=".+ is too specific"):
+ df.select_dtypes(include=["datetime64[D]"])
+
+ with pytest.raises(ValueError, match=".+ is too specific"):
+ df.select_dtypes(exclude=["datetime64[as]"])
+
+ def test_select_dtypes_datetime_with_tz(self):
+
+ df2 = DataFrame(
+ dict(
+ A=Timestamp("20130102", tz="US/Eastern"),
+ B=Timestamp("20130603", tz="CET"),
+ ),
+ index=range(5),
+ )
+ df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
+ result = df3.select_dtypes(include=["datetime64[ns]"])
+ expected = df3.reindex(columns=[])
+ tm.assert_frame_equal(result, expected)
+
+ @pytest.mark.parametrize(
+ "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"]
+ )
+ @pytest.mark.parametrize("arg", ["include", "exclude"])
+ def test_select_dtypes_str_raises(self, dtype, arg):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "g": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.date_range("now", periods=3).values,
+ }
+ )
+ msg = "string dtypes are not allowed"
+ kwargs = {arg: [dtype]}
+
+ with pytest.raises(TypeError, match=msg):
+ df.select_dtypes(**kwargs)
+
+ def test_select_dtypes_bad_arg_raises(self):
+ df = DataFrame(
+ {
+ "a": list("abc"),
+ "g": list("abc"),
+ "b": list(range(1, 4)),
+ "c": np.arange(3, 6).astype("u1"),
+ "d": np.arange(4.0, 7.0, dtype="float64"),
+ "e": [True, False, True],
+ "f": pd.date_range("now", periods=3).values,
+ }
+ )
+
+ msg = "data type.*not understood"
+ with pytest.raises(TypeError, match=msg):
+ df.select_dtypes(["blargy, blarg, blarg"])
+
+ def test_select_dtypes_typecodes(self):
+ # GH 11990
+ df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random())
+ expected = df
+ FLOAT_TYPES = list(np.typecodes["AllFloat"])
+ tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected)
diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py
new file mode 100644
index 0000000000000..eac78e611b008
--- /dev/null
+++ b/pandas/tests/frame/methods/test_to_period.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, date_range, period_range
+import pandas._testing as tm
+
+
+class TestToPeriod:
+ def test_frame_to_period(self):
+ K = 5
+
+ dr = date_range("1/1/2000", "1/1/2001")
+ pr = period_range("1/1/2000", "1/1/2001")
+ df = DataFrame(np.random.randn(len(dr), K), index=dr)
+ df["mix"] = "a"
+
+ pts = df.to_period()
+ exp = df.copy()
+ exp.index = pr
+ tm.assert_frame_equal(pts, exp)
+
+ pts = df.to_period("M")
+ tm.assert_index_equal(pts.index, exp.index.asfreq("M"))
+
+ df = df.T
+ pts = df.to_period(axis=1)
+ exp = df.copy()
+ exp.columns = pr
+ tm.assert_frame_equal(pts, exp)
+
+ pts = df.to_period("M", axis=1)
+ tm.assert_index_equal(pts.columns, exp.columns.asfreq("M"))
+
+ msg = "No axis named 2 for object type "
+ with pytest.raises(ValueError, match=msg):
+ df.to_period(axis=2)
diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py
new file mode 100644
index 0000000000000..ae7d2827e05a6
--- /dev/null
+++ b/pandas/tests/frame/methods/test_to_timestamp.py
@@ -0,0 +1,103 @@
+from datetime import timedelta
+
+import numpy as np
+import pytest
+
+from pandas import (
+ DataFrame,
+ DatetimeIndex,
+ Timedelta,
+ date_range,
+ period_range,
+ to_datetime,
+)
+import pandas._testing as tm
+
+
+class TestToTimestamp:
+ def test_frame_to_time_stamp(self):
+ K = 5
+ index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
+ df = DataFrame(np.random.randn(len(index), K), index=index)
+ df["mix"] = "a"
+
+ exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC")
+ exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns")
+ result = df.to_timestamp("D", "end")
+ tm.assert_index_equal(result.index, exp_index)
+ tm.assert_numpy_array_equal(result.values, df.values)
+
+ exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
+ result = df.to_timestamp("D", "start")
+ tm.assert_index_equal(result.index, exp_index)
+
+ def _get_with_delta(delta, freq="A-DEC"):
+ return date_range(
+ to_datetime("1/1/2001") + delta,
+ to_datetime("12/31/2009") + delta,
+ freq=freq,
+ )
+
+ delta = timedelta(hours=23)
+ result = df.to_timestamp("H", "end")
+ exp_index = _get_with_delta(delta)
+ exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns")
+ tm.assert_index_equal(result.index, exp_index)
+
+ delta = timedelta(hours=23, minutes=59)
+ result = df.to_timestamp("T", "end")
+ exp_index = _get_with_delta(delta)
+ exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns")
+ tm.assert_index_equal(result.index, exp_index)
+
+ result = df.to_timestamp("S", "end")
+ delta = timedelta(hours=23, minutes=59, seconds=59)
+ exp_index = _get_with_delta(delta)
+ exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
+ tm.assert_index_equal(result.index, exp_index)
+
+ # columns
+ df = df.T
+
+ exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC")
+ exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns")
+ result = df.to_timestamp("D", "end", axis=1)
+ tm.assert_index_equal(result.columns, exp_index)
+ tm.assert_numpy_array_equal(result.values, df.values)
+
+ exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
+ result = df.to_timestamp("D", "start", axis=1)
+ tm.assert_index_equal(result.columns, exp_index)
+
+ delta = timedelta(hours=23)
+ result = df.to_timestamp("H", "end", axis=1)
+ exp_index = _get_with_delta(delta)
+ exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns")
+ tm.assert_index_equal(result.columns, exp_index)
+
+ delta = timedelta(hours=23, minutes=59)
+ result = df.to_timestamp("T", "end", axis=1)
+ exp_index = _get_with_delta(delta)
+ exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns")
+ tm.assert_index_equal(result.columns, exp_index)
+
+ result = df.to_timestamp("S", "end", axis=1)
+ delta = timedelta(hours=23, minutes=59, seconds=59)
+ exp_index = _get_with_delta(delta)
+ exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
+ tm.assert_index_equal(result.columns, exp_index)
+
+ # invalid axis
+ with pytest.raises(ValueError, match="axis"):
+ df.to_timestamp(axis=2)
+
+ result1 = df.to_timestamp("5t", axis=1)
+ result2 = df.to_timestamp("t", axis=1)
+ expected = date_range("2001-01-01", "2009-01-01", freq="AS")
+ assert isinstance(result1.columns, DatetimeIndex)
+ assert isinstance(result2.columns, DatetimeIndex)
+ tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8)
+ tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8)
+ # PeriodIndex.to_timestamp always use 'infer'
+ assert result1.columns.freqstr == "AS-JAN"
+ assert result2.columns.freqstr == "AS-JAN"
diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py
new file mode 100644
index 0000000000000..ea8c4b88538d4
--- /dev/null
+++ b/pandas/tests/frame/methods/test_tz_convert.py
@@ -0,0 +1,84 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Index, MultiIndex, date_range
+import pandas._testing as tm
+
+
+class TestTZConvert:
+ def test_frame_tz_convert(self):
+ rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
+
+ df = DataFrame({"a": 1}, index=rng)
+ result = df.tz_convert("Europe/Berlin")
+ expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin"))
+ assert result.index.tz.zone == "Europe/Berlin"
+ tm.assert_frame_equal(result, expected)
+
+ df = df.T
+ result = df.tz_convert("Europe/Berlin", axis=1)
+ assert result.columns.tz.zone == "Europe/Berlin"
+ tm.assert_frame_equal(result, expected.T)
+
+ @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"])
+ def test_tz_convert_and_localize(self, fn):
+ l0 = date_range("20140701", periods=5, freq="D")
+ l1 = date_range("20140701", periods=5, freq="D")
+
+ int_idx = Index(range(5))
+
+ if fn == "tz_convert":
+ l0 = l0.tz_localize("UTC")
+ l1 = l1.tz_localize("UTC")
+
+ for idx in [l0, l1]:
+
+ l0_expected = getattr(idx, fn)("US/Pacific")
+ l1_expected = getattr(idx, fn)("US/Pacific")
+
+ df1 = DataFrame(np.ones(5), index=l0)
+ df1 = getattr(df1, fn)("US/Pacific")
+ tm.assert_index_equal(df1.index, l0_expected)
+
+ # MultiIndex
+ # GH7846
+ df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))
+
+ df3 = getattr(df2, fn)("US/Pacific", level=0)
+ assert not df3.index.levels[0].equals(l0)
+ tm.assert_index_equal(df3.index.levels[0], l0_expected)
+ tm.assert_index_equal(df3.index.levels[1], l1)
+ assert not df3.index.levels[1].equals(l1_expected)
+
+ df3 = getattr(df2, fn)("US/Pacific", level=1)
+ tm.assert_index_equal(df3.index.levels[0], l0)
+ assert not df3.index.levels[0].equals(l0_expected)
+ tm.assert_index_equal(df3.index.levels[1], l1_expected)
+ assert not df3.index.levels[1].equals(l1)
+
+ df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
+
+            # TODO: df5 is never asserted on — the checks below re-verify
+            # df3, so the level=1 conversion on df4 is effectively untested
+
+ tm.assert_index_equal(df3.index.levels[0], l0)
+ assert not df3.index.levels[0].equals(l0_expected)
+ tm.assert_index_equal(df3.index.levels[1], l1_expected)
+ assert not df3.index.levels[1].equals(l1)
+
+ # Bad Inputs
+
+ # Not DatetimeIndex / PeriodIndex
+ with pytest.raises(TypeError, match="DatetimeIndex"):
+ df = DataFrame(index=int_idx)
+ df = getattr(df, fn)("US/Pacific")
+
+ # Not DatetimeIndex / PeriodIndex
+ with pytest.raises(TypeError, match="DatetimeIndex"):
+ df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
+ df = getattr(df, fn)("US/Pacific", level=0)
+
+ # Invalid level
+ with pytest.raises(ValueError, match="not valid"):
+ df = DataFrame(index=l0)
+ df = getattr(df, fn)("US/Pacific", level=1)
diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py
new file mode 100644
index 0000000000000..1d4e26a6999b7
--- /dev/null
+++ b/pandas/tests/frame/methods/test_tz_localize.py
@@ -0,0 +1,21 @@
+from pandas import DataFrame, date_range
+import pandas._testing as tm
+
+
+class TestTZLocalize:
+ # See also:
+ # test_tz_convert_and_localize in test_tz_convert
+
+ def test_frame_tz_localize(self):
+ rng = date_range("1/1/2011", periods=100, freq="H")
+
+ df = DataFrame({"a": 1}, index=rng)
+ result = df.tz_localize("utc")
+ expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
+ assert result.index.tz.zone == "UTC"
+ tm.assert_frame_equal(result, expected)
+
+ df = df.T
+ result = df.tz_localize("utc", axis=1)
+ assert result.columns.tz.zone == "UTC"
+ tm.assert_frame_equal(result, expected.T)
diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
new file mode 100644
index 0000000000000..c409b0bbe6fa9
--- /dev/null
+++ b/pandas/tests/frame/methods/test_value_counts.py
@@ -0,0 +1,102 @@
+import numpy as np
+
+import pandas as pd
+import pandas._testing as tm
+
+
+def test_data_frame_value_counts_unsorted():
+ df = pd.DataFrame(
+ {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+ index=["falcon", "dog", "cat", "ant"],
+ )
+
+ result = df.value_counts(sort=False)
+ expected = pd.Series(
+ data=[1, 2, 1],
+ index=pd.MultiIndex.from_arrays(
+ [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"]
+ ),
+ )
+
+ tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_ascending():
+ df = pd.DataFrame(
+ {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+ index=["falcon", "dog", "cat", "ant"],
+ )
+
+ result = df.value_counts(ascending=True)
+ expected = pd.Series(
+ data=[1, 1, 2],
+ index=pd.MultiIndex.from_arrays(
+ [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"]
+ ),
+ )
+
+ tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_default():
+ df = pd.DataFrame(
+ {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+ index=["falcon", "dog", "cat", "ant"],
+ )
+
+ result = df.value_counts()
+ expected = pd.Series(
+ data=[2, 1, 1],
+ index=pd.MultiIndex.from_arrays(
+ [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
+ ),
+ )
+
+ tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_normalize():
+ df = pd.DataFrame(
+ {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+ index=["falcon", "dog", "cat", "ant"],
+ )
+
+ result = df.value_counts(normalize=True)
+ expected = pd.Series(
+ data=[0.5, 0.25, 0.25],
+ index=pd.MultiIndex.from_arrays(
+ [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
+ ),
+ )
+
+ tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_single_col_default():
+ df = pd.DataFrame({"num_legs": [2, 4, 4, 6]})
+
+ result = df.value_counts()
+ expected = pd.Series(
+ data=[2, 1, 1],
+ index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]),
+ )
+
+ tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_empty():
+ df_no_cols = pd.DataFrame()
+
+ result = df_no_cols.value_counts()
+ expected = pd.Series([], dtype=np.int64)
+
+ tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_empty_normalize():
+ df_no_cols = pd.DataFrame()
+
+ result = df_no_cols.value_counts(normalize=True)
+ expected = pd.Series([], dtype=np.float64)
+
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 0c19a38bb5fa2..751ed1dfdd847 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -1,4 +1,3 @@
-from collections import ChainMap
from datetime import datetime, timedelta
import inspect
@@ -18,7 +17,6 @@
Index,
IntervalIndex,
MultiIndex,
- RangeIndex,
Series,
Timestamp,
cut,
@@ -533,30 +531,6 @@ def test_convert_dti_to_series(self):
df.pop("ts")
tm.assert_frame_equal(df, expected)
- def test_reset_index_tz(self, tz_aware_fixture):
- # GH 3950
- # reset_index with single level
- tz = tz_aware_fixture
- idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx")
- df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx)
-
- expected = DataFrame(
- {
- "idx": [
- datetime(2011, 1, 1),
- datetime(2011, 1, 2),
- datetime(2011, 1, 3),
- datetime(2011, 1, 4),
- datetime(2011, 1, 5),
- ],
- "a": range(5),
- "b": ["A", "B", "C", "D", "E"],
- },
- columns=["idx", "a", "b"],
- )
- expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz))
- tm.assert_frame_equal(df.reset_index(), expected)
-
def test_set_index_timezone(self):
# GH 12358
# tz-aware Series should retain the tz
@@ -583,17 +557,6 @@ def test_set_index_dst(self):
exp = DataFrame({"b": [3, 4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp)
- def test_reset_index_with_intervals(self):
- idx = IntervalIndex.from_breaks(np.arange(11), name="x")
- original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]]
-
- result = original.set_index("x")
- expected = DataFrame({"y": np.arange(10)}, index=idx)
- tm.assert_frame_equal(result, expected)
-
- result2 = result.reset_index()
- tm.assert_frame_equal(result2, original)
-
def test_set_index_multiindexcolumns(self):
columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)])
df = DataFrame(np.random.randn(3, 3), columns=columns)
@@ -652,65 +615,6 @@ def test_dti_set_index_reindex(self):
# Renaming
- def test_rename(self, float_frame):
- mapping = {"A": "a", "B": "b", "C": "c", "D": "d"}
-
- renamed = float_frame.rename(columns=mapping)
- renamed2 = float_frame.rename(columns=str.lower)
-
- tm.assert_frame_equal(renamed, renamed2)
- tm.assert_frame_equal(
- renamed2.rename(columns=str.upper), float_frame, check_names=False
- )
-
- # index
- data = {"A": {"foo": 0, "bar": 1}}
-
- # gets sorted alphabetical
- df = DataFrame(data)
- renamed = df.rename(index={"foo": "bar", "bar": "foo"})
- tm.assert_index_equal(renamed.index, Index(["foo", "bar"]))
-
- renamed = df.rename(index=str.upper)
- tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"]))
-
- # have to pass something
- with pytest.raises(TypeError, match="must pass an index to rename"):
- float_frame.rename()
-
- # partial columns
- renamed = float_frame.rename(columns={"C": "foo", "D": "bar"})
- tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"]))
-
- # other axis
- renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"})
- tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"]))
-
- # index with name
- index = Index(["foo", "bar"], name="name")
- renamer = DataFrame(data, index=index)
- renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
- tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name"))
- assert renamed.index.name == renamer.index.name
-
- @pytest.mark.parametrize(
- "args,kwargs",
- [
- ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")),
- ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))),
- ],
- )
- def test_rename_chainmap(self, args, kwargs):
- # see gh-23859
- colAData = range(1, 11)
- colBdata = np.random.randn(10)
-
- df = DataFrame({"A": colAData, "B": colBdata})
- result = df.rename(*args, **kwargs)
-
- expected = DataFrame({"a": colAData, "b": colBdata})
- tm.assert_frame_equal(result, expected)
-
def test_rename_axis_inplace(self, float_frame):
# GH 15704
expected = float_frame.rename_axis("foo")
@@ -785,168 +689,6 @@ def test_rename_axis_mapper(self):
with pytest.raises(TypeError, match="bogus"):
df.rename_axis(bogus=None)
- @pytest.mark.parametrize(
- "kwargs, rename_index, rename_columns",
- [
- ({"mapper": None, "axis": 0}, True, False),
- ({"mapper": None, "axis": 1}, False, True),
- ({"index": None}, True, False),
- ({"columns": None}, False, True),
- ({"index": None, "columns": None}, True, True),
- ({}, False, False),
- ],
- )
- def test_rename_axis_none(self, kwargs, rename_index, rename_columns):
- # GH 25034
- index = Index(list("abc"), name="foo")
- columns = Index(["col1", "col2"], name="bar")
- data = np.arange(6).reshape(3, 2)
- df = DataFrame(data, index, columns)
-
- result = df.rename_axis(**kwargs)
- expected_index = index.rename(None) if rename_index else index
- expected_columns = columns.rename(None) if rename_columns else columns
- expected = DataFrame(data, expected_index, expected_columns)
- tm.assert_frame_equal(result, expected)
-
- def test_rename_multiindex(self):
-
- tuples_index = [("foo1", "bar1"), ("foo2", "bar2")]
- tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")]
- index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"])
- columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"])
- df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
-
- #
- # without specifying level -> across all levels
-
- renamed = df.rename(
- index={"foo1": "foo3", "bar2": "bar3"},
- columns={"fizz1": "fizz3", "buzz2": "buzz3"},
- )
- new_index = MultiIndex.from_tuples(
- [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"]
- )
- new_columns = MultiIndex.from_tuples(
- [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
- )
- tm.assert_index_equal(renamed.index, new_index)
- tm.assert_index_equal(renamed.columns, new_columns)
- assert renamed.index.names == df.index.names
- assert renamed.columns.names == df.columns.names
-
- #
- # with specifying a level (GH13766)
-
- # dict
- new_columns = MultiIndex.from_tuples(
- [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz")
- tm.assert_index_equal(renamed.columns, new_columns)
-
- new_columns = MultiIndex.from_tuples(
- [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz")
- tm.assert_index_equal(renamed.columns, new_columns)
-
- # function
- func = str.upper
- new_columns = MultiIndex.from_tuples(
- [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns=func, level=0)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns=func, level="fizz")
- tm.assert_index_equal(renamed.columns, new_columns)
-
- new_columns = MultiIndex.from_tuples(
- [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns=func, level=1)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns=func, level="buzz")
- tm.assert_index_equal(renamed.columns, new_columns)
-
- # index
- new_index = MultiIndex.from_tuples(
- [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"]
- )
- renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
- tm.assert_index_equal(renamed.index, new_index)
-
- def test_rename_nocopy(self, float_frame):
- renamed = float_frame.rename(columns={"C": "foo"}, copy=False)
- renamed["foo"] = 1.0
- assert (float_frame["C"] == 1.0).all()
-
- def test_rename_inplace(self, float_frame):
- float_frame.rename(columns={"C": "foo"})
- assert "C" in float_frame
- assert "foo" not in float_frame
-
- c_id = id(float_frame["C"])
- float_frame = float_frame.copy()
- float_frame.rename(columns={"C": "foo"}, inplace=True)
-
- assert "C" not in float_frame
- assert "foo" in float_frame
- assert id(float_frame["foo"]) != c_id
-
- def test_rename_bug(self):
- # GH 5344
- # rename set ref_locs, and set_index was not resetting
- df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]})
- df = df.rename(columns={0: "a"})
- df = df.rename(columns={1: "b"})
- df = df.set_index(["a", "b"])
- df.columns = ["2001-01-01"]
- expected = DataFrame(
- [[1], [2]],
- index=MultiIndex.from_tuples(
- [("foo", "bah"), ("bar", "bas")], names=["a", "b"]
- ),
- columns=["2001-01-01"],
- )
- tm.assert_frame_equal(df, expected)
-
- def test_rename_bug2(self):
- # GH 19497
- # rename was changing Index to MultiIndex if Index contained tuples
-
- df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"])
- df = df.rename({(1, 1): (5, 4)}, axis="index")
- expected = DataFrame(
- data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"]
- )
- tm.assert_frame_equal(df, expected)
-
- def test_rename_errors_raises(self):
- df = DataFrame(columns=["A", "B", "C", "D"])
- with pytest.raises(KeyError, match="'E'] not found in axis"):
- df.rename(columns={"A": "a", "E": "e"}, errors="raise")
-
- @pytest.mark.parametrize(
- "mapper, errors, expected_columns",
- [
- ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]),
- ({"A": "a"}, "raise", ["a", "B", "C", "D"]),
- (str.lower, "raise", ["a", "b", "c", "d"]),
- ],
- )
- def test_rename_errors(self, mapper, errors, expected_columns):
- # GH 13473
- # rename now works with errors parameter
- df = DataFrame(columns=["A", "B", "C", "D"])
- result = df.rename(columns=mapper, errors=errors)
- expected = DataFrame(columns=expected_columns)
- tm.assert_frame_equal(result, expected)
-
def test_reorder_levels(self):
index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
@@ -985,253 +727,6 @@ def test_reorder_levels(self):
result = df.reorder_levels(["L0", "L0", "L0"])
tm.assert_frame_equal(result, expected)
- def test_reset_index(self, float_frame):
- stacked = float_frame.stack()[::2]
- stacked = DataFrame({"foo": stacked, "bar": stacked})
-
- names = ["first", "second"]
- stacked.index.names = names
- deleveled = stacked.reset_index()
- for i, (lev, level_codes) in enumerate(
- zip(stacked.index.levels, stacked.index.codes)
- ):
- values = lev.take(level_codes)
- name = names[i]
- tm.assert_index_equal(values, Index(deleveled[name]))
-
- stacked.index.names = [None, None]
- deleveled2 = stacked.reset_index()
- tm.assert_series_equal(
- deleveled["first"], deleveled2["level_0"], check_names=False
- )
- tm.assert_series_equal(
- deleveled["second"], deleveled2["level_1"], check_names=False
- )
-
- # default name assigned
- rdf = float_frame.reset_index()
- exp = Series(float_frame.index.values, name="index")
- tm.assert_series_equal(rdf["index"], exp)
-
- # default name assigned, corner case
- df = float_frame.copy()
- df["index"] = "foo"
- rdf = df.reset_index()
- exp = Series(float_frame.index.values, name="level_0")
- tm.assert_series_equal(rdf["level_0"], exp)
-
- # but this is ok
- float_frame.index.name = "index"
- deleveled = float_frame.reset_index()
- tm.assert_series_equal(deleveled["index"], Series(float_frame.index))
- tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled))))
-
- # preserve column names
- float_frame.columns.name = "columns"
- resetted = float_frame.reset_index()
- assert resetted.columns.name == "columns"
-
- # only remove certain columns
- df = float_frame.reset_index().set_index(["index", "A", "B"])
- rs = df.reset_index(["A", "B"])
-
- # TODO should reset_index check_names ?
- tm.assert_frame_equal(rs, float_frame, check_names=False)
-
- rs = df.reset_index(["index", "A", "B"])
- tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False)
-
- rs = df.reset_index(["index", "A", "B"])
- tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False)
-
- rs = df.reset_index("A")
- xp = float_frame.reset_index().set_index(["index", "B"])
- tm.assert_frame_equal(rs, xp, check_names=False)
-
- # test resetting in place
- df = float_frame.copy()
- resetted = float_frame.reset_index()
- df.reset_index(inplace=True)
- tm.assert_frame_equal(df, resetted, check_names=False)
-
- df = float_frame.reset_index().set_index(["index", "A", "B"])
- rs = df.reset_index("A", drop=True)
- xp = float_frame.copy()
- del xp["A"]
- xp = xp.set_index(["B"], append=True)
- tm.assert_frame_equal(rs, xp, check_names=False)
-
- def test_reset_index_name(self):
- df = DataFrame(
- [[1, 2, 3, 4], [5, 6, 7, 8]],
- columns=["A", "B", "C", "D"],
- index=Index(range(2), name="x"),
- )
- assert df.reset_index().index.name is None
- assert df.reset_index(drop=True).index.name is None
- df.reset_index(inplace=True)
- assert df.index.name is None
-
- def test_reset_index_level(self):
- df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"])
-
- for levels in ["A", "B"], [0, 1]:
- # With MultiIndex
- result = df.set_index(["A", "B"]).reset_index(level=levels[0])
- tm.assert_frame_equal(result, df.set_index("B"))
-
- result = df.set_index(["A", "B"]).reset_index(level=levels[:1])
- tm.assert_frame_equal(result, df.set_index("B"))
-
- result = df.set_index(["A", "B"]).reset_index(level=levels)
- tm.assert_frame_equal(result, df)
-
- result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
- tm.assert_frame_equal(result, df[["C", "D"]])
-
- # With single-level Index (GH 16263)
- result = df.set_index("A").reset_index(level=levels[0])
- tm.assert_frame_equal(result, df)
-
- result = df.set_index("A").reset_index(level=levels[:1])
- tm.assert_frame_equal(result, df)
-
- result = df.set_index(["A"]).reset_index(level=levels[0], drop=True)
- tm.assert_frame_equal(result, df[["B", "C", "D"]])
-
- # Missing levels - for both MultiIndex and single-level Index:
- for idx_lev in ["A", "B"], ["A"]:
- with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"):
- df.set_index(idx_lev).reset_index(level=["A", "E"])
- with pytest.raises(IndexError, match="Too many levels"):
- df.set_index(idx_lev).reset_index(level=[0, 1, 2])
-
- def test_reset_index_right_dtype(self):
- time = np.arange(0.0, 10, np.sqrt(2) / 2)
- s1 = Series(
- (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed"
- )
- df = DataFrame(s1)
-
- resetted = s1.reset_index()
- assert resetted["time"].dtype == np.float64
-
- resetted = df.reset_index()
- assert resetted["time"].dtype == np.float64
-
- def test_reset_index_multiindex_col(self):
- vals = np.random.randn(3, 3).astype(object)
- idx = ["x", "y", "z"]
- full = np.hstack(([[x] for x in idx], vals))
- df = DataFrame(
- vals,
- Index(idx, name="a"),
- columns=[["b", "b", "c"], ["mean", "median", "mean"]],
- )
- rs = df.reset_index()
- xp = DataFrame(
- full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]]
- )
- tm.assert_frame_equal(rs, xp)
-
- rs = df.reset_index(col_fill=None)
- xp = DataFrame(
- full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]]
- )
- tm.assert_frame_equal(rs, xp)
-
- rs = df.reset_index(col_level=1, col_fill="blah")
- xp = DataFrame(
- full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]]
- )
- tm.assert_frame_equal(rs, xp)
-
- df = DataFrame(
- vals,
- MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]),
- columns=[["b", "b", "c"], ["mean", "median", "mean"]],
- )
- rs = df.reset_index("a")
- xp = DataFrame(
- full,
- Index([0, 1, 2], name="d"),
- columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]],
- )
- tm.assert_frame_equal(rs, xp)
-
- rs = df.reset_index("a", col_fill=None)
- xp = DataFrame(
- full,
- Index(range(3), name="d"),
- columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]],
- )
- tm.assert_frame_equal(rs, xp)
-
- rs = df.reset_index("a", col_fill="blah", col_level=1)
- xp = DataFrame(
- full,
- Index(range(3), name="d"),
- columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]],
- )
- tm.assert_frame_equal(rs, xp)
-
- def test_reset_index_multiindex_nan(self):
- # GH6322, testing reset_index on MultiIndexes
- # when we have a nan or all nan
- df = DataFrame(
- {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)}
- )
- rs = df.set_index(["A", "B"]).reset_index()
- tm.assert_frame_equal(rs, df)
-
- df = DataFrame(
- {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)}
- )
- rs = df.set_index(["A", "B"]).reset_index()
- tm.assert_frame_equal(rs, df)
-
- df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]})
- rs = df.set_index(["A", "B"]).reset_index()
- tm.assert_frame_equal(rs, df)
-
- df = DataFrame(
- {
- "A": ["a", "b", "c"],
- "B": [np.nan, np.nan, np.nan],
- "C": np.random.rand(3),
- }
- )
- rs = df.set_index(["A", "B"]).reset_index()
- tm.assert_frame_equal(rs, df)
-
- def test_reset_index_with_datetimeindex_cols(self):
- # GH5818
- #
- df = DataFrame(
- [[1, 2], [3, 4]],
- columns=date_range("1/1/2013", "1/2/2013"),
- index=["A", "B"],
- )
-
- result = df.reset_index()
- expected = DataFrame(
- [["A", 1, 2], ["B", 3, 4]],
- columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)],
- )
- tm.assert_frame_equal(result, expected)
-
- def test_reset_index_range(self):
- # GH 12071
- df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2))
- result = df.reset_index()
- assert isinstance(result.index, RangeIndex)
- expected = DataFrame(
- [[0, 0, 0], [1, 1, 1]],
- columns=["index", "A", "B"],
- index=RangeIndex(stop=2),
- )
- tm.assert_frame_equal(result, expected)
-
def test_set_index_names(self):
df = tm.makeDataFrame()
df.index.name = "name"
@@ -1262,92 +757,6 @@ def test_set_index_names(self):
# Check equality
tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)
- def test_rename_objects(self, float_string_frame):
- renamed = float_string_frame.rename(columns=str.upper)
-
- assert "FOO" in renamed
- assert "foo" not in renamed
-
- def test_rename_axis_style(self):
- # https://github.com/pandas-dev/pandas/issues/12392
- df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"])
- expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
-
- result = df.rename(str.lower, axis=1)
- tm.assert_frame_equal(result, expected)
-
- result = df.rename(str.lower, axis="columns")
- tm.assert_frame_equal(result, expected)
-
- result = df.rename({"A": "a", "B": "b"}, axis=1)
- tm.assert_frame_equal(result, expected)
-
- result = df.rename({"A": "a", "B": "b"}, axis="columns")
- tm.assert_frame_equal(result, expected)
-
- # Index
- expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
- result = df.rename(str.lower, axis=0)
- tm.assert_frame_equal(result, expected)
-
- result = df.rename(str.lower, axis="index")
- tm.assert_frame_equal(result, expected)
-
- result = df.rename({"X": "x", "Y": "y"}, axis=0)
- tm.assert_frame_equal(result, expected)
-
- result = df.rename({"X": "x", "Y": "y"}, axis="index")
- tm.assert_frame_equal(result, expected)
-
- result = df.rename(mapper=str.lower, axis="index")
- tm.assert_frame_equal(result, expected)
-
- def test_rename_mapper_multi(self):
- df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index(
- ["A", "B"]
- )
- result = df.rename(str.upper)
- expected = df.rename(index=str.upper)
- tm.assert_frame_equal(result, expected)
-
- def test_rename_positional_named(self):
- # https://github.com/pandas-dev/pandas/issues/12392
- df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
- result = df.rename(index=str.lower, columns=str.upper)
- expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
- tm.assert_frame_equal(result, expected)
-
- def test_rename_axis_style_raises(self):
- # see gh-12392
- df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"])
-
- # Named target and axis
- over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(index=str.lower, axis=1)
-
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(index=str.lower, axis="columns")
-
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(columns=str.lower, axis="columns")
-
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(index=str.lower, axis=0)
-
- # Multiple targets and axis
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(str.lower, index=str.lower, axis="columns")
-
- # Too many targets
- over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(str.lower, index=str.lower, columns=str.lower)
-
- # Duplicates
- with pytest.raises(TypeError, match="multiple values"):
- df.rename(id, mapper=id)
-
def test_reindex_api_equivalence(self):
# equivalence of the labels/axis and index/columns API's
df = DataFrame(
@@ -1376,43 +785,6 @@ def test_reindex_api_equivalence(self):
for res in [res2, res3]:
tm.assert_frame_equal(res1, res)
- def test_rename_positional_raises(self):
- # GH 29136
- df = DataFrame(columns=["A", "B"])
- msg = r"rename\(\) takes from 1 to 2 positional arguments"
-
- with pytest.raises(TypeError, match=msg):
- df.rename(None, str.lower)
-
- def test_rename_no_mappings_raises(self):
- # GH 29136
- df = DataFrame([[1]])
- msg = "must pass an index to rename"
- with pytest.raises(TypeError, match=msg):
- df.rename()
-
- with pytest.raises(TypeError, match=msg):
- df.rename(None, index=None)
-
- with pytest.raises(TypeError, match=msg):
- df.rename(None, columns=None)
-
- with pytest.raises(TypeError, match=msg):
- df.rename(None, columns=None, index=None)
-
- def test_rename_mapper_and_positional_arguments_raises(self):
- # GH 29136
- df = DataFrame([[1]])
- msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
- with pytest.raises(TypeError, match=msg):
- df.rename({}, index={})
-
- with pytest.raises(TypeError, match=msg):
- df.rename({}, columns={})
-
- with pytest.raises(TypeError, match=msg):
- df.rename({}, columns={}, index={})
-
def test_assign_columns(self, float_frame):
float_frame["hi"] = "there"
diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
index 36a476d195fe5..321eb5fe94daf 100644
--- a/pandas/tests/frame/test_combine_concat.py
+++ b/pandas/tests/frame/test_combine_concat.py
@@ -21,27 +21,6 @@ def test_concat_multiple_frames_dtypes(self):
)
tm.assert_series_equal(results, expected)
- @pytest.mark.parametrize(
- "data",
- [
- pd.date_range("2000", periods=4),
- pd.date_range("2000", periods=4, tz="US/Central"),
- pd.period_range("2000", periods=4),
- pd.timedelta_range(0, periods=4),
- ],
- )
- def test_combine_datetlike_udf(self, data):
- # https://github.com/pandas-dev/pandas/issues/23079
- df = pd.DataFrame({"A": data})
- other = df.copy()
- df.iloc[1, 0] = None
-
- def combiner(a, b):
- return b
-
- result = df.combine(other, combiner)
- tm.assert_frame_equal(result, other)
-
def test_concat_multiple_tzs(self):
# GH 12467
# combining datetime tz-aware and naive DataFrames
diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 8b63f0614eebf..713d8f3ceeedb 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -111,325 +111,6 @@ def test_dtypes_are_correct_after_column_slice(self):
pd.Series(odict([("a", np.float_), ("b", np.float_), ("c", np.float_)])),
)
- def test_select_dtypes_include_using_list_like(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.Categorical(list("abc")),
- "g": pd.date_range("20130101", periods=3),
- "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
- "i": pd.date_range("20130101", periods=3, tz="CET"),
- "j": pd.period_range("2013-01", periods=3, freq="M"),
- "k": pd.timedelta_range("1 day", periods=3),
- }
- )
-
- ri = df.select_dtypes(include=[np.number])
- ei = df[["b", "c", "d", "k"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include=[np.number], exclude=["timedelta"])
- ei = df[["b", "c", "d"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"])
- ei = df[["b", "c", "d", "f"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include=["datetime"])
- ei = df[["g"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include=["datetime64"])
- ei = df[["g"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include=["datetimetz"])
- ei = df[["h", "i"]]
- tm.assert_frame_equal(ri, ei)
-
- with pytest.raises(NotImplementedError, match=r"^$"):
- df.select_dtypes(include=["period"])
-
- def test_select_dtypes_exclude_using_list_like(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- }
- )
- re = df.select_dtypes(exclude=[np.number])
- ee = df[["a", "e"]]
- tm.assert_frame_equal(re, ee)
-
- def test_select_dtypes_exclude_include_using_list_like(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.date_range("now", periods=3).values,
- }
- )
- exclude = (np.datetime64,)
- include = np.bool_, "integer"
- r = df.select_dtypes(include=include, exclude=exclude)
- e = df[["b", "c", "e"]]
- tm.assert_frame_equal(r, e)
-
- exclude = ("datetime",)
- include = "bool", "int64", "int32"
- r = df.select_dtypes(include=include, exclude=exclude)
- e = df[["b", "e"]]
- tm.assert_frame_equal(r, e)
-
- def test_select_dtypes_include_using_scalars(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.Categorical(list("abc")),
- "g": pd.date_range("20130101", periods=3),
- "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
- "i": pd.date_range("20130101", periods=3, tz="CET"),
- "j": pd.period_range("2013-01", periods=3, freq="M"),
- "k": pd.timedelta_range("1 day", periods=3),
- }
- )
-
- ri = df.select_dtypes(include=np.number)
- ei = df[["b", "c", "d", "k"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include="datetime")
- ei = df[["g"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include="datetime64")
- ei = df[["g"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include="category")
- ei = df[["f"]]
- tm.assert_frame_equal(ri, ei)
-
- with pytest.raises(NotImplementedError, match=r"^$"):
- df.select_dtypes(include="period")
-
- def test_select_dtypes_exclude_using_scalars(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.Categorical(list("abc")),
- "g": pd.date_range("20130101", periods=3),
- "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
- "i": pd.date_range("20130101", periods=3, tz="CET"),
- "j": pd.period_range("2013-01", periods=3, freq="M"),
- "k": pd.timedelta_range("1 day", periods=3),
- }
- )
-
- ri = df.select_dtypes(exclude=np.number)
- ei = df[["a", "e", "f", "g", "h", "i", "j"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(exclude="category")
- ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]]
- tm.assert_frame_equal(ri, ei)
-
- with pytest.raises(NotImplementedError, match=r"^$"):
- df.select_dtypes(exclude="period")
-
- def test_select_dtypes_include_exclude_using_scalars(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.Categorical(list("abc")),
- "g": pd.date_range("20130101", periods=3),
- "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
- "i": pd.date_range("20130101", periods=3, tz="CET"),
- "j": pd.period_range("2013-01", periods=3, freq="M"),
- "k": pd.timedelta_range("1 day", periods=3),
- }
- )
-
- ri = df.select_dtypes(include=np.number, exclude="floating")
- ei = df[["b", "c", "k"]]
- tm.assert_frame_equal(ri, ei)
-
- def test_select_dtypes_include_exclude_mixed_scalars_lists(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.Categorical(list("abc")),
- "g": pd.date_range("20130101", periods=3),
- "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
- "i": pd.date_range("20130101", periods=3, tz="CET"),
- "j": pd.period_range("2013-01", periods=3, freq="M"),
- "k": pd.timedelta_range("1 day", periods=3),
- }
- )
-
- ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"])
- ei = df[["b", "c"]]
- tm.assert_frame_equal(ri, ei)
-
- ri = df.select_dtypes(include=[np.number, "category"], exclude="floating")
- ei = df[["b", "c", "f", "k"]]
- tm.assert_frame_equal(ri, ei)
-
- def test_select_dtypes_duplicate_columns(self):
- # GH20839
- odict = OrderedDict
- df = DataFrame(
- odict(
- [
- ("a", list("abc")),
- ("b", list(range(1, 4))),
- ("c", np.arange(3, 6).astype("u1")),
- ("d", np.arange(4.0, 7.0, dtype="float64")),
- ("e", [True, False, True]),
- ("f", pd.date_range("now", periods=3).values),
- ]
- )
- )
- df.columns = ["a", "a", "b", "b", "b", "c"]
-
- expected = DataFrame(
- {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")}
- )
-
- result = df.select_dtypes(include=[np.number], exclude=["floating"])
- tm.assert_frame_equal(result, expected)
-
- def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.date_range("now", periods=3).values,
- }
- )
- df["g"] = df.f.diff()
- assert not hasattr(np, "u8")
- r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
- e = df[["a", "b"]]
- tm.assert_frame_equal(r, e)
-
- r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
- e = df[["a", "b", "g"]]
- tm.assert_frame_equal(r, e)
-
- def test_select_dtypes_empty(self):
- df = DataFrame({"a": list("abc"), "b": list(range(1, 4))})
- msg = "at least one of include or exclude must be nonempty"
- with pytest.raises(ValueError, match=msg):
- df.select_dtypes()
-
- def test_select_dtypes_bad_datetime64(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.date_range("now", periods=3).values,
- }
- )
- with pytest.raises(ValueError, match=".+ is too specific"):
- df.select_dtypes(include=["datetime64[D]"])
-
- with pytest.raises(ValueError, match=".+ is too specific"):
- df.select_dtypes(exclude=["datetime64[as]"])
-
- def test_select_dtypes_datetime_with_tz(self):
-
- df2 = DataFrame(
- dict(
- A=Timestamp("20130102", tz="US/Eastern"),
- B=Timestamp("20130603", tz="CET"),
- ),
- index=range(5),
- )
- df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
- result = df3.select_dtypes(include=["datetime64[ns]"])
- expected = df3.reindex(columns=[])
- tm.assert_frame_equal(result, expected)
-
- @pytest.mark.parametrize(
- "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"]
- )
- @pytest.mark.parametrize("arg", ["include", "exclude"])
- def test_select_dtypes_str_raises(self, dtype, arg):
- df = DataFrame(
- {
- "a": list("abc"),
- "g": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.date_range("now", periods=3).values,
- }
- )
- msg = "string dtypes are not allowed"
- kwargs = {arg: [dtype]}
-
- with pytest.raises(TypeError, match=msg):
- df.select_dtypes(**kwargs)
-
- def test_select_dtypes_bad_arg_raises(self):
- df = DataFrame(
- {
- "a": list("abc"),
- "g": list("abc"),
- "b": list(range(1, 4)),
- "c": np.arange(3, 6).astype("u1"),
- "d": np.arange(4.0, 7.0, dtype="float64"),
- "e": [True, False, True],
- "f": pd.date_range("now", periods=3).values,
- }
- )
-
- msg = "data type.*not understood"
- with pytest.raises(TypeError, match=msg):
- df.select_dtypes(["blargy, blarg, blarg"])
-
- def test_select_dtypes_typecodes(self):
- # GH 11990
- df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random())
- expected = df
- FLOAT_TYPES = list(np.typecodes["AllFloat"])
- tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected)
-
def test_dtypes_gh8722(self, float_string_frame):
float_string_frame["bool"] = float_string_frame["A"] > 0
result = float_string_frame.dtypes
diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
index df40c2e7e2a11..542d9835bb5d3 100644
--- a/pandas/tests/frame/test_operators.py
+++ b/pandas/tests/frame/test_operators.py
@@ -685,25 +685,6 @@ def test_boolean_comparison(self):
with pytest.raises(ValueError, match=msg1d):
result = df == tup
- def test_combine_generic(self, float_frame):
- df1 = float_frame
- df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]]
-
- combined = df1.combine(df2, np.add)
- combined2 = df2.combine(df1, np.add)
- assert combined["D"].isna().all()
- assert combined2["D"].isna().all()
-
- chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]]
- chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]]
-
- exp = (
- float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk)
- * 2
- )
- tm.assert_frame_equal(chunk, exp)
- tm.assert_frame_equal(chunk2, exp)
-
def test_inplace_ops_alignment(self):
# inplace ops / ops alignment
diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py
index a6b2b334d3ec8..1ce13fd31ba88 100644
--- a/pandas/tests/frame/test_period.py
+++ b/pandas/tests/frame/test_period.py
@@ -1,19 +1,6 @@
-from datetime import timedelta
-
import numpy as np
-import pytest
-
-import pandas as pd
-from pandas import (
- DataFrame,
- DatetimeIndex,
- Index,
- PeriodIndex,
- Timedelta,
- date_range,
- period_range,
- to_datetime,
-)
+
+from pandas import DataFrame, Index, PeriodIndex, period_range
import pandas._testing as tm
@@ -49,93 +36,6 @@ def test_frame_setitem(self):
assert isinstance(rs.index, PeriodIndex)
tm.assert_index_equal(rs.index, rng)
- def test_frame_to_time_stamp(self):
- K = 5
- index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
- df = DataFrame(np.random.randn(len(index), K), index=index)
- df["mix"] = "a"
-
- exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC")
- exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns")
- result = df.to_timestamp("D", "end")
- tm.assert_index_equal(result.index, exp_index)
- tm.assert_numpy_array_equal(result.values, df.values)
-
- exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
- result = df.to_timestamp("D", "start")
- tm.assert_index_equal(result.index, exp_index)
-
- def _get_with_delta(delta, freq="A-DEC"):
- return date_range(
- to_datetime("1/1/2001") + delta,
- to_datetime("12/31/2009") + delta,
- freq=freq,
- )
-
- delta = timedelta(hours=23)
- result = df.to_timestamp("H", "end")
- exp_index = _get_with_delta(delta)
- exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns")
- tm.assert_index_equal(result.index, exp_index)
-
- delta = timedelta(hours=23, minutes=59)
- result = df.to_timestamp("T", "end")
- exp_index = _get_with_delta(delta)
- exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns")
- tm.assert_index_equal(result.index, exp_index)
-
- result = df.to_timestamp("S", "end")
- delta = timedelta(hours=23, minutes=59, seconds=59)
- exp_index = _get_with_delta(delta)
- exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
- tm.assert_index_equal(result.index, exp_index)
-
- # columns
- df = df.T
-
- exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC")
- exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns")
- result = df.to_timestamp("D", "end", axis=1)
- tm.assert_index_equal(result.columns, exp_index)
- tm.assert_numpy_array_equal(result.values, df.values)
-
- exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
- result = df.to_timestamp("D", "start", axis=1)
- tm.assert_index_equal(result.columns, exp_index)
-
- delta = timedelta(hours=23)
- result = df.to_timestamp("H", "end", axis=1)
- exp_index = _get_with_delta(delta)
- exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns")
- tm.assert_index_equal(result.columns, exp_index)
-
- delta = timedelta(hours=23, minutes=59)
- result = df.to_timestamp("T", "end", axis=1)
- exp_index = _get_with_delta(delta)
- exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns")
- tm.assert_index_equal(result.columns, exp_index)
-
- result = df.to_timestamp("S", "end", axis=1)
- delta = timedelta(hours=23, minutes=59, seconds=59)
- exp_index = _get_with_delta(delta)
- exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
- tm.assert_index_equal(result.columns, exp_index)
-
- # invalid axis
- with pytest.raises(ValueError, match="axis"):
- df.to_timestamp(axis=2)
-
- result1 = df.to_timestamp("5t", axis=1)
- result2 = df.to_timestamp("t", axis=1)
- expected = pd.date_range("2001-01-01", "2009-01-01", freq="AS")
- assert isinstance(result1.columns, DatetimeIndex)
- assert isinstance(result2.columns, DatetimeIndex)
- tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8)
- tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8)
- # PeriodIndex.to_timestamp always use 'infer'
- assert result1.columns.freqstr == "AS-JAN"
- assert result2.columns.freqstr == "AS-JAN"
-
def test_frame_index_to_string(self):
index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M")
frame = DataFrame(np.random.randn(3, 4), index=index)
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 5e06b6402c34f..b713af92eac27 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -1,30 +1,10 @@
-from datetime import datetime, time
-from itertools import product
-
import numpy as np
import pytest
-import pytz
import pandas as pd
-from pandas import (
- DataFrame,
- DatetimeIndex,
- Index,
- MultiIndex,
- Series,
- date_range,
- period_range,
- to_datetime,
-)
+from pandas import DataFrame, Series, date_range, to_datetime
import pandas._testing as tm
-import pandas.tseries.offsets as offsets
-
-
-@pytest.fixture(params=product([True, False], [True, False]))
-def close_open_fixture(request):
- return request.param
-
class TestDataFrameTimeSeriesMethods:
def test_frame_ctor_datetime64_column(self):
@@ -80,54 +60,6 @@ def test_frame_append_datetime64_col_other_units(self):
assert (tmp["dates"].values == ex_vals).all()
- def test_asfreq(self, datetime_frame):
- offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd())
- rule_monthly = datetime_frame.asfreq("BM")
-
- tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"])
-
- filled = rule_monthly.asfreq("B", method="pad") # noqa
- # TODO: actually check that this worked.
-
- # don't forget!
- filled_dep = rule_monthly.asfreq("B", method="pad") # noqa
-
- # test does not blow up on length-0 DataFrame
- zero_length = datetime_frame.reindex([])
- result = zero_length.asfreq("BM")
- assert result is not zero_length
-
- def test_asfreq_datetimeindex(self):
- df = DataFrame(
- {"A": [1, 2, 3]},
- index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)],
- )
- df = df.asfreq("B")
- assert isinstance(df.index, DatetimeIndex)
-
- ts = df["A"].asfreq("B")
- assert isinstance(ts.index, DatetimeIndex)
-
- def test_asfreq_fillvalue(self):
- # test for fill value during upsampling, related to issue 3715
-
- # setup
- rng = pd.date_range("1/1/2016", periods=10, freq="2S")
- ts = pd.Series(np.arange(len(rng)), index=rng)
- df = pd.DataFrame({"one": ts})
-
- # insert pre-existing missing value
- df.loc["2016-01-01 00:00:08", "one"] = None
-
- actual_df = df.asfreq(freq="1S", fill_value=9.0)
- expected_df = df.asfreq(freq="1S").fillna(9.0)
- expected_df.loc["2016-01-01 00:00:08", "one"] = None
- tm.assert_frame_equal(expected_df, actual_df)
-
- expected_series = ts.asfreq(freq="1S").fillna(9.0)
- actual_series = ts.asfreq(freq="1S", fill_value=9.0)
- tm.assert_series_equal(expected_series, actual_series)
-
@pytest.mark.parametrize(
"data,idx,expected_first,expected_last",
[
@@ -239,183 +171,6 @@ def test_last_raises(self):
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.last("1D")
- def test_at_time(self):
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
- rs = ts.at_time(rng[1])
- assert (rs.index.hour == rng[1].hour).all()
- assert (rs.index.minute == rng[1].minute).all()
- assert (rs.index.second == rng[1].second).all()
-
- result = ts.at_time("9:30")
- expected = ts.at_time(time(9, 30))
- tm.assert_frame_equal(result, expected)
-
- result = ts.loc[time(9, 30)]
- expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
-
- tm.assert_frame_equal(result, expected)
-
- # midnight, everything
- rng = date_range("1/1/2000", "1/31/2000")
- ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
-
- result = ts.at_time(time(0, 0))
- tm.assert_frame_equal(result, ts)
-
- # time doesn't exist
- rng = date_range("1/1/2012", freq="23Min", periods=384)
- ts = DataFrame(np.random.randn(len(rng), 2), rng)
- rs = ts.at_time("16:00")
- assert len(rs) == 0
-
- @pytest.mark.parametrize(
- "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)]
- )
- def test_at_time_errors(self, hour):
- # GH 24043
- dti = pd.date_range("2018", periods=3, freq="H")
- df = pd.DataFrame(list(range(len(dti))), index=dti)
- if getattr(hour, "tzinfo", None) is None:
- result = df.at_time(hour)
- expected = df.iloc[1:2]
- tm.assert_frame_equal(result, expected)
- else:
- with pytest.raises(ValueError, match="Index must be timezone"):
- df.at_time(hour)
-
- def test_at_time_tz(self):
- # GH 24043
- dti = pd.date_range("2018", periods=3, freq="H", tz="US/Pacific")
- df = pd.DataFrame(list(range(len(dti))), index=dti)
- result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern")))
- expected = df.iloc[1:2]
- tm.assert_frame_equal(result, expected)
-
- def test_at_time_raises(self):
- # GH20725
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
- with pytest.raises(TypeError): # index is not a DatetimeIndex
- df.at_time("00:00")
-
- @pytest.mark.parametrize("axis", ["index", "columns", 0, 1])
- def test_at_time_axis(self, axis):
- # issue 8839
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = DataFrame(np.random.randn(len(rng), len(rng)))
- ts.index, ts.columns = rng, rng
-
- indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)]
-
- if axis in ["index", 0]:
- expected = ts.loc[indices, :]
- elif axis in ["columns", 1]:
- expected = ts.loc[:, indices]
-
- result = ts.at_time("9:30", axis=axis)
- tm.assert_frame_equal(result, expected)
-
- def test_between_time(self, close_open_fixture):
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
- stime = time(0, 0)
- etime = time(1, 0)
- inc_start, inc_end = close_open_fixture
-
- filtered = ts.between_time(stime, etime, inc_start, inc_end)
- exp_len = 13 * 4 + 1
- if not inc_start:
- exp_len -= 5
- if not inc_end:
- exp_len -= 4
-
- assert len(filtered) == exp_len
- for rs in filtered.index:
- t = rs.time()
- if inc_start:
- assert t >= stime
- else:
- assert t > stime
-
- if inc_end:
- assert t <= etime
- else:
- assert t < etime
-
- result = ts.between_time("00:00", "01:00")
- expected = ts.between_time(stime, etime)
- tm.assert_frame_equal(result, expected)
-
- # across midnight
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
- stime = time(22, 0)
- etime = time(9, 0)
-
- filtered = ts.between_time(stime, etime, inc_start, inc_end)
- exp_len = (12 * 11 + 1) * 4 + 1
- if not inc_start:
- exp_len -= 4
- if not inc_end:
- exp_len -= 4
-
- assert len(filtered) == exp_len
- for rs in filtered.index:
- t = rs.time()
- if inc_start:
- assert (t >= stime) or (t <= etime)
- else:
- assert (t > stime) or (t <= etime)
-
- if inc_end:
- assert (t <= etime) or (t >= stime)
- else:
- assert (t < etime) or (t >= stime)
-
- def test_between_time_raises(self):
- # GH20725
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
- with pytest.raises(TypeError): # index is not a DatetimeIndex
- df.between_time(start_time="00:00", end_time="12:00")
-
- def test_between_time_axis(self, axis):
- # issue 8839
- rng = date_range("1/1/2000", periods=100, freq="10min")
- ts = DataFrame(np.random.randn(len(rng), len(rng)))
- stime, etime = ("08:00:00", "09:00:00")
- exp_len = 7
-
- if axis in ["index", 0]:
- ts.index = rng
- assert len(ts.between_time(stime, etime)) == exp_len
- assert len(ts.between_time(stime, etime, axis=0)) == exp_len
-
- if axis in ["columns", 1]:
- ts.columns = rng
- selected = ts.between_time(stime, etime, axis=1).columns
- assert len(selected) == exp_len
-
- def test_between_time_axis_raises(self, axis):
- # issue 8839
- rng = date_range("1/1/2000", periods=100, freq="10min")
- mask = np.arange(0, len(rng))
- rand_data = np.random.randn(len(rng), len(rng))
- ts = DataFrame(rand_data, index=rng, columns=rng)
- stime, etime = ("08:00:00", "09:00:00")
-
- msg = "Index must be DatetimeIndex"
- if axis in ["columns", 1]:
- ts.index = mask
- with pytest.raises(TypeError, match=msg):
- ts.between_time(stime, etime)
- with pytest.raises(TypeError, match=msg):
- ts.between_time(stime, etime, axis=0)
-
- if axis in ["index", 0]:
- ts.columns = mask
- with pytest.raises(TypeError, match=msg):
- ts.between_time(stime, etime, axis=1)
-
def test_operation_on_NaT(self):
# Both NaT and Timestamp are in DataFrame.
df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]})
@@ -455,95 +210,3 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self):
{0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]"
)
tm.assert_frame_equal(result, expected)
-
- def test_frame_to_period(self):
- K = 5
-
- dr = date_range("1/1/2000", "1/1/2001")
- pr = period_range("1/1/2000", "1/1/2001")
- df = DataFrame(np.random.randn(len(dr), K), index=dr)
- df["mix"] = "a"
-
- pts = df.to_period()
- exp = df.copy()
- exp.index = pr
- tm.assert_frame_equal(pts, exp)
-
- pts = df.to_period("M")
- tm.assert_index_equal(pts.index, exp.index.asfreq("M"))
-
- df = df.T
- pts = df.to_period(axis=1)
- exp = df.copy()
- exp.columns = pr
- tm.assert_frame_equal(pts, exp)
-
- pts = df.to_period("M", axis=1)
- tm.assert_index_equal(pts.columns, exp.columns.asfreq("M"))
-
- msg = "No axis named 2 for object type "
- with pytest.raises(ValueError, match=msg):
- df.to_period(axis=2)
-
- @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"])
- def test_tz_convert_and_localize(self, fn):
- l0 = date_range("20140701", periods=5, freq="D")
- l1 = date_range("20140701", periods=5, freq="D")
-
- int_idx = Index(range(5))
-
- if fn == "tz_convert":
- l0 = l0.tz_localize("UTC")
- l1 = l1.tz_localize("UTC")
-
- for idx in [l0, l1]:
-
- l0_expected = getattr(idx, fn)("US/Pacific")
- l1_expected = getattr(idx, fn)("US/Pacific")
-
- df1 = DataFrame(np.ones(5), index=l0)
- df1 = getattr(df1, fn)("US/Pacific")
- tm.assert_index_equal(df1.index, l0_expected)
-
- # MultiIndex
- # GH7846
- df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))
-
- df3 = getattr(df2, fn)("US/Pacific", level=0)
- assert not df3.index.levels[0].equals(l0)
- tm.assert_index_equal(df3.index.levels[0], l0_expected)
- tm.assert_index_equal(df3.index.levels[1], l1)
- assert not df3.index.levels[1].equals(l1_expected)
-
- df3 = getattr(df2, fn)("US/Pacific", level=1)
- tm.assert_index_equal(df3.index.levels[0], l0)
- assert not df3.index.levels[0].equals(l0_expected)
- tm.assert_index_equal(df3.index.levels[1], l1_expected)
- assert not df3.index.levels[1].equals(l1)
-
- df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
-
- # TODO: untested
- df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa
-
- tm.assert_index_equal(df3.index.levels[0], l0)
- assert not df3.index.levels[0].equals(l0_expected)
- tm.assert_index_equal(df3.index.levels[1], l1_expected)
- assert not df3.index.levels[1].equals(l1)
-
- # Bad Inputs
-
- # Not DatetimeIndex / PeriodIndex
- with pytest.raises(TypeError, match="DatetimeIndex"):
- df = DataFrame(index=int_idx)
- df = getattr(df, fn)("US/Pacific")
-
- # Not DatetimeIndex / PeriodIndex
- with pytest.raises(TypeError, match="DatetimeIndex"):
- df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
- df = getattr(df, fn)("US/Pacific", level=0)
-
- # Invalid level
- with pytest.raises(ValueError, match="not valid"):
- df = DataFrame(index=l0)
- df = getattr(df, fn)("US/Pacific", level=1)
diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py
index b60f2052a988f..62e8a4b470218 100644
--- a/pandas/tests/frame/test_timezones.py
+++ b/pandas/tests/frame/test_timezones.py
@@ -59,34 +59,6 @@ def test_frame_from_records_utc(self):
# it works
DataFrame.from_records([rec], index="begin_time")
- def test_frame_tz_localize(self):
- rng = date_range("1/1/2011", periods=100, freq="H")
-
- df = DataFrame({"a": 1}, index=rng)
- result = df.tz_localize("utc")
- expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
- assert result.index.tz.zone == "UTC"
- tm.assert_frame_equal(result, expected)
-
- df = df.T
- result = df.tz_localize("utc", axis=1)
- assert result.columns.tz.zone == "UTC"
- tm.assert_frame_equal(result, expected.T)
-
- def test_frame_tz_convert(self):
- rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
-
- df = DataFrame({"a": 1}, index=rng)
- result = df.tz_convert("Europe/Berlin")
- expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin"))
- assert result.index.tz.zone == "Europe/Berlin"
- tm.assert_frame_equal(result, expected)
-
- df = df.T
- result = df.tz_convert("Europe/Berlin", axis=1)
- assert result.columns.tz.zone == "Europe/Berlin"
- tm.assert_frame_equal(result, expected.T)
-
def test_frame_join_tzaware(self):
test1 = DataFrame(
np.zeros((6, 3)),
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
index 8e54de771a3e4..1b6cb8447c76d 100644
--- a/pandas/tests/generic/test_generic.py
+++ b/pandas/tests/generic/test_generic.py
@@ -187,8 +187,10 @@ def test_constructor_compound_dtypes(self):
def f(dtype):
return self._construct(shape=3, value=1, dtype=dtype)
- msg = "compound dtypes are not implemented"
- f"in the {self._typ.__name__} constructor"
+ msg = (
+ "compound dtypes are not implemented "
+ f"in the {self._typ.__name__} constructor"
+ )
with pytest.raises(NotImplementedError, match=msg):
f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py
index 5aafd83da78fd..f119eb422a276 100644
--- a/pandas/tests/generic/test_series.py
+++ b/pandas/tests/generic/test_series.py
@@ -24,13 +24,6 @@ class TestSeries(Generic):
_typ = Series
_comparator = lambda self, x, y: tm.assert_series_equal(x, y)
- def setup_method(self):
- self.ts = tm.makeTimeSeries() # Was at top level in test_series
- self.ts.name = "ts"
-
- self.series = tm.makeStringSeries()
- self.series.name = "series"
-
def test_rename_mi(self):
s = Series(
[11, 21, 31],
diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py
index 1df0874e2f947..ee3f85da22781 100644
--- a/pandas/tests/indexes/categorical/test_constructors.py
+++ b/pandas/tests/indexes/categorical/test_constructors.py
@@ -136,12 +136,3 @@ def test_construction_with_categorical_dtype(self):
with pytest.raises(ValueError, match=msg):
Index(data, ordered=ordered, dtype=dtype)
-
- def test_create_categorical(self):
- # GH#17513 The public CI constructor doesn't hit this code path with
- # instances of CategoricalIndex, but we still want to test the code
- ci = CategoricalIndex(["a", "b", "c"])
- # First ci is self, second ci is data.
- result = CategoricalIndex._create_categorical(ci, ci)
- expected = Categorical(["a", "b", "c"])
- tm.assert_categorical_equal(result, expected)
diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
index 6479b14e9521e..40c7ffba46450 100644
--- a/pandas/tests/indexes/period/test_period.py
+++ b/pandas/tests/indexes/period/test_period.py
@@ -128,15 +128,9 @@ def test_shallow_copy_empty(self):
def test_shallow_copy_i8(self):
# GH-24391
pi = period_range("2018-01-01", periods=3, freq="2D")
- result = pi._shallow_copy(pi.asi8, freq=pi.freq)
+ result = pi._shallow_copy(pi.asi8)
tm.assert_index_equal(result, pi)
- def test_shallow_copy_changing_freq_raises(self):
- pi = period_range("2018-01-01", periods=3, freq="2D")
- msg = "specified freq and dtype are different"
- with pytest.raises(IncompatibleFrequency, match=msg):
- pi._shallow_copy(pi, freq="H")
-
def test_view_asi8(self):
idx = PeriodIndex([], freq="M")
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
index 4d3f1b0539aee..87520f5ab2577 100644
--- a/pandas/tests/indexing/test_floats.py
+++ b/pandas/tests/indexing/test_floats.py
@@ -162,10 +162,9 @@ def test_scalar_non_numeric(self, index_func, klass):
s2.loc[3.0] = 10
assert s2.index.is_object()
- for idxr in [lambda x: x]:
- s2 = s.copy()
- idxr(s2)[3.0] = 0
- assert s2.index.is_object()
+ s2 = s.copy()
+ s2[3.0] = 0
+ assert s2.index.is_object()
@pytest.mark.parametrize(
"index_func",
@@ -250,12 +249,7 @@ def test_scalar_integer(self, index_func, klass):
# integer index
i = index_func(5)
-
- if klass is Series:
- # TODO: Should we be passing index=i here?
- obj = Series(np.arange(len(i)))
- else:
- obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i)
+ obj = gen_obj(klass, i)
# coerce to equal int
for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]:
@@ -313,7 +307,7 @@ def test_scalar_float(self, klass):
result = idxr(s2)[indexer]
self.check(result, s, 3, getitem)
- # random integer is a KeyError
+ # random float is a KeyError
with pytest.raises(KeyError, match=r"^3\.5$"):
idxr(s)[3.5]
@@ -429,15 +423,6 @@ def test_slice_integer(self):
indexer = slice(3, 5)
self.check(result, s, indexer, False)
- # positional indexing
- msg = (
- "cannot do slice indexing "
- fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
- "type float"
- )
- with pytest.raises(TypeError, match=msg):
- s[l]
-
# getitem out-of-bounds
for l in [slice(-6, 6), slice(-6.0, 6.0)]:
@@ -485,23 +470,6 @@ def test_slice_integer(self):
with pytest.raises(TypeError, match=msg):
s[l]
- # setitem
- for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
-
- sc = s.copy()
- sc.loc[l] = 0
- result = sc.loc[l].values.ravel()
- assert (result == 0).all()
-
- # positional indexing
- msg = (
- "cannot do slice indexing "
- fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
- "type float"
- )
- with pytest.raises(TypeError, match=msg):
- s[l] = 0
-
@pytest.mark.parametrize("l", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
def test_integer_positional_indexing(self, l):
""" make sure that we are raising on positional indexing
@@ -584,22 +552,34 @@ def test_slice_integer_frame_getitem(self, index_func):
with pytest.raises(TypeError, match=msg):
s[l]
+ @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
+ @pytest.mark.parametrize(
+ "index_func", [tm.makeIntIndex, tm.makeRangeIndex],
+ )
+ def test_float_slice_getitem_with_integer_index_raises(self, l, index_func):
+
+ # similar to above, but on the getitem dim (of a DataFrame)
+ index = index_func(5)
+
+ s = DataFrame(np.random.randn(5, 2), index=index)
+
# setitem
- for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
+ sc = s.copy()
+ sc.loc[l] = 0
+ result = sc.loc[l].values.ravel()
+ assert (result == 0).all()
- sc = s.copy()
- sc.loc[l] = 0
- result = sc.loc[l].values.ravel()
- assert (result == 0).all()
+ # positional indexing
+ msg = (
+ "cannot do slice indexing "
+ fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
+ "type float"
+ )
+ with pytest.raises(TypeError, match=msg):
+ s[l] = 0
- # positional indexing
- msg = (
- "cannot do slice indexing "
- fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
- "type float"
- )
- with pytest.raises(TypeError, match=msg):
- s[l] = 0
+ with pytest.raises(TypeError, match=msg):
+ s[l]
@pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
@pytest.mark.parametrize("klass", [Series, DataFrame])
@@ -614,10 +594,9 @@ def test_slice_float(self, l, klass):
# getitem
result = idxr(s)[l]
- if isinstance(s, Series):
- tm.assert_series_equal(result, expected)
- else:
- tm.assert_frame_equal(result, expected)
+ assert isinstance(result, type(s))
+ tm.assert_equal(result, expected)
+
# setitem
s2 = s.copy()
idxr(s2)[l] = 0
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 0c9ddbf5473b3..27b0500983afd 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -203,12 +203,6 @@ def create_mgr(descr, item_shape=None):
class TestBlock:
def setup_method(self, method):
- # self.fblock = get_float_ex() # a,c,e
- # self.cblock = get_complex_ex() #
- # self.oblock = get_obj_ex()
- # self.bool_block = get_bool_ex()
- # self.int_block = get_int_ex()
-
self.fblock = create_block("float", [0, 2, 4])
self.cblock = create_block("complex", [7])
self.oblock = create_block("object", [1, 3])
@@ -254,22 +248,11 @@ def test_merge(self):
tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals))
tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals))
- # TODO: merge with mixed type?
-
def test_copy(self):
cop = self.fblock.copy()
assert cop is not self.fblock
assert_block_equal(self.fblock, cop)
- def test_reindex_index(self):
- pass
-
- def test_reindex_cast(self):
- pass
-
- def test_insert(self):
- pass
-
def test_delete(self):
newb = self.fblock.copy()
newb.delete(0)
@@ -300,39 +283,7 @@ def test_delete(self):
newb.delete(3)
-class TestDatetimeBlock:
- def test_can_hold_element(self):
- block = create_block("datetime", [0])
-
- # We will check that block._can_hold_element iff arr.__setitem__ works
- arr = pd.array(block.values.ravel())
-
- # coerce None
- assert block._can_hold_element(None)
- arr[0] = None
- assert arr[0] is pd.NaT
-
- # coerce different types of datetime objects
- vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)]
- for val in vals:
- assert block._can_hold_element(val)
- arr[0] = val
-
- val = date(2010, 10, 10)
- assert not block._can_hold_element(val)
-
- msg = (
- "'value' should be a 'Timestamp', 'NaT', "
- "or array of those. Got 'date' instead."
- )
- with pytest.raises(TypeError, match=msg):
- arr[0] = val
-
-
class TestBlockManager:
- def test_constructor_corner(self):
- pass
-
def test_attrs(self):
mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2")
assert mgr.nblocks == 2
@@ -441,18 +392,6 @@ def test_set_change_dtype(self, mgr):
mgr2.set("quux", tm.randn(N))
assert mgr2.get("quux").dtype == np.float_
- def test_set_change_dtype_slice(self): # GH8850
- cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")])
- df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols)
- df["2nd"] = df["2nd"] * 2.0
-
- blocks = df._to_dict_of_blocks()
- assert sorted(blocks.keys()) == ["float64", "int64"]
- tm.assert_frame_equal(
- blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])
- )
- tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:]))
-
def test_copy(self, mgr):
cp = mgr.copy(deep=False)
for blk, cp_blk in zip(mgr.blocks, cp.blocks):
@@ -486,7 +425,7 @@ def test_sparse_mixed(self):
assert len(mgr.blocks) == 3
assert isinstance(mgr, BlockManager)
- # what to test here?
+ # TODO: what to test here?
def test_as_array_float(self):
mgr = create_mgr("c: f4; d: f2; e: f8")
@@ -650,22 +589,6 @@ def test_interleave(self):
mgr = create_mgr("a: M8[ns]; b: m8[ns]")
assert mgr.as_array().dtype == "object"
- def test_interleave_non_unique_cols(self):
- df = DataFrame(
- [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]],
- columns=["x", "x"],
- index=[1, 2],
- )
-
- df_unique = df.copy()
- df_unique.columns = ["x", "y"]
- assert df_unique.values.shape == df.values.shape
- tm.assert_numpy_array_equal(df_unique.values[0], df.values[0])
- tm.assert_numpy_array_equal(df_unique.values[1], df.values[1])
-
- def test_consolidate(self):
- pass
-
def test_consolidate_ordering_issues(self, mgr):
mgr.set("f", tm.randn(N))
mgr.set("d", tm.randn(N))
@@ -683,10 +606,6 @@ def test_consolidate_ordering_issues(self, mgr):
cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)
)
- def test_reindex_index(self):
- # TODO: should this be pytest.skip?
- pass
-
def test_reindex_items(self):
# mgr is not consolidated, f8 & f8-2 blocks
mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")
@@ -767,13 +686,6 @@ def test_get_bool_data(self):
def test_unicode_repr_doesnt_raise(self):
repr(create_mgr("b,\u05d0: object"))
- def test_missing_unicode_key(self):
- df = DataFrame({"a": [1]})
- try:
- df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
- except KeyError:
- pass # this is the expected exception
-
def test_equals(self):
# unique items
bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2")
@@ -843,8 +755,6 @@ class TestIndexing:
create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N, N)),
]
- # MANAGERS = [MANAGERS[6]]
-
@pytest.mark.parametrize("mgr", MANAGERS)
def test_get_slice(self, mgr):
def assert_slice_ok(mgr, axis, slobj):
@@ -994,11 +904,6 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value):
mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 1, 2], fill_value,
)
- # test_get_slice(slice_like, axis)
- # take(indexer, axis)
- # reindex_axis(new_labels, axis)
- # reindex_indexer(new_labels, indexer, axis)
-
class TestBlockPlacement:
def test_slice_len(self):
@@ -1151,6 +1056,33 @@ def any(self, axis=None):
class TestCanHoldElement:
+ def test_datetime_block_can_hold_element(self):
+ block = create_block("datetime", [0])
+
+ # We will check that block._can_hold_element iff arr.__setitem__ works
+ arr = pd.array(block.values.ravel())
+
+ # coerce None
+ assert block._can_hold_element(None)
+ arr[0] = None
+ assert arr[0] is pd.NaT
+
+ # coerce different types of datetime objects
+ vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)]
+ for val in vals:
+ assert block._can_hold_element(val)
+ arr[0] = val
+
+ val = date(2010, 10, 10)
+ assert not block._can_hold_element(val)
+
+ msg = (
+ "'value' should be a 'Timestamp', 'NaT', "
+ "or array of those. Got 'date' instead."
+ )
+ with pytest.raises(TypeError, match=msg):
+ arr[0] = val
+
@pytest.mark.parametrize(
"value, dtype",
[
@@ -1280,3 +1212,37 @@ def test_dataframe_not_equal():
df1 = pd.DataFrame({"a": [1, 2], "b": ["s", "d"]})
df2 = pd.DataFrame({"a": ["s", "d"], "b": [1, 2]})
assert df1.equals(df2) is False
+
+
+def test_missing_unicode_key():
+ df = DataFrame({"a": [1]})
+ with pytest.raises(KeyError, match="\u05d0"):
+ df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
+
+
+def test_set_change_dtype_slice():
+ # GH#8850
+ cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")])
+ df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols)
+ df["2nd"] = df["2nd"] * 2.0
+
+ blocks = df._to_dict_of_blocks()
+ assert sorted(blocks.keys()) == ["float64", "int64"]
+ tm.assert_frame_equal(
+ blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])
+ )
+ tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:]))
+
+
+def test_interleave_non_unique_cols():
+ df = DataFrame(
+ [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]],
+ columns=["x", "x"],
+ index=[1, 2],
+ )
+
+ df_unique = df.copy()
+ df_unique.columns = ["x", "y"]
+ assert df_unique.values.shape == df.values.shape
+ tm.assert_numpy_array_equal(df_unique.values[0], df.values[0])
+ tm.assert_numpy_array_equal(df_unique.values[1], df.values[1])
diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
index b4a7173da84d0..4c75d1ebcd377 100644
--- a/pandas/tests/scalar/timestamp/test_constructors.py
+++ b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -548,3 +548,16 @@ def test_timestamp_constructor_identity():
expected = Timestamp("2017-01-01T12")
result = Timestamp(expected)
assert result is expected
+
+
+@pytest.mark.parametrize("kwargs", [{}, {"year": 2020}, {"year": 2020, "month": 1}])
+def test_constructor_missing_keyword(kwargs):
+ # GH 31200
+
+ # The exact error message of datetime() depends on its version
+ msg1 = r"function missing required argument '(year|month|day)' \(pos [123]\)"
+ msg2 = r"Required argument '(year|month|day)' \(pos [123]\) not found"
+ msg = "|".join([msg1, msg2])
+
+ with pytest.raises(TypeError, match=msg):
+ Timestamp(**kwargs)
diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py
index 4d64b5b397981..4742d6ae3544f 100644
--- a/pandas/tests/series/methods/test_append.py
+++ b/pandas/tests/series/methods/test_append.py
@@ -2,7 +2,7 @@
import pytest
import pandas as pd
-from pandas import DataFrame, DatetimeIndex, Series, date_range
+from pandas import DataFrame, DatetimeIndex, Index, Series, Timestamp, date_range
import pandas._testing as tm
@@ -166,3 +166,87 @@ def test_append_tz_dateutil(self):
appended = rng.append(rng2)
tm.assert_index_equal(appended, rng3)
+
+ def test_series_append_aware(self):
+ rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
+ rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
+ ser1 = Series([1], index=rng1)
+ ser2 = Series([2], index=rng2)
+ ts_result = ser1.append(ser2)
+
+ exp_index = DatetimeIndex(
+ ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern"
+ )
+ exp = Series([1, 2], index=exp_index)
+ tm.assert_series_equal(ts_result, exp)
+ assert ts_result.index.tz == rng1.tz
+
+ rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC")
+ rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC")
+ ser1 = Series([1], index=rng1)
+ ser2 = Series([2], index=rng2)
+ ts_result = ser1.append(ser2)
+
+ exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC")
+ exp = Series([1, 2], index=exp_index)
+ tm.assert_series_equal(ts_result, exp)
+ utc = rng1.tz
+ assert utc == ts_result.index.tz
+
+ # GH#7795
+ # different tz coerces to object dtype, not UTC
+ rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
+ rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central")
+ ser1 = Series([1], index=rng1)
+ ser2 = Series([2], index=rng2)
+ ts_result = ser1.append(ser2)
+ exp_index = Index(
+ [
+ Timestamp("1/1/2011 01:00", tz="US/Eastern"),
+ Timestamp("1/1/2011 02:00", tz="US/Central"),
+ ]
+ )
+ exp = Series([1, 2], index=exp_index)
+ tm.assert_series_equal(ts_result, exp)
+
+ def test_series_append_aware_naive(self):
+ rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
+ rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
+ ser1 = Series(np.random.randn(len(rng1)), index=rng1)
+ ser2 = Series(np.random.randn(len(rng2)), index=rng2)
+ ts_result = ser1.append(ser2)
+
+ expected = ser1.index.astype(object).append(ser2.index.astype(object))
+ assert ts_result.index.equals(expected)
+
+ # mixed
+ rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
+ rng2 = range(100)
+ ser1 = Series(np.random.randn(len(rng1)), index=rng1)
+ ser2 = Series(np.random.randn(len(rng2)), index=rng2)
+ ts_result = ser1.append(ser2)
+
+ expected = ser1.index.astype(object).append(ser2.index)
+ assert ts_result.index.equals(expected)
+
+ def test_series_append_dst(self):
+ rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
+ rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
+ ser1 = Series([1, 2, 3], index=rng1)
+ ser2 = Series([10, 11, 12], index=rng2)
+ ts_result = ser1.append(ser2)
+
+ exp_index = DatetimeIndex(
+ [
+ "2016-01-01 01:00",
+ "2016-01-01 02:00",
+ "2016-01-01 03:00",
+ "2016-08-01 01:00",
+ "2016-08-01 02:00",
+ "2016-08-01 03:00",
+ ],
+ tz="US/Eastern",
+ )
+ exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
+ tm.assert_series_equal(ts_result, exp)
+ assert ts_result.index.tz == rng1.tz
diff --git a/pandas/tests/series/methods/test_asfreq.py b/pandas/tests/series/methods/test_asfreq.py
index 05ec56cf02182..d94b60384a07c 100644
--- a/pandas/tests/series/methods/test_asfreq.py
+++ b/pandas/tests/series/methods/test_asfreq.py
@@ -1,8 +1,13 @@
+from datetime import datetime
+
import numpy as np
+import pytest
-from pandas import DataFrame, Series, period_range
+from pandas import DataFrame, DatetimeIndex, Series, date_range, period_range
import pandas._testing as tm
+from pandas.tseries.offsets import BDay, BMonthEnd
+
class TestAsFreq:
# TODO: de-duplicate/parametrize or move DataFrame test
@@ -21,3 +26,79 @@ def test_asfreq_ts(self):
result = ts.asfreq("D", how="start")
assert len(result) == len(ts)
tm.assert_index_equal(result.index, index.asfreq("D", how="start"))
+
+ @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
+ def test_tz_aware_asfreq(self, tz):
+ dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz)
+
+ ser = Series(np.random.randn(len(dr)), index=dr)
+
+ # it works!
+ ser.asfreq("T")
+
+ def test_asfreq(self):
+ ts = Series(
+ [0.0, 1.0, 2.0],
+ index=[
+ datetime(2009, 10, 30),
+ datetime(2009, 11, 30),
+ datetime(2009, 12, 31),
+ ],
+ )
+
+ daily_ts = ts.asfreq("B")
+ monthly_ts = daily_ts.asfreq("BM")
+ tm.assert_series_equal(monthly_ts, ts)
+
+ daily_ts = ts.asfreq("B", method="pad")
+ monthly_ts = daily_ts.asfreq("BM")
+ tm.assert_series_equal(monthly_ts, ts)
+
+ daily_ts = ts.asfreq(BDay())
+ monthly_ts = daily_ts.asfreq(BMonthEnd())
+ tm.assert_series_equal(monthly_ts, ts)
+
+ result = ts[:0].asfreq("M")
+ assert len(result) == 0
+ assert result is not ts
+
+ daily_ts = ts.asfreq("D", fill_value=-1)
+ result = daily_ts.value_counts().sort_index()
+ expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
+ tm.assert_series_equal(result, expected)
+
+ def test_asfreq_datetimeindex_empty_series(self):
+ # GH#14320
+ index = DatetimeIndex(["2016-09-29 11:00"])
+ expected = Series(index=index, dtype=object).asfreq("H")
+ result = Series([3], index=index.copy()).asfreq("H")
+ tm.assert_index_equal(expected.index, result.index)
+
+ def test_asfreq_keep_index_name(self):
+ # GH#9854
+ index_name = "bar"
+ index = date_range("20130101", periods=20, name=index_name)
+ df = DataFrame(list(range(20)), columns=["foo"], index=index)
+
+ assert index_name == df.index.name
+ assert index_name == df.asfreq("10D").index.name
+
+ def test_asfreq_normalize(self):
+ rng = date_range("1/1/2000 09:30", periods=20)
+ norm = date_range("1/1/2000", periods=20)
+ vals = np.random.randn(20)
+ ts = Series(vals, index=rng)
+
+ result = ts.asfreq("D", normalize=True)
+ norm = date_range("1/1/2000", periods=20)
+ expected = Series(vals, index=norm)
+
+ tm.assert_series_equal(result, expected)
+
+ vals = np.random.randn(20, 3)
+ ts = DataFrame(vals, index=rng)
+
+ result = ts.asfreq("D", normalize=True)
+ expected = DataFrame(vals, index=norm)
+
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_at_time.py b/pandas/tests/series/methods/test_at_time.py
new file mode 100644
index 0000000000000..d9985cf33776a
--- /dev/null
+++ b/pandas/tests/series/methods/test_at_time.py
@@ -0,0 +1,72 @@
+from datetime import time
+
+import numpy as np
+import pytest
+
+from pandas._libs.tslibs import timezones
+
+from pandas import DataFrame, Series, date_range
+import pandas._testing as tm
+
+
+class TestAtTime:
+ @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+ def test_localized_at_time(self, tzstr):
+ tz = timezones.maybe_get_tz(tzstr)
+
+ rng = date_range("4/16/2012", "5/1/2012", freq="H")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+
+ ts_local = ts.tz_localize(tzstr)
+
+ result = ts_local.at_time(time(10, 0))
+ expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
+ tm.assert_series_equal(result, expected)
+ assert timezones.tz_compare(result.index.tz, tz)
+
+ def test_at_time(self):
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+ rs = ts.at_time(rng[1])
+ assert (rs.index.hour == rng[1].hour).all()
+ assert (rs.index.minute == rng[1].minute).all()
+ assert (rs.index.second == rng[1].second).all()
+
+ result = ts.at_time("9:30")
+ expected = ts.at_time(time(9, 30))
+ tm.assert_series_equal(result, expected)
+
+ df = DataFrame(np.random.randn(len(rng), 3), index=rng)
+
+ result = ts[time(9, 30)]
+ result_df = df.loc[time(9, 30)]
+ expected = ts[(rng.hour == 9) & (rng.minute == 30)]
+ exp_df = df[(rng.hour == 9) & (rng.minute == 30)]
+
+ tm.assert_series_equal(result, expected)
+ tm.assert_frame_equal(result_df, exp_df)
+
+ chunk = df.loc["1/4/2000":]
+ result = chunk.loc[time(9, 30)]
+ expected = result_df[-1:]
+ tm.assert_frame_equal(result, expected)
+
+ # midnight, everything
+ rng = date_range("1/1/2000", "1/31/2000")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+
+ result = ts.at_time(time(0, 0))
+ tm.assert_series_equal(result, ts)
+
+ # time doesn't exist
+ rng = date_range("1/1/2012", freq="23Min", periods=384)
+ ts = Series(np.random.randn(len(rng)), rng)
+ rs = ts.at_time("16:00")
+ assert len(rs) == 0
+
+ def test_at_time_raises(self):
+ # GH20725
+ ser = Series("a b c".split())
+ msg = "Index must be DatetimeIndex"
+ with pytest.raises(TypeError, match=msg):
+ ser.at_time("00:00")
diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py
new file mode 100644
index 0000000000000..350a3fe6ff009
--- /dev/null
+++ b/pandas/tests/series/methods/test_between.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+from pandas import Series, bdate_range, date_range, period_range
+import pandas._testing as tm
+
+
+class TestBetween:
+
+ # TODO: redundant with test_between_datetime_values?
+ def test_between(self):
+ series = Series(date_range("1/1/2000", periods=10))
+ left, right = series[[2, 7]]
+
+ result = series.between(left, right)
+ expected = (series >= left) & (series <= right)
+ tm.assert_series_equal(result, expected)
+
+ def test_between_datetime_values(self):
+ ser = Series(bdate_range("1/1/2000", periods=20).astype(object))
+ ser[::2] = np.nan
+
+ result = ser[ser.between(ser[3], ser[17])]
+ expected = ser[3:18].dropna()
+ tm.assert_series_equal(result, expected)
+
+ result = ser[ser.between(ser[3], ser[17], inclusive=False)]
+ expected = ser[5:16].dropna()
+ tm.assert_series_equal(result, expected)
+
+ def test_between_period_values(self):
+ ser = Series(period_range("2000-01-01", periods=10, freq="D"))
+ left, right = ser[[2, 7]]
+ result = ser.between(left, right)
+ expected = (ser >= left) & (ser <= right)
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_between_time.py b/pandas/tests/series/methods/test_between_time.py
new file mode 100644
index 0000000000000..3fa26afe77a1d
--- /dev/null
+++ b/pandas/tests/series/methods/test_between_time.py
@@ -0,0 +1,144 @@
+from datetime import datetime, time
+from itertools import product
+
+import numpy as np
+import pytest
+
+from pandas._libs.tslibs import timezones
+import pandas.util._test_decorators as td
+
+from pandas import DataFrame, Series, date_range
+import pandas._testing as tm
+
+
+class TestBetweenTime:
+ @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+ def test_localized_between_time(self, tzstr):
+ tz = timezones.maybe_get_tz(tzstr)
+
+ rng = date_range("4/16/2012", "5/1/2012", freq="H")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+
+ ts_local = ts.tz_localize(tzstr)
+
+ t1, t2 = time(10, 0), time(11, 0)
+ result = ts_local.between_time(t1, t2)
+ expected = ts.between_time(t1, t2).tz_localize(tzstr)
+ tm.assert_series_equal(result, expected)
+ assert timezones.tz_compare(result.index.tz, tz)
+
+ def test_between_time(self):
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+ stime = time(0, 0)
+ etime = time(1, 0)
+
+ close_open = product([True, False], [True, False])
+ for inc_start, inc_end in close_open:
+ filtered = ts.between_time(stime, etime, inc_start, inc_end)
+ exp_len = 13 * 4 + 1
+ if not inc_start:
+ exp_len -= 5
+ if not inc_end:
+ exp_len -= 4
+
+ assert len(filtered) == exp_len
+ for rs in filtered.index:
+ t = rs.time()
+ if inc_start:
+ assert t >= stime
+ else:
+ assert t > stime
+
+ if inc_end:
+ assert t <= etime
+ else:
+ assert t < etime
+
+ result = ts.between_time("00:00", "01:00")
+ expected = ts.between_time(stime, etime)
+ tm.assert_series_equal(result, expected)
+
+ # across midnight
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+ stime = time(22, 0)
+ etime = time(9, 0)
+
+ close_open = product([True, False], [True, False])
+ for inc_start, inc_end in close_open:
+ filtered = ts.between_time(stime, etime, inc_start, inc_end)
+ exp_len = (12 * 11 + 1) * 4 + 1
+ if not inc_start:
+ exp_len -= 4
+ if not inc_end:
+ exp_len -= 4
+
+ assert len(filtered) == exp_len
+ for rs in filtered.index:
+ t = rs.time()
+ if inc_start:
+ assert (t >= stime) or (t <= etime)
+ else:
+ assert (t > stime) or (t <= etime)
+
+ if inc_end:
+ assert (t <= etime) or (t >= stime)
+ else:
+ assert (t < etime) or (t >= stime)
+
+ def test_between_time_raises(self):
+ # GH20725
+ ser = Series("a b c".split())
+ msg = "Index must be DatetimeIndex"
+ with pytest.raises(TypeError, match=msg):
+ ser.between_time(start_time="00:00", end_time="12:00")
+
+ def test_between_time_types(self):
+ # GH11818
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
+ with pytest.raises(ValueError, match=msg):
+ rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
+
+ frame = DataFrame({"A": 0}, index=rng)
+ with pytest.raises(ValueError, match=msg):
+ frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
+
+ series = Series(0, index=rng)
+ with pytest.raises(ValueError, match=msg):
+ series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
+
+ @td.skip_if_has_locale
+ def test_between_time_formats(self):
+ # GH11818
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+ ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+
+ strings = [
+ ("2:00", "2:30"),
+ ("0200", "0230"),
+ ("2:00am", "2:30am"),
+ ("0200am", "0230am"),
+ ("2:00:00", "2:30:00"),
+ ("020000", "023000"),
+ ("2:00:00am", "2:30:00am"),
+ ("020000am", "023000am"),
+ ]
+ expected_length = 28
+
+ for time_string in strings:
+ assert len(ts.between_time(*time_string)) == expected_length
+
+ def test_between_time_axis(self):
+ # issue 8839
+ rng = date_range("1/1/2000", periods=100, freq="10min")
+ ts = Series(np.random.randn(len(rng)), index=rng)
+ stime, etime = ("08:00:00", "09:00:00")
+ expected_length = 7
+
+ assert len(ts.between_time(stime, etime)) == expected_length
+ assert len(ts.between_time(stime, etime, axis=0)) == expected_length
+ msg = "No axis named 1 for object type "
+ with pytest.raises(ValueError, match=msg):
+ ts.between_time(stime, etime, axis=1)
diff --git a/pandas/tests/series/methods/test_combine.py b/pandas/tests/series/methods/test_combine.py
new file mode 100644
index 0000000000000..75d47e3daa103
--- /dev/null
+++ b/pandas/tests/series/methods/test_combine.py
@@ -0,0 +1,17 @@
+from pandas import Series
+import pandas._testing as tm
+
+
+class TestCombine:
+ def test_combine_scalar(self):
+ # GH#21248
+ # Note - combine() with another Series is tested elsewhere because
+ # it is used when testing operators
+ ser = Series([i * 10 for i in range(5)])
+ result = ser.combine(3, lambda x, y: x + y)
+ expected = Series([i * 10 + 3 for i in range(5)])
+ tm.assert_series_equal(result, expected)
+
+ result = ser.combine(22, lambda x, y: min(x, y))
+ expected = Series([min(i * 10, 22) for i in range(5)])
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py
new file mode 100644
index 0000000000000..60182f509e657
--- /dev/null
+++ b/pandas/tests/series/methods/test_rename.py
@@ -0,0 +1,91 @@
+from datetime import datetime
+
+import numpy as np
+
+from pandas import Index, Series
+import pandas._testing as tm
+
+
+class TestRename:
+ def test_rename(self, datetime_series):
+ ts = datetime_series
+ renamer = lambda x: x.strftime("%Y%m%d")
+ renamed = ts.rename(renamer)
+ assert renamed.index[0] == renamer(ts.index[0])
+
+ # dict
+ rename_dict = dict(zip(ts.index, renamed.index))
+ renamed2 = ts.rename(rename_dict)
+ tm.assert_series_equal(renamed, renamed2)
+
+ # partial dict
+ s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
+ renamed = s.rename({"b": "foo", "d": "bar"})
+ tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
+
+ # index with name
+ renamer = Series(
+ np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
+ )
+ renamed = renamer.rename({})
+ assert renamed.index.name == renamer.index.name
+
+ def test_rename_by_series(self):
+ s = Series(range(5), name="foo")
+ renamer = Series({1: 10, 2: 20})
+ result = s.rename(renamer)
+ expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
+ tm.assert_series_equal(result, expected)
+
+ def test_rename_set_name(self):
+ s = Series(range(4), index=list("abcd"))
+ for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
+ result = s.rename(name)
+ assert result.name == name
+ tm.assert_numpy_array_equal(result.index.values, s.index.values)
+ assert s.name is None
+
+ def test_rename_set_name_inplace(self):
+ s = Series(range(3), index=list("abc"))
+ for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
+ s.rename(name, inplace=True)
+ assert s.name == name
+
+ exp = np.array(["a", "b", "c"], dtype=np.object_)
+ tm.assert_numpy_array_equal(s.index.values, exp)
+
+ def test_rename_axis_supported(self):
+ # Supporting axis for compatibility, detailed in GH-18589
+ s = Series(range(5))
+ s.rename({}, axis=0)
+ s.rename({}, axis="index")
+        # FIXME: don't leave commented-out code
+ # TODO: clean up shared index validation
+ # with pytest.raises(ValueError, match="No axis named 5"):
+ # s.rename({}, axis=5)
+
+ def test_rename_inplace(self, datetime_series):
+ renamer = lambda x: x.strftime("%Y%m%d")
+ expected = renamer(datetime_series.index[0])
+
+ datetime_series.rename(renamer, inplace=True)
+ assert datetime_series.index[0] == expected
+
+ def test_rename_with_custom_indexer(self):
+ # GH 27814
+ class MyIndexer:
+ pass
+
+ ix = MyIndexer()
+ s = Series([1, 2, 3]).rename(ix)
+ assert s.name is ix
+
+ def test_rename_with_custom_indexer_inplace(self):
+ # GH 27814
+ class MyIndexer:
+ pass
+
+ ix = MyIndexer()
+ s = Series([1, 2, 3])
+ s.rename(ix, inplace=True)
+ assert s.name is ix
diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py
new file mode 100644
index 0000000000000..f0c4895ad7c10
--- /dev/null
+++ b/pandas/tests/series/methods/test_reset_index.py
@@ -0,0 +1,110 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series
+import pandas._testing as tm
+
+
+class TestResetIndex:
+ def test_reset_index(self):
+ df = tm.makeDataFrame()[:5]
+ ser = df.stack()
+ ser.index.names = ["hash", "category"]
+
+ ser.name = "value"
+ df = ser.reset_index()
+ assert "value" in df
+
+ df = ser.reset_index(name="value2")
+ assert "value2" in df
+
+ # check inplace
+ s = ser.reset_index(drop=True)
+ s2 = ser
+ s2.reset_index(drop=True, inplace=True)
+ tm.assert_series_equal(s, s2)
+
+ # level
+ index = MultiIndex(
+ levels=[["bar"], ["one", "two", "three"], [0, 1]],
+ codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
+ )
+ s = Series(np.random.randn(6), index=index)
+ rs = s.reset_index(level=1)
+ assert len(rs.columns) == 2
+
+ rs = s.reset_index(level=[0, 2], drop=True)
+ tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
+ assert isinstance(rs, Series)
+
+ def test_reset_index_name(self):
+ s = Series([1, 2, 3], index=Index(range(3), name="x"))
+ assert s.reset_index().index.name is None
+ assert s.reset_index(drop=True).index.name is None
+
+ def test_reset_index_level(self):
+ df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
+
+ for levels in ["A", "B"], [0, 1]:
+ # With MultiIndex
+ s = df.set_index(["A", "B"])["C"]
+
+ result = s.reset_index(level=levels[0])
+ tm.assert_frame_equal(result, df.set_index("B"))
+
+ result = s.reset_index(level=levels[:1])
+ tm.assert_frame_equal(result, df.set_index("B"))
+
+ result = s.reset_index(level=levels)
+ tm.assert_frame_equal(result, df)
+
+ result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
+ tm.assert_frame_equal(result, df[["C"]])
+
+ with pytest.raises(KeyError, match="Level E "):
+ s.reset_index(level=["A", "E"])
+
+ # With single-level Index
+ s = df.set_index("A")["B"]
+
+ result = s.reset_index(level=levels[0])
+ tm.assert_frame_equal(result, df[["A", "B"]])
+
+ result = s.reset_index(level=levels[:1])
+ tm.assert_frame_equal(result, df[["A", "B"]])
+
+ result = s.reset_index(level=levels[0], drop=True)
+ tm.assert_series_equal(result, df["B"])
+
+ with pytest.raises(IndexError, match="Too many levels"):
+ s.reset_index(level=[0, 1, 2])
+
+        # Check that .reset_index([], drop=True) doesn't fail
+ result = Series(range(4)).reset_index([], drop=True)
+ expected = Series(range(4))
+ tm.assert_series_equal(result, expected)
+
+ def test_reset_index_range(self):
+ # GH 12071
+ s = Series(range(2), name="A", dtype="int64")
+ series_result = s.reset_index()
+ assert isinstance(series_result.index, RangeIndex)
+ series_expected = DataFrame(
+ [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
+ )
+ tm.assert_frame_equal(series_result, series_expected)
+
+ def test_reset_index_drop_errors(self):
+ # GH 20925
+
+ # KeyError raised for series index when passed level name is missing
+ s = Series(range(4))
+ with pytest.raises(KeyError, match="does not match index name"):
+ s.reset_index("wrong", drop=True)
+ with pytest.raises(KeyError, match="does not match index name"):
+ s.reset_index("wrong")
+
+ # KeyError raised for series when level to be dropped is missing
+ s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
+ with pytest.raises(KeyError, match="not found"):
+ s.reset_index("wrong", drop=True)
diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py
index d4e2890ed8bf0..c97369b349f56 100644
--- a/pandas/tests/series/methods/test_truncate.py
+++ b/pandas/tests/series/methods/test_truncate.py
@@ -1,7 +1,10 @@
+from datetime import datetime
+
import numpy as np
import pytest
import pandas as pd
+from pandas import Series, date_range
import pandas._testing as tm
from pandas.tseries.offsets import BDay
@@ -76,3 +79,33 @@ def test_truncate_nonsortedindex(self):
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=False).truncate(before="2011-11", after="2011-12")
+
+ def test_truncate_datetimeindex_tz(self):
+ # GH 9243
+ idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
+ s = Series(range(len(idx)), index=idx)
+ result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
+ expected = Series([1, 2, 3], index=idx[1:4])
+ tm.assert_series_equal(result, expected)
+
+ def test_truncate_periodindex(self):
+ # GH 17717
+ idx1 = pd.PeriodIndex(
+ [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
+ )
+ series1 = pd.Series([1, 2, 3], index=idx1)
+ result1 = series1.truncate(after="2017-09-02")
+
+ expected_idx1 = pd.PeriodIndex(
+ [pd.Period("2017-09-02"), pd.Period("2017-09-02")]
+ )
+ tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))
+
+ idx2 = pd.PeriodIndex(
+ [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
+ )
+ series2 = pd.Series([1, 2, 3], index=idx2)
+ result2 = series2.sort_index().truncate(after="2017-09-02")
+
+ expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
+ tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))
diff --git a/pandas/tests/series/methods/test_tz_convert.py b/pandas/tests/series/methods/test_tz_convert.py
new file mode 100644
index 0000000000000..ce348d5323e62
--- /dev/null
+++ b/pandas/tests/series/methods/test_tz_convert.py
@@ -0,0 +1,29 @@
+import numpy as np
+import pytest
+
+from pandas import DatetimeIndex, Series, date_range
+import pandas._testing as tm
+
+
+class TestTZConvert:
+ def test_series_tz_convert(self):
+ rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
+ ts = Series(1, index=rng)
+
+ result = ts.tz_convert("Europe/Berlin")
+ assert result.index.tz.zone == "Europe/Berlin"
+
+ # can't convert tz-naive
+ rng = date_range("1/1/2011", periods=200, freq="D")
+ ts = Series(1, index=rng)
+
+ with pytest.raises(TypeError, match="Cannot convert tz-naive"):
+ ts.tz_convert("US/Eastern")
+
+ def test_series_tz_convert_to_utc(self):
+ base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
+ idx1 = base.tz_convert("Asia/Tokyo")[:2]
+ idx2 = base.tz_convert("US/Eastern")[1:]
+
+ res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
+ tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))
diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py
new file mode 100644
index 0000000000000..44c55edf77c0a
--- /dev/null
+++ b/pandas/tests/series/methods/test_tz_localize.py
@@ -0,0 +1,88 @@
+import pytest
+import pytz
+
+from pandas._libs.tslibs import timezones
+
+from pandas import DatetimeIndex, NaT, Series, Timestamp, date_range
+import pandas._testing as tm
+
+
+class TestTZLocalize:
+ def test_series_tz_localize(self):
+
+ rng = date_range("1/1/2011", periods=100, freq="H")
+ ts = Series(1, index=rng)
+
+ result = ts.tz_localize("utc")
+ assert result.index.tz.zone == "UTC"
+
+ # Can't localize if already tz-aware
+ rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
+ ts = Series(1, index=rng)
+
+ with pytest.raises(TypeError, match="Already tz-aware"):
+ ts.tz_localize("US/Eastern")
+
+ def test_series_tz_localize_ambiguous_bool(self):
+ # make sure that we are correctly accepting bool values as ambiguous
+
+ # GH#14402
+ ts = Timestamp("2015-11-01 01:00:03")
+ expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
+ expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")
+
+ ser = Series([ts])
+ expected0 = Series([expected0])
+ expected1 = Series([expected1])
+
+ with pytest.raises(pytz.AmbiguousTimeError):
+ ser.dt.tz_localize("US/Central")
+
+ result = ser.dt.tz_localize("US/Central", ambiguous=True)
+ tm.assert_series_equal(result, expected0)
+
+ result = ser.dt.tz_localize("US/Central", ambiguous=[True])
+ tm.assert_series_equal(result, expected0)
+
+ result = ser.dt.tz_localize("US/Central", ambiguous=False)
+ tm.assert_series_equal(result, expected1)
+
+ result = ser.dt.tz_localize("US/Central", ambiguous=[False])
+ tm.assert_series_equal(result, expected1)
+
+ @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"])
+ @pytest.mark.parametrize(
+ "method, exp",
+ [
+ ["shift_forward", "2015-03-29 03:00:00"],
+ ["NaT", NaT],
+ ["raise", None],
+ ["foo", "invalid"],
+ ],
+ )
+ def test_series_tz_localize_nonexistent(self, tz, method, exp):
+ # GH 8917
+ n = 60
+ dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
+ s = Series(1, dti)
+ if method == "raise":
+ with pytest.raises(pytz.NonExistentTimeError):
+ s.tz_localize(tz, nonexistent=method)
+ elif exp == "invalid":
+ with pytest.raises(ValueError):
+ dti.tz_localize(tz, nonexistent=method)
+ else:
+ result = s.tz_localize(tz, nonexistent=method)
+ expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz))
+ tm.assert_series_equal(result, expected)
+
+ @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+ def test_series_tz_localize_empty(self, tzstr):
+ # GH#2248
+ ser = Series(dtype=object)
+
+ ser2 = ser.tz_localize("utc")
+ assert ser2.index.tz == pytz.utc
+
+ ser2 = ser.tz_localize(tzstr)
+ timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))
diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py
new file mode 100644
index 0000000000000..b7f5f33294792
--- /dev/null
+++ b/pandas/tests/series/methods/test_update.py
@@ -0,0 +1,58 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Series
+import pandas._testing as tm
+
+
+class TestUpdate:
+ def test_update(self):
+ s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
+ s2 = Series([np.nan, 3.5, np.nan, 5.0])
+ s.update(s2)
+
+ expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
+ tm.assert_series_equal(s, expected)
+
+ # GH 3217
+ df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
+ df["c"] = np.nan
+
+ df["c"].update(Series(["foo"], index=[0]))
+ expected = DataFrame(
+ [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
+ )
+ tm.assert_frame_equal(df, expected)
+
+ @pytest.mark.parametrize(
+ "other, dtype, expected",
+ [
+ # other is int
+ ([61, 63], "int32", Series([10, 61, 12], dtype="int32")),
+ ([61, 63], "int64", Series([10, 61, 12])),
+ ([61, 63], float, Series([10.0, 61.0, 12.0])),
+ ([61, 63], object, Series([10, 61, 12], dtype=object)),
+ # other is float, but can be cast to int
+ ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")),
+ ([61.0, 63.0], "int64", Series([10, 61, 12])),
+ ([61.0, 63.0], float, Series([10.0, 61.0, 12.0])),
+ ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)),
+            # other is float, cannot be cast to int
+ ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0])),
+ ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])),
+ ([61.1, 63.1], float, Series([10.0, 61.1, 12.0])),
+ ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)),
+ # other is object, cannot be cast
+ ([(61,), (63,)], "int32", Series([10, (61,), 12])),
+ ([(61,), (63,)], "int64", Series([10, (61,), 12])),
+ ([(61,), (63,)], float, Series([10.0, (61,), 12.0])),
+ ([(61,), (63,)], object, Series([10, (61,), 12])),
+ ],
+ )
+ def test_update_dtypes(self, other, dtype, expected):
+
+ ser = Series([10, 11, 12], dtype=dtype)
+ other = Series(other, index=[1, 3])
+ ser.update(other)
+
+ tm.assert_series_equal(ser, expected)
diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py
index 71f6681e8c955..9be8744d7223f 100644
--- a/pandas/tests/series/test_alter_axes.py
+++ b/pandas/tests/series/test_alter_axes.py
@@ -3,7 +3,7 @@
import numpy as np
import pytest
-from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series
+from pandas import Index, MultiIndex, Series
import pandas._testing as tm
@@ -31,62 +31,6 @@ def test_setindex(self, string_series):
# Renaming
- def test_rename(self, datetime_series):
- ts = datetime_series
- renamer = lambda x: x.strftime("%Y%m%d")
- renamed = ts.rename(renamer)
- assert renamed.index[0] == renamer(ts.index[0])
-
- # dict
- rename_dict = dict(zip(ts.index, renamed.index))
- renamed2 = ts.rename(rename_dict)
- tm.assert_series_equal(renamed, renamed2)
-
- # partial dict
- s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
- renamed = s.rename({"b": "foo", "d": "bar"})
- tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
-
- # index with name
- renamer = Series(
- np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
- )
- renamed = renamer.rename({})
- assert renamed.index.name == renamer.index.name
-
- def test_rename_by_series(self):
- s = Series(range(5), name="foo")
- renamer = Series({1: 10, 2: 20})
- result = s.rename(renamer)
- expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
- tm.assert_series_equal(result, expected)
-
- def test_rename_set_name(self):
- s = Series(range(4), index=list("abcd"))
- for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
- result = s.rename(name)
- assert result.name == name
- tm.assert_numpy_array_equal(result.index.values, s.index.values)
- assert s.name is None
-
- def test_rename_set_name_inplace(self):
- s = Series(range(3), index=list("abc"))
- for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
- s.rename(name, inplace=True)
- assert s.name == name
-
- exp = np.array(["a", "b", "c"], dtype=np.object_)
- tm.assert_numpy_array_equal(s.index.values, exp)
-
- def test_rename_axis_supported(self):
- # Supporting axis for compatibility, detailed in GH-18589
- s = Series(range(5))
- s.rename({}, axis=0)
- s.rename({}, axis="index")
- # TODO: clean up shared index validation
- # with pytest.raises(ValueError, match="No axis named 5"):
- # s.rename({}, axis=5)
-
def test_set_name_attribute(self):
s = Series([1, 2, 3])
s2 = Series([1, 2, 3], name="bar")
@@ -103,13 +47,6 @@ def test_set_name(self):
assert s.name is None
assert s is not s2
- def test_rename_inplace(self, datetime_series):
- renamer = lambda x: x.strftime("%Y%m%d")
- expected = renamer(datetime_series.index[0])
-
- datetime_series.rename(renamer, inplace=True)
- assert datetime_series.index[0] == expected
-
def test_set_index_makes_timeseries(self):
idx = tm.makeDateIndex(10)
@@ -117,94 +54,6 @@ def test_set_index_makes_timeseries(self):
s.index = idx
assert s.index.is_all_dates
- def test_reset_index(self):
- df = tm.makeDataFrame()[:5]
- ser = df.stack()
- ser.index.names = ["hash", "category"]
-
- ser.name = "value"
- df = ser.reset_index()
- assert "value" in df
-
- df = ser.reset_index(name="value2")
- assert "value2" in df
-
- # check inplace
- s = ser.reset_index(drop=True)
- s2 = ser
- s2.reset_index(drop=True, inplace=True)
- tm.assert_series_equal(s, s2)
-
- # level
- index = MultiIndex(
- levels=[["bar"], ["one", "two", "three"], [0, 1]],
- codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
- )
- s = Series(np.random.randn(6), index=index)
- rs = s.reset_index(level=1)
- assert len(rs.columns) == 2
-
- rs = s.reset_index(level=[0, 2], drop=True)
- tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
- assert isinstance(rs, Series)
-
- def test_reset_index_name(self):
- s = Series([1, 2, 3], index=Index(range(3), name="x"))
- assert s.reset_index().index.name is None
- assert s.reset_index(drop=True).index.name is None
-
- def test_reset_index_level(self):
- df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
-
- for levels in ["A", "B"], [0, 1]:
- # With MultiIndex
- s = df.set_index(["A", "B"])["C"]
-
- result = s.reset_index(level=levels[0])
- tm.assert_frame_equal(result, df.set_index("B"))
-
- result = s.reset_index(level=levels[:1])
- tm.assert_frame_equal(result, df.set_index("B"))
-
- result = s.reset_index(level=levels)
- tm.assert_frame_equal(result, df)
-
- result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
- tm.assert_frame_equal(result, df[["C"]])
-
- with pytest.raises(KeyError, match="Level E "):
- s.reset_index(level=["A", "E"])
-
- # With single-level Index
- s = df.set_index("A")["B"]
-
- result = s.reset_index(level=levels[0])
- tm.assert_frame_equal(result, df[["A", "B"]])
-
- result = s.reset_index(level=levels[:1])
- tm.assert_frame_equal(result, df[["A", "B"]])
-
- result = s.reset_index(level=levels[0], drop=True)
- tm.assert_series_equal(result, df["B"])
-
- with pytest.raises(IndexError, match="Too many levels"):
- s.reset_index(level=[0, 1, 2])
-
- # Check that .reset_index([],drop=True) doesn't fail
- result = Series(range(4)).reset_index([], drop=True)
- expected = Series(range(4))
- tm.assert_series_equal(result, expected)
-
- def test_reset_index_range(self):
- # GH 12071
- s = Series(range(2), name="A", dtype="int64")
- series_result = s.reset_index()
- assert isinstance(series_result.index, RangeIndex)
- series_expected = DataFrame(
- [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
- )
- tm.assert_frame_equal(series_result, series_expected)
-
def test_reorder_levels(self):
index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
@@ -268,25 +117,6 @@ def test_rename_axis_none(self, kwargs):
expected = Series([1, 2, 3], index=expected_index)
tm.assert_series_equal(result, expected)
- def test_rename_with_custom_indexer(self):
- # GH 27814
- class MyIndexer:
- pass
-
- ix = MyIndexer()
- s = Series([1, 2, 3]).rename(ix)
- assert s.name is ix
-
- def test_rename_with_custom_indexer_inplace(self):
- # GH 27814
- class MyIndexer:
- pass
-
- ix = MyIndexer()
- s = Series([1, 2, 3])
- s.rename(ix, inplace=True)
- assert s.name is ix
-
def test_set_axis_inplace_axes(self, axis_series):
# GH14636
ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")
@@ -323,21 +153,6 @@ def test_set_axis_inplace(self):
with pytest.raises(ValueError, match="No axis named"):
s.set_axis(list("abcd"), axis=axis, inplace=False)
- def test_reset_index_drop_errors(self):
- # GH 20925
-
- # KeyError raised for series index when passed level name is missing
- s = Series(range(4))
- with pytest.raises(KeyError, match="does not match index name"):
- s.reset_index("wrong", drop=True)
- with pytest.raises(KeyError, match="does not match index name"):
- s.reset_index("wrong")
-
- # KeyError raised for series when level to be dropped is missing
- s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
- with pytest.raises(KeyError, match="not found"):
- s.reset_index("wrong", drop=True)
-
def test_droplevel(self):
# GH20342
ser = Series([1, 2, 3, 4])
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
index 4afa083e97c7c..adb79f69c2d81 100644
--- a/pandas/tests/series/test_combine_concat.py
+++ b/pandas/tests/series/test_combine_concat.py
@@ -2,84 +2,27 @@
import pytest
import pandas as pd
-from pandas import DataFrame, Series
-import pandas._testing as tm
+from pandas import Series
class TestSeriesCombine:
- def test_combine_scalar(self):
- # GH 21248
- # Note - combine() with another Series is tested elsewhere because
- # it is used when testing operators
- s = pd.Series([i * 10 for i in range(5)])
- result = s.combine(3, lambda x, y: x + y)
- expected = pd.Series([i * 10 + 3 for i in range(5)])
- tm.assert_series_equal(result, expected)
-
- result = s.combine(22, lambda x, y: min(x, y))
- expected = pd.Series([min(i * 10, 22) for i in range(5)])
- tm.assert_series_equal(result, expected)
-
- def test_update(self):
- s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
- s2 = Series([np.nan, 3.5, np.nan, 5.0])
- s.update(s2)
-
- expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
- tm.assert_series_equal(s, expected)
-
- # GH 3217
- df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
- df["c"] = np.nan
-
- df["c"].update(Series(["foo"], index=[0]))
- expected = DataFrame(
- [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
- )
- tm.assert_frame_equal(df, expected)
-
@pytest.mark.parametrize(
- "other, dtype, expected",
- [
- # other is int
- ([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")),
- ([61, 63], "int64", pd.Series([10, 61, 12])),
- ([61, 63], float, pd.Series([10.0, 61.0, 12.0])),
- ([61, 63], object, pd.Series([10, 61, 12], dtype=object)),
- # other is float, but can be cast to int
- ([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")),
- ([61.0, 63.0], "int64", pd.Series([10, 61, 12])),
- ([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])),
- ([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)),
- # others is float, cannot be cast to int
- ([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])),
- ([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])),
- ([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])),
- ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)),
- # other is object, cannot be cast
- ([(61,), (63,)], "int32", pd.Series([10, (61,), 12])),
- ([(61,), (63,)], "int64", pd.Series([10, (61,), 12])),
- ([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])),
- ([(61,), (63,)], object, pd.Series([10, (61,), 12])),
- ],
+ "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
)
- def test_update_dtypes(self, other, dtype, expected):
+ def test_concat_empty_series_dtypes_match_roundtrips(self, dtype):
+ dtype = np.dtype(dtype)
- s = Series([10, 11, 12], dtype=dtype)
- other = Series(other, index=[1, 3])
- s.update(other)
+ result = pd.concat([Series(dtype=dtype)])
+ assert result.dtype == dtype
- tm.assert_series_equal(s, expected)
+ result = pd.concat([Series(dtype=dtype), Series(dtype=dtype)])
+ assert result.dtype == dtype
def test_concat_empty_series_dtypes_roundtrips(self):
# round-tripping with self & like self
dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"])
- for dtype in dtypes:
- assert pd.concat([Series(dtype=dtype)]).dtype == dtype
- assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype
-
def int_result_type(dtype, dtype2):
typs = {dtype.kind, dtype2.kind}
if not len(typs - {"i", "u", "b"}) and (
@@ -118,35 +61,28 @@ def get_result_type(dtype, dtype2):
result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
assert result.kind == expected
- def test_concat_empty_series_dtypes(self):
+ @pytest.mark.parametrize(
+ "left,right,expected",
+ [
+ # booleans
+ (np.bool_, np.int32, np.int32),
+ (np.bool_, np.float32, np.object_),
+ # datetime-like
+ ("m8[ns]", np.bool, np.object_),
+ ("m8[ns]", np.int64, np.object_),
+ ("M8[ns]", np.bool, np.object_),
+ ("M8[ns]", np.int64, np.object_),
+ # categorical
+ ("category", "category", "category"),
+ ("category", "object", "object"),
+ ],
+ )
+ def test_concat_empty_series_dtypes(self, left, right, expected):
+ result = pd.concat([Series(dtype=left), Series(dtype=right)])
+ assert result.dtype == expected
- # booleans
- assert (
- pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype
- == np.int32
- )
- assert (
- pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype
- == np.object_
- )
+ def test_concat_empty_series_dtypes_triple(self):
- # datetime-like
- assert (
- pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype
- == np.object_
- )
- assert (
- pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype
- == np.object_
- )
- assert (
- pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype
- == np.object_
- )
- assert (
- pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype
- == np.object_
- )
assert (
pd.concat(
[Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
@@ -154,11 +90,7 @@ def test_concat_empty_series_dtypes(self):
== np.object_
)
- # categorical
- assert (
- pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype
- == "category"
- )
+ def test_concat_empty_series_dtype_category_with_array(self):
# GH 18515
assert (
pd.concat(
@@ -166,13 +98,8 @@ def test_concat_empty_series_dtypes(self):
).dtype
== "float64"
)
- assert (
- pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype
- == "object"
- )
- # sparse
- # TODO: move?
+ def test_concat_empty_series_dtypes_sparse(self):
result = pd.concat(
[
Series(dtype="float64").astype("Sparse"),
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index b8be4ea137e3d..59ae0cd63690c 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -19,7 +19,6 @@
PeriodIndex,
Series,
TimedeltaIndex,
- bdate_range,
date_range,
period_range,
timedelta_range,
@@ -622,18 +621,6 @@ def test_dt_accessor_updates_on_inplace(self):
result = s.dt.date
assert result[0] == result[2]
- def test_between(self):
- s = Series(bdate_range("1/1/2000", periods=20).astype(object))
- s[::2] = np.nan
-
- result = s[s.between(s[3], s[17])]
- expected = s[3:18].dropna()
- tm.assert_series_equal(result, expected)
-
- result = s[s.between(s[3], s[17], inclusive=False)]
- expected = s[5:16].dropna()
- tm.assert_series_equal(result, expected)
-
def test_date_tz(self):
# GH11757
rng = pd.DatetimeIndex(
diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py
index 03fee389542e3..f41245c2872a7 100644
--- a/pandas/tests/series/test_period.py
+++ b/pandas/tests/series/test_period.py
@@ -52,12 +52,6 @@ def test_dropna(self):
s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])
tm.assert_series_equal(s.dropna(), Series([pd.Period("2011-01", freq="M")]))
- def test_between(self):
- left, right = self.series[[2, 7]]
- result = self.series.between(left, right)
- expected = (self.series >= left) & (self.series <= right)
- tm.assert_series_equal(result, expected)
-
# ---------------------------------------------------------------------
# NaT support
@@ -110,28 +104,6 @@ def test_align_series(self, join_type):
ts.align(ts[::2], join=join_type)
- def test_truncate(self):
- # GH 17717
- idx1 = pd.PeriodIndex(
- [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
- )
- series1 = pd.Series([1, 2, 3], index=idx1)
- result1 = series1.truncate(after="2017-09-02")
-
- expected_idx1 = pd.PeriodIndex(
- [pd.Period("2017-09-02"), pd.Period("2017-09-02")]
- )
- tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))
-
- idx2 = pd.PeriodIndex(
- [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
- )
- series2 = pd.Series([1, 2, 3], index=idx2)
- result2 = series2.sort_index().truncate(after="2017-09-02")
-
- expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
- tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))
-
@pytest.mark.parametrize(
"input_vals",
[
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index 459377fb18f29..8f06ea69f5d66 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -1,13 +1,11 @@
-from datetime import datetime, time, timedelta
+from datetime import datetime, timedelta
from io import StringIO
-from itertools import product
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
-import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
@@ -23,8 +21,6 @@
)
import pandas._testing as tm
-from pandas.tseries.offsets import BDay, BMonthEnd
-
def _simple_ts(start, end, freq="D"):
rng = date_range(start, end, freq=freq)
@@ -38,44 +34,6 @@ def assert_range_equal(left, right):
class TestTimeSeries:
- def test_asfreq(self):
- ts = Series(
- [0.0, 1.0, 2.0],
- index=[
- datetime(2009, 10, 30),
- datetime(2009, 11, 30),
- datetime(2009, 12, 31),
- ],
- )
-
- daily_ts = ts.asfreq("B")
- monthly_ts = daily_ts.asfreq("BM")
- tm.assert_series_equal(monthly_ts, ts)
-
- daily_ts = ts.asfreq("B", method="pad")
- monthly_ts = daily_ts.asfreq("BM")
- tm.assert_series_equal(monthly_ts, ts)
-
- daily_ts = ts.asfreq(BDay())
- monthly_ts = daily_ts.asfreq(BMonthEnd())
- tm.assert_series_equal(monthly_ts, ts)
-
- result = ts[:0].asfreq("M")
- assert len(result) == 0
- assert result is not ts
-
- daily_ts = ts.asfreq("D", fill_value=-1)
- result = daily_ts.value_counts().sort_index()
- expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
- tm.assert_series_equal(result, expected)
-
- def test_asfreq_datetimeindex_empty_series(self):
- # GH 14320
- index = pd.DatetimeIndex(["2016-09-29 11:00"])
- expected = Series(index=index, dtype=object).asfreq("H")
- result = Series([3], index=index.copy()).asfreq("H")
- tm.assert_index_equal(expected.index, result.index)
-
def test_autocorr(self, datetime_series):
# Just run the function
corr1 = datetime_series.autocorr()
@@ -268,15 +226,6 @@ def test_series_repr_nat(self):
)
assert result == expected
- def test_asfreq_keep_index_name(self):
- # GH #9854
- index_name = "bar"
- index = pd.date_range("20130101", periods=20, name=index_name)
- df = pd.DataFrame(list(range(20)), columns=["foo"], index=index)
-
- assert index_name == df.index.name
- assert index_name == df.asfreq("10D").index.name
-
def test_promote_datetime_date(self):
rng = date_range("1/1/2000", periods=20)
ts = Series(np.random.randn(20), index=rng)
@@ -300,26 +249,6 @@ def test_promote_datetime_date(self):
expected = rng.get_indexer(ts_slice.index)
tm.assert_numpy_array_equal(result, expected)
- def test_asfreq_normalize(self):
- rng = date_range("1/1/2000 09:30", periods=20)
- norm = date_range("1/1/2000", periods=20)
- vals = np.random.randn(20)
- ts = Series(vals, index=rng)
-
- result = ts.asfreq("D", normalize=True)
- norm = date_range("1/1/2000", periods=20)
- expected = Series(vals, index=norm)
-
- tm.assert_series_equal(result, expected)
-
- vals = np.random.randn(20, 3)
- ts = DataFrame(vals, index=rng)
-
- result = ts.asfreq("D", normalize=True)
- expected = DataFrame(vals, index=norm)
-
- tm.assert_frame_equal(result, expected)
-
def test_first_subset(self):
ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h")
result = ts.first("10d")
@@ -380,180 +309,6 @@ def test_format_pre_1900_dates(self):
ts = Series(1, index=rng)
repr(ts)
- def test_at_time(self):
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = Series(np.random.randn(len(rng)), index=rng)
- rs = ts.at_time(rng[1])
- assert (rs.index.hour == rng[1].hour).all()
- assert (rs.index.minute == rng[1].minute).all()
- assert (rs.index.second == rng[1].second).all()
-
- result = ts.at_time("9:30")
- expected = ts.at_time(time(9, 30))
- tm.assert_series_equal(result, expected)
-
- df = DataFrame(np.random.randn(len(rng), 3), index=rng)
-
- result = ts[time(9, 30)]
- result_df = df.loc[time(9, 30)]
- expected = ts[(rng.hour == 9) & (rng.minute == 30)]
- exp_df = df[(rng.hour == 9) & (rng.minute == 30)]
-
- # FIXME: dont leave commented-out
- # expected.index = date_range('1/1/2000', '1/4/2000')
-
- tm.assert_series_equal(result, expected)
- tm.assert_frame_equal(result_df, exp_df)
-
- chunk = df.loc["1/4/2000":]
- result = chunk.loc[time(9, 30)]
- expected = result_df[-1:]
- tm.assert_frame_equal(result, expected)
-
- # midnight, everything
- rng = date_range("1/1/2000", "1/31/2000")
- ts = Series(np.random.randn(len(rng)), index=rng)
-
- result = ts.at_time(time(0, 0))
- tm.assert_series_equal(result, ts)
-
- # time doesn't exist
- rng = date_range("1/1/2012", freq="23Min", periods=384)
- ts = Series(np.random.randn(len(rng)), rng)
- rs = ts.at_time("16:00")
- assert len(rs) == 0
-
- def test_at_time_raises(self):
- # GH20725
- ser = pd.Series("a b c".split())
- msg = "Index must be DatetimeIndex"
- with pytest.raises(TypeError, match=msg):
- ser.at_time("00:00")
-
- def test_between(self):
- series = Series(date_range("1/1/2000", periods=10))
- left, right = series[[2, 7]]
-
- result = series.between(left, right)
- expected = (series >= left) & (series <= right)
- tm.assert_series_equal(result, expected)
-
- def test_between_time(self):
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = Series(np.random.randn(len(rng)), index=rng)
- stime = time(0, 0)
- etime = time(1, 0)
-
- close_open = product([True, False], [True, False])
- for inc_start, inc_end in close_open:
- filtered = ts.between_time(stime, etime, inc_start, inc_end)
- exp_len = 13 * 4 + 1
- if not inc_start:
- exp_len -= 5
- if not inc_end:
- exp_len -= 4
-
- assert len(filtered) == exp_len
- for rs in filtered.index:
- t = rs.time()
- if inc_start:
- assert t >= stime
- else:
- assert t > stime
-
- if inc_end:
- assert t <= etime
- else:
- assert t < etime
-
- result = ts.between_time("00:00", "01:00")
- expected = ts.between_time(stime, etime)
- tm.assert_series_equal(result, expected)
-
- # across midnight
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = Series(np.random.randn(len(rng)), index=rng)
- stime = time(22, 0)
- etime = time(9, 0)
-
- close_open = product([True, False], [True, False])
- for inc_start, inc_end in close_open:
- filtered = ts.between_time(stime, etime, inc_start, inc_end)
- exp_len = (12 * 11 + 1) * 4 + 1
- if not inc_start:
- exp_len -= 4
- if not inc_end:
- exp_len -= 4
-
- assert len(filtered) == exp_len
- for rs in filtered.index:
- t = rs.time()
- if inc_start:
- assert (t >= stime) or (t <= etime)
- else:
- assert (t > stime) or (t <= etime)
-
- if inc_end:
- assert (t <= etime) or (t >= stime)
- else:
- assert (t < etime) or (t >= stime)
-
- def test_between_time_raises(self):
- # GH20725
- ser = pd.Series("a b c".split())
- msg = "Index must be DatetimeIndex"
- with pytest.raises(TypeError, match=msg):
- ser.between_time(start_time="00:00", end_time="12:00")
-
- def test_between_time_types(self):
- # GH11818
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
- with pytest.raises(ValueError, match=msg):
- rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
-
- frame = DataFrame({"A": 0}, index=rng)
- with pytest.raises(ValueError, match=msg):
- frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
-
- series = Series(0, index=rng)
- with pytest.raises(ValueError, match=msg):
- series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
-
- @td.skip_if_has_locale
- def test_between_time_formats(self):
- # GH11818
- rng = date_range("1/1/2000", "1/5/2000", freq="5min")
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-
- strings = [
- ("2:00", "2:30"),
- ("0200", "0230"),
- ("2:00am", "2:30am"),
- ("0200am", "0230am"),
- ("2:00:00", "2:30:00"),
- ("020000", "023000"),
- ("2:00:00am", "2:30:00am"),
- ("020000am", "023000am"),
- ]
- expected_length = 28
-
- for time_string in strings:
- assert len(ts.between_time(*time_string)) == expected_length
-
- def test_between_time_axis(self):
- # issue 8839
- rng = date_range("1/1/2000", periods=100, freq="10min")
- ts = Series(np.random.randn(len(rng)), index=rng)
- stime, etime = ("08:00:00", "09:00:00")
- expected_length = 7
-
- assert len(ts.between_time(stime, etime)) == expected_length
- assert len(ts.between_time(stime, etime, axis=0)) == expected_length
- msg = "No axis named 1 for object type "
- with pytest.raises(ValueError, match=msg):
- ts.between_time(stime, etime, axis=1)
-
def test_to_period(self):
from pandas.core.indexes.period import period_range
diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py
index a363f927d10a9..e729ff91293a8 100644
--- a/pandas/tests/series/test_timezones.py
+++ b/pandas/tests/series/test_timezones.py
@@ -10,207 +10,12 @@
from pandas._libs.tslibs import conversion, timezones
-from pandas import DatetimeIndex, Index, NaT, Series, Timestamp
+from pandas import Series, Timestamp
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range
class TestSeriesTimezones:
- # -----------------------------------------------------------------
- # Series.tz_localize
- def test_series_tz_localize(self):
-
- rng = date_range("1/1/2011", periods=100, freq="H")
- ts = Series(1, index=rng)
-
- result = ts.tz_localize("utc")
- assert result.index.tz.zone == "UTC"
-
- # Can't localize if already tz-aware
- rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
- ts = Series(1, index=rng)
-
- with pytest.raises(TypeError, match="Already tz-aware"):
- ts.tz_localize("US/Eastern")
-
- def test_series_tz_localize_ambiguous_bool(self):
- # make sure that we are correctly accepting bool values as ambiguous
-
- # GH#14402
- ts = Timestamp("2015-11-01 01:00:03")
- expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
- expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")
-
- ser = Series([ts])
- expected0 = Series([expected0])
- expected1 = Series([expected1])
-
- with pytest.raises(pytz.AmbiguousTimeError):
- ser.dt.tz_localize("US/Central")
-
- result = ser.dt.tz_localize("US/Central", ambiguous=True)
- tm.assert_series_equal(result, expected0)
-
- result = ser.dt.tz_localize("US/Central", ambiguous=[True])
- tm.assert_series_equal(result, expected0)
-
- result = ser.dt.tz_localize("US/Central", ambiguous=False)
- tm.assert_series_equal(result, expected1)
-
- result = ser.dt.tz_localize("US/Central", ambiguous=[False])
- tm.assert_series_equal(result, expected1)
-
- @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"])
- @pytest.mark.parametrize(
- "method, exp",
- [
- ["shift_forward", "2015-03-29 03:00:00"],
- ["NaT", NaT],
- ["raise", None],
- ["foo", "invalid"],
- ],
- )
- def test_series_tz_localize_nonexistent(self, tz, method, exp):
- # GH 8917
- n = 60
- dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
- s = Series(1, dti)
- if method == "raise":
- with pytest.raises(pytz.NonExistentTimeError):
- s.tz_localize(tz, nonexistent=method)
- elif exp == "invalid":
- with pytest.raises(ValueError):
- dti.tz_localize(tz, nonexistent=method)
- else:
- result = s.tz_localize(tz, nonexistent=method)
- expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz))
- tm.assert_series_equal(result, expected)
-
- @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
- def test_series_tz_localize_empty(self, tzstr):
- # GH#2248
- ser = Series(dtype=object)
-
- ser2 = ser.tz_localize("utc")
- assert ser2.index.tz == pytz.utc
-
- ser2 = ser.tz_localize(tzstr)
- timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))
-
- # -----------------------------------------------------------------
- # Series.tz_convert
-
- def test_series_tz_convert(self):
- rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
- ts = Series(1, index=rng)
-
- result = ts.tz_convert("Europe/Berlin")
- assert result.index.tz.zone == "Europe/Berlin"
-
- # can't convert tz-naive
- rng = date_range("1/1/2011", periods=200, freq="D")
- ts = Series(1, index=rng)
-
- with pytest.raises(TypeError, match="Cannot convert tz-naive"):
- ts.tz_convert("US/Eastern")
-
- def test_series_tz_convert_to_utc(self):
- base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
- idx1 = base.tz_convert("Asia/Tokyo")[:2]
- idx2 = base.tz_convert("US/Eastern")[1:]
-
- res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
- tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))
-
- # -----------------------------------------------------------------
- # Series.append
-
- def test_series_append_aware(self):
- rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
- rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
- ser1 = Series([1], index=rng1)
- ser2 = Series([2], index=rng2)
- ts_result = ser1.append(ser2)
-
- exp_index = DatetimeIndex(
- ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern"
- )
- exp = Series([1, 2], index=exp_index)
- tm.assert_series_equal(ts_result, exp)
- assert ts_result.index.tz == rng1.tz
-
- rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC")
- rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC")
- ser1 = Series([1], index=rng1)
- ser2 = Series([2], index=rng2)
- ts_result = ser1.append(ser2)
-
- exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC")
- exp = Series([1, 2], index=exp_index)
- tm.assert_series_equal(ts_result, exp)
- utc = rng1.tz
- assert utc == ts_result.index.tz
-
- # GH#7795
- # different tz coerces to object dtype, not UTC
- rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
- rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central")
- ser1 = Series([1], index=rng1)
- ser2 = Series([2], index=rng2)
- ts_result = ser1.append(ser2)
- exp_index = Index(
- [
- Timestamp("1/1/2011 01:00", tz="US/Eastern"),
- Timestamp("1/1/2011 02:00", tz="US/Central"),
- ]
- )
- exp = Series([1, 2], index=exp_index)
- tm.assert_series_equal(ts_result, exp)
-
- def test_series_append_aware_naive(self):
- rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
- rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
- ser1 = Series(np.random.randn(len(rng1)), index=rng1)
- ser2 = Series(np.random.randn(len(rng2)), index=rng2)
- ts_result = ser1.append(ser2)
-
- expected = ser1.index.astype(object).append(ser2.index.astype(object))
- assert ts_result.index.equals(expected)
-
- # mixed
- rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
- rng2 = range(100)
- ser1 = Series(np.random.randn(len(rng1)), index=rng1)
- ser2 = Series(np.random.randn(len(rng2)), index=rng2)
- ts_result = ser1.append(ser2)
-
- expected = ser1.index.astype(object).append(ser2.index)
- assert ts_result.index.equals(expected)
-
- def test_series_append_dst(self):
- rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
- rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
- ser1 = Series([1, 2, 3], index=rng1)
- ser2 = Series([10, 11, 12], index=rng2)
- ts_result = ser1.append(ser2)
-
- exp_index = DatetimeIndex(
- [
- "2016-01-01 01:00",
- "2016-01-01 02:00",
- "2016-01-01 03:00",
- "2016-08-01 01:00",
- "2016-08-01 02:00",
- "2016-08-01 03:00",
- ],
- tz="US/Eastern",
- )
- exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
- tm.assert_series_equal(ts_result, exp)
- assert ts_result.index.tz == rng1.tz
-
- # -----------------------------------------------------------------
-
def test_dateutil_tzoffset_support(self):
values = [188.5, 328.25]
tzinfo = tzoffset(None, 7200)
@@ -225,15 +30,6 @@ def test_dateutil_tzoffset_support(self):
# it works! #2443
repr(series.index[0])
- @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
- def test_tz_aware_asfreq(self, tz):
- dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz)
-
- ser = Series(np.random.randn(len(dr)), index=dr)
-
- # it works!
- ser.asfreq("T")
-
@pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
def test_string_index_alias_tz_aware(self, tz):
rng = date_range("1/1/2000", periods=10, tz=tz)
@@ -299,28 +95,6 @@ def test_series_align_aware(self):
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
- @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
- def test_localized_at_time_between_time(self, tzstr):
- from datetime import time
-
- tz = timezones.maybe_get_tz(tzstr)
-
- rng = date_range("4/16/2012", "5/1/2012", freq="H")
- ts = Series(np.random.randn(len(rng)), index=rng)
-
- ts_local = ts.tz_localize(tzstr)
-
- result = ts_local.at_time(time(10, 0))
- expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
- tm.assert_series_equal(result, expected)
- assert timezones.tz_compare(result.index.tz, tz)
-
- t1, t2 = time(10, 0), time(11, 0)
- result = ts_local.between_time(t1, t2)
- expected = ts.between_time(t1, t2).tz_localize(tzstr)
- tm.assert_series_equal(result, expected)
- assert timezones.tz_compare(result.index.tz, tz)
-
@pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
def test_getitem_pydatetime_tz(self, tzstr):
tz = timezones.maybe_get_tz(tzstr)
@@ -335,14 +109,6 @@ def test_getitem_pydatetime_tz(self, tzstr):
time_datetime = conversion.localize_pydatetime(dt, tz)
assert ts[time_pandas] == ts[time_datetime]
- def test_series_truncate_datetimeindex_tz(self):
- # GH 9243
- idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
- s = Series(range(len(idx)), index=idx)
- result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
- expected = Series([1, 2, 3], index=idx[1:4])
- tm.assert_series_equal(result, expected)
-
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize(
"method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]]
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
index d72c00ceb0045..515d798fe4322 100644
--- a/pandas/tests/test_errors.py
+++ b/pandas/tests/test_errors.py
@@ -17,6 +17,7 @@
"EmptyDataError",
"ParserWarning",
"MergeError",
+ "OptionError",
],
)
def test_exception_importable(exc):
diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py
index 0d2c81c4ea6c7..e36ea662fac8b 100644
--- a/pandas/tests/util/test_show_versions.py
+++ b/pandas/tests/util/test_show_versions.py
@@ -1,8 +1,26 @@
import re
+import pytest
+
import pandas as pd
+@pytest.mark.filterwarnings(
+ # openpyxl
+ "ignore:defusedxml.lxml is no longer supported:DeprecationWarning"
+)
+@pytest.mark.filterwarnings(
+ # html5lib
+ "ignore:Using or importing the ABCs from:DeprecationWarning"
+)
+@pytest.mark.filterwarnings(
+ # fastparquet
+ "ignore:pandas.core.index is deprecated:FutureWarning"
+)
+@pytest.mark.filterwarnings(
+ # pandas_datareader
+ "ignore:pandas.util.testing is deprecated:FutureWarning"
+)
def test_show_versions(capsys):
# gh-32041
pd.show_versions()
diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py
index 99b2b9e9f5f6e..f9502cc22b0c6 100644
--- a/pandas/util/_print_versions.py
+++ b/pandas/util/_print_versions.py
@@ -4,13 +4,23 @@
import os
import platform
import struct
-import subprocess
import sys
from typing import List, Optional, Tuple, Union
from pandas.compat._optional import VERSIONS, _get_version, import_optional_dependency
+def _get_commit_hash() -> Optional[str]:
+ """
+ Use vendored versioneer code to get git hash, which handles
+ git worktree correctly.
+ """
+ from pandas._version import get_versions
+
+ versions = get_versions()
+ return versions["full-revisionid"]
+
+
def get_sys_info() -> List[Tuple[str, Optional[Union[str, int]]]]:
"""
Returns system information as a list
@@ -18,20 +28,7 @@ def get_sys_info() -> List[Tuple[str, Optional[Union[str, int]]]]:
blob: List[Tuple[str, Optional[Union[str, int]]]] = []
# get full commit hash
- commit = None
- if os.path.isdir(".git") and os.path.isdir("pandas"):
- try:
- pipe = subprocess.Popen(
- 'git log --format="%H" -n 1'.split(" "),
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
- so, serr = pipe.communicate()
- except (OSError, ValueError):
- pass
- else:
- if pipe.returncode == 0:
- commit = so.decode("utf-8").strip().strip('"')
+ commit = _get_commit_hash()
blob.append(("commit", commit))