diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 0f3b3838de1b2..1768e682b3db4 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -31,83 +31,62 @@ def time_maybe_convert_objects(self): class Factorize: - params = [[True, False], ["int", "uint", "float", "string"]] - param_names = ["sort", "dtype"] - - def setup(self, sort, dtype): - N = 10 ** 5 - data = { - "int": pd.Int64Index(np.arange(N).repeat(5)), - "uint": pd.UInt64Index(np.arange(N).repeat(5)), - "float": pd.Float64Index(np.random.randn(N).repeat(5)), - "string": tm.makeStringIndex(N).repeat(5), - } - self.idx = data[dtype] - - def time_factorize(self, sort, dtype): - self.idx.factorize(sort=sort) - - -class FactorizeUnique: - - params = [[True, False], ["int", "uint", "float", "string"]] - param_names = ["sort", "dtype"] + params = [ + [True, False], + [True, False], + ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"], + ] + param_names = ["unique", "sort", "dtype"] - def setup(self, sort, dtype): + def setup(self, unique, sort, dtype): N = 10 ** 5 data = { "int": pd.Int64Index(np.arange(N)), "uint": pd.UInt64Index(np.arange(N)), - "float": pd.Float64Index(np.arange(N)), + "float": pd.Float64Index(np.random.randn(N)), "string": tm.makeStringIndex(N), - } - self.idx = data[dtype] - assert self.idx.is_unique - - def time_factorize(self, sort, dtype): + "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": pd.date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + }[dtype] + if not unique: + data = data.repeat(5) + self.idx = data + + def time_factorize(self, unique, sort, dtype): self.idx.factorize(sort=sort) class Duplicated: - params = [["first", "last", False], ["int", "uint", "float", "string"]] - param_names = ["keep", "dtype"] - - def setup(self, keep, dtype): - N = 10 ** 5 - data = { - "int": pd.Int64Index(np.arange(N).repeat(5)), - "uint": pd.UInt64Index(np.arange(N).repeat(5)), - "float": pd.Float64Index(np.random.randn(N).repeat(5)), - "string": tm.makeStringIndex(N).repeat(5), - } - self.idx = data[dtype] - # cache is_unique - self.idx.is_unique - - def time_duplicated(self, keep, dtype): - self.idx.duplicated(keep=keep) - - -class DuplicatedUniqueIndex: - - params = ["int", "uint", "float", "string"] - param_names = ["dtype"] + params = [ + [True, False], + ["first", "last", False], + ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"], + ] + param_names = ["unique", "keep", "dtype"] - def setup(self, dtype): + def setup(self, unique, keep, dtype): N = 10 ** 5 data = { "int": pd.Int64Index(np.arange(N)), "uint": pd.UInt64Index(np.arange(N)), "float": pd.Float64Index(np.random.randn(N)), "string": tm.makeStringIndex(N), - } - self.idx = data[dtype] + "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": pd.date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + }[dtype] + if not unique: + data = data.repeat(5) + self.idx = data # cache is_unique self.idx.is_unique - def time_duplicated_unique(self, dtype): - self.idx.duplicated() + def time_duplicated(self, unique, keep, dtype): + self.idx.duplicated(keep=keep) class Hashing: diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 6f43a6fd3fc9b..107b9b9edcd5d 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -258,9 +258,6 @@ def setup(self): def time_get_loc(self): self.index.get_loc(self.category) - def time_shape(self): - self.index.shape - def time_shallow_copy(self): self.index._shallow_copy() diff --git a/asv_bench/benchmarks/index_cached_properties.py b/asv_bench/benchmarks/index_cached_properties.py index 13b33855569c9..16fbc741775e4 100644 --- a/asv_bench/benchmarks/index_cached_properties.py +++ b/asv_bench/benchmarks/index_cached_properties.py @@ -7,6 +7,7 @@ class IndexCache: params = [ [ + "CategoricalIndex", "DatetimeIndex", "Float64Index", "IntervalIndex", @@ -42,6 +43,8 @@ def setup(self, index_type): self.idx = pd.Float64Index(range(N)) elif index_type == "UInt64Index": self.idx = pd.UInt64Index(range(N)) + elif index_type == "CategoricalIndex": + self.idx = pd.CategoricalIndex(range(N), range(N)) else: raise ValueError assert len(self.idx) == N diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index cf51a4d35f805..b242de6a17208 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -55,14 +55,6 @@ def time_datetime_difference_disjoint(self): self.datetime_left.difference(self.datetime_right) -class Datetime: - def setup(self): - self.dr = date_range("20000101", freq="D", periods=10000) - - def time_is_dates_only(self): - self.dr._is_dates_only - - class Range: def setup(self): self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 087fe3916845b..e98d2948e76ea 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,3 +1,8 @@ +""" +These benchmarks are for Series and DataFrame indexing methods. For the +lower-level methods directly on Index and subclasses, see index_object.py, +indexing_engine.py, and index_cached.py +""" import warnings import numpy as np diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index b52aa2e55af35..e15d4c66e4fc0 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -85,9 +85,6 @@ def setup(self): def time_get_loc(self): self.index.get_loc(self.period) - def time_shape(self): - self.index.shape - def time_shallow_copy(self): self.index._shallow_copy() diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 208c8f9d14a5e..cfe05c3e257b1 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -73,9 +73,6 @@ def setup(self): def time_get_loc(self): self.index.get_loc(self.timedelta) - def time_shape(self): - self.index.shape - def time_shallow_copy(self): self.index._shallow_copy() diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 2f7ea8b9c0873..6c9f8ee77e5ad 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -57,6 +57,9 @@ def time_to_date(self, index_type): def time_to_pydatetime(self, index_type): self.index.to_pydatetime() + def time_is_dates_only(self, index_type): + self.index._is_dates_only + class TzLocalize: @@ -91,20 +94,6 @@ def time_reest_datetimeindex(self, tz): self.df.reset_index() -class Factorize: - - params = [None, "Asia/Tokyo"] - param_names = "tz" - - def setup(self, tz): - N = 100000 - self.dti = date_range("2011-01-01", freq="H", periods=N, tz=tz) - self.dti = self.dti.repeat(5) - - def time_factorize(self, tz): - self.dti.factorize() - - class InferFreq: params = [None, "D", "B"] diff --git a/ci/code_checks.sh b/ci/code_checks.sh index bb7d8a388e6e2..e2dc543360a62 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -269,7 +269,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests generic.py' ; echo $MSG pytest -q --doctest-modules pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" + -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests groupby.py' ; echo $MSG diff --git a/ci/setup_env.sh b/ci/setup_env.sh index e5bee09fe2f79..ae39b0dda5d09 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -50,7 +50,7 @@ echo echo "update conda" conda config --set ssl_verify false conda config --set quiet true --set always_yes true --set changeps1 false -conda install pip # create conda to create a historical artifact for pip & setuptools +conda install pip conda # create conda to create a historical artifact for pip & setuptools conda update -n base conda echo "conda info -a" diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index fb06ee122ae88..b7e53b84f0e02 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -56,6 +56,11 @@ joining paths, replacing file extensions, and checking if files exist are also a Statistics and machine learning ------------------------------- +`pandas-tfrecords `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Easy saving pandas dataframe to tensorflow tfrecords format and reading tfrecords to pandas. + `Statsmodels `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 277080006cb3c..c6d9a48fcf8ed 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -689,6 +689,17 @@ of a 1D array of values. It can also be used as a function on regular arrays: s.value_counts() pd.value_counts(data) +.. versionadded:: 1.1.0 + +The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns. +By default all columns are used but a subset can be selected using the ``subset`` argument. + +.. ipython:: python + + data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]} + frame = pd.DataFrame(data) + frame.value_counts() + Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame: .. ipython:: python diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index c7b1cc1c832be..b326bbb5a465e 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -170,6 +170,7 @@ Computations / descriptive stats DataFrame.std DataFrame.var DataFrame.nunique + DataFrame.value_counts Reindexing / selection / label manipulation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a4c991dcc166c..888b7d23aeb35 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -43,7 +43,7 @@ Other enhancements - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) -- +- `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) - .. --------------------------------------------------------------------------- @@ -55,6 +55,7 @@ Other API changes - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) +- Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - @@ -114,6 +115,7 @@ Datetimelike - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) - Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) +- :class:`Timestamp` raising confusing error message when year, month or day is missing (:issue:`31200`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b8c462abe35f1..9f3b4a8a554b5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -411,10 +411,25 @@ class Timestamp(_Timestamp): ) elif ts_input is _no_input: - # User passed keyword arguments. - ts_input = datetime(year, month, day, hour or 0, - minute or 0, second or 0, - microsecond or 0) + # GH 31200 + # When year, month or day is not given, we call the datetime + # constructor to make sure we get the same error message + # since Timestamp inherits datetime + datetime_kwargs = { + "hour": hour or 0, + "minute": minute or 0, + "second": second or 0, + "microsecond": microsecond or 0 + } + if year is not None: + datetime_kwargs["year"] = year + if month is not None: + datetime_kwargs["month"] = month + if day is not None: + datetime_kwargs["day"] = day + + ts_input = datetime(**datetime_kwargs) + elif is_integer_object(freq): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 854075eaa8d09..f637e16caa4c6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -777,8 +777,10 @@ def searchsorted(self, value, side="left", sorter=None): if isinstance(value, str): try: value = self._scalar_from_string(value) - except ValueError: - raise TypeError("searchsorted requires compatible dtype or scalar") + except ValueError as e: + raise TypeError( + "searchsorted requires compatible dtype or scalar" + ) from e elif is_valid_nat_for_dtype(value, self.dtype): value = NaT @@ -1041,7 +1043,7 @@ def _validate_frequency(cls, index, freq, **kwargs): raise ValueError( f"Inferred frequency {inferred} from passed values " f"does not conform to passed frequency {freq.freqstr}" - ) + ) from e # monotonicity/uniqueness properties are called via frequencies.infer_freq, # see GH#23789 diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index b11736248c12a..f5167f470b056 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -725,45 +725,18 @@ def _concat_same_type(cls, to_concat): right = np.concatenate([interval.right for interval in to_concat]) return cls._simple_new(left, right, closed=closed, copy=False) - def _shallow_copy(self, left=None, right=None, closed=None): + def _shallow_copy(self, left, right): """ Return a new IntervalArray with the replacement attributes Parameters ---------- - left : array-like + left : Index Values to be used for the left-side of the intervals. - If None, the existing left and right values will be used. - - right : array-like + right : Index Values to be used for the right-side of the intervals. - If None and left is IntervalArray-like, the left and right - of the IntervalArray-like will be used. - - closed : {'left', 'right', 'both', 'neither'}, optional - Whether the intervals are closed on the left-side, right-side, both - or neither. If None, the existing closed will be used. """ - if left is None: - - # no values passed - left, right = self.left, self.right - - elif right is None: - - # only single value passed, could be an IntervalArray - # or array of Intervals - if not isinstance(left, (type(self), ABCIntervalIndex)): - left = type(self)(left) - - left, right = left.left, left.right - else: - - # both left and right are values - pass - - closed = closed or self.closed - return self._simple_new(left, right, closed=closed, verify_integrity=False) + return self._simple_new(left, right, closed=self.closed, verify_integrity=False) def copy(self): """ @@ -1035,7 +1008,9 @@ def set_closed(self, closed): msg = f"invalid option for 'closed': {closed}" raise ValueError(msg) - return self._shallow_copy(closed=closed) + return type(self)._simple_new( + left=self.left, right=self.right, closed=closed, verify_integrity=False + ) @property def length(self): diff --git a/pandas/core/base.py b/pandas/core/base.py index 56d3596f71813..85424e35fa0e0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1196,6 +1196,7 @@ def value_counts( -------- Series.count: Number of non-NA elements in a Series. DataFrame.count: Number of non-NA elements in a DataFrame. + DataFrame.value_counts: Equivalent method on DataFrames. Examples -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7efb4fbb878d6..b6b6a4fe74ed5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -111,7 +111,7 @@ from pandas.core.indexes import base as ibase from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences from pandas.core.indexes.datetimes import DatetimeIndex -from pandas.core.indexes.multi import maybe_droplevels +from pandas.core.indexes.multi import MultiIndex, maybe_droplevels from pandas.core.indexes.period import PeriodIndex from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable from pandas.core.internals import BlockManager @@ -4569,6 +4569,10 @@ def drop_duplicates( ------- DataFrame DataFrame with duplicates removed or None if ``inplace=True``. + + See Also + -------- + DataFrame.value_counts: Count unique combinations of columns. """ if self.empty: return self.copy() @@ -4814,6 +4818,102 @@ def sort_index( else: return self._constructor(new_data).__finalize__(self) + def value_counts( + self, + subset: Optional[Sequence[Label]] = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + ): + """ + Return a Series containing counts of unique rows in the DataFrame. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + + Returns + ------- + Series + + See Also + -------- + Series.value_counts: Equivalent method on Series. + + Notes + ----- + The returned Series will have a MultiIndex with one level per input + column. By default, rows that contain any NA values are omitted from + the result. By default, the resulting Series will be in descending + order so that the first element is the most frequently-occurring row. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6], + ... 'num_wings': [2, 0, 0, 0]}, + ... index=['falcon', 'dog', 'cat', 'ant']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + cat 4 0 + ant 6 0 + + >>> df.value_counts() + num_legs num_wings + 4 0 2 + 6 0 1 + 2 2 1 + dtype: int64 + + >>> df.value_counts(sort=False) + num_legs num_wings + 2 2 1 + 4 0 2 + 6 0 1 + dtype: int64 + + >>> df.value_counts(ascending=True) + num_legs num_wings + 2 2 1 + 6 0 1 + 4 0 2 + dtype: int64 + + >>> df.value_counts(normalize=True) + num_legs num_wings + 4 0 0.50 + 6 0 0.25 + 2 2 0.25 + dtype: float64 + """ + if subset is None: + subset = self.columns.tolist() + + counts = self.groupby(subset).size() + + if sort: + counts = counts.sort_values(ascending=ascending) + if normalize: + counts /= counts.sum() + + # Force MultiIndex for single column + if len(subset) == 1: + counts.index = MultiIndex.from_arrays( + [counts.index], names=[counts.index.name] + ) + + return counts + def nlargest(self, n, columns, keep="first") -> "DataFrame": """ Return the first `n` rows ordered by `columns` in descending order. @@ -8346,9 +8446,8 @@ def isin(self, values) -> "DataFrame": def _from_nested_dict(data): # TODO: this should be seriously cythonized - new_data = {} + new_data = collections.defaultdict(dict) for index, s in data.items(): for col, v in s.items(): - new_data[col] = new_data.get(col, {}) new_data[col][index] = v return new_data diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a6ab0d4034ddb..ff7c481d550d4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -602,6 +602,10 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: of levels. axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis along which the level(s) is removed: + + * 0 or 'index': remove level(s) in column. + * 1 or 'columns': remove level(s) in row. Returns ------- @@ -617,7 +621,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: ... ]).set_index([0, 1]).rename_axis(['a', 'b']) >>> df.columns = pd.MultiIndex.from_tuples([ - ... ('c', 'e'), ('d', 'f') + ... ('c', 'e'), ('d', 'f') ... ], names=['level_1', 'level_2']) >>> df @@ -636,7 +640,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: 6 7 8 10 11 12 - >>> df.droplevel('level2', axis=1) + >>> df.droplevel('level_2', axis=1) level_1 c d a b 1 2 3 4 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index aa22527d8c2d7..67f2f05c8af1e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -8,7 +8,7 @@ from pandas._libs import algos as libalgos, index as libindex, lib import pandas._libs.join as libjoin -from pandas._libs.lib import is_datetime_array +from pandas._libs.lib import is_datetime_array, no_default from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.timezones import tz_compare @@ -485,7 +485,7 @@ def _get_attributes_dict(self): """ return {k: getattr(self, k, None) for k in self._attributes} - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): """ Create a new Index with the same class as the caller, don't copy the data, use the same object attributes with passed in attributes taking @@ -496,16 +496,14 @@ def _shallow_copy(self, values=None, **kwargs): Parameters ---------- values : the values to create the new Index, optional - kwargs : updates the default attributes for this Index + name : Label, defaults to self.name """ + name = self.name if name is no_default else name + if values is None: values = self.values - attributes = self._get_attributes_dict() - - attributes.update(kwargs) - - return self._simple_new(values, **attributes) + return self._simple_new(values, name=name) def _shallow_copy_with_infer(self, values, **kwargs): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index caa6a9a93141f..67bed7bd77c7f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -7,6 +7,8 @@ from pandas._libs import index as libindex from pandas._libs.hashtable import duplicated_int64 +from pandas._libs.lib import no_default +from pandas._typing import Label from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -17,7 +19,6 @@ is_scalar, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.generic import ABCCategorical, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core import accessor @@ -193,7 +194,9 @@ def __new__( raise cls._scalar_data_error(data) data = [] - data = cls._create_categorical(data, dtype=dtype) + assert isinstance(dtype, CategoricalDtype), dtype + if not isinstance(data, Categorical) or data.dtype != dtype: + data = Categorical(data, dtype=dtype) data = data.copy() if copy else data @@ -223,37 +226,11 @@ def _create_from_codes(self, codes, dtype=None, name=None): return CategoricalIndex(cat, name=name) @classmethod - def _create_categorical(cls, data, dtype=None): - """ - *this is an internal non-public method* - - create the correct categorical from data and the properties - - Parameters - ---------- - data : data for new Categorical - dtype : CategoricalDtype, defaults to existing - - Returns - ------- - Categorical - """ - if isinstance(data, (cls, ABCSeries)) and is_categorical_dtype(data): - data = data.values - - if not isinstance(data, ABCCategorical): - return Categorical(data, dtype=dtype) - - if isinstance(dtype, CategoricalDtype) and dtype != data.dtype: - # we want to silently ignore dtype='category' - data = data._set_dtype(dtype) - return data - - @classmethod - def _simple_new(cls, values, name=None, dtype=None): + def _simple_new(cls, values: Categorical, name=None, dtype=None): + # GH#32204 dtype is included for compat with Index._simple_new + assert isinstance(values, Categorical), type(values) result = object.__new__(cls) - values = cls._create_categorical(values, dtype=dtype) result._data = values result.name = name @@ -264,13 +241,14 @@ def _simple_new(cls, values, name=None, dtype=None): # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): + name = self.name if name is no_default else name + if values is None: values = self.values cat = Categorical(values, dtype=self.dtype) - name = kwargs.get("name", self.name) return type(self)._simple_new(cat, name=name) def _is_dtype_compat(self, other) -> bool: @@ -295,7 +273,8 @@ def _is_dtype_compat(self, other) -> bool: values = other if not is_list_like(values): values = [values] - other = CategoricalIndex(self._create_categorical(other, dtype=self.dtype)) + cat = Categorical(other, dtype=self.dtype) + other = CategoricalIndex(cat) if not other.isin(values).all(): raise TypeError( "cannot append a non-category item to a CategoricalIndex" diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1b3b6934aa53a..349b582de4358 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -8,6 +8,7 @@ from pandas._libs import NaT, iNaT, join as libjoin, lib from pandas._libs.tslibs import timezones +from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly @@ -551,14 +552,6 @@ def _summary(self, name=None) -> str: result = result.replace("'", "") return result - def _concat_same_dtype(self, to_concat, name): - """ - Concatenate to_concat which has the same class. - """ - new_data = type(self._data)._concat_same_type(to_concat) - - return self._simple_new(new_data, name=name) - def shift(self, periods=1, freq=None): """ Shift index by desired number of time frequency increments. @@ -649,7 +642,9 @@ def _set_freq(self, freq): self._data._freq = freq - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = lib.no_default): + name = self.name if name is lib.no_default else name + if values is None: values = self._data @@ -657,18 +652,16 @@ def _shallow_copy(self, values=None, **kwargs): values = values._data if isinstance(values, np.ndarray): # TODO: We would rather not get here - if kwargs.get("freq") is not None: - raise ValueError(kwargs) values = type(self._data)(values, dtype=self.dtype) attributes = self._get_attributes_dict() - if "freq" not in kwargs and self.freq is not None: + if self.freq is not None: if isinstance(values, (DatetimeArray, TimedeltaArray)): if values.freq is None: del attributes["freq"] - attributes.update(kwargs) + attributes["name"] = name return type(self)._simple_new(values, **attributes) # -------------------------------------------------------------------- @@ -738,9 +731,7 @@ def intersection(self, other, sort=False): # this point, depending on the values. result._set_freq(None) - result = self._shallow_copy( - result._data, name=result.name, dtype=result.dtype, freq=None - ) + result = self._shallow_copy(result._data, name=result.name) if result.freq is None: result._set_freq("infer") return result diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6ea4250e4acf4..b3923a1298859 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -333,11 +333,12 @@ def from_tuples( # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, left=None, right=None, **kwargs): - result = self._data._shallow_copy(left=left, right=right) + def _shallow_copy(self, values=None, **kwargs): + if values is None: + values = self._data attributes = self._get_attributes_dict() attributes.update(kwargs) - return self._simple_new(result, **attributes) + return self._simple_new(values, **attributes) @cache_readonly def _isnan(self): @@ -407,7 +408,7 @@ def astype(self, dtype, copy=True): with rewrite_exception("IntervalArray", type(self).__name__): new_values = self.values.astype(dtype, copy=copy) if is_interval_dtype(new_values): - return self._shallow_copy(new_values.left, new_values.right) + return self._shallow_copy(new_values) return Index.astype(self, dtype, copy=copy) @property @@ -881,7 +882,8 @@ def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) - return self._shallow_copy(values) + result = IntervalArray(values) + return self._shallow_copy(result) def delete(self, loc): """ @@ -893,7 +895,8 @@ def delete(self, loc): """ new_left = self.left.delete(loc) new_right = self.right.delete(loc) - return self._shallow_copy(new_left, new_right) + result = self._data._shallow_copy(new_left, new_right) + return self._shallow_copy(result) def insert(self, loc, item): """ @@ -927,7 +930,8 @@ def insert(self, loc, item): new_left = self.left.insert(loc, left_insert) new_right = self.right.insert(loc, right_insert) - return self._shallow_copy(new_left, new_right) + result = self._data._shallow_copy(new_left, new_right) + return self._shallow_copy(result) @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 367870f0ee467..06a26cc90555e 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -3,7 +3,7 @@ import numpy as np from pandas._libs import index as libindex, lib -from pandas._typing import Dtype +from pandas._typing import Dtype, Label from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.cast import astype_nansafe @@ -103,7 +103,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): return self._maybe_cast_indexer(label) @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, name=lib.no_default): + def _shallow_copy(self, values=None, name: Label = lib.no_default): name = name if name is not lib.no_default else self.name if values is not None and not self._can_hold_na and values.dtype.kind == "f": diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0b85433b699a8..c7c11c60185b3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -5,9 +5,11 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs.lib import no_default from pandas._libs.tslibs import frequencies as libfrequencies, resolution from pandas._libs.tslibs.parsing import parse_time_string from pandas._libs.tslibs.period import Period +from pandas._typing import Label from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -248,8 +250,10 @@ def _has_complex_internals(self): # used to avoid libreduction code paths, which raise or require conversion return True - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): # TODO: simplify, figure out type of values + name = name if name is not no_default else self.name + if values is None: values = self._data @@ -263,18 +267,7 @@ def _shallow_copy(self, values=None, **kwargs): # GH#30713 this should never be reached raise TypeError(type(values), getattr(values, "dtype", None)) - # We don't allow changing `freq` in _shallow_copy. - validate_dtype_freq(self.dtype, kwargs.get("freq")) - attributes = self._get_attributes_dict() - - attributes.update(kwargs) - if not len(values) and "dtype" not in kwargs: - attributes["dtype"] = self.dtype - return self._simple_new(values, **attributes) - - def _shallow_copy_with_infer(self, values=None, **kwargs): - """ we always want to return a PeriodIndex """ - return self._shallow_copy(values=values, **kwargs) + return self._simple_new(values, name=name) def _maybe_convert_timedelta(self, other): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d6752da6bc58f..fa8551bc646a6 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -7,6 +7,8 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs.lib import no_default +from pandas._typing import Label import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly @@ -385,13 +387,13 @@ def tolist(self): return list(self._range) @Appender(Int64Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): + name = self.name if name is no_default else name + if values is None: - name = kwargs.get("name", self.name) return self._simple_new(self._range, name=name) else: - kwargs.setdefault("name", self.name) - return self._int64index._shallow_copy(values, **kwargs) + return Int64Index._simple_new(values, name=name) @Appender(Int64Index.copy.__doc__) def copy(self, name=None, deep=False, dtype=None, **kwargs): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d4f9c15a9f73f..329bfdf543c62 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -141,7 +141,7 @@ def __init__( if do_integrity_check: self._verify_integrity() - self._consolidate_check() + self._known_consolidated = False self._rebuild_blknos_and_blklocs() @@ -726,7 +726,6 @@ def get_slice(self, slobj: slice, axis: int = 0): new_axes[axis] = new_axes[axis][slobj] bm = type(self)(new_blocks, new_axes, do_integrity_check=False) - bm._consolidate_inplace() return bm def __contains__(self, item) -> bool: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index ebe9a3d5bf472..29e69cc5fe509 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -4,6 +4,8 @@ Expose public exceptions & warnings """ +from pandas._config.config import OptionError + from pandas._libs.tslibs import NullFrequencyError, OutOfBoundsDatetime diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index d5537359d6948..c6b4c4904735c 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -353,9 +353,9 @@ def test_constructor_from_index_series_period(self): result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) - def test_constructor_invariant(self): - # GH 14190 - vals = [ + @pytest.mark.parametrize( + "values", + [ np.array([1.0, 1.2, 1.8, np.nan]), np.array([1, 2, 3], dtype="int64"), ["a", "b", "c", np.nan], @@ -366,11 +366,13 @@ def test_constructor_invariant(self): Timestamp("2014-01-02", tz="US/Eastern"), NaT, ], - ] - for val in vals: - c = Categorical(val) - c2 = Categorical(c) - tm.assert_categorical_equal(c, c2) + ], + ) + def test_constructor_invariant(self, values): + # GH 14190 + c = Categorical(values) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) @pytest.mark.parametrize("ordered", [True, False]) def test_constructor_with_dtype(self, ordered): @@ -470,9 +472,14 @@ def test_construction_with_null(self, klass, nulls_fixture): tm.assert_categorical_equal(result, expected) - def test_from_codes(self): + def test_from_codes_empty(self): + cat = ["a", "b", "c"] + result = Categorical.from_codes([], categories=cat) + expected = Categorical([], categories=cat) - # too few categories + tm.assert_categorical_equal(result, expected) + + def test_from_codes_too_few_categories(self): dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be between " with pytest.raises(ValueError, match=msg): @@ -480,22 +487,23 @@ def test_from_codes(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([1, 2], dtype=dtype) - # no int codes + def test_from_codes_non_int_codes(self): + dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be array-like integers" with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], dtype=dtype) - # no unique categories + def test_from_codes_non_unique_categories(self): with pytest.raises(ValueError, match="Categorical categories must be unique"): Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) - # NaN categories included + def test_from_codes_nan_cat_included(self): with pytest.raises(ValueError, match="Categorial categories cannot be null"): Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) - # too negative + def test_from_codes_too_negative(self): dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = r"codes need to be between -1 and len\(categories\)-1" with pytest.raises(ValueError, match=msg): @@ -503,6 +511,8 @@ def test_from_codes(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([-2, 1, 2], dtype=dtype) + def test_from_codes(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) exp = Categorical(["a", "b", "c"], ordered=False) res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) tm.assert_categorical_equal(exp, res) @@ -510,21 +520,18 @@ def test_from_codes(self): res = Categorical.from_codes([0, 1, 2], dtype=dtype) tm.assert_categorical_equal(exp, res) - def test_from_codes_with_categorical_categories(self): + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_categorical_categories(self, klass): # GH17884 expected = Categorical(["a", "b"], categories=["a", "b", "c"]) - result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"])) + result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) - result = Categorical.from_codes( - [0, 1], categories=CategoricalIndex(["a", "b", "c"]) - ) - tm.assert_categorical_equal(result, expected) - - # non-unique Categorical still raises + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_non_unique_categorical_categories(self, klass): with pytest.raises(ValueError, match="Categorical categories must be unique"): - Categorical.from_codes([0, 1], Categorical(["a", "b", "a"])) + Categorical.from_codes([0, 1], klass(["a", "b", "a"])) def test_from_codes_with_nan_code(self): # GH21767 @@ -535,24 +542,16 @@ def test_from_codes_with_nan_code(self): with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, dtype=dtype) - def test_from_codes_with_float(self): + @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) + def test_from_codes_with_float(self, codes): # GH21767 - codes = [1.0, 2.0, 0] # integer, but in float dtype + # float codes should raise even if values are equal to integers dtype = CategoricalDtype(categories=["a", "b", "c"]) - # empty codes should not raise for floats - Categorical.from_codes([], dtype.categories) - - with pytest.raises(ValueError, match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype.categories) - - with pytest.raises(ValueError, match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype=dtype) - - codes = [1.1, 2.0, 0] # non-integer - with pytest.raises(ValueError, match="codes need to be array-like integers"): + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype.categories) - with pytest.raises(ValueError, match="codes need to be array-like integers"): + with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype=dtype) def test_from_codes_with_dtype_raises(self): diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 625d559001e72..f85d823cb2fac 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -277,6 +277,12 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): pytest.skip(f"values of {klass} cannot be changed") elif isinstance(orig, pd.MultiIndex): pytest.skip("MultiIndex doesn't support isna") + elif orig.duplicated().any(): + pytest.xfail( + "The test implementation isn't flexible enough to deal" + " with duplicated values. This isn't a bug in the" + " application code, but in the test code." + ) # special assign to the numpy array if is_datetime64tz_dtype(obj): diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 774eb443c45fe..03598b6bb5eca 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -1,3 +1,5 @@ +from itertools import product + import numpy as np import pytest @@ -5,6 +7,11 @@ import pandas._testing as tm +@pytest.fixture(params=product([True, False], [True, False])) +def close_open_fixture(request): + return request.param + + @pytest.fixture def float_frame_with_na(): """ diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py new file mode 100644 index 0000000000000..40b0ec0c0d811 --- /dev/null +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -0,0 +1,58 @@ +from datetime import datetime + +import numpy as np + +from pandas import DataFrame, DatetimeIndex, Series, date_range +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestAsFreq: + def test_asfreq(self, datetime_frame): + offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) + rule_monthly = datetime_frame.asfreq("BM") + + tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"]) + + filled = rule_monthly.asfreq("B", method="pad") # noqa + # TODO: actually check that this worked. + + # don't forget! + filled_dep = rule_monthly.asfreq("B", method="pad") # noqa + + # test does not blow up on length-0 DataFrame + zero_length = datetime_frame.reindex([]) + result = zero_length.asfreq("BM") + assert result is not zero_length + + def test_asfreq_datetimeindex(self): + df = DataFrame( + {"A": [1, 2, 3]}, + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], + ) + df = df.asfreq("B") + assert isinstance(df.index, DatetimeIndex) + + ts = df["A"].asfreq("B") + assert isinstance(ts.index, DatetimeIndex) + + def test_asfreq_fillvalue(self): + # test for fill value during upsampling, related to issue 3715 + + # setup + rng = date_range("1/1/2016", periods=10, freq="2S") + ts = Series(np.arange(len(rng)), index=rng) + df = DataFrame({"one": ts}) + + # insert pre-existing missing value + df.loc["2016-01-01 00:00:08", "one"] = None + + actual_df = df.asfreq(freq="1S", fill_value=9.0) + expected_df = df.asfreq(freq="1S").fillna(9.0) + expected_df.loc["2016-01-01 00:00:08", "one"] = None + tm.assert_frame_equal(expected_df, actual_df) + + expected_series = ts.asfreq(freq="1S").fillna(9.0) + actual_series = ts.asfreq(freq="1S", fill_value=9.0) + tm.assert_series_equal(expected_series, actual_series) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py new file mode 100644 index 0000000000000..108bbbfa183c4 --- /dev/null +++ b/pandas/tests/frame/methods/test_at_time.py @@ -0,0 +1,86 @@ +from datetime import time + +import numpy as np +import pytest +import pytz + +from pandas import DataFrame, date_range +import pandas._testing as tm + + +class TestAtTime: + def test_at_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_frame_equal(result, expected) + + result = ts.loc[time(9, 30)] + expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] + + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts.at_time(time(0, 0)) + tm.assert_frame_equal(result, ts) + + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = DataFrame(np.random.randn(len(rng), 2), rng) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + @pytest.mark.parametrize( + "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] + ) + def test_at_time_errors(self, hour): + # GH#24043 + dti = date_range("2018", periods=3, freq="H") + df = DataFrame(list(range(len(dti))), index=dti) + if getattr(hour, "tzinfo", None) is None: + result = df.at_time(hour) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="Index must be timezone"): + df.at_time(hour) + + def test_at_time_tz(self): + # GH#24043 + dti = date_range("2018", periods=3, freq="H", tz="US/Pacific") + df = DataFrame(list(range(len(dti))), index=dti) + result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + + def test_at_time_raises(self): + # GH#20725 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.at_time("00:00") + + @pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) + def test_at_time_axis(self, axis): + # issue 8839 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts.index, ts.columns = rng, rng + + indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] + + if axis in ["index", 0]: + expected = ts.loc[indices, :] + elif axis in ["columns", 1]: + expected = ts.loc[:, indices] + + result = ts.at_time("9:30", axis=axis) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py new file mode 100644 index 0000000000000..b40604b4f4a16 --- /dev/null +++ b/pandas/tests/frame/methods/test_between_time.py @@ -0,0 +1,110 @@ +from datetime import time + +import numpy as np +import pytest + +from pandas import DataFrame, date_range +import pandas._testing as tm + + +class TestBetweenTime: + def test_between_time(self, close_open_fixture): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + inc_start, inc_end = close_open_fixture + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_frame_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self): + # GH#20725 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_axis(self, axis): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + stime, etime = ("08:00:00", "09:00:00") + exp_len = 7 + + if axis in ["index", 0]: + ts.index = rng + assert len(ts.between_time(stime, etime)) == exp_len + assert len(ts.between_time(stime, etime, axis=0)) == exp_len + + if axis in ["columns", 1]: + ts.columns = rng + selected = ts.between_time(stime, etime, axis=1).columns + assert len(selected) == exp_len + + def test_between_time_axis_raises(self, axis): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + mask = np.arange(0, len(rng)) + rand_data = np.random.randn(len(rng), len(rng)) + ts = DataFrame(rand_data, index=rng, columns=rng) + stime, etime = ("08:00:00", "09:00:00") + + msg = "Index must be DatetimeIndex" + if axis in ["columns", 1]: + ts.index = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime) + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=0) + + if axis in ["index", 0]: + ts.columns = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=1) diff --git a/pandas/tests/frame/methods/test_combine.py b/pandas/tests/frame/methods/test_combine.py new file mode 100644 index 0000000000000..bc6a67e4e1f32 --- /dev/null +++ b/pandas/tests/frame/methods/test_combine.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestCombine: + @pytest.mark.parametrize( + "data", + [ + pd.date_range("2000", periods=4), + pd.date_range("2000", periods=4, tz="US/Central"), + pd.period_range("2000", periods=4), + pd.timedelta_range(0, periods=4), + ], + ) + def test_combine_datetlike_udf(self, data): + # GH#23079 + df = pd.DataFrame({"A": data}) + other = df.copy() + df.iloc[1, 0] = None + + def combiner(a, b): + return b + + result = df.combine(other, combiner) + tm.assert_frame_equal(result, other) + + def test_combine_generic(self, float_frame): + df1 = float_frame + df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]] + + combined = df1.combine(df2, np.add) + combined2 = df2.combine(df1, np.add) + assert combined["D"].isna().all() + assert combined2["D"].isna().all() + + chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]] + chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]] + + exp = ( + float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk) + * 2 + ) + tm.assert_frame_equal(chunk, exp) + tm.assert_frame_equal(chunk2, exp) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py new file mode 100644 index 0000000000000..e69a562f8214d --- /dev/null +++ b/pandas/tests/frame/methods/test_rename.py @@ -0,0 +1,353 @@ +from collections import ChainMap + +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +class TestRename: + def test_rename(self, float_frame): + mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} + + renamed = float_frame.rename(columns=mapping) + renamed2 = float_frame.rename(columns=str.lower) + + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal( + renamed2.rename(columns=str.upper), float_frame, check_names=False + ) + + # index + data = {"A": {"foo": 0, "bar": 1}} + + # gets sorted alphabetical + df = DataFrame(data) + renamed = df.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) + + renamed = df.rename(index=str.upper) + tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) + + # have to pass something + with pytest.raises(TypeError, match="must pass an index to rename"): + float_frame.rename() + + # partial columns + renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) + + # other axis + renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) + + # index with name + index = Index(["foo", "bar"], name="name") + renamer = DataFrame(data, index=index) + renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) + assert renamed.index.name == renamer.index.name + + @pytest.mark.parametrize( + "args,kwargs", + [ + ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")), + ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))), + ], + ) + def test_rename_chainmap(self, args, kwargs): + # see gh-23859 + colAData = range(1, 11) + colBdata = np.random.randn(10) + + df = DataFrame({"A": colAData, "B": colBdata}) + result = df.rename(*args, **kwargs) + + expected = DataFrame({"a": colAData, "b": colBdata}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "kwargs, rename_index, rename_columns", + [ + ({"mapper": None, "axis": 0}, True, False), + ({"mapper": None, "axis": 1}, False, True), + ({"index": None}, True, False), + ({"columns": None}, False, True), + ({"index": None, "columns": None}, True, True), + ({}, False, False), + ], + ) + def test_rename_axis_none(self, kwargs, rename_index, rename_columns): + # GH 25034 + index = Index(list("abc"), name="foo") + columns = Index(["col1", "col2"], name="bar") + data = np.arange(6).reshape(3, 2) + df = DataFrame(data, index, columns) + + result = df.rename_axis(**kwargs) + expected_index = index.rename(None) if rename_index else index + expected_columns = columns.rename(None) if rename_columns else columns + expected = DataFrame(data, expected_index, expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_multiindex(self): + + tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] + tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] + index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) + columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> across all levels + + renamed = df.rename( + index={"foo1": "foo3", "bar2": "bar3"}, + columns={"fizz1": "fizz3", "buzz2": "buzz3"}, + ) + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] + ) + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + tm.assert_index_equal(renamed.index, new_index) + tm.assert_index_equal(renamed.columns, new_columns) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = MultiIndex.from_tuples( + [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # index + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] + ) + renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) + tm.assert_index_equal(renamed.index, new_index) + + def test_rename_nocopy(self, float_frame): + renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + renamed["foo"] = 1.0 + assert (float_frame["C"] == 1.0).all() + + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={"C": "foo"}) + assert "C" in float_frame + assert "foo" not in float_frame + + c_id = id(float_frame["C"]) + float_frame = float_frame.copy() + float_frame.rename(columns={"C": "foo"}, inplace=True) + + assert "C" not in float_frame + assert "foo" in float_frame + assert id(float_frame["foo"]) != c_id + + def test_rename_bug(self): + # GH 5344 + # rename set ref_locs, and set_index was not resetting + df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) + df = df.rename(columns={0: "a"}) + df = df.rename(columns={1: "b"}) + df = df.set_index(["a", "b"]) + df.columns = ["2001-01-01"] + expected = DataFrame( + [[1], [2]], + index=MultiIndex.from_tuples( + [("foo", "bah"), ("bar", "bas")], names=["a", "b"] + ), + columns=["2001-01-01"], + ) + tm.assert_frame_equal(df, expected) + + def test_rename_bug2(self): + # GH 19497 + # rename was changing Index to MultiIndex if Index contained tuples + + df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) + df = df.rename({(1, 1): (5, 4)}, axis="index") + expected = DataFrame( + data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] + ) + tm.assert_frame_equal(df, expected) + + def test_rename_errors_raises(self): + df = DataFrame(columns=["A", "B", "C", "D"]) + with pytest.raises(KeyError, match="'E'] not found in axis"): + df.rename(columns={"A": "a", "E": "e"}, errors="raise") + + @pytest.mark.parametrize( + "mapper, errors, expected_columns", + [ + ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]), + ({"A": "a"}, "raise", ["a", "B", "C", "D"]), + (str.lower, "raise", ["a", "b", "c", "d"]), + ], + ) + def test_rename_errors(self, mapper, errors, expected_columns): + # GH 13473 + # rename now works with errors parameter + df = DataFrame(columns=["A", "B", "C", "D"]) + result = df.rename(columns=mapper, errors=errors) + expected = DataFrame(columns=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) + + assert "FOO" in renamed + assert "foo" not in renamed + + def test_rename_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + + result = df.rename(str.lower, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis="columns") + tm.assert_frame_equal(result, expected) + + # Index + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + result = df.rename(str.lower, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename(mapper=str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( + ["A", "B"] + ) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + tm.assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + result = df.rename(index=str.lower, columns=str.upper) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + tm.assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # see gh-12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) + + # Named target and axis + over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=1) + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(columns=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, axis="columns") + + # Too many targets + over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, columns=str.lower) + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.rename(id, mapper=id) + + def test_rename_positional_raises(self): + # GH 29136 + df = DataFrame(columns=["A", "B"]) + msg = r"rename\(\) takes from 1 to 2 positional arguments" + + with pytest.raises(TypeError, match=msg): + df.rename(None, str.lower) + + def test_rename_no_mappings_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "must pass an index to rename" + with pytest.raises(TypeError, match=msg): + df.rename() + + with pytest.raises(TypeError, match=msg): + df.rename(None, index=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None, index=None) + + def test_rename_mapper_and_positional_arguments_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=msg): + df.rename({}, index={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}, index={}) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py new file mode 100644 index 0000000000000..6586c19af2539 --- /dev/null +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -0,0 +1,299 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestResetIndex: + def test_reset_index_tz(self, tz_aware_fixture): + # GH 3950 + # reset_index with single level + tz = tz_aware_fixture + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") + df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) + + expected = DataFrame( + { + "idx": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "a": range(5), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx", "a", "b"], + ) + expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) + + def test_reset_index_with_intervals(self): + idx = IntervalIndex.from_breaks(np.arange(11), name="x") + original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] + + result = original.set_index("x") + expected = DataFrame({"y": np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) + + result2 = result.reset_index() + tm.assert_frame_equal(result2, original) + + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + for i, (lev, level_codes) in enumerate( + zip(stacked.index.levels, stacked.index.codes) + ): + values = lev.take(level_codes) + name = names[i] + tm.assert_index_equal(values, Index(deleveled[name])) + + stacked.index.names = [None, None] + deleveled2 = stacked.reset_index() + tm.assert_series_equal( + deleveled["first"], deleveled2["level_0"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["level_1"], check_names=False + ) + + # default name assigned + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name="index") + tm.assert_series_equal(rdf["index"], exp) + + # default name assigned, corner case + df = float_frame.copy() + df["index"] = "foo" + rdf = df.reset_index() + exp = Series(float_frame.index.values, name="level_0") + tm.assert_series_equal(rdf["level_0"], exp) + + # but this is ok + float_frame.index.name = "index" + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) + tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) + + # preserve column names + float_frame.columns.name = "columns" + resetted = float_frame.reset_index() + assert resetted.columns.name == "columns" + + # only remove certain columns + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index(["A", "B"]) + + # TODO should reset_index check_names ? + tm.assert_frame_equal(rs, float_frame, check_names=False) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) + + rs = df.reset_index("A") + xp = float_frame.reset_index().set_index(["index", "B"]) + tm.assert_frame_equal(rs, xp, check_names=False) + + # test resetting in place + df = float_frame.copy() + resetted = float_frame.reset_index() + df.reset_index(inplace=True) + tm.assert_frame_equal(df, resetted, check_names=False) + + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index("A", drop=True) + xp = float_frame.copy() + del xp["A"] + xp = xp.set_index(["B"], append=True) + tm.assert_frame_equal(rs, xp, check_names=False) + + def test_reset_index_name(self): + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + index=Index(range(2), name="x"), + ) + assert df.reset_index().index.name is None + assert df.reset_index(drop=True).index.name is None + df.reset_index(inplace=True) + assert df.index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + result = df.set_index(["A", "B"]).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C", "D"]]) + + # With single-level Index (GH 16263) + result = df.set_index("A").reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index("A").reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) + tm.assert_frame_equal(result, df[["B", "C", "D"]]) + + # Missing levels - for both MultiIndex and single-level Index: + for idx_lev in ["A", "B"], ["A"]: + with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): + df.set_index(idx_lev).reset_index(level=["A", "E"]) + with pytest.raises(IndexError, match="Too many levels"): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + + def test_reset_index_right_dtype(self): + time = np.arange(0.0, 10, np.sqrt(2) / 2) + s1 = Series( + (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed" + ) + df = DataFrame(s1) + + resetted = s1.reset_index() + assert resetted["time"].dtype == np.float64 + + resetted = df.reset_index() + assert resetted["time"].dtype == np.float64 + + def test_reset_index_multiindex_col(self): + vals = np.random.randn(3, 3).astype(object) + idx = ["x", "y", "z"] + full = np.hstack(([[x] for x in idx], vals)) + df = DataFrame( + vals, + Index(idx, name="a"), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index() + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_fill=None) + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_level=1, col_fill="blah") + xp = DataFrame( + full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + df = DataFrame( + vals, + MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index("a") + xp = DataFrame( + full, + Index([0, 1, 2], name="d"), + columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill=None) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill="blah", col_level=1) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_multiindex_nan(self): + # GH#6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + { + "A": ["a", "b", "c"], + "B": [np.nan, np.nan, np.nan], + "C": np.random.rand(3), + } + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + def test_reset_index_with_datetimeindex_cols(self): + # GH#5818 + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("1/1/2013", "1/2/2013"), + index=["A", "B"], + ) + + result = df.reset_index() + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)], + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_range(self): + # GH#12071 + df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) + result = df.reset_index() + assert isinstance(result.index, RangeIndex) + expected = DataFrame( + [[0, 0, 0], [1, 1, 1]], + columns=["index", "A", "B"], + index=RangeIndex(stop=2), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py new file mode 100644 index 0000000000000..fe7baebcf0cf7 --- /dev/null +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -0,0 +1,329 @@ +from collections import OrderedDict + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Timestamp +import pandas._testing as tm + + +class TestSelectDtypes: + def test_select_dtypes_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=[np.number]) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number], exclude=["timedelta"]) + ei = df[["b", "c", "d"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"]) + ei = df[["b", "c", "d", "f"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime64"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetimetz"]) + ei = df[["h", "i"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include=["period"]) + + def test_select_dtypes_exclude_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + } + ) + re = df.select_dtypes(exclude=[np.number]) + ee = df[["a", "e"]] + tm.assert_frame_equal(re, ee) + + def test_select_dtypes_exclude_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + include = np.bool_, "integer" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "c", "e"]] + tm.assert_frame_equal(r, e) + + exclude = ("datetime",) + include = "bool", "int64", "int32" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "e"]] + tm.assert_frame_equal(r, e) + + def test_select_dtypes_include_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime64") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="category") + ei = df[["f"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include="period") + + def test_select_dtypes_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(exclude=np.number) + ei = df[["a", "e", "f", "g", "h", "i", "j"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(exclude="category") + ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(exclude="period") + + def test_select_dtypes_include_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude="floating") + ei = df[["b", "c", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_include_exclude_mixed_scalars_lists(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"]) + ei = df[["b", "c"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude="floating") + ei = df[["b", "c", "f", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_duplicate_columns(self): + # GH20839 + odict = OrderedDict + df = DataFrame( + odict( + [ + ("a", list("abc")), + ("b", list(range(1, 4))), + ("c", np.arange(3, 6).astype("u1")), + ("d", np.arange(4.0, 7.0, dtype="float64")), + ("e", [True, False, True]), + ("f", pd.date_range("now", periods=3).values), + ] + ) + ) + df.columns = ["a", "a", "b", "b", "b", "c"] + + expected = DataFrame( + {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")} + ) + + result = df.select_dtypes(include=[np.number], exclude=["floating"]) + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + df["g"] = df.f.diff() + assert not hasattr(np, "u8") + r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"]) + e = df[["a", "b"]] + tm.assert_frame_equal(r, e) + + r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"]) + e = df[["a", "b", "g"]] + tm.assert_frame_equal(r, e) + + def test_select_dtypes_empty(self): + df = DataFrame({"a": list("abc"), "b": list(range(1, 4))}) + msg = "at least one of include or exclude must be nonempty" + with pytest.raises(ValueError, match=msg): + df.select_dtypes() + + def test_select_dtypes_bad_datetime64(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(include=["datetime64[D]"]) + + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(exclude=["datetime64[as]"]) + + def test_select_dtypes_datetime_with_tz(self): + + df2 = DataFrame( + dict( + A=Timestamp("20130102", tz="US/Eastern"), + B=Timestamp("20130603", tz="CET"), + ), + index=range(5), + ) + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + result = df3.select_dtypes(include=["datetime64[ns]"]) + expected = df3.reindex(columns=[]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"] + ) + @pytest.mark.parametrize("arg", ["include", "exclude"]) + def test_select_dtypes_str_raises(self, dtype, arg): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + msg = "string dtypes are not allowed" + kwargs = {arg: [dtype]} + + with pytest.raises(TypeError, match=msg): + df.select_dtypes(**kwargs) + + def test_select_dtypes_bad_arg_raises(self): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + + msg = "data type.*not understood" + with pytest.raises(TypeError, match=msg): + df.select_dtypes(["blargy, blarg, blarg"]) + + def test_select_dtypes_typecodes(self): + # GH 11990 + df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) + expected = df + FLOAT_TYPES = list(np.typecodes["AllFloat"]) + tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py new file mode 100644 index 0000000000000..eac78e611b008 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_period.py @@ -0,0 +1,36 @@ +import numpy as np +import pytest + +from pandas import DataFrame, date_range, period_range +import pandas._testing as tm + + +class TestToPeriod: + def test_frame_to_period(self): + K = 5 + + dr = date_range("1/1/2000", "1/1/2001") + pr = period_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), K), index=dr) + df["mix"] = "a" + + pts = df.to_period() + exp = df.copy() + exp.index = pr + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M") + tm.assert_index_equal(pts.index, exp.index.asfreq("M")) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M", axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) + + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + df.to_period(axis=2) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py new file mode 100644 index 0000000000000..ae7d2827e05a6 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -0,0 +1,103 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Timedelta, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +class TestToTimestamp: + def test_frame_to_time_stamp(self): + K = 5 + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + df = DataFrame(np.random.randn(len(index), K), index=index) + df["mix"] = "a" + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end") + tm.assert_index_equal(result.index, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start") + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq="A-DEC"): + return date_range( + to_datetime("1/1/2001") + delta, + to_datetime("12/31/2009") + delta, + freq=freq, + ) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + result = df.to_timestamp("S", "end") + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + # columns + df = df.T + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end", axis=1) + tm.assert_index_equal(result.columns, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start", axis=1) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + result = df.to_timestamp("S", "end", axis=1) + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + # invalid axis + with pytest.raises(ValueError, match="axis"): + df.to_timestamp(axis=2) + + result1 = df.to_timestamp("5t", axis=1) + result2 = df.to_timestamp("t", axis=1) + expected = date_range("2001-01-01", "2009-01-01", freq="AS") + assert isinstance(result1.columns, DatetimeIndex) + assert isinstance(result2.columns, DatetimeIndex) + tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + # PeriodIndex.to_timestamp always use 'infer' + assert result1.columns.freqstr == "AS-JAN" + assert result2.columns.freqstr == "AS-JAN" diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py new file mode 100644 index 0000000000000..ea8c4b88538d4 --- /dev/null +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, date_range +import pandas._testing as tm + + +class TestTZConvert: + def test_frame_tz_convert(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + + df = DataFrame({"a": 1}, index=rng) + result = df.tz_convert("Europe/Berlin") + expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) + assert result.index.tz.zone == "Europe/Berlin" + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_convert("Europe/Berlin", axis=1) + assert result.columns.tz.zone == "Europe/Berlin" + tm.assert_frame_equal(result, expected.T) + + @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) + def test_tz_convert_and_localize(self, fn): + l0 = date_range("20140701", periods=5, freq="D") + l1 = date_range("20140701", periods=5, freq="D") + + int_idx = Index(range(5)) + + if fn == "tz_convert": + l0 = l0.tz_localize("UTC") + l1 = l1.tz_localize("UTC") + + for idx in [l0, l1]: + + l0_expected = getattr(idx, fn)("US/Pacific") + l1_expected = getattr(idx, fn)("US/Pacific") + + df1 = DataFrame(np.ones(5), index=l0) + df1 = getattr(df1, fn)("US/Pacific") + tm.assert_index_equal(df1.index, l0_expected) + + # MultiIndex + # GH7846 + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) + + df3 = getattr(df2, fn)("US/Pacific", level=0) + assert not df3.index.levels[0].equals(l0) + tm.assert_index_equal(df3.index.levels[0], l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1) + assert not df3.index.levels[1].equals(l1_expected) + + df3 = getattr(df2, fn)("US/Pacific", level=1) + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + + # TODO: untested + df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa + + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + # Bad Inputs + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(index=int_idx) + df = getattr(df, fn)("US/Pacific") + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + df = getattr(df, fn)("US/Pacific", level=0) + + # Invalid level + with pytest.raises(ValueError, match="not valid"): + df = DataFrame(index=l0) + df = getattr(df, fn)("US/Pacific", level=1) diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py new file mode 100644 index 0000000000000..1d4e26a6999b7 --- /dev/null +++ b/pandas/tests/frame/methods/test_tz_localize.py @@ -0,0 +1,21 @@ +from pandas import DataFrame, date_range +import pandas._testing as tm + + +class TestTZLocalize: + # See also: + # test_tz_convert_and_localize in test_tz_convert + + def test_frame_tz_localize(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + df = DataFrame({"a": 1}, index=rng) + result = df.tz_localize("utc") + expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) + assert result.index.tz.zone == "UTC" + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_localize("utc", axis=1) + assert result.columns.tz.zone == "UTC" + tm.assert_frame_equal(result, expected.T) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py new file mode 100644 index 0000000000000..c409b0bbe6fa9 --- /dev/null +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -0,0 +1,102 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_data_frame_value_counts_unsorted(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(sort=False) + expected = pd.Series( + data=[1, 2, 1], + index=pd.MultiIndex.from_arrays( + [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_ascending(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(ascending=True) + expected = pd.Series( + data=[1, 1, 2], + index=pd.MultiIndex.from_arrays( + [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_default(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays( + [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_normalize(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(normalize=True) + expected = pd.Series( + data=[0.5, 0.25, 0.25], + index=pd.MultiIndex.from_arrays( + [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_single_col_default(): + df = pd.DataFrame({"num_legs": [2, 4, 4, 6]}) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts() + expected = pd.Series([], dtype=np.int64) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty_normalize(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts(normalize=True) + expected = pd.Series([], dtype=np.float64) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 0c19a38bb5fa2..751ed1dfdd847 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,4 +1,3 @@ -from collections import ChainMap from datetime import datetime, timedelta import inspect @@ -18,7 +17,6 @@ Index, IntervalIndex, MultiIndex, - RangeIndex, Series, Timestamp, cut, @@ -533,30 +531,6 @@ def test_convert_dti_to_series(self): df.pop("ts") tm.assert_frame_equal(df, expected) - def test_reset_index_tz(self, tz_aware_fixture): - # GH 3950 - # reset_index with single level - tz = tz_aware_fixture - idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") - df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) - - expected = DataFrame( - { - "idx": [ - datetime(2011, 1, 1), - datetime(2011, 1, 2), - datetime(2011, 1, 3), - datetime(2011, 1, 4), - datetime(2011, 1, 5), - ], - "a": range(5), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx", "a", "b"], - ) - expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) - tm.assert_frame_equal(df.reset_index(), expected) - def test_set_index_timezone(self): # GH 12358 # tz-aware Series should retain the tz @@ -583,17 +557,6 @@ def test_set_index_dst(self): exp = DataFrame({"b": [3, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp) - def test_reset_index_with_intervals(self): - idx = IntervalIndex.from_breaks(np.arange(11), name="x") - original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] - - result = original.set_index("x") - expected = DataFrame({"y": np.arange(10)}, index=idx) - tm.assert_frame_equal(result, expected) - - result2 = result.reset_index() - tm.assert_frame_equal(result2, original) - def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) df = DataFrame(np.random.randn(3, 3), columns=columns) @@ -652,65 +615,6 @@ def test_dti_set_index_reindex(self): # Renaming - def test_rename(self, float_frame): - mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} - - renamed = float_frame.rename(columns=mapping) - renamed2 = float_frame.rename(columns=str.lower) - - tm.assert_frame_equal(renamed, renamed2) - tm.assert_frame_equal( - renamed2.rename(columns=str.upper), float_frame, check_names=False - ) - - # index - data = {"A": {"foo": 0, "bar": 1}} - - # gets sorted alphabetical - df = DataFrame(data) - renamed = df.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) - - renamed = df.rename(index=str.upper) - tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) - - # have to pass something - with pytest.raises(TypeError, match="must pass an index to rename"): - float_frame.rename() - - # partial columns - renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) - tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) - - # other axis - renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) - tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) - - # index with name - index = Index(["foo", "bar"], name="name") - renamer = DataFrame(data, index=index) - renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) - assert renamed.index.name == renamer.index.name - - @pytest.mark.parametrize( - "args,kwargs", - [ - ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")), - ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))), - ], - ) - def test_rename_chainmap(self, args, kwargs): - # see gh-23859 - colAData = range(1, 11) - colBdata = np.random.randn(10) - - df = DataFrame({"A": colAData, "B": colBdata}) - result = df.rename(*args, **kwargs) - - expected = DataFrame({"a": colAData, "b": colBdata}) - tm.assert_frame_equal(result, expected) - def test_rename_axis_inplace(self, float_frame): # GH 15704 expected = float_frame.rename_axis("foo") @@ -785,168 +689,6 @@ def test_rename_axis_mapper(self): with pytest.raises(TypeError, match="bogus"): df.rename_axis(bogus=None) - @pytest.mark.parametrize( - "kwargs, rename_index, rename_columns", - [ - ({"mapper": None, "axis": 0}, True, False), - ({"mapper": None, "axis": 1}, False, True), - ({"index": None}, True, False), - ({"columns": None}, False, True), - ({"index": None, "columns": None}, True, True), - ({}, False, False), - ], - ) - def test_rename_axis_none(self, kwargs, rename_index, rename_columns): - # GH 25034 - index = Index(list("abc"), name="foo") - columns = Index(["col1", "col2"], name="bar") - data = np.arange(6).reshape(3, 2) - df = DataFrame(data, index, columns) - - result = df.rename_axis(**kwargs) - expected_index = index.rename(None) if rename_index else index - expected_columns = columns.rename(None) if rename_columns else columns - expected = DataFrame(data, expected_index, expected_columns) - tm.assert_frame_equal(result, expected) - - def test_rename_multiindex(self): - - tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] - tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] - index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) - columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) - df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) - - # - # without specifying level -> across all levels - - renamed = df.rename( - index={"foo1": "foo3", "bar2": "bar3"}, - columns={"fizz1": "fizz3", "buzz2": "buzz3"}, - ) - new_index = MultiIndex.from_tuples( - [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] - ) - new_columns = MultiIndex.from_tuples( - [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] - ) - tm.assert_index_equal(renamed.index, new_index) - tm.assert_index_equal(renamed.columns, new_columns) - assert renamed.index.names == df.index.names - assert renamed.columns.names == df.columns.names - - # - # with specifying a level (GH13766) - - # dict - new_columns = MultiIndex.from_tuples( - [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") - tm.assert_index_equal(renamed.columns, new_columns) - - new_columns = MultiIndex.from_tuples( - [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") - tm.assert_index_equal(renamed.columns, new_columns) - - # function - func = str.upper - new_columns = MultiIndex.from_tuples( - [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns=func, level=0) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns=func, level="fizz") - tm.assert_index_equal(renamed.columns, new_columns) - - new_columns = MultiIndex.from_tuples( - [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns=func, level=1) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns=func, level="buzz") - tm.assert_index_equal(renamed.columns, new_columns) - - # index - new_index = MultiIndex.from_tuples( - [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] - ) - renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) - tm.assert_index_equal(renamed.index, new_index) - - def test_rename_nocopy(self, float_frame): - renamed = float_frame.rename(columns={"C": "foo"}, copy=False) - renamed["foo"] = 1.0 - assert (float_frame["C"] == 1.0).all() - - def test_rename_inplace(self, float_frame): - float_frame.rename(columns={"C": "foo"}) - assert "C" in float_frame - assert "foo" not in float_frame - - c_id = id(float_frame["C"]) - float_frame = float_frame.copy() - float_frame.rename(columns={"C": "foo"}, inplace=True) - - assert "C" not in float_frame - assert "foo" in float_frame - assert id(float_frame["foo"]) != c_id - - def test_rename_bug(self): - # GH 5344 - # rename set ref_locs, and set_index was not resetting - df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) - df = df.rename(columns={0: "a"}) - df = df.rename(columns={1: "b"}) - df = df.set_index(["a", "b"]) - df.columns = ["2001-01-01"] - expected = DataFrame( - [[1], [2]], - index=MultiIndex.from_tuples( - [("foo", "bah"), ("bar", "bas")], names=["a", "b"] - ), - columns=["2001-01-01"], - ) - tm.assert_frame_equal(df, expected) - - def test_rename_bug2(self): - # GH 19497 - # rename was changing Index to MultiIndex if Index contained tuples - - df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) - df = df.rename({(1, 1): (5, 4)}, axis="index") - expected = DataFrame( - data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] - ) - tm.assert_frame_equal(df, expected) - - def test_rename_errors_raises(self): - df = DataFrame(columns=["A", "B", "C", "D"]) - with pytest.raises(KeyError, match="'E'] not found in axis"): - df.rename(columns={"A": "a", "E": "e"}, errors="raise") - - @pytest.mark.parametrize( - "mapper, errors, expected_columns", - [ - ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]), - ({"A": "a"}, "raise", ["a", "B", "C", "D"]), - (str.lower, "raise", ["a", "b", "c", "d"]), - ], - ) - def test_rename_errors(self, mapper, errors, expected_columns): - # GH 13473 - # rename now works with errors parameter - df = DataFrame(columns=["A", "B", "C", "D"]) - result = df.rename(columns=mapper, errors=errors) - expected = DataFrame(columns=expected_columns) - tm.assert_frame_equal(result, expected) - def test_reorder_levels(self): index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], @@ -985,253 +727,6 @@ def test_reorder_levels(self): result = df.reorder_levels(["L0", "L0", "L0"]) tm.assert_frame_equal(result, expected) - def test_reset_index(self, float_frame): - stacked = float_frame.stack()[::2] - stacked = DataFrame({"foo": stacked, "bar": stacked}) - - names = ["first", "second"] - stacked.index.names = names - deleveled = stacked.reset_index() - for i, (lev, level_codes) in enumerate( - zip(stacked.index.levels, stacked.index.codes) - ): - values = lev.take(level_codes) - name = names[i] - tm.assert_index_equal(values, Index(deleveled[name])) - - stacked.index.names = [None, None] - deleveled2 = stacked.reset_index() - tm.assert_series_equal( - deleveled["first"], deleveled2["level_0"], check_names=False - ) - tm.assert_series_equal( - deleveled["second"], deleveled2["level_1"], check_names=False - ) - - # default name assigned - rdf = float_frame.reset_index() - exp = Series(float_frame.index.values, name="index") - tm.assert_series_equal(rdf["index"], exp) - - # default name assigned, corner case - df = float_frame.copy() - df["index"] = "foo" - rdf = df.reset_index() - exp = Series(float_frame.index.values, name="level_0") - tm.assert_series_equal(rdf["level_0"], exp) - - # but this is ok - float_frame.index.name = "index" - deleveled = float_frame.reset_index() - tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) - tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) - - # preserve column names - float_frame.columns.name = "columns" - resetted = float_frame.reset_index() - assert resetted.columns.name == "columns" - - # only remove certain columns - df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index(["A", "B"]) - - # TODO should reset_index check_names ? - tm.assert_frame_equal(rs, float_frame, check_names=False) - - rs = df.reset_index(["index", "A", "B"]) - tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - - rs = df.reset_index(["index", "A", "B"]) - tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - - rs = df.reset_index("A") - xp = float_frame.reset_index().set_index(["index", "B"]) - tm.assert_frame_equal(rs, xp, check_names=False) - - # test resetting in place - df = float_frame.copy() - resetted = float_frame.reset_index() - df.reset_index(inplace=True) - tm.assert_frame_equal(df, resetted, check_names=False) - - df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index("A", drop=True) - xp = float_frame.copy() - del xp["A"] - xp = xp.set_index(["B"], append=True) - tm.assert_frame_equal(rs, xp, check_names=False) - - def test_reset_index_name(self): - df = DataFrame( - [[1, 2, 3, 4], [5, 6, 7, 8]], - columns=["A", "B", "C", "D"], - index=Index(range(2), name="x"), - ) - assert df.reset_index().index.name is None - assert df.reset_index(drop=True).index.name is None - df.reset_index(inplace=True) - assert df.index.name is None - - def test_reset_index_level(self): - df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) - - for levels in ["A", "B"], [0, 1]: - # With MultiIndex - result = df.set_index(["A", "B"]).reset_index(level=levels[0]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = df.set_index(["A", "B"]).reset_index(level=levels) - tm.assert_frame_equal(result, df) - - result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) - tm.assert_frame_equal(result, df[["C", "D"]]) - - # With single-level Index (GH 16263) - result = df.set_index("A").reset_index(level=levels[0]) - tm.assert_frame_equal(result, df) - - result = df.set_index("A").reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df) - - result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) - tm.assert_frame_equal(result, df[["B", "C", "D"]]) - - # Missing levels - for both MultiIndex and single-level Index: - for idx_lev in ["A", "B"], ["A"]: - with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): - df.set_index(idx_lev).reset_index(level=["A", "E"]) - with pytest.raises(IndexError, match="Too many levels"): - df.set_index(idx_lev).reset_index(level=[0, 1, 2]) - - def test_reset_index_right_dtype(self): - time = np.arange(0.0, 10, np.sqrt(2) / 2) - s1 = Series( - (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed" - ) - df = DataFrame(s1) - - resetted = s1.reset_index() - assert resetted["time"].dtype == np.float64 - - resetted = df.reset_index() - assert resetted["time"].dtype == np.float64 - - def test_reset_index_multiindex_col(self): - vals = np.random.randn(3, 3).astype(object) - idx = ["x", "y", "z"] - full = np.hstack(([[x] for x in idx], vals)) - df = DataFrame( - vals, - Index(idx, name="a"), - columns=[["b", "b", "c"], ["mean", "median", "mean"]], - ) - rs = df.reset_index() - xp = DataFrame( - full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index(col_fill=None) - xp = DataFrame( - full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index(col_level=1, col_fill="blah") - xp = DataFrame( - full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]] - ) - tm.assert_frame_equal(rs, xp) - - df = DataFrame( - vals, - MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), - columns=[["b", "b", "c"], ["mean", "median", "mean"]], - ) - rs = df.reset_index("a") - xp = DataFrame( - full, - Index([0, 1, 2], name="d"), - columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index("a", col_fill=None) - xp = DataFrame( - full, - Index(range(3), name="d"), - columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index("a", col_fill="blah", col_level=1) - xp = DataFrame( - full, - Index(range(3), name="d"), - columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], - ) - tm.assert_frame_equal(rs, xp) - - def test_reset_index_multiindex_nan(self): - # GH6322, testing reset_index on MultiIndexes - # when we have a nan or all nan - df = DataFrame( - {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} - ) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - df = DataFrame( - {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} - ) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - df = DataFrame( - { - "A": ["a", "b", "c"], - "B": [np.nan, np.nan, np.nan], - "C": np.random.rand(3), - } - ) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - def test_reset_index_with_datetimeindex_cols(self): - # GH5818 - # - df = DataFrame( - [[1, 2], [3, 4]], - columns=date_range("1/1/2013", "1/2/2013"), - index=["A", "B"], - ) - - result = df.reset_index() - expected = DataFrame( - [["A", 1, 2], ["B", 3, 4]], - columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)], - ) - tm.assert_frame_equal(result, expected) - - def test_reset_index_range(self): - # GH 12071 - df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) - result = df.reset_index() - assert isinstance(result.index, RangeIndex) - expected = DataFrame( - [[0, 0, 0], [1, 1, 1]], - columns=["index", "A", "B"], - index=RangeIndex(stop=2), - ) - tm.assert_frame_equal(result, expected) - def test_set_index_names(self): df = tm.makeDataFrame() df.index.name = "name" @@ -1262,92 +757,6 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self, float_string_frame): - renamed = float_string_frame.rename(columns=str.upper) - - assert "FOO" in renamed - assert "foo" not in renamed - - def test_rename_axis_style(self): - # https://github.com/pandas-dev/pandas/issues/12392 - df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) - expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) - - result = df.rename(str.lower, axis=1) - tm.assert_frame_equal(result, expected) - - result = df.rename(str.lower, axis="columns") - tm.assert_frame_equal(result, expected) - - result = df.rename({"A": "a", "B": "b"}, axis=1) - tm.assert_frame_equal(result, expected) - - result = df.rename({"A": "a", "B": "b"}, axis="columns") - tm.assert_frame_equal(result, expected) - - # Index - expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) - result = df.rename(str.lower, axis=0) - tm.assert_frame_equal(result, expected) - - result = df.rename(str.lower, axis="index") - tm.assert_frame_equal(result, expected) - - result = df.rename({"X": "x", "Y": "y"}, axis=0) - tm.assert_frame_equal(result, expected) - - result = df.rename({"X": "x", "Y": "y"}, axis="index") - tm.assert_frame_equal(result, expected) - - result = df.rename(mapper=str.lower, axis="index") - tm.assert_frame_equal(result, expected) - - def test_rename_mapper_multi(self): - df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( - ["A", "B"] - ) - result = df.rename(str.upper) - expected = df.rename(index=str.upper) - tm.assert_frame_equal(result, expected) - - def test_rename_positional_named(self): - # https://github.com/pandas-dev/pandas/issues/12392 - df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) - result = df.rename(index=str.lower, columns=str.upper) - expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) - tm.assert_frame_equal(result, expected) - - def test_rename_axis_style_raises(self): - # see gh-12392 - df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) - - # Named target and axis - over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(index=str.lower, axis=1) - - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(index=str.lower, axis="columns") - - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(columns=str.lower, axis="columns") - - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(index=str.lower, axis=0) - - # Multiple targets and axis - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(str.lower, index=str.lower, axis="columns") - - # Too many targets - over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(str.lower, index=str.lower, columns=str.lower) - - # Duplicates - with pytest.raises(TypeError, match="multiple values"): - df.rename(id, mapper=id) - def test_reindex_api_equivalence(self): # equivalence of the labels/axis and index/columns API's df = DataFrame( @@ -1376,43 +785,6 @@ def test_reindex_api_equivalence(self): for res in [res2, res3]: tm.assert_frame_equal(res1, res) - def test_rename_positional_raises(self): - # GH 29136 - df = DataFrame(columns=["A", "B"]) - msg = r"rename\(\) takes from 1 to 2 positional arguments" - - with pytest.raises(TypeError, match=msg): - df.rename(None, str.lower) - - def test_rename_no_mappings_raises(self): - # GH 29136 - df = DataFrame([[1]]) - msg = "must pass an index to rename" - with pytest.raises(TypeError, match=msg): - df.rename() - - with pytest.raises(TypeError, match=msg): - df.rename(None, index=None) - - with pytest.raises(TypeError, match=msg): - df.rename(None, columns=None) - - with pytest.raises(TypeError, match=msg): - df.rename(None, columns=None, index=None) - - def test_rename_mapper_and_positional_arguments_raises(self): - # GH 29136 - df = DataFrame([[1]]) - msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" - with pytest.raises(TypeError, match=msg): - df.rename({}, index={}) - - with pytest.raises(TypeError, match=msg): - df.rename({}, columns={}) - - with pytest.raises(TypeError, match=msg): - df.rename({}, columns={}, index={}) - def test_assign_columns(self, float_frame): float_frame["hi"] = "there" diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 36a476d195fe5..321eb5fe94daf 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -21,27 +21,6 @@ def test_concat_multiple_frames_dtypes(self): ) tm.assert_series_equal(results, expected) - @pytest.mark.parametrize( - "data", - [ - pd.date_range("2000", periods=4), - pd.date_range("2000", periods=4, tz="US/Central"), - pd.period_range("2000", periods=4), - pd.timedelta_range(0, periods=4), - ], - ) - def test_combine_datetlike_udf(self, data): - # https://github.com/pandas-dev/pandas/issues/23079 - df = pd.DataFrame({"A": data}) - other = df.copy() - df.iloc[1, 0] = None - - def combiner(a, b): - return b - - result = df.combine(other, combiner) - tm.assert_frame_equal(result, other) - def test_concat_multiple_tzs(self): # GH 12467 # combining datetime tz-aware and naive DataFrames diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 8b63f0614eebf..713d8f3ceeedb 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -111,325 +111,6 @@ def test_dtypes_are_correct_after_column_slice(self): pd.Series(odict([("a", np.float_), ("b", np.float_), ("c", np.float_)])), ) - def test_select_dtypes_include_using_list_like(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=[np.number]) - ei = df[["b", "c", "d", "k"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=[np.number], exclude=["timedelta"]) - ei = df[["b", "c", "d"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"]) - ei = df[["b", "c", "d", "f"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=["datetime"]) - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=["datetime64"]) - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=["datetimetz"]) - ei = df[["h", "i"]] - tm.assert_frame_equal(ri, ei) - - with pytest.raises(NotImplementedError, match=r"^$"): - df.select_dtypes(include=["period"]) - - def test_select_dtypes_exclude_using_list_like(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - } - ) - re = df.select_dtypes(exclude=[np.number]) - ee = df[["a", "e"]] - tm.assert_frame_equal(re, ee) - - def test_select_dtypes_exclude_include_using_list_like(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - exclude = (np.datetime64,) - include = np.bool_, "integer" - r = df.select_dtypes(include=include, exclude=exclude) - e = df[["b", "c", "e"]] - tm.assert_frame_equal(r, e) - - exclude = ("datetime",) - include = "bool", "int64", "int32" - r = df.select_dtypes(include=include, exclude=exclude) - e = df[["b", "e"]] - tm.assert_frame_equal(r, e) - - def test_select_dtypes_include_using_scalars(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=np.number) - ei = df[["b", "c", "d", "k"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include="datetime") - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include="datetime64") - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include="category") - ei = df[["f"]] - tm.assert_frame_equal(ri, ei) - - with pytest.raises(NotImplementedError, match=r"^$"): - df.select_dtypes(include="period") - - def test_select_dtypes_exclude_using_scalars(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(exclude=np.number) - ei = df[["a", "e", "f", "g", "h", "i", "j"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(exclude="category") - ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]] - tm.assert_frame_equal(ri, ei) - - with pytest.raises(NotImplementedError, match=r"^$"): - df.select_dtypes(exclude="period") - - def test_select_dtypes_include_exclude_using_scalars(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=np.number, exclude="floating") - ei = df[["b", "c", "k"]] - tm.assert_frame_equal(ri, ei) - - def test_select_dtypes_include_exclude_mixed_scalars_lists(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"]) - ei = df[["b", "c"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=[np.number, "category"], exclude="floating") - ei = df[["b", "c", "f", "k"]] - tm.assert_frame_equal(ri, ei) - - def test_select_dtypes_duplicate_columns(self): - # GH20839 - odict = OrderedDict - df = DataFrame( - odict( - [ - ("a", list("abc")), - ("b", list(range(1, 4))), - ("c", np.arange(3, 6).astype("u1")), - ("d", np.arange(4.0, 7.0, dtype="float64")), - ("e", [True, False, True]), - ("f", pd.date_range("now", periods=3).values), - ] - ) - ) - df.columns = ["a", "a", "b", "b", "b", "c"] - - expected = DataFrame( - {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")} - ) - - result = df.select_dtypes(include=[np.number], exclude=["floating"]) - tm.assert_frame_equal(result, expected) - - def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - df["g"] = df.f.diff() - assert not hasattr(np, "u8") - r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"]) - e = df[["a", "b"]] - tm.assert_frame_equal(r, e) - - r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"]) - e = df[["a", "b", "g"]] - tm.assert_frame_equal(r, e) - - def test_select_dtypes_empty(self): - df = DataFrame({"a": list("abc"), "b": list(range(1, 4))}) - msg = "at least one of include or exclude must be nonempty" - with pytest.raises(ValueError, match=msg): - df.select_dtypes() - - def test_select_dtypes_bad_datetime64(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - with pytest.raises(ValueError, match=".+ is too specific"): - df.select_dtypes(include=["datetime64[D]"]) - - with pytest.raises(ValueError, match=".+ is too specific"): - df.select_dtypes(exclude=["datetime64[as]"]) - - def test_select_dtypes_datetime_with_tz(self): - - df2 = DataFrame( - dict( - A=Timestamp("20130102", tz="US/Eastern"), - B=Timestamp("20130603", tz="CET"), - ), - index=range(5), - ) - df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) - result = df3.select_dtypes(include=["datetime64[ns]"]) - expected = df3.reindex(columns=[]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"] - ) - @pytest.mark.parametrize("arg", ["include", "exclude"]) - def test_select_dtypes_str_raises(self, dtype, arg): - df = DataFrame( - { - "a": list("abc"), - "g": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - msg = "string dtypes are not allowed" - kwargs = {arg: [dtype]} - - with pytest.raises(TypeError, match=msg): - df.select_dtypes(**kwargs) - - def test_select_dtypes_bad_arg_raises(self): - df = DataFrame( - { - "a": list("abc"), - "g": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - - msg = "data type.*not understood" - with pytest.raises(TypeError, match=msg): - df.select_dtypes(["blargy, blarg, blarg"]) - - def test_select_dtypes_typecodes(self): - # GH 11990 - df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) - expected = df - FLOAT_TYPES = list(np.typecodes["AllFloat"]) - tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) - def test_dtypes_gh8722(self, float_string_frame): float_string_frame["bool"] = float_string_frame["A"] > 0 result = float_string_frame.dtypes diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index df40c2e7e2a11..542d9835bb5d3 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -685,25 +685,6 @@ def test_boolean_comparison(self): with pytest.raises(ValueError, match=msg1d): result = df == tup - def test_combine_generic(self, float_frame): - df1 = float_frame - df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]] - - combined = df1.combine(df2, np.add) - combined2 = df2.combine(df1, np.add) - assert combined["D"].isna().all() - assert combined2["D"].isna().all() - - chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]] - chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]] - - exp = ( - float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk) - * 2 - ) - tm.assert_frame_equal(chunk, exp) - tm.assert_frame_equal(chunk2, exp) - def test_inplace_ops_alignment(self): # inplace ops / ops alignment diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index a6b2b334d3ec8..1ce13fd31ba88 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -1,19 +1,6 @@ -from datetime import timedelta - import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - PeriodIndex, - Timedelta, - date_range, - period_range, - to_datetime, -) + +from pandas import DataFrame, Index, PeriodIndex, period_range import pandas._testing as tm @@ -49,93 +36,6 @@ def test_frame_setitem(self): assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) - def test_frame_to_time_stamp(self): - K = 5 - index = period_range(freq="A", start="1/1/2001", end="12/1/2009") - df = DataFrame(np.random.randn(len(index), K), index=index) - df["mix"] = "a" - - exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") - result = df.to_timestamp("D", "end") - tm.assert_index_equal(result.index, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") - result = df.to_timestamp("D", "start") - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq="A-DEC"): - return date_range( - to_datetime("1/1/2001") + delta, - to_datetime("12/31/2009") + delta, - freq=freq, - ) - - delta = timedelta(hours=23) - result = df.to_timestamp("H", "end") - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp("T", "end") - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - result = df.to_timestamp("S", "end") - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - # columns - df = df.T - - exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") - result = df.to_timestamp("D", "end", axis=1) - tm.assert_index_equal(result.columns, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") - result = df.to_timestamp("D", "start", axis=1) - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23) - result = df.to_timestamp("H", "end", axis=1) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp("T", "end", axis=1) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") - tm.assert_index_equal(result.columns, exp_index) - - result = df.to_timestamp("S", "end", axis=1) - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") - tm.assert_index_equal(result.columns, exp_index) - - # invalid axis - with pytest.raises(ValueError, match="axis"): - df.to_timestamp(axis=2) - - result1 = df.to_timestamp("5t", axis=1) - result2 = df.to_timestamp("t", axis=1) - expected = pd.date_range("2001-01-01", "2009-01-01", freq="AS") - assert isinstance(result1.columns, DatetimeIndex) - assert isinstance(result2.columns, DatetimeIndex) - tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) - tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) - # PeriodIndex.to_timestamp always use 'infer' - assert result1.columns.freqstr == "AS-JAN" - assert result2.columns.freqstr == "AS-JAN" - def test_frame_index_to_string(self): index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") frame = DataFrame(np.random.randn(3, 4), index=index) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 5e06b6402c34f..b713af92eac27 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -1,30 +1,10 @@ -from datetime import datetime, time -from itertools import product - import numpy as np import pytest -import pytz import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - MultiIndex, - Series, - date_range, - period_range, - to_datetime, -) +from pandas import DataFrame, Series, date_range, to_datetime import pandas._testing as tm -import pandas.tseries.offsets as offsets - - -@pytest.fixture(params=product([True, False], [True, False])) -def close_open_fixture(request): - return request.param - class TestDataFrameTimeSeriesMethods: def test_frame_ctor_datetime64_column(self): @@ -80,54 +60,6 @@ def test_frame_append_datetime64_col_other_units(self): assert (tmp["dates"].values == ex_vals).all() - def test_asfreq(self, datetime_frame): - offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) - rule_monthly = datetime_frame.asfreq("BM") - - tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"]) - - filled = rule_monthly.asfreq("B", method="pad") # noqa - # TODO: actually check that this worked. - - # don't forget! - filled_dep = rule_monthly.asfreq("B", method="pad") # noqa - - # test does not blow up on length-0 DataFrame - zero_length = datetime_frame.reindex([]) - result = zero_length.asfreq("BM") - assert result is not zero_length - - def test_asfreq_datetimeindex(self): - df = DataFrame( - {"A": [1, 2, 3]}, - index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], - ) - df = df.asfreq("B") - assert isinstance(df.index, DatetimeIndex) - - ts = df["A"].asfreq("B") - assert isinstance(ts.index, DatetimeIndex) - - def test_asfreq_fillvalue(self): - # test for fill value during upsampling, related to issue 3715 - - # setup - rng = pd.date_range("1/1/2016", periods=10, freq="2S") - ts = pd.Series(np.arange(len(rng)), index=rng) - df = pd.DataFrame({"one": ts}) - - # insert pre-existing missing value - df.loc["2016-01-01 00:00:08", "one"] = None - - actual_df = df.asfreq(freq="1S", fill_value=9.0) - expected_df = df.asfreq(freq="1S").fillna(9.0) - expected_df.loc["2016-01-01 00:00:08", "one"] = None - tm.assert_frame_equal(expected_df, actual_df) - - expected_series = ts.asfreq(freq="1S").fillna(9.0) - actual_series = ts.asfreq(freq="1S", fill_value=9.0) - tm.assert_series_equal(expected_series, actual_series) - @pytest.mark.parametrize( "data,idx,expected_first,expected_last", [ @@ -239,183 +171,6 @@ def test_last_raises(self): with pytest.raises(TypeError): # index is not a DatetimeIndex df.last("1D") - def test_at_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - rs = ts.at_time(rng[1]) - assert (rs.index.hour == rng[1].hour).all() - assert (rs.index.minute == rng[1].minute).all() - assert (rs.index.second == rng[1].second).all() - - result = ts.at_time("9:30") - expected = ts.at_time(time(9, 30)) - tm.assert_frame_equal(result, expected) - - result = ts.loc[time(9, 30)] - expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] - - tm.assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range("1/1/2000", "1/31/2000") - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts.at_time(time(0, 0)) - tm.assert_frame_equal(result, ts) - - # time doesn't exist - rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = DataFrame(np.random.randn(len(rng), 2), rng) - rs = ts.at_time("16:00") - assert len(rs) == 0 - - @pytest.mark.parametrize( - "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] - ) - def test_at_time_errors(self, hour): - # GH 24043 - dti = pd.date_range("2018", periods=3, freq="H") - df = pd.DataFrame(list(range(len(dti))), index=dti) - if getattr(hour, "tzinfo", None) is None: - result = df.at_time(hour) - expected = df.iloc[1:2] - tm.assert_frame_equal(result, expected) - else: - with pytest.raises(ValueError, match="Index must be timezone"): - df.at_time(hour) - - def test_at_time_tz(self): - # GH 24043 - dti = pd.date_range("2018", periods=3, freq="H", tz="US/Pacific") - df = pd.DataFrame(list(range(len(dti))), index=dti) - result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) - expected = df.iloc[1:2] - tm.assert_frame_equal(result, expected) - - def test_at_time_raises(self): - # GH20725 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - with pytest.raises(TypeError): # index is not a DatetimeIndex - df.at_time("00:00") - - @pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) - def test_at_time_axis(self, axis): - # issue 8839 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), len(rng))) - ts.index, ts.columns = rng, rng - - indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] - - if axis in ["index", 0]: - expected = ts.loc[indices, :] - elif axis in ["columns", 1]: - expected = ts.loc[:, indices] - - result = ts.at_time("9:30", axis=axis) - tm.assert_frame_equal(result, expected) - - def test_between_time(self, close_open_fixture): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(0, 0) - etime = time(1, 0) - inc_start, inc_end = close_open_fixture - - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert t >= stime - else: - assert t > stime - - if inc_end: - assert t <= etime - else: - assert t < etime - - result = ts.between_time("00:00", "01:00") - expected = ts.between_time(stime, etime) - tm.assert_frame_equal(result, expected) - - # across midnight - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(22, 0) - etime = time(9, 0) - - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert (t >= stime) or (t <= etime) - else: - assert (t > stime) or (t <= etime) - - if inc_end: - assert (t <= etime) or (t >= stime) - else: - assert (t < etime) or (t >= stime) - - def test_between_time_raises(self): - # GH20725 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - with pytest.raises(TypeError): # index is not a DatetimeIndex - df.between_time(start_time="00:00", end_time="12:00") - - def test_between_time_axis(self, axis): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - ts = DataFrame(np.random.randn(len(rng), len(rng))) - stime, etime = ("08:00:00", "09:00:00") - exp_len = 7 - - if axis in ["index", 0]: - ts.index = rng - assert len(ts.between_time(stime, etime)) == exp_len - assert len(ts.between_time(stime, etime, axis=0)) == exp_len - - if axis in ["columns", 1]: - ts.columns = rng - selected = ts.between_time(stime, etime, axis=1).columns - assert len(selected) == exp_len - - def test_between_time_axis_raises(self, axis): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - mask = np.arange(0, len(rng)) - rand_data = np.random.randn(len(rng), len(rng)) - ts = DataFrame(rand_data, index=rng, columns=rng) - stime, etime = ("08:00:00", "09:00:00") - - msg = "Index must be DatetimeIndex" - if axis in ["columns", 1]: - ts.index = mask - with pytest.raises(TypeError, match=msg): - ts.between_time(stime, etime) - with pytest.raises(TypeError, match=msg): - ts.between_time(stime, etime, axis=0) - - if axis in ["index", 0]: - ts.columns = mask - with pytest.raises(TypeError, match=msg): - ts.between_time(stime, etime, axis=1) - def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) @@ -455,95 +210,3 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): {0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]" ) tm.assert_frame_equal(result, expected) - - def test_frame_to_period(self): - K = 5 - - dr = date_range("1/1/2000", "1/1/2001") - pr = period_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.randn(len(dr), K), index=dr) - df["mix"] = "a" - - pts = df.to_period() - exp = df.copy() - exp.index = pr - tm.assert_frame_equal(pts, exp) - - pts = df.to_period("M") - tm.assert_index_equal(pts.index, exp.index.asfreq("M")) - - df = df.T - pts = df.to_period(axis=1) - exp = df.copy() - exp.columns = pr - tm.assert_frame_equal(pts, exp) - - pts = df.to_period("M", axis=1) - tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) - - msg = "No axis named 2 for object type " - with pytest.raises(ValueError, match=msg): - df.to_period(axis=2) - - @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) - def test_tz_convert_and_localize(self, fn): - l0 = date_range("20140701", periods=5, freq="D") - l1 = date_range("20140701", periods=5, freq="D") - - int_idx = Index(range(5)) - - if fn == "tz_convert": - l0 = l0.tz_localize("UTC") - l1 = l1.tz_localize("UTC") - - for idx in [l0, l1]: - - l0_expected = getattr(idx, fn)("US/Pacific") - l1_expected = getattr(idx, fn)("US/Pacific") - - df1 = DataFrame(np.ones(5), index=l0) - df1 = getattr(df1, fn)("US/Pacific") - tm.assert_index_equal(df1.index, l0_expected) - - # MultiIndex - # GH7846 - df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) - - df3 = getattr(df2, fn)("US/Pacific", level=0) - assert not df3.index.levels[0].equals(l0) - tm.assert_index_equal(df3.index.levels[0], l0_expected) - tm.assert_index_equal(df3.index.levels[1], l1) - assert not df3.index.levels[1].equals(l1_expected) - - df3 = getattr(df2, fn)("US/Pacific", level=1) - tm.assert_index_equal(df3.index.levels[0], l0) - assert not df3.index.levels[0].equals(l0_expected) - tm.assert_index_equal(df3.index.levels[1], l1_expected) - assert not df3.index.levels[1].equals(l1) - - df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) - - # TODO: untested - df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa - - tm.assert_index_equal(df3.index.levels[0], l0) - assert not df3.index.levels[0].equals(l0_expected) - tm.assert_index_equal(df3.index.levels[1], l1_expected) - assert not df3.index.levels[1].equals(l1) - - # Bad Inputs - - # Not DatetimeIndex / PeriodIndex - with pytest.raises(TypeError, match="DatetimeIndex"): - df = DataFrame(index=int_idx) - df = getattr(df, fn)("US/Pacific") - - # Not DatetimeIndex / PeriodIndex - with pytest.raises(TypeError, match="DatetimeIndex"): - df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) - df = getattr(df, fn)("US/Pacific", level=0) - - # Invalid level - with pytest.raises(ValueError, match="not valid"): - df = DataFrame(index=l0) - df = getattr(df, fn)("US/Pacific", level=1) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index b60f2052a988f..62e8a4b470218 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -59,34 +59,6 @@ def test_frame_from_records_utc(self): # it works DataFrame.from_records([rec], index="begin_time") - def test_frame_tz_localize(self): - rng = date_range("1/1/2011", periods=100, freq="H") - - df = DataFrame({"a": 1}, index=rng) - result = df.tz_localize("utc") - expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) - assert result.index.tz.zone == "UTC" - tm.assert_frame_equal(result, expected) - - df = df.T - result = df.tz_localize("utc", axis=1) - assert result.columns.tz.zone == "UTC" - tm.assert_frame_equal(result, expected.T) - - def test_frame_tz_convert(self): - rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") - - df = DataFrame({"a": 1}, index=rng) - result = df.tz_convert("Europe/Berlin") - expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) - assert result.index.tz.zone == "Europe/Berlin" - tm.assert_frame_equal(result, expected) - - df = df.T - result = df.tz_convert("Europe/Berlin", axis=1) - assert result.columns.tz.zone == "Europe/Berlin" - tm.assert_frame_equal(result, expected.T) - def test_frame_join_tzaware(self): test1 = DataFrame( np.zeros((6, 3)), diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 8e54de771a3e4..1b6cb8447c76d 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -187,8 +187,10 @@ def test_constructor_compound_dtypes(self): def f(dtype): return self._construct(shape=3, value=1, dtype=dtype) - msg = "compound dtypes are not implemented" - f"in the {self._typ.__name__} constructor" + msg = ( + "compound dtypes are not implemented " + f"in the {self._typ.__name__} constructor" + ) with pytest.raises(NotImplementedError, match=msg): f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 5aafd83da78fd..f119eb422a276 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -24,13 +24,6 @@ class TestSeries(Generic): _typ = Series _comparator = lambda self, x, y: tm.assert_series_equal(x, y) - def setup_method(self): - self.ts = tm.makeTimeSeries() # Was at top level in test_series - self.ts.name = "ts" - - self.series = tm.makeStringSeries() - self.series.name = "series" - def test_rename_mi(self): s = Series( [11, 21, 31], diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py index 1df0874e2f947..ee3f85da22781 100644 --- a/pandas/tests/indexes/categorical/test_constructors.py +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -136,12 +136,3 @@ def test_construction_with_categorical_dtype(self): with pytest.raises(ValueError, match=msg): Index(data, ordered=ordered, dtype=dtype) - - def test_create_categorical(self): - # GH#17513 The public CI constructor doesn't hit this code path with - # instances of CategoricalIndex, but we still want to test the code - ci = CategoricalIndex(["a", "b", "c"]) - # First ci is self, second ci is data. - result = CategoricalIndex._create_categorical(ci, ci) - expected = Categorical(["a", "b", "c"]) - tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6479b14e9521e..40c7ffba46450 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -128,15 +128,9 @@ def test_shallow_copy_empty(self): def test_shallow_copy_i8(self): # GH-24391 pi = period_range("2018-01-01", periods=3, freq="2D") - result = pi._shallow_copy(pi.asi8, freq=pi.freq) + result = pi._shallow_copy(pi.asi8) tm.assert_index_equal(result, pi) - def test_shallow_copy_changing_freq_raises(self): - pi = period_range("2018-01-01", periods=3, freq="2D") - msg = "specified freq and dtype are different" - with pytest.raises(IncompatibleFrequency, match=msg): - pi._shallow_copy(pi, freq="H") - def test_view_asi8(self): idx = PeriodIndex([], freq="M") diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 4d3f1b0539aee..87520f5ab2577 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -162,10 +162,9 @@ def test_scalar_non_numeric(self, index_func, klass): s2.loc[3.0] = 10 assert s2.index.is_object() - for idxr in [lambda x: x]: - s2 = s.copy() - idxr(s2)[3.0] = 0 - assert s2.index.is_object() + s2 = s.copy() + s2[3.0] = 0 + assert s2.index.is_object() @pytest.mark.parametrize( "index_func", @@ -250,12 +249,7 @@ def test_scalar_integer(self, index_func, klass): # integer index i = index_func(5) - - if klass is Series: - # TODO: Should we be passing index=i here? - obj = Series(np.arange(len(i))) - else: - obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i) + obj = gen_obj(klass, i) # coerce to equal int for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: @@ -313,7 +307,7 @@ def test_scalar_float(self, klass): result = idxr(s2)[indexer] self.check(result, s, 3, getitem) - # random integer is a KeyError + # random float is a KeyError with pytest.raises(KeyError, match=r"^3\.5$"): idxr(s)[3.5] @@ -429,15 +423,6 @@ def test_slice_integer(self): indexer = slice(3, 5) self.check(result, s, indexer, False) - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] - # getitem out-of-bounds for l in [slice(-6, 6), slice(-6.0, 6.0)]: @@ -485,23 +470,6 @@ def test_slice_integer(self): with pytest.raises(TypeError, match=msg): s[l] - # setitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: - - sc = s.copy() - sc.loc[l] = 0 - result = sc.loc[l].values.ravel() - assert (result == 0).all() - - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] = 0 - @pytest.mark.parametrize("l", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) def test_integer_positional_indexing(self, l): """ make sure that we are raising on positional indexing @@ -584,22 +552,34 @@ def test_slice_integer_frame_getitem(self, index_func): with pytest.raises(TypeError, match=msg): s[l] + @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + @pytest.mark.parametrize( + "index_func", [tm.makeIntIndex, tm.makeRangeIndex], + ) + def test_float_slice_getitem_with_integer_index_raises(self, l, index_func): + + # similar to above, but on the getitem dim (of a DataFrame) + index = index_func(5) + + s = DataFrame(np.random.randn(5, 2), index=index) + # setitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + sc = s.copy() + sc.loc[l] = 0 + result = sc.loc[l].values.ravel() + assert (result == 0).all() - sc = s.copy() - sc.loc[l] = 0 - result = sc.loc[l].values.ravel() - assert (result == 0).all() + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[l] = 0 - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] = 0 + with pytest.raises(TypeError, match=msg): + s[l] @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) @pytest.mark.parametrize("klass", [Series, DataFrame]) @@ -614,10 +594,9 @@ def test_slice_float(self, l, klass): # getitem result = idxr(s)[l] - if isinstance(s, Series): - tm.assert_series_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) + assert isinstance(result, type(s)) + tm.assert_equal(result, expected) + # setitem s2 = s.copy() idxr(s2)[l] = 0 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0c9ddbf5473b3..27b0500983afd 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -203,12 +203,6 @@ def create_mgr(descr, item_shape=None): class TestBlock: def setup_method(self, method): - # self.fblock = get_float_ex() # a,c,e - # self.cblock = get_complex_ex() # - # self.oblock = get_obj_ex() - # self.bool_block = get_bool_ex() - # self.int_block = get_int_ex() - self.fblock = create_block("float", [0, 2, 4]) self.cblock = create_block("complex", [7]) self.oblock = create_block("object", [1, 3]) @@ -254,22 +248,11 @@ def test_merge(self): tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals)) tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals)) - # TODO: merge with mixed type? - def test_copy(self): cop = self.fblock.copy() assert cop is not self.fblock assert_block_equal(self.fblock, cop) - def test_reindex_index(self): - pass - - def test_reindex_cast(self): - pass - - def test_insert(self): - pass - def test_delete(self): newb = self.fblock.copy() newb.delete(0) @@ -300,39 +283,7 @@ def test_delete(self): newb.delete(3) -class TestDatetimeBlock: - def test_can_hold_element(self): - block = create_block("datetime", [0]) - - # We will check that block._can_hold_element iff arr.__setitem__ works - arr = pd.array(block.values.ravel()) - - # coerce None - assert block._can_hold_element(None) - arr[0] = None - assert arr[0] is pd.NaT - - # coerce different types of datetime objects - vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] - for val in vals: - assert block._can_hold_element(val) - arr[0] = val - - val = date(2010, 10, 10) - assert not block._can_hold_element(val) - - msg = ( - "'value' should be a 'Timestamp', 'NaT', " - "or array of those. Got 'date' instead." - ) - with pytest.raises(TypeError, match=msg): - arr[0] = val - - class TestBlockManager: - def test_constructor_corner(self): - pass - def test_attrs(self): mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2") assert mgr.nblocks == 2 @@ -441,18 +392,6 @@ def test_set_change_dtype(self, mgr): mgr2.set("quux", tm.randn(N)) assert mgr2.get("quux").dtype == np.float_ - def test_set_change_dtype_slice(self): # GH8850 - cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) - df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) - df["2nd"] = df["2nd"] * 2.0 - - blocks = df._to_dict_of_blocks() - assert sorted(blocks.keys()) == ["float64", "int64"] - tm.assert_frame_equal( - blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]) - ) - tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:])) - def test_copy(self, mgr): cp = mgr.copy(deep=False) for blk, cp_blk in zip(mgr.blocks, cp.blocks): @@ -486,7 +425,7 @@ def test_sparse_mixed(self): assert len(mgr.blocks) == 3 assert isinstance(mgr, BlockManager) - # what to test here? + # TODO: what to test here? def test_as_array_float(self): mgr = create_mgr("c: f4; d: f2; e: f8") @@ -650,22 +589,6 @@ def test_interleave(self): mgr = create_mgr("a: M8[ns]; b: m8[ns]") assert mgr.as_array().dtype == "object" - def test_interleave_non_unique_cols(self): - df = DataFrame( - [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]], - columns=["x", "x"], - index=[1, 2], - ) - - df_unique = df.copy() - df_unique.columns = ["x", "y"] - assert df_unique.values.shape == df.values.shape - tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) - tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) - - def test_consolidate(self): - pass - def test_consolidate_ordering_issues(self, mgr): mgr.set("f", tm.randn(N)) mgr.set("d", tm.randn(N)) @@ -683,10 +606,6 @@ def test_consolidate_ordering_issues(self, mgr): cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64) ) - def test_reindex_index(self): - # TODO: should this be pytest.skip? - pass - def test_reindex_items(self): # mgr is not consolidated, f8 & f8-2 blocks mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") @@ -767,13 +686,6 @@ def test_get_bool_data(self): def test_unicode_repr_doesnt_raise(self): repr(create_mgr("b,\u05d0: object")) - def test_missing_unicode_key(self): - df = DataFrame({"a": [1]}) - try: - df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError - except KeyError: - pass # this is the expected exception - def test_equals(self): # unique items bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") @@ -843,8 +755,6 @@ class TestIndexing: create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N, N)), ] - # MANAGERS = [MANAGERS[6]] - @pytest.mark.parametrize("mgr", MANAGERS) def test_get_slice(self, mgr): def assert_slice_ok(mgr, axis, slobj): @@ -994,11 +904,6 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 1, 2], fill_value, ) - # test_get_slice(slice_like, axis) - # take(indexer, axis) - # reindex_axis(new_labels, axis) - # reindex_indexer(new_labels, indexer, axis) - class TestBlockPlacement: def test_slice_len(self): @@ -1151,6 +1056,33 @@ def any(self, axis=None): class TestCanHoldElement: + def test_datetime_block_can_hold_element(self): + block = create_block("datetime", [0]) + + # We will check that block._can_hold_element iff arr.__setitem__ works + arr = pd.array(block.values.ravel()) + + # coerce None + assert block._can_hold_element(None) + arr[0] = None + assert arr[0] is pd.NaT + + # coerce different types of datetime objects + vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] + for val in vals: + assert block._can_hold_element(val) + arr[0] = val + + val = date(2010, 10, 10) + assert not block._can_hold_element(val) + + msg = ( + "'value' should be a 'Timestamp', 'NaT', " + "or array of those. Got 'date' instead." + ) + with pytest.raises(TypeError, match=msg): + arr[0] = val + @pytest.mark.parametrize( "value, dtype", [ @@ -1280,3 +1212,37 @@ def test_dataframe_not_equal(): df1 = pd.DataFrame({"a": [1, 2], "b": ["s", "d"]}) df2 = pd.DataFrame({"a": ["s", "d"], "b": [1, 2]}) assert df1.equals(df2) is False + + +def test_missing_unicode_key(): + df = DataFrame({"a": [1]}) + with pytest.raises(KeyError, match="\u05d0"): + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError + + +def test_set_change_dtype_slice(): + # GH#8850 + cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) + df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) + df["2nd"] = df["2nd"] * 2.0 + + blocks = df._to_dict_of_blocks() + assert sorted(blocks.keys()) == ["float64", "int64"] + tm.assert_frame_equal( + blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]) + ) + tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:])) + + +def test_interleave_non_unique_cols(): + df = DataFrame( + [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]], + columns=["x", "x"], + index=[1, 2], + ) + + df_unique = df.copy() + df_unique.columns = ["x", "y"] + assert df_unique.values.shape == df.values.shape + tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) + tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index b4a7173da84d0..4c75d1ebcd377 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -548,3 +548,16 @@ def test_timestamp_constructor_identity(): expected = Timestamp("2017-01-01T12") result = Timestamp(expected) assert result is expected + + +@pytest.mark.parametrize("kwargs", [{}, {"year": 2020}, {"year": 2020, "month": 1}]) +def test_constructor_missing_keyword(kwargs): + # GH 31200 + + # The exact error message of datetime() depends on its version + msg1 = r"function missing required argument '(year|month|day)' \(pos [123]\)" + msg2 = r"Required argument '(year|month|day)' \(pos [123]\) not found" + msg = "|".join([msg1, msg2]) + + with pytest.raises(TypeError, match=msg): + Timestamp(**kwargs) diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index 4d64b5b397981..4742d6ae3544f 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, DatetimeIndex, Series, date_range +from pandas import DataFrame, DatetimeIndex, Index, Series, Timestamp, date_range import pandas._testing as tm @@ -166,3 +166,87 @@ def test_append_tz_dateutil(self): appended = rng.append(rng2) tm.assert_index_equal(appended, rng3) + + def test_series_append_aware(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex( + ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern" + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC") + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + utc = rng1.tz + assert utc == ts_result.index.tz + + # GH#7795 + # different tz coerces to object dtype, not UTC + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + exp_index = Index( + [ + Timestamp("1/1/2011 01:00", tz="US/Eastern"), + Timestamp("1/1/2011 02:00", tz="US/Central"), + ] + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + + def test_series_append_aware_naive(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index.astype(object)) + assert ts_result.index.equals(expected) + + # mixed + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = range(100) + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index) + assert ts_result.index.equals(expected) + + def test_series_append_dst(self): + rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + ser1 = Series([1, 2, 3], index=rng1) + ser2 = Series([10, 11, 12], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex( + [ + "2016-01-01 01:00", + "2016-01-01 02:00", + "2016-01-01 03:00", + "2016-08-01 01:00", + "2016-08-01 02:00", + "2016-08-01 03:00", + ], + tz="US/Eastern", + ) + exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz diff --git a/pandas/tests/series/methods/test_asfreq.py b/pandas/tests/series/methods/test_asfreq.py index 05ec56cf02182..d94b60384a07c 100644 --- a/pandas/tests/series/methods/test_asfreq.py +++ b/pandas/tests/series/methods/test_asfreq.py @@ -1,8 +1,13 @@ +from datetime import datetime + import numpy as np +import pytest -from pandas import DataFrame, Series, period_range +from pandas import DataFrame, DatetimeIndex, Series, date_range, period_range import pandas._testing as tm +from pandas.tseries.offsets import BDay, BMonthEnd + class TestAsFreq: # TODO: de-duplicate/parametrize or move DataFrame test @@ -21,3 +26,79 @@ def test_asfreq_ts(self): result = ts.asfreq("D", how="start") assert len(result) == len(ts) tm.assert_index_equal(result.index, index.asfreq("D", how="start")) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_tz_aware_asfreq(self, tz): + dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) + + ser = Series(np.random.randn(len(dr)), index=dr) + + # it works! + ser.asfreq("T") + + def test_asfreq(self): + ts = Series( + [0.0, 1.0, 2.0], + index=[ + datetime(2009, 10, 30), + datetime(2009, 11, 30), + datetime(2009, 12, 31), + ], + ) + + daily_ts = ts.asfreq("B") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_series_equal(monthly_ts, ts) + + daily_ts = ts.asfreq("B", method="pad") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_series_equal(monthly_ts, ts) + + daily_ts = ts.asfreq(BDay()) + monthly_ts = daily_ts.asfreq(BMonthEnd()) + tm.assert_series_equal(monthly_ts, ts) + + result = ts[:0].asfreq("M") + assert len(result) == 0 + assert result is not ts + + daily_ts = ts.asfreq("D", fill_value=-1) + result = daily_ts.value_counts().sort_index() + expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() + tm.assert_series_equal(result, expected) + + def test_asfreq_datetimeindex_empty_series(self): + # GH#14320 + index = DatetimeIndex(["2016-09-29 11:00"]) + expected = Series(index=index, dtype=object).asfreq("H") + result = Series([3], index=index.copy()).asfreq("H") + tm.assert_index_equal(expected.index, result.index) + + def test_asfreq_keep_index_name(self): + # GH#9854 + index_name = "bar" + index = date_range("20130101", periods=20, name=index_name) + df = DataFrame(list(range(20)), columns=["foo"], index=index) + + assert index_name == df.index.name + assert index_name == df.asfreq("10D").index.name + + def test_asfreq_normalize(self): + rng = date_range("1/1/2000 09:30", periods=20) + norm = date_range("1/1/2000", periods=20) + vals = np.random.randn(20) + ts = Series(vals, index=rng) + + result = ts.asfreq("D", normalize=True) + norm = date_range("1/1/2000", periods=20) + expected = Series(vals, index=norm) + + tm.assert_series_equal(result, expected) + + vals = np.random.randn(20, 3) + ts = DataFrame(vals, index=rng) + + result = ts.asfreq("D", normalize=True) + expected = DataFrame(vals, index=norm) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_at_time.py b/pandas/tests/series/methods/test_at_time.py new file mode 100644 index 0000000000000..d9985cf33776a --- /dev/null +++ b/pandas/tests/series/methods/test_at_time.py @@ -0,0 +1,72 @@ +from datetime import time + +import numpy as np +import pytest + +from pandas._libs.tslibs import timezones + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestAtTime: + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_at_time(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_at_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts[time(9, 30)] + result_df = df.loc[time(9, 30)] + expected = ts[(rng.hour == 9) & (rng.minute == 30)] + exp_df = df[(rng.hour == 9) & (rng.minute == 30)] + + tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result_df, exp_df) + + chunk = df.loc["1/4/2000":] + result = chunk.loc[time(9, 30)] + expected = result_df[-1:] + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.at_time(time(0, 0)) + tm.assert_series_equal(result, ts) + + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = Series(np.random.randn(len(rng)), rng) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + def test_at_time_raises(self): + # GH20725 + ser = Series("a b c".split()) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): + ser.at_time("00:00") diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py new file mode 100644 index 0000000000000..350a3fe6ff009 --- /dev/null +++ b/pandas/tests/series/methods/test_between.py @@ -0,0 +1,35 @@ +import numpy as np + +from pandas import Series, bdate_range, date_range, period_range +import pandas._testing as tm + + +class TestBetween: + + # TODO: redundant with test_between_datetime_values? + def test_between(self): + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + + result = series.between(left, right) + expected = (series >= left) & (series <= right) + tm.assert_series_equal(result, expected) + + def test_between_datetime_values(self): + ser = Series(bdate_range("1/1/2000", periods=20).astype(object)) + ser[::2] = np.nan + + result = ser[ser.between(ser[3], ser[17])] + expected = ser[3:18].dropna() + tm.assert_series_equal(result, expected) + + result = ser[ser.between(ser[3], ser[17], inclusive=False)] + expected = ser[5:16].dropna() + tm.assert_series_equal(result, expected) + + def test_between_period_values(self): + ser = Series(period_range("2000-01-01", periods=10, freq="D")) + left, right = ser[[2, 7]] + result = ser.between(left, right) + expected = (ser >= left) & (ser <= right) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_between_time.py b/pandas/tests/series/methods/test_between_time.py new file mode 100644 index 0000000000000..3fa26afe77a1d --- /dev/null +++ b/pandas/tests/series/methods/test_between_time.py @@ -0,0 +1,144 @@ +from datetime import datetime, time +from itertools import product + +import numpy as np +import pytest + +from pandas._libs.tslibs import timezones +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestBetweenTime: + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_between_time(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_between_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_series_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self): + # GH20725 + ser = Series("a b c".split()) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): + ser.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_types(self): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + frame = DataFrame({"A": 0}, index=rng) + with pytest.raises(ValueError, match=msg): + frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + series = Series(0, index=rng) + with pytest.raises(ValueError, match=msg): + series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + @td.skip_if_has_locale + def test_between_time_formats(self): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + strings = [ + ("2:00", "2:30"), + ("0200", "0230"), + ("2:00am", "2:30am"), + ("0200am", "0230am"), + ("2:00:00", "2:30:00"), + ("020000", "023000"), + ("2:00:00am", "2:30:00am"), + ("020000am", "023000am"), + ] + expected_length = 28 + + for time_string in strings: + assert len(ts.between_time(*time_string)) == expected_length + + def test_between_time_axis(self): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime, etime = ("08:00:00", "09:00:00") + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + msg = "No axis named 1 for object type " + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, axis=1) diff --git a/pandas/tests/series/methods/test_combine.py b/pandas/tests/series/methods/test_combine.py new file mode 100644 index 0000000000000..75d47e3daa103 --- /dev/null +++ b/pandas/tests/series/methods/test_combine.py @@ -0,0 +1,17 @@ +from pandas import Series +import pandas._testing as tm + + +class TestCombine: + def test_combine_scalar(self): + # GH#21248 + # Note - combine() with another Series is tested elsewhere because + # it is used when testing operators + ser = Series([i * 10 for i in range(5)]) + result = ser.combine(3, lambda x, y: x + y) + expected = Series([i * 10 + 3 for i in range(5)]) + tm.assert_series_equal(result, expected) + + result = ser.combine(22, lambda x, y: min(x, y)) + expected = Series([min(i * 10, 22) for i in range(5)]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py new file mode 100644 index 0000000000000..60182f509e657 --- /dev/null +++ b/pandas/tests/series/methods/test_rename.py @@ -0,0 +1,91 @@ +from datetime import datetime + +import numpy as np + +from pandas import Index, Series +import pandas._testing as tm + + +class TestRename: + def test_rename(self, datetime_series): + ts = datetime_series + renamer = lambda x: x.strftime("%Y%m%d") + renamed = ts.rename(renamer) + assert renamed.index[0] == renamer(ts.index[0]) + + # dict + rename_dict = dict(zip(ts.index, renamed.index)) + renamed2 = ts.rename(rename_dict) + tm.assert_series_equal(renamed, renamed2) + + # partial dict + s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") + renamed = s.rename({"b": "foo", "d": "bar"}) + tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) + + # index with name + renamer = Series( + np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" + ) + renamed = renamer.rename({}) + assert renamed.index.name == renamer.index.name + + def test_rename_by_series(self): + s = Series(range(5), name="foo") + renamer = Series({1: 10, 2: 20}) + result = s.rename(renamer) + expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") + tm.assert_series_equal(result, expected) + + def test_rename_set_name(self): + s = Series(range(4), index=list("abcd")) + for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: + result = s.rename(name) + assert result.name == name + tm.assert_numpy_array_equal(result.index.values, s.index.values) + assert s.name is None + + def test_rename_set_name_inplace(self): + s = Series(range(3), index=list("abc")) + for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: + s.rename(name, inplace=True) + assert s.name == name + + exp = np.array(["a", "b", "c"], dtype=np.object_) + tm.assert_numpy_array_equal(s.index.values, exp) + + def test_rename_axis_supported(self): + # Supporting axis for compatibility, detailed in GH-18589 + s = Series(range(5)) + s.rename({}, axis=0) + s.rename({}, axis="index") + # FIXME: dont leave commenred-out + # TODO: clean up shared index validation + # with pytest.raises(ValueError, match="No axis named 5"): + # s.rename({}, axis=5) + + def test_rename_inplace(self, datetime_series): + renamer = lambda x: x.strftime("%Y%m%d") + expected = renamer(datetime_series.index[0]) + + datetime_series.rename(renamer, inplace=True) + assert datetime_series.index[0] == expected + + def test_rename_with_custom_indexer(self): + # GH 27814 + class MyIndexer: + pass + + ix = MyIndexer() + s = Series([1, 2, 3]).rename(ix) + assert s.name is ix + + def test_rename_with_custom_indexer_inplace(self): + # GH 27814 + class MyIndexer: + pass + + ix = MyIndexer() + s = Series([1, 2, 3]) + s.rename(ix, inplace=True) + assert s.name is ix diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py new file mode 100644 index 0000000000000..f0c4895ad7c10 --- /dev/null +++ b/pandas/tests/series/methods/test_reset_index.py @@ -0,0 +1,110 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series +import pandas._testing as tm + + +class TestResetIndex: + def test_reset_index(self): + df = tm.makeDataFrame()[:5] + ser = df.stack() + ser.index.names = ["hash", "category"] + + ser.name = "value" + df = ser.reset_index() + assert "value" in df + + df = ser.reset_index(name="value2") + assert "value2" in df + + # check inplace + s = ser.reset_index(drop=True) + s2 = ser + s2.reset_index(drop=True, inplace=True) + tm.assert_series_equal(s, s2) + + # level + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + rs = s.reset_index(level=1) + assert len(rs.columns) == 2 + + rs = s.reset_index(level=[0, 2], drop=True) + tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) + assert isinstance(rs, Series) + + def test_reset_index_name(self): + s = Series([1, 2, 3], index=Index(range(3), name="x")) + assert s.reset_index().index.name is None + assert s.reset_index(drop=True).index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + s = df.set_index(["A", "B"])["C"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C"]]) + + with pytest.raises(KeyError, match="Level E "): + s.reset_index(level=["A", "E"]) + + # With single-level Index + s = df.set_index("A")["B"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df["B"]) + + with pytest.raises(IndexError, match="Too many levels"): + s.reset_index(level=[0, 1, 2]) + + # Check that .reset_index([],drop=True) doesn't fail + result = Series(range(4)).reset_index([], drop=True) + expected = Series(range(4)) + tm.assert_series_equal(result, expected) + + def test_reset_index_range(self): + # GH 12071 + s = Series(range(2), name="A", dtype="int64") + series_result = s.reset_index() + assert isinstance(series_result.index, RangeIndex) + series_expected = DataFrame( + [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) + ) + tm.assert_frame_equal(series_result, series_expected) + + def test_reset_index_drop_errors(self): + # GH 20925 + + # KeyError raised for series index when passed level name is missing + s = Series(range(4)) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong", drop=True) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong") + + # KeyError raised for series when level to be dropped is missing + s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) + with pytest.raises(KeyError, match="not found"): + s.reset_index("wrong", drop=True) diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py index d4e2890ed8bf0..c97369b349f56 100644 --- a/pandas/tests/series/methods/test_truncate.py +++ b/pandas/tests/series/methods/test_truncate.py @@ -1,7 +1,10 @@ +from datetime import datetime + import numpy as np import pytest import pandas as pd +from pandas import Series, date_range import pandas._testing as tm from pandas.tseries.offsets import BDay @@ -76,3 +79,33 @@ def test_truncate_nonsortedindex(self): with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=False).truncate(before="2011-11", after="2011-12") + + def test_truncate_datetimeindex_tz(self): + # GH 9243 + idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") + s = Series(range(len(idx)), index=idx) + result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) + expected = Series([1, 2, 3], index=idx[1:4]) + tm.assert_series_equal(result, expected) + + def test_truncate_periodindex(self): + # GH 17717 + idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series1 = pd.Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after="2017-09-02") + + expected_idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02")] + ) + tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex( + [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series2 = pd.Series([1, 2, 3], index=idx2) + result2 = series2.sort_index().truncate(after="2017-09-02") + + expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) + tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2)) diff --git a/pandas/tests/series/methods/test_tz_convert.py b/pandas/tests/series/methods/test_tz_convert.py new file mode 100644 index 0000000000000..ce348d5323e62 --- /dev/null +++ b/pandas/tests/series/methods/test_tz_convert.py @@ -0,0 +1,29 @@ +import numpy as np +import pytest + +from pandas import DatetimeIndex, Series, date_range +import pandas._testing as tm + + +class TestTZConvert: + def test_series_tz_convert(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + ts = Series(1, index=rng) + + result = ts.tz_convert("Europe/Berlin") + assert result.index.tz.zone == "Europe/Berlin" + + # can't convert tz-naive + rng = date_range("1/1/2011", periods=200, freq="D") + ts = Series(1, index=rng) + + with pytest.raises(TypeError, match="Cannot convert tz-naive"): + ts.tz_convert("US/Eastern") + + def test_series_tz_convert_to_utc(self): + base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC") + idx1 = base.tz_convert("Asia/Tokyo")[:2] + idx2 = base.tz_convert("US/Eastern")[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py new file mode 100644 index 0000000000000..44c55edf77c0a --- /dev/null +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -0,0 +1,88 @@ +import pytest +import pytz + +from pandas._libs.tslibs import timezones + +from pandas import DatetimeIndex, NaT, Series, Timestamp, date_range +import pandas._testing as tm + + +class TestTZLocalize: + def test_series_tz_localize(self): + + rng = date_range("1/1/2011", periods=100, freq="H") + ts = Series(1, index=rng) + + result = ts.tz_localize("utc") + assert result.index.tz.zone == "UTC" + + # Can't localize if already tz-aware + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + ts = Series(1, index=rng) + + with pytest.raises(TypeError, match="Already tz-aware"): + ts.tz_localize("US/Eastern") + + def test_series_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + + # GH#14402 + ts = Timestamp("2015-11-01 01:00:03") + expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") + expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") + + ser = Series([ts]) + expected0 = Series([expected0]) + expected1 = Series([expected1]) + + with pytest.raises(pytz.AmbiguousTimeError): + ser.dt.tz_localize("US/Central") + + result = ser.dt.tz_localize("US/Central", ambiguous=True) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=[True]) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=False) + tm.assert_series_equal(result, expected1) + + result = ser.dt.tz_localize("US/Central", ambiguous=[False]) + tm.assert_series_equal(result, expected1) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + @pytest.mark.parametrize( + "method, exp", + [ + ["shift_forward", "2015-03-29 03:00:00"], + ["NaT", NaT], + ["raise", None], + ["foo", "invalid"], + ], + ) + def test_series_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") + s = Series(1, dti) + if method == "raise": + with pytest.raises(pytz.NonExistentTimeError): + s.tz_localize(tz, nonexistent=method) + elif exp == "invalid": + with pytest.raises(ValueError): + dti.tz_localize(tz, nonexistent=method) + else: + result = s.tz_localize(tz, nonexistent=method) + expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_series_tz_localize_empty(self, tzstr): + # GH#2248 + ser = Series(dtype=object) + + ser2 = ser.tz_localize("utc") + assert ser2.index.tz == pytz.utc + + ser2 = ser.tz_localize(tzstr) + timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py new file mode 100644 index 0000000000000..b7f5f33294792 --- /dev/null +++ b/pandas/tests/series/methods/test_update.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestUpdate: + def test_update(self): + s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) + s2 = Series([np.nan, 3.5, np.nan, 5.0]) + s.update(s2) + + expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) + tm.assert_series_equal(s, expected) + + # GH 3217 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df["c"] = np.nan + + df["c"].update(Series(["foo"], index=[0])) + expected = DataFrame( + [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "other, dtype, expected", + [ + # other is int + ([61, 63], "int32", Series([10, 61, 12], dtype="int32")), + ([61, 63], "int64", Series([10, 61, 12])), + ([61, 63], float, Series([10.0, 61.0, 12.0])), + ([61, 63], object, Series([10, 61, 12], dtype=object)), + # other is float, but can be cast to int + ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")), + ([61.0, 63.0], "int64", Series([10, 61, 12])), + ([61.0, 63.0], float, Series([10.0, 61.0, 12.0])), + ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)), + # others is float, cannot be cast to int + ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], float, Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)), + # other is object, cannot be cast + ([(61,), (63,)], "int32", Series([10, (61,), 12])), + ([(61,), (63,)], "int64", Series([10, (61,), 12])), + ([(61,), (63,)], float, Series([10.0, (61,), 12.0])), + ([(61,), (63,)], object, Series([10, (61,), 12])), + ], + ) + def test_update_dtypes(self, other, dtype, expected): + + ser = Series([10, 11, 12], dtype=dtype) + other = Series(other, index=[1, 3]) + ser.update(other) + + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 71f6681e8c955..9be8744d7223f 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series +from pandas import Index, MultiIndex, Series import pandas._testing as tm @@ -31,62 +31,6 @@ def test_setindex(self, string_series): # Renaming - def test_rename(self, datetime_series): - ts = datetime_series - renamer = lambda x: x.strftime("%Y%m%d") - renamed = ts.rename(renamer) - assert renamed.index[0] == renamer(ts.index[0]) - - # dict - rename_dict = dict(zip(ts.index, renamed.index)) - renamed2 = ts.rename(rename_dict) - tm.assert_series_equal(renamed, renamed2) - - # partial dict - s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") - renamed = s.rename({"b": "foo", "d": "bar"}) - tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) - - # index with name - renamer = Series( - np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" - ) - renamed = renamer.rename({}) - assert renamed.index.name == renamer.index.name - - def test_rename_by_series(self): - s = Series(range(5), name="foo") - renamer = Series({1: 10, 2: 20}) - result = s.rename(renamer) - expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") - tm.assert_series_equal(result, expected) - - def test_rename_set_name(self): - s = Series(range(4), index=list("abcd")) - for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: - result = s.rename(name) - assert result.name == name - tm.assert_numpy_array_equal(result.index.values, s.index.values) - assert s.name is None - - def test_rename_set_name_inplace(self): - s = Series(range(3), index=list("abc")) - for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: - s.rename(name, inplace=True) - assert s.name == name - - exp = np.array(["a", "b", "c"], dtype=np.object_) - tm.assert_numpy_array_equal(s.index.values, exp) - - def test_rename_axis_supported(self): - # Supporting axis for compatibility, detailed in GH-18589 - s = Series(range(5)) - s.rename({}, axis=0) - s.rename({}, axis="index") - # TODO: clean up shared index validation - # with pytest.raises(ValueError, match="No axis named 5"): - # s.rename({}, axis=5) - def test_set_name_attribute(self): s = Series([1, 2, 3]) s2 = Series([1, 2, 3], name="bar") @@ -103,13 +47,6 @@ def test_set_name(self): assert s.name is None assert s is not s2 - def test_rename_inplace(self, datetime_series): - renamer = lambda x: x.strftime("%Y%m%d") - expected = renamer(datetime_series.index[0]) - - datetime_series.rename(renamer, inplace=True) - assert datetime_series.index[0] == expected - def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) @@ -117,94 +54,6 @@ def test_set_index_makes_timeseries(self): s.index = idx assert s.index.is_all_dates - def test_reset_index(self): - df = tm.makeDataFrame()[:5] - ser = df.stack() - ser.index.names = ["hash", "category"] - - ser.name = "value" - df = ser.reset_index() - assert "value" in df - - df = ser.reset_index(name="value2") - assert "value2" in df - - # check inplace - s = ser.reset_index(drop=True) - s2 = ser - s2.reset_index(drop=True, inplace=True) - tm.assert_series_equal(s, s2) - - # level - index = MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - ) - s = Series(np.random.randn(6), index=index) - rs = s.reset_index(level=1) - assert len(rs.columns) == 2 - - rs = s.reset_index(level=[0, 2], drop=True) - tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) - assert isinstance(rs, Series) - - def test_reset_index_name(self): - s = Series([1, 2, 3], index=Index(range(3), name="x")) - assert s.reset_index().index.name is None - assert s.reset_index(drop=True).index.name is None - - def test_reset_index_level(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) - - for levels in ["A", "B"], [0, 1]: - # With MultiIndex - s = df.set_index(["A", "B"])["C"] - - result = s.reset_index(level=levels[0]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = s.reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = s.reset_index(level=levels) - tm.assert_frame_equal(result, df) - - result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) - tm.assert_frame_equal(result, df[["C"]]) - - with pytest.raises(KeyError, match="Level E "): - s.reset_index(level=["A", "E"]) - - # With single-level Index - s = df.set_index("A")["B"] - - result = s.reset_index(level=levels[0]) - tm.assert_frame_equal(result, df[["A", "B"]]) - - result = s.reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df[["A", "B"]]) - - result = s.reset_index(level=levels[0], drop=True) - tm.assert_series_equal(result, df["B"]) - - with pytest.raises(IndexError, match="Too many levels"): - s.reset_index(level=[0, 1, 2]) - - # Check that .reset_index([],drop=True) doesn't fail - result = Series(range(4)).reset_index([], drop=True) - expected = Series(range(4)) - tm.assert_series_equal(result, expected) - - def test_reset_index_range(self): - # GH 12071 - s = Series(range(2), name="A", dtype="int64") - series_result = s.reset_index() - assert isinstance(series_result.index, RangeIndex) - series_expected = DataFrame( - [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) - ) - tm.assert_frame_equal(series_result, series_expected) - def test_reorder_levels(self): index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], @@ -268,25 +117,6 @@ def test_rename_axis_none(self, kwargs): expected = Series([1, 2, 3], index=expected_index) tm.assert_series_equal(result, expected) - def test_rename_with_custom_indexer(self): - # GH 27814 - class MyIndexer: - pass - - ix = MyIndexer() - s = Series([1, 2, 3]).rename(ix) - assert s.name is ix - - def test_rename_with_custom_indexer_inplace(self): - # GH 27814 - class MyIndexer: - pass - - ix = MyIndexer() - s = Series([1, 2, 3]) - s.rename(ix, inplace=True) - assert s.name is ix - def test_set_axis_inplace_axes(self, axis_series): # GH14636 ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64") @@ -323,21 +153,6 @@ def test_set_axis_inplace(self): with pytest.raises(ValueError, match="No axis named"): s.set_axis(list("abcd"), axis=axis, inplace=False) - def test_reset_index_drop_errors(self): - # GH 20925 - - # KeyError raised for series index when passed level name is missing - s = Series(range(4)) - with pytest.raises(KeyError, match="does not match index name"): - s.reset_index("wrong", drop=True) - with pytest.raises(KeyError, match="does not match index name"): - s.reset_index("wrong") - - # KeyError raised for series when level to be dropped is missing - s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) - with pytest.raises(KeyError, match="not found"): - s.reset_index("wrong", drop=True) - def test_droplevel(self): # GH20342 ser = Series([1, 2, 3, 4]) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 4afa083e97c7c..adb79f69c2d81 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -2,84 +2,27 @@ import pytest import pandas as pd -from pandas import DataFrame, Series -import pandas._testing as tm +from pandas import Series class TestSeriesCombine: - def test_combine_scalar(self): - # GH 21248 - # Note - combine() with another Series is tested elsewhere because - # it is used when testing operators - s = pd.Series([i * 10 for i in range(5)]) - result = s.combine(3, lambda x, y: x + y) - expected = pd.Series([i * 10 + 3 for i in range(5)]) - tm.assert_series_equal(result, expected) - - result = s.combine(22, lambda x, y: min(x, y)) - expected = pd.Series([min(i * 10, 22) for i in range(5)]) - tm.assert_series_equal(result, expected) - - def test_update(self): - s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) - s2 = Series([np.nan, 3.5, np.nan, 5.0]) - s.update(s2) - - expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) - tm.assert_series_equal(s, expected) - - # GH 3217 - df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) - df["c"] = np.nan - - df["c"].update(Series(["foo"], index=[0])) - expected = DataFrame( - [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] - ) - tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize( - "other, dtype, expected", - [ - # other is int - ([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")), - ([61, 63], "int64", pd.Series([10, 61, 12])), - ([61, 63], float, pd.Series([10.0, 61.0, 12.0])), - ([61, 63], object, pd.Series([10, 61, 12], dtype=object)), - # other is float, but can be cast to int - ([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")), - ([61.0, 63.0], "int64", pd.Series([10, 61, 12])), - ([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])), - ([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)), - # others is float, cannot be cast to int - ([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)), - # other is object, cannot be cast - ([(61,), (63,)], "int32", pd.Series([10, (61,), 12])), - ([(61,), (63,)], "int64", pd.Series([10, (61,), 12])), - ([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])), - ([(61,), (63,)], object, pd.Series([10, (61,), 12])), - ], + "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"] ) - def test_update_dtypes(self, other, dtype, expected): + def test_concat_empty_series_dtypes_match_roundtrips(self, dtype): + dtype = np.dtype(dtype) - s = Series([10, 11, 12], dtype=dtype) - other = Series(other, index=[1, 3]) - s.update(other) + result = pd.concat([Series(dtype=dtype)]) + assert result.dtype == dtype - tm.assert_series_equal(s, expected) + result = pd.concat([Series(dtype=dtype), Series(dtype=dtype)]) + assert result.dtype == dtype def test_concat_empty_series_dtypes_roundtrips(self): # round-tripping with self & like self dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]) - for dtype in dtypes: - assert pd.concat([Series(dtype=dtype)]).dtype == dtype - assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype - def int_result_type(dtype, dtype2): typs = {dtype.kind, dtype2.kind} if not len(typs - {"i", "u", "b"}) and ( @@ -118,35 +61,28 @@ def get_result_type(dtype, dtype2): result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype assert result.kind == expected - def test_concat_empty_series_dtypes(self): + @pytest.mark.parametrize( + "left,right,expected", + [ + # booleans + (np.bool_, np.int32, np.int32), + (np.bool_, np.float32, np.object_), + # datetime-like + ("m8[ns]", np.bool, np.object_), + ("m8[ns]", np.int64, np.object_), + ("M8[ns]", np.bool, np.object_), + ("M8[ns]", np.int64, np.object_), + # categorical + ("category", "category", "category"), + ("category", "object", "object"), + ], + ) + def test_concat_empty_series_dtypes(self, left, right, expected): + result = pd.concat([Series(dtype=left), Series(dtype=right)]) + assert result.dtype == expected - # booleans - assert ( - pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype - == np.int32 - ) - assert ( - pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype - == np.object_ - ) + def test_concat_empty_series_dtypes_triple(self): - # datetime-like - assert ( - pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype - == np.object_ - ) - assert ( - pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype - == np.object_ - ) - assert ( - pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype - == np.object_ - ) - assert ( - pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype - == np.object_ - ) assert ( pd.concat( [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)] @@ -154,11 +90,7 @@ def test_concat_empty_series_dtypes(self): == np.object_ ) - # categorical - assert ( - pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype - == "category" - ) + def test_concat_empty_series_dtype_category_with_array(self): # GH 18515 assert ( pd.concat( @@ -166,13 +98,8 @@ def test_concat_empty_series_dtypes(self): ).dtype == "float64" ) - assert ( - pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype - == "object" - ) - # sparse - # TODO: move? + def test_concat_empty_series_dtypes_sparse(self): result = pd.concat( [ Series(dtype="float64").astype("Sparse"), diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index b8be4ea137e3d..59ae0cd63690c 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -19,7 +19,6 @@ PeriodIndex, Series, TimedeltaIndex, - bdate_range, date_range, period_range, timedelta_range, @@ -622,18 +621,6 @@ def test_dt_accessor_updates_on_inplace(self): result = s.dt.date assert result[0] == result[2] - def test_between(self): - s = Series(bdate_range("1/1/2000", periods=20).astype(object)) - s[::2] = np.nan - - result = s[s.between(s[3], s[17])] - expected = s[3:18].dropna() - tm.assert_series_equal(result, expected) - - result = s[s.between(s[3], s[17], inclusive=False)] - expected = s[5:16].dropna() - tm.assert_series_equal(result, expected) - def test_date_tz(self): # GH11757 rng = pd.DatetimeIndex( diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 03fee389542e3..f41245c2872a7 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -52,12 +52,6 @@ def test_dropna(self): s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) tm.assert_series_equal(s.dropna(), Series([pd.Period("2011-01", freq="M")])) - def test_between(self): - left, right = self.series[[2, 7]] - result = self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - tm.assert_series_equal(result, expected) - # --------------------------------------------------------------------- # NaT support @@ -110,28 +104,6 @@ def test_align_series(self, join_type): ts.align(ts[::2], join=join_type) - def test_truncate(self): - # GH 17717 - idx1 = pd.PeriodIndex( - [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] - ) - series1 = pd.Series([1, 2, 3], index=idx1) - result1 = series1.truncate(after="2017-09-02") - - expected_idx1 = pd.PeriodIndex( - [pd.Period("2017-09-02"), pd.Period("2017-09-02")] - ) - tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) - - idx2 = pd.PeriodIndex( - [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] - ) - series2 = pd.Series([1, 2, 3], index=idx2) - result2 = series2.sort_index().truncate(after="2017-09-02") - - expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) - tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2)) - @pytest.mark.parametrize( "input_vals", [ diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 459377fb18f29..8f06ea69f5d66 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,13 +1,11 @@ -from datetime import datetime, time, timedelta +from datetime import datetime, timedelta from io import StringIO -from itertools import product import numpy as np import pytest from pandas._libs.tslib import iNaT from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -23,8 +21,6 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import BDay, BMonthEnd - def _simple_ts(start, end, freq="D"): rng = date_range(start, end, freq=freq) @@ -38,44 +34,6 @@ def assert_range_equal(left, right): class TestTimeSeries: - def test_asfreq(self): - ts = Series( - [0.0, 1.0, 2.0], - index=[ - datetime(2009, 10, 30), - datetime(2009, 11, 30), - datetime(2009, 12, 31), - ], - ) - - daily_ts = ts.asfreq("B") - monthly_ts = daily_ts.asfreq("BM") - tm.assert_series_equal(monthly_ts, ts) - - daily_ts = ts.asfreq("B", method="pad") - monthly_ts = daily_ts.asfreq("BM") - tm.assert_series_equal(monthly_ts, ts) - - daily_ts = ts.asfreq(BDay()) - monthly_ts = daily_ts.asfreq(BMonthEnd()) - tm.assert_series_equal(monthly_ts, ts) - - result = ts[:0].asfreq("M") - assert len(result) == 0 - assert result is not ts - - daily_ts = ts.asfreq("D", fill_value=-1) - result = daily_ts.value_counts().sort_index() - expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() - tm.assert_series_equal(result, expected) - - def test_asfreq_datetimeindex_empty_series(self): - # GH 14320 - index = pd.DatetimeIndex(["2016-09-29 11:00"]) - expected = Series(index=index, dtype=object).asfreq("H") - result = Series([3], index=index.copy()).asfreq("H") - tm.assert_index_equal(expected.index, result.index) - def test_autocorr(self, datetime_series): # Just run the function corr1 = datetime_series.autocorr() @@ -268,15 +226,6 @@ def test_series_repr_nat(self): ) assert result == expected - def test_asfreq_keep_index_name(self): - # GH #9854 - index_name = "bar" - index = pd.date_range("20130101", periods=20, name=index_name) - df = pd.DataFrame(list(range(20)), columns=["foo"], index=index) - - assert index_name == df.index.name - assert index_name == df.asfreq("10D").index.name - def test_promote_datetime_date(self): rng = date_range("1/1/2000", periods=20) ts = Series(np.random.randn(20), index=rng) @@ -300,26 +249,6 @@ def test_promote_datetime_date(self): expected = rng.get_indexer(ts_slice.index) tm.assert_numpy_array_equal(result, expected) - def test_asfreq_normalize(self): - rng = date_range("1/1/2000 09:30", periods=20) - norm = date_range("1/1/2000", periods=20) - vals = np.random.randn(20) - ts = Series(vals, index=rng) - - result = ts.asfreq("D", normalize=True) - norm = date_range("1/1/2000", periods=20) - expected = Series(vals, index=norm) - - tm.assert_series_equal(result, expected) - - vals = np.random.randn(20, 3) - ts = DataFrame(vals, index=rng) - - result = ts.asfreq("D", normalize=True) - expected = DataFrame(vals, index=norm) - - tm.assert_frame_equal(result, expected) - def test_first_subset(self): ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h") result = ts.first("10d") @@ -380,180 +309,6 @@ def test_format_pre_1900_dates(self): ts = Series(1, index=rng) repr(ts) - def test_at_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - rs = ts.at_time(rng[1]) - assert (rs.index.hour == rng[1].hour).all() - assert (rs.index.minute == rng[1].minute).all() - assert (rs.index.second == rng[1].second).all() - - result = ts.at_time("9:30") - expected = ts.at_time(time(9, 30)) - tm.assert_series_equal(result, expected) - - df = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts[time(9, 30)] - result_df = df.loc[time(9, 30)] - expected = ts[(rng.hour == 9) & (rng.minute == 30)] - exp_df = df[(rng.hour == 9) & (rng.minute == 30)] - - # FIXME: dont leave commented-out - # expected.index = date_range('1/1/2000', '1/4/2000') - - tm.assert_series_equal(result, expected) - tm.assert_frame_equal(result_df, exp_df) - - chunk = df.loc["1/4/2000":] - result = chunk.loc[time(9, 30)] - expected = result_df[-1:] - tm.assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range("1/1/2000", "1/31/2000") - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.at_time(time(0, 0)) - tm.assert_series_equal(result, ts) - - # time doesn't exist - rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = Series(np.random.randn(len(rng)), rng) - rs = ts.at_time("16:00") - assert len(rs) == 0 - - def test_at_time_raises(self): - # GH20725 - ser = pd.Series("a b c".split()) - msg = "Index must be DatetimeIndex" - with pytest.raises(TypeError, match=msg): - ser.at_time("00:00") - - def test_between(self): - series = Series(date_range("1/1/2000", periods=10)) - left, right = series[[2, 7]] - - result = series.between(left, right) - expected = (series >= left) & (series <= right) - tm.assert_series_equal(result, expected) - - def test_between_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(0, 0) - etime = time(1, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert t >= stime - else: - assert t > stime - - if inc_end: - assert t <= etime - else: - assert t < etime - - result = ts.between_time("00:00", "01:00") - expected = ts.between_time(stime, etime) - tm.assert_series_equal(result, expected) - - # across midnight - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(22, 0) - etime = time(9, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert (t >= stime) or (t <= etime) - else: - assert (t > stime) or (t <= etime) - - if inc_end: - assert (t <= etime) or (t >= stime) - else: - assert (t < etime) or (t >= stime) - - def test_between_time_raises(self): - # GH20725 - ser = pd.Series("a b c".split()) - msg = "Index must be DatetimeIndex" - with pytest.raises(TypeError, match=msg): - ser.between_time(start_time="00:00", end_time="12:00") - - def test_between_time_types(self): - # GH11818 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" - with pytest.raises(ValueError, match=msg): - rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - frame = DataFrame({"A": 0}, index=rng) - with pytest.raises(ValueError, match=msg): - frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - series = Series(0, index=rng) - with pytest.raises(ValueError, match=msg): - series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - @td.skip_if_has_locale - def test_between_time_formats(self): - # GH11818 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - - strings = [ - ("2:00", "2:30"), - ("0200", "0230"), - ("2:00am", "2:30am"), - ("0200am", "0230am"), - ("2:00:00", "2:30:00"), - ("020000", "023000"), - ("2:00:00am", "2:30:00am"), - ("020000am", "023000am"), - ] - expected_length = 28 - - for time_string in strings: - assert len(ts.between_time(*time_string)) == expected_length - - def test_between_time_axis(self): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime, etime = ("08:00:00", "09:00:00") - expected_length = 7 - - assert len(ts.between_time(stime, etime)) == expected_length - assert len(ts.between_time(stime, etime, axis=0)) == expected_length - msg = "No axis named 1 for object type " - with pytest.raises(ValueError, match=msg): - ts.between_time(stime, etime, axis=1) - def test_to_period(self): from pandas.core.indexes.period import period_range diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index a363f927d10a9..e729ff91293a8 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -10,207 +10,12 @@ from pandas._libs.tslibs import conversion, timezones -from pandas import DatetimeIndex, Index, NaT, Series, Timestamp +from pandas import Series, Timestamp import pandas._testing as tm from pandas.core.indexes.datetimes import date_range class TestSeriesTimezones: - # ----------------------------------------------------------------- - # Series.tz_localize - def test_series_tz_localize(self): - - rng = date_range("1/1/2011", periods=100, freq="H") - ts = Series(1, index=rng) - - result = ts.tz_localize("utc") - assert result.index.tz.zone == "UTC" - - # Can't localize if already tz-aware - rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") - ts = Series(1, index=rng) - - with pytest.raises(TypeError, match="Already tz-aware"): - ts.tz_localize("US/Eastern") - - def test_series_tz_localize_ambiguous_bool(self): - # make sure that we are correctly accepting bool values as ambiguous - - # GH#14402 - ts = Timestamp("2015-11-01 01:00:03") - expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") - expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") - - ser = Series([ts]) - expected0 = Series([expected0]) - expected1 = Series([expected1]) - - with pytest.raises(pytz.AmbiguousTimeError): - ser.dt.tz_localize("US/Central") - - result = ser.dt.tz_localize("US/Central", ambiguous=True) - tm.assert_series_equal(result, expected0) - - result = ser.dt.tz_localize("US/Central", ambiguous=[True]) - tm.assert_series_equal(result, expected0) - - result = ser.dt.tz_localize("US/Central", ambiguous=False) - tm.assert_series_equal(result, expected1) - - result = ser.dt.tz_localize("US/Central", ambiguous=[False]) - tm.assert_series_equal(result, expected1) - - @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) - @pytest.mark.parametrize( - "method, exp", - [ - ["shift_forward", "2015-03-29 03:00:00"], - ["NaT", NaT], - ["raise", None], - ["foo", "invalid"], - ], - ) - def test_series_tz_localize_nonexistent(self, tz, method, exp): - # GH 8917 - n = 60 - dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") - s = Series(1, dti) - if method == "raise": - with pytest.raises(pytz.NonExistentTimeError): - s.tz_localize(tz, nonexistent=method) - elif exp == "invalid": - with pytest.raises(ValueError): - dti.tz_localize(tz, nonexistent=method) - else: - result = s.tz_localize(tz, nonexistent=method) - expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) - def test_series_tz_localize_empty(self, tzstr): - # GH#2248 - ser = Series(dtype=object) - - ser2 = ser.tz_localize("utc") - assert ser2.index.tz == pytz.utc - - ser2 = ser.tz_localize(tzstr) - timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) - - # ----------------------------------------------------------------- - # Series.tz_convert - - def test_series_tz_convert(self): - rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") - ts = Series(1, index=rng) - - result = ts.tz_convert("Europe/Berlin") - assert result.index.tz.zone == "Europe/Berlin" - - # can't convert tz-naive - rng = date_range("1/1/2011", periods=200, freq="D") - ts = Series(1, index=rng) - - with pytest.raises(TypeError, match="Cannot convert tz-naive"): - ts.tz_convert("US/Eastern") - - def test_series_tz_convert_to_utc(self): - base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC") - idx1 = base.tz_convert("Asia/Tokyo")[:2] - idx2 = base.tz_convert("US/Eastern")[1:] - - res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) - tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) - - # ----------------------------------------------------------------- - # Series.append - - def test_series_append_aware(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) - - exp_index = DatetimeIndex( - ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern" - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) - - exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC") - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - utc = rng1.tz - assert utc == ts_result.index.tz - - # GH#7795 - # different tz coerces to object dtype, not UTC - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) - exp_index = Index( - [ - Timestamp("1/1/2011 01:00", tz="US/Eastern"), - Timestamp("1/1/2011 02:00", tz="US/Central"), - ] - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - - def test_series_append_aware_naive(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) - - expected = ser1.index.astype(object).append(ser2.index.astype(object)) - assert ts_result.index.equals(expected) - - # mixed - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = range(100) - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) - - expected = ser1.index.astype(object).append(ser2.index) - assert ts_result.index.equals(expected) - - def test_series_append_dst(self): - rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - ser1 = Series([1, 2, 3], index=rng1) - ser2 = Series([10, 11, 12], index=rng2) - ts_result = ser1.append(ser2) - - exp_index = DatetimeIndex( - [ - "2016-01-01 01:00", - "2016-01-01 02:00", - "2016-01-01 03:00", - "2016-08-01 01:00", - "2016-08-01 02:00", - "2016-08-01 03:00", - ], - tz="US/Eastern", - ) - exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - # ----------------------------------------------------------------- - def test_dateutil_tzoffset_support(self): values = [188.5, 328.25] tzinfo = tzoffset(None, 7200) @@ -225,15 +30,6 @@ def test_dateutil_tzoffset_support(self): # it works! #2443 repr(series.index[0]) - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) - def test_tz_aware_asfreq(self, tz): - dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) - - ser = Series(np.random.randn(len(dr)), index=dr) - - # it works! - ser.asfreq("T") - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_string_index_alias_tz_aware(self, tz): rng = date_range("1/1/2000", periods=10, tz=tz) @@ -299,28 +95,6 @@ def test_series_align_aware(self): assert new1.index.tz == pytz.UTC assert new2.index.tz == pytz.UTC - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) - def test_localized_at_time_between_time(self, tzstr): - from datetime import time - - tz = timezones.maybe_get_tz(tzstr) - - rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_local = ts.tz_localize(tzstr) - - result = ts_local.at_time(time(10, 0)) - expected = ts.at_time(time(10, 0)).tz_localize(tzstr) - tm.assert_series_equal(result, expected) - assert timezones.tz_compare(result.index.tz, tz) - - t1, t2 = time(10, 0), time(11, 0) - result = ts_local.between_time(t1, t2) - expected = ts.between_time(t1, t2).tz_localize(tzstr) - tm.assert_series_equal(result, expected) - assert timezones.tz_compare(result.index.tz, tz) - @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"]) def test_getitem_pydatetime_tz(self, tzstr): tz = timezones.maybe_get_tz(tzstr) @@ -335,14 +109,6 @@ def test_getitem_pydatetime_tz(self, tzstr): time_datetime = conversion.localize_pydatetime(dt, tz) assert ts[time_pandas] == ts[time_datetime] - def test_series_truncate_datetimeindex_tz(self): - # GH 9243 - idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") - s = Series(range(len(idx)), index=idx) - result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) - expected = Series([1, 2, 3], index=idx[1:4]) - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize( "method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]] diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index d72c00ceb0045..515d798fe4322 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -17,6 +17,7 @@ "EmptyDataError", "ParserWarning", "MergeError", + "OptionError", ], ) def test_exception_importable(exc): diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 0d2c81c4ea6c7..e36ea662fac8b 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -1,8 +1,26 @@ import re +import pytest + import pandas as pd +@pytest.mark.filterwarnings( + # openpyxl + "ignore:defusedxml.lxml is no longer supported:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # html5lib + "ignore:Using or importing the ABCs from:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # fastparquet + "ignore:pandas.core.index is deprecated:FutureWarning" +) +@pytest.mark.filterwarnings( + # pandas_datareader + "ignore:pandas.util.testing is deprecated:FutureWarning" +) def test_show_versions(capsys): # gh-32041 pd.show_versions() diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 99b2b9e9f5f6e..f9502cc22b0c6 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -4,13 +4,23 @@ import os import platform import struct -import subprocess import sys from typing import List, Optional, Tuple, Union from pandas.compat._optional import VERSIONS, _get_version, import_optional_dependency +def _get_commit_hash() -> Optional[str]: + """ + Use vendored versioneer code to get git hash, which handles + git worktree correctly. + """ + from pandas._version import get_versions + + versions = get_versions() + return versions["full-revisionid"] + + def get_sys_info() -> List[Tuple[str, Optional[Union[str, int]]]]: """ Returns system information as a list @@ -18,20 +28,7 @@ def get_sys_info() -> List[Tuple[str, Optional[Union[str, int]]]]: blob: List[Tuple[str, Optional[Union[str, int]]]] = [] # get full commit hash - commit = None - if os.path.isdir(".git") and os.path.isdir("pandas"): - try: - pipe = subprocess.Popen( - 'git log --format="%H" -n 1'.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - so, serr = pipe.communicate() - except (OSError, ValueError): - pass - else: - if pipe.returncode == 0: - commit = so.decode("utf-8").strip().strip('"') + commit = _get_commit_hash() blob.append(("commit", commit))