|
| 1 | +import sys |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pytest |
| 5 | + |
| 6 | +from pandas.compat import PYPY |
| 7 | + |
| 8 | +from pandas.core.dtypes.common import ( |
| 9 | + is_categorical_dtype, |
| 10 | + is_datetime64_dtype, |
| 11 | + is_datetime64tz_dtype, |
| 12 | + is_object_dtype, |
| 13 | +) |
| 14 | + |
| 15 | +import pandas as pd |
| 16 | +from pandas import DataFrame, Index, IntervalIndex, Series |
| 17 | +import pandas._testing as tm |
| 18 | + |
| 19 | + |
@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
@pytest.mark.parametrize("klass", [Series, DataFrame])
def test_binary_ops_docstring(klass, op_name, op):
    """
    Check that arithmetic method docstrings spell out their operator form.

    For ``klass.<op_name>`` the docstring must contain ``"<klass> <op> other"``
    and for the reflected ``klass.r<op_name>`` it must contain
    ``"other <op> <klass>"``, where ``<klass>`` is the lower-cased class name.
    """
    # The all_arithmetic_functions fixture with _get_opstr is deliberately not
    # used here: _get_opstr is used internally in the dynamic implementation of
    # the docstring, so it cannot serve as an independent oracle.
    left = klass.__name__.lower()
    right = "other"

    forward_doc = getattr(klass, op_name).__doc__
    assert f"{left} {op} {right}" in forward_doc

    # reverse version of the binary ops
    reflected_doc = getattr(klass, f"r{op_name}").__doc__
    assert f"{right} {op} {left}" in reflected_doc
| 44 | + |
| 45 | + |
def test_none_comparison(series_with_simple_index):
    """
    Check how a Series compares against ``None``.

    The first element is overwritten with NaN, then positions 0 and 1 of each
    comparison result are inspected:

    * ``==`` against None (either operand order) must be False,
    * ``!=`` against None (either operand order) must be True,
    * ordering comparisons must raise ``TypeError`` for datetime64 /
      datetime64tz data and must be False otherwise.

    The test is skipped for IntervalIndex-backed series and for series with
    fewer than two elements.
    """
    series = series_with_simple_index
    if isinstance(series.index, IntervalIndex):
        # IntervalIndex breaks on "series[0] = np.nan" below
        pytest.skip("IntervalIndex doesn't support assignment")
    if len(series) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if len(series) < 2:
        # Every check below reads both iat[0] and iat[1]; without this guard a
        # single-element series would fail with an unrelated IndexError.
        pytest.skip("Test needs at least two elements")

    # bug brought up by #1079
    # changed from TypeError in 0.17.0
    series[0] = np.nan

    # noinspection PyComparisonWithNone
    result = series == None  # noqa
    assert not result.iat[0]
    assert not result.iat[1]

    # noinspection PyComparisonWithNone
    result = series != None  # noqa
    assert result.iat[0]
    assert result.iat[1]

    result = None == series  # noqa
    assert not result.iat[0]
    assert not result.iat[1]

    result = None != series  # noqa
    assert result.iat[0]
    assert result.iat[1]

    if is_datetime64_dtype(series) or is_datetime64tz_dtype(series):
        # Following DatetimeIndex (and Timestamp) convention,
        # inequality comparisons with Series[datetime64] raise
        msg = "Invalid comparison"
        with pytest.raises(TypeError, match=msg):
            None > series
        with pytest.raises(TypeError, match=msg):
            series > None
    else:
        result = None > series
        assert not result.iat[0]
        assert not result.iat[1]

        result = series < None
        assert not result.iat[0]
        assert not result.iat[1]
| 92 | + |
| 93 | + |
def test_ndarray_compat_properties(index_or_series_obj):
    """
    Exercise the ndarray-like attributes of an Index or Series.

    Supported attributes must be present and not None, the deprecated ones
    must be absent, ``item()`` must raise ``ValueError`` for the fixture
    object, and ``ndim`` / ``size`` must describe a one-dimensional container.
    """
    obj = index_or_series_obj

    # Check that we work.
    for attr in ("shape", "dtype", "T", "nbytes"):
        assert getattr(obj, attr, None) is not None

    # deprecated properties
    for attr in ("flags", "strides", "itemsize", "base", "data"):
        assert not hasattr(obj, attr)

    with pytest.raises(
        ValueError, match="can only convert an array of size 1 to a Python scalar"
    ):
        obj.item()  # len > 1

    assert obj.size == len(obj)
    assert obj.ndim == 1

    # A single-element container converts to its scalar.
    assert Series([1]).item() == 1
    assert Index([1]).item() == 1
| 114 | + |
| 115 | + |
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
def test_memory_usage(index_or_series_obj):
    """
    Compare shallow and deep ``memory_usage`` and cross-check ``sys.getsizeof``.

    Empty objects must report 0 for both; objects whose data (or, for a
    Series, whose index) is object or categorical dtype must report more
    with ``deep=True``; everything else must report identical numbers.
    """
    obj = index_or_series_obj
    shallow = obj.memory_usage()
    deep = obj.memory_usage(deep=True)

    # Only a Series carries a separate index whose dtype also matters.
    is_series = isinstance(obj, Series)
    holds_objects = is_object_dtype(obj) or (
        is_series and is_object_dtype(obj.index)
    )
    holds_categories = is_categorical_dtype(obj) or (
        is_series and is_categorical_dtype(obj.index)
    )

    if not len(obj):
        assert deep == shallow == 0
    elif holds_objects or holds_categories:
        # only deep will pick them up
        assert deep > shallow
    else:
        assert shallow == deep

    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    assert abs(deep - sys.getsizeof(obj)) < 100
| 141 | + |
| 142 | + |
| 143 | +def test_memory_usage_components_series(series_with_simple_index): |
| 144 | + series = series_with_simple_index |
| 145 | + total_usage = series.memory_usage(index=True) |
| 146 | + non_index_usage = series.memory_usage(index=False) |
| 147 | + index_usage = series.index.memory_usage() |
| 148 | + assert total_usage == non_index_usage + index_usage |
| 149 | + |
| 150 | + |
| 151 | +def test_memory_usage_components_narrow_series(narrow_series): |
| 152 | + series = narrow_series |
| 153 | + total_usage = series.memory_usage(index=True) |
| 154 | + non_index_usage = series.memory_usage(index=False) |
| 155 | + index_usage = series.index.memory_usage() |
| 156 | + assert total_usage == non_index_usage + index_usage |
| 157 | + |
| 158 | + |
def test_searchsorted(index_or_series_obj):
    """
    ``np.searchsorted`` on an Index or Series must return an in-range position.

    The lookup value is the maximum element (0 when the object is empty) and
    the call is made both without and with an explicit ``sorter``.
    """
    # numpy.searchsorted calls obj.searchsorted under the hood.
    # See gh-12238
    obj = index_or_series_obj

    if isinstance(obj, pd.MultiIndex):
        # See gh-14833
        pytest.skip("np.searchsorted doesn't work on pd.MultiIndex")

    size = len(obj)
    largest = max(obj, default=0)

    # First pass: no extra arguments; second pass: identity sorter.
    for extra in ({}, {"sorter": range(size)}):
        position = np.searchsorted(obj, largest, **extra)
        assert 0 <= position <= size
| 174 | + |
| 175 | + |
def test_access_by_position(indices):
    """
    Check that positional access agrees between an Index and a Series built
    from it.

    The first, an interior, and the last position are compared; indexing one
    past the end must raise ``IndexError`` on both the Index and
    ``Series.iloc``. Empty indexes and MultiIndex are skipped.
    """
    index = indices

    if len(index) == 0:
        pytest.skip("Test doesn't make sense on empty data")
    elif isinstance(index, pd.MultiIndex):
        pytest.skip("Can't instantiate Series from MultiIndex")

    size = len(index)
    series = pd.Series(index)

    # Probe position 5 when it exists; for shorter indexes fall back to the
    # last valid position so the test does not die on an unrelated IndexError.
    probe = min(5, size - 1)

    assert index[0] == series.iloc[0]
    assert index[probe] == series.iloc[probe]
    assert index[-1] == series.iloc[-1]

    assert index[-1] == index[size - 1]

    msg = f"index {size} is out of bounds for axis 0 with size {size}"
    with pytest.raises(IndexError, match=msg):
        index[size]
    msg = "single positional indexer is out-of-bounds"
    with pytest.raises(IndexError, match=msg):
        series.iloc[size]
| 198 | + |
| 199 | + |
def test_get_indexer_non_unique_dtype_mismatch():
    """
    Look up an integer label in a string Index via ``get_indexer_non_unique``.

    The indexer is expected to be ``[-1]`` (intp) and the missing array
    ``[0]`` (int64).
    """
    # GH 25459
    labels = pd.Index(["A", "B"])
    target = pd.Index([0])
    indexes, missing = labels.get_indexer_non_unique(target)

    expected_indexes = np.array([-1], dtype=np.intp)
    expected_missing = np.array([0], dtype=np.int64)
    tm.assert_numpy_array_equal(expected_indexes, indexes)
    tm.assert_numpy_array_equal(expected_missing, missing)
0 commit comments