Skip to content

Commit 0aa5e61

Browse files
authored
Merge pull request #139 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 28f02b1 + c331ba8 commit 0aa5e61

34 files changed

+271
-94
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,7 @@ jobs:
155155
run: |
156156
source activate pandas-dev
157157
158-
pytest pandas/tests/frame/methods
159-
pytest pandas/tests/frame/test_constructors.py
160-
pytest pandas/tests/frame/test_*
161-
pytest pandas/tests/frame/test_reductions.py
158+
pytest pandas/tests/frame/
162159
pytest pandas/tests/reductions/
163160
pytest pandas/tests/generic/test_generic.py
164161
pytest pandas/tests/arithmetic/
@@ -170,10 +167,6 @@ jobs:
170167
pytest pandas/tests/series/test_*
171168
172169
# indexing subset (temporary since other tests don't pass yet)
173-
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
174-
pytest pandas/tests/frame/indexing/test_where.py
175-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
176-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
177170
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
178171
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179172
@@ -185,6 +178,12 @@ jobs:
185178
pytest pandas/tests/dtypes/
186179
pytest pandas/tests/generic/
187180
pytest pandas/tests/indexes/
181+
pytest pandas/tests/io/test_* -m "not slow and not clipboard"
182+
pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
183+
pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
184+
pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
185+
pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
186+
pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
188187
pytest pandas/tests/libs/
189188
pytest pandas/tests/plotting/
190189
pytest pandas/tests/scalar/

pandas/core/frame.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6464,6 +6464,57 @@ def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame:
64646464
Returns
64656465
-------
64666466
DataFrame
6467+
6468+
Examples
6469+
--------
6470+
>>> df = pd.DataFrame(
6471+
... {"Grade": ["A", "B", "A", "C"]},
6472+
... index=[
6473+
... ["Final exam", "Final exam", "Coursework", "Coursework"],
6474+
... ["History", "Geography", "History", "Geography"],
6475+
... ["January", "February", "March", "April"],
6476+
... ],
6477+
... )
6478+
>>> df
6479+
Grade
6480+
Final exam History January A
6481+
Geography February B
6482+
Coursework History March A
6483+
Geography April C
6484+
6485+
In the following example, we will swap the levels of the index.
6486+
Here, we swap the levels of the row index (``axis=0``, the default), but the
6487+
levels of the columns can be swapped in a similar manner with ``axis=1``.
6488+
By not supplying any arguments for i and j, we swap the last and
6489+
second-to-last levels.
6490+
6491+
>>> df.swaplevel()
6492+
Grade
6493+
Final exam January History A
6494+
February Geography B
6495+
Coursework March History A
6496+
April Geography C
6497+
6498+
By supplying one argument, we can choose which level to swap the last
6499+
level with. We can, for example, swap the first level with the last one as
6500+
follows.
6501+
6502+
>>> df.swaplevel(0)
6503+
Grade
6504+
January History Final exam A
6505+
February Geography Final exam B
6506+
March History Coursework A
6507+
April Geography Coursework C
6508+
6509+
We can also define explicitly which levels we want to swap by supplying values
6510+
for both i and j. Here, for example, we swap the first and second levels.
6511+
6512+
>>> df.swaplevel(0, 1)
6513+
Grade
6514+
History Final exam January A
6515+
Geography Final exam February B
6516+
History Coursework March A
6517+
Geography Coursework April C
64676518
"""
64686519
result = self.copy()
64696520

pandas/core/indexers.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None:
235235
# Indexer Conversion
236236

237237

238-
def maybe_convert_indices(indices, n: int):
238+
def maybe_convert_indices(indices, n: int, verify: bool = True):
239239
"""
240240
Attempt to convert indices into valid, positive indices.
241241
@@ -248,6 +248,8 @@ def maybe_convert_indices(indices, n: int):
248248
Array of indices that we are to convert.
249249
n : int
250250
Number of elements in the array that we are indexing.
251+
verify : bool, default True
252+
Check that all entries are between 0 and n - 1, inclusive.
251253
252254
Returns
253255
-------
@@ -273,9 +275,10 @@ def maybe_convert_indices(indices, n: int):
273275
indices = indices.copy()
274276
indices[mask] += n
275277

276-
mask = (indices >= n) | (indices < 0)
277-
if mask.any():
278-
raise IndexError("indices are out-of-bounds")
278+
if verify:
279+
mask = (indices >= n) | (indices < 0)
280+
if mask.any():
281+
raise IndexError("indices are out-of-bounds")
279282
return indices
280283

281284

pandas/core/internals/array_manager.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
861861
# DataFrame into 1D array when loc is an integer
862862
if isinstance(value, np.ndarray) and value.ndim == 2:
863863
assert value.shape[1] == 1
864-
value = value[0, :]
864+
value = value[:, 0]
865865

866866
# TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
867867
# but we should avoid that and pass directly the proper array
@@ -1021,7 +1021,7 @@ def _reindex_indexer(
10211021

10221022
return type(self)(new_arrays, new_axes, verify_integrity=False)
10231023

1024-
def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
1024+
def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
10251025
"""
10261026
Take items along any axis.
10271027
"""
@@ -1034,12 +1034,7 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True
10341034
)
10351035

10361036
n = self.shape_proper[axis]
1037-
if convert:
1038-
indexer = maybe_convert_indices(indexer, n)
1039-
1040-
if verify:
1041-
if ((indexer == -1) | (indexer >= n)).any():
1042-
raise Exception("Indices must be nonzero and less than the axis length")
1037+
indexer = maybe_convert_indices(indexer, n, verify=verify)
10431038

10441039
new_labels = self._axes[axis].take(indexer)
10451040
return self._reindex_indexer(
@@ -1168,10 +1163,6 @@ def axes(self):
11681163
def index(self) -> Index:
11691164
return self._axes[0]
11701165

1171-
@property
1172-
def array(self):
1173-
return self.arrays[0]
1174-
11751166
@property
11761167
def dtype(self):
11771168
return self.array.dtype

pandas/core/internals/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,13 @@ def isna(self: T, func) -> T:
126126
class SingleDataManager(DataManager):
127127
ndim = 1
128128

129+
@property
130+
def array(self):
131+
"""
132+
Quick access to the backing array of the Block or SingleArrayManager.
133+
"""
134+
return self.arrays[0] # type: ignore[attr-defined]
135+
129136

130137
def interleaved_dtype(dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
131138
"""

pandas/core/internals/managers.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,23 +1491,29 @@ def _make_na_block(self, placement, fill_value=None):
14911491
block_values.fill(fill_value)
14921492
return new_block(block_values, placement=placement, ndim=block_values.ndim)
14931493

1494-
def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
1494+
def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
14951495
"""
14961496
Take items along any axis.
1497+
1498+
indexer : np.ndarray or slice
1499+
axis : int, default 1
1500+
verify : bool, default True
1501+
Check that all entries are between 0 and self.shape[axis] - 1, inclusive.
1502+
Pass verify=False if this check has been done by the caller.
1503+
1504+
Returns
1505+
-------
1506+
BlockManager
14971507
"""
1508+
# We have 6 tests that get here with a slice
14981509
indexer = (
14991510
np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
15001511
if isinstance(indexer, slice)
15011512
else np.asanyarray(indexer, dtype="int64")
15021513
)
15031514

15041515
n = self.shape[axis]
1505-
if convert:
1506-
indexer = maybe_convert_indices(indexer, n)
1507-
1508-
if verify:
1509-
if ((indexer == -1) | (indexer >= n)).any():
1510-
raise Exception("Indices must be nonzero and less than the axis length")
1516+
indexer = maybe_convert_indices(indexer, n, verify=verify)
15111517

15121518
new_labels = self.axes[axis].take(indexer)
15131519
return self.reindex_indexer(

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pytest
99

1010
from pandas._libs import iNaT
11+
import pandas.util._test_decorators as td
1112

1213
from pandas.core.dtypes.common import is_integer
1314

@@ -534,6 +535,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self):
534535
with pytest.raises(KeyError, match=r"^3$"):
535536
df2.loc[3:11] = 0
536537

538+
@td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view
537539
def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):
538540
sliced = float_string_frame.iloc[:, -3:]
539541
assert sliced["D"].dtype == np.float64
@@ -592,6 +594,7 @@ def test_getitem_fancy_scalar(self, float_frame):
592594
for idx in f.index[::5]:
593595
assert ix[idx, col] == ts[idx]
594596

597+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
595598
def test_setitem_fancy_scalar(self, float_frame):
596599
f = float_frame
597600
expected = float_frame.copy()
@@ -631,6 +634,7 @@ def test_getitem_fancy_boolean(self, float_frame):
631634
expected = f.reindex(index=f.index[boolvec], columns=["C", "D"])
632635
tm.assert_frame_equal(result, expected)
633636

637+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
634638
def test_setitem_fancy_boolean(self, float_frame):
635639
# from 2d, set with booleans
636640
frame = float_frame.copy()
@@ -990,21 +994,29 @@ def test_iloc_row(self):
990994
expected = df.loc[8:14]
991995
tm.assert_frame_equal(result, expected)
992996

997+
# list of integers
998+
result = df.iloc[[1, 2, 4, 6]]
999+
expected = df.reindex(df.index[[1, 2, 4, 6]])
1000+
tm.assert_frame_equal(result, expected)
1001+
1002+
def test_iloc_row_slice_view(self, using_array_manager):
1003+
df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2))
1004+
original = df.copy()
1005+
9931006
# verify slice is view
9941007
# setting it makes it raise/warn
1008+
subset = df.iloc[slice(4, 8)]
1009+
9951010
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
9961011
with pytest.raises(com.SettingWithCopyError, match=msg):
997-
result[2] = 0.0
1012+
subset[2] = 0.0
9981013

999-
exp_col = df[2].copy()
1000-
exp_col[4:8] = 0.0
1014+
exp_col = original[2].copy()
1015+
# TODO(ArrayManager) verify it is expected that the original didn't change
1016+
if not using_array_manager:
1017+
exp_col[4:8] = 0.0
10011018
tm.assert_series_equal(df[2], exp_col)
10021019

1003-
# list of integers
1004-
result = df.iloc[[1, 2, 4, 6]]
1005-
expected = df.reindex(df.index[[1, 2, 4, 6]])
1006-
tm.assert_frame_equal(result, expected)
1007-
10081020
def test_iloc_col(self):
10091021

10101022
df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
@@ -1022,19 +1034,32 @@ def test_iloc_col(self):
10221034
expected = df.loc[:, 8:14]
10231035
tm.assert_frame_equal(result, expected)
10241036

1025-
# verify slice is view
1026-
# and that we are setting a copy
1027-
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
1028-
with pytest.raises(com.SettingWithCopyError, match=msg):
1029-
result[8] = 0.0
1030-
1031-
assert (df[8] == 0).all()
1032-
10331037
# list of integers
10341038
result = df.iloc[:, [1, 2, 4, 6]]
10351039
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
10361040
tm.assert_frame_equal(result, expected)
10371041

1042+
def test_iloc_col_slice_view(self, using_array_manager):
1043+
df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
1044+
original = df.copy()
1045+
subset = df.iloc[:, slice(4, 8)]
1046+
1047+
if not using_array_manager:
1048+
# verify slice is view
1049+
# and that we are setting a copy
1050+
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
1051+
with pytest.raises(com.SettingWithCopyError, match=msg):
1052+
subset[8] = 0.0
1053+
1054+
assert (df[8] == 0).all()
1055+
else:
1056+
# TODO(ArrayManager) verify this is the desired behaviour
1057+
subset[8] = 0.0
1058+
# subset changed
1059+
assert (subset[8] == 0).all()
1060+
# but df itself did not change (setitem replaces full column)
1061+
tm.assert_frame_equal(df, original)
1062+
10381063
def test_loc_duplicates(self):
10391064
# gh-17105
10401065

@@ -1218,7 +1243,7 @@ def test_setitem(self, uint64_frame):
12181243
)
12191244

12201245

1221-
def test_object_casting_indexing_wraps_datetimelike():
1246+
def test_object_casting_indexing_wraps_datetimelike(using_array_manager):
12221247
# GH#31649, check the indexing methods all the way down the stack
12231248
df = DataFrame(
12241249
{
@@ -1240,6 +1265,10 @@ def test_object_casting_indexing_wraps_datetimelike():
12401265
assert isinstance(ser.values[1], Timestamp)
12411266
assert isinstance(ser.values[2], pd.Timedelta)
12421267

1268+
if using_array_manager:
1269+
# the remainder of the test checks BlockManager internals
1270+
return
1271+
12431272
mgr = df._mgr
12441273
mgr._rebuild_blknos_and_blklocs()
12451274
arr = mgr.fast_xs(0)

pandas/tests/frame/indexing/test_insert.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,17 @@ def test_insert_with_columns_dups(self):
7272
)
7373
tm.assert_frame_equal(df, exp)
7474

75-
def test_insert_item_cache(self):
75+
def test_insert_item_cache(self, using_array_manager):
7676
df = DataFrame(np.random.randn(4, 3))
7777
ser = df[0]
7878

79-
with tm.assert_produces_warning(PerformanceWarning):
79+
if using_array_manager:
80+
expected_warning = None
81+
else:
82+
# with BlockManager warn about high fragmentation of single dtype
83+
expected_warning = PerformanceWarning
84+
85+
with tm.assert_produces_warning(expected_warning):
8086
for n in range(100):
8187
df[n + 3] = df[1] * n
8288

0 commit comments

Comments
 (0)