forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_construction.py
326 lines (257 loc) · 12.1 KB
/
test_construction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.core.arrays.boolean import coerce_to_array
def test_boolean_array_constructor():
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(values, mask)
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
with pytest.raises(TypeError, match="values should be boolean numpy array"):
BooleanArray(values.tolist(), mask)
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
BooleanArray(values, mask.tolist())
with pytest.raises(TypeError, match="values should be boolean numpy array"):
BooleanArray(values.astype(int), mask)
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
BooleanArray(values, None)
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
BooleanArray(values.reshape(1, -1), mask)
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
BooleanArray(values, mask.reshape(1, -1))
def test_boolean_array_constructor_copy():
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(values, mask)
assert result._data is values
assert result._mask is mask
result = BooleanArray(values, mask, copy=True)
assert result._data is not values
assert result._mask is not mask
def test_to_boolean_array():
expected = BooleanArray(
np.array([True, False, True]), np.array([False, False, False])
)
result = pd.array([True, False, True], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([True, False, True]), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
expected = BooleanArray(
np.array([True, False, True]), np.array([False, False, True])
)
result = pd.array([True, False, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_all_none():
expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))
result = pd.array([None, None, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize(
"a, b",
[
([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
([True, np.nan], [True, None]),
([True, pd.NA], [True, None]),
([np.nan, np.nan], [None, None]),
(np.array([np.nan, np.nan], dtype=float), [None, None]),
],
)
def test_to_boolean_array_missing_indicators(a, b):
result = pd.array(a, dtype="boolean")
expected = pd.array(b, dtype="boolean")
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize(
"values",
[
["foo", "bar"],
["1", "2"],
# "foo",
[1, 2],
[1.0, 2.0],
pd.date_range("20130101", periods=2),
np.array(["foo"]),
np.array([1, 2]),
np.array([1.0, 2.0]),
[np.nan, {"a": 1}],
],
)
def test_to_boolean_array_error(values):
# error in converting existing arrays to BooleanArray
msg = "Need to pass bool-like value"
with pytest.raises(TypeError, match=msg):
pd.array(values, dtype="boolean")
def test_to_boolean_array_from_integer_array():
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_from_float_array():
result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_integer_like():
# integers of 0's and 1's
result = pd.array([1, 0, 1, 0], dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
result = pd.array([1, 0, 1, None], dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_coerce_to_array():
# TODO this is currently not public API
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(*coerce_to_array(values, mask=mask))
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
assert result._data is values
assert result._mask is mask
result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
assert result._data is not values
assert result._mask is not mask
# mixed missing from values and mask
values = [True, False, None, False]
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(*coerce_to_array(values, mask=mask))
expected = BooleanArray(
np.array([True, False, True, True]), np.array([False, False, True, True])
)
tm.assert_extension_array_equal(result, expected)
result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
tm.assert_extension_array_equal(result, expected)
result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
tm.assert_extension_array_equal(result, expected)
# raise errors for wrong dimension
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
# passing 2D values is OK as long as no mask
coerce_to_array(values.reshape(1, -1))
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
coerce_to_array(values.reshape(1, -1), mask=mask)
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
coerce_to_array(values, mask=mask.reshape(1, -1))
def test_coerce_to_array_from_boolean_array():
# passing BooleanArray to coerce_to_array
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
arr = BooleanArray(values, mask)
result = BooleanArray(*coerce_to_array(arr))
tm.assert_extension_array_equal(result, arr)
# no copy
assert result._data is arr._data
assert result._mask is arr._mask
result = BooleanArray(*coerce_to_array(arr), copy=True)
tm.assert_extension_array_equal(result, arr)
assert result._data is not arr._data
assert result._mask is not arr._mask
with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
coerce_to_array(arr, mask=mask)
def test_coerce_to_numpy_array():
# with missing values -> object dtype
arr = pd.array([True, False, None], dtype="boolean")
result = np.array(arr)
expected = np.array([True, False, pd.NA], dtype="object")
tm.assert_numpy_array_equal(result, expected)
# also with no missing values -> object dtype
arr = pd.array([True, False, True], dtype="boolean")
result = np.array(arr)
expected = np.array([True, False, True], dtype="object")
tm.assert_numpy_array_equal(result, expected)
# force bool dtype
result = np.array(arr, dtype="bool")
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
# with missing values will raise error
arr = pd.array([True, False, None], dtype="boolean")
msg = (
"cannot convert to 'bool'-dtype NumPy array with missing values. "
"Specify an appropriate 'na_value' for this dtype."
)
with pytest.raises(ValueError, match=msg):
np.array(arr, dtype="bool")
def test_to_boolean_array_from_strings():
result = BooleanArray._from_sequence_of_strings(
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
)
expected = BooleanArray(
np.array([True, False, True, True, False, False, False]),
np.array([False, False, False, False, False, False, True]),
)
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_from_strings_invalid_string():
with pytest.raises(ValueError, match="cannot be cast"):
BooleanArray._from_sequence_of_strings(["donkey"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy(box):
con = pd.Series if box else pd.array
# default (with or without missing values) -> object dtype
arr = con([True, False, True], dtype="boolean")
result = arr.to_numpy()
expected = np.array([True, False, True], dtype="object")
tm.assert_numpy_array_equal(result, expected)
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy()
expected = np.array([True, False, pd.NA], dtype="object")
tm.assert_numpy_array_equal(result, expected)
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype="str")
expected = np.array([True, False, pd.NA], dtype="<U5")
tm.assert_numpy_array_equal(result, expected)
# no missing values -> can convert to bool, otherwise raises
arr = con([True, False, True], dtype="boolean")
result = arr.to_numpy(dtype="bool")
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
arr = con([True, False, None], dtype="boolean")
with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
result = arr.to_numpy(dtype="bool")
# specify dtype and na_value
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype=object, na_value=None)
expected = np.array([True, False, None], dtype="object")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype=bool, na_value=False)
expected = np.array([True, False, False], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype="int64", na_value=-99)
expected = np.array([1, 0, -99], dtype="int64")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype="float64", na_value=np.nan)
expected = np.array([1, 0, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
# converting to int or float without specifying na_value raises
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
arr.to_numpy(dtype="int64")
with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
arr.to_numpy(dtype="float64")
def test_to_numpy_copy():
# to_numpy can be zero-copy if no missing values
arr = pd.array([True, False, True], dtype="boolean")
result = arr.to_numpy(dtype=bool)
result[0] = False
tm.assert_extension_array_equal(
arr, pd.array([False, False, True], dtype="boolean")
)
arr = pd.array([True, False, True], dtype="boolean")
result = arr.to_numpy(dtype=bool, copy=True)
result[0] = False
tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))