|
7 | 7 | from numpy import nan
|
8 | 8 | from datetime import datetime
|
9 | 9 | from itertools import permutations
|
| 10 | +import struct |
10 | 11 | from pandas import (Series, Categorical, CategoricalIndex,
|
11 | 12 | Timestamp, DatetimeIndex, Index, IntervalIndex)
|
12 | 13 | import pandas as pd
|
@@ -500,6 +501,25 @@ def test_obj_none_preservation(self):
|
500 | 501 |
|
501 | 502 | tm.assert_numpy_array_equal(result, expected, strict_nan=True)
|
502 | 503 |
|
| 504 | + def test_signed_zero(self): |
| 505 | + # GH 21866: pd.unique must treat -0.0 and 0.0 as one and the same value |
| 506 | + a = np.array([-0.0, 0.0]) |
| 507 | + result = pd.unique(a) |
| 508 | + expected = np.array([-0.0]) # 0.0 and -0.0 are equivalent, so a single element is expected |
| 509 | + tm.assert_numpy_array_equal(result, expected) |
| 510 | + |
| 511 | + def test_different_nans(self): |
| 512 | + # GH 21866: NaNs with different bit patterns must collapse to one unique value |
| 513 | + # create two NaNs whose 64-bit patterns differ only in the lowest bit: |
| 514 | + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000000))[0] |
| 515 | + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000001))[0] |
| 516 | + assert NAN1 != NAN1 |
| 517 | + assert NAN2 != NAN2 |
| 518 | + a = np.array([NAN1, NAN2]) # NAN1 and NAN2 are both NaN and must be treated as equivalent |
| 519 | + result = pd.unique(a) |
| 520 | + expected = np.array([np.nan]) |
| 521 | + tm.assert_numpy_array_equal(result, expected) |
| 522 | + |
503 | 523 |
|
504 | 524 | class TestIsin(object):
|
505 | 525 |
|
@@ -1087,6 +1107,31 @@ def test_lookup_nan(self, writable):
|
1087 | 1107 | tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs),
|
1088 | 1108 | dtype=np.int64))
|
1089 | 1109 |
|
| 1110 | + def test_add_signed_zeros(self): |
| 1111 | + # GH 21866: inconsistent hash function for float64. |
| 1112 | + # The default hash function would put 0.0 and -0.0 into different |
| 1113 | + # hash buckets once there are more than 2^30 hash buckets, |
| 1114 | + # but a table of that size would need about 16GB of memory. |
| 1115 | + N = 4 # N = 12 * 10**8 would trigger the error, given enough memory; 4 keeps the test cheap |
| 1116 | + m = ht.Float64HashTable(N) |
| 1117 | + m.set_item(0.0, 0) |
| 1118 | + m.set_item(-0.0, 0) |
| 1119 | + assert len(m) == 1 # 0.0 and -0.0 are equivalent, so the second set_item must not add an entry |
| 1120 | + |
| 1121 | + def test_add_different_nans(self): |
| 1122 | + # GH 21866: inconsistent hash function for float64. |
| 1123 | + # create two NaNs whose 64-bit patterns differ only in the lowest bit: |
| 1124 | + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000000))[0] |
| 1125 | + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000001))[0] |
| 1126 | + assert NAN1 != NAN1 |
| 1127 | + assert NAN2 != NAN2 |
| 1128 | + # the default hash function would put NAN1 and NAN2 into different |
| 1129 | + # hash buckets even if there are only 4 buckets: |
| 1130 | + m = ht.Float64HashTable() |
| 1131 | + m.set_item(NAN1, 0) |
| 1132 | + m.set_item(NAN2, 0) |
| 1133 | + assert len(m) == 1 # NAN1 and NAN2 are equivalent, so the second set_item must not add an entry |
| 1134 | + |
1090 | 1135 | def test_lookup_overflow(self, writable):
|
1091 | 1136 | xs = np.array([1, 2, 2**63], dtype=np.uint64)
|
1092 | 1137 | # GH 21688 ensure we can deal with readonly memory views
|
|
0 commit comments