Skip to content

Commit 0701fc8

Browse files
committed
Merge remote-tracking branch 'upstream/master' into Dr-Irv-issue9943
2 parents d1620f6 + 93aba79 commit 0701fc8

File tree

6 files changed

+49
-22
lines changed

6 files changed

+49
-22
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
12731273
- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
12741274
- Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
12751275
- Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
1276+
- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
12761277

12771278
Plotting
12781279
^^^^^^^^

pandas/conftest.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
hypothesis.settings.register_profile(
1515
"ci",
1616
# Hypothesis timing checks are tuned for scalars by default, so we bump
17-
# them from 200ms to 5 secs per test case as the global default. If this
17+
# them from 200ms to 500ms per test case as the global default. If this
1818
# is too short for a specific test, (a) try to make it faster, and (b)
19-
# if it really is slow add `@settings(timeout=...)` with a working value.
20-
timeout=5000,
19+
# if it really is slow add `@settings(deadline=...)` with a working value,
20+
# or `deadline=None` to entirely disable the deadline for that test.
21+
deadline=500,
22+
timeout=hypothesis.unlimited,
2123
suppress_health_check=(hypothesis.HealthCheck.too_slow,)
2224
)
2325
hypothesis.settings.load_profile("ci")

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4637,7 +4637,7 @@ def _convert_string_array(data, encoding, errors, itemsize=None):
46374637
# create the sized dtype
46384638
if itemsize is None:
46394639
ensured = ensure_object(data.ravel())
4640-
itemsize = libwriters.max_len_string_array(ensured)
4640+
itemsize = max(1, libwriters.max_len_string_array(ensured))
46414641

46424642
data = np.asarray(data, dtype="S%d" % itemsize)
46434643
return data

pandas/tests/frame/test_apply.py

+15-14
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,20 @@ def zip_frames(frames, axis=1):
823823
return pd.DataFrame(zipped)
824824

825825

826+
@composite
827+
def indices(draw, max_length=5):
828+
date = draw(
829+
dates(
830+
min_value=Timestamp.min.ceil("D").to_pydatetime().date(),
831+
max_value=Timestamp.max.floor("D").to_pydatetime().date(),
832+
).map(Timestamp)
833+
)
834+
periods = draw(integers(0, max_length))
835+
freq = draw(sampled_from(list("BDHTS")))
836+
dr = date_range(date, periods=periods, freq=freq)
837+
return pd.DatetimeIndex(list(dr))
838+
839+
826840
class TestDataFrameAggregate():
827841

828842
def test_agg_transform(self, axis, float_frame):
@@ -1142,20 +1156,7 @@ def test_agg_cython_table_raises(self, df, func, expected, axis):
11421156
with pytest.raises(expected):
11431157
df.agg(func, axis=axis)
11441158

1145-
@composite
1146-
def indices(draw, max_length=5):
1147-
date = draw(
1148-
dates(
1149-
min_value=Timestamp.min.ceil("D").to_pydatetime().date(),
1150-
max_value=Timestamp.max.floor("D").to_pydatetime().date(),
1151-
).map(Timestamp)
1152-
)
1153-
periods = draw(integers(0, max_length))
1154-
freq = draw(sampled_from(list("BDHTS")))
1155-
dr = date_range(date, periods=periods, freq=freq)
1156-
return pd.DatetimeIndex(list(dr))
1157-
1158-
@given(index=indices(5), num_columns=integers(0, 5))
1159+
@given(index=indices(max_length=5), num_columns=integers(0, 5))
11591160
def test_frequency_is_original(self, index, num_columns):
11601161
# GH 22150
11611162
original = index.copy()

pandas/tests/indexes/interval/test_interval_tree.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,28 @@
88
from pandas._libs.interval import IntervalTree
99

1010

11+
def skipif_32bit(param):
12+
"""
13+
Skip parameters in a parametrize on 32bit systems. Specifically used
14+
here to skip leaf_size parameters related to GH 23440.
15+
"""
16+
marks = pytest.mark.skipif(compat.is_platform_32bit(),
17+
reason='GH 23440: int type mismatch on 32bit')
18+
return pytest.param(param, marks=marks)
19+
20+
1121
@pytest.fixture(
1222
scope='class', params=['int32', 'int64', 'float32', 'float64', 'uint64'])
1323
def dtype(request):
1424
return request.param
1525

1626

17-
@pytest.fixture(params=[1, 2, 10])
27+
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
1828
def leaf_size(request):
29+
"""
30+
Fixture to specify IntervalTree leaf_size parameter; to be used with the
31+
tree fixture.
32+
"""
1933
return request.param
2034

2135

@@ -85,9 +99,8 @@ def test_get_loc_closed(self, closed):
8599
tm.assert_numpy_array_equal(tree.get_loc(p),
86100
np.array([0], dtype='int64'))
87101

88-
@pytest.mark.skipif(compat.is_platform_32bit(),
89-
reason="int type mismatch on 32bit")
90-
@pytest.mark.parametrize('leaf_size', [1, 10, 100, 10000])
102+
@pytest.mark.parametrize('leaf_size', [
103+
skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000])
91104
def test_get_indexer_closed(self, closed, leaf_size):
92105
x = np.arange(1000, dtype='float64')
93106
found = x.astype('intp')

pandas/tests/io/test_pytables.py

+10
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,16 @@ def check_col(key, name, size):
14821482
pytest.raises(ValueError, store.append, 'df',
14831483
df, min_itemsize={'foo': 20, 'foobar': 20})
14841484

1485+
def test_append_with_empty_string(self):
1486+
1487+
with ensure_clean_store(self.path) as store:
1488+
1489+
# append a chunk whose string column is all empty strings (GH 12242)
1490+
df = DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', '']})
1491+
store.append('df', df[:-1], min_itemsize={'x': 1})
1492+
store.append('df', df[-1:], min_itemsize={'x': 1})
1493+
tm.assert_frame_equal(store.select('df'), df)
1494+
14851495
def test_to_hdf_with_min_itemsize(self):
14861496

14871497
with ensure_clean_path(self.path) as path:

0 commit comments

Comments
 (0)