Skip to content

Commit 5467d5b

Browse files
authored
API: frames from HDFStore stored without an index now give RangeIndex (#51076)
* API: frames from HDFStore stored without an index now give RangeIndex
* add GH-number
1 parent bcb6c1a commit 5467d5b

File tree

4 files changed

+23
-21
lines changed

4 files changed

+23
-21
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ Other API changes
620620
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
621621
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
622622
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
623+
- :class:`DataFrame` objects that were stored in a :class:`HDFStore` file without an index now have a :class:`RangeIndex` when read back, instead of an ``int64`` index (:issue:`51076`)
623624
- Instantiating an :class:`Index` with a numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`)
624625
- Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`)
625626
- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)

pandas/io/pytables.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
Index,
8686
MultiIndex,
8787
PeriodIndex,
88+
RangeIndex,
8889
Series,
8990
TimedeltaIndex,
9091
concat,
@@ -2258,7 +2259,7 @@ def convert(
22582259
"""
22592260
assert isinstance(values, np.ndarray), type(values)
22602261

2261-
index = Index(np.arange(len(values), dtype=np.int64))
2262+
index = RangeIndex(len(values))
22622263
return index, index
22632264

22642265
def set_attr(self) -> None:

pandas/tests/io/pytables/test_append.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,13 @@ def test_append(setup_path):
8686
)
8787
_maybe_remove(store, "uints")
8888
store.append("uints", uint_data)
89-
tm.assert_frame_equal(store["uints"], uint_data)
89+
tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)
9090

9191
# uints - test storage of uints in indexable columns
9292
_maybe_remove(store, "uints")
9393
# 64-bit indices not yet supported
9494
store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])
95-
tm.assert_frame_equal(store["uints"], uint_data)
95+
tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)
9696

9797

9898
def test_append_series(setup_path):
@@ -128,7 +128,7 @@ def test_append_series(setup_path):
128128
# select on the index and values
129129
expected = ns[(ns > 70) & (ns.index < 90)]
130130
result = store.select("ns", "foo>70 and index<90")
131-
tm.assert_series_equal(result, expected)
131+
tm.assert_series_equal(result, expected, check_index_type=True)
132132

133133
# multi-index
134134
mi = DataFrame(np.random.randn(5, 1), columns=["A"])
@@ -139,7 +139,7 @@ def test_append_series(setup_path):
139139
s = mi.stack()
140140
s.index = s.index.droplevel(2)
141141
store.append("mi", s)
142-
tm.assert_series_equal(store["mi"], s)
142+
tm.assert_series_equal(store["mi"], s, check_index_type=True)
143143

144144

145145
def test_append_some_nans(setup_path):
@@ -162,31 +162,31 @@ def test_append_some_nans(setup_path):
162162
df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan
163163
store.append("df1", df[:10])
164164
store.append("df1", df[10:])
165-
tm.assert_frame_equal(store["df1"], df)
165+
tm.assert_frame_equal(store["df1"], df, check_index_type=True)
166166

167167
# first column
168168
df1 = df.copy()
169169
df1["A1"] = np.nan
170170
_maybe_remove(store, "df1")
171171
store.append("df1", df1[:10])
172172
store.append("df1", df1[10:])
173-
tm.assert_frame_equal(store["df1"], df1)
173+
tm.assert_frame_equal(store["df1"], df1, check_index_type=True)
174174

175175
# 2nd column
176176
df2 = df.copy()
177177
df2["A2"] = np.nan
178178
_maybe_remove(store, "df2")
179179
store.append("df2", df2[:10])
180180
store.append("df2", df2[10:])
181-
tm.assert_frame_equal(store["df2"], df2)
181+
tm.assert_frame_equal(store["df2"], df2, check_index_type=True)
182182

183183
# datetimes
184184
df3 = df.copy()
185185
df3["E"] = np.nan
186186
_maybe_remove(store, "df3")
187187
store.append("df3", df3[:10])
188188
store.append("df3", df3[10:])
189-
tm.assert_frame_equal(store["df3"], df3)
189+
tm.assert_frame_equal(store["df3"], df3, check_index_type=True)
190190

191191

192192
def test_append_all_nans(setup_path):
@@ -203,13 +203,13 @@ def test_append_all_nans(setup_path):
203203
_maybe_remove(store, "df")
204204
store.append("df", df[:10], dropna=True)
205205
store.append("df", df[10:], dropna=True)
206-
tm.assert_frame_equal(store["df"], df[-4:])
206+
tm.assert_frame_equal(store["df"], df[-4:], check_index_type=True)
207207

208208
# nan some entire rows (dropna=False)
209209
_maybe_remove(store, "df2")
210210
store.append("df2", df[:10], dropna=False)
211211
store.append("df2", df[10:], dropna=False)
212-
tm.assert_frame_equal(store["df2"], df)
212+
tm.assert_frame_equal(store["df2"], df, check_index_type=True)
213213

214214
# tests the option io.hdf.dropna_table
215215
with pd.option_context("io.hdf.dropna_table", False):
@@ -240,12 +240,12 @@ def test_append_all_nans(setup_path):
240240
_maybe_remove(store, "df")
241241
store.append("df", df[:10], dropna=True)
242242
store.append("df", df[10:], dropna=True)
243-
tm.assert_frame_equal(store["df"], df)
243+
tm.assert_frame_equal(store["df"], df, check_index_type=True)
244244

245245
_maybe_remove(store, "df2")
246246
store.append("df2", df[:10], dropna=False)
247247
store.append("df2", df[10:], dropna=False)
248-
tm.assert_frame_equal(store["df2"], df)
248+
tm.assert_frame_equal(store["df2"], df, check_index_type=True)
249249

250250
# nan some entire rows (but since we have dates they are still
251251
# written!)
@@ -266,12 +266,12 @@ def test_append_all_nans(setup_path):
266266
_maybe_remove(store, "df")
267267
store.append("df", df[:10], dropna=True)
268268
store.append("df", df[10:], dropna=True)
269-
tm.assert_frame_equal(store["df"], df)
269+
tm.assert_frame_equal(store["df"], df, check_index_type=True)
270270

271271
_maybe_remove(store, "df2")
272272
store.append("df2", df[:10], dropna=False)
273273
store.append("df2", df[10:], dropna=False)
274-
tm.assert_frame_equal(store["df2"], df)
274+
tm.assert_frame_equal(store["df2"], df, check_index_type=True)
275275

276276

277277
def test_append_frame_column_oriented(setup_path):
@@ -882,7 +882,7 @@ def test_append_to_multiple_dropna(setup_path):
882882
)
883883
result = store.select_as_multiple(["df1", "df2"])
884884
expected = df.dropna()
885-
tm.assert_frame_equal(result, expected)
885+
tm.assert_frame_equal(result, expected, check_index_type=True)
886886
tm.assert_index_equal(store.select("df1").index, store.select("df2").index)
887887

888888

@@ -932,4 +932,4 @@ def test_append_to_multiple_min_itemsize(setup_path):
932932
min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
933933
)
934934
result = store.select_as_multiple(["index", "nums", "strs"])
935-
tm.assert_frame_equal(result, expected)
935+
tm.assert_frame_equal(result, expected, check_index_type=True)

pandas/tests/io/pytables/test_compat.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,25 @@ def test_read_complete(self, pytables_hdf5_file):
5151
path, objname, df = pytables_hdf5_file
5252
result = pd.read_hdf(path, key=objname)
5353
expected = df
54-
tm.assert_frame_equal(result, expected)
54+
tm.assert_frame_equal(result, expected, check_index_type=True)
5555

5656
def test_read_with_start(self, pytables_hdf5_file):
5757
path, objname, df = pytables_hdf5_file
5858
# This is a regression test for pandas-dev/pandas/issues/11188
5959
result = pd.read_hdf(path, key=objname, start=1)
6060
expected = df[1:].reset_index(drop=True)
61-
tm.assert_frame_equal(result, expected)
61+
tm.assert_frame_equal(result, expected, check_index_type=True)
6262

6363
def test_read_with_stop(self, pytables_hdf5_file):
6464
path, objname, df = pytables_hdf5_file
6565
# This is a regression test for pandas-dev/pandas/issues/11188
6666
result = pd.read_hdf(path, key=objname, stop=1)
6767
expected = df[:1].reset_index(drop=True)
68-
tm.assert_frame_equal(result, expected)
68+
tm.assert_frame_equal(result, expected, check_index_type=True)
6969

7070
def test_read_with_startstop(self, pytables_hdf5_file):
7171
path, objname, df = pytables_hdf5_file
7272
# This is a regression test for pandas-dev/pandas/issues/11188
7373
result = pd.read_hdf(path, key=objname, start=1, stop=2)
7474
expected = df[1:2].reset_index(drop=True)
75-
tm.assert_frame_equal(result, expected)
75+
tm.assert_frame_equal(result, expected, check_index_type=True)

0 commit comments

Comments
 (0)