
Commit 51dcc83

CLN: Remove unwanted patterns
1 parent a060553 commit 51dcc83

File tree

pandas/io/stata.py
pandas/tests/io/test_stata.py

2 files changed: +24 -19 lines

pandas/io/stata.py (+10 -7)
@@ -1792,18 +1792,21 @@ def _do_convert_categoricals(
             if label in value_labels:
                 # Explicit call with ordered=True
                 vl = value_label_dict[label]
-                keys = np.array([k for k in vl.keys()])
+                keys = np.array(list(vl.keys()))
                 column = data[col]
-                if self._chunksize is not None and column.isin(keys).all():
+                key_matches = column.isin(keys)
+                if self._chunksize is not None and key_matches.all():
+                    initial_categories = keys
                     # If all categories are in the keys and we are iterating,
                     # use the same keys for all chunks. If some are missing
                     # value labels, then we will fall back to the categories
                     # varying across chunks.
-                    initial_categories = keys
-                    warnings.warn(
-                        categorical_conversion_warning, CategoricalConversionWarning
-                    )
                 else:
+                    if self._chunksize is not None:
+                        # warn is using an iterator
+                        warnings.warn(
+                            categorical_conversion_warning, CategoricalConversionWarning
+                        )
                     initial_categories = None
                 cat_data = Categorical(
                     column, categories=initial_categories, ordered=order_categoricals
@@ -1818,7 +1821,7 @@ def _do_convert_categoricals(
                             categories.append(category)
                 else:
                     # If all cats are matched, we can use the values
-                    categories = [v for v in vl.values()]
+                    categories = list(vl.values())
                 try:
                     # Try to catch duplicate categories
                     cat_data.categories = categories
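
In plain terms, the rewritten block above does two things when reading in chunks: a labeled column whose observed values are all covered by its value labels now keeps that full label set as its categories in every chunk, and CategoricalConversionWarning is raised only while iterating over a column whose labels cover its values partially. The sketch below shows how calling code might handle that warning; the file name partially_labeled.dta, the warnings.simplefilter call, and the final length check are illustrative assumptions, not part of this commit.

import warnings

import pandas as pd
from pandas.io.stata import CategoricalConversionWarning

# Placeholder path: a .dta file in which a labeled column also contains
# values that carry no value label (the shape of the test file
# stata-dta-partially-labeled.dta used below).
path = "partially_labeled.dta"

with warnings.catch_warnings():
    # While iterating, a partially labeled column can end up with different
    # categories in each chunk; that is what the warning flags. It is
    # silenced here only because the chunks are checked against a full read.
    warnings.simplefilter("ignore", CategoricalConversionWarning)
    with pd.read_stata(path, chunksize=2) as reader:
        chunks = list(reader)

# A plain, non-chunked read of the same file as a reference for the chunks.
full = pd.read_stata(path)
assert sum(len(chunk) for chunk in chunks) == len(full)

Whether to silence the warning or act on it (for example by reading with convert_categoricals=False and applying the value labels yourself) depends on whether stable categories across chunks matter downstream.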

pandas/tests/io/test_stata.py (+14 -12)
@@ -1941,18 +1941,19 @@ def test_chunked_categorical(version):
 
 def test_chunked_categorical_partial(dirpath):
     dta_file = os.path.join(dirpath, "stata-dta-partially-labeled.dta")
-    reader = StataReader(dta_file, chunksize=2)
     values = ["a", "b", "a", "b", 3.0]
-    with pytest.warns(CategoricalConversionWarning, match="One or more series"):
-        for i, block in enumerate(reader):
-            assert list(block.cats) == values[2 * i : 2 * (i + 1)]
-            if i < 2:
-                idx = pd.Index(["a", "b"])
-            else:
-                idx = pd.Float64Index([3.0])
-            tm.assert_index_equal(block.cats.cat.categories, idx)
-    reader = StataReader(dta_file, chunksize=5)
-    large_chunk = reader.__next__()
+    with StataReader(dta_file, chunksize=2) as reader:
+        with tm.assert_produces_warning(CategoricalConversionWarning):
+            for i, block in enumerate(reader):
+                assert list(block.cats) == values[2 * i : 2 * (i + 1)]
+                if i < 2:
+                    idx = pd.Index(["a", "b"])
+                else:
+                    idx = pd.Float64Index([3.0])
+                tm.assert_index_equal(block.cats.cat.categories, idx)
+    with tm.assert_produces_warning(CategoricalConversionWarning):
+        with StataReader(dta_file, chunksize=5) as reader:
+            large_chunk = reader.__next__()
     direct = read_stata(dta_file)
     tm.assert_frame_equal(direct, large_chunk)
 
@@ -1966,4 +1967,5 @@ def test_iterator_errors(dirpath):
     with pytest.raises(ValueError, match="chunksize must be a positive"):
         StataReader(dta_file, chunksize="apple")
     with pytest.raises(ValueError, match="chunksize must be set to a positive"):
-        StataReader(dta_file).__next__()
+        with StataReader(dta_file) as reader:
+            reader.__next__()
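
The updated tests also wrap every StataReader in a with block, so the underlying file handle is closed even when the body raises. A minimal sketch of that same pattern around the error path exercised in the last hunk; the file name example.dta and the test name are assumptions for illustration only.

import pytest

from pandas.io.stata import StataReader

# Illustrative path; any readable .dta file would do for this sketch.
dta_file = "example.dta"


def test_next_requires_chunksize():
    # A reader built without chunksize supports read() but not iteration, so
    # asking for the next chunk raises ValueError. Wrapping it in a with
    # block mirrors the updated test above and guarantees the file handle is
    # closed even though the body raises.
    with pytest.raises(ValueError, match="chunksize must be set to a positive"):
        with StataReader(dta_file) as reader:
            reader.__next__()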
