
Commit 51dcc83

CLN: Remove unwanted patterns
1 parent a060553 commit 51dcc83

File tree

pandas/io/stata.py
pandas/tests/io/test_stata.py

2 files changed: +24 -19 lines

pandas/io/stata.py (+10 -7)
@@ -1792,18 +1792,21 @@ def _do_convert_categoricals(
             if label in value_labels:
                 # Explicit call with ordered=True
                 vl = value_label_dict[label]
-                keys = np.array([k for k in vl.keys()])
+                keys = np.array(list(vl.keys()))
                 column = data[col]
-                if self._chunksize is not None and column.isin(keys).all():
+                key_matches = column.isin(keys)
+                if self._chunksize is not None and key_matches.all():
+                    initial_categories = keys
                     # If all categories are in the keys and we are iterating,
                     # use the same keys for all chunks. If some are missing
                     # value labels, then we will fall back to the categories
                     # varying across chunks.
-                    initial_categories = keys
-                    warnings.warn(
-                        categorical_conversion_warning, CategoricalConversionWarning
-                    )
                 else:
+                    if self._chunksize is not None:
+                        # warn is using an iterator
+                        warnings.warn(
+                            categorical_conversion_warning, CategoricalConversionWarning
+                        )
                     initial_categories = None
                 cat_data = Categorical(
                     column, categories=initial_categories, ordered=order_categoricals
@@ -1818,7 +1821,7 @@ def _do_convert_categoricals(
                             categories.append(category)
                 else:
                     # If all cats are matched, we can use the values
-                    categories = [v for v in vl.values()]
+                    categories = list(vl.values())
                 try:
                     # Try to catch duplicate categories
                     cat_data.categories = categories
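
In plain terms, the rewritten block above does two things when reading in chunks: a labeled column whose observed values are all covered by its value labels now keeps that full label set as its categories in every chunk, and CategoricalConversionWarning is raised only while iterating over a column whose labels cover its values partially. The sketch below shows how calling code might handle that warning; the file name partially_labeled.dta, the warnings.simplefilter call, and the final length check are illustrative assumptions, not part of this commit.

import warnings

import pandas as pd
from pandas.io.stata import CategoricalConversionWarning

# Placeholder path: a .dta file in which a labeled column also contains
# values that carry no value label (the shape of the test file
# stata-dta-partially-labeled.dta used below).
path = "partially_labeled.dta"

with warnings.catch_warnings():
    # While iterating, a partially labeled column can end up with different
    # categories in each chunk; that is what the warning flags. It is
    # silenced here only because the chunks are checked against a full read.
    warnings.simplefilter("ignore", CategoricalConversionWarning)
    with pd.read_stata(path, chunksize=2) as reader:
        chunks = list(reader)

# A plain, non-chunked read of the same file as a reference for the chunks.
full = pd.read_stata(path)
assert sum(len(chunk) for chunk in chunks) == len(full)

Whether to silence the warning or act on it (for example by reading with convert_categoricals=False and applying the value labels yourself) depends on whether stable categories across chunks matter downstream.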

pandas/tests/io/test_stata.py (+14 -12)
@@ -1941,18 +1941,19 @@ def test_chunked_categorical(version):
 
 def test_chunked_categorical_partial(dirpath):
     dta_file = os.path.join(dirpath, "stata-dta-partially-labeled.dta")
-    reader = StataReader(dta_file, chunksize=2)
     values = ["a", "b", "a", "b", 3.0]
-    with pytest.warns(CategoricalConversionWarning, match="One or more series"):
-        for i, block in enumerate(reader):
-            assert list(block.cats) == values[2 * i : 2 * (i + 1)]
-            if i < 2:
-                idx = pd.Index(["a", "b"])
-            else:
-                idx = pd.Float64Index([3.0])
-            tm.assert_index_equal(block.cats.cat.categories, idx)
-    reader = StataReader(dta_file, chunksize=5)
-    large_chunk = reader.__next__()
+    with StataReader(dta_file, chunksize=2) as reader:
+        with tm.assert_produces_warning(CategoricalConversionWarning):
+            for i, block in enumerate(reader):
+                assert list(block.cats) == values[2 * i : 2 * (i + 1)]
+                if i < 2:
+                    idx = pd.Index(["a", "b"])
+                else:
+                    idx = pd.Float64Index([3.0])
+                tm.assert_index_equal(block.cats.cat.categories, idx)
+    with tm.assert_produces_warning(CategoricalConversionWarning):
+        with StataReader(dta_file, chunksize=5) as reader:
+            large_chunk = reader.__next__()
     direct = read_stata(dta_file)
     tm.assert_frame_equal(direct, large_chunk)
 
@@ -1966,4 +1967,5 @@ def test_iterator_errors(dirpath):
     with pytest.raises(ValueError, match="chunksize must be a positive"):
         StataReader(dta_file, chunksize="apple")
     with pytest.raises(ValueError, match="chunksize must be set to a positive"):
-        StataReader(dta_file).__next__()
+        with StataReader(dta_file) as reader:
+            reader.__next__()
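
The updated tests also wrap every StataReader in a with block, so the underlying file handle is closed even when the body raises. A minimal sketch of that same pattern around the error path exercised in the last hunk; the file name example.dta and the test name are assumptions for illustration only.

import pytest

from pandas.io.stata import StataReader

# Illustrative path; any readable .dta file would do for this sketch.
dta_file = "example.dta"


def test_next_requires_chunksize():
    # A reader built without chunksize supports read() but not iteration, so
    # asking for the next chunk raises ValueError. Wrapping it in a with
    # block mirrors the updated test above and guarantees the file handle is
    # closed even though the body raises.
    with pytest.raises(ValueError, match="chunksize must be set to a positive"):
        with StataReader(dta_file) as reader:
            reader.__next__()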
