CLN: Removed pandas.util.testing.choice

paul-reiners · paul-reiners · commit 5f2d9b5dd176 · 2016-03-01T18:36:57.000-06:00
Removed the pandas.util.testing.choice() function. Replaced all references to it with calls to np.random.choice(). Added entry to whatsnew. #12386
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -1195,3 +1195,4 @@ Bug Fixes
 - Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`)
 - Bug when initializing categorical series with a scalar value. (:issue:`12336`)
 - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`)
+- Removed ``pandas.util.testing.choice()``; use ``np.random.choice()`` instead. (:issue:`12386`)
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
@@ -96,8 +96,8 @@ class TestDataFrameQueryWithMultiIndex(tm.TestCase):
 
     def check_query_with_named_multiindex(self, parser, engine):
         tm.skip_if_no_ne(engine)
-        a = tm.choice(['red', 'green'], size=10)
-        b = tm.choice(['eggs', 'ham'], size=10)
+        a = np.random.choice(['red', 'green'], size=10)
+        b = np.random.choice(['eggs', 'ham'], size=10)
         index = MultiIndex.from_arrays([a, b], names=['color', 'food'])
         df = DataFrame(randn(10, 2), index=index)
         ind = Series(df.index.get_level_values('color').values, index=index,
@@ -149,8 +149,8 @@ def test_query_with_named_multiindex(self):
 
     def check_query_with_unnamed_multiindex(self, parser, engine):
         tm.skip_if_no_ne(engine)
-        a = tm.choice(['red', 'green'], size=10)
-        b = tm.choice(['eggs', 'ham'], size=10)
+        a = np.random.choice(['red', 'green'], size=10)
+        b = np.random.choice(['eggs', 'ham'], size=10)
         index = MultiIndex.from_arrays([a, b])
         df = DataFrame(randn(10, 2), index=index)
         ind = Series(df.index.get_level_values(0).values, index=index)
@@ -243,7 +243,7 @@ def test_query_with_unnamed_multiindex(self):
 
     def check_query_with_partially_named_multiindex(self, parser, engine):
         tm.skip_if_no_ne(engine)
-        a = tm.choice(['red', 'green'], size=10)
+        a = np.random.choice(['red', 'green'], size=10)
         b = np.arange(10)
         index = MultiIndex.from_arrays([a, b])
         index.names = [None, 'rating']
@@ -975,7 +975,7 @@ def check_query_lex_compare_strings(self, parser, engine):
         tm.skip_if_no_ne(engine=engine)
         import operator as opr
 
-        a = Series(tm.choice(list('abcde'), 20))
+        a = Series(np.random.choice(list('abcde'), 20))
         b = Series(np.arange(a.size))
         df = DataFrame({'X': a, 'Y': b})
 
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
@@ -60,8 +60,8 @@ def setUp(self):
 
         n = 100
         with tm.RNGContext(42):
-            gender = tm.choice(['Male', 'Female'], size=n)
-            classroom = tm.choice(['A', 'B', 'C'], size=n)
+            gender = np.random.choice(['Male', 'Female'], size=n)
+            classroom = np.random.choice(['A', 'B', 'C'], size=n)
 
             self.hist_df = DataFrame({'gender': gender,
                                       'classroom': classroom,
@@ -3861,7 +3861,7 @@ def test_series_groupby_plotting_nominally_works(self):
         weight = Series(np.random.normal(166, 20, size=n))
         height = Series(np.random.normal(60, 10, size=n))
         with tm.RNGContext(42):
-            gender = tm.choice(['male', 'female'], size=n)
+            gender = np.random.choice(['male', 'female'], size=n)
 
         weight.groupby(gender).plot()
         tm.close()
diff --git a/pandas/tests/test_graphics_others.py b/pandas/tests/test_graphics_others.py
@@ -641,7 +641,7 @@ def test_grouped_plot_fignums(self):
         weight = Series(np.random.normal(166, 20, size=n))
         height = Series(np.random.normal(60, 10, size=n))
         with tm.RNGContext(42):
-            gender = tm.choice(['male', 'female'], size=n)
+            gender = np.random.choice(['male', 'female'], size=n)
         df = DataFrame({'height': height, 'weight': weight, 'gender': gender})
         gb = df.groupby('gender')
 
@@ -715,7 +715,7 @@ def test_grouped_hist_legacy2(self):
         weight = Series(np.random.normal(166, 20, size=n))
         height = Series(np.random.normal(60, 10, size=n))
         with tm.RNGContext(42):
-            gender_int = tm.choice([0, 1], size=n)
+            gender_int = np.random.choice([0, 1], size=n)
         df_int = DataFrame({'height': height, 'weight': weight,
                             'gender': gender_int})
         gb = df_int.groupby('gender')
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
@@ -236,27 +236,27 @@ def test_join_on(self):
 
     def test_join_on_fails_with_different_right_index(self):
         with tm.assertRaises(ValueError):
-            df = DataFrame({'a': tm.choice(['m', 'f'], size=3),
+            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                             'b': np.random.randn(3)})
-            df2 = DataFrame({'a': tm.choice(['m', 'f'], size=10),
+            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                              'b': np.random.randn(10)},
                             index=tm.makeCustomIndex(10, 2))
             merge(df, df2, left_on='a', right_index=True)
 
     def test_join_on_fails_with_different_left_index(self):
         with tm.assertRaises(ValueError):
-            df = DataFrame({'a': tm.choice(['m', 'f'], size=3),
+            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                             'b': np.random.randn(3)},
                            index=tm.makeCustomIndex(10, 2))
-            df2 = DataFrame({'a': tm.choice(['m', 'f'], size=10),
+            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                              'b': np.random.randn(10)})
             merge(df, df2, right_on='b', left_index=True)
 
     def test_join_on_fails_with_different_column_counts(self):
         with tm.assertRaises(ValueError):
-            df = DataFrame({'a': tm.choice(['m', 'f'], size=3),
+            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                             'b': np.random.randn(3)})
-            df2 = DataFrame({'a': tm.choice(['m', 'f'], size=10),
+            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                              'b': np.random.randn(10)},
                             index=tm.makeCustomIndex(10, 2))
             merge(df, df2, right_on='a', left_on=['a', 'b'])
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -133,7 +133,7 @@ def randbool(size=(), p=0.5):
 
 def rands_array(nchars, size, dtype='O'):
     """Generate an array of byte strings."""
-    retval = (choice(RANDS_CHARS, size=nchars * np.prod(size))
+    retval = (np.random.choice(RANDS_CHARS, size=nchars * np.prod(size))
               .view((np.str_, nchars)).reshape(size))
     if dtype is None:
         return retval
@@ -143,7 +143,7 @@ def rands_array(nchars, size, dtype='O'):
 
 def randu_array(nchars, size, dtype='O'):
     """Generate an array of unicode strings."""
-    retval = (choice(RANDU_CHARS, size=nchars * np.prod(size))
+    retval = (np.random.choice(RANDU_CHARS, size=nchars * np.prod(size))
               .view((np.unicode_, nchars)).reshape(size))
     if dtype is None:
         return retval
@@ -158,7 +158,7 @@ def rands(nchars):
     See `rands_array` if you want to create an array of random strings.
 
     """
-    return ''.join(choice(RANDS_CHARS, nchars))
+    return ''.join(np.random.choice(RANDS_CHARS, nchars))
 
 
 def randu(nchars):
@@ -171,14 +171,6 @@ def randu(nchars):
     return ''.join(choice(RANDU_CHARS, nchars))
 
 
-def choice(x, size=10):
-    """sample with replacement; uniform over the input"""
-    try:
-        return np.random.choice(x, size=size)
-    except AttributeError:
-        return np.random.randint(len(x), size=size).choose(x)
-
-
 def close(fignum=None):
     from matplotlib.pyplot import get_fignums, close as _close
 
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
@@ -143,7 +143,7 @@ def f():
 value2 = np.random.randn(n)
 value2[np.random.rand(n) > 0.5] = np.nan
 
-obj = tm.choice(list('ab'), size=n).astype(object)
+obj = np.random.choice(list('ab'), size=n).astype(object)
 obj[np.random.randn(n) > 0.5] = np.nan
 
 df = DataFrame({'key1': np.random.randint(0, 500, size=n),