Skip to content

Commit 74f31f2

Browse files
committed
CLN: assorted cleanups
1 parent 17fa54b commit 74f31f2

File tree

8 files changed

+53
-50
lines changed

8 files changed

+53
-50
lines changed

pandas/core/indexes/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,11 +331,12 @@ def __new__(
331331

332332
# extension dtype
333333
elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
334-
data = np.asarray(data)
335334
if not (dtype is None or is_object_dtype(dtype)):
336335
# coerce to the provided dtype
337336
ea_cls = dtype.construct_array_type()
338337
data = ea_cls._from_sequence(data, dtype=dtype, copy=False)
338+
else:
339+
data = np.asarray(data, dtype=object)
339340

340341
# coerce to the object dtype
341342
data = data.astype(object)

pandas/core/strings.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,12 @@ def cat_core(list_of_columns: List, sep: str):
7474
"""
7575
if sep == "":
7676
# no need to interleave sep if it is empty
77-
return np.sum(list_of_columns, axis=0)
77+
arr_of_cols = np.asarray(list_of_columns, dtype=object)
78+
return np.sum(arr_of_cols, axis=0)
7879
list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
7980
list_with_sep[::2] = list_of_columns
80-
return np.sum(list_with_sep, axis=0)
81+
arr_with_sep = np.asarray(list_with_sep)
82+
return np.sum(arr_with_sep, axis=0)
8183

8284

8385
def cat_safe(list_of_columns: List, sep: str):

pandas/io/pytables.py

Lines changed: 36 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,7 @@ def create_table_index(
13141314
optlevel : int or None, default None
13151315
Optimization level, if None, pytables defaults to 6.
13161316
kind : str or None, default None
1317-
Kind of index, if None, pytables defaults to "medium"
1317+
Kind of index, if None, pytables defaults to "medium".
13181318
13191319
Raises
13201320
------
@@ -1741,24 +1741,24 @@ def _read_group(self, group: "Node"):
17411741

17421742

17431743
class TableIterator:
1744-
""" define the iteration interface on a table
1745-
1746-
Parameters
1747-
----------
1744+
"""
1745+
Define the iteration interface on a table.
17481746
1749-
store : the reference store
1750-
s : the referred storer
1751-
func : the function to execute the query
1752-
where : the where of the query
1753-
nrows : the rows to iterate on
1754-
start : the passed start value (default is None)
1755-
stop : the passed stop value (default is None)
1756-
iterator : bool, default False
1757-
Whether to use the default iterator.
1758-
chunksize : the passed chunking value (default is 100000)
1759-
auto_close : boolean, automatically close the store at the end of
1760-
iteration, default is False
1761-
"""
1747+
Parameters
1748+
----------
1749+
store : HDFStore
1750+
s : the referred storer
1751+
func : the function to execute the query
1752+
where : the where of the query
1753+
nrows : the rows to iterate on
1754+
start : the passed start value (default is None)
1755+
stop : the passed stop value (default is None)
1756+
iterator : bool, default False
1757+
Whether to use the default iterator.
1758+
chunksize : the passed chunking value (default is 100000)
1759+
auto_close : bool, default False
1760+
Whether to automatically close the store at the end of iteration.
1761+
"""
17621762

17631763
chunksize: Optional[int]
17641764
store: HDFStore
@@ -2540,10 +2540,6 @@ def copy(self):
25402540
new_self = copy.copy(self)
25412541
return new_self
25422542

2543-
@property
2544-
def storage_obj_type(self):
2545-
return self.obj_type
2546-
25472543
@property
25482544
def shape(self):
25492545
return self.nrows
@@ -2568,10 +2564,6 @@ def _complevel(self) -> int:
25682564
def _fletcher32(self) -> bool:
25692565
return self.parent._fletcher32
25702566

2571-
@property
2572-
def _complib(self):
2573-
return self.parent._complib
2574-
25752567
@property
25762568
def attrs(self):
25772569
return self.group._v_attrs
@@ -3298,12 +3290,12 @@ def data_orientation(self):
32983290
def queryables(self) -> Dict[str, Any]:
32993291
""" return a dict of the kinds allowable columns for this object """
33003292

3293+
# mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here
3294+
axis_names = {0: "index", 1: "columns"}
3295+
33013296
# compute the values_axes queryables
33023297
d1 = [(a.cname, a) for a in self.index_axes]
3303-
d2 = [
3304-
(self.storage_obj_type._AXIS_NAMES[axis], None)
3305-
for axis, values in self.non_index_axes
3306-
]
3298+
d2 = [(axis_names[axis], None) for axis, values in self.non_index_axes]
33073299
d3 = [
33083300
(v.cname, v) for v in self.values_axes if v.name in set(self.data_columns)
33093301
]
@@ -3482,9 +3474,7 @@ def f(i, c):
34823474

34833475
def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
34843476
"""
3485-
Create a pytables index on the specified columns
3486-
note: cannot index Time64Col() or ComplexCol currently;
3487-
PyTables must be >= 3.0
3477+
Create a pytables index on the specified columns.
34883478
34893479
Parameters
34903480
----------
@@ -3499,12 +3489,16 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
34993489
optlevel : int or None, default None
35003490
Optimization level, if None, pytables defaults to 6.
35013491
kind : str or None, default None
3502-
Kind of index, if None, pytables defaults to "medium"
3492+
Kind of index, if None, pytables defaults to "medium".
35033493
35043494
Raises
35053495
------
3506-
raises if the node is not a table
3496+
TypeError if trying to create an index on a complex-type column.
35073497
3498+
Notes
3499+
-----
3500+
Cannot index Time64Col or ComplexCol.
3501+
Pytables must be >= 3.0.
35083502
"""
35093503

35103504
if not self.infer_axes():
@@ -4404,7 +4398,6 @@ class AppendableSeriesTable(AppendableFrameTable):
44044398
table_type = "appendable_series"
44054399
ndim = 2
44064400
obj_type = Series
4407-
storage_obj_type = DataFrame
44084401

44094402
@property
44104403
def is_transposed(self) -> bool:
@@ -4792,7 +4785,8 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd
47924785
----------
47934786
data : np.ndarray[object]
47944787
encoding : str
4795-
errors : handler for encoding errors
4788+
errors : str
4789+
Handler for encoding errors.
47964790
47974791
Returns
47984792
-------
@@ -4813,13 +4807,15 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd
48134807
return data
48144808

48154809

4816-
def _unconvert_string_array(data, nan_rep, encoding: str, errors: str) -> np.ndarray:
4810+
def _unconvert_string_array(
4811+
data: np.ndarray, nan_rep, encoding: str, errors: str
4812+
) -> np.ndarray:
48174813
"""
4818-
inverse of _convert_string_array
4814+
Inverse of _convert_string_array.
48194815
48204816
Parameters
48214817
----------
4822-
data : fixed length string dtyped array
4818+
data : np.ndarray[fixed-length-string]
48234819
nan_rep : the storage repr of NaN
48244820
encoding : str
48254821
errors : str

pandas/tests/dtypes/test_inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ def test_scientific_no_exponent(self):
449449
def test_convert_non_hashable(self):
450450
# GH13324
451451
# make sure that we are handling non-hashables
452-
arr = np.array([[10.0, 2], 1.0, "apple"])
452+
arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object)
453453
result = lib.maybe_convert_numeric(arr, set(), False, True)
454454
tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
455455

pandas/tests/io/json/test_ujson.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,8 +761,9 @@ def test_array_list(self):
761761
["a", "b"],
762762
{"key": "val"},
763763
]
764-
arr = np.array(arr_list)
765-
tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr)
764+
arr = np.array(arr_list, dtype=object)
765+
result = np.array(ujson.decode(ujson.encode(arr)), dtype=object)
766+
tm.assert_numpy_array_equal(result, arr)
766767

767768
def test_array_float(self):
768769
dtype = np.float32

pandas/tests/io/test_html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def test_same_ordering(datapath):
8787
@pytest.mark.parametrize(
8888
"flavor",
8989
[
90-
pytest.param("bs4", marks=td.skip_if_no("lxml")),
90+
pytest.param("bs4", marks=td.skip_if_no("bs4")),
9191
pytest.param("lxml", marks=td.skip_if_no("lxml")),
9292
],
9393
scope="class",

pandas/tests/test_multilevel.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ def test_append_index(self):
120120
(1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"),
121121
(1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"),
122122
]
123-
+ expected_tuples
123+
+ expected_tuples,
124+
dtype=object,
124125
),
125126
None,
126127
)

pandas/tests/test_strings.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2857,7 +2857,8 @@ def test_partition_index(self):
28572857
result = values.str.partition("_", expand=False)
28582858
exp = Index(
28592859
np.array(
2860-
[("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None]
2860+
[("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None],
2861+
dtype=object,
28612862
)
28622863
)
28632864
tm.assert_index_equal(result, exp)
@@ -2866,7 +2867,8 @@ def test_partition_index(self):
28662867
result = values.str.rpartition("_", expand=False)
28672868
exp = Index(
28682869
np.array(
2869-
[("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None]
2870+
[("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None],
2871+
dtype=object,
28702872
)
28712873
)
28722874
tm.assert_index_equal(result, exp)

0 commit comments

Comments
 (0)