diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 101612893cb02..49b7b1d1d3a9b 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -327,25 +327,7 @@ def array_equivalent(left, right, strict_nan=False): left = left.view('i8') right = right.view('i8') - # NaNs cannot occur otherwise. - try: - return np.array_equal(left, right) - except AttributeError: - # see gh-13388 - # - # NumPy v1.7.1 has a bug in its array_equal - # function that prevents it from correctly - # comparing two arrays with complex dtypes. - # This bug is corrected in v1.8.0, so remove - # this try-except block as soon as we stop - # supporting NumPy versions < 1.8.0 - if not is_dtype_equal(left.dtype, right.dtype): - return False - - left = left.tolist() - right = right.tolist() - - return left == right + return np.array_equal(left, right) def _infer_fill_value(val): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 83b382ec0ed72..6799d3b5746d0 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1289,6 +1289,15 @@ def get_result(other): elif is_numeric_v_string_like(values, other): result = False + # avoid numpy warning of elementwise comparisons + elif func.__name__ == 'eq': + if is_list_like(other) and not isinstance(other, np.ndarray): + other = np.asarray(other) + + # if we can broadcast, then ok + if values.shape[-1] != other.shape[-1]: + return False + result = func(values, other) else: result = func(values, other) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 93514a8a42215..aac8f785f3d99 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2,6 +2,7 @@ from __future__ import print_function +import warnings from datetime import timedelta from distutils.version import LooseVersion import sys @@ -102,7 +103,6 @@ def test_corr_int(self): # dtypes other than float64 #1761 df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) - # it works! df3.cov() df3.corr() @@ -117,7 +117,11 @@ def test_corr_int_and_boolean(self): expected = DataFrame(np.ones((2, 2)), index=[ 'a', 'b'], columns=['a', 'b']) for meth in ['pearson', 'kendall', 'spearman']: - tm.assert_frame_equal(df.corr(meth), expected) + + # RuntimeWarning + with warnings.catch_warnings(record=True): + result = df.corr(meth) + tm.assert_frame_equal(result, expected) def test_corr_cov_independent_index_column(self): # GH 14617 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index d6bdb764f1c8e..055a490bc6b5d 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -23,6 +23,19 @@ PossiblePrecisionLoss, StataMissingValue) +@pytest.fixture +def dirpath(): + return tm.get_data_path() + + +@pytest.fixture +def parsed_114(dirpath): + dta14_114 = os.path.join(dirpath, 'stata5_114.dta') + parsed_114 = read_stata(dta14_114, convert_dates=True) + parsed_114.index.name = 'index' + return parsed_114 + + class TestStata(object): def setup_method(self, method): @@ -108,10 +121,12 @@ def test_data_method(self): parsed_114_read = rdr.read() tm.assert_frame_equal(parsed_114_data, parsed_114_read) - def test_read_dta1(self): + @pytest.mark.parametrize( + 'file', ['dta1_114', 'dta1_117']) + def test_read_dta1(self, file): - parsed_114 = self.read_dta(self.dta1_114) - parsed_117 = self.read_dta(self.dta1_117) + file = getattr(self, file) + parsed = self.read_dta(file) # Pandas uses np.nan as missing value. # Thus, all columns will be of type float, regardless of their name. @@ -123,8 +138,7 @@ def test_read_dta1(self): # the casting doesn't fail so need to match stata here expected['float_miss'] = expected['float_miss'].astype(np.float32) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed, expected) def test_read_dta2(self): if LooseVersion(sys.version) < '2.7': @@ -193,11 +207,12 @@ def test_read_dta2(self): tm.assert_frame_equal(parsed_117, expected, check_datetimelike_compat=True) - def test_read_dta3(self): - parsed_113 = self.read_dta(self.dta3_113) - parsed_114 = self.read_dta(self.dta3_114) - parsed_115 = self.read_dta(self.dta3_115) - parsed_117 = self.read_dta(self.dta3_117) + @pytest.mark.parametrize( + 'file', ['dta3_113', 'dta3_114', 'dta3_115', 'dta3_117']) + def test_read_dta3(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) # match stata here expected = self.read_csv(self.csv3) @@ -205,16 +220,14 @@ def test_read_dta3(self): expected['year'] = expected['year'].astype(np.int16) expected['quarter'] = expected['quarter'].astype(np.int8) - tm.assert_frame_equal(parsed_113, expected) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_115, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed, expected) + + @pytest.mark.parametrize( + 'file', ['dta4_113', 'dta4_114', 'dta4_115', 'dta4_117']) + def test_read_dta4(self, file): - def test_read_dta4(self): - parsed_113 = self.read_dta(self.dta4_113) - parsed_114 = self.read_dta(self.dta4_114) - parsed_115 = self.read_dta(self.dta4_115) - parsed_117 = self.read_dta(self.dta4_117) + file = getattr(self, file) + parsed = self.read_dta(file) expected = DataFrame.from_records( [ @@ -237,10 +250,7 @@ def test_read_dta4(self): for col in expected], axis=1) # stata doesn't save .category metadata - tm.assert_frame_equal(parsed_113, expected, check_categorical=False) - tm.assert_frame_equal(parsed_114, expected, check_categorical=False) - tm.assert_frame_equal(parsed_115, expected, check_categorical=False) - tm.assert_frame_equal(parsed_117, expected, check_categorical=False) + tm.assert_frame_equal(parsed, expected, check_categorical=False) # File containing strls def test_read_dta12(self): @@ -427,7 +437,13 @@ def test_read_write_dta13(self): tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted) - def test_read_write_reread_dta14(self): + @pytest.mark.parametrize( + 'file', ['dta14_113', 'dta14_114', 'dta14_115', 'dta14_117']) + def test_read_write_reread_dta14(self, file, parsed_114): + file = getattr(self, file) + parsed = self.read_dta(file) + parsed.index.name = 'index' + expected = self.read_csv(self.csv14) cols = ['byte_', 'int_', 'long_', 'float_', 'double_'] for col in cols: @@ -436,18 +452,7 @@ def test_read_write_reread_dta14(self): expected['date_td'] = pd.to_datetime( expected['date_td'], errors='coerce') - parsed_113 = self.read_dta(self.dta14_113) - parsed_113.index.name = 'index' - parsed_114 = self.read_dta(self.dta14_114) - parsed_114.index.name = 'index' - parsed_115 = self.read_dta(self.dta14_115) - parsed_115.index.name = 'index' - parsed_117 = self.read_dta(self.dta14_117) - parsed_117.index.name = 'index' - - tm.assert_frame_equal(parsed_114, parsed_113) - tm.assert_frame_equal(parsed_114, parsed_115) - tm.assert_frame_equal(parsed_114, parsed_117) + tm.assert_frame_equal(parsed_114, parsed) with tm.ensure_clean() as path: parsed_114.to_stata(path, {'date_td': 'td'}) @@ -455,7 +460,10 @@ def test_read_write_reread_dta14(self): tm.assert_frame_equal( written_and_read_again.set_index('index'), parsed_114) - def test_read_write_reread_dta15(self): + @pytest.mark.parametrize( + 'file', ['dta15_113', 'dta15_114', 'dta15_115', 'dta15_117']) + def test_read_write_reread_dta15(self, file): + expected = self.read_csv(self.csv15) expected['byte_'] = expected['byte_'].astype(np.int8) expected['int_'] = expected['int_'].astype(np.int16) @@ -465,15 +473,10 @@ def test_read_write_reread_dta15(self): expected['date_td'] = expected['date_td'].apply( datetime.strptime, args=('%Y-%m-%d',)) - parsed_113 = self.read_dta(self.dta15_113) - parsed_114 = self.read_dta(self.dta15_114) - parsed_115 = self.read_dta(self.dta15_115) - parsed_117 = self.read_dta(self.dta15_117) + file = getattr(self, file) + parsed = self.read_dta(file) - tm.assert_frame_equal(expected, parsed_114) - tm.assert_frame_equal(parsed_113, parsed_114) - tm.assert_frame_equal(parsed_114, parsed_115) - tm.assert_frame_equal(parsed_114, parsed_117) + tm.assert_frame_equal(expected, parsed) def test_timestamp_and_label(self): original = DataFrame([(1,)], columns=['variable']) @@ -710,7 +713,9 @@ def test_missing_value_generator(self): '