diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1eb8f531dc62a..218afb734d629 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -345,10 +345,7 @@ def convert_delta_safe(base, deltas, unit) -> Series: has_bad_values = False if bad_locs.any(): has_bad_values = True - # reset cache to avoid SettingWithCopy checks (we own the DataFrame and the - # `dates` Series is used to overwrite itself in the DataFramae) - dates._reset_cacher() - dates[bad_locs] = 1.0 # Replace with NaT + dates._values[bad_locs] = 1.0 # Replace with NaT dates = dates.astype(np.int64) if fmt.startswith(("%tc", "tc")): # Delta ms relative to base @@ -465,11 +462,10 @@ def g(x: datetime) -> int: bad_loc = isna(dates) index = dates.index if bad_loc.any(): - dates = Series(dates) if lib.is_np_dtype(dates.dtype, "M"): - dates[bad_loc] = to_datetime(stata_epoch) + dates._values[bad_loc] = to_datetime(stata_epoch) else: - dates[bad_loc] = stata_epoch + dates._values[bad_loc] = stata_epoch if fmt in ["%tc", "tc"]: d = parse_dates_safe(dates, delta=True) @@ -599,9 +595,8 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame: for col in data: # Cast from unsupported types to supported types is_nullable_int = isinstance(data[col].dtype, (IntegerDtype, BooleanDtype)) - orig = data[col] # We need to find orig_missing before altering data below - orig_missing = orig.isna() + orig_missing = data[col].isna() if is_nullable_int: missing_loc = data[col].isna() if missing_loc.any(): @@ -1783,15 +1778,15 @@ def read( for idx in valid_dtypes: dtype = data.iloc[:, idx].dtype if dtype not in (object_type, self._dtyplist[idx]): - data.iloc[:, idx] = data.iloc[:, idx].astype(dtype) + data.isetitem(idx, data.iloc[:, idx].astype(dtype)) data = self._do_convert_missing(data, convert_missing) if convert_dates: for i, fmt in enumerate(self._fmtlist): if any(fmt.startswith(date_fmt) for date_fmt in _date_formats): - data.iloc[:, i] = _stata_elapsed_date_to_datetime_vec( - data.iloc[:, i], fmt + data.isetitem( + i, _stata_elapsed_date_to_datetime_vec(data.iloc[:, i], fmt) ) if convert_categoricals and self._format_version > 108: @@ -1866,7 +1861,7 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra replacements[i] = replacement if replacements: for idx, value in replacements.items(): - data.iloc[:, idx] = value + data.isetitem(idx, value) return data def _insert_strls(self, data: DataFrame) -> DataFrame: @@ -1876,7 +1871,7 @@ def _insert_strls(self, data: DataFrame) -> DataFrame: if typ != "Q": continue # Wrap v_o in a string to allow uint64 values as keys on 32bit OS - data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] + data.isetitem(i, [self.GSO[str(k)] for k in data.iloc[:, i]]) return data def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 718f967f2f3d8..074033868635a 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -289,8 +289,6 @@ def test_read_expands_user_home_dir( ): reader(path) - # TODO(CoW-warn) avoid warnings in the stata reader code - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") @pytest.mark.parametrize( "reader, module, path", [ diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 19d81d50f5774..6e76c8fef6bc3 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -32,11 +32,6 @@ read_stata, ) -# TODO(CoW-warn) avoid warnings in the stata reader code -pytestmark = pytest.mark.filterwarnings( - "ignore:Setting a value on a view:FutureWarning" -) - @pytest.fixture def mixed_frame(): @@ -140,7 +135,6 @@ def test_read_dta1(self, file, datapath): tm.assert_frame_equal(parsed, expected) - @pytest.mark.filterwarnings("always") def test_read_dta2(self, datapath): expected = DataFrame.from_records( [ @@ -183,13 +177,11 @@ def test_read_dta2(self, datapath): path2 = datapath("io", "data", "stata", "stata2_115.dta") path3 = datapath("io", "data", "stata", "stata2_117.dta") - # TODO(CoW-warn) avoid warnings in the stata reader code - # once fixed -> remove `raise_on_extra_warnings=False` again - with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False): + with tm.assert_produces_warning(UserWarning): parsed_114 = self.read_dta(path1) - with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False): + with tm.assert_produces_warning(UserWarning): parsed_115 = self.read_dta(path2) - with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False): + with tm.assert_produces_warning(UserWarning): parsed_117 = self.read_dta(path3) # FIXME: don't leave commented-out # 113 is buggy due to limits of date format support in Stata