Remove Python2 numeric relicts (#38916)

eumiro · web-flow · commit f9ce9d6dcc28 · 2021-01-03T12:09:49.000-05:00
diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
@@ -122,18 +122,18 @@ def setup(self, op):
         n_rows = 500
 
         # construct dataframe with 2 blocks
-        arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
-        arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
+        arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
+        arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
         df = pd.concat(
             [pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
         )
         # should already be the case, but just to be sure
         df._consolidate_inplace()
 
         # TODO: GH#33198 the setting here shoudlnt need two steps
-        arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
-        arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8")
-        arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
+        arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8")
+        arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
+        arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
         df2 = pd.concat(
             [pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
             axis=1,
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -263,7 +263,7 @@ class Repr:
     def setup(self):
         nrows = 10000
         data = np.random.randn(nrows, 10)
-        arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
+        arrays = np.tile(np.random.randn(3, nrows // 100), 100)
         idx = MultiIndex.from_arrays(arrays)
         self.df3 = DataFrame(data, index=idx)
         self.df4 = DataFrame(data, index=np.random.randn(nrows))
@@ -648,9 +648,9 @@ class Describe:
     def setup(self):
         self.df = DataFrame(
             {
-                "a": np.random.randint(0, 100, int(1e6)),
-                "b": np.random.randint(0, 100, int(1e6)),
-                "c": np.random.randint(0, 100, int(1e6)),
+                "a": np.random.randint(0, 100, 10 ** 6),
+                "b": np.random.randint(0, 100, 10 ** 6),
+                "c": np.random.randint(0, 100, 10 ** 6),
             }
         )
 
diff --git a/asv_bench/benchmarks/hash_functions.py b/asv_bench/benchmarks/hash_functions.py
@@ -103,9 +103,9 @@ class Float64GroupIndex:
     # GH28303
     def setup(self):
         self.df = pd.date_range(
-            start="1/1/2018", end="1/2/2018", periods=1e6
+            start="1/1/2018", end="1/2/2018", periods=10 ** 6
         ).to_frame()
-        self.group_index = np.round(self.df.index.astype(int) / 1e9)
+        self.group_index = np.round(self.df.index.astype(int) / 10 ** 9)
 
     def time_groupby(self):
         self.df.groupby(self.group_index).last()
diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
@@ -42,7 +42,7 @@ class ToNumericDowncast:
     ]
 
     N = 500000
-    N2 = int(N / 2)
+    N2 = N // 2
 
     data_dict = {
         "string-int": ["1"] * N2 + [2] * N2,
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
@@ -158,7 +158,7 @@ def setup(self):
         daily_dates = date_index.to_period("D").to_timestamp("S", "S")
         self.fracofday = date_index.values - daily_dates.values
         self.fracofday = self.fracofday.astype("timedelta64[ns]")
-        self.fracofday = self.fracofday.astype(np.float64) / 86400000000000.0
+        self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000
         self.fracofday = Series(self.fracofday, daily_dates)
         index = date_range(date_index.min(), date_index.max(), freq="D")
         self.temp = Series(1.0, index)[self.fracofday.index]
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -171,7 +171,7 @@ class PeakMemFixedWindowMinMax:
     params = ["min", "max"]
 
     def setup(self, operation):
-        N = int(1e6)
+        N = 10 ** 6
         arr = np.random.random(N)
         self.roll = pd.Series(arr).rolling(2)
 
@@ -233,7 +233,7 @@ class GroupbyLargeGroups:
 
     def setup(self):
         N = 100000
-        self.df = pd.DataFrame({"A": [1, 2] * int(N / 2), "B": np.random.randn(N)})
+        self.df = pd.DataFrame({"A": [1, 2] * (N // 2), "B": np.random.randn(N)})
 
     def time_rolling_multiindex_creation(self):
         self.df.groupby("A").rolling(3).mean()
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -284,7 +284,7 @@ def time_dir_strings(self):
 class SeriesGetattr:
     # https://github.com/pandas-dev/pandas/issues/19764
     def setup(self):
-        self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=int(1e6)))
+        self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=10 ** 6))
 
     def time_series_datetimeindex_repr(self):
         getattr(self.s, "a", None)
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
@@ -346,7 +346,7 @@ def time_iso8601_tz_spaceformat(self):
 class ToDatetimeNONISO8601:
     def setup(self):
         N = 10000
-        half = int(N / 2)
+        half = N // 2
         ts_string_1 = "March 1, 2018 12:00:00+0400"
         ts_string_2 = "March 1, 2018 12:00:00+0500"
         self.same_offset = [ts_string_1] * N
@@ -376,7 +376,7 @@ def setup(self):
         self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
         self.diff_offset = [
             f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
-        ] * int(N / 10)
+        ] * (N // 10)
 
     def time_exact(self):
         to_datetime(self.s2, format="%d%b%y")
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -901,7 +901,7 @@ def _create_missing_idx(nrows, ncols, density, random_state=None):
         random_state = np.random.RandomState(random_state)
 
     # below is cribbed from scipy.sparse
-    size = int(np.round((1 - density) * nrows * ncols))
+    size = round((1 - density) * nrows * ncols)
     # generate a few more to ensure unique values
     min_rows = 5
     fac = 1.02
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -835,7 +835,7 @@ def value_counts(
         result = result.sort_values(ascending=ascending)
 
     if normalize:
-        result = result / float(counts.sum())
+        result = result / counts.sum()
 
     return result
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2214,7 +2214,7 @@ def describe(self):
             A dataframe with frequency and counts by category.
         """
         counts = self.value_counts(dropna=False)
-        freqs = counts / float(counts.sum())
+        freqs = counts / counts.sum()
 
         from pandas.core.reshape.concat import concat
 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -572,7 +572,7 @@ def __iter__(self):
             data = self.asi8
             length = len(self)
             chunksize = 10000
-            chunks = int(length / chunksize) + 1
+            chunks = (length // chunksize) + 1
             for i in range(chunks):
                 start_i = i * chunksize
                 end_i = min((i + 1) * chunksize, length)
@@ -1847,12 +1847,12 @@ def to_julian_date(self):
             + 1_721_118.5
             + (
                 self.hour
-                + self.minute / 60.0
-                + self.second / 3600.0
-                + self.microsecond / 3600.0 / 1e6
-                + self.nanosecond / 3600.0 / 1e9
+                + self.minute / 60
+                + self.second / 3600
+                + self.microsecond / 3600 / 10 ** 6
+                + self.nanosecond / 3600 / 10 ** 9
             )
-            / 24.0
+            / 24
         )
 
     # -----------------------------------------------------------------
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -578,7 +578,7 @@ def density(self):
         >>> s.density
         0.6
         """
-        return float(self.sp_index.npoints) / float(self.sp_index.length)
+        return self.sp_index.npoints / self.sp_index.length
 
     @property
     def npoints(self) -> int:
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -338,7 +338,7 @@ def __iter__(self):
             data = self.asi8
             length = len(self)
             chunksize = 10000
-            chunks = int(length / chunksize) + 1
+            chunks = (length // chunksize) + 1
             for i in range(chunks):
                 start_i = i * chunksize
                 end_i = min((i + 1) * chunksize, length)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -5336,7 +5336,7 @@ def sample(
         elif n is not None and frac is None and n % 1 != 0:
             raise ValueError("Only integers accepted as `n` values")
         elif n is None and frac is not None:
-            n = int(round(frac * axis_length))
+            n = round(frac * axis_length)
         elif n is not None and frac is not None:
             raise ValueError("Please enter a value for `frac` OR `n`, not both")
 
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -122,7 +122,7 @@ def should_cache(
             return False
 
         if len(arg) <= 5000:
-            check_count = int(len(arg) * 0.1)
+            check_count = len(arg) // 10
         else:
             check_count = 500
     else:
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
@@ -61,7 +61,7 @@ def get_center_of_mass(
     elif span is not None:
         if span < 1:
             raise ValueError("span must satisfy: span >= 1")
-        comass = (span - 1) / 2.0
+        comass = (span - 1) / 2
     elif halflife is not None:
         if halflife <= 0:
             raise ValueError("halflife must satisfy: halflife > 0")
@@ -70,7 +70,7 @@ def get_center_of_mass(
     elif alpha is not None:
         if alpha <= 0 or alpha > 1:
             raise ValueError("alpha must satisfy: 0 < alpha <= 1")
-        comass = (1.0 - alpha) / alpha
+        comass = (1 - alpha) / alpha
     else:
         raise ValueError("Must pass one of comass, span, halflife, or alpha")
 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -860,7 +860,7 @@ def _value_with_fmt(self, val):
         elif isinstance(val, datetime.date):
             fmt = self.date_format
         elif isinstance(val, datetime.timedelta):
-            val = val.total_seconds() / float(86400)
+            val = val.total_seconds() / 86400
             fmt = "0"
         else:
             val = str(val)
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -282,7 +282,7 @@ def _generate_multiindex_header_rows(self) -> Iterator[List[Label]]:
 
     def _save_body(self) -> None:
         nrows = len(self.data_index)
-        chunks = int(nrows / self.chunksize) + 1
+        chunks = (nrows // self.chunksize) + 1
         for i in range(chunks):
             start_i = i * self.chunksize
             end_i = min(start_i + self.chunksize, nrows)
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1637,7 +1637,7 @@ def is_dates_only(
 
     values_int = values.asi8
     consider_values = values_int != iNaT
-    one_day_nanos = 86400 * 1e9
+    one_day_nanos = 86400 * 10 ** 9
     even_days = (
         np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0
     )
@@ -1741,7 +1741,7 @@ def get_format_timedelta64(
 
     consider_values = values_int != iNaT
 
-    one_day_nanos = 86400 * 1e9
+    one_day_nanos = 86400 * 10 ** 9
     even_days = (
         np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0
     )
diff --git a/pandas/io/formats/string.py b/pandas/io/formats/string.py
@@ -160,7 +160,7 @@ def _fit_strcols_to_terminal_width(self, strcols: List[List[str]]) -> str:
         counter = 0
         while adj_dif > 0 and n_cols > 1:
             counter += 1
-            mid = int(round(n_cols / 2.0))
+            mid = round(n_cols / 2)
             mid_ix = col_lens.index[mid]
             col_len = col_lens[mid_ix]
             # adjoin adds one
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -905,7 +905,7 @@ def insert(self, chunksize: Optional[int] = None, method: Optional[str] = None):
         elif chunksize == 0:
             raise ValueError("chunksize argument should be non-zero")
 
-        chunks = int(nrows / chunksize) + 1
+        chunks = (nrows // chunksize) + 1
 
         with self.pd_sql.run_transaction() as conn:
             for i in range(chunks):
diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py
@@ -38,7 +38,7 @@
 SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR
 SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY
 
-MUSEC_PER_DAY = 1e6 * SEC_PER_DAY
+MUSEC_PER_DAY = 10 ** 6 * SEC_PER_DAY
 
 _mpl_units = {}  # Cache for units overwritten by us
 
@@ -116,7 +116,7 @@ def deregister():
 
 
 def _to_ordinalf(tm: pydt.time) -> float:
-    tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + float(tm.microsecond / 1e6)
+    tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + tm.microsecond / 10 ** 6
     return tot_sec
 
 
@@ -182,7 +182,7 @@ def __call__(self, x, pos=0) -> str:
         """
         fmt = "%H:%M:%S.%f"
         s = int(x)
-        msus = int(round((x - s) * 1e6))
+        msus = round((x - s) * 10 ** 6)
         ms = msus // 1000
         us = msus % 1000
         m, s = divmod(s, 60)
@@ -429,7 +429,7 @@ def _from_ordinal(x, tz: Optional[tzinfo] = None) -> datetime:
     hour, remainder = divmod(24 * remainder, 1)
     minute, remainder = divmod(60 * remainder, 1)
     second, remainder = divmod(60 * remainder, 1)
-    microsecond = int(1e6 * remainder)
+    microsecond = int(1_000_000 * remainder)
     if microsecond < 10:
         microsecond = 0  # compensate for rounding errors
     dt = datetime(
@@ -439,7 +439,7 @@ def _from_ordinal(x, tz: Optional[tzinfo] = None) -> datetime:
         dt = dt.astimezone(tz)
 
     if microsecond > 999990:  # compensate for rounding errors
-        dt += timedelta(microseconds=1e6 - microsecond)
+        dt += timedelta(microseconds=1_000_000 - microsecond)
 
     return dt
 
@@ -611,27 +611,27 @@ def _second_finder(label_interval):
             info_fmt[day_start] = "%H:%M:%S\n%d-%b"
             info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y"
 
-        if span < periodsperday / 12000.0:
+        if span < periodsperday / 12000:
             _second_finder(1)
-        elif span < periodsperday / 6000.0:
+        elif span < periodsperday / 6000:
             _second_finder(2)
-        elif span < periodsperday / 2400.0:
+        elif span < periodsperday / 2400:
             _second_finder(5)
-        elif span < periodsperday / 1200.0:
+        elif span < periodsperday / 1200:
             _second_finder(10)
-        elif span < periodsperday / 800.0:
+        elif span < periodsperday / 800:
             _second_finder(15)
-        elif span < periodsperday / 400.0:
+        elif span < periodsperday / 400:
             _second_finder(30)
-        elif span < periodsperday / 150.0:
+        elif span < periodsperday / 150:
             _minute_finder(1)
-        elif span < periodsperday / 70.0:
+        elif span < periodsperday / 70:
             _minute_finder(2)
-        elif span < periodsperday / 24.0:
+        elif span < periodsperday / 24:
             _minute_finder(5)
-        elif span < periodsperday / 12.0:
+        elif span < periodsperday / 12:
             _minute_finder(15)
-        elif span < periodsperday / 6.0:
+        elif span < periodsperday / 6:
             _minute_finder(30)
         elif span < periodsperday / 2.5:
             _hour_finder(1, False)
@@ -1058,7 +1058,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
         """
         Convert seconds to 'D days HH:MM:SS.F'
         """
-        s, ns = divmod(x, 1e9)
+        s, ns = divmod(x, 10 ** 9)
         m, s = divmod(s, 60)
         h, m = divmod(m, 60)
         d, h = divmod(h, 24)
@@ -1072,7 +1072,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
 
     def __call__(self, x, pos=0) -> str:
         (vmin, vmax) = tuple(self.axis.get_view_interval())
-        n_decimals = int(np.ceil(np.log10(100 * 1e9 / abs(vmax - vmin))))
+        n_decimals = int(np.ceil(np.log10(100 * 10 ** 9 / abs(vmax - vmin))))
         if n_decimals > 9:
             n_decimals = 9
         return self.format_timedelta_ticks(x, pos, n_decimals)
diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
diff --git a/pandas/tests/window/moments/test_moments_rolling_quantile.py b/pandas/tests/window/moments/test_moments_rolling_quantile.py

Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ class ToNumericDowncast:`
`42`	`42`	`]`
`43`	`43`
`44`	`44`	`N = 500000`
`45`		`- N2 = int(N / 2)`
	`45`	`+ N2 = N // 2`
`46`	`46`
`47`	`47`	`data_dict = {`
`48`	`48`	`"string-int": ["1"] * N2 + [2] * N2,`
Original file line number	Diff line number	Diff line change
`@@ -1637,7 +1637,7 @@ def is_dates_only(`
`1637`	`1637`
`1638`	`1638`	`values_int = values.asi8`
`1639`	`1639`	`consider_values = values_int != iNaT`
`1640`		`- one_day_nanos = 86400 * 1e9`
	`1640`	`+ one_day_nanos = 86400 * 10 ** 9`
`1641`	`1641`	`even_days = (`
`1642`	`1642`	`np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0`
`1643`	`1643`	`)`
`@@ -1741,7 +1741,7 @@ def get_format_timedelta64(`
`1741`	`1741`
`1742`	`1742`	`consider_values = values_int != iNaT`
`1743`	`1743`
`1744`		`- one_day_nanos = 86400 * 1e9`
	`1744`	`+ one_day_nanos = 86400 * 10 ** 9`
`1745`	`1745`	`even_days = (`
`1746`	`1746`	`np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0`
`1747`	`1747`	`)`