From 764acc7012c423684e980201825171d9b379f204 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Mon, 9 Dec 2019 23:07:01 +0200
Subject: [PATCH 1/2] STY: Underscores for long numbers; tidy message strings

---
 pandas/core/arrays/datetimes.py | 22 +++++++++++-----------
 pandas/core/frame.py            |  7 +++----
 pandas/core/generic.py          |  9 ++++-----
 pandas/core/series.py           | 12 +++++-------
 pandas/core/tools/datetimes.py  | 17 +++++++----------
 pandas/core/window/ewm.py       |  3 +--
 6 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index c05eeb761abcf..47f68dae530cc 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -300,7 +300,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps
     #  Timestamp.__richcmp__(DateTimeArray) operates pointwise
 
     # ensure that operations with numpy arrays defer to our implementation
-    __array_priority__ = 1000
+    __array_priority__ = 1_000
 
     # -----------------------------------------------------------------
     # Constructors
@@ -663,7 +663,7 @@ def __iter__(self):
         # convert in chunks of 10k for efficiency
         data = self.asi8
         length = len(self)
-        chunksize = 10000
+        chunksize = 10_000
         chunks = int(length / chunksize) + 1
         for i in range(chunks):
             start_i = i * chunksize
@@ -1076,9 +1076,9 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
             nonexistent, timedelta
         ):
             raise ValueError(
-                "The nonexistent argument must be one of 'raise',"
-                " 'NaT', 'shift_forward', 'shift_backward' or"
-                " a timedelta object"
+                "The nonexistent argument must be one of 'raise', "
+                "'NaT', 'shift_forward', 'shift_backward' or "
+                "a timedelta object"
             )
 
         if self.tz is not None:
@@ -1151,7 +1151,7 @@ def normalize(self):
         """
         if self.tz is None or timezones.is_utc(self.tz):
             not_null = ~self.isna()
-            DAY_NS = ccalendar.DAY_SECONDS * 1000000000
+            DAY_NS = ccalendar.DAY_SECONDS * 1_000_000_000
             new_values = self.asi8.copy()
             adjustment = new_values[not_null] % DAY_NS
             new_values[not_null] = new_values[not_null] - adjustment
@@ -1767,13 +1767,13 @@ def to_julian_date(self):
             + np.floor(year / 4)
             - np.floor(year / 100)
             + np.floor(year / 400)
-            + 1721118.5
+            + 1_721_118.5
             + (
                 self.hour
                 + self.minute / 60.0
-                + self.second / 3600.0
-                + self.microsecond / 3600.0 / 1e6
-                + self.nanosecond / 3600.0 / 1e9
+                + self.second / 3_600.0
+                + self.microsecond / 3_600.0 / 1e6
+                + self.nanosecond / 3_600.0 / 1e9
             )
             / 24.0
         )
@@ -2031,7 +2031,7 @@ def maybe_convert_dtype(data, copy):
         # Note: without explicitly raising here, PeriodIndex
         #  test_setops.test_join_does_not_recur fails
         raise TypeError(
-            "Passing PeriodDtype data is invalid.  Use `data.to_timestamp()` instead"
+            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
         )
 
     elif is_categorical_dtype(data):
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 413d7a8f3afc0..245779c9408dc 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -593,7 +593,6 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
         users expect. display.max_columns remains in effect.
         GH3541, GH3573
         """
-
         width, height = console.get_console_size()
         max_columns = get_option("display.max_columns")
         nb_columns = len(self.columns)
@@ -2359,11 +2358,11 @@ def _non_verbose_repr():
         def _sizeof_fmt(num, size_qualifier):
             # returns size in human readable format
             for x in ["bytes", "KB", "MB", "GB", "TB"]:
-                if num < 1024.0:
+                if num < 1_024.0:
                     return "{num:3.1f}{size_q} {x}".format(
                         num=num, size_q=size_qualifier, x=x
                     )
-                num /= 1024.0
+                num /= 1_024.0
             return "{num:3.1f}{size_q} {pb}".format(
                 num=num, size_q=size_qualifier, pb="PB"
             )
@@ -3542,7 +3541,7 @@ def lookup(self, row_labels, col_labels):
         if n != len(col_labels):
             raise ValueError("Row labels must have same size as column labels")
 
-        thresh = 1000
+        thresh = 1_000
         if not self._is_mixed_type or n > thresh:
             values = self.values
             ridx = self.index.get_indexer(row_labels)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index fcd160ed8d9a7..d27f06dad31a0 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1931,7 +1931,7 @@ def empty(self) -> bool_t:
 
     # This is also set in IndexOpsMixin
     # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
-    __array_priority__ = 1000
+    __array_priority__ = 1_000
 
     def __array__(self, dtype=None):
         return com.values_from_object(self)
@@ -9165,7 +9165,6 @@ def truncate(
         2016-01-10 23:59:58  1
         2016-01-10 23:59:59  1
         """
-
         if axis is None:
             axis = self._stat_axis_number
         axis = self._get_axis_number(axis)
@@ -9391,9 +9390,9 @@ def tz_localize(
             nonexistent, timedelta
         ):
             raise ValueError(
-                "The nonexistent argument must be one of 'raise',"
-                " 'NaT', 'shift_forward', 'shift_backward' or"
-                " a timedelta object"
+                "The nonexistent argument must be one of 'raise', "
+                "'NaT', 'shift_forward', 'shift_backward' or "
+                "a timedelta object"
             )
 
         axis = self._get_axis_number(axis)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 965736a097c21..3eabb70581827 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -197,9 +197,9 @@ def __init__(
             if is_empty_data(data) and dtype is None:
                 # gh-17261
                 warnings.warn(
-                    "The default dtype for empty Series will be 'object' instead"
-                    " of 'float64' in a future version. Specify a dtype explicitly"
-                    " to silence this warning.",
+                    "The default dtype for empty Series will be 'object' instead "
+                    "of 'float64' in a future version. Specify a dtype explicitly "
+                    "to silence this warning.",
                     DeprecationWarning,
                     stacklevel=2,
                 )
@@ -257,14 +257,13 @@ def __init__(
                     raise AssertionError(
                         "Cannot pass both SingleBlockManager "
                         "`data` argument and a different "
-                        "`index` argument.  `copy` must "
-                        "be False."
+                        "`index` argument. `copy` must be False."
                     )
 
             elif is_extension_array_dtype(data):
                 pass
             elif isinstance(data, (set, frozenset)):
-                raise TypeError(f"{repr(type(data).__name__)} type is unordered")
+                raise TypeError(f"'{type(data).__name__}' type is unordered")
             elif isinstance(data, ABCSparseArray):
                 # handle sparse passed here (and force conversion)
                 data = data.to_dense()
@@ -3721,7 +3720,6 @@ def _reduce(
         If we have an ndarray as a value, then simply perform the operation,
         otherwise delegate to the object.
         """
-
         delegate = self._values
 
         if axis is not None:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index e9e5959454807..65292fb23793c 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -98,7 +98,7 @@ def should_cache(
         if len(arg) <= 50:
             return False
 
-        if len(arg) <= 5000:
+        if len(arg) <= 5_000:
             check_count = int(len(arg) * 0.1)
         else:
             check_count = 500
@@ -832,25 +832,22 @@ def coerce(values):
         return values
 
     values = (
-        coerce(arg[unit_rev["year"]]) * 10000
+        coerce(arg[unit_rev["year"]]) * 10_000
         + coerce(arg[unit_rev["month"]]) * 100
         + coerce(arg[unit_rev["day"]])
     )
     try:
         values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz)
-    except (TypeError, ValueError) as e:
-        raise ValueError("cannot assemble the datetimes: {error}".format(error=e))
+    except (TypeError, ValueError) as err:
+        raise ValueError(f"cannot assemble the datetimes: {err}")
 
     for u in ["h", "m", "s", "ms", "us", "ns"]:
         value = unit_rev.get(u)
         if value is not None and value in arg:
             try:
                 values += to_timedelta(coerce(arg[value]), unit=u, errors=errors)
-            except (TypeError, ValueError) as e:
-                raise ValueError(
-                    "cannot assemble the datetimes [{value}]: "
-                    "{error}".format(value=value, error=e)
-                )
+            except (TypeError, ValueError) as err:
+                raise ValueError(f"cannot assemble the datetimes [{value}]: {err}")
     return values
 
 
@@ -870,7 +867,7 @@ def calc(carg):
         # calculate the actual result
         carg = carg.astype(object)
         parsed = parsing.try_parse_year_month_day(
-            carg / 10000, carg / 100 % 100, carg % 100
+            carg / 10_000, carg / 100 % 100, carg % 100
         )
         return tslib.array_to_datetime(parsed, errors=errors)[0]
 
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
index 8337318ac6bcc..baecba7e78384 100644
--- a/pandas/core/window/ewm.py
+++ b/pandas/core/window/ewm.py
@@ -231,8 +231,7 @@ def _apply(self, func, **kwargs):
                 cfunc = getattr(window_aggregations, func, None)
                 if cfunc is None:
                     raise ValueError(
-                        "we do not support this function "
-                        f"in window_aggregations.{func}"
+                        f"we do not support this function in window_aggregations.{func}"
                     )
 
                 def func(arg):

From 1cb729fa558245fcb1e8aa3edeb8108c750852a4 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Tue, 10 Dec 2019 13:54:06 +0200
Subject: [PATCH 2/2] Review fixes: drop underscores from short numeric literals

---
 pandas/core/arrays/datetimes.py | 10 +++++-----
 pandas/core/frame.py            |  6 +++---
 pandas/core/generic.py          |  2 +-
 pandas/core/tools/datetimes.py  |  6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 47f68dae530cc..34e01e55d2028 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -300,7 +300,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps
     #  Timestamp.__richcmp__(DateTimeArray) operates pointwise
 
     # ensure that operations with numpy arrays defer to our implementation
-    __array_priority__ = 1_000
+    __array_priority__ = 1000
 
     # -----------------------------------------------------------------
     # Constructors
@@ -663,7 +663,7 @@ def __iter__(self):
         # convert in chunks of 10k for efficiency
         data = self.asi8
         length = len(self)
-        chunksize = 10_000
+        chunksize = 10000
         chunks = int(length / chunksize) + 1
         for i in range(chunks):
             start_i = i * chunksize
@@ -1771,9 +1771,9 @@ def to_julian_date(self):
             + (
                 self.hour
                 + self.minute / 60.0
-                + self.second / 3_600.0
-                + self.microsecond / 3_600.0 / 1e6
-                + self.nanosecond / 3_600.0 / 1e9
+                + self.second / 3600.0
+                + self.microsecond / 3600.0 / 1e6
+                + self.nanosecond / 3600.0 / 1e9
             )
             / 24.0
         )
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 245779c9408dc..b8d86d9e295fe 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2358,11 +2358,11 @@ def _non_verbose_repr():
         def _sizeof_fmt(num, size_qualifier):
             # returns size in human readable format
             for x in ["bytes", "KB", "MB", "GB", "TB"]:
-                if num < 1_024.0:
+                if num < 1024.0:
                     return "{num:3.1f}{size_q} {x}".format(
                         num=num, size_q=size_qualifier, x=x
                     )
-                num /= 1_024.0
+                num /= 1024.0
             return "{num:3.1f}{size_q} {pb}".format(
                 num=num, size_q=size_qualifier, pb="PB"
             )
@@ -3541,7 +3541,7 @@ def lookup(self, row_labels, col_labels):
         if n != len(col_labels):
             raise ValueError("Row labels must have same size as column labels")
 
-        thresh = 1_000
+        thresh = 1000
         if not self._is_mixed_type or n > thresh:
             values = self.values
             ridx = self.index.get_indexer(row_labels)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d27f06dad31a0..f88f37fac7157 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1931,7 +1931,7 @@ def empty(self) -> bool_t:
 
     # This is also set in IndexOpsMixin
     # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
-    __array_priority__ = 1_000
+    __array_priority__ = 1000
 
     def __array__(self, dtype=None):
         return com.values_from_object(self)
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 65292fb23793c..f2818a0b92e6b 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -98,7 +98,7 @@ def should_cache(
         if len(arg) <= 50:
             return False
 
-        if len(arg) <= 5_000:
+        if len(arg) <= 5000:
             check_count = int(len(arg) * 0.1)
         else:
             check_count = 500
@@ -832,7 +832,7 @@ def coerce(values):
         return values
 
     values = (
-        coerce(arg[unit_rev["year"]]) * 10_000
+        coerce(arg[unit_rev["year"]]) * 10000
         + coerce(arg[unit_rev["month"]]) * 100
         + coerce(arg[unit_rev["day"]])
     )
@@ -867,7 +867,7 @@ def calc(carg):
         # calculate the actual result
         carg = carg.astype(object)
         parsed = parsing.try_parse_year_month_day(
-            carg / 10_000, carg / 100 % 100, carg % 100
+            carg / 10000, carg / 100 % 100, carg % 100
         )
         return tslib.array_to_datetime(parsed, errors=errors)[0]