From 764acc7012c423684e980201825171d9b379f204 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Mon, 9 Dec 2019 23:07:01 +0200 Subject: [PATCH 1/2] STY: Underscores for long numbers --- pandas/core/arrays/datetimes.py | 22 +++++++++++----------- pandas/core/frame.py | 7 +++---- pandas/core/generic.py | 9 ++++----- pandas/core/series.py | 12 +++++------- pandas/core/tools/datetimes.py | 17 +++++++---------- pandas/core/window/ewm.py | 3 +-- 6 files changed, 31 insertions(+), 39 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c05eeb761abcf..47f68dae530cc 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -300,7 +300,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps # Timestamp.__richcmp__(DateTimeArray) operates pointwise # ensure that operations with numpy arrays defer to our implementation - __array_priority__ = 1000 + __array_priority__ = 1_000 # ----------------------------------------------------------------- # Constructors @@ -663,7 +663,7 @@ def __iter__(self): # convert in chunks of 10k for efficiency data = self.asi8 length = len(self) - chunksize = 10000 + chunksize = 10_000 chunks = int(length / chunksize) + 1 for i in range(chunks): start_i = i * chunksize @@ -1076,9 +1076,9 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): nonexistent, timedelta ): raise ValueError( - "The nonexistent argument must be one of 'raise'," - " 'NaT', 'shift_forward', 'shift_backward' or" - " a timedelta object" + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" ) if self.tz is not None: @@ -1151,7 +1151,7 @@ def normalize(self): """ if self.tz is None or timezones.is_utc(self.tz): not_null = ~self.isna() - DAY_NS = ccalendar.DAY_SECONDS * 1000000000 + DAY_NS = ccalendar.DAY_SECONDS * 1_000_000_000 new_values = self.asi8.copy() adjustment = new_values[not_null] % DAY_NS new_values[not_null] = new_values[not_null] - adjustment @@ -1767,13 +1767,13 @@ def to_julian_date(self): + np.floor(year / 4) - np.floor(year / 100) + np.floor(year / 400) - + 1721118.5 + + 1_721_118.5 + ( self.hour + self.minute / 60.0 - + self.second / 3600.0 - + self.microsecond / 3600.0 / 1e6 - + self.nanosecond / 3600.0 / 1e9 + + self.second / 3_600.0 + + self.microsecond / 3_600.0 / 1e6 + + self.nanosecond / 3_600.0 / 1e9 ) / 24.0 ) @@ -2031,7 +2031,7 @@ def maybe_convert_dtype(data, copy): # Note: without explicitly raising here, PeriodIndex # test_setops.test_join_does_not_recur fails raise TypeError( - "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead" + "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead" ) elif is_categorical_dtype(data): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 413d7a8f3afc0..245779c9408dc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -593,7 +593,6 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: users expect. display.max_columns remains in effect. GH3541, GH3573 """ - width, height = console.get_console_size() max_columns = get_option("display.max_columns") nb_columns = len(self.columns) @@ -2359,11 +2358,11 @@ def _non_verbose_repr(): def _sizeof_fmt(num, size_qualifier): # returns size in human readable format for x in ["bytes", "KB", "MB", "GB", "TB"]: - if num < 1024.0: + if num < 1_024.0: return "{num:3.1f}{size_q} {x}".format( num=num, size_q=size_qualifier, x=x ) - num /= 1024.0 + num /= 1_024.0 return "{num:3.1f}{size_q} {pb}".format( num=num, size_q=size_qualifier, pb="PB" ) @@ -3542,7 +3541,7 @@ def lookup(self, row_labels, col_labels): if n != len(col_labels): raise ValueError("Row labels must have same size as column labels") - thresh = 1000 + thresh = 1_000 if not self._is_mixed_type or n > thresh: values = self.values ridx = self.index.get_indexer(row_labels) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fcd160ed8d9a7..d27f06dad31a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1931,7 +1931,7 @@ def empty(self) -> bool_t: # This is also set in IndexOpsMixin # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented - __array_priority__ = 1000 + __array_priority__ = 1_000 def __array__(self, dtype=None): return com.values_from_object(self) @@ -9165,7 +9165,6 @@ def truncate( 2016-01-10 23:59:58 1 2016-01-10 23:59:59 1 """ - if axis is None: axis = self._stat_axis_number axis = self._get_axis_number(axis) @@ -9391,9 +9390,9 @@ def tz_localize( nonexistent, timedelta ): raise ValueError( - "The nonexistent argument must be one of 'raise'," - " 'NaT', 'shift_forward', 'shift_backward' or" - " a timedelta object" + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" ) axis = self._get_axis_number(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 965736a097c21..3eabb70581827 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -197,9 +197,9 @@ def __init__( if is_empty_data(data) and dtype is None: # gh-17261 warnings.warn( - "The default dtype for empty Series will be 'object' instead" - " of 'float64' in a future version. Specify a dtype explicitly" - " to silence this warning.", + "The default dtype for empty Series will be 'object' instead " + "of 'float64' in a future version. Specify a dtype explicitly " + "to silence this warning.", DeprecationWarning, stacklevel=2, ) @@ -257,14 +257,13 @@ def __init__( raise AssertionError( "Cannot pass both SingleBlockManager " "`data` argument and a different " - "`index` argument. `copy` must " - "be False." + "`index` argument. `copy` must be False." ) elif is_extension_array_dtype(data): pass elif isinstance(data, (set, frozenset)): - raise TypeError(f"{repr(type(data).__name__)} type is unordered") + raise TypeError(f"'{type(data).__name__}' type is unordered") elif isinstance(data, ABCSparseArray): # handle sparse passed here (and force conversion) data = data.to_dense() @@ -3721,7 +3720,6 @@ def _reduce( If we have an ndarray as a value, then simply perform the operation, otherwise delegate to the object. """ - delegate = self._values if axis is not None: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e9e5959454807..65292fb23793c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -98,7 +98,7 @@ def should_cache( if len(arg) <= 50: return False - if len(arg) <= 5000: + if len(arg) <= 5_000: check_count = int(len(arg) * 0.1) else: check_count = 500 @@ -832,25 +832,22 @@ def coerce(values): return values values = ( - coerce(arg[unit_rev["year"]]) * 10000 + coerce(arg[unit_rev["year"]]) * 10_000 + coerce(arg[unit_rev["month"]]) * 100 + coerce(arg[unit_rev["day"]]) ) try: values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz) - except (TypeError, ValueError) as e: - raise ValueError("cannot assemble the datetimes: {error}".format(error=e)) + except (TypeError, ValueError) as err: + raise ValueError(f"cannot assemble the datetimes: {err}") for u in ["h", "m", "s", "ms", "us", "ns"]: value = unit_rev.get(u) if value is not None and value in arg: try: values += to_timedelta(coerce(arg[value]), unit=u, errors=errors) - except (TypeError, ValueError) as e: - raise ValueError( - "cannot assemble the datetimes [{value}]: " - "{error}".format(value=value, error=e) - ) + except (TypeError, ValueError) as err: + raise ValueError(f"cannot assemble the datetimes [{value}]: {err}") return values @@ -870,7 +867,7 @@ def calc(carg): # calculate the actual result carg = carg.astype(object) parsed = parsing.try_parse_year_month_day( - carg / 10000, carg / 100 % 100, carg % 100 + carg / 10_000, carg / 100 % 100, carg % 100 ) return tslib.array_to_datetime(parsed, errors=errors)[0] diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 8337318ac6bcc..baecba7e78384 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -231,8 +231,7 @@ def _apply(self, func, **kwargs): cfunc = getattr(window_aggregations, func, None) if cfunc is None: raise ValueError( - "we do not support this function " - f"in window_aggregations.{func}" + f"we do not support this function in window_aggregations.{func}" ) def func(arg): From 1cb729fa558245fcb1e8aa3edeb8108c750852a4 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 10 Dec 2019 13:54:06 +0200 Subject: [PATCH 2/2] Review fixes --- pandas/core/arrays/datetimes.py | 10 +++++----- pandas/core/frame.py | 6 +++--- pandas/core/generic.py | 2 +- pandas/core/tools/datetimes.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 47f68dae530cc..34e01e55d2028 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -300,7 +300,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps # Timestamp.__richcmp__(DateTimeArray) operates pointwise # ensure that operations with numpy arrays defer to our implementation - __array_priority__ = 1_000 + __array_priority__ = 1000 # ----------------------------------------------------------------- # Constructors @@ -663,7 +663,7 @@ def __iter__(self): # convert in chunks of 10k for efficiency data = self.asi8 length = len(self) - chunksize = 10_000 + chunksize = 10000 chunks = int(length / chunksize) + 1 for i in range(chunks): start_i = i * chunksize @@ -1771,9 +1771,9 @@ def to_julian_date(self): + ( self.hour + self.minute / 60.0 - + self.second / 3_600.0 - + self.microsecond / 3_600.0 / 1e6 - + self.nanosecond / 3_600.0 / 1e9 + + self.second / 3600.0 + + self.microsecond / 3600.0 / 1e6 + + self.nanosecond / 3600.0 / 1e9 ) / 24.0 ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 245779c9408dc..b8d86d9e295fe 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2358,11 +2358,11 @@ def _non_verbose_repr(): def _sizeof_fmt(num, size_qualifier): # returns size in human readable format for x in ["bytes", "KB", "MB", "GB", "TB"]: - if num < 1_024.0: + if num < 1024.0: return "{num:3.1f}{size_q} {x}".format( num=num, size_q=size_qualifier, x=x ) - num /= 1_024.0 + num /= 1024.0 return "{num:3.1f}{size_q} {pb}".format( num=num, size_q=size_qualifier, pb="PB" ) @@ -3541,7 +3541,7 @@ def lookup(self, row_labels, col_labels): if n != len(col_labels): raise ValueError("Row labels must have same size as column labels") - thresh = 1_000 + thresh = 1000 if not self._is_mixed_type or n > thresh: values = self.values ridx = self.index.get_indexer(row_labels) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d27f06dad31a0..f88f37fac7157 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1931,7 +1931,7 @@ def empty(self) -> bool_t: # This is also set in IndexOpsMixin # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented - __array_priority__ = 1_000 + __array_priority__ = 1000 def __array__(self, dtype=None): return com.values_from_object(self) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 65292fb23793c..f2818a0b92e6b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -98,7 +98,7 @@ def should_cache( if len(arg) <= 50: return False - if len(arg) <= 5_000: + if len(arg) <= 5000: check_count = int(len(arg) * 0.1) else: check_count = 500 @@ -832,7 +832,7 @@ def coerce(values): return values values = ( - coerce(arg[unit_rev["year"]]) * 10_000 + coerce(arg[unit_rev["year"]]) * 10000 + coerce(arg[unit_rev["month"]]) * 100 + coerce(arg[unit_rev["day"]]) ) @@ -867,7 +867,7 @@ def calc(carg): # calculate the actual result carg = carg.astype(object) parsed = parsing.try_parse_year_month_day( - carg / 10_000, carg / 100 % 100, carg % 100 + carg / 10000, carg / 100 % 100, carg % 100 ) return tslib.array_to_datetime(parsed, errors=errors)[0]