CLN: F-string formatting in pandas/_libs/*.pyx (pandas-dev#29527)

ShaharNaveh · proost · commit 8462998a1125 · 2019-12-20T01:10:56.000+09:00
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -753,8 +753,7 @@ def group_quantile(ndarray[float64_t] out,
     assert values.shape[0] == N
 
     if not (0 <= q <= 1):
-        raise ValueError("'q' must be between 0 and 1. Got"
-                         " '{}' instead".format(q))
+        raise ValueError(f"'q' must be between 0 and 1. Got '{q}' instead")
 
     inter_methods = {
         'linear': INTERPOLATION_LINEAR,
diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
@@ -47,8 +47,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
     k = <bytes>key.encode(encoding)
     kb = <uint8_t *>k
     if len(k) != 16:
-        raise ValueError("key should be a 16-byte string encoded, "
-                         "got {key} (len {klen})".format(key=k, klen=len(k)))
+        raise ValueError(f"key should be a 16-byte string encoded, "
+                         f"got {k} (len {len(k)})")
 
     n = len(arr)
 
@@ -67,9 +67,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
             data = <bytes>str(val).encode(encoding)
 
         else:
-            raise TypeError("{val} of type {typ} is not a valid type "
-                            "for hashing, must be string or null"
-                            .format(val=val, typ=type(val)))
+            raise TypeError(f"{val} of type {type(val)} is not a valid type "
+                            f"for hashing, must be string or null"
+                            )
 
         l = len(data)
         lens[i] = l
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -109,7 +109,7 @@ cdef class IndexEngine:
             Py_ssize_t loc
 
         if is_definitely_invalid_key(val):
-            raise TypeError("'{val}' is an invalid key".format(val=val))
+            raise TypeError(f"'{val}' is an invalid key")
 
         if self.over_size_threshold and self.is_monotonic_increasing:
             if not self.is_unique:
@@ -556,8 +556,8 @@ cpdef convert_scalar(ndarray arr, object value):
             pass
         elif value is None or value != value:
             return np.datetime64("NaT", "ns")
-        raise ValueError("cannot set a Timestamp with a non-timestamp {typ}"
-                         .format(typ=type(value).__name__))
+        raise ValueError(f"cannot set a Timestamp with a non-timestamp "
+                         f"{type(value).__name__}")
 
     elif arr.descr.type_num == NPY_TIMEDELTA:
         if util.is_array(value):
@@ -573,8 +573,8 @@ cpdef convert_scalar(ndarray arr, object value):
             pass
         elif value is None or value != value:
             return np.timedelta64("NaT", "ns")
-        raise ValueError("cannot set a Timedelta with a non-timedelta {typ}"
-                         .format(typ=type(value).__name__))
+        raise ValueError(f"cannot set a Timedelta with a non-timedelta "
+                         f"{type(value).__name__}")
 
     if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and
             not issubclass(arr.dtype.type, np.bool_)):
@@ -677,7 +677,7 @@ cdef class BaseMultiIndexCodesEngine:
             # Index._get_fill_indexer), sort (integer representations of) keys:
             order = np.argsort(lab_ints)
             lab_ints = lab_ints[order]
-            indexer = (getattr(self._base, 'get_{}_indexer'.format(method))
+            indexer = (getattr(self._base, f'get_{method}_indexer')
                        (self, lab_ints, limit=limit))
             indexer = indexer[order]
         else:
@@ -687,7 +687,7 @@ cdef class BaseMultiIndexCodesEngine:
 
     def get_loc(self, object key):
         if is_definitely_invalid_key(key):
-            raise TypeError("'{key}' is an invalid key".format(key=key))
+            raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
         try:
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -61,7 +61,7 @@ cdef class BlockPlacement:
         else:
             v = self._as_array
 
-        return '%s(%r)' % (self.__class__.__name__, v)
+        return f'{self.__class__.__name__}({v})'
 
     def __repr__(self) -> str:
         return str(self)
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -179,8 +179,8 @@ cdef class IntervalMixin:
             When `other` is not closed exactly the same as self.
         """
         if self.closed != other.closed:
-            msg = "'{}.closed' is '{}', expected '{}'."
-            raise ValueError(msg.format(name, other.closed, self.closed))
+            msg = f"'{name}.closed' is '{other.closed}', expected '{self.closed}'."
+            raise ValueError(msg)
 
 
 cdef _interval_like(other):
@@ -308,17 +308,16 @@ cdef class Interval(IntervalMixin):
         self._validate_endpoint(right)
 
         if closed not in _VALID_CLOSED:
-            msg = "invalid option for 'closed': {closed}".format(closed=closed)
+            msg = f"invalid option for 'closed': {closed}"
             raise ValueError(msg)
         if not left <= right:
             raise ValueError('left side of interval must be <= right side')
         if (isinstance(left, Timestamp) and
                 not tz_compare(left.tzinfo, right.tzinfo)):
             # GH 18538
-            msg = ("left and right must have the same time zone, got "
-                   "'{left_tz}' and '{right_tz}'")
-            raise ValueError(msg.format(left_tz=left.tzinfo,
-                                        right_tz=right.tzinfo))
+            msg = (f"left and right must have the same time zone, got "
+                   f"'{left.tzinfo}' and '{right.tzinfo}'")
+            raise ValueError(msg)
         self.left = left
         self.right = right
         self.closed = closed
@@ -359,8 +358,7 @@ cdef class Interval(IntervalMixin):
             name = type(self).__name__
             other = type(other).__name__
             op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op]
-            raise TypeError('unorderable types: {name}() {op} {other}()'
-                            .format(name=name, op=op_str, other=other))
+            raise TypeError(f'unorderable types: {name}() {op_str} {other}()')
 
     def __reduce__(self):
         args = (self.left, self.right, self.closed)
@@ -381,17 +379,15 @@ cdef class Interval(IntervalMixin):
 
         left, right = self._repr_base()
         name = type(self).__name__
-        repr_str = '{name}({left!r}, {right!r}, closed={closed!r})'.format(
-            name=name, left=left, right=right, closed=self.closed)
+        repr_str = f'{name}({left!r}, {right!r}, closed={self.closed!r})'
         return repr_str
 
     def __str__(self) -> str:
 
         left, right = self._repr_base()
         start_symbol = '[' if self.closed_left else '('
         end_symbol = ']' if self.closed_right else ')'
-        return '{start}{left}, {right}{end}'.format(
-            start=start_symbol, left=left, right=right, end=end_symbol)
+        return f'{start_symbol}{left}, {right}{end_symbol}'
 
     def __add__(self, y):
         if isinstance(y, numbers.Number):
@@ -477,8 +473,8 @@ cdef class Interval(IntervalMixin):
         False
         """
         if not isinstance(other, Interval):
-            msg = '`other` must be an Interval, got {other}'
-            raise TypeError(msg.format(other=type(other).__name__))
+            msg = f'`other` must be an Interval, got {type(other).__name__}'
+            raise TypeError(msg)
 
         # equality is okay if both endpoints are closed (overlap at a point)
         op1 = le if (self.closed_left and other.closed_right) else lt
@@ -529,8 +525,8 @@ def intervals_to_interval_bounds(ndarray intervals,
             continue
 
         if not isinstance(interval, Interval):
-            raise TypeError("type {typ} with value {iv} is not an interval"
-                            .format(typ=type(interval), iv=interval))
+            raise TypeError(f"type {type(interval)} with value "
+                            f"{interval} is not an interval")
 
         left[i] = interval.left
         right[i] = interval.right
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1219,8 +1219,7 @@ def infer_dtype(value: object, skipna: object=None) -> str:
                 return value
 
             # its ndarray like but we can't handle
-            raise ValueError("cannot infer type for {typ}"
-                             .format(typ=type(value)))
+            raise ValueError(f"cannot infer type for {type(value)}")
 
     else:
         if not isinstance(value, list):
@@ -1497,9 +1496,8 @@ cdef class Validator:
         return self.is_valid(value) or self.is_valid_null(value)
 
     cdef bint is_value_typed(self, object value) except -1:
-        raise NotImplementedError(
-            '{typ} child class must define is_value_typed'
-            .format(typ=type(self).__name__))
+        raise NotImplementedError(f'{type(self).__name__} child class '
+                                  f'must define is_value_typed')
 
     cdef bint is_valid_null(self, object value) except -1:
         return value is None or util.is_nan(value)
@@ -1635,9 +1633,8 @@ cdef class TemporalValidator(Validator):
         return self.is_value_typed(value) or self.is_valid_null(value)
 
     cdef bint is_valid_null(self, object value) except -1:
-        raise NotImplementedError(
-            '{typ} child class must define is_valid_null'
-            .format(typ=type(self).__name__))
+        raise NotImplementedError(f'{type(self).__name__} child class '
+                                  f'must define is_valid_null')
 
     cdef inline bint is_valid_skipna(self, object value) except -1:
         cdef:
@@ -1926,7 +1923,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
                     seen.float_ = True
             except (TypeError, ValueError) as e:
                 if not seen.coerce_numeric:
-                    raise type(e)(str(e) + " at position {pos}".format(pos=i))
+                    raise type(e)(str(e) + f" at position {i}")
                 elif "uint64" in str(e):  # Exception from check functions.
                     raise
 
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
@@ -123,8 +123,7 @@ def vec_compare(object[:] left, object[:] right, object op):
         int flag
 
     if n != <Py_ssize_t>len(right):
-        raise ValueError('Arrays were different lengths: {n} vs {nright}'
-                         .format(n=n, nright=len(right)))
+        raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')
 
     if op is operator.lt:
         flag = Py_LT
@@ -224,8 +223,7 @@ def vec_binop(object[:] left, object[:] right, object op):
         object[:] result
 
     if n != <Py_ssize_t>len(right):
-        raise ValueError('Arrays were different lengths: {n} vs {nright}'
-                         .format(n=n, nright=len(right)))
+        raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')
 
     result = np.empty(n, dtype=object)
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -637,19 +637,19 @@ cdef class TextReader:
                     source = zip_file.open(file_name)
 
                 elif len(zip_names) == 0:
-                    raise ValueError('Zero files found in compressed '
-                                     'zip file %s', source)
+                    raise ValueError(f'Zero files found in compressed '
+                                     f'zip file {source}')
                 else:
-                    raise ValueError('Multiple files found in compressed '
-                                     'zip file %s', str(zip_names))
+                    raise ValueError(f'Multiple files found in compressed '
+                                     f'zip file {zip_names}')
             elif self.compression == 'xz':
                 if isinstance(source, str):
                     source = _get_lzma_file(lzma)(source, 'rb')
                 else:
                     source = _get_lzma_file(lzma)(filename=source)
             else:
-                raise ValueError('Unrecognized compression type: %s' %
-                                 self.compression)
+                raise ValueError(f'Unrecognized compression type: '
+                                 f'{self.compression}')
 
             if b'utf-16' in (self.encoding or b''):
                 # we need to read utf-16 through UTF8Recoder.
@@ -703,8 +703,8 @@ cdef class TextReader:
             self.parser.cb_io = &buffer_rd_bytes
             self.parser.cb_cleanup = &del_rd_source
         else:
-            raise IOError('Expected file path name or file-like object,'
-                          ' got %s type' % type(source))
+            raise IOError(f'Expected file path name or file-like object, '
+                          f'got {type(source)} type')
 
     cdef _get_header(self):
         # header is now a list of lists, so field_count should use header[0]
@@ -744,8 +744,8 @@ cdef class TextReader:
                         msg = "[%s], len of %d," % (
                             ','.join(str(m) for m in msg), len(msg))
                     raise ParserError(
-                        'Passed header=%s but only %d lines in file'
-                        % (msg, self.parser.lines))
+                        f'Passed header={msg} but only '
+                        f'{self.parser.lines} lines in file')
 
                 else:
                     field_count = self.parser.line_fields[hr]
@@ -779,7 +779,7 @@ cdef class TextReader:
                     if not self.has_mi_columns and self.mangle_dupe_cols:
                         while count > 0:
                             counts[name] = count + 1
-                            name = '%s.%d' % (name, count)
+                            name = f'{name}.{count}'
                             count = counts.get(name, 0)
 
                     if old_name == '':
@@ -1662,7 +1662,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
         char *data
         ndarray result
 
-    result = np.empty(line_end - line_start, dtype='|S%d' % width)
+    result = np.empty(line_end - line_start, dtype=f'|S{width}')
     data = <char*>result.data
 
     with nogil:
@@ -2176,8 +2176,8 @@ def _concatenate_chunks(list chunks):
     if warning_columns:
         warning_names = ','.join(warning_columns)
         warning_message = " ".join([
-            "Columns (%s) have mixed types." % warning_names,
-            "Specify dtype option on import or set low_memory=False."
+            f"Columns ({warning_names}) have mixed types."
+            f"Specify dtype option on import or set low_memory=False."
           ])
         warnings.warn(warning_message, DtypeWarning, stacklevel=8)
     return result
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -92,8 +92,7 @@ cdef class Reducer:
             if dummy.dtype != self.arr.dtype:
                 raise ValueError('Dummy array must be same dtype')
             if len(dummy) != self.chunksize:
-                raise ValueError('Dummy array must be length {length}'
-                                 .format(length=self.chunksize))
+                raise ValueError(f'Dummy array must be length {self.chunksize}')
 
         return dummy, typ, index, ityp
 
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
@@ -53,7 +53,7 @@ cdef class IntIndex(SparseIndex):
 
     def __repr__(self) -> str:
         output = 'IntIndex\n'
-        output += 'Indices: %s\n' % repr(self.indices)
+        output += f'Indices: {repr(self.indices)}\n'
         return output
 
     @property
@@ -72,9 +72,8 @@ cdef class IntIndex(SparseIndex):
         """
 
         if self.npoints > self.length:
-            msg = ("Too many indices. Expected "
-                   "{exp} but found {act}").format(
-                exp=self.length, act=self.npoints)
+            msg = (f"Too many indices. Expected "
+                   f"{self.length} but found {self.npoints}")
             raise ValueError(msg)
 
         # Indices are vacuously ordered and non-negative
@@ -343,8 +342,8 @@ cdef class BlockIndex(SparseIndex):
 
     def __repr__(self) -> str:
         output = 'BlockIndex\n'
-        output += 'Block locations: %s\n' % repr(self.blocs)
-        output += 'Block lengths: %s' % repr(self.blengths)
+        output += f'Block locations: {repr(self.blocs)}\n'
+        output += f'Block lengths: {repr(self.blengths)}'
 
         return output
 
@@ -380,15 +379,14 @@ cdef class BlockIndex(SparseIndex):
 
             if i < self.nblocks - 1:
                 if blocs[i] + blengths[i] > blocs[i + 1]:
-                    raise ValueError('Block {idx} overlaps'.format(idx=i))
+                    raise ValueError(f'Block {i} overlaps')
             else:
                 if blocs[i] + blengths[i] > self.length:
-                    raise ValueError('Block {idx} extends beyond end'
-                                     .format(idx=i))
+                    raise ValueError(f'Block {i} extends beyond end')
 
             # no zero-length blocks
             if blengths[i] == 0:
-                raise ValueError('Zero-length block {idx}'.format(idx=i))
+                raise ValueError(f'Zero-length block {i}')
 
     def equals(self, other):
         if not isinstance(other, BlockIndex):
diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx