Skip to content

Remove Python2 numeric relics #38916

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,18 +122,18 @@ def setup(self, op):
n_rows = 500

# construct dataframe with 2 blocks
arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
df = pd.concat(
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
)
# should already be the case, but just to be sure
df._consolidate_inplace()

# TODO: GH#33198 the setting here shouldn't need two steps
arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8")
arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8")
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
df2 = pd.concat(
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
axis=1,
Expand Down
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ class Repr:
def setup(self):
nrows = 10000
data = np.random.randn(nrows, 10)
arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
arrays = np.tile(np.random.randn(3, nrows // 100), 100)
idx = MultiIndex.from_arrays(arrays)
self.df3 = DataFrame(data, index=idx)
self.df4 = DataFrame(data, index=np.random.randn(nrows))
Expand Down Expand Up @@ -648,9 +648,9 @@ class Describe:
def setup(self):
self.df = DataFrame(
{
"a": np.random.randint(0, 100, int(1e6)),
"b": np.random.randint(0, 100, int(1e6)),
"c": np.random.randint(0, 100, int(1e6)),
"a": np.random.randint(0, 100, 10 ** 6),
"b": np.random.randint(0, 100, 10 ** 6),
"c": np.random.randint(0, 100, 10 ** 6),
}
)

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/hash_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ class Float64GroupIndex:
# GH28303
def setup(self):
self.df = pd.date_range(
start="1/1/2018", end="1/2/2018", periods=1e6
start="1/1/2018", end="1/2/2018", periods=10 ** 6
).to_frame()
self.group_index = np.round(self.df.index.astype(int) / 1e9)
self.group_index = np.round(self.df.index.astype(int) / 10 ** 9)

def time_groupby(self):
self.df.groupby(self.group_index).last()
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class ToNumericDowncast:
]

N = 500000
N2 = int(N / 2)
N2 = N // 2

data_dict = {
"string-int": ["1"] * N2 + [2] * N2,
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def setup(self):
daily_dates = date_index.to_period("D").to_timestamp("S", "S")
self.fracofday = date_index.values - daily_dates.values
self.fracofday = self.fracofday.astype("timedelta64[ns]")
self.fracofday = self.fracofday.astype(np.float64) / 86400000000000.0
self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000
self.fracofday = Series(self.fracofday, daily_dates)
index = date_range(date_index.min(), date_index.max(), freq="D")
self.temp = Series(1.0, index)[self.fracofday.index]
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ class PeakMemFixedWindowMinMax:
params = ["min", "max"]

def setup(self, operation):
N = int(1e6)
N = 10 ** 6
arr = np.random.random(N)
self.roll = pd.Series(arr).rolling(2)

Expand Down Expand Up @@ -233,7 +233,7 @@ class GroupbyLargeGroups:

def setup(self):
N = 100000
self.df = pd.DataFrame({"A": [1, 2] * int(N / 2), "B": np.random.randn(N)})
self.df = pd.DataFrame({"A": [1, 2] * (N // 2), "B": np.random.randn(N)})

def time_rolling_multiindex_creation(self):
self.df.groupby("A").rolling(3).mean()
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def time_dir_strings(self):
class SeriesGetattr:
# https://github.com/pandas-dev/pandas/issues/19764
def setup(self):
self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=int(1e6)))
self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=10 ** 6))

def time_series_datetimeindex_repr(self):
getattr(self.s, "a", None)
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def time_iso8601_tz_spaceformat(self):
class ToDatetimeNONISO8601:
def setup(self):
N = 10000
half = int(N / 2)
half = N // 2
ts_string_1 = "March 1, 2018 12:00:00+0400"
ts_string_2 = "March 1, 2018 12:00:00+0500"
self.same_offset = [ts_string_1] * N
Expand Down Expand Up @@ -376,7 +376,7 @@ def setup(self):
self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
self.diff_offset = [
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
] * int(N / 10)
] * (N // 10)

def time_exact(self):
to_datetime(self.s2, format="%d%b%y")
Expand Down
2 changes: 1 addition & 1 deletion pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,7 @@ def _create_missing_idx(nrows, ncols, density, random_state=None):
random_state = np.random.RandomState(random_state)

# below is cribbed from scipy.sparse
size = int(np.round((1 - density) * nrows * ncols))
size = round((1 - density) * nrows * ncols)
# generate a few more to ensure unique values
min_rows = 5
fac = 1.02
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ def value_counts(
result = result.sort_values(ascending=ascending)

if normalize:
result = result / float(counts.sum())
result = result / counts.sum()

return result

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2214,7 +2214,7 @@ def describe(self):
A dataframe with frequency and counts by category.
"""
counts = self.value_counts(dropna=False)
freqs = counts / float(counts.sum())
freqs = counts / counts.sum()

from pandas.core.reshape.concat import concat

Expand Down
12 changes: 6 additions & 6 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ def __iter__(self):
data = self.asi8
length = len(self)
chunksize = 10000
chunks = int(length / chunksize) + 1
chunks = (length // chunksize) + 1
for i in range(chunks):
start_i = i * chunksize
end_i = min((i + 1) * chunksize, length)
Expand Down Expand Up @@ -1847,12 +1847,12 @@ def to_julian_date(self):
+ 1_721_118.5
+ (
self.hour
+ self.minute / 60.0
+ self.second / 3600.0
+ self.microsecond / 3600.0 / 1e6
+ self.nanosecond / 3600.0 / 1e9
+ self.minute / 60
+ self.second / 3600
+ self.microsecond / 3600 / 10 ** 6
+ self.nanosecond / 3600 / 10 ** 9
)
/ 24.0
/ 24
)

# -----------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ def density(self):
>>> s.density
0.6
"""
return float(self.sp_index.npoints) / float(self.sp_index.length)
return self.sp_index.npoints / self.sp_index.length

@property
def npoints(self) -> int:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def __iter__(self):
data = self.asi8
length = len(self)
chunksize = 10000
chunks = int(length / chunksize) + 1
chunks = (length // chunksize) + 1
for i in range(chunks):
start_i = i * chunksize
end_i = min((i + 1) * chunksize, length)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5336,7 +5336,7 @@ def sample(
elif n is not None and frac is None and n % 1 != 0:
raise ValueError("Only integers accepted as `n` values")
elif n is None and frac is not None:
n = int(round(frac * axis_length))
n = round(frac * axis_length)
elif n is not None and frac is not None:
raise ValueError("Please enter a value for `frac` OR `n`, not both")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def should_cache(
return False

if len(arg) <= 5000:
check_count = int(len(arg) * 0.1)
check_count = len(arg) // 10
else:
check_count = 500
else:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_center_of_mass(
elif span is not None:
if span < 1:
raise ValueError("span must satisfy: span >= 1")
comass = (span - 1) / 2.0
comass = (span - 1) / 2
elif halflife is not None:
if halflife <= 0:
raise ValueError("halflife must satisfy: halflife > 0")
Expand All @@ -70,7 +70,7 @@ def get_center_of_mass(
elif alpha is not None:
if alpha <= 0 or alpha > 1:
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
comass = (1.0 - alpha) / alpha
comass = (1 - alpha) / alpha
else:
raise ValueError("Must pass one of comass, span, halflife, or alpha")

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@ def _value_with_fmt(self, val):
elif isinstance(val, datetime.date):
fmt = self.date_format
elif isinstance(val, datetime.timedelta):
val = val.total_seconds() / float(86400)
val = val.total_seconds() / 86400
fmt = "0"
else:
val = str(val)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def _generate_multiindex_header_rows(self) -> Iterator[List[Label]]:

def _save_body(self) -> None:
nrows = len(self.data_index)
chunks = int(nrows / self.chunksize) + 1
chunks = (nrows // self.chunksize) + 1
for i in range(chunks):
start_i = i * self.chunksize
end_i = min(start_i + self.chunksize, nrows)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1637,7 +1637,7 @@ def is_dates_only(

values_int = values.asi8
consider_values = values_int != iNaT
one_day_nanos = 86400 * 1e9
one_day_nanos = 86400 * 10 ** 9
even_days = (
np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0
)
Expand Down Expand Up @@ -1741,7 +1741,7 @@ def get_format_timedelta64(

consider_values = values_int != iNaT

one_day_nanos = 86400 * 1e9
one_day_nanos = 86400 * 10 ** 9
even_days = (
np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def _fit_strcols_to_terminal_width(self, strcols: List[List[str]]) -> str:
counter = 0
while adj_dif > 0 and n_cols > 1:
counter += 1
mid = int(round(n_cols / 2.0))
mid = round(n_cols / 2)
mid_ix = col_lens.index[mid]
col_len = col_lens[mid_ix]
# adjoin adds one
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ def insert(self, chunksize: Optional[int] = None, method: Optional[str] = None):
elif chunksize == 0:
raise ValueError("chunksize argument should be non-zero")

chunks = int(nrows / chunksize) + 1
chunks = (nrows // chunksize) + 1

with self.pd_sql.run_transaction() as conn:
for i in range(chunks):
Expand Down
36 changes: 18 additions & 18 deletions pandas/plotting/_matplotlib/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR
SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY

MUSEC_PER_DAY = 1e6 * SEC_PER_DAY
MUSEC_PER_DAY = 10 ** 6 * SEC_PER_DAY

_mpl_units = {} # Cache for units overwritten by us

Expand Down Expand Up @@ -116,7 +116,7 @@ def deregister():


def _to_ordinalf(tm: pydt.time) -> float:
tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + float(tm.microsecond / 1e6)
tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + tm.microsecond / 10 ** 6
return tot_sec


Expand Down Expand Up @@ -182,7 +182,7 @@ def __call__(self, x, pos=0) -> str:
"""
fmt = "%H:%M:%S.%f"
s = int(x)
msus = int(round((x - s) * 1e6))
msus = round((x - s) * 10 ** 6)
ms = msus // 1000
us = msus % 1000
m, s = divmod(s, 60)
Expand Down Expand Up @@ -429,7 +429,7 @@ def _from_ordinal(x, tz: Optional[tzinfo] = None) -> datetime:
hour, remainder = divmod(24 * remainder, 1)
minute, remainder = divmod(60 * remainder, 1)
second, remainder = divmod(60 * remainder, 1)
microsecond = int(1e6 * remainder)
microsecond = int(1_000_000 * remainder)
if microsecond < 10:
microsecond = 0 # compensate for rounding errors
dt = datetime(
Expand All @@ -439,7 +439,7 @@ def _from_ordinal(x, tz: Optional[tzinfo] = None) -> datetime:
dt = dt.astimezone(tz)

if microsecond > 999990: # compensate for rounding errors
dt += timedelta(microseconds=1e6 - microsecond)
dt += timedelta(microseconds=1_000_000 - microsecond)

return dt

Expand Down Expand Up @@ -611,27 +611,27 @@ def _second_finder(label_interval):
info_fmt[day_start] = "%H:%M:%S\n%d-%b"
info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y"

if span < periodsperday / 12000.0:
if span < periodsperday / 12000:
_second_finder(1)
elif span < periodsperday / 6000.0:
elif span < periodsperday / 6000:
_second_finder(2)
elif span < periodsperday / 2400.0:
elif span < periodsperday / 2400:
_second_finder(5)
elif span < periodsperday / 1200.0:
elif span < periodsperday / 1200:
_second_finder(10)
elif span < periodsperday / 800.0:
elif span < periodsperday / 800:
_second_finder(15)
elif span < periodsperday / 400.0:
elif span < periodsperday / 400:
_second_finder(30)
elif span < periodsperday / 150.0:
elif span < periodsperday / 150:
_minute_finder(1)
elif span < periodsperday / 70.0:
elif span < periodsperday / 70:
_minute_finder(2)
elif span < periodsperday / 24.0:
elif span < periodsperday / 24:
_minute_finder(5)
elif span < periodsperday / 12.0:
elif span < periodsperday / 12:
_minute_finder(15)
elif span < periodsperday / 6.0:
elif span < periodsperday / 6:
_minute_finder(30)
elif span < periodsperday / 2.5:
_hour_finder(1, False)
Expand Down Expand Up @@ -1058,7 +1058,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
"""
Convert seconds to 'D days HH:MM:SS.F'
"""
s, ns = divmod(x, 1e9)
s, ns = divmod(x, 10 ** 9)
m, s = divmod(s, 60)
h, m = divmod(m, 60)
d, h = divmod(h, 24)
Expand All @@ -1072,7 +1072,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:

def __call__(self, x, pos=0) -> str:
(vmin, vmax) = tuple(self.axis.get_view_interval())
n_decimals = int(np.ceil(np.log10(100 * 1e9 / abs(vmax - vmin))))
n_decimals = int(np.ceil(np.log10(100 * 10 ** 9 / abs(vmax - vmin))))
if n_decimals > 9:
n_decimals = 9
return self.format_timedelta_ticks(x, pos, n_decimals)
Loading