|
17 | 17 | from .pandas_vb_common import tm
|
18 | 18 |
|
19 | 19 |
|
class Clip:
    """Time ``DataFrame.clip`` on a float frame built with each listed dtype."""

    # One parameter axis: the dtype string handed to the DataFrame constructor.
    param_names = ["dtype"]
    params = [["float64", "Float64", "float64[pyarrow]"]]

    def setup(self, dtype):
        # 100_000 rows x 10 columns of random draws, created directly in `dtype`.
        self.df = DataFrame(np.random.randn(100_000, 10), dtype=dtype)

    def time_clip(self, dtype):
        # Bound every value to [-1.0, 1.0]; the result is discarded because
        # only the call itself is being timed.
        self.df.clip(lower=-1.0, upper=1.0)
| 33 | + |
| 34 | + |
20 | 35 | class GetNumericData:
|
21 | 36 | def setup(self):
|
22 | 37 | self.df = DataFrame(np.random.randn(10000, 25))
|
@@ -429,6 +444,22 @@ def time_dropna_axis_mixed_dtypes(self, how, axis):
|
429 | 444 | self.df_mixed.dropna(how=how, axis=axis)
|
430 | 445 |
|
431 | 446 |
|
class Isna:
    """Time ``DataFrame.isna`` on a wide frame that mixes fully-valid,
    all-NA and partially-NA columns, for each listed dtype."""

    params = ["float64", "Float64", "float64[pyarrow]"]
    param_names = ["dtype"]

    def setup(self, dtype):
        # 10_000 rows x 1_000 columns of random draws.
        data = np.random.randn(10000, 1000)
        # all-na columns
        data[:, 600:800] = np.nan
        # partial-na columns: rows 4000-4999 of columns 800-999.
        # The index order is (rows, columns); with the slices the other way
        # round the column slice falls outside the 1_000 columns, selects
        # nothing, and no partially-NA column is ever produced.
        data[4000:5000, 800:1000] = np.nan
        self.df = DataFrame(data, dtype=dtype)

    def time_isna(self, dtype):
        # Result is discarded; only the call is being timed.
        self.df.isna()
| 461 | + |
| 462 | + |
432 | 463 | class Count:
|
433 | 464 | params = [0, 1]
|
434 | 465 | param_names = ["axis"]
|
@@ -739,4 +770,44 @@ def time_memory_usage_object_dtype(self):
|
739 | 770 | self.df2.memory_usage(deep=True)
|
740 | 771 |
|
741 | 772 |
|
class Where:
    """Time ``DataFrame.where`` with and without ``inplace`` for each dtype."""

    # Two parameter axes, in the order setup/time_where receive them.
    params = (
        [True, False],
        ["float64", "Float64", "float64[pyarrow]"],
    )
    # One name per axis in `params`; previously only "dtype" was listed, which
    # left the boolean `inplace` axis carrying the wrong label.
    param_names = ["inplace", "dtype"]

    def setup(self, inplace, dtype):
        # 100_000 rows x 10 columns of random draws, created directly in `dtype`.
        self.df = DataFrame(np.random.randn(100_000, 10), dtype=dtype)
        # Boolean frame marking the negative entries; passed as the `where`
        # condition below.
        self.mask = self.df < 0

    def time_where(self, inplace, dtype):
        # Result is discarded; only the call is being timed.
        self.df.where(self.mask, other=0.0, inplace=inplace)
| 786 | + |
| 787 | + |
class FindValidIndex:
    """Time ``first_valid_index`` / ``last_valid_index`` on a two-column frame
    whose leading and trailing rows are missing."""

    param_names = ["dtype"]
    params = [["float", "Float64", "float64[pyarrow]"]]

    def setup(self, dtype):
        frame = DataFrame(
            np.random.randn(100000, 2),
            columns=["A", "B"],
            dtype=dtype,
        )
        # Blank out a block at both ends of each column: 100 rows per end for
        # column 0 ("A"), 200 rows per end for column 1 ("B").
        for col_pos, n_missing in enumerate((100, 200)):
            frame.iloc[:n_missing, col_pos] = None
            frame.iloc[-n_missing:, col_pos] = None
        self.df = frame

    def time_first_valid_index(self, dtype):
        # Result is discarded; only the call is being timed.
        self.df.first_valid_index()

    def time_last_valid_index(self, dtype):
        # Result is discarded; only the call is being timed.
        self.df.last_valid_index()
| 811 | + |
| 812 | + |
742 | 813 | from .pandas_vb_common import setup # noqa: F401 isort:skip
|
0 commit comments