Skip to content

Commit 9d3a9bd

Browse files
authored
Merge pull request #162 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 77fafd0 + fe8ca04 commit 9d3a9bd

33 files changed

+494
-353
lines changed

.pre-commit-config.yaml

+8-2
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,10 @@ repos:
8686
types: [python]
8787
exclude: ^pandas/_typing\.py$
8888
- id: inconsistent-namespace-usage
89-
name: 'Check for inconsistent use of pandas namespace in tests'
89+
name: 'Check for inconsistent use of pandas namespace'
9090
entry: python scripts/check_for_inconsistent_pandas_namespace.py
9191
language: python
9292
types: [python]
93-
files: ^pandas/tests/
9493
- id: incorrect-code-directives
9594
name: Check for incorrect code block or IPython directives
9695
language: pygrep
@@ -213,3 +212,10 @@ repos:
213212
|\#\ type:\s?ignore(?!\[)
214213
language: pygrep
215214
types: [python]
215+
- id: use-pd_array-in-core
216+
name: Import pandas.array as pd_array in core
217+
language: python
218+
entry: python scripts/use_pd_array_in_core.py
219+
files: ^pandas/core/
220+
exclude: ^pandas/core/api\.py$
221+
types: [python]

asv_bench/benchmarks/arithmetic.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,7 @@ def setup(self, op, shape):
140140
# construct dataframe with 2 blocks
141141
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
142142
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
143-
df = pd.concat(
144-
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
145-
)
143+
df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True)
146144
# should already be the case, but just to be sure
147145
df._consolidate_inplace()
148146

@@ -151,7 +149,7 @@ def setup(self, op, shape):
151149
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
152150
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
153151
df2 = pd.concat(
154-
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
152+
[DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)],
155153
axis=1,
156154
ignore_index=True,
157155
)
@@ -459,9 +457,9 @@ class OffsetArrayArithmetic:
459457

460458
def setup(self, offset):
461459
N = 10000
462-
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
460+
rng = date_range(start="1/1/2000", periods=N, freq="T")
463461
self.rng = rng
464-
self.ser = pd.Series(rng)
462+
self.ser = Series(rng)
465463

466464
def time_add_series_offset(self, offset):
467465
with warnings.catch_warnings(record=True):
@@ -478,7 +476,7 @@ class ApplyIndex:
478476

479477
def setup(self, offset):
480478
N = 10000
481-
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
479+
rng = date_range(start="1/1/2000", periods=N, freq="T")
482480
self.rng = rng
483481

484482
def time_apply_index(self, offset):
@@ -490,17 +488,17 @@ class BinaryOpsMultiIndex:
490488
param_names = ["func"]
491489

492490
def setup(self, func):
493-
date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
491+
array = date_range("20200101 00:00", "20200102 0:00", freq="S")
494492
level_0_names = [str(i) for i in range(30)]
495493

496-
index = pd.MultiIndex.from_product([level_0_names, date_range])
494+
index = pd.MultiIndex.from_product([level_0_names, array])
497495
column_names = ["col_1", "col_2"]
498496

499-
self.df = pd.DataFrame(
497+
self.df = DataFrame(
500498
np.random.rand(len(index), 2), index=index, columns=column_names
501499
)
502500

503-
self.arg_df = pd.DataFrame(
501+
self.arg_df = DataFrame(
504502
np.random.randint(1, 10, (len(level_0_names), 2)),
505503
index=level_0_names,
506504
columns=column_names,

asv_bench/benchmarks/sparse.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def setup(self):
2828
data = np.random.randn(N)[:-i]
2929
idx = rng[:-i]
3030
data[100:] = np.nan
31-
self.series[i] = pd.Series(pd.SparseArray(data), index=idx)
31+
self.series[i] = Series(SparseArray(data), index=idx)
3232

3333
def time_series_to_frame(self):
3434
pd.DataFrame(self.series)
@@ -63,7 +63,7 @@ def setup(self):
6363
)
6464

6565
def time_sparse_series_from_coo(self):
66-
pd.Series.sparse.from_coo(self.matrix)
66+
Series.sparse.from_coo(self.matrix)
6767

6868

6969
class ToCoo:

doc/source/whatsnew/v1.3.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ Other enhancements
161161
- :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases where ``axis`` is ``0``, ``1`` or ``None`` (:issue:`39359`)
162162
- :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`)
163163
- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`)
164+
- :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`)
164165
- Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`)
165166
- :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raise a helpful error message when the indexer has too many dimensions (:issue:`35349`)
166167
- :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
@@ -571,6 +572,7 @@ Conversion
571572
- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
572573
- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
573574
- Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
575+
- Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='category'`` (:issue:`40450`)
574576
- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
575577
-
576578

pandas/_testing/__init__.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,12 @@ def box_expected(expected, box_cls, transpose=True):
207207
"""
208208
if box_cls is pd.array:
209209
expected = pd.array(expected)
210-
elif box_cls is pd.Index:
211-
expected = pd.Index(expected)
212-
elif box_cls is pd.Series:
213-
expected = pd.Series(expected)
214-
elif box_cls is pd.DataFrame:
215-
expected = pd.Series(expected).to_frame()
210+
elif box_cls is Index:
211+
expected = Index(expected)
212+
elif box_cls is Series:
213+
expected = Series(expected)
214+
elif box_cls is DataFrame:
215+
expected = Series(expected).to_frame()
216216
if transpose:
217217
# for vector operations, we need a DataFrame to be a single-row,
218218
# not a single-column, in order to operate against non-DataFrame
@@ -400,7 +400,7 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None)
400400
"x": state.rand(n) * 2 - 1,
401401
"y": state.rand(n) * 2 - 1,
402402
}
403-
df = pd.DataFrame(columns, index=index, columns=sorted(columns))
403+
df = DataFrame(columns, index=index, columns=sorted(columns))
404404
if df.index[-1] == end:
405405
df = df.iloc[:-1]
406406
return df

pandas/conftest.py

+8-10
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def unique_nulls_fixture(request):
328328
# ----------------------------------------------------------------
329329

330330

331-
@pytest.fixture(params=[pd.DataFrame, pd.Series])
331+
@pytest.fixture(params=[DataFrame, Series])
332332
def frame_or_series(request):
333333
"""
334334
Fixture to parametrize over DataFrame and Series.
@@ -338,7 +338,7 @@ def frame_or_series(request):
338338

339339
# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]"
340340
@pytest.fixture(
341-
params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item]
341+
params=[Index, Series], ids=["index", "series"] # type: ignore[list-item]
342342
)
343343
def index_or_series(request):
344344
"""
@@ -356,9 +356,7 @@ def index_or_series(request):
356356
index_or_series2 = index_or_series
357357

358358

359-
@pytest.fixture(
360-
params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
361-
)
359+
@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
362360
def index_or_series_or_array(request):
363361
"""
364362
Fixture to parametrize over Index, Series, and ExtensionArray
@@ -559,7 +557,7 @@ def index_with_missing(request):
559557
# ----------------------------------------------------------------
560558
@pytest.fixture
561559
def empty_series():
562-
return pd.Series([], index=[], dtype=np.float64)
560+
return Series([], index=[], dtype=np.float64)
563561

564562

565563
@pytest.fixture
@@ -596,7 +594,7 @@ def _create_series(index):
596594
""" Helper for the _series dict """
597595
size = len(index)
598596
data = np.random.randn(size)
599-
return pd.Series(data, index=index, name="a")
597+
return Series(data, index=index, name="a")
600598

601599

602600
_series = {
@@ -1437,16 +1435,16 @@ def any_numpy_dtype(request):
14371435
("boolean", [True, np.nan, False]),
14381436
("boolean", [True, pd.NA, False]),
14391437
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
1440-
("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
1438+
("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
14411439
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
14421440
# The following two dtypes are commented out due to GH 23554
14431441
# ('complex', [1 + 1j, np.nan, 2 + 2j]),
14441442
# ('timedelta64', [np.timedelta64(1, 'D'),
14451443
# np.nan, np.timedelta64(2, 'D')]),
14461444
("timedelta", [timedelta(1), np.nan, timedelta(2)]),
14471445
("time", [time(1), np.nan, time(2)]),
1448-
("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
1449-
("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
1446+
("period", [Period(2013), pd.NaT, Period(2018)]),
1447+
("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
14501448
]
14511449
ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id
14521450

pandas/core/arrays/string_.py

+3
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ def astype(self, dtype, copy=True):
327327
arr[mask] = "0"
328328
values = arr.astype(dtype.numpy_dtype)
329329
return FloatingArray(values, mask, copy=False)
330+
elif isinstance(dtype, ExtensionDtype):
331+
cls = dtype.construct_array_type()
332+
return cls._from_sequence(self, dtype=dtype, copy=copy)
330333
elif np.issubdtype(dtype, np.floating):
331334
arr = self._ndarray.copy()
332335
mask = self.isna()

pandas/core/computation/expressions.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,20 @@ def _evaluate_numexpr(op, op_str, a, b):
104104
a_value = a
105105
b_value = b
106106

107-
result = ne.evaluate(
108-
f"a_value {op_str} b_value",
109-
local_dict={"a_value": a_value, "b_value": b_value},
110-
casting="safe",
111-
)
107+
try:
108+
result = ne.evaluate(
109+
f"a_value {op_str} b_value",
110+
local_dict={"a_value": a_value, "b_value": b_value},
111+
casting="safe",
112+
)
113+
except TypeError:
114+
# numexpr raises, e.g., for array ** array with integers
115+
# (https://github.com/pydata/numexpr/issues/379)
116+
pass
117+
118+
if is_reversed:
119+
# reverse order to original for fallback
120+
a, b = b, a
112121

113122
if _TEST_MODE:
114123
_store_test_result(result is not None)

pandas/core/dtypes/cast.py

-49
Original file line numberDiff line numberDiff line change
@@ -498,55 +498,6 @@ def maybe_cast_to_extension_array(
498498
return result
499499

500500

501-
def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray:
502-
"""
503-
A safe version of putmask that potentially upcasts the result.
504-
505-
The result is replaced with the first N elements of other,
506-
where N is the number of True values in mask.
507-
If the length of other is shorter than N, other will be repeated.
508-
509-
Parameters
510-
----------
511-
result : ndarray
512-
The destination array. This will be mutated in-place if no upcasting is
513-
necessary.
514-
mask : np.ndarray[bool]
515-
516-
Returns
517-
-------
518-
result : ndarray
519-
520-
Examples
521-
--------
522-
>>> arr = np.arange(1, 6)
523-
>>> mask = np.array([False, True, False, True, True])
524-
>>> result = maybe_upcast_putmask(arr, mask)
525-
>>> result
526-
array([ 1., nan, 3., nan, nan])
527-
"""
528-
if not isinstance(result, np.ndarray):
529-
raise ValueError("The result input must be a ndarray.")
530-
531-
# NB: we never get here with result.dtype.kind in ["m", "M"]
532-
533-
if mask.any():
534-
535-
# we want to decide whether place will work
536-
# if we have nans in the False portion of our mask then we need to
537-
# upcast (possibly), otherwise we DON'T want to upcast (e.g. if we
538-
# have values, say integers, in the success portion then it's ok to not
539-
# upcast)
540-
new_dtype = ensure_dtype_can_hold_na(result.dtype)
541-
542-
if new_dtype != result.dtype:
543-
result = result.astype(new_dtype, copy=True)
544-
545-
np.place(result, mask, np.nan)
546-
547-
return result
548-
549-
550501
@overload
551502
def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
552503
...

pandas/core/internals/__init__.py

-4
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@
99
)
1010
from pandas.core.internals.blocks import ( # io.pytables, io.packers
1111
Block,
12-
DatetimeBlock,
1312
DatetimeTZBlock,
1413
ExtensionBlock,
1514
NumericBlock,
1615
ObjectBlock,
17-
TimeDeltaBlock,
1816
)
1917
from pandas.core.internals.concat import concatenate_managers
2018
from pandas.core.internals.managers import (
@@ -28,11 +26,9 @@
2826
"Block",
2927
"CategoricalBlock",
3028
"NumericBlock",
31-
"DatetimeBlock",
3229
"DatetimeTZBlock",
3330
"ExtensionBlock",
3431
"ObjectBlock",
35-
"TimeDeltaBlock",
3632
"make_block",
3733
"DataManager",
3834
"ArrayManager",

pandas/core/internals/array_manager.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T
471471
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
472472
# attribute "tz"
473473
if hasattr(arr, "tz") and arr.tz is None: # type: ignore[union-attr]
474-
# DatetimeArray needs to be converted to ndarray for DatetimeBlock
474+
# DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock
475475

476476
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
477477
# attribute "_data"

0 commit comments

Comments
 (0)