Sync Fork from Upstream Repo #246

Merged · 7 commits · Jul 30, 2021

2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.2.rst
@@ -19,6 +19,8 @@ Fixed regressions
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
-

.. ---------------------------------------------------------------------------
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timestamps.pyx
@@ -270,9 +270,9 @@ cdef class _Timestamp(ABCTimestamp):
if op == Py_EQ:
return False
if op == Py_LE or op == Py_LT:
return other.year <= self.year
return self.year <= other.year
if op == Py_GE or op == Py_GT:
return other.year >= self.year
return self.year >= other.year

cdef bint _can_compare(self, datetime other):
if self.tzinfo is not None:
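
A quick user-level check of the corrected fallback (a minimal sketch, not part of the diff; the year-based comparison only applies to datetimes outside the nanosecond datetime64 bounds, and mirrors the test added to pandas/tests/scalar/timestamp/test_comparisons.py below):

    from datetime import datetime

    from pandas import Timestamp

    # GH 42794: out-of-bounds datetimes now compare in the right direction,
    # using self.year against other.year.
    assert Timestamp.max < datetime(9999, 9, 9)  # 2262 < 9999
    assert Timestamp.min > datetime(1, 1, 1)     # 1677 > 1
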
4 changes: 1 addition & 3 deletions pandas/core/construction.py
@@ -405,9 +405,7 @@ def extract_array(
For an ndarray-backed Series / Index a PandasArray is returned.

>>> extract_array(pd.Series([1, 2, 3]))
<PandasArray>
[1, 2, 3]
Length: 3, dtype: int64
array([1, 2, 3])

To extract all the way down to the ndarray, pass ``extract_numpy=True``.

14 changes: 13 additions & 1 deletion pandas/core/frame.py
@@ -4754,7 +4754,8 @@ def drop(
Parameters
----------
labels : single label or list-like
Index or column labels to drop.
Index or column labels to drop. A tuple will be used as a single
label and not treated as a list-like.
axis : {0 or 'index', 1 or 'columns'}, default 0
Whether to drop labels from the index (0 or 'index') or
columns (1 or 'columns').
@@ -4845,6 +4846,17 @@ def drop(
weight 1.0 0.8
length 0.3 0.2

>>> df.drop(index=('falcon', 'weight'))
big small
lama speed 45.0 30.0
weight 200.0 100.0
length 1.5 1.0
cow speed 30.0 20.0
weight 250.0 150.0
length 1.5 0.8
falcon speed 320.0 250.0
length 0.3 0.2

>>> df.drop(index='cow', columns='small')
big
lama speed 45.0
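
A self-contained version of the behaviour the new docstring text and example describe (a minimal sketch; the from_product MultiIndex below is an assumption, not the exact frame used in the docstring):

    import pandas as pd

    midx = pd.MultiIndex.from_product(
        [["lama", "cow", "falcon"], ["speed", "weight", "length"]]
    )
    df = pd.DataFrame({"big": range(9), "small": range(9)}, index=midx)

    # A tuple is a single label: only the ('falcon', 'weight') row is dropped.
    assert df.drop(index=("falcon", "weight")).shape == (8, 2)

    # A list is list-like: all rows under either first-level key are dropped.
    assert df.drop(index=["falcon", "cow"]).shape == (3, 2)
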
3 changes: 3 additions & 0 deletions pandas/core/indexes/base.py
@@ -5406,6 +5406,9 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]
self._raise_if_missing(keyarr, indexer, axis_name)

keyarr = self.take(indexer)
if isinstance(key, Index):
# GH 42790 - Preserve name from an Index
keyarr.name = key.name
if keyarr.dtype.kind in ["m", "M"]:
# DTI/TDI.take can infer a freq in some cases when we dont want one
if isinstance(key, list) or (
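
The effect of the three added lines in user-facing terms (a minimal sketch mirroring the new test_loc_named_index test further down):

    import pandas as pd

    df = pd.DataFrame(
        {"max_speed": [1, 4, 7], "shield": [2, 5, 8]},
        index=["cobra", "viper", "sidewinder"],
    )

    # GH 42790: indexing with a named Index now carries the name through.
    result = df.loc[pd.Index(["cobra", "viper"], name="foo")]
    assert result.index.name == "foo"
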
9 changes: 3 additions & 6 deletions pandas/core/reshape/tile.py
@@ -4,6 +4,7 @@
from typing import (
Any,
Callable,
Literal,
)

import numpy as np
@@ -417,12 +418,8 @@ def _bins_to_cuts(
else:
bins = unique_bins

side = "left" if right else "right"
# error: No overload variant of "searchsorted" of "ndarray" matches
# argument types "Any", "str"
ids = ensure_platform_int(
bins.searchsorted(x, side=side) # type: ignore[call-overload]
)
side: Literal["left", "right"] = "left" if right else "right"
ids = ensure_platform_int(bins.searchsorted(x, side=side))

if include_lowest:
ids[np.asarray(x) == bins[0]] = 1
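
The typing pattern this hunk switches to, shown in isolation (a minimal sketch with made-up bins; not pandas code):

    from typing import Literal

    import numpy as np

    # Annotating the conditional as Literal["left", "right"] matches numpy's
    # searchsorted overloads, so the "type: ignore[call-overload]" comment
    # is no longer needed.
    right = True
    side: Literal["left", "right"] = "left" if right else "right"

    bins = np.array([0.0, 2.5, 5.0, 10.0])
    ids = bins.searchsorted(np.array([1.0, 2.5, 7.3]), side=side)
    print(ids)  # [1 1 3]
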
7 changes: 6 additions & 1 deletion pandas/io/parsers/readers.py
@@ -1302,7 +1302,12 @@ def _refine_defaults_read(
if delimiter and (sep is not lib.no_default):
raise ValueError("Specified a sep and a delimiter; you can only specify one.")

if names is not lib.no_default and prefix is not lib.no_default:
if (
names is not None
and names is not lib.no_default
and prefix is not None
and prefix is not lib.no_default
):
raise ValueError("Specified named and prefix; you can only specify one.")

kwds["names"] = None if names is lib.no_default else names
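
What the tightened check means for callers, under pandas 1.3.x where the prefix keyword still exists (a minimal sketch mirroring the two parser tests below):

    from io import StringIO

    import pandas as pd

    # GH 42387: explicitly passing None for both keywords is accepted again ...
    pd.read_csv(StringIO("a,b\n1,2"), names=None, prefix=None)

    # ... while genuinely supplying both still raises.
    try:
        pd.read_csv(StringIO("a,b\n1,2"), names=["a", "b"], prefix="x")
    except ValueError as err:
        print(err)  # Specified named and prefix; you can only specify one.
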
6 changes: 5 additions & 1 deletion pandas/plotting/_matplotlib/tools.py
@@ -417,8 +417,12 @@ def handle_shared_axes(
except IndexError:
# if gridspec is used, ax.rowNum and ax.colNum may different
# from layout shape. in this case, use last_row logic
if compat.mpl_ge_3_4_0():
is_last_row = lambda x: x.get_subplotspec().is_last_row()
else:
is_last_row = lambda x: x.is_last_row()
for ax in axarr:
if ax.is_last_row():
if is_last_row(ax):
continue
if sharex or _has_externally_shared_axis(ax, "x"):
_remove_labels_from_axis(ax.xaxis)
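
The version split the shim handles, written directly against matplotlib (a minimal sketch; the packaging import is my own choice here, not what pandas.plotting._matplotlib.compat uses internally):

    import matplotlib
    import matplotlib.pyplot as plt
    from packaging.version import Version

    fig, axes = plt.subplots(2, 2)
    ax = axes[0][0]

    # Matplotlib 3.4 moved is_last_row() from the Axes onto its SubplotSpec,
    # hence the version-gated lambda in the hunk above.
    if Version(matplotlib.__version__) >= Version("3.4.0"):
        is_last = ax.get_subplotspec().is_last_row()
    else:
        is_last = ax.is_last_row()

    print(is_last)  # False for the top-left axes of a 2x2 grid
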
12 changes: 12 additions & 0 deletions pandas/tests/indexing/test_loc.py
@@ -2432,6 +2432,18 @@ def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period):
with pytest.raises(KeyError, match="not in index"):
ser.loc[keys]

def test_loc_named_index(self):
# GH 42790
df = DataFrame(
[[1, 2], [4, 5], [7, 8]],
index=["cobra", "viper", "sidewinder"],
columns=["max_speed", "shield"],
)
expected = df.iloc[:2]
expected.index.name = "foo"
result = df.loc[Index(["cobra", "viper"], name="foo")]
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"columns, column_key, expected_columns",
17 changes: 13 additions & 4 deletions pandas/tests/io/parser/common/test_common_basic.py
@@ -764,15 +764,24 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):


@pytest.mark.parametrize("func", ["read_csv", "read_table"])
@pytest.mark.parametrize("prefix", [None, "x"])
@pytest.mark.parametrize("names", [None, ["a"]])
def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func):
def test_names_and_prefix_not_None_raises(all_parsers, func):
# GH#39123
f = StringIO("a,b\n1,2")
parser = all_parsers
msg = "Specified named and prefix; you can only specify one."
with pytest.raises(ValueError, match=msg):
getattr(parser, func)(f, names=names, prefix=prefix)
getattr(parser, func)(f, names=["a", "b"], prefix="x")


@pytest.mark.parametrize("func", ["read_csv", "read_table"])
@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)])
def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func):
# GH42387
f = StringIO("a,b\n1,2")
expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]})
parser = all_parsers
result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None)
tm.assert_frame_equal(result, expected)


def test_dict_keys_as_names(all_parsers):
1 change: 0 additions & 1 deletion pandas/tests/reshape/concat/test_append.py
@@ -1,5 +1,4 @@
import datetime as dt
from datetime import datetime
from itertools import combinations

import dateutil
2 changes: 1 addition & 1 deletion pandas/tests/reshape/concat/test_append_common.py
@@ -371,7 +371,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
)

res = dti1.append(dti3)
# tm.assert_index_equal(res, exp)
tm.assert_index_equal(res, exp)

dts1 = Series(dti1)
dts3 = Series(dti3)
3 changes: 0 additions & 3 deletions pandas/tests/reshape/concat/test_concat.py
@@ -79,9 +79,6 @@ def test_concat_copy(self):
assert b.values.base is not None

def test_concat_with_group_keys(self):
df = DataFrame(np.random.randn(4, 3))
df2 = DataFrame(np.random.randn(4, 4))

# axis=0
df = DataFrame(np.random.randn(3, 4))
df2 = DataFrame(np.random.randn(4, 4))
6 changes: 3 additions & 3 deletions pandas/tests/reshape/concat/test_dataframe.py
@@ -15,9 +15,9 @@ class TestDataFrameConcat:
def test_concat_multiple_frames_dtypes(self):

# GH#2759
A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
B = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
results = concat((A, B), axis=1).dtypes
df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
results = concat((df1, df2), axis=1).dtypes
expected = Series(
[np.dtype("float64")] * 2 + [np.dtype("float32")] * 2,
index=["foo", "bar", 0, 1],
22 changes: 11 additions & 11 deletions pandas/tests/reshape/concat/test_index.py
@@ -96,18 +96,18 @@ def test_concat_rename_index(self):
tm.assert_frame_equal(result, exp)
assert result.index.names == exp.index.names

@pytest.mark.parametrize("test_series", [True, False])
def test_concat_copy_index(self, test_series, axis):
def test_concat_copy_index_series(self, axis):
# GH 29879
if test_series:
ser = Series([1, 2])
comb = concat([ser, ser], axis=axis, copy=True)
assert comb.index is not ser.index
else:
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
comb = concat([df, df], axis=axis, copy=True)
assert comb.index is not df.index
assert comb.columns is not df.columns
ser = Series([1, 2])
comb = concat([ser, ser], axis=axis, copy=True)
assert comb.index is not ser.index

def test_concat_copy_index_frame(self, axis):
# GH 29879
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
comb = concat([df, df], axis=axis, copy=True)
assert comb.index is not df.index
assert comb.columns is not df.columns

def test_default_index(self):
# is_series and ignore_index
17 changes: 3 additions & 14 deletions pandas/tests/reshape/test_cut.py
@@ -32,8 +32,9 @@ def test_simple():
tm.assert_numpy_array_equal(result, expected, check_dtype=False)


def test_bins():
data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1])
@pytest.mark.parametrize("func", [list, np.array])
def test_bins(func):
data = func([0.2, 1.4, 2.5, 6.2, 9.7, 2.1])
result, bins = cut(data, 3, retbins=True)

intervals = IntervalIndex.from_breaks(bins.round(3))
@@ -68,18 +69,6 @@ def test_no_right():
tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095]))


def test_array_like():
data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
result, bins = cut(data, 3, retbins=True)

intervals = IntervalIndex.from_breaks(bins.round(3))
intervals = intervals.take([0, 0, 0, 1, 2, 0])
expected = Categorical(intervals, ordered=True)

tm.assert_categorical_equal(result, expected)
tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7]))


def test_bins_from_interval_index():
c = cut(range(5), 3)
expected = c
13 changes: 13 additions & 0 deletions pandas/tests/scalar/timestamp/test_comparisons.py
@@ -266,6 +266,19 @@ def test_timestamp_compare_oob_dt64(self):
assert Timestamp.max < other + us
# Note: numpy gets the reversed comparison wrong

# GH-42794
other = datetime(9999, 9, 9)
assert Timestamp.min < other
assert other > Timestamp.min
assert Timestamp.max < other
assert other > Timestamp.max

other = datetime(1, 1, 1)
assert Timestamp.max > other
assert other < Timestamp.max
assert Timestamp.min > other
assert other < Timestamp.min

def test_compare_zerodim_array(self):
# GH#26916
ts = Timestamp.now()
11 changes: 11 additions & 0 deletions pandas/tests/series/methods/test_clip.py
@@ -1,3 +1,5 @@
from datetime import datetime

import numpy as np
import pytest

@@ -128,6 +130,15 @@ def test_clip_with_datetimes(self):
)
tm.assert_series_equal(result, expected)

def test_clip_with_timestamps_and_oob_datetimes(self):
# GH-42794
ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)])

result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
expected = Series([Timestamp.min, Timestamp.max], dtype="object")

tm.assert_series_equal(result, expected)

def test_clip_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])