Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b6aa882

Browse files
authored Oct 19, 2020
REF: move misplaced pd.concat tests (#37232)
1 parent 3f0e0cf commit b6aa882

File tree

3 files changed

+346
-359
lines changed

3 files changed

+346
-359
lines changed
 

‎pandas/tests/frame/test_combine_concat.py

Lines changed: 0 additions & 236 deletions
This file was deleted.

‎pandas/tests/reshape/test_concat.py

Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2925,3 +2925,349 @@ def test_concat_preserves_extension_int64_dtype():
29252925
result = pd.concat([df_a, df_b], ignore_index=True)
29262926
expected = pd.DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64")
29272927
tm.assert_frame_equal(result, expected)
2928+
2929+
2930+
class TestSeriesConcat:
    """Checks of the dtype produced by ``pd.concat`` on empty Series."""

    @pytest.mark.parametrize(
        "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
    )
    def test_concat_empty_series_dtypes_match_roundtrips(self, dtype):
        """Concatenating empty Series that share a dtype keeps that dtype."""
        dtype = np.dtype(dtype)

        result = pd.concat([Series(dtype=dtype)])
        assert result.dtype == dtype

        result = pd.concat([Series(dtype=dtype), Series(dtype=dtype)])
        assert result.dtype == dtype

    def test_concat_empty_series_dtypes_roundtrips(self):
        """Check the result dtype kind for every ordered pair of distinct dtypes."""

        # round-tripping with self & like self
        # The dtypes must be a real list: a bare ``map`` object is a one-shot
        # iterator, so the inner loop below would drain it during the first
        # outer iteration and every pair not starting with float64 would be
        # silently skipped.
        dtypes = [
            np.dtype(name)
            for name in ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
        ]

        def int_result_type(dtype, dtype2):
            """Return "i"/"u" when the pair combines to an integer kind, else None."""
            typs = {dtype.kind, dtype2.kind}
            if typs <= {"i", "u", "b"} and (dtype.kind == "i" or dtype2.kind == "i"):
                return "i"
            elif typs <= {"u", "b"} and (dtype.kind == "u" or dtype2.kind == "u"):
                return "u"
            return None

        def float_result_type(dtype, dtype2):
            """Return "f" when the pair combines to a float kind, else None."""
            typs = {dtype.kind, dtype2.kind}
            if typs <= {"f", "i", "u"} and (dtype.kind == "f" or dtype2.kind == "f"):
                return "f"
            return None

        def get_result_type(dtype, dtype2):
            """Return the expected result kind: float, then integer, else object."""
            result = float_result_type(dtype, dtype2)
            if result is not None:
                return result
            result = int_result_type(dtype, dtype2)
            if result is not None:
                return result
            return "O"

        for dtype in dtypes:
            for dtype2 in dtypes:
                if dtype == dtype2:
                    # identical dtypes are covered by the "match" test above
                    continue

                expected = get_result_type(dtype, dtype2)
                result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
                assert result.kind == expected

    @pytest.mark.parametrize(
        "left,right,expected",
        [
            # booleans
            (np.bool_, np.int32, np.int32),
            (np.bool_, np.float32, np.object_),
            # datetime-like
            ("m8[ns]", np.bool_, np.object_),
            ("m8[ns]", np.int64, np.object_),
            ("M8[ns]", np.bool_, np.object_),
            ("M8[ns]", np.int64, np.object_),
            # categorical
            ("category", "category", "category"),
            ("category", "object", "object"),
        ],
    )
    def test_concat_empty_series_dtypes(self, left, right, expected):
        """Concatenating two empty Series gives the parametrized result dtype."""
        result = pd.concat([Series(dtype=left), Series(dtype=right)])
        assert result.dtype == expected

    def test_concat_empty_series_dtypes_triple(self):
        """Empty datetime64, bool and int64 Series concatenate to object dtype."""

        assert (
            pd.concat(
                [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
            ).dtype
            == np.object_
        )

    def test_concat_empty_series_dtype_category_with_array(self):
        """An empty categorical built from an array plus empty float64 is float64."""
        # GH#18515
        assert (
            pd.concat(
                [Series(np.array([]), dtype="category"), Series(dtype="float64")]
            ).dtype
            == "float64"
        )

    def test_concat_empty_series_dtypes_sparse(self):
        """Check result dtypes when empty sparse Series are concatenated."""
        result = pd.concat(
            [
                Series(dtype="float64").astype("Sparse"),
                Series(dtype="float64").astype("Sparse"),
            ]
        )
        assert result.dtype == "Sparse[float64]"

        result = pd.concat(
            [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")]
        )
        # TODO: release-note: concat sparse dtype
        expected = pd.SparseDtype(np.float64)
        assert result.dtype == expected

        result = pd.concat(
            [Series(dtype="float64").astype("Sparse"), Series(dtype="object")]
        )
        # TODO: release-note: concat sparse dtype
        expected = pd.SparseDtype("object")
        assert result.dtype == expected
3046+
3047+
3048+
class TestDataFrameConcat:
    """Tests of ``pd.concat`` on DataFrames: dtypes, time zones, keys and axes."""

    def test_concat_multiple_frames_dtypes(self):
        """Column-wise concat keeps each input frame's own float dtype."""

        # GH#2759
        labelled_f64 = DataFrame(
            data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64
        )
        unlabelled_f32 = DataFrame(data=np.ones((10, 2)), dtype=np.float32)

        expected = Series(
            [
                np.dtype("float64"),
                np.dtype("float64"),
                np.dtype("float32"),
                np.dtype("float32"),
            ],
            index=["foo", "bar", 0, 1],
        )
        results = pd.concat((labelled_f64, unlabelled_f32), axis=1).dtypes
        tm.assert_series_equal(results, expected)

    def test_concat_multiple_tzs(self):
        """Concatenate frames holding naive, UTC and EST timestamps pairwise."""
        # GH#12467
        # combining datetime tz-aware and naive DataFrames
        naive = Timestamp("2015-01-01", tz=None)
        utc = Timestamp("2015-01-01", tz="UTC")
        est = Timestamp("2015-01-01", tz="EST")

        naive_df, utc_df, est_df = (
            DataFrame({"time": [stamp]}) for stamp in (naive, utc, est)
        )

        # naive combined with tz-aware: expected frame is built with object dtype
        for aware, aware_df in ((utc, utc_df), (est, est_df)):
            results = pd.concat([naive_df, aware_df]).reset_index(drop=True)
            expected = DataFrame({"time": [naive, aware]}, dtype=object)
            tm.assert_frame_equal(results, expected)

        # two different zones: expected frame is built without an explicit dtype
        results = pd.concat([utc_df, est_df]).reset_index(drop=True)
        expected = DataFrame({"time": [utc, est]})
        tm.assert_frame_equal(results, expected)

    @pytest.mark.parametrize(
        "t1",
        [
            "2015-01-01",
            pytest.param(
                pd.NaT,
                marks=pytest.mark.xfail(
                    reason="GH23037 incorrect dtype when concatenating"
                ),
            ),
        ],
    )
    def test_concat_tz_NaT(self, t1):
        """Concatenate tz-aware frames of different widths; the gap becomes NaT."""
        # GH#22796
        # Concating tz-aware multicolumn DataFrames
        first = Timestamp(t1, tz="UTC")
        second = Timestamp("2015-01-01", tz="UTC")
        third = Timestamp("2015-01-01", tz="UTC")

        two_columns = DataFrame([[first, second]])
        one_column = DataFrame([[third]])

        expected = DataFrame([[first, second], [third, pd.NaT]], index=[0, 0])
        result = pd.concat([two_columns, one_column])

        tm.assert_frame_equal(result, expected)

    def test_concat_tz_not_aligned(self):
        """Concatenate tz-aware frames whose column sets differ."""
        # GH#22796
        ts = pd.to_datetime([1, 2]).tz_localize("UTC")
        stamps = list(ts)

        only_a = pd.DataFrame({"A": ts})
        a_and_b = pd.DataFrame({"A": ts, "B": ts})

        expected = pd.DataFrame(
            {"A": stamps + stamps, "B": [pd.NaT, pd.NaT] + stamps}
        )
        result = pd.concat([only_a, a_and_b], sort=True, ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_concat_tuple_keys(self):
        """Tuple keys passed to concat become leading levels of the row index."""
        # GH#14438
        ones = pd.DataFrame(np.ones((2, 2)), columns=list("AB"))
        twos = pd.DataFrame(np.ones((3, 2)) * 2, columns=list("AB"))
        results = pd.concat((ones, twos), keys=[("bee", "bah"), ("bee", "boo")])

        # one entry per row: ("bee", <second key>, <row position>) -> fill value
        column = {
            ("bee", second, position): fill
            for second, fill, length in (("bah", 1.0, 2), ("boo", 2.0, 3))
            for position in range(length)
        }
        expected = pd.DataFrame({"A": dict(column), "B": dict(column)})
        tm.assert_frame_equal(results, expected)

    def test_concat_named_keys(self):
        """Level names come from the keys Index name or from ``names``."""
        # GH#14252
        df = pd.DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]})

        def build_expected(level_names):
            # the doubled frame indexed by (key, original row label)
            return pd.DataFrame(
                {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]},
                index=pd.MultiIndex.from_product(
                    (["a", "b"], [0, 1]), names=level_names
                ),
            )

        index = Index(["a", "b"], name="baz")
        concatted_named_from_keys = pd.concat([df, df], keys=index)
        expected_named = build_expected(["baz", None])
        tm.assert_frame_equal(concatted_named_from_keys, expected_named)

        index_no_name = Index(["a", "b"], name=None)
        concatted_named_from_names = pd.concat(
            [df, df], keys=index_no_name, names=["baz"]
        )
        tm.assert_frame_equal(concatted_named_from_names, expected_named)

        concatted_unnamed = pd.concat([df, df], keys=index_no_name)
        expected_unnamed = build_expected([None, None])
        tm.assert_frame_equal(concatted_unnamed, expected_unnamed)

    def test_concat_axis_parameter(self):
        """The ``axis`` argument accepts string aliases as well as 0 and 1."""
        # GH#14369
        row_aliases = ("index", "rows", 0)
        column_aliases = ("columns", 1)

        df1 = pd.DataFrame({"A": [0.1, 0.2]}, index=range(2))
        df2 = pd.DataFrame({"A": [0.3, 0.4]}, index=range(2))

        # Index/row/0 DataFrame
        expected_index = pd.DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1])
        for axis in row_aliases:
            tm.assert_frame_equal(pd.concat([df1, df2], axis=axis), expected_index)

        # Columns/1 DataFrame
        expected_columns = pd.DataFrame(
            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"]
        )
        for axis in column_aliases:
            tm.assert_frame_equal(pd.concat([df1, df2], axis=axis), expected_columns)

        series1 = pd.Series([0.1, 0.2])
        series2 = pd.Series([0.3, 0.4])

        # Index/row/0 Series
        expected_index_series = pd.Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1])
        for axis in row_aliases:
            tm.assert_series_equal(
                pd.concat([series1, series2], axis=axis), expected_index_series
            )

        # Columns/1 Series
        expected_columns_series = pd.DataFrame(
            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1]
        )
        for axis in column_aliases:
            tm.assert_frame_equal(
                pd.concat([series1, series2], axis=axis), expected_columns_series
            )

        # Testing ValueError
        with pytest.raises(ValueError, match="No axis named"):
            pd.concat([series1, series2], axis="something")

    def test_concat_numerical_names(self):
        """Integer MultiIndex level names are kept when concatenating slices."""
        # GH#15262, GH#12223
        full_index = pd.MultiIndex.from_product(
            [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2]
        )
        df = pd.DataFrame({"col": range(9)}, dtype="int32", index=full_index)

        head = df.iloc[:2, :]
        tail = df.iloc[-2:, :]
        result = pd.concat((head, tail))

        expected_index = pd.MultiIndex.from_tuples(
            [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")], names=[1, 2]
        )
        expected = pd.DataFrame(
            {"col": [0, 1, 7, 8]}, dtype="int32", index=expected_index
        )
        tm.assert_frame_equal(result, expected)

    def test_concat_astype_dup_col(self):
        """``astype("category")`` on a concat result with duplicate column labels."""
        # GH#23049
        single = pd.DataFrame([{"a": "b"}])
        doubled = pd.concat([single, single], axis=1)

        result = doubled.astype("category")

        raw = np.array(["b", "b"]).reshape(1, 2)
        expected = pd.DataFrame(raw, columns=["a", "a"]).astype("category")
        tm.assert_frame_equal(result, expected)

    def test_concat_datetime_datetime64_frame(self):
        """Smoke test: concat a date_range-backed frame with a from_records frame."""
        # GH#2624
        rows = [
            [datetime(2010, 1, 1), 1],
            [datetime(2010, 1, 2), "hi"],
        ]
        df2_obj = DataFrame.from_records(rows, columns=["date", "test"])

        ind = date_range(start="2000/1/1", freq="D", periods=10)
        df1 = DataFrame({"date": ind, "test": range(10)})

        # it works!
        pd.concat([df1, df2_obj])

‎pandas/tests/series/test_combine_concat.py

Lines changed: 0 additions & 123 deletions
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.