21
21
import pytest
22
22
import pytz
23
23
24
+ from pandas ._config import using_pyarrow_string_dtype
25
+
24
26
from pandas ._libs import lib
25
27
from pandas .errors import IntCastingNaNError
26
28
import pandas .util ._test_decorators as td
@@ -79,7 +81,7 @@ def test_constructor_from_ndarray_with_str_dtype(self):
79
81
# with an array of strings each of which is e.g. "[0 1 2]"
80
82
arr = np .arange (12 ).reshape (4 , 3 )
81
83
df = DataFrame (arr , dtype = str )
82
- expected = DataFrame (arr .astype (str ))
84
+ expected = DataFrame (arr .astype (str ), dtype = object )
83
85
tm .assert_frame_equal (df , expected )
84
86
85
87
def test_constructor_from_2d_datetimearray (self , using_array_manager ):
@@ -261,8 +263,9 @@ def test_emptylike_constructor(self, emptylike, expected_index, expected_columns
261
263
result = DataFrame (emptylike )
262
264
tm .assert_frame_equal (result , expected )
263
265
264
- def test_constructor_mixed (self , float_string_frame ):
265
- assert float_string_frame ["foo" ].dtype == np .object_
266
+ def test_constructor_mixed (self , float_string_frame , using_infer_string ):
267
+ dtype = "string" if using_infer_string else np .object_
268
+ assert float_string_frame ["foo" ].dtype == dtype
266
269
267
270
def test_constructor_cast_failure (self ):
268
271
# as of 2.0, we raise if we can't respect "dtype", previously we
@@ -318,13 +321,15 @@ def test_constructor_dtype_nocast_view_2d_array(
318
321
assert df2 ._mgr .arrays [0 ].flags .c_contiguous
319
322
320
323
@td .skip_array_manager_invalid_test
324
+ @pytest .mark .xfail (using_pyarrow_string_dtype (), reason = "conversion copies" )
321
325
def test_1d_object_array_does_not_copy (self ):
322
326
# https://github.com/pandas-dev/pandas/issues/39272
323
327
arr = np .array (["a" , "b" ], dtype = "object" )
324
328
df = DataFrame (arr , copy = False )
325
329
assert np .shares_memory (df .values , arr )
326
330
327
331
@td .skip_array_manager_invalid_test
332
+ @pytest .mark .xfail (using_pyarrow_string_dtype (), reason = "conversion copies" )
328
333
def test_2d_object_array_does_not_copy (self ):
329
334
# https://github.com/pandas-dev/pandas/issues/39272
330
335
arr = np .array ([["a" , "b" ], ["c" , "d" ]], dtype = "object" )
@@ -764,7 +769,7 @@ def test_constructor_dict_block(self):
764
769
)
765
770
tm .assert_numpy_array_equal (df .values , expected )
766
771
767
- def test_constructor_dict_cast (self ):
772
+ def test_constructor_dict_cast (self , using_infer_string ):
768
773
# cast float tests
769
774
test_data = {"A" : {"1" : 1 , "2" : 2 }, "B" : {"1" : "1" , "2" : "2" , "3" : "3" }}
770
775
frame = DataFrame (test_data , dtype = float )
@@ -774,7 +779,7 @@ def test_constructor_dict_cast(self):
774
779
775
780
frame = DataFrame (test_data )
776
781
assert len (frame ) == 3
777
- assert frame ["B" ].dtype == np .object_
782
+ assert frame ["B" ].dtype == np .object_ if not using_infer_string else "string"
778
783
assert frame ["A" ].dtype == np .float64
779
784
780
785
def test_constructor_dict_cast2 (self ):
@@ -1186,15 +1191,15 @@ def test_constructor_dtype_nullable_extension_arrays(
1186
1191
df = DataFrame ({"a" : data }, dtype = input_dtype )
1187
1192
assert df ["a" ].dtype == expected_dtype ()
1188
1193
1189
- def test_constructor_scalar_inference (self ):
1194
+ def test_constructor_scalar_inference (self , using_infer_string ):
1190
1195
data = {"int" : 1 , "bool" : True , "float" : 3.0 , "complex" : 4j , "object" : "foo" }
1191
1196
df = DataFrame (data , index = np .arange (10 ))
1192
1197
1193
1198
assert df ["int" ].dtype == np .int64
1194
1199
assert df ["bool" ].dtype == np .bool_
1195
1200
assert df ["float" ].dtype == np .float64
1196
1201
assert df ["complex" ].dtype == np .complex128
1197
- assert df ["object" ].dtype == np .object_
1202
+ assert df ["object" ].dtype == np .object_ if not using_infer_string else "string"
1198
1203
1199
1204
def test_constructor_arrays_and_scalars (self ):
1200
1205
df = DataFrame ({"a" : np .random .default_rng (2 ).standard_normal (10 ), "b" : True })
@@ -1273,11 +1278,11 @@ def empty_gen():
1273
1278
df = DataFrame (empty_gen (), columns = ["A" , "B" ])
1274
1279
tm .assert_frame_equal (df , expected )
1275
1280
1276
- def test_constructor_list_of_lists (self ):
1281
+ def test_constructor_list_of_lists (self , using_infer_string ):
1277
1282
# GH #484
1278
1283
df = DataFrame (data = [[1 , "a" ], [2 , "b" ]], columns = ["num" , "str" ])
1279
1284
assert is_integer_dtype (df ["num" ])
1280
- assert df ["str" ].dtype == np .object_
1285
+ assert df ["str" ].dtype == np .object_ if not using_infer_string else "string"
1281
1286
1282
1287
# GH 4851
1283
1288
# list of 0-dim ndarrays
@@ -1822,7 +1827,7 @@ def test_constructor_single_value(self):
1822
1827
with pytest .raises (TypeError , match = msg ):
1823
1828
DataFrame ("a" , [1 , 2 ], ["a" , "c" ], float )
1824
1829
1825
- def test_constructor_with_datetimes (self ):
1830
+ def test_constructor_with_datetimes (self , using_infer_string ):
1826
1831
intname = np .dtype (np .int_ ).name
1827
1832
floatname = np .dtype (np .float64 ).name
1828
1833
objectname = np .dtype (np .object_ ).name
@@ -1841,7 +1846,7 @@ def test_constructor_with_datetimes(self):
1841
1846
result = df .dtypes
1842
1847
expected = Series (
1843
1848
[np .dtype ("int64" )]
1844
- + [np .dtype (objectname )] * 2
1849
+ + [np .dtype (objectname ) if not using_infer_string else "string" ] * 2
1845
1850
+ [np .dtype ("M8[s]" ), np .dtype ("M8[us]" )],
1846
1851
index = list ("ABCDE" ),
1847
1852
)
@@ -1863,7 +1868,7 @@ def test_constructor_with_datetimes(self):
1863
1868
expected = Series (
1864
1869
[np .dtype ("float64" )]
1865
1870
+ [np .dtype ("int64" )]
1866
- + [np .dtype ("object" )]
1871
+ + [np .dtype ("object" ) if not using_infer_string else "string" ]
1867
1872
+ [np .dtype ("float64" )]
1868
1873
+ [np .dtype (intname )],
1869
1874
index = ["a" , "b" , "c" , floatname , intname ],
@@ -1885,7 +1890,7 @@ def test_constructor_with_datetimes(self):
1885
1890
expected = Series (
1886
1891
[np .dtype ("float64" )]
1887
1892
+ [np .dtype ("int64" )]
1888
- + [np .dtype ("object" )]
1893
+ + [np .dtype ("object" ) if not using_infer_string else "string" ]
1889
1894
+ [np .dtype ("float64" )]
1890
1895
+ [np .dtype (intname )],
1891
1896
index = ["a" , "b" , "c" , floatname , intname ],
@@ -1922,13 +1927,13 @@ def test_constructor_with_datetimes3(self):
1922
1927
df = DataFrame ({"End Date" : dt }, index = [0 ])
1923
1928
assert df .iat [0 , 0 ] == dt
1924
1929
tm .assert_series_equal (
1925
- df .dtypes , Series ({"End Date" : "datetime64[us, US/Eastern]" })
1930
+ df .dtypes , Series ({"End Date" : "datetime64[us, US/Eastern]" }, dtype = object )
1926
1931
)
1927
1932
1928
1933
df = DataFrame ([{"End Date" : dt }])
1929
1934
assert df .iat [0 , 0 ] == dt
1930
1935
tm .assert_series_equal (
1931
- df .dtypes , Series ({"End Date" : "datetime64[ns, US/Eastern]" })
1936
+ df .dtypes , Series ({"End Date" : "datetime64[ns, US/Eastern]" }, dtype = object )
1932
1937
)
1933
1938
1934
1939
def test_constructor_with_datetimes4 (self ):
@@ -2053,7 +2058,7 @@ def test_constructor_timedelta_non_ns(self, order, unit):
2053
2058
# dtype=exp_dtype.
2054
2059
tm .assert_frame_equal (df , expected )
2055
2060
2056
- def test_constructor_for_list_with_dtypes (self ):
2061
+ def test_constructor_for_list_with_dtypes (self , using_infer_string ):
2057
2062
# test list of lists/ndarrays
2058
2063
df = DataFrame ([np .arange (5 ) for x in range (5 )])
2059
2064
result = df .dtypes
@@ -2104,7 +2109,7 @@ def test_constructor_for_list_with_dtypes(self):
2104
2109
[
2105
2110
np .dtype ("int64" ),
2106
2111
np .dtype ("float64" ),
2107
- np .dtype ("object" ),
2112
+ np .dtype ("object" ) if not using_infer_string else "string" ,
2108
2113
np .dtype ("datetime64[ns]" ),
2109
2114
np .dtype ("float64" ),
2110
2115
],
0 commit comments