8
8
import numpy as np
9
9
import pytest
10
10
11
- from pandas ._config import using_string_dtype
12
-
13
11
from pandas .errors import ParserWarning
14
12
15
13
import pandas as pd
24
22
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
25
23
)
26
24
25
# Reusable decorator: a test marked with it requests the ``pyarrow_xfail``
# fixture, so individual tests can write ``@xfail_pyarrow`` instead of
# repeating the full ``usefixtures`` marker.
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
26
+
27
27
28
28
@pytest .mark .parametrize ("dtype" , [str , object ])
29
29
@pytest .mark .parametrize ("check_orig" , [True , False ])
@@ -54,7 +54,6 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig, using_infer_string):
54
54
tm .assert_frame_equal (result , expected )
55
55
56
56
57
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
58
57
@pytest .mark .usefixtures ("pyarrow_xfail" )
59
58
def test_dtype_per_column (all_parsers ):
60
59
parser = all_parsers
@@ -68,7 +67,6 @@ def test_dtype_per_column(all_parsers):
68
67
[[1 , "2.5" ], [2 , "3.5" ], [3 , "4.5" ], [4 , "5.5" ]], columns = ["one" , "two" ]
69
68
)
70
69
expected ["one" ] = expected ["one" ].astype (np .float64 )
71
- expected ["two" ] = expected ["two" ].astype (object )
72
70
73
71
result = parser .read_csv (StringIO (data ), dtype = {"one" : np .float64 , 1 : str })
74
72
tm .assert_frame_equal (result , expected )
@@ -598,6 +596,7 @@ def test_string_inference_object_dtype(all_parsers, dtype, using_infer_string):
598
596
tm .assert_frame_equal (result , expected )
599
597
600
598
599
+ @xfail_pyarrow
601
600
def test_accurate_parsing_of_large_integers (all_parsers ):
602
601
# GH#52505
603
602
data = """SYMBOL,MOMENT,ID,ID_DEAL
@@ -608,7 +607,7 @@ def test_accurate_parsing_of_large_integers(all_parsers):
608
607
AMZN,20230301181139587,2023552585717889759,2023552585717263360
609
608
MSFT,20230301181139587,2023552585717889863,2023552585717263361
610
609
NVDA,20230301181139587,2023552585717889827,2023552585717263361"""
611
- orders = pd .read_csv (StringIO (data ), dtype = {"ID_DEAL" : pd .Int64Dtype ()})
610
+ orders = all_parsers .read_csv (StringIO (data ), dtype = {"ID_DEAL" : pd .Int64Dtype ()})
612
611
assert len (orders .loc [orders ["ID_DEAL" ] == 2023552585717263358 , "ID_DEAL" ]) == 1
613
612
assert len (orders .loc [orders ["ID_DEAL" ] == 2023552585717263359 , "ID_DEAL" ]) == 1
614
613
assert len (orders .loc [orders ["ID_DEAL" ] == 2023552585717263360 , "ID_DEAL" ]) == 2
@@ -630,3 +629,16 @@ def test_dtypes_with_usecols(all_parsers):
630
629
values = ["1" , "4" ]
631
630
expected = DataFrame ({"a" : pd .Series (values , dtype = object ), "c" : [3 , 6 ]})
632
631
tm .assert_frame_equal (result , expected )
632
+
633
+
634
def test_index_col_with_dtype_no_rangeindex(all_parsers):
    """Check that the ``index_col`` column keeps the dtype requested for it.

    A headerless two-row CSV is read with explicit column names and a
    per-column ``dtype`` mapping. The column selected as the index is
    requested as ``uint32``, and the resulting index is compared against an
    explicitly built ``uint32`` ``Index`` named ``"bin_id"``.
    """
    column_names = ["start", "stop", "bin_id"]
    requested_dtypes = {
        "start": np.float32,
        "stop": np.float32,
        "bin_id": np.uint32,
    }
    csv_buffer = StringIO("345.5,519.5,0\n 519.5,726.5,1")

    frame = all_parsers.read_csv(
        csv_buffer,
        header=None,
        names=column_names,
        dtype=requested_dtypes,
        index_col="bin_id",
    )

    # Only the index is under test; the data columns are not inspected.
    expected = pd.Index([0, 1], dtype=np.uint32, name="bin_id")
    tm.assert_index_equal(frame.index, expected)
0 commit comments