@@ -2539,3 +2539,176 @@ def test_table_from_pandas_columns_and_schema_are_mutually_exclusive():
2539
2539
2540
2540
with pytest .raises (ValueError ):
2541
2541
pa .Table .from_pandas (df , schema = schema , columns = columns )
2542
+
2543
+
2544
+ # ----------------------------------------------------------------------
2545
+ # Legacy metadata compatibility tests
2546
+
2547
+
2548
def _legacy_column_metadata(name, field_name, pandas_type, numpy_type):
    """Build one entry of the ``'columns'`` list of legacy pandas metadata.

    Parameters
    ----------
    name : str or None
        Logical (pandas-side) name recorded for the column.
    field_name : str
        Name recorded for the corresponding Arrow field.
    pandas_type, numpy_type : str
        Type strings recorded for the column.

    Returns
    -------
    dict
        The column entry; its ``'metadata'`` slot is always ``None``, which
        is all the legacy RangeIndex test needs.
    """
    return {'name': name,
            'field_name': field_name,
            'pandas_type': pandas_type,
            'numpy_type': numpy_type,
            'metadata': None}


def _with_legacy_pandas_metadata(table, index_columns, columns):
    """Return ``table`` with hand-written ``b'pandas'`` schema metadata.

    Parameters
    ----------
    table : pyarrow.Table
        Table whose schema metadata is replaced.
    index_columns : list of str
        Field names listed under ``'index_columns'``.
    columns : list of dict
        Entries for ``'columns'``, e.g. from ``_legacy_column_metadata``.

    Returns
    -------
    pyarrow.Table
        Result of ``table.replace_schema_metadata`` with the JSON-encoded
        metadata stored under the ``b'pandas'`` key.
    """
    legacy_metadata = {
        'index_columns': index_columns,
        'column_indexes': [{'name': None,
                            'field_name': None,
                            'pandas_type': 'unicode',
                            'numpy_type': 'object',
                            'metadata': {'encoding': 'UTF-8'}}],
        'columns': columns,
        'pandas_version': '0.23.4',
    }
    return table.replace_schema_metadata(
        {b'pandas': json.dumps(legacy_metadata)})


def test_range_index_pre_0_12():
    """Check ``to_pandas`` against hand-written legacy RangeIndex metadata.

    Forward compatibility for metadata created from pandas.RangeIndex
    prior to pyarrow 0.13.0.  In every case the index values are stored as
    an int64 column of the Arrow table and ``'index_columns'`` lists plain
    field names; the converted frame must equal a DataFrame built directly
    with ``pd.RangeIndex(0, 8, step=2)``.

    NOTE(review): the test name refers to 0.12 while the description above
    refers to 0.13.0 -- confirm which version boundary is intended.
    """
    a_values = [u'foo', u'bar', None, u'baz']
    b_values = [u'a', u'a', u'b', u'b']
    a_arrow = pa.array(a_values, type='utf8')
    b_arrow = pa.array(b_values, type='utf8')

    # Materialized values of RangeIndex(0, 8, step=2)
    rng_index_arrow = pa.array([0, 2, 4, 6], type='int64')

    gen_name_0 = '__index_level_0__'
    gen_name_1 = '__index_level_1__'

    # Case 1: named RangeIndex
    e1 = pd.DataFrame({
        'a': a_values
    }, index=pd.RangeIndex(0, 8, step=2, name='qux'))
    t1 = pa.Table.from_arrays([a_arrow, rng_index_arrow],
                              names=['a', 'qux'])
    t1 = _with_legacy_pandas_metadata(
        t1,
        index_columns=['qux'],
        columns=[
            _legacy_column_metadata('a', 'a', 'unicode', 'object'),
            _legacy_column_metadata('qux', 'qux', 'int64', 'int64'),
        ])
    r1 = t1.to_pandas()
    tm.assert_frame_equal(r1, e1)

    # Case 2: named RangeIndex, but conflicts with an actual column
    e2 = pd.DataFrame({
        'qux': a_values
    }, index=pd.RangeIndex(0, 8, step=2, name='qux'))
    t2 = pa.Table.from_arrays([a_arrow, rng_index_arrow],
                              names=['qux', gen_name_0])
    # NOTE(review): the first column entry below describes a data column
    # named 'a', although this table's data column is 'qux'.  The fixture
    # values are kept exactly as they were; confirm whether intentional.
    t2 = _with_legacy_pandas_metadata(
        t2,
        index_columns=[gen_name_0],
        columns=[
            _legacy_column_metadata('a', 'a', 'unicode', 'object'),
            _legacy_column_metadata('qux', gen_name_0, 'int64', 'int64'),
        ])
    r2 = t2.to_pandas()
    tm.assert_frame_equal(r2, e2)

    # Case 3: unnamed RangeIndex
    e3 = pd.DataFrame({
        'a': a_values
    }, index=pd.RangeIndex(0, 8, step=2, name=None))
    t3 = pa.Table.from_arrays([a_arrow, rng_index_arrow],
                              names=['a', gen_name_0])
    t3 = _with_legacy_pandas_metadata(
        t3,
        index_columns=[gen_name_0],
        columns=[
            _legacy_column_metadata('a', 'a', 'unicode', 'object'),
            _legacy_column_metadata(None, gen_name_0, 'int64', 'int64'),
        ])
    r3 = t3.to_pandas()
    tm.assert_frame_equal(r3, e3)

    # Case 4: MultiIndex with named RangeIndex
    e4 = pd.DataFrame({
        'a': a_values
    }, index=[pd.RangeIndex(0, 8, step=2, name='qux'), b_values])
    t4 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow],
                              names=['a', 'qux', gen_name_1])
    t4 = _with_legacy_pandas_metadata(
        t4,
        index_columns=['qux', gen_name_1],
        columns=[
            _legacy_column_metadata('a', 'a', 'unicode', 'object'),
            _legacy_column_metadata('qux', 'qux', 'int64', 'int64'),
            _legacy_column_metadata(None, gen_name_1, 'unicode', 'object'),
        ])
    r4 = t4.to_pandas()
    tm.assert_frame_equal(r4, e4)

    # Case 5: MultiIndex with unnamed RangeIndex
    e5 = pd.DataFrame({
        'a': a_values
    }, index=[pd.RangeIndex(0, 8, step=2, name=None), b_values])
    t5 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow],
                              names=['a', gen_name_0, gen_name_1])
    t5 = _with_legacy_pandas_metadata(
        t5,
        index_columns=[gen_name_0, gen_name_1],
        columns=[
            _legacy_column_metadata('a', 'a', 'unicode', 'object'),
            _legacy_column_metadata(None, gen_name_0, 'int64', 'int64'),
            _legacy_column_metadata(None, gen_name_1, 'unicode', 'object'),
        ])
    r5 = t5.to_pandas()
    tm.assert_frame_equal(r5, e5)
0 commit comments