@@ -441,10 +441,17 @@ def __init__(self, f, delimiter=None, names=None, header=0,
441
441
self .data = f
442
442
self .columns = self ._infer_columns ()
443
443
444
+ # needs to be cleaned/refactored
445
+ # multiple date column thing turning into a real spaghetti factory
446
+
444
447
# get popped off for index
445
448
self .orig_columns = list (self .columns )
446
449
447
- self .index_name = self ._get_index_name ()
450
+ self .index_name = None
451
+ self ._name_processed = False
452
+ if not self ._has_complex_date_col :
453
+ self .index_name = self ._get_index_name ()
454
+ self ._name_processed = True
448
455
self ._first_chunk = True
449
456
450
457
self .squeeze = squeeze
@@ -534,6 +541,8 @@ def _infer_columns(self):
534
541
else :
535
542
columns = names
536
543
544
+
545
+
537
546
return columns
538
547
539
548
def _next_line (self ):
@@ -656,10 +665,14 @@ def _get_index_name(self):
656
665
index_name = None
657
666
elif np .isscalar (self .index_col ):
658
667
if isinstance (self .index_col , basestring ):
668
+ index_names = self .index_col
659
669
for i , c in enumerate (list (columns )):
660
670
if c == self .index_col :
661
671
self .index_col = i
662
- index_name = columns .pop (self .index_col )
672
+ columns .pop (i )
673
+ break
674
+ else :
675
+ index_name = columns .pop (self .index_col )
663
676
664
677
if index_name is not None and 'Unnamed' in index_name :
665
678
index_name = None
@@ -670,10 +683,12 @@ def _get_index_name(self):
670
683
index_col = list (self .index_col )
671
684
for i , c in enumerate (index_col ):
672
685
if isinstance (c , basestring ):
673
- index_name = c
686
+ index_name . append ( c )
674
687
for j , name in enumerate (cp_cols ):
675
- if name == index_name :
688
+ if name == c :
676
689
index_col [i ] = j
690
+ columns .remove (name )
691
+ break
677
692
else :
678
693
name = cp_cols [c ]
679
694
columns .remove (name )
@@ -710,8 +725,8 @@ def get_chunk(self, rows=None):
710
725
711
726
zipped_content = list (lib .to_object_array (content ).T )
712
727
713
- if self .index_col is not None :
714
- index = self ._extract_index (zipped_content )
728
+ if not self . _has_complex_date_col and self .index_col is not None :
729
+ index = self ._get_index (zipped_content )
715
730
else :
716
731
index = Index (np .arange (len (content )))
717
732
@@ -746,24 +761,110 @@ def get_chunk(self, rows=None):
746
761
data = _convert_to_ndarrays (data , self .na_values , self .verbose )
747
762
748
763
df = DataFrame (data = data , columns = columns , index = index )
764
+ if self ._has_complex_date_col and self .index_col is not None :
765
+ if not self ._name_processed :
766
+ self .index_name = self ._get_index_name ()
767
+ self ._name_processed = True
768
+ data = dict (((k , v ) for k , v in df .iteritems ()))
769
+ columns = list (columns )
770
+ index = self ._get_index (data , col_order = columns , parse_dates = False )
771
+ data = dict (((k , v .values ) for k , v in data .iteritems ()))
772
+ df = DataFrame (data = data , columns = columns , index = index )
773
+
749
774
if self .squeeze and len (df .columns ) == 1 :
750
775
return df [df .columns [0 ]]
751
776
return df
752
777
753
- def _extract_index (self , zipped_content ):
778
+ @property
779
+ def _has_complex_date_col (self ):
780
+ return (isinstance (self .parse_dates , dict ) or
781
+ (isinstance (self .parse_dates , list ) and
782
+ len (self .parse_dates ) > 0 and
783
+ isinstance (self .parse_dates [0 ], list )))
784
+
785
+ def _get_index (self , data , col_order = None , parse_dates = True ):
786
+ if isinstance (data , dict ):
787
+ index = self ._get_index_from_dict (data , col_order , parse_dates )
788
+ return self ._agg_index (index , parse_dates )
789
+ else :
790
+ index = self ._get_index_from_list (data , col_order , parse_dates )
791
+ return self ._agg_index (index , parse_dates )
792
+
793
def _get_index_from_list(self, data, col_names=None, parse_dates=True):
    """
    Pull the index column(s) out of a positional sequence of columns.

    ``self.index_col`` is either a scalar or an iterable of column
    references. A reference that is a string is resolved to its position
    in ``col_names``; any other reference is used as a position directly.
    The referenced columns are removed in place from ``data`` and, when
    supplied, from ``col_names``.

    Parameters
    ----------
    data : list
        Columns addressed by position; mutated in place.
    col_names : list, optional
        Column names, position for position with ``data``. Required when
        an index column is given by name; mutated in place.
    parse_dates : bool, default True
        Not used by this method.

    Returns
    -------
    The popped column when ``self.index_col`` is scalar, otherwise a list
    of columns in ``self.index_col`` order.

    Raises
    ------
    ValueError
        If an index column is given by name and ``col_names`` is None, or
        the name does not occur in ``col_names``.
    """
    def _get_ix(icol):
        if not isinstance(icol, basestring):
            return icol

        if col_names is None:
            raise ValueError(('Must supply column order to use %s as '
                              'index') % icol)

        for i, c in enumerate(col_names):
            if c == icol:
                return i

        # Falling through used to return None, which surfaced later as an
        # opaque TypeError from ``list.pop(None)``.
        raise ValueError('Index column %s not found in columns' % icol)

    if np.isscalar(self.index_col):
        ix = _get_ix(self.index_col)
        index = data.pop(ix)
        if col_names is not None:
            col_names.pop(ix)
        return index

    # given a list of index columns
    positions = [_get_ix(idx) for idx in self.index_col]
    index = [data[i] for i in positions]

    # Remove from the highest position down so earlier pops do not shift
    # the positions still to be removed; de-duplicate so a column listed
    # twice in index_col is only removed once.
    for i in sorted(set(positions), reverse=True):
        data.pop(i)
        if col_names is not None:
            col_names.pop(i)

    return index
828
+
829
def _get_index_from_dict(self, data, col_names=None, parse_dates=True):
    """
    Pull the index column(s) out of a name-keyed dict of columns.

    ``self.index_col`` is either a scalar or an iterable of column
    references. A reference that is a string is used as the key directly;
    any other reference is treated as a position and resolved to a name
    through ``col_names``. The referenced columns are removed in place
    from ``data`` and, when supplied, from ``col_names``.

    Parameters
    ----------
    data : dict
        Columns keyed by name; mutated in place.
    col_names : list, optional
        Column names in order. Required when an index column is given by
        position; mutated in place.
    parse_dates : bool, default True
        Not used by this method.

    Returns
    -------
    The popped column when ``self.index_col`` is scalar, otherwise a list
    of columns in ``self.index_col`` order.

    Raises
    ------
    ValueError
        If an index column is given by position and ``col_names`` is
        None, or no entry of ``col_names`` sits at that position.
    """
    def _get_name(icol):
        if isinstance(icol, basestring):
            return icol

        if col_names is None:
            raise ValueError(('Must supply column order to use %s as '
                              'index') % str(icol))

        for i, c in enumerate(col_names):
            if i == icol:
                return c

        # Falling through used to return None, which surfaced later as a
        # confusing ``KeyError: None`` from the dict lookup.
        raise ValueError('Index column position %s is out of range'
                         % str(icol))

    if np.isscalar(self.index_col):
        name = _get_name(self.index_col)
        index = data.pop(name)
        if col_names is not None:
            col_names.remove(name)
        return index

    # given a list of index columns
    names = [_get_name(idx) for idx in self.index_col]
    index = [data[c] for c in names]

    # Removal is by name, so order does not matter; skip repeats so a
    # column listed twice in index_col does not raise on the second pop.
    removed = set()
    for c in names:
        if c in removed:
            continue
        removed.add(c)
        data.pop(c)
        if col_names is not None:
            col_names.remove(c)

    return index
864
+
865
+ def _agg_index (self , index , parse_dates ):
765
866
if np .isscalar (self .index_col ):
766
- if self ._should_parse_dates (self .index_col ):
867
+ if parse_dates and self ._should_parse_dates (self .index_col ):
767
868
index = self ._conv_date (index )
768
869
index , na_count = _convert_types (index , self .na_values )
769
870
index = Index (index , name = self .index_name )
@@ -772,7 +873,7 @@ def _extract_index(self, zipped_content):
772
873
else :
773
874
arrays = []
774
875
for i , arr in enumerate (index ):
775
- if self ._should_parse_dates (self .index_col [i ]):
876
+ if parse_dates and self ._should_parse_dates (self .index_col [i ]):
776
877
arr = self ._conv_date (arr )
777
878
arr , _ = _convert_types (arr , self .na_values )
778
879
arrays .append (arr )
@@ -801,11 +902,13 @@ def _should_parse_dates(self, i):
801
902
if isinstance (self .parse_dates , bool ):
802
903
return self .parse_dates
803
904
else :
804
- to_parse = self .parse_dates
905
+ to_parse = self .parse_dates # int/string or list of int or string
906
+
805
907
if np .isscalar (self .index_col ):
806
908
name = self .index_name
807
909
else :
808
910
name = self .index_name [i ]
911
+
809
912
return i in to_parse or name in to_parse
810
913
811
914
def _conv_date (self , * date_cols ):
0 commit comments