@@ -656,52 +656,58 @@ cdef class TextReader:
656
656
657
657
# Header is in the file
658
658
for level, hr in enumerate (prelim_header):
659
+ print (f" Processing level {level}, header row {hr}" )
659
660
660
661
this_header = []
661
662
662
663
if self .parser.lines < hr + 1 :
664
+ print (" Tokenizing rows..." )
663
665
self ._tokenize_rows(hr + 2 )
664
666
665
667
if self .parser.lines == 0 :
668
+ print (" No lines in parser." )
666
669
field_count = 0
667
670
start = self .parser.line_start[0 ]
668
671
669
672
# e.g., if header=3 and file only has 2 lines
670
- elif (self .parser.lines < hr + 1
671
- and not isinstance (self .orig_header, list )) or (
672
- self .parser.lines < hr):
673
+ elif (self .parser.lines < hr + 1 and not isinstance (self .orig_header, list )) or (self .parser.lines < hr):
673
674
msg = self .orig_header
674
675
if isinstance (msg, list ):
675
676
joined = " ," .join(str (m) for m in msg)
676
677
msg = f" [{joined}], len of {len(msg)},"
677
- raise ParserError(
678
- f" Passed header={msg} but only "
679
- f" {self.parser.lines} lines in file" )
678
+ raise ParserError(f" Passed header={msg} but only {self.parser.lines} lines in file" )
680
679
681
680
else :
682
681
field_count = self .parser.line_fields[hr]
683
682
start = self .parser.line_start[hr]
683
+ print (f" Field count: {field_count}, Start: {start}" )
684
684
685
685
unnamed_count = 0
686
686
unnamed_col_indices = []
687
687
688
688
for i in range (field_count):
689
689
word = self .parser.words[start + i]
690
690
691
- name = PyUnicode_DecodeUTF8(word, strlen(word),
692
- self .encoding_errors )
691
+ name = PyUnicode_DecodeUTF8(word, strlen(word), self .encoding_errors)
692
+ print (f " Word {i}: '{word}', Decoded name: '{name}' " )
693
693
694
694
if name == " " :
695
+ print (" Empty name found." )
695
696
if self .has_mi_columns:
696
697
name = f" Unnamed: {i}_level_{level}"
698
+ print (f" Setting multi-index column name: {name}" )
697
699
else :
698
700
name = f" Unnamed: {i}"
701
+ print (f" Setting unnamed column name: {name}" )
699
702
700
703
unnamed_count += 1
701
704
unnamed_col_indices.append(i)
702
705
703
706
this_header.append(name)
704
707
708
+ print (f" This header: {this_header}" )
709
+
710
+
705
711
if not self .has_mi_columns:
706
712
# Ensure that regular columns are used before unnamed ones
707
713
# to keep given names and mangle unnamed columns
@@ -744,9 +750,10 @@ cdef class TextReader:
744
750
lc = len (this_header)
745
751
ic = (len (self .index_col) if self .index_col
746
752
is not None else 0 )
753
+ print (f" LC {lc}, IC {ic} Unnamed Count {unnamed_count}" )
747
754
748
755
# if wrong number of blanks or no index, not our format
749
- if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0 :
756
+ if (lc != unnamed_count and lc - ic >= unnamed_count) or ic == 0 :
750
757
hr -= 1
751
758
self .parser_start -= 1
752
759
this_header = [None ] * lc
@@ -2156,4 +2163,4 @@ def sanitize_objects(ndarray[object] values, set na_values) -> int:
2156
2163
else :
2157
2164
memo[val] = val
2158
2165
2159
- return na_count
2166
+ return na_count
0 commit comments