@@ -49,6 +49,11 @@ def add_file_links(
49
49
for link in t_spark_arrow_result_links :
50
50
if link .rowCount <= 0 :
51
51
continue
52
+ logger .debug (
53
+ "ResultFileDownloadManager.add_file_links: start offset {}, row count: {}" .format (
54
+ link .startRowOffset , link .rowCount
55
+ )
56
+ )
52
57
self .download_handlers .append (
53
58
ResultSetDownloadHandler (self .downloadable_result_settings , link )
54
59
)
@@ -88,6 +93,12 @@ def get_next_downloaded_file(
88
93
89
94
# Check (and wait) for download status
90
95
if self ._check_if_download_successful (handler ):
96
+ link = handler .result_link
97
+ logger .debug (
98
+ "ResultFileDownloadManager: file found for row index {}: start {}, row count: {}" .format (
99
+ next_row_offset , link .startRowOffset , link .rowCount
100
+ )
101
+ )
91
102
# Buffer should be empty so set buffer to new ArrowQueue with result_file
92
103
result = DownloadedFile (
93
104
handler .result_file ,
@@ -97,40 +108,78 @@ def get_next_downloaded_file(
97
108
self .download_handlers .pop (idx )
98
109
# Return True upon successful download to continue loop and not force a retry
99
110
return result
111
+ else :
112
+ logger .debug (
113
+ "ResultFileDownloadManager: cannot find file for row index {}" .format (
114
+ next_row_offset
115
+ )
116
+ )
117
+
100
118
# Download was not successful for next download item, force a retry
101
119
self ._shutdown_manager ()
102
120
return None
103
121
104
122
def _remove_past_handlers(self, next_row_offset: int) -> None:
    """Drop download handlers whose result link ends at or before ``next_row_offset``.

    A handler is kept only while ``startRowOffset + rowCount`` of its result
    link is strictly greater than ``next_row_offset``; every other handler is
    removed from ``self.download_handlers``.

    Args:
        next_row_offset: Offset of the next row to be fetched.
    """
    # Lazy %-style arguments: the message is only rendered when DEBUG is enabled.
    logger.debug(
        "ResultFileDownloadManager: removing past handlers, current offset: %s",
        next_row_offset,
    )

    # Any link in which its start to end range doesn't include the next row
    # to be fetched does not need downloading.
    remaining = []
    for handler in self.download_handlers:
        result_link = handler.result_link
        logger.debug(
            "- checking result link: start %s, row count: %s, current offset: %s",
            result_link.startRowOffset,
            result_link.rowCount,
            next_row_offset,
        )
        if result_link.startRowOffset + result_link.rowCount > next_row_offset:
            remaining.append(handler)

    # Slice assignment filters in a single pass (instead of repeated O(n)
    # ``pop(i)`` calls) while still mutating the same list object in place.
    self.download_handlers[:] = remaining
113
141
114
142
def _schedule_downloads(self):
    """Submit every not-yet-scheduled download handler to ``self.thread_pool``.

    Handlers already flagged with ``is_download_scheduled`` are skipped. If
    logging or submitting a handler raises, the exception is logged via
    ``logger.error`` and scheduling stops; that handler and all handlers
    after it keep their current ``is_download_scheduled`` value.
    """
    logger.debug("ResultFileDownloadManager: schedule downloads")
    for pending in self.download_handlers:
        if not pending.is_download_scheduled:
            try:
                link = pending.result_link
                logger.debug(
                    "- start: {}, row count: {}".format(
                        link.startRowOffset, link.rowCount
                    )
                )
                self.thread_pool.submit(pending.run)
            except Exception as e:
                logger.error(e)
                # Stop at the first failure; the remaining handlers are left unscheduled.
                break
            else:
                # Only mark the handler once the submit call has returned.
                pending.is_download_scheduled = True
125
159
126
160
def _find_next_file_index(self, next_row_offset: int):
    """Return the index of the first scheduled handler starting at ``next_row_offset``.

    Only handlers with ``is_download_scheduled`` set and whose result link's
    ``startRowOffset`` equals ``next_row_offset`` are considered. Every match
    is logged at debug level. Returns ``None`` when there is no match.
    """
    logger.debug(
        "ResultFileDownloadManager: trying to find file for row {}".format(
            next_row_offset
        )
    )

    # Collect the positions of all candidate handlers, in list order.
    matches = []
    for position, candidate in enumerate(self.download_handlers):
        if not candidate.is_download_scheduled:
            continue
        # TODO: shouldn't `next_row_offset` be tested against the link's
        # whole row range, not just its start row offset?
        if candidate.result_link.startRowOffset == next_row_offset:
            matches.append(position)

    for position in matches:
        found = self.download_handlers[position].result_link
        logger.debug(
            "- found file: start {}, row count {}".format(
                found.startRowOffset, found.rowCount
            )
        )

    if not matches:
        return None
    return matches[0]
135
184
136
185
def _check_if_download_successful (self , handler : ResultSetDownloadHandler ):
0 commit comments