2
2
3
3
import json
4
4
import os
5
+ import pprint
5
6
from subprocess import CalledProcessError , check_output , PIPE , Popen , STDOUT
6
7
try :
7
8
from urllib .parse import urlsplit , urlunsplit
@@ -41,15 +42,37 @@ def get_lfs_endpoint_url(git_repo, checkout_dir):
41
42
url = url [:- 1 ]
42
43
if not url .endswith ('/info/lfs' ):
43
44
url += '/info/lfs' if url .endswith ('.git' ) else '.git/info/lfs'
44
- if not url .startswith ('https://' ):
45
- url = urlsplit (url )
46
- if url .scheme :
47
- url = urlunsplit (('https' , url .hostname , url .path , '' , '' ))
48
- else :
45
+ url_split = urlsplit (url )
46
+ host , path = url_split .hostname , url_split .path
47
+ if url_split .scheme != 'https' :
48
+ if not url_split .scheme :
49
49
# SSH format: user@host:repo.git
50
- host , path = url .path .split ('@' , 1 )[1 ].split (':' , 1 )
51
- url = 'https://' + host + '/' + path
52
- return url
50
+ host , path = url_split .path .split ('@' , 1 )[1 ].split (':' , 1 )
51
+ url = urlunsplit (('https' , host , path , '' , '' ))
52
+ del url_split
53
+
54
+ # need to get GHE auth token if available. issue cmd like this to get:
55
+ # ssh git@<host> git-lfs-authenticate foo/bar.git download
56
+ if path .endswith ('/info/lfs' ):
57
+ path = path [:- len ('/info/lfs' )]
58
+ auth_header = get_lfs_api_token (host , path )
59
+ return url , auth_header
60
+
61
+
62
def get_lfs_api_token(host, path):
    """Return the HTTP headers used to authenticate against the LFS API.

    Runs ``ssh git@<host> git-lfs-authenticate <path> download`` and parses
    the JSON document the command prints. See the documentation here for a
    description of the ssh command and its response:
    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md

    :param host: host name of the git server to contact over ssh
    :param path: repository path handed to ``git-lfs-authenticate``
    :returns: the ``header`` dict of the response, or an empty dict when the
        command prints nothing or the response has no ``header`` entry
    :raises CalledProcessError: if the ssh command exits with a non-zero status
    :raises ValueError: if the command output is not valid JSON
    """
    # Build argv as a list so `host` and `path` each stay a single argument
    # to ssh instead of being re-tokenised on whitespace.
    query_cmd = ['ssh', 'git@' + host, 'git-lfs-authenticate', path, 'download']
    output = check_output(query_cmd).strip().decode('utf8')
    if not output:
        return {}

    query_resp = json.loads(output)
    # A response without a `header` entry means no extra headers are needed;
    # indexing it directly would raise KeyError.
    return query_resp.get('header') or {}
53
76
54
77
55
78
def find_lfs_files (checkout_dir ):
@@ -95,11 +118,13 @@ def read_lfs_metadata(checkout_dir):
95
118
yield (path , oid , size )
96
119
97
120
98
def fetch_urls(lfs_url, lfs_auth_info, oid_list):
    """Fetch the URLs of the files from the Git LFS endpoint

    Sends a ``download`` batch request to ``<lfs_url>/objects/batch``.

    :param lfs_url: base URL of the LFS endpoint
    :param lfs_auth_info: dict of extra HTTP headers to send with the request
        (merged over ``POST_HEADERS``); ``None`` or an empty dict sends none
    :param oid_list: list of objects to request, serialised as the
        ``objects`` member of the request body
    :returns: the ``objects`` member of the decoded JSON response
    :raises AssertionError: if the response has no ``objects`` member
    """
    data = json.dumps({'operation': 'download', 'objects': oid_list})

    # Copy the shared defaults so the module-level dict is never mutated.
    headers = dict(POST_HEADERS)
    headers.update(lfs_auth_info or {})

    # json.dumps escapes non-ASCII by default, so the ASCII encode is safe.
    req = Request(lfs_url + '/objects/batch', data.encode('ascii'), headers)

    response = urlopen(req)
    try:
        # Decode as UTF-8 (a superset of ASCII) so a non-ASCII character in
        # the server's reply does not raise UnicodeDecodeError.
        resp = json.loads(response.read().decode('utf-8'))
    finally:
        # try/finally rather than `with`: the Python 2 response object is
        # not a context manager.
        response.close()

    # Raised explicitly so the check still runs under `python -O`, while
    # keeping the exception type of the former `assert`.
    if 'objects' not in resp:
        raise AssertionError(resp)
    return resp['objects']
@@ -158,10 +183,14 @@ def fetch(git_repo, checkout_dir=None, verbose=0):
158
183
return
159
184
160
185
# Fetch the URLs of the files from the Git LFS endpoint
161
- lfs_url = get_lfs_endpoint_url (git_repo , checkout_dir )
186
+ lfs_url , lfs_auth_info = get_lfs_endpoint_url (git_repo , checkout_dir )
187
+
188
+ if verbose > 0 :
189
+ print ('Fetching URLs from %s ...' % lfs_url )
162
190
if verbose > 1 :
163
- print ('Fetching URLs from %s...' % lfs_url )
164
- objects = fetch_urls (lfs_url , oid_list )
191
+ print ('Authorization info for URL: %s' % lfs_auth_info )
192
+ print ('oid_list: %s' % pprint .pformat (oid_list ))
193
+ objects = fetch_urls (lfs_url , lfs_auth_info , oid_list )
165
194
166
195
# Download the files
167
196
tmp_dir = git_dir + '/lfs/tmp'
@@ -175,9 +204,10 @@ def fetch(git_repo, checkout_dir=None, verbose=0):
175
204
# Download into tmp_dir
176
205
with TempFile (dir = tmp_dir ) as f :
177
206
url = obj ['actions' ]['download' ]['href' ]
207
+ head = obj ['actions' ]['download' ]['header' ]
178
208
print ('Downloading %s (%s bytes) from %s...' %
179
- (path , size , url [:40 ]))
180
- h = urlopen (Request (url ))
209
+ (path , size , url if verbose > 0 else url [:40 ]))
210
+ h = urlopen (Request (url , headers = head ))
181
211
while True :
182
212
buf = h .read (10240 )
183
213
if not buf :
@@ -188,6 +218,9 @@ def fetch(git_repo, checkout_dir=None, verbose=0):
188
218
dst1 = cache_dir + '/' + oid
189
219
if not os .path .exists (cache_dir ):
190
220
os .makedirs (cache_dir )
221
+ if verbose > 1 :
222
+ print ('temp download file: ' + f .name )
223
+ print ('cache file name: ' + dst1 )
191
224
os .rename (f .name , dst1 )
192
225
193
226
# Copy into checkout_dir
0 commit comments