Skip to content

Commit e505101

Browse files
author
y-p
committed
BLD: Add 2to3 caching, move the build cache logic into use_build_cache.py
1 parent c45e769 commit e505101

File tree

4 files changed

+304
-168
lines changed

4 files changed

+304
-168
lines changed

scripts/use_build_cache.py

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
import os
4+
5+
"""
6+
This script should be run from the repo root dir. It rewrites setup.py
7+
to use the build cache directory specified in the environment variable BUILD_CACHE_DIR
8+
or in a file named .build_cache_dir in the repo root directory.
9+
10+
Artifacts included in the cache:
11+
- gcc artifacts
12+
- The .c files resulting from cythonizing .pyx/.pxd files
13+
- 2to3 refactoring results (when run under python3)
14+
15+
Tested on all releases back to 0.7.0.
16+
17+
"""
18+
shim="""
19+
import os
20+
import sys
21+
import shutil
22+
import warnings
23+
24+
try:
25+
if not ("develop" in sys.argv) and not ("install" in sys.argv):
26+
1/0
27+
basedir = os.path.dirname(__file__)
28+
dotfile = os.path.join(basedir,".build_cache_dir")
29+
BUILD_CACHE_DIR = ""
30+
if os.path.exists(dotfile):
31+
BUILD_CACHE_DIR = open(dotfile).readline().strip()
32+
BUILD_CACHE_DIR = os.environ.get('BUILD_CACHE_DIR',BUILD_CACHE_DIR)
33+
34+
if os.path.isdir(BUILD_CACHE_DIR):
35+
print("--------------------------------------------------------")
36+
print("BUILD CACHE ACTIVATED (V2). be careful, this is experimental.")
37+
print("--------------------------------------------------------")
38+
else:
39+
BUILD_CACHE_DIR = None
40+
41+
# retrieve 2to3 artifacts
42+
if sys.version_info[0] >= 3:
43+
from lib2to3 import refactor
44+
from hashlib import sha1
45+
import shutil
46+
import multiprocessing
47+
pyver = "%d.%d" % (sys.version_info[:2])
48+
files = ["pandas"]
49+
to_process = dict()
50+
orig_hashes= dict((f.split("-")[0],f) for f in os.listdir(BUILD_CACHE_DIR)
51+
if "-" in f and f.endswith(pyver))
52+
post_hashes= dict((f.split("-")[1],f) for f in os.listdir(BUILD_CACHE_DIR)
53+
if "-" in f and f.endswith(pyver))
54+
55+
while files:
56+
f = files.pop()
57+
58+
if os.path.isdir(f):
59+
files.extend([os.path.join(f,x) for x in os.listdir(f)])
60+
else:
61+
if not f.endswith(".py"):
62+
continue
63+
else:
64+
try:
65+
h = sha1(open(f,"rb").read()).hexdigest()
66+
except IOError:
67+
to_process[h] = f
68+
if h in orig_hashes:
69+
src = os.path.join(BUILD_CACHE_DIR,orig_hashes[h])
70+
# print("cache hit %s,%s" % (f,h))
71+
shutil.copyfile(src,f)
72+
elif h not in post_hashes:
73+
74+
# we're not in a dev dir with already processed files
75+
# print("cache miss %s,%s" % (f,h))
76+
# print("will process " + f)
77+
to_process[h] = f
78+
79+
avail_fixes = set(refactor.get_fixers_from_package("lib2to3.fixes"))
80+
avail_fixes.discard('lib2to3.fixes.fix_next')
81+
t=refactor.RefactoringTool(avail_fixes)
82+
t.refactor(to_process.values(),True)
83+
print("2to3 done refactoring.")
84+
for orig_h in to_process:
85+
f = to_process[orig_h]
86+
post_h = sha1(open(f,"rb").read()).hexdigest()
87+
cached_fname = orig_h + "-" + post_h + "-" + pyver
88+
# print("cache put %s,%s in %s" % (f,h,cached_fname))
89+
shutil.copyfile(f,os.path.join(BUILD_CACHE_DIR,cached_fname))
90+
91+
except:
92+
BUILD_CACHE_DIR = None
93+
94+
print("BUILD_CACHE_DIR: " + str(BUILD_CACHE_DIR) )
95+
96+
class CompilationCacheMixin(object):
97+
def __init__(self, *args, **kwds):
98+
cache_dir = kwds.pop("cache_dir", BUILD_CACHE_DIR)
99+
self.cache_dir = cache_dir
100+
if not os.path.isdir(cache_dir):
101+
raise Exception("Error: path to Cache directory (%s) is not a dir" % cache_dir)
102+
103+
def _copy_from_cache(self, hash, target):
104+
src = os.path.join(self.cache_dir, hash)
105+
if os.path.exists(src):
106+
# print("Cache HIT: asked to copy file %s in %s" %
107+
# (src,os.path.abspath(target)))
108+
s = "."
109+
for d in target.split(os.path.sep)[:-1]:
110+
s = os.path.join(s, d)
111+
if not os.path.exists(s):
112+
os.mkdir(s)
113+
shutil.copyfile(src, target)
114+
115+
return True
116+
117+
return False
118+
119+
def _put_to_cache(self, hash, src):
120+
target = os.path.join(self.cache_dir, hash)
121+
# print( "Cache miss: asked to copy file from %s to %s" % (src,target))
122+
s = "."
123+
for d in target.split(os.path.sep)[:-1]:
124+
s = os.path.join(s, d)
125+
if not os.path.exists(s):
126+
os.mkdir(s)
127+
shutil.copyfile(src, target)
128+
129+
def _hash_obj(self, obj):
130+
try:
131+
return hash(obj)
132+
except:
133+
raise NotImplementedError("You must override this method")
134+
135+
class CompilationCacheExtMixin(CompilationCacheMixin):
136+
def _hash_file(self, fname):
137+
from hashlib import sha1
138+
f= None
139+
try:
140+
hash = sha1()
141+
hash.update(self.build_lib.encode('utf-8'))
142+
try:
143+
if sys.version_info[0] >= 3:
144+
import io
145+
f = io.open(fname, "rb")
146+
else:
147+
f = open(fname)
148+
149+
first_line = f.readline()
150+
# ignore cython generation timestamp header
151+
if "Generated by Cython" not in first_line.decode('utf-8'):
152+
hash.update(first_line)
153+
hash.update(f.read())
154+
return hash.hexdigest()
155+
156+
except:
157+
raise
158+
return None
159+
finally:
160+
if f:
161+
f.close()
162+
163+
except IOError:
164+
return None
165+
166+
def _hash_obj(self, ext):
167+
from hashlib import sha1
168+
169+
sources = ext.sources
170+
if (sources is None or
171+
(not hasattr(sources, '__iter__')) or
172+
isinstance(sources, str) or
173+
sys.version[0] == 2 and isinstance(sources, unicode)): # argh
174+
return False
175+
176+
sources = list(sources) + ext.depends
177+
hash = sha1()
178+
try:
179+
for fname in sources:
180+
fhash = self._hash_file(fname)
181+
if fhash:
182+
hash.update(fhash.encode('utf-8'))
183+
except:
184+
return None
185+
186+
return hash.hexdigest()
187+
188+
189+
class CachingBuildExt(build_ext, CompilationCacheExtMixin):
190+
def __init__(self, *args, **kwds):
191+
CompilationCacheExtMixin.__init__(self, *args, **kwds)
192+
kwds.pop("cache_dir", None)
193+
build_ext.__init__(self, *args, **kwds)
194+
195+
def build_extension(self, ext, *args, **kwds):
196+
ext_path = self.get_ext_fullpath(ext.name)
197+
build_path = os.path.join(self.build_lib, os.path.basename(ext_path))
198+
199+
hash = self._hash_obj(ext)
200+
if hash and self._copy_from_cache(hash, ext_path):
201+
return
202+
203+
build_ext.build_extension(self, ext, *args, **kwds)
204+
205+
hash = self._hash_obj(ext)
206+
if os.path.exists(build_path):
207+
self._put_to_cache(hash, build_path) # build_ext
208+
if os.path.exists(ext_path):
209+
self._put_to_cache(hash, ext_path) # develop
210+
211+
def cython_sources(self, sources, extension):
212+
import re
213+
cplus = self.cython_cplus or getattr(extension, 'cython_cplus', 0) or \
214+
(extension.language and extension.language.lower() == 'c++')
215+
target_ext = '.c'
216+
if cplus:
217+
target_ext = '.cpp'
218+
219+
for i, s in enumerate(sources):
220+
if not re.search("\.(pyx|pxi|pxd)$", s):
221+
continue
222+
ext_dir = os.path.dirname(s)
223+
ext_basename = re.sub("\.[^\.]+$", "", os.path.basename(s))
224+
ext_basename += target_ext
225+
target = os.path.join(ext_dir, ext_basename)
226+
hash = self._hash_file(s)
227+
sources[i] = target
228+
if hash and self._copy_from_cache(hash, target):
229+
continue
230+
build_ext.cython_sources(self, [s], extension)
231+
self._put_to_cache(hash, target)
232+
233+
sources = [x for x in sources if x.startswith("pandas") or "lib." in x]
234+
235+
return sources
236+
237+
if BUILD_CACHE_DIR: # use the cache
238+
cmdclass['build_ext'] = CachingBuildExt
239+
240+
try:
241+
# recent
242+
setuptools_kwargs['use_2to3'] = True if BUILD_CACHE_DIR is None else False
243+
except:
244+
pass
245+
246+
try:
247+
# pre eb2234231 , ~ 0.7.0,
248+
setuptools_args['use_2to3'] = True if BUILD_CACHE_DIR is None else False
249+
except:
250+
pass
251+
252+
"""
253+
def main():
    """Splice ``shim`` into the setup.py one directory above this script.

    The shim is inserted immediately before the line that starts with
    ``setup(``.  Nothing is written when setup.py already contains the V2
    build-cache marker, or when it does not contain exactly one such line
    to anchor the insertion.

    Returns None when setup.py was rewritten or was already wired, and 1
    when it could not be rewritten; the caller passes the value to
    ``sys.exit``.
    """
    import sys

    setup_path = os.path.join(os.path.dirname(__file__), "..", "setup.py")
    with open(setup_path) as f:
        s = f.read()

    if "BUILD CACHE ACTIVATED (V2)" in s:
        print( "setup.py already wired with V2 build_cache, skipping..")
        return None

    SEP = "\nsetup("
    parts = s.split(SEP)
    if len(parts) != 2:
        # Refuse to guess where the shim belongs rather than failing with an
        # unpacking error or corrupting setup.py.
        sys.stderr.write(
            "Expected exactly one line starting with 'setup(' in %s, found %d;"
            " setup.py was left untouched.\n" % (setup_path, len(parts) - 1))
        return 1
    before, after = parts

    # Text mode: the pieces are str, and a binary-mode write would fail on
    # Python 3 after the file had already been truncated.
    with open(setup_path, "w") as f:
        f.write(before + shim + SEP + after)
    print("""
setup.py was rewritten to use a build cache.
Make sure you've put the following in your .bashrc:

export BUILD_CACHE_DIR=<an existing directory for saving cached files>
echo $BUILD_CACHE_DIR > pandas_repo_rootdir/.build_cache_dir

Once active, build results (compilation, cythonizations and 2to3 artifacts)
will be cached in "$BUILD_CACHE_DIR" and subsequent builds should be
sped up if no changes requiring recompilation were made.

Go ahead and run:

python setup.py clean
python setup.py develop

""")
    return None
284+
285+
286+
if __name__ == '__main__':
    # Script entry point: run main() and hand its result to the interpreter
    # as the process exit status.
    import sys

    exit_status = main()
    sys.exit(exit_status)

0 commit comments

Comments
 (0)