|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +import os |
| 4 | + |
"""
This script should be run from the repo root dir. It rewrites setup.py
to use the build cache directory specified in the environment variable
BUILD_CACHE_DIR, or in a file named .build_cache_dir in the repo root
directory.

Artifacts included in the cache:
- gcc artifacts
- The .c files resulting from cythonizing .pyx/.pxd files
- 2to3 refactoring results (when run under python3)

Tested on all releases back to 0.7.0.

"""
# Source text that main() splices into setup.py immediately before its
# ``setup(`` call.  Inside setup.py it relies on the names ``build_ext`` and
# ``cmdclass`` (and optionally ``setuptools_kwargs`` / ``setuptools_args``)
# already being defined there.
#
# The literal is a raw string so that the regex escapes below reach setup.py
# unchanged, and it must keep the marker "BUILD CACHE ACTIVATED (V2)", which
# main() uses to detect a setup.py that has already been rewritten.
shim = r"""
import os
import sys
import shutil
import warnings

try:
    if "develop" not in sys.argv and "install" not in sys.argv:
        # Only develop/install builds use the cache; the handler at the end
        # of this try block turns this into BUILD_CACHE_DIR = None.
        raise RuntimeError("build cache is only used by develop/install")
    basedir = os.path.dirname(__file__)
    dotfile = os.path.join(basedir, ".build_cache_dir")
    BUILD_CACHE_DIR = ""
    if os.path.exists(dotfile):
        with open(dotfile) as fh:
            BUILD_CACHE_DIR = fh.readline().strip()
    # the environment variable takes precedence over the dotfile
    BUILD_CACHE_DIR = os.environ.get('BUILD_CACHE_DIR', BUILD_CACHE_DIR)

    if os.path.isdir(BUILD_CACHE_DIR):
        print("--------------------------------------------------------")
        print("BUILD CACHE ACTIVATED (V2). be careful, this is experimental.")
        print("--------------------------------------------------------")
    else:
        BUILD_CACHE_DIR = None

    # retrieve 2to3 artifacts.  This rewrites the files under pandas/ in
    # place, so it must only happen when there really is a cache directory.
    if BUILD_CACHE_DIR and sys.version_info[0] >= 3:
        from lib2to3 import refactor
        from hashlib import sha1

        def _sha1_of_file(path):
            with open(path, "rb") as fh:
                return sha1(fh.read()).hexdigest()

        pyver = "%d.%d" % (sys.version_info[:2])
        files = ["pandas"]
        # (original hash, path) pairs; a list rather than a dict keyed by
        # hash, so that files with identical content are all refactored
        to_process = []
        # cache entries are named <orig hash>-<refactored hash>-<pyver>
        cached = [f for f in os.listdir(BUILD_CACHE_DIR)
                  if "-" in f and f.endswith(pyver)]
        orig_hashes = dict((f.split("-")[0], f) for f in cached)
        post_hashes = dict((f.split("-")[1], f) for f in cached)

        while files:
            f = files.pop()

            if os.path.isdir(f):
                files.extend([os.path.join(f, x) for x in os.listdir(f)])
                continue
            if not f.endswith(".py"):
                continue
            try:
                h = _sha1_of_file(f)
            except IOError:
                # unreadable file: there is nothing to hash or refactor
                continue
            if h in orig_hashes:
                # cache hit: overwrite the source with its refactored copy
                src = os.path.join(BUILD_CACHE_DIR, orig_hashes[h])
                shutil.copyfile(src, f)
            elif h not in post_hashes:
                # we're not in a dev dir with already processed files
                to_process.append((h, f))

        avail_fixes = set(refactor.get_fixers_from_package("lib2to3.fixes"))
        avail_fixes.discard('lib2to3.fixes.fix_next')
        t = refactor.RefactoringTool(avail_fixes)
        t.refactor([f for (h, f) in to_process], True)
        print("2to3 done refactoring.")
        for orig_h, f in to_process:
            post_h = _sha1_of_file(f)
            cached_fname = orig_h + "-" + post_h + "-" + pyver
            shutil.copyfile(f, os.path.join(BUILD_CACHE_DIR, cached_fname))

except Exception:
    # best effort: any failure above simply disables the cache
    BUILD_CACHE_DIR = None

print("BUILD_CACHE_DIR: " + str(BUILD_CACHE_DIR) )


class CompilationCacheMixin(object):
    # Stores and retrieves build artifacts in cache_dir, keyed by a hash.

    def __init__(self, *args, **kwds):
        cache_dir = kwds.pop("cache_dir", BUILD_CACHE_DIR)
        self.cache_dir = cache_dir
        if not (cache_dir and os.path.isdir(cache_dir)):
            raise Exception("Error: path to Cache directory (%s) is not a dir" % cache_dir)

    def _ensure_parent_dir(self, path):
        # Create the directory that will contain path, if it is missing.
        parent = os.path.dirname(path)
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def _copy_from_cache(self, hash, target):
        # Copy the cached artifact named hash to target.
        # Returns True on a cache hit, False on a miss.
        src = os.path.join(self.cache_dir, hash)
        if not os.path.exists(src):
            return False
        self._ensure_parent_dir(target)
        shutil.copyfile(src, target)
        return True

    def _put_to_cache(self, hash, src):
        # Store the file src in the cache under the name hash.
        target = os.path.join(self.cache_dir, hash)
        self._ensure_parent_dir(target)
        shutil.copyfile(src, target)

    def _hash_obj(self, obj):
        try:
            return hash(obj)
        except Exception:
            raise NotImplementedError("You must override this method")


class CompilationCacheExtMixin(CompilationCacheMixin):
    # Hashing of Extension objects and of their source files.

    def _hash_file(self, fname):
        # sha1 hex digest of build_lib plus the file content, or None when
        # the file cannot be read.
        from hashlib import sha1

        hash = sha1()
        hash.update(self.build_lib.encode('utf-8'))
        try:
            with open(fname, "rb") as f:
                first_line = f.readline()
                # ignore cython generation timestamp header
                if b"Generated by Cython" not in first_line:
                    hash.update(first_line)
                hash.update(f.read())
        except IOError:
            return None
        return hash.hexdigest()

    def _hash_obj(self, ext):
        # Combined sha1 over ext.sources + ext.depends.  Returns False when
        # sources is not a list-like of names and None when hashing fails.
        from hashlib import sha1

        sources = ext.sources
        if (sources is None or
            (not hasattr(sources, '__iter__')) or
            isinstance(sources, str) or
            (sys.version_info[0] == 2 and isinstance(sources, unicode))):
            return False

        sources = list(sources) + ext.depends
        hash = sha1()
        try:
            for fname in sources:
                fhash = self._hash_file(fname)
                if fhash:
                    hash.update(fhash.encode('utf-8'))
        except Exception:
            return None

        return hash.hexdigest()


class CachingBuildExt(build_ext, CompilationCacheExtMixin):
    # build_ext command that reuses cached compiled extensions and cached
    # cython output when the inputs are unchanged.

    def __init__(self, *args, **kwds):
        CompilationCacheExtMixin.__init__(self, *args, **kwds)
        kwds.pop("cache_dir", None)
        build_ext.__init__(self, *args, **kwds)

    def build_extension(self, ext, *args, **kwds):
        ext_path = self.get_ext_fullpath(ext.name)
        build_path = os.path.join(self.build_lib, os.path.basename(ext_path))

        hash = self._hash_obj(ext)
        if hash and self._copy_from_cache(hash, ext_path):
            return

        build_ext.build_extension(self, ext, *args, **kwds)

        # hash again after building, then store whichever outputs exist
        hash = self._hash_obj(ext)
        if not hash:
            # nothing usable to key the cache entry with
            return
        if os.path.exists(build_path):
            self._put_to_cache(hash, build_path)  # build_ext
        if os.path.exists(ext_path):
            self._put_to_cache(hash, ext_path)  # develop

    def cython_sources(self, sources, extension):
        import re
        cplus = (self.cython_cplus or getattr(extension, 'cython_cplus', 0) or
                 (extension.language and extension.language.lower() == 'c++'))
        target_ext = '.c'
        if cplus:
            target_ext = '.cpp'

        for i, s in enumerate(sources):
            if not re.search(r"\.(pyx|pxi|pxd)$", s):
                continue
            ext_dir = os.path.dirname(s)
            ext_basename = re.sub(r"\.[^\.]+$", "", os.path.basename(s))
            ext_basename += target_ext
            target = os.path.join(ext_dir, ext_basename)
            hash = self._hash_file(s)
            sources[i] = target
            if hash and self._copy_from_cache(hash, target):
                continue
            build_ext.cython_sources(self, [s], extension)
            if hash:
                self._put_to_cache(hash, target)

        sources = [x for x in sources if x.startswith("pandas") or "lib." in x]

        return sources


if BUILD_CACHE_DIR:  # use the cache
    cmdclass['build_ext'] = CachingBuildExt

# With a cache the 2to3 results were already applied above, so setuptools
# must not run 2to3 again.  Only one of the two names exists in any given
# setup.py, hence the NameError handlers.
try:
    # recent
    setuptools_kwargs['use_2to3'] = BUILD_CACHE_DIR is None
except NameError:
    pass

try:
    # pre eb2234231 , ~ 0.7.0,
    setuptools_args['use_2to3'] = BUILD_CACHE_DIR is None
except NameError:
    pass

"""
def main():
    """Splice the build-cache ``shim`` into the repo's setup.py.

    setup.py is looked up one directory above the directory containing this
    script.  The shim is inserted immediately before the line that starts
    with ``setup(``.

    Returns:
        None when setup.py was rewritten or was already wired with the V2
        shim; 1 when setup.py is empty or does not contain exactly one
        ``setup(`` call at the start of a line (the file is left untouched).
    """
    # Local import: io.open accepts ``encoding`` on both Python 2 and 3.
    import io

    setup_path = os.path.join(os.path.dirname(__file__), "..", "setup.py")
    with io.open(setup_path, encoding="utf-8") as f:
        s = f.read()

    if not s:
        print("setup.py is empty, nothing to rewrite.")
        return 1

    if "BUILD CACHE ACTIVATED (V2)" in s:
        print( "setup.py already wired with V2 build_cache, skipping..")
        return None

    SEP = "\nsetup("
    if s.count(SEP) != 1:
        # Refuse to guess where the shim belongs rather than failing with an
        # unpacking error or writing a broken setup.py.
        print("expected exactly one line starting with 'setup(' in setup.py, "
              "found %d; setup.py was not modified." % s.count(SEP))
        return 1

    before, after = s.split(SEP)
    # Text mode: writing str to a file opened with "wb" fails on Python 3.
    with io.open(setup_path, "w", encoding="utf-8") as f:
        f.write(before + shim + SEP + after)
    print("""
    setup.py was rewritten to use a build cache.
    Make sure you've put the following in your .bashrc:

    export BUILD_CACHE_DIR=<an existing directory for saving cached files>
    echo $BUILD_CACHE_DIR > pandas_repo_rootdir/.build_cache_dir

    Once active, build results (compilation, cythonizations and 2to3 artifacts)
    will be cached in "$BUILD_CACHE_DIR" and subsequent builds should be
    sped up if no changes requiring recompilation were made.

    Go ahead and run:

    python setup.py clean
    python setup.py develop

    """)
    return None
| 284 | + |
| 285 | + |
# Script entry point: run main() and use its return value as the process
# exit status (None means success).
if __name__ == '__main__':
    raise SystemExit(main())
0 commit comments