batching helpers

unixpickle · unixpickle · commit e86cbb04fd16 · 2019-10-14T13:44:15.000-04:00
diff --git a/mnist_train.py b/mnist_train.py
@@ -5,12 +5,13 @@
 import torch
 import torch.optim as optim
 
+from reptile_gen.batching import batched_grad
 from reptile_gen.mnist import iterate_mini_datasets
 from reptile_gen.model import MNISTModel
 from reptile_gen.reptile import reptile_grad
 
 OUT_PATH = 'model.pt'
-AVG_SIZE = 1000
+AVG_SIZE = 20
 META_BATCH = 50
 
 
@@ -23,17 +24,19 @@ def main():
     mini_batches = iterate_mini_datasets()
     last_n = []
     for i in itertools.count():
-        inputs, outputs = next(mini_batches)
-        losses = reptile_grad(model, inputs, outputs, opt)
+        outer_opt.zero_grad()
+
+        def grad_fn(model, x, y):
+            return reptile_grad(model, x, y, opt)
+
+        batch = [next(mini_batches) for _ in range(META_BATCH)]
+        losses = batched_grad(model, grad_fn, batch)
         loss = np.mean(losses)
         last_n.append(loss)
         last_n = last_n[-AVG_SIZE:]
-        if i % META_BATCH == 0:
-            outer_opt.step()
-            outer_opt.zero_grad()
-            torch.save(model.state_dict(), OUT_PATH)
-            print('step %d: loss=%f last_%d=%f' %
-                  (i//META_BATCH, np.mean(losses), AVG_SIZE, np.mean(last_n)))
+        outer_opt.step()
+        torch.save(model.state_dict(), OUT_PATH)
+        print('step %d: loss=%f last_%d=%f' % (i, np.mean(losses), AVG_SIZE, np.mean(last_n)))
 
 
 if __name__ == '__main__':
diff --git a/reptile_gen/batching.py b/reptile_gen/batching.py
@@ -0,0 +1,89 @@
+from multiprocessing import Pool, set_start_method
+
+import cloudpickle
+import torch
+
+
+def batched_grad(model, grad_fn, batch, threads=1, device='cpu'):
+    """
+    Accumulate gradients for a batch of tasks using worker processes.
+
+    A fresh copy of the model is built in a worker for every batch element,
+    grad_fn is run on that copy, and the gradients it leaves on the copy's
+    parameters are summed into model.parameters() in this process.
+
+    Args:
+        model: the module whose parameters receive the summed gradients.
+          Its class is instantiated with no arguments inside each worker
+          and loaded with a copy of model.state_dict().
+        grad_fn: called as grad_fn(model_copy, inputs, outputs). It is
+          expected to populate .grad on the copy's parameters; its return
+          value is passed back to the caller. Must be cloudpickle-able.
+        batch: a sequence of (inputs, outputs) pairs; both elements are
+          moved to `device` with .to() before grad_fn is called.
+        threads: the maximum number of worker processes.
+        device: the device name on which each worker runs grad_fn.
+
+    Returns:
+        A tuple with one grad_fn return value per batch element, in order.
+    """
+    if len(batch) == 0:
+        # Pool(0) raises ValueError and there is nothing to accumulate.
+        return ()
+
+    # NOTE: this changes the process-wide start method on every call.
+    set_start_method('spawn', force=True)
+
+    model_class = model.__class__
+    # Weights are copied to CPU numpy arrays before being captured by the
+    # closure that is pickled for the workers.
+    model_dict = {x: y.cpu().numpy() for x, y in model.state_dict().items()}
+
+    def run_grad_fn(inputs, outputs):
+        # Runs inside a worker: rebuild the model, then compute gradients.
+        model = model_class()
+        state = {x: torch.from_numpy(y) for x, y in model_dict.items()}
+        model.load_state_dict(state)
+        d = torch.device(device)
+        if device != 'cpu':
+            model.to(d)
+        res = grad_fn(model, inputs.to(d), outputs.to(d))
+        # Hand gradients back on the CPU so they stay valid once the worker
+        # exits; parameters that grad_fn did not touch are reported as None.
+        grads = [None if p.grad is None else p.grad.cpu()
+                 for p in model.parameters()]
+        return grads, res
+
+    pickled_fn = cloudpickle.dumps(run_grad_fn)
+    # The context manager tears the workers down even if a task raises.
+    with Pool(min(len(batch), threads)) as pool:
+        raw_results = pool.map(call_pickled_fn,
+                               [(pickled_fn, x) for x in batch])
+    grads, results = zip(*raw_results)
+
+    for grad in grads:
+        for p, g in zip(model.parameters(), grad):
+            if g is None:
+                continue
+            g = g.to(p.device)
+            if p.grad is None:
+                p.grad = g
+            else:
+                p.grad.add_(g)
+
+    return results
+
+
+def call_pickled_fn(data_args):
+    """
+    Unpickle a function and call it; the entry point run by pool workers.
+
+    Args:
+        data_args: a (data, args) pair, where data is a cloudpickle payload
+          for a callable and args is the sequence of positional arguments.
+
+    Returns:
+        Whatever the unpickled callable returns.
+    """
+    data, args = data_args
+    return cloudpickle.loads(data)(*args)