Return partial traces if sampling is interrupted

aseyboldt · aseyboldt · commit dd21cc4e78bc · 2018-06-08T14:11:05.000+02:00
diff --git a/pymc3/backends/text.py b/pymc3/backends/text.py
@@ -99,8 +99,9 @@ def record(self, point):
         self._fh.write(','.join(columns) + '\n')
 
     def close(self):
-        self._fh.close()
-        self._fh = None  # Avoid serialization issue.
+        if self._fh is not None:
+            self._fh.close()
+            self._fh = None  # Avoid serialization issue.
 
     # Selection methods
 
diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py
@@ -28,7 +28,8 @@ class _Process(multiprocessing.Process):
     We communicate with the main process using a pipe,
     and send finished samples using shared memory.
     """
-    def __init__(self, msg_pipe, step_method, shared_point, draws, tune, seed):
+    def __init__(self, name, msg_pipe, step_method, shared_point,
+                 draws, tune, seed):
         super(_Process, self).__init__(daemon=True)
         self._msg_pipe = msg_pipe
         self._step_method = step_method
@@ -116,6 +117,7 @@ class ProcessAdapter(object):
     """Control a Chain process from the main thread."""
     def __init__(self, draws, tune, step_method, chain, seed, start):
         self.chain = chain
+        process_name = "worker_chain_%s" % chain
         self._msg_pipe, remote_conn = multiprocessing.Pipe()
 
         self._shared_point = {}
@@ -138,7 +140,8 @@ def __init__(self, draws, tune, step_method, chain, seed, start):
         self._num_samples = 0
 
         self._process = _Process(
-            remote_conn, step_method, self._shared_point, draws, tune, seed)
+            process_name, remote_conn, step_method, self._shared_point,
+            draws, tune, seed)
         # We fork right away, so that the main process can start tqdm threads
         self._process.start()
 
@@ -185,7 +188,7 @@ def recv_draw(processes, timeout=3600):
         elif msg[0] == 'writing_done':
             proc._readable = True
             proc._num_samples += 1
-            return (proc, *msg[1:])
+            return (proc,) + msg[1:]
         else:
             raise ValueError('Sampler sent bad message.')
 
@@ -200,7 +203,7 @@ def terminate_all(processes, patience=2):
         start_time = time.time()
         try:
             for process in processes:
-                timeout = start_time + patience - time.time()
+                timeout = time.time() + patience - start_time
                 if timeout < 0:
                     raise multiprocessing.TimeoutError()
                 process.join(timeout)
@@ -285,6 +288,10 @@ def __iter__(self):
                 if self._progress is not None:
                     self._progress[proc.chain - self._start_chain_num].close()
 
+            # We could also yield proc.shared_point_view directly,
+            # and only call proc.write_next() after the yield returns.
+            # This seems to be faster overall though, as the worker
+            # loses less time waiting.
             point = {name: val.copy()
                      for name, val in proc.shared_point_view.items()}
 
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
@@ -966,8 +966,6 @@ def _choose_backend(trace, chain, shortcuts=None, **kwds):
 
 
 def _mp_sample(**kwargs):
-    import sys
-
     cores = kwargs.pop('cores')
     chain = kwargs.pop('chain')
     rseed = kwargs.pop('random_seed')
@@ -978,15 +976,21 @@ def _mp_sample(**kwargs):
     step = kwargs.pop('step')
     progressbar = kwargs.pop('progressbar')
     use_mmap = kwargs.pop('use_mmap')
+    model = kwargs.pop('model', None)
+    trace = kwargs.pop('trace', None)
 
     if sys.version_info.major >= 3:
         import pymc3.parallel_sampling as ps
 
-        model = modelcontext(kwargs.pop('model', None))
-        trace = kwargs.pop('trace', None)
+        # We did draws += tune in pm.sample
+        draws -= tune
+
         traces = []
         for idx in range(chain, chain + chains):
-            strace = _choose_backend(trace, idx, model=model)
+            if trace is not None:
+                strace = _choose_backend(copy(trace), idx, model=model)
+            else:
+                strace = _choose_backend(None, idx, model=model)
             # TODO what is this for?
             update_start_vals(start[idx - chain], model.test_point, model)
             if step.generates_stats and strace.supports_sampler_stats:
@@ -997,20 +1001,27 @@ def _mp_sample(**kwargs):
 
         sampler = ps.ParallelSampler(
             draws, tune, chains, cores, rseed, start, step, chain, progressbar)
-        with sampler:
-            for draw in sampler:
-                trace = traces[draw.chain - chain]
-                if trace.supports_sampler_stats and draw.stats is not None:
-                    trace.record(draw.point, draw.stats)
-                else:
-                    trace.record(draw.point)
-                if draw.is_last:
-                    trace.close()
-        return MultiTrace(traces)
+        try:
+            with sampler:
+                for draw in sampler:
+                    trace = traces[draw.chain - chain]
+                    if trace.supports_sampler_stats and draw.stats is not None:
+                        trace.record(draw.point, draw.stats)
+                    else:
+                        trace.record(draw.point)
+                    if draw.is_last:
+                        trace.close()
+            return MultiTrace(traces)
+        except KeyboardInterrupt:
+            traces, length = _choose_chains(traces, tune)
+            return MultiTrace(traces)[:length]
+        finally:
+            for trace in traces:
+                trace.close()
 
     else:
         chain_nums = list(range(chain, chain + chains))
-        pbars = [kwargs.pop('progressbar')] + [False] * (chains - 1)
+        pbars = [progressbar] + [False] * (chains - 1)
         jobs = (delayed(_sample)(*args, **kwargs)
                 for args in zip(chain_nums, pbars, rseed, start))
         if use_mmap:
@@ -1020,6 +1031,62 @@ def _mp_sample(**kwargs):
         return MultiTrace(traces)
 
 
+def _choose_chains(traces, tune):
+    """Pick the chains and common length to keep from interrupted sampling.
+
+    Chains are ranked by their number of post-tuning draws, longest first,
+    and added greedily for as long as ``n_chains * shortest_kept_length``
+    does not decrease.
+
+    Parameters
+    ----------
+    traces : list
+        Per-chain traces; only ``len(trace)`` is used here.
+    tune : int or None
+        Number of tuning samples per chain; ``None`` is treated as 0.
+
+    Returns
+    -------
+    (list, int)
+        The selected traces (longest first) and the number of samples to
+        keep from each of them, tuning samples included. ``([], 0)`` if
+        `traces` is empty.
+
+    Raises
+    ------
+    ValueError
+        If no chain has any draws beyond tuning.
+    """
+    if tune is None:
+        tune = 0
+
+    if not traces:
+        # Keep the (traces, length) shape so callers can always unpack.
+        return [], 0
+
+    lengths = [max(0, len(trace) - tune) for trace in traces]
+    if not sum(lengths):
+        raise ValueError('Not enough samples to build a trace.')
+
+    idxs = np.argsort(lengths)[::-1]
+    l_sort = np.array(lengths)[idxs]
+
+    final_length = l_sort[0]
+    last_total = 0
+    for i, length in enumerate(l_sort):
+        # Total draws if the i + 1 longest chains are all cut to `length`.
+        total = (i + 1) * length
+        if total < last_total:
+            use_until = i
+            break
+        last_total = total
+        final_length = length
+    else:
+        use_until = len(lengths)
+
+    return [traces[idx] for idx in idxs[:use_until]], final_length + tune
+
+
 def stop_tuning(step):
     """ stop tuning the current step method """