|
52 | 52 | import java.io.IOException;
|
53 | 53 | import java.io.PrintStream;
|
54 | 54 | import java.lang.invoke.VarHandle;
|
| 55 | +import java.lang.ref.WeakReference; |
55 | 56 | import java.nio.file.LinkOption;
|
56 | 57 | import java.util.ArrayList;
|
57 | 58 | import java.util.Arrays;
|
|
62 | 63 | import java.util.concurrent.ConcurrentHashMap;
|
63 | 64 | import java.util.concurrent.atomic.AtomicBoolean;
|
64 | 65 | import java.util.concurrent.atomic.AtomicLong;
|
| 66 | +import java.util.logging.Level; |
65 | 67 |
|
66 | 68 | import org.graalvm.collections.Pair;
|
67 | 69 | import org.graalvm.nativeimage.ImageInfo;
|
|
114 | 116 | import com.oracle.graal.python.util.Function;
|
115 | 117 | import com.oracle.graal.python.util.PythonUtils;
|
116 | 118 | import com.oracle.graal.python.util.Supplier;
|
| 119 | +import com.oracle.graal.python.util.SuppressFBWarnings; |
117 | 120 | import com.oracle.graal.python.util.WeakIdentityHashMap;
|
118 | 121 | import com.oracle.truffle.api.CallTarget;
|
119 | 122 | import com.oracle.truffle.api.CompilerAsserts;
|
@@ -254,6 +257,9 @@ private record ClosureInfo(Object closure, Object delegate, Object executable, l
|
254 | 257 | */
|
255 | 258 | private final List<Object> loadedExtensions = new LinkedList<>();
|
256 | 259 |
|
| 260 | + public final BackgroundGCTask gcTask; |
| 261 | + private Thread backgroundGCTaskThread; |
| 262 | + |
257 | 263 | public static TruffleLogger getLogger(Class<?> clazz) {
|
258 | 264 | return PythonLanguage.getLogger(LOGGER_CAPI_NAME + "." + clazz.getSimpleName());
|
259 | 265 | }
|
@@ -311,6 +317,8 @@ public CApiContext(PythonContext context, Object llvmLibrary, boolean useNativeB
|
311 | 317 | assert CApiGuards.isSmallInteger(value);
|
312 | 318 | primitiveNativeWrapperCache[i] = PrimitiveNativeWrapper.createInt(value);
|
313 | 319 | }
|
| 320 | + |
| 321 | + this.gcTask = new BackgroundGCTask(context); |
314 | 322 | }
|
315 | 323 |
|
316 | 324 | @TruffleBoundary
|
@@ -595,6 +603,150 @@ public void untrackObject(Object ptr, PFrame.Reference curFrame, TruffleString c
|
595 | 603 | // TODO(fa): implement untracking of container objects
|
596 | 604 | }
|
597 | 605 |
|
| 606 | + private static final class BackgroundGCTask implements Runnable { |
| 607 | + |
| 608 | + private BackgroundGCTask(PythonContext context) { |
| 609 | + this.ctx = new WeakReference<>(context); |
| 610 | + } |
| 611 | + |
| 612 | + Object nativeSymbol = null; |
| 613 | + InteropLibrary callNative = null; |
| 614 | + |
| 615 | + long currentRSS = -1; |
| 616 | + long previousRSS = -1; |
| 617 | + int previousWeakrefCount = -1; |
| 618 | + |
| 619 | + final WeakReference<PythonContext> ctx; |
| 620 | + |
| 621 | + // RSS monitor interval in ms |
| 622 | + static final int RSS_INTERVAL = Integer.getInteger("python.RSSInterval", 1000); |
| 623 | + /** |
| 624 | + * RSS percentage increase between System.gc() calls. Low percentage will trigger |
| 625 | + * System.gc() more often which can cause unnecessary overhead. |
| 626 | + * |
| 627 | + * <ul> |
| 628 | + * why 30%? it's purely based on the {@code huggingface} example. |
| 629 | + * <li>less than 30%: max RSS ~22GB (>200 second per iteration)</li> |
| 630 | + * <li>30%: max RSS ~24GB (~150 second per iteration)</li> |
| 631 | + * <li>larger than 30%: max RSS ~38GB (~140 second per iteration)</li> |
| 632 | + * </ul> |
| 633 | + * |
| 634 | + * <pre> |
| 635 | + */ |
| 636 | + static final double GC_RSS_THRESHOLD = Integer.getInteger("python.RSSThreshold", 30) / 100.0; |
| 637 | + |
| 638 | + Long getCurrentRSS() { |
| 639 | + if (nativeSymbol == null) { |
| 640 | + nativeSymbol = CApiContext.getNativeSymbol(null, NativeCAPISymbol.FUN_GET_CURRENT_RSS); |
| 641 | + callNative = InteropLibrary.getUncached(nativeSymbol); |
| 642 | + } |
| 643 | + Long rss = 0L; |
| 644 | + try { |
| 645 | + rss = (Long) callNative.execute(nativeSymbol); |
| 646 | + } catch (Exception ignored) { |
| 647 | + } |
| 648 | + return rss; |
| 649 | + } |
| 650 | + |
| 651 | + @Override |
| 652 | + public void run() { |
| 653 | + try { |
| 654 | + while (true) { |
| 655 | + Thread.sleep(RSS_INTERVAL); |
| 656 | + perform(); |
| 657 | + } |
| 658 | + } catch (InterruptedException e) { |
| 659 | + Thread.currentThread().interrupt(); |
| 660 | + } |
| 661 | + } |
| 662 | + |
| 663 | + private void perform() { |
| 664 | + PythonContext context = ctx.get(); |
| 665 | + if (context == null) { |
| 666 | + return; |
| 667 | + } |
| 668 | + |
| 669 | + long rss = currentRSS = getCurrentRSS(); |
| 670 | + if (rss == 0) { |
| 671 | + LOGGER.finer("We are unable to get resident set size (RSS) from the system. " + |
| 672 | + "We will skip the java collection routine."); |
| 673 | + Thread.currentThread().interrupt(); |
| 674 | + return; |
| 675 | + } |
| 676 | + |
| 677 | + // reset RSS baseline |
| 678 | + if (rss < this.previousRSS || this.previousRSS == -1) { |
| 679 | + this.previousRSS = rss; |
| 680 | + return; |
| 681 | + } |
| 682 | + |
| 683 | + // skip GC if no new native weakrefs have been created. |
| 684 | + int currentWeakrefCount = context.nativeContext.nativeLookup.size(); |
| 685 | + if (currentWeakrefCount < this.previousWeakrefCount || this.previousWeakrefCount == -1) { |
| 686 | + this.previousWeakrefCount = currentWeakrefCount; |
| 687 | + return; |
| 688 | + } |
| 689 | + |
| 690 | + double ratio = ((rss - this.previousRSS) / (double) this.previousRSS); |
| 691 | + if (ratio >= GC_RSS_THRESHOLD) { |
| 692 | + this.previousWeakrefCount = currentWeakrefCount; |
| 693 | + |
| 694 | + long start = System.nanoTime(); |
| 695 | + PythonUtils.forceFullGC(); |
| 696 | + long gcTime = (System.nanoTime() - start) / 1000000; |
| 697 | + |
| 698 | + if (LOGGER.isLoggable(Level.FINER)) { |
| 699 | + LOGGER.finer(PythonUtils.formatJString("Background GC Task -- GC [%d ms] RSS [%d MB]->[%d MB](%.1f%%)", |
| 700 | + gcTime, previousRSS, rss, ratio * 100)); |
| 701 | + } |
| 702 | + /* |
| 703 | + * cap the previous RSS increase to GC_RSS_THRESHOLD. If the ratio is much larger |
| 704 | + * than GC_RSS_THRESHOLD, then we should do GC more frequently. Though, if we get a |
| 705 | + * lower RSS in subsequent runs, the lower RSS will be set as previous RSS (see |
| 706 | + * above). |
| 707 | + * |
| 708 | + * Note: Resident Set Size (RSS) in the system isn't always an accurate indication |
| 709 | + * of used memory but rather a combination of anonymous memory (RssAnon), file |
| 710 | + * mappings (RssFile) and shmem memory (RssShmem). GC can only reduce RssAnon while |
| 711 | + * RssFile is managed by the operating system which doesn't go down easily. |
| 712 | + */ |
| 713 | + this.previousRSS += (long) (this.previousRSS * GC_RSS_THRESHOLD); |
| 714 | + } |
| 715 | + } |
| 716 | + } |
| 717 | + |
| 718 | + @TruffleBoundary |
| 719 | + public long getCurrentRSS() { |
| 720 | + if (backgroundGCTaskThread != null && backgroundGCTaskThread.isAlive()) { |
| 721 | + long rss = gcTask.currentRSS; |
| 722 | + if (rss == -1) { |
| 723 | + try { |
| 724 | + // in case it just started |
| 725 | + Thread.sleep(BackgroundGCTask.RSS_INTERVAL); |
| 726 | + } catch (InterruptedException e) { |
| 727 | + Thread.currentThread().interrupt(); |
| 728 | + } |
| 729 | + rss = gcTask.currentRSS; |
| 730 | + } |
| 731 | + return rss; |
| 732 | + } |
| 733 | + return 0L; |
| 734 | + } |
| 735 | + |
| 736 | + @SuppressFBWarnings(value = "NP_NULL_ON_SOME_PATH") // context.get() is never null here |
| 737 | + void runBackgroundGCTask(PythonContext context) { |
| 738 | + CompilerAsserts.neverPartOfCompilation(); |
| 739 | + if (ImageInfo.inImageBuildtimeCode() || context.getOption(PythonOptions.NoAsyncActions)) { |
| 740 | + return; |
| 741 | + } |
| 742 | + if (PythonOptions.AUTOMATIC_ASYNC_ACTIONS) { |
| 743 | + backgroundGCTaskThread = context.getEnv().newTruffleThreadBuilder(gcTask).context(context.getEnv().getContext()).build(); |
| 744 | + backgroundGCTaskThread.setDaemon(true); |
| 745 | + backgroundGCTaskThread.setName("python-gc-task"); |
| 746 | + backgroundGCTaskThread.start(); |
| 747 | + } |
| 748 | + } |
| 749 | + |
598 | 750 | /**
|
599 | 751 | * This represents whether the current process has already loaded an instance of the native CAPI
|
600 | 752 | * extensions - this can only be loaded once per process.
|
@@ -691,6 +843,7 @@ public static CApiContext ensureCapiWasLoaded(Node node, PythonContext context,
|
691 | 843 | Object finalizingPointer = SignatureLibrary.getUncached().call(finalizeSignature, finalizeFunction);
|
692 | 844 | try {
|
693 | 845 | cApiContext.addNativeFinalizer(env, finalizingPointer);
|
| 846 | + cApiContext.runBackgroundGCTask(context); |
694 | 847 | } catch (RuntimeException e) {
|
695 | 848 | // This can happen when other languages restrict multithreading
|
696 | 849 | LOGGER.warning(() -> "didn't register a native finalizer due to: " + e.getMessage());
|
@@ -771,6 +924,15 @@ public void exitCApiContext() {
|
771 | 924 | public void finalizeCApi() {
|
772 | 925 | CompilerAsserts.neverPartOfCompilation();
|
773 | 926 | HandleContext handleContext = getContext().nativeContext;
|
| 927 | + if (backgroundGCTaskThread != null && backgroundGCTaskThread.isAlive()) { |
| 928 | + try { |
| 929 | + backgroundGCTaskThread.interrupt(); |
| 930 | + backgroundGCTaskThread.join(); |
| 931 | + } catch (InterruptedException e) { |
| 932 | + Thread.currentThread().interrupt(); |
| 933 | + } |
| 934 | + } |
| 935 | + |
774 | 936 | /*
|
775 | 937 | * Disable reference queue polling because during finalization, we will free any known
|
776 | 938 | * allocated resources (e.g. native object stubs). Calling
|
|
0 commit comments