Skip to content

Commit 0f2639e

Browse files
committed
Background GC with RSS guidance
skip rss test if native is not enabled
1 parent cc89511 commit 0f2639e

File tree

7 files changed

+271
-32
lines changed

7 files changed

+271
-32
lines changed

graalpython/com.oracle.graal.python.cext/src/capi.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,44 @@ PyAPI_FUNC(void) PyTruffle_ObjectArrayRelease(PyObject** array, int32_t size) {
566566
}
567567
}
568568

569+
#if defined(__APPLE__) && defined(__MACH__)
570+
#include <mach/mach.h>
571+
#elif defined(_WIN32)
572+
#include <windows.h>
573+
#include "psapi.h"
574+
#endif
575+
576+
/*
 * Returns the resident set size (RSS) of the current process in megabytes.
 *
 * The value is obtained with the platform-specific facility selected at compile
 * time (Mach task_info, /proc/self/statm, or GetProcessMemoryInfo). If the
 * platform is not supported or the query fails, 0 is returned.
 */
PyAPI_FUNC(size_t) PyTruffle_GetCurrentRSS() {
    size_t rss = 0;
#if defined(__APPLE__) && defined(__MACH__)
    // MacOS
    struct mach_task_basic_info info;
    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t) &info, &infoCount) == KERN_SUCCESS) {
        rss = (size_t)info.resident_size;
    }

#elif defined(__linux__) || defined(__gnu_linux__)
    // Linux
    FILE* fp = NULL;
    if ((fp = fopen( "/proc/self/statm", "r" )) != NULL) {
        /*
         * The first field is skipped and the second one is read as the number
         * of resident pages. fscanf needs a real 'long *' target, and it
         * returns EOF (non-zero) on failure, so the result must be compared
         * against the number of expected conversions.
         */
        long residentPages = 0;
        long pageSize = sysconf( _SC_PAGESIZE);
        if (fscanf(fp, "%*s%ld", &residentPages) == 1 && residentPages > 0 && pageSize > 0) {
            rss = (size_t) residentPages * (size_t) pageSize;
        }
        fclose(fp);
    }

#elif defined(_WIN32)
    // Windows
    PROCESS_MEMORY_COUNTERS pmc;
    if (GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc))) {
        rss = pmc.WorkingSetSize;
    }
#endif
    return rss / (1024 * 1024 /* bytes -> megabytes*/);
}
605+
606+
569607
#define ReadMember(object, offset, T) ((T*)(((char*)object) + offset))[0]
570608

571609
PyAPI_FUNC(int) ReadShortMember(void* object, Py_ssize_t offset) {

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_gc.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -896,3 +896,7 @@ def test_module_globals(self):
896896
####################################### GC #######################################
897897
self._trigger_gc()
898898
##################################################################################
899+
900+
@skipIf(not (GRAALPY) or RUNS_ON_LLVM, "Internal GraalPy RSS function")
def test_current_rss_monitor():
    # The GraalPy-internal RSS query must report a positive value.
    reported_rss = __graalpython__.get_current_rss()
    assert reported_rss > 0

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,6 @@
7878
import java.util.List;
7979
import java.util.logging.Level;
8080

81-
import com.oracle.graal.python.nodes.util.ToNativePrimitiveStorageNode;
82-
import com.oracle.graal.python.runtime.sequence.storage.NativePrimitiveSequenceStorage;
8381
import org.graalvm.home.Version;
8482
import org.graalvm.nativeimage.ImageInfo;
8583

@@ -148,6 +146,7 @@
148146
import com.oracle.graal.python.nodes.statement.AbstractImportNode;
149147
import com.oracle.graal.python.nodes.truffle.PythonArithmeticTypes;
150148
import com.oracle.graal.python.nodes.util.CastToTruffleStringNode;
149+
import com.oracle.graal.python.nodes.util.ToNativePrimitiveStorageNode;
151150
import com.oracle.graal.python.runtime.PosixSupportLibrary;
152151
import com.oracle.graal.python.runtime.PythonContext;
153152
import com.oracle.graal.python.runtime.PythonImageBuildOptions;
@@ -156,6 +155,7 @@
156155
import com.oracle.graal.python.runtime.exception.PythonExitException;
157156
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
158157
import com.oracle.graal.python.runtime.sequence.PSequence;
158+
import com.oracle.graal.python.runtime.sequence.storage.NativePrimitiveSequenceStorage;
159159
import com.oracle.graal.python.runtime.sequence.storage.NativeSequenceStorage;
160160
import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage;
161161
import com.oracle.graal.python.util.PythonUtils;
@@ -1131,4 +1131,15 @@ Object doClear() {
11311131
return PNone.NONE;
11321132
}
11331133
}
1134+
1135+
@Builtin(name = "get_current_rss", maxNumOfPositionalArgs = 0)
1136+
@GenerateNodeFactory
1137+
public abstract static class GetCurrentRSS extends PythonBuiltinNode {
1138+
1139+
@Specialization
1140+
@TruffleBoundary
1141+
Object currentRSS() {
1142+
return getContext().getCApiContext().getCurrentRSS();
1143+
}
1144+
}
11341145
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextWeakrefBuiltins.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,14 @@ static Object refType(Object object, Object callback,
7979
// C API builtin taking a single PyObject argument; the result is declared as a borrowed
// reference (see the 'ret = PyObjectBorrowed' descriptor below).
@CApiBuiltin(ret = PyObjectBorrowed, args = {PyObject}, call = Direct)
8080
abstract static class PyWeakref_GetObject extends CApiUnaryBuiltinNode {
8181
@Specialization
82-
static Object call(PReferenceType self) {
83-
return self.getPyObject();
82+
// Accepts any argument: a PReferenceType yields the result of getPyObject(); every other
// value falls through and yields None.
static Object call(Object reference) {
83+
if (reference instanceof PReferenceType ref) {
84+
return ref.getPyObject();
85+
}
86+
/*
87+
* This weak reference has died in the managed side due to its referent being collected.
88+
*/
89+
return PNone.NONE;
8490
}
8591
}
8692
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import java.io.IOException;
5353
import java.io.PrintStream;
5454
import java.lang.invoke.VarHandle;
55+
import java.lang.ref.WeakReference;
5556
import java.nio.file.LinkOption;
5657
import java.util.ArrayList;
5758
import java.util.Arrays;
@@ -62,6 +63,7 @@
6263
import java.util.concurrent.ConcurrentHashMap;
6364
import java.util.concurrent.atomic.AtomicBoolean;
6465
import java.util.concurrent.atomic.AtomicLong;
66+
import java.util.logging.Level;
6567

6668
import org.graalvm.collections.Pair;
6769
import org.graalvm.nativeimage.ImageInfo;
@@ -114,6 +116,7 @@
114116
import com.oracle.graal.python.util.Function;
115117
import com.oracle.graal.python.util.PythonUtils;
116118
import com.oracle.graal.python.util.Supplier;
119+
import com.oracle.graal.python.util.SuppressFBWarnings;
117120
import com.oracle.graal.python.util.WeakIdentityHashMap;
118121
import com.oracle.truffle.api.CallTarget;
119122
import com.oracle.truffle.api.CompilerAsserts;
@@ -254,6 +257,9 @@ private record ClosureInfo(Object closure, Object delegate, Object executable, l
254257
*/
255258
private final List<Object> loadedExtensions = new LinkedList<>();
256259

260+
public final BackgroundGCTask gcTask;
261+
private Thread backgroundGCTaskThread;
262+
257263
public static TruffleLogger getLogger(Class<?> clazz) {
258264
return PythonLanguage.getLogger(LOGGER_CAPI_NAME + "." + clazz.getSimpleName());
259265
}
@@ -311,6 +317,8 @@ public CApiContext(PythonContext context, Object llvmLibrary, boolean useNativeB
311317
assert CApiGuards.isSmallInteger(value);
312318
primitiveNativeWrapperCache[i] = PrimitiveNativeWrapper.createInt(value);
313319
}
320+
321+
this.gcTask = new BackgroundGCTask(context);
314322
}
315323

316324
@TruffleBoundary
@@ -595,6 +603,150 @@ public void untrackObject(Object ptr, PFrame.Reference curFrame, TruffleString c
595603
// TODO(fa): implement untracking of container objects
596604
}
597605

606+
private static final class BackgroundGCTask implements Runnable {
607+
608+
private BackgroundGCTask(PythonContext context) {
609+
this.ctx = new WeakReference<>(context);
610+
}
611+
612+
Object nativeSymbol = null;
613+
InteropLibrary callNative = null;
614+
615+
long currentRSS = -1;
616+
long previousRSS = -1;
617+
int previousWeakrefCount = -1;
618+
619+
final WeakReference<PythonContext> ctx;
620+
621+
// RSS monitor interval in ms
622+
static final int RSS_INTERVAL = Integer.getInteger("python.RSSInterval", 1000);
623+
/**
624+
* RSS percentage increase between System.gc() calls. Low percentage will trigger
625+
* System.gc() more often which can cause unnecessary overhead.
626+
*
627+
* <ul>
628+
* why 30%? it's purely based on the {@code huggingface} example.
629+
* <li>less than 30%: max RSS ~22GB (>200 second per iteration)</li>
630+
* <li>30%: max RSS ~24GB (~150 second per iteration)</li>
631+
* <li>larger than 30%: max RSS ~38GB (~140 second per iteration)</li>
632+
* </ul>
633+
*
634+
* <pre>
635+
*/
636+
static final double GC_RSS_THRESHOLD = Integer.getInteger("python.RSSThreshold", 30) / 100.0;
637+
638+
Long getCurrentRSS() {
639+
if (nativeSymbol == null) {
640+
nativeSymbol = CApiContext.getNativeSymbol(null, NativeCAPISymbol.FUN_GET_CURRENT_RSS);
641+
callNative = InteropLibrary.getUncached(nativeSymbol);
642+
}
643+
Long rss = 0L;
644+
try {
645+
rss = (Long) callNative.execute(nativeSymbol);
646+
} catch (Exception ignored) {
647+
}
648+
return rss;
649+
}
650+
651+
@Override
652+
public void run() {
653+
try {
654+
while (true) {
655+
Thread.sleep(RSS_INTERVAL);
656+
perform();
657+
}
658+
} catch (InterruptedException e) {
659+
Thread.currentThread().interrupt();
660+
}
661+
}
662+
663+
private void perform() {
664+
PythonContext context = ctx.get();
665+
if (context == null) {
666+
return;
667+
}
668+
669+
long rss = currentRSS = getCurrentRSS();
670+
if (rss == 0) {
671+
LOGGER.finer("We are unable to get resident set size (RSS) from the system. " +
672+
"We will skip the java collection routine.");
673+
Thread.currentThread().interrupt();
674+
return;
675+
}
676+
677+
// reset RSS baseline
678+
if (rss < this.previousRSS || this.previousRSS == -1) {
679+
this.previousRSS = rss;
680+
return;
681+
}
682+
683+
// skip GC if no new native weakrefs have been created.
684+
int currentWeakrefCount = context.nativeContext.nativeLookup.size();
685+
if (currentWeakrefCount < this.previousWeakrefCount || this.previousWeakrefCount == -1) {
686+
this.previousWeakrefCount = currentWeakrefCount;
687+
return;
688+
}
689+
690+
double ratio = ((rss - this.previousRSS) / (double) this.previousRSS);
691+
if (ratio >= GC_RSS_THRESHOLD) {
692+
this.previousWeakrefCount = currentWeakrefCount;
693+
694+
long start = System.nanoTime();
695+
PythonUtils.forceFullGC();
696+
long gcTime = (System.nanoTime() - start) / 1000000;
697+
698+
if (LOGGER.isLoggable(Level.FINER)) {
699+
LOGGER.finer(PythonUtils.formatJString("Background GC Task -- GC [%d ms] RSS [%d MB]->[%d MB](%.1f%%)",
700+
gcTime, previousRSS, rss, ratio * 100));
701+
}
702+
/*
703+
* cap the previous RSS increase to GC_RSS_THRESHOLD. If the ratio is much larger
704+
* than GC_RSS_THRESHOLD, then we should do GC more frequently. Though, if we get a
705+
* lower RSS in subsequent runs, the lower RSS will be set as previous RSS (see
706+
* above).
707+
*
708+
* Note: Resident Set Size (RSS) in the system isn't always an accurate indication
709+
* of used memory but rather a combination of anonymous memory (RssAnon), file
710+
* mappings (RssFile) and shmem memory (RssShmem). GC can only reduce RssAnon while
711+
* RssFile is managed by the operating system which doesn't go down easily.
712+
*/
713+
this.previousRSS += (long) (this.previousRSS * GC_RSS_THRESHOLD);
714+
}
715+
}
716+
}
717+
718+
@TruffleBoundary
719+
public long getCurrentRSS() {
720+
if (backgroundGCTaskThread != null && backgroundGCTaskThread.isAlive()) {
721+
long rss = gcTask.currentRSS;
722+
if (rss == -1) {
723+
try {
724+
// in case it just started
725+
Thread.sleep(BackgroundGCTask.RSS_INTERVAL);
726+
} catch (InterruptedException e) {
727+
Thread.currentThread().interrupt();
728+
}
729+
rss = gcTask.currentRSS;
730+
}
731+
return rss;
732+
}
733+
return 0L;
734+
}
735+
736+
@SuppressFBWarnings(value = "NP_NULL_ON_SOME_PATH") // context.get() is never null here
737+
void runBackgroundGCTask(PythonContext context) {
738+
CompilerAsserts.neverPartOfCompilation();
739+
if (ImageInfo.inImageBuildtimeCode() || context.getOption(PythonOptions.NoAsyncActions)) {
740+
return;
741+
}
742+
if (PythonOptions.AUTOMATIC_ASYNC_ACTIONS) {
743+
backgroundGCTaskThread = context.getEnv().newTruffleThreadBuilder(gcTask).context(context.getEnv().getContext()).build();
744+
backgroundGCTaskThread.setDaemon(true);
745+
backgroundGCTaskThread.setName("python-gc-task");
746+
backgroundGCTaskThread.start();
747+
}
748+
}
749+
598750
/**
599751
* This represents whether the current process has already loaded an instance of the native CAPI
600752
* extensions - this can only be loaded once per process.
@@ -691,6 +843,7 @@ public static CApiContext ensureCapiWasLoaded(Node node, PythonContext context,
691843
Object finalizingPointer = SignatureLibrary.getUncached().call(finalizeSignature, finalizeFunction);
692844
try {
693845
cApiContext.addNativeFinalizer(env, finalizingPointer);
846+
cApiContext.runBackgroundGCTask(context);
694847
} catch (RuntimeException e) {
695848
// This can happen when other languages restrict multithreading
696849
LOGGER.warning(() -> "didn't register a native finalizer due to: " + e.getMessage());
@@ -771,6 +924,15 @@ public void exitCApiContext() {
771924
public void finalizeCApi() {
772925
CompilerAsserts.neverPartOfCompilation();
773926
HandleContext handleContext = getContext().nativeContext;
927+
if (backgroundGCTaskThread != null && backgroundGCTaskThread.isAlive()) {
928+
try {
929+
backgroundGCTaskThread.interrupt();
930+
backgroundGCTaskThread.join();
931+
} catch (InterruptedException e) {
932+
Thread.currentThread().interrupt();
933+
}
934+
}
935+
774936
/*
775937
* Disable reference queue polling because during finalization, we will free any known
776938
* allocated resources (e.g. native object stubs). Calling

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/NativeCAPISymbol.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ public enum NativeCAPISymbol implements NativeCExtSymbol {
139139
FUN_PY_DEALLOC("_Py_Dealloc", Void, Pointer),
140140
FUN_BULK_DEALLOC("PyTruffle_bulk_DEALLOC", Py_ssize_t, Pointer, INT64_T),
141141
FUN_SHUTDOWN_BULK_DEALLOC("PyTruffle_shutdown_bulk_DEALLOC", Py_ssize_t, Pointer, INT64_T),
142+
FUN_GET_CURRENT_RSS("PyTruffle_GetCurrentRSS", SIZE_T),
142143
FUN_TRUFFLE_ADD_SUBOFFSET("truffle_add_suboffset", Pointer, Pointer, Py_ssize_t, Py_ssize_t),
143144
FUN_PY_TRUFFLE_MEMORYVIEW_FROM_OBJECT("PyTruffle_MemoryViewFromObject", PyObjectTransfer, PyObject, Int),
144145
FUN_PY_TRUFFLE_RELEASE_BUFFER("PyTruffle_ReleaseBuffer", ArgDescriptor.Void, Pointer),

0 commit comments

Comments
 (0)