16
16
17
17
#include < cudf/detail/interop.hpp>
18
18
19
+ #include < memory>
20
+ #include < sys/mman.h>
21
+ #include < unistd.h>
22
+
19
23
namespace cudf {
20
24
namespace detail {
21
25
26
/*
  Enable Transparent Huge Pages (THP) for large (4 MiB or more) allocations.

  `buf` is a (smart) pointer to an object exposing `size()` and `data()`; it is
  returned untouched. Rvalue arguments are moved into the return value, lvalue
  arguments are returned by reference.

  The call is purely advisory. Nothing is requested when the buffer is below the
  threshold, when `data()` is null, when the platform does not define
  `MADV_HUGEPAGE`, when the page size cannot be queried, or when no whole page
  fits inside the buffer after aligning its start to a page boundary.

  Enabling THP can improve performance of device-host memory transfers
  significantly, see <https://github.com/rapidsai/cudf/pull/13914>.
 */
template <typename T>
T enable_hugepage(T&& buf)
{
#ifdef MADV_HUGEPAGE
  // Buffers smaller than 4 MiB are left alone.
  constexpr unsigned int min_hugepage_bytes{1u << 22u};

  if (buf->size() >= min_hugepage_bytes) {
    void* addr          = const_cast<uint8_t*>(buf->data());
    auto const pagesize = sysconf(_SC_PAGESIZE);

    // `sysconf` reports failure as -1. Converting that to `std::size_t` would hand
    // `std::align` an alignment that is not a power of two, which is undefined behaviour.
    if (addr != nullptr && pagesize > 0) {
      auto const alignment = static_cast<std::size_t>(pagesize);
      auto length          = static_cast<std::size_t>(buf->size());

      // `std::align` moves `addr` up to the next page boundary and shrinks `length` by the
      // same amount; it yields nullptr when not even one page fits in what is left.
      if (std::align(alignment, alignment, addr, length) != nullptr) {
        // Intentionally not checking for errors that may be returned by older kernel versions;
        // optimistically tries enabling huge pages.
        static_cast<void>(madvise(addr, length, MADV_HUGEPAGE));
      }
    }
  }
#endif
  return std::forward<T>(buf);
}
52
+
22
53
// C++ allocation shim for Arrow buffers.
//
// Forwards `size` and `ar_mr` to `arrow::AllocateBuffer`, raises through `CUDF_EXPECTS` when
// the returned `arrow::Result` is not ok, and otherwise passes the buffer through
// `enable_hugepage` before returning it to the caller.
//
// NOTE(review): this span is rendered-diff text. Lines prefixed `-` are the pre-change
// statements, lines prefixed `+` are their replacements, and the bare numbers are line-number
// residue. The `@@` hunk header inside the block comment marks comment lines that are not
// visible here, so the issue being worked around is not restated in this note.
std::unique_ptr<arrow::Buffer> allocate_arrow_buffer (int64_t const size, arrow::MemoryPool* ar_mr)
23
54
{
24
55
/*
@@ -28,9 +59,9 @@ std::unique_ptr<arrow::Buffer> allocate_arrow_buffer(int64_t const size, arrow::
28
59
To work around this issue we compile an allocation shim in C++ and use
29
60
that from our cuda sources
30
61
*/
31
- auto result = arrow::AllocateBuffer (size, ar_mr);
62
+ arrow::Result<std::unique_ptr<arrow::Buffer>> result = arrow::AllocateBuffer (size, ar_mr);
32
63
CUDF_EXPECTS (result.ok (), " Failed to allocate Arrow buffer" );
33
- return std::move (result).ValueOrDie ();
64
+ return enable_hugepage ( std::move (result).ValueOrDie () );
34
65
}
35
66
36
67
// C++ allocation shim for Arrow bitmaps.
//
// Forwards `size` and `ar_mr` to `arrow::AllocateBitmap`, raises through `CUDF_EXPECTS` when
// the returned `arrow::Result` is not ok, and otherwise passes the buffer through
// `enable_hugepage` before returning it to the caller as a shared pointer.
//
// NOTE(review): this span is rendered-diff text. Lines prefixed `-` are the pre-change
// statements, lines prefixed `+` are their replacements, and the bare numbers are line-number
// residue. The `@@` hunk header directly below the signature elides the opening brace and the
// start of the block comment, so the issue being worked around is not restated in this note.
std::shared_ptr<arrow::Buffer> allocate_arrow_bitmap (int64_t const size, arrow::MemoryPool* ar_mr)
@@ -42,9 +73,9 @@ std::shared_ptr<arrow::Buffer> allocate_arrow_bitmap(int64_t const size, arrow::
42
73
To work around this issue we compile an allocation shim in C++ and use
43
74
that from our cuda sources
44
75
*/
45
- auto result = arrow::AllocateBitmap (size, ar_mr);
76
+ arrow::Result<std::shared_ptr<arrow::Buffer>> result = arrow::AllocateBitmap (size, ar_mr);
46
77
CUDF_EXPECTS (result.ok (), " Failed to allocate Arrow bitmap" );
47
- return std::move (result).ValueOrDie ();
78
+ return enable_hugepage ( std::move (result).ValueOrDie () );
48
79
}
49
80
50
81
} // namespace detail
0 commit comments