Skip to content

Commit 634b4cb

Browse files
authored
Fix is_device_write_preferred in void_sink and user_sink_wrapper (#15064)
Addresses a few issues in the `data_sink` classes to avoid D2H (device-to-host) copies in writers when using a `void_sink`:
- Provide an `is_device_write_preferred` implementation in `void_sink` that always prefers device writes.
- Implement `is_device_write_preferred` in `user_sink_wrapper` so that it forwards the call to the wrapped object.
- Use `cudf::io::void_sink` in benchmarks instead of the local version, which is not fully implemented.

Authors:
- Vukasin Milovanovic (https://github.com/vuule)

Approvers:
- Nghia Truong (https://github.com/ttnghia)
- Mark Harris (https://github.com/harrism)

URL: #15064
1 parent 8c20d2a commit 634b4cb

File tree

3 files changed

+14
-15
lines changed

3 files changed

+14
-15
lines changed

cpp/benchmarks/io/cuio_common.cpp

+5-4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
2+
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -41,7 +41,8 @@ std::string random_file_in_dir(std::string const& dir_path)
4141
cuio_source_sink_pair::cuio_source_sink_pair(io_type type)
4242
: type{type},
4343
d_buffer{0, cudf::get_default_stream()},
44-
file_name{random_file_in_dir(tmpdir.path())}
44+
file_name{random_file_in_dir(tmpdir.path())},
45+
void_sink{cudf::io::data_sink::create()}
4546
{
4647
}
4748

@@ -67,7 +68,7 @@ cudf::io::source_info cuio_source_sink_pair::make_source_info()
6768
cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
6869
{
6970
switch (type) {
70-
case io_type::VOID: return cudf::io::sink_info(&void_sink);
71+
case io_type::VOID: return cudf::io::sink_info(void_sink.get());
7172
case io_type::FILEPATH: return cudf::io::sink_info(file_name);
7273
case io_type::HOST_BUFFER: [[fallthrough]];
7374
case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer);
@@ -78,7 +79,7 @@ cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
7879
size_t cuio_source_sink_pair::size()
7980
{
8081
switch (type) {
81-
case io_type::VOID: return void_sink.bytes_written();
82+
case io_type::VOID: return void_sink->bytes_written();
8283
case io_type::FILEPATH:
8384
return static_cast<size_t>(
8485
std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg());

cpp/benchmarks/io/cuio_common.hpp

+2-11
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
2+
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -32,15 +32,6 @@ std::string random_file_in_dir(std::string const& dir_path);
3232
* @brief Class to create a coupled `source_info` and `sink_info` of given type.
3333
*/
3434
class cuio_source_sink_pair {
35-
class bytes_written_only_sink : public cudf::io::data_sink {
36-
size_t _bytes_written = 0;
37-
38-
public:
39-
void host_write(void const* data, size_t size) override { _bytes_written += size; }
40-
void flush() override {}
41-
size_t bytes_written() override { return _bytes_written; }
42-
};
43-
4435
public:
4536
cuio_source_sink_pair(io_type type);
4637
~cuio_source_sink_pair()
@@ -79,7 +70,7 @@ class cuio_source_sink_pair {
7970
std::vector<char> h_buffer;
8071
rmm::device_uvector<std::byte> d_buffer;
8172
std::string const file_name;
82-
bytes_written_only_sink void_sink;
73+
std::unique_ptr<cudf::io::data_sink> void_sink;
8374
};
8475

8576
/**

cpp/src/io/utilities/data_sink.cpp

+7
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,8 @@ class void_sink : public data_sink {
139139

140140
[[nodiscard]] bool supports_device_write() const override { return true; }
141141

142+
[[nodiscard]] bool is_device_write_preferred(size_t size) const override { return true; }
143+
142144
void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override
143145
{
144146
_bytes_written += size;
@@ -189,6 +191,11 @@ class user_sink_wrapper : public data_sink {
189191
return user_sink->device_write_async(gpu_data, size, stream);
190192
}
191193

194+
[[nodiscard]] bool is_device_write_preferred(size_t size) const override
195+
{
196+
return user_sink->is_device_write_preferred(size);
197+
}
198+
192199
void flush() override { user_sink->flush(); }
193200

194201
size_t bytes_written() override { return user_sink->bytes_written(); }

0 commit comments

Comments
 (0)