Skip to content

Commit 799d79b

Browse files
committed
Feat: second pass for fixing conflicts
1 parent eae4261 commit 799d79b

File tree

20 files changed

+84
-73
lines changed

20 files changed

+84
-73
lines changed

crates/blastoff/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,5 +8,5 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA"
88
[dependencies]
99
bitflags = "1.3.2"
1010
cublas_sys = { version = "0.1", path = "../cublas_sys" }
11-
cust = { version = "0.2", path = "../cust", features = ["num-complex"] }
11+
cust = { version = "0.2", path = "../cust", features = ["impl_num_complex"] }
1212
num-complex = "0.4.0"

crates/blastoff/src/level1.rs

+4-4
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,9 @@ impl CublasContext {
4646
Ok(T::amin(
4747
ctx.raw,
4848
x.len() as i32,
49-
x.as_device_ptr().as_raw(),
49+
x.as_device_ptr().as_ptr(),
5050
stride.unwrap_or(1) as i32,
51-
result.as_device_ptr().as_raw_mut(),
51+
result.as_device_ptr().as_mut_ptr(),
5252
)
5353
.to_result()?)
5454
})
@@ -108,9 +108,9 @@ impl CublasContext {
108108
Ok(T::amax(
109109
ctx.raw,
110110
x.len() as i32,
111-
x.as_device_ptr().as_raw(),
111+
x.as_device_ptr().as_ptr(),
112112
stride.unwrap_or(1) as i32,
113-
result.as_device_ptr().as_raw_mut(),
113+
result.as_device_ptr().as_mut_ptr(),
114114
)
115115
.to_result()?)
116116
})

crates/cust/Cargo.toml

+3-1
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,12 @@ num-complex = { version = "0.4", optional = true }
2020
vek = { version = "0.15.1", optional = true, default-features = false }
2121

2222
[features]
23-
default-features = ["vek", "impl_glam", "impl_mint"]
23+
default= ["vek", "impl_glam", "impl_mint"]
2424
impl_glam = ["cust_core/glam", "glam"]
2525
impl_mint = ["cust_core/mint", "mint"]
2626
impl_vek = ["cust_core/vek", "vek"]
27+
impl_half = ["cust_core/half"]
28+
impl_num_complex = ["cust_core/num-complex", "num-complex"]
2729

2830
[build-dependencies]
2931
find_cuda_helper = { path = "../find_cuda_helper", version = "0.2" }

crates/cust/src/memory/device/device_box.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ impl<T: DeviceCopy> DeviceBox<T> {
102102
/// assert_eq!(0, value);
103103
/// ```
104104
pub unsafe fn zeroed() -> CudaResult<Self> {
105-
let mut new_box = DeviceBox::uninitialized()?;
105+
let new_box = DeviceBox::uninitialized()?;
106106
if mem::size_of::<T>() != 0 {
107107
cuda::cuMemsetD8_v2(new_box.as_device_ptr().as_raw(), 0, mem::size_of::<T>())
108108
.to_result()?;
@@ -406,8 +406,8 @@ mod test_device_box {
406406
#[test]
407407
fn test_device_pointer_implements_traits_safely() {
408408
let _context = crate::quick_init().unwrap();
409-
let mut x = DeviceBox::new(&5u64).unwrap();
410-
let mut y = DeviceBox::new(&0u64).unwrap();
409+
let x = DeviceBox::new(&5u64).unwrap();
410+
let y = DeviceBox::new(&0u64).unwrap();
411411

412412
// If the impls dereference the pointer, this should segfault.
413413
let _ = Ord::cmp(&x.as_device_ptr(), &y.as_device_ptr());

crates/cust/src/memory/mod.rs

+6-23
Original file line number | Diff line number | Diff line change
@@ -102,26 +102,17 @@ pub trait GpuBuffer<T: DeviceCopy>: private::Sealed {
102102
}
103103

104104
impl<T: DeviceCopy> GpuBuffer<T> for DeviceBuffer<T> {
105-
unsafe fn as_device_ptr(&self) -> DevicePointer<T> {
105+
fn as_device_ptr(&self) -> DevicePointer<T> {
106106
self.as_ptr()
107107
}
108108

109-
fn as_device_ptr_mut(&mut self) -> DevicePointer<T> {
110-
self.as_mut_ptr()
111-
}
112-
113109
fn len(&self) -> usize {
114110
(**self).len()
115111
}
116112
}
117113

118114
impl<T: DeviceCopy> GpuBuffer<T> for UnifiedBuffer<T> {
119-
unsafe fn as_device_ptr(&self) -> DevicePointer<T> {
120-
DevicePointer::from_raw(self.as_ptr() as u64)
121-
}
122-
123-
fn as_device_ptr_mut(&mut self) -> DevicePointer<T> {
124-
// SAFETY: unified pointers can be dereferenced from the gpu.
115+
fn as_device_ptr(&self) -> DevicePointer<T> {
125116
DevicePointer::from_raw(self.as_ptr() as u64)
126117
}
127118

@@ -137,22 +128,13 @@ pub trait GpuBox<T: DeviceCopy>: private::Sealed {
137128
}
138129

139130
impl<T: DeviceCopy> GpuBox<T> for DeviceBox<T> {
140-
unsafe fn as_device_ptr(&self) -> DevicePointer<T> {
131+
fn as_device_ptr(&self) -> DevicePointer<T> {
141132
self.ptr
142133
}
143-
144-
fn as_device_ptr_mut(&mut self) -> DevicePointer<T> {
145-
DeviceBox::as_device_ptr(self)
146-
}
147134
}
148135

149136
impl<T: DeviceCopy> GpuBox<T> for UnifiedBox<T> {
150-
unsafe fn as_device_ptr(&self) -> DevicePointer<T> {
151-
DevicePointer::from_raw(self.ptr.as_raw() as u64)
152-
}
153-
154-
fn as_device_ptr_mut(&mut self) -> DevicePointer<T> {
155-
// SAFETY: unified pointers can be dereferenced from the gpu.
137+
fn as_device_ptr(&self) -> DevicePointer<T> {
156138
DevicePointer::from_raw(self.ptr.as_raw() as u64)
157139
}
158140
}
@@ -189,7 +171,7 @@ impl<T: DeviceCopy> DeviceMemory for DeviceVariable<T> {
189171

190172
impl<T: DeviceCopy> DeviceMemory for DeviceBuffer<T> {
191173
fn as_raw_ptr(&self) -> cust_raw::CUdeviceptr {
192-
unsafe { self.as_device_ptr().as_raw() }
174+
self.as_device_ptr().as_raw()
193175
}
194176

195177
fn size_in_bytes(&self) -> usize {
@@ -218,6 +200,7 @@ mod private {
218200
}
219201

220202
/// Simple wrapper over cuMemcpyHtoD_v2
203+
#[allow(clippy::missing_safety_doc)]
221204
pub unsafe fn memcpy_htod(
222205
d_ptr: cust_raw::CUdeviceptr,
223206
src_ptr: *const c_void,

crates/cust/src/memory/pointer.rs

+12
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,18 @@ impl<T: DeviceCopy> Pointer for DevicePointer<T> {
3838
}
3939

4040
impl<T: ?Sized + DeviceCopy> DevicePointer<T> {
41+
/// Returns a rust [`pointer`] created from this pointer, meant for FFI purposes.
42+
/// **The pointer is not dereferenceable from the CPU!**
43+
pub fn as_ptr(&self) -> *const T {
44+
self.ptr as *const T
45+
}
46+
47+
/// Returns a rust [`pointer`] created from this pointer, meant for FFI purposes.
48+
/// **The pointer is not dereferenceable from the CPU!**
49+
pub fn as_mut_ptr(&self) -> *mut T {
50+
self.ptr as *mut T
51+
}
52+
4153
/// Returns the contained CUdeviceptr.
4254
pub fn as_raw(&self) -> CUdeviceptr {
4355
self.ptr

crates/cust/src/nvtx.rs

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+

crates/cust_core/Cargo.toml

+3-3
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@ edition = "2021"
77
vek = { version = "0.15.1", default-features=false, features=["libm"], optional = true }
88
glam = { git = "https://github.com/anderslanglands/glam-rs", branch = "cuda", features=["cuda", "libm"], default-features=false, optional=true }
99
mint = { version = "^0.5", optional = true }
10+
half = { version = "1.8", optional = true }
11+
num-complex = { version = "0.4", optional = true }
1012

1113
[features]
12-
default-features = ["vek", "glam", "mint"]
13-
14-
14+
default = ["vek", "glam", "mint"]

crates/cust_core/src/lib.rs

+20-12
Original file line number | Diff line number | Diff line change
@@ -123,15 +123,15 @@ unsafe impl<
123123
{
124124
}
125125

126-
macro_rules! impl_device_copy_vek {
127-
($($strukt:ident),* $(,)?) => {
126+
macro_rules! impl_device_copy_generic {
127+
($($($strukt:ident)::+),* $(,)?) => {
128128
$(
129-
unsafe impl<T: DeviceCopy> DeviceCopy for $strukt<T> {}
129+
unsafe impl<T: DeviceCopy> DeviceCopy for $($strukt)::+<T> {}
130130
)*
131131
}
132132
}
133133

134-
macro_rules! impl_device_copy_glam {
134+
macro_rules! impl_device_copy {
135135
($($strukt:ty),* $(,)?) => {
136136
$(
137137
unsafe impl DeviceCopy for $strukt {}
@@ -143,23 +143,31 @@ macro_rules! impl_device_copy_glam {
143143
use vek::*;
144144

145145
#[cfg(feature = "vek")]
146-
impl_device_copy_vek! {
146+
impl_device_copy_generic! {
147147
Vec2, Vec3, Vec4, Extent2, Extent3, Rgb, Rgba,
148148
Mat2, Mat3, Mat4,
149149
CubicBezier2, CubicBezier3,
150150
Quaternion,
151151
}
152152

153153
#[cfg(feature = "glam")]
154-
impl_device_copy_glam! {
154+
impl_device_copy! {
155155
glam::Vec2, glam::Vec3, glam::Vec4, glam::IVec2, glam::IVec3, glam::IVec4,
156156
}
157157

158158
#[cfg(feature = "mint")]
159-
impl_device_copy_glam! {
160-
mint::Vector2<i16>, mint::Vector2<i32>, mint::Vector2<f32>,
161-
mint::Vector3<u16>, mint::Vector3<u32>, mint::Vector3<i16>, mint::Vector3<i32>, mint::Vector3<f32>,
162-
mint::Vector4<i16>, mint::Vector4<i32>, mint::Vector4<f32>,
163-
mint::ColumnMatrix2<f32>, mint::ColumnMatrix3<f32>, mint::ColumnMatrix4<f32>, mint::ColumnMatrix3x4<f32>,
164-
mint::RowMatrix2<f32>, mint::RowMatrix3<f32>, mint::RowMatrix4<f32>, mint::RowMatrix3x4<f32>,
159+
impl_device_copy_generic! {
160+
mint::Vector2, mint::Vector3, mint::Vector4,
161+
mint::ColumnMatrix2, mint::ColumnMatrix3, mint::ColumnMatrix4, mint::ColumnMatrix3x4,
162+
mint::RowMatrix2, mint::RowMatrix3, mint::RowMatrix4, mint::RowMatrix3x4,
163+
}
164+
165+
#[cfg(feature = "half")]
166+
unsafe impl DeviceCopy for half::f16 {}
167+
#[cfg(feature = "half")]
168+
unsafe impl DeviceCopy for half::bf16 {}
169+
170+
#[cfg(feature = "num-complex")]
171+
impl_device_copy_generic! {
172+
num_complex::Complex
165173
}

crates/optix/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ optix72 = []
1313
optix73 = []
1414
default=["optix73", "impl_glam"]
1515
impl_glam=["cust/impl_glam", "glam"]
16+
impl_half=["cust/impl_half", "half"]
1617

1718
[dependencies]
1819
cust = { version = "0.2", path = "../cust", features=["impl_mint"] }

crates/optix/src/acceleration.rs

+6-6
Original file line number | Diff line number | Diff line change
@@ -1180,23 +1180,23 @@ pub trait Vertex: cust::memory::DeviceCopy {
11801180
const STRIDE: u32 = 0;
11811181
}
11821182

1183-
#[cfg(feature = "half")]
1183+
#[cfg(feature = "impl_half")]
11841184
impl Vertex for [half::f16; 2] {
11851185
const FORMAT: VertexFormat = VertexFormat::Half2;
11861186
}
11871187

1188-
#[cfg(feature = "half")]
1188+
#[cfg(feature = "impl_half")]
11891189
impl Vertex for [half::f16; 3] {
11901190
const FORMAT: VertexFormat = VertexFormat::Half3;
11911191
}
11921192

1193-
#[cfg(feature = "half")]
1194-
impl Vertex for mint::Vector2<f16> {
1193+
#[cfg(feature = "impl_half")]
1194+
impl Vertex for mint::Vector2<half::f16> {
11951195
const FORMAT: VertexFormat = VertexFormat::Half2;
11961196
}
11971197

1198-
#[cfg(feature = "half")]
1199-
impl Vertex for mint::Vector3<f16> {
1198+
#[cfg(feature = "impl_half")]
1199+
impl Vertex for mint::Vector3<half::f16> {
12001200
const FORMAT: VertexFormat = VertexFormat::Half3;
12011201
}
12021202

crates/optix/src/denoiser.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -375,7 +375,7 @@ impl Denoiser {
375375
let raw_params = parameters.to_raw();
376376

377377
let mut out = input_image.to_raw();
378-
out.data = out_buffer.as_device_ptr().as_raw_mut() as u64;
378+
out.data = out_buffer.as_device_ptr().as_raw() as u64;
379379

380380
let layer = sys::OptixDenoiserLayer {
381381
input: input_image.to_raw(),

examples/cuda/cpu/path_tracer/Cargo.toml

+2-1
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,9 @@ version = "0.1.0"
44
edition = "2018"
55

66
[dependencies]
7+
vek = "0.15"
78
bytemuck = { version = "1.7.2", features = ["derive"] }
8-
cust = { version = "0.2", path = "../../../../crates/cust", features = ["vek"] }
9+
cust = { version = "0.2", path = "../../../../crates/cust", features = ["impl_vek"] }
910
image = "0.23.14"
1011
path_tracer_gpu = { path = "../../gpu/path_tracer_gpu" }
1112
gpu_rand = { version = "0.1", path = "../../../../crates/gpu_rand" }

examples/cuda/cpu/path_tracer/src/common.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1-
use cust::vek::{Vec2, Vec3};
21
use glutin::event::{
32
ElementState, Event, MouseButton, MouseScrollDelta, VirtualKeyCode, WindowEvent,
43
};
54
use path_tracer_gpu::Viewport;
5+
use vek::{Vec2, Vec3};
66

77
#[derive(Debug, Clone, Copy, PartialEq)]
88
pub struct Camera {

examples/cuda/cpu/path_tracer/src/cpu/mod.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
use std::time::Duration;
22

3-
use cust::vek::{Clamp, Vec2, Vec3};
43
use gpu_rand::{DefaultRand, GpuRand};
54
use imgui::Ui;
65
use path_tracer_gpu::{
76
material::MaterialKind, render::generate_ray, scene::Scene, Object, Viewport,
87
};
98
use rayon::prelude::*;
109
use sysinfo::{ProcessorExt, System, SystemExt};
10+
use vek::{Clamp, Vec2, Vec3};
1111

1212
use crate::{common::Camera, cuda::SEED};
1313

examples/cuda/cpu/path_tracer/src/cuda/data.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ use cust::{
33
error::CudaResult,
44
memory::{DeviceBuffer, DeviceCopy, UnifiedBuffer},
55
util::SliceExt,
6-
vek::{num_traits::Zero, Vec2, Vec3},
76
};
7+
use vek::{num_traits::Zero, Vec2, Vec3};
88
use gpu_rand::DefaultRand;
99
use path_tracer_gpu::{material::MaterialKind, scene::Scene, Object, Viewport};
1010

examples/cuda/cpu/path_tracer/src/cuda/mod.rs

+15-12
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,13 @@ use cust::{
1111
event::{Event, EventFlags},
1212
function::{BlockSize, GridSize},
1313
prelude::*,
14-
vek::{Vec2, Vec3},
1514
};
1615
use optix::{
17-
context::OptixContext,
16+
context::DeviceContext,
1817
denoiser::{Denoiser, DenoiserModelKind, Image, ImageFormat},
1918
};
2019
use path_tracer_gpu::scene::Scene;
20+
use vek::{Vec2, Vec3};
2121

2222
/// Seed for the random states
2323
pub const SEED: u64 = 932174513921034;
@@ -33,7 +33,7 @@ pub struct CudaRenderer {
3333
stream: Stream,
3434
module: Module,
3535
denoiser: Denoiser,
36-
_optix_context: OptixContext,
36+
_optix_context: DeviceContext,
3737
_context: Context,
3838

3939
buffers: CudaRendererBuffers,
@@ -45,7 +45,7 @@ impl CudaRenderer {
4545
let context = cust::quick_init()?;
4646
optix::init().unwrap();
4747

48-
let optix_context = OptixContext::new(&context).unwrap();
48+
let optix_context = DeviceContext::new(&context, false).unwrap();
4949

5050
let module = Module::from_str(PTX)?;
5151
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
@@ -93,7 +93,8 @@ impl CudaRenderer {
9393

9494
Ok(self
9595
.denoiser
96-
.setup_state(&self.stream, new_size.x as u32, new_size.y as u32, false)?)
96+
.setup_state(&self.stream, new_size.x as u32, new_size.y as u32, false)
97+
.unwrap())
9798
}
9899

99100
/// calculate an optimal launch configuration for an image kernel
@@ -144,13 +145,15 @@ impl CudaRenderer {
144145
height,
145146
);
146147

147-
self.denoiser.invoke(
148-
stream,
149-
Default::default(),
150-
input_image,
151-
Default::default(),
152-
&mut self.buffers.denoised_buffer,
153-
)?;
148+
self.denoiser
149+
.invoke(
150+
stream,
151+
Default::default(),
152+
input_image,
153+
Default::default(),
154+
&mut self.buffers.denoised_buffer,
155+
)
156+
.unwrap();
154157

155158
self.buffers.denoised_buffer.as_device_ptr()
156159
} else {

0 commit comments

Comments
 (0)