@@ -225,6 +225,176 @@ pub unsafe fn memcpy_dtoh(
225
225
Ok ( ( ) )
226
226
}
227
227
228
+ /// Similar to `cudaMemcpy2D` with `HostToDevice` copy type.
229
+ ///
230
+ /// `dpitch`/`spitch` is bytes between the start of two rows.
231
+ /// `width` is the number of *elements* (not bytes) in a row.
232
+ /// `height` is the total number of rows (not bytes).
233
+ ///
234
+ /// # Examples
235
+ ///
236
+ /// ```
237
+ /// # let _context = cust::quick_init().unwrap();
238
+ /// # fn foo() -> Result<(), cust::error::CudaError> {
239
+ /// use cust::memory::*;
240
+ /// unsafe {
241
+ /// // Allocate space for a 3x3 matrix of f32s
242
+ /// let (device_buffer, pitch) = cuda_malloc_pitched::<f32>(3, 3)?;
243
+ ///
244
+ /// let src_array: [f32; 9] = [
245
+ /// 1.0, 2.0, 3.0,
246
+ /// 4.0, 5.0, 6.0,
247
+ /// 7.0, 8.0, 9.0];
248
+ ///
249
+ /// memcpy_2d_htod(
250
+ /// device_buffer,
251
+ /// pitch,
252
+ /// src_array.as_slice().as_ptr(),
253
+ /// 3*std::mem::size_of::<f32>(),
254
+ /// 3,
255
+ /// 3
256
+ /// )?;
257
+ ///
258
+ /// let mut dst_array = [0.0f32; 9];
259
+ ///
260
+ /// memcpy_2d_dtoh(
261
+ /// dst_array.as_mut_slice().as_mut_ptr(),
262
+ /// 3*std::mem::size_of::<f32>(),
263
+ /// device_buffer,
264
+ /// pitch,
265
+ /// 3,
266
+ /// 3
267
+ /// )?;
268
+ ///
269
+ /// assert_eq!(dst_array, src_array);
270
+ /// cuda_free(device_buffer)?;
271
+ /// }
272
+ /// # Ok(())
273
+ /// # }
274
+ /// # foo().unwrap();
275
+ /// ```
276
+ #[ allow( clippy:: missing_safety_doc) ]
277
+ pub unsafe fn memcpy_2d_htod < T : DeviceCopy > (
278
+ dst : DevicePointer < T > ,
279
+ dpitch : usize ,
280
+ src : * const T ,
281
+ spitch : usize ,
282
+ width : usize ,
283
+ height : usize ,
284
+ ) -> CudaResult < ( ) > {
285
+ use cust_raw:: CUmemorytype ;
286
+
287
+ let width_in_bytes = width. checked_mul ( std:: mem:: size_of :: < T > ( ) )
288
+ . ok_or ( CudaError :: InvalidMemoryAllocation ) ?;
289
+
290
+ let pcopy = cust_raw:: CUDA_MEMCPY2D_st {
291
+ srcXInBytes : 0 ,
292
+ srcY : 0 ,
293
+ srcMemoryType : CUmemorytype :: CU_MEMORYTYPE_HOST ,
294
+ srcHost : src as * const c_void ,
295
+ srcDevice : 0 , // Ignored
296
+ srcArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
297
+ srcPitch : spitch,
298
+ dstXInBytes : 0 ,
299
+ dstY : 0 ,
300
+ dstMemoryType : CUmemorytype :: CU_MEMORYTYPE_DEVICE ,
301
+ dstHost : std:: ptr:: null_mut :: < c_void > ( ) , // Ignored
302
+ dstDevice : dst. as_raw ( ) ,
303
+ dstArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
304
+ dstPitch : dpitch,
305
+ WidthInBytes : width_in_bytes,
306
+ Height : height,
307
+ } ;
308
+
309
+ crate :: sys:: cuMemcpy2D_v2 ( & pcopy) . to_result ( ) ?;
310
+ Ok ( ( ) )
311
+ }
312
+
313
+ /// Similar to `cudaMemcpy2D` with `DeviceToHost` copy type.
314
+ ///
315
+ /// `dpitch`/`spitch` is bytes between the start of two rows.
316
+ /// `width` is the number of *elements* (not bytes) in a row.
317
+ /// `height` is the total number of rows (not bytes).
318
+ ///
319
+ /// # Examples
320
+ ///
321
+ /// ```
322
+ /// # let _context = cust::quick_init().unwrap();
323
+ /// # fn foo() -> Result<(), cust::error::CudaError> {
324
+ /// use cust::memory::*;
325
+ /// unsafe {
326
+ /// // Allocate space for a 3x3 matrix of f32s
327
+ /// let (device_buffer, pitch) = cuda_malloc_pitched::<f32>(3, 3)?;
328
+ ///
329
+ /// let src_array: [f32; 9] = [
330
+ /// 1.0, 2.0, 3.0,
331
+ /// 4.0, 5.0, 6.0,
332
+ /// 7.0, 8.0, 9.0];
333
+ ///
334
+ /// memcpy_2d_htod(
335
+ /// device_buffer,
336
+ /// pitch,
337
+ /// src_array.as_slice().as_ptr(),
338
+ /// 3*std::mem::size_of::<f32>(),
339
+ /// 3,
340
+ /// 3
341
+ /// )?;
342
+ ///
343
+ /// let mut dst_array = [0.0f32; 9];
344
+ ///
345
+ /// memcpy_2d_dtoh(
346
+ /// dst_array.as_mut_slice().as_mut_ptr(),
347
+ /// 3*std::mem::size_of::<f32>(),
348
+ /// device_buffer,
349
+ /// pitch,
350
+ /// 3,
351
+ /// 3
352
+ /// )?;
353
+ ///
354
+ /// assert_eq!(dst_array, src_array);
355
+ /// cuda_free(device_buffer)?;
356
+ /// }
357
+ /// # Ok(())
358
+ /// # }
359
+ /// # foo().unwrap();
360
+ /// ```
361
+ #[ allow( clippy:: missing_safety_doc) ]
362
+ pub unsafe fn memcpy_2d_dtoh < T : DeviceCopy > (
363
+ dst : * mut T ,
364
+ dpitch : usize ,
365
+ src : DevicePointer < T > ,
366
+ spitch : usize ,
367
+ width : usize ,
368
+ height : usize ,
369
+ ) -> CudaResult < ( ) > {
370
+ use cust_raw:: CUmemorytype ;
371
+
372
+ let width_in_bytes = width. checked_mul ( std:: mem:: size_of :: < T > ( ) )
373
+ . ok_or ( CudaError :: InvalidMemoryAllocation ) ?;
374
+
375
+ let pcopy = cust_raw:: CUDA_MEMCPY2D_st {
376
+ srcXInBytes : 0 ,
377
+ srcY : 0 ,
378
+ srcMemoryType : CUmemorytype :: CU_MEMORYTYPE_DEVICE ,
379
+ srcHost : std:: ptr:: null_mut :: < c_void > ( ) , // Ignored
380
+ srcDevice : src. as_raw ( ) ,
381
+ srcArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
382
+ srcPitch : spitch,
383
+ dstXInBytes : 0 ,
384
+ dstY : 0 ,
385
+ dstMemoryType : CUmemorytype :: CU_MEMORYTYPE_HOST ,
386
+ dstHost : dst as * mut c_void ,
387
+ dstDevice : 0 , // Ignored
388
+ dstArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
389
+ dstPitch : dpitch,
390
+ WidthInBytes : width_in_bytes,
391
+ Height : height,
392
+ } ;
393
+
394
+ crate :: sys:: cuMemcpy2D_v2 ( & pcopy) . to_result ( ) ?;
395
+ Ok ( ( ) )
396
+ }
397
+
228
398
/// Get the current free and total memory.
229
399
///
230
400
/// Returns in `.1` the total amount of memory available to the current context.
0 commit comments