From 2c994307a948d23f3f3b6dd1da38ca7fb7edc765 Mon Sep 17 00:00:00 2001
From: frjnn <f.iannelli.francesco229@gmail.com>
Date: Wed, 16 Mar 2022 15:37:57 +0100
Subject: [PATCH 1/5] Chore: Fix some more warnings

---
 crates/cudnn/src/activation/mod.rs               |  2 ++
 .../cudnn/src/attention/attention_descriptor.rs  |  2 +-
 crates/cudnn/src/attention/mod.rs                |  3 +++
 .../cudnn/src/attention/seq_data_descriptor.rs   |  2 +-
 crates/cudnn/src/convolution/mod.rs              | 16 ++++++++++------
 crates/cudnn/src/op/mod.rs                       |  2 ++
 crates/cudnn/src/pooling/mod.rs                  |  2 ++
 crates/cudnn/src/rnn/mod.rs                      |  5 ++++-
 crates/cudnn/src/rnn/rnn_descriptor.rs           |  1 +
 crates/cudnn/src/softmax/mod.rs                  |  2 ++
 10 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/crates/cudnn/src/activation/mod.rs b/crates/cudnn/src/activation/mod.rs
index 6f96230f..48fda88c 100644
--- a/crates/cudnn/src/activation/mod.rs
+++ b/crates/cudnn/src/activation/mod.rs
@@ -66,6 +66,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn activation_forward<CompT, T>(
         &self,
         activation_desc: &ActivationDescriptor,
@@ -132,6 +133,7 @@ impl CudnnContext {
     /// Returns errors if the shapes of the `dx` and `x` tensors do not match, the strides of the
     /// tensors and their differential do not match,  or an unsupported configuration of arguments
     /// is detected.
+    #[allow(clippy::too_many_arguments)]
     pub fn activation_backward<CompT, T>(
         &self,
         activation_desc: &ActivationDescriptor,
diff --git a/crates/cudnn/src/attention/attention_descriptor.rs b/crates/cudnn/src/attention/attention_descriptor.rs
index 690a2979..49fe23bc 100644
--- a/crates/cudnn/src/attention/attention_descriptor.rs
+++ b/crates/cudnn/src/attention/attention_descriptor.rs
@@ -100,7 +100,7 @@ where
     ///
     /// * one or more of the following arguments were negative: `q_proj_size`, `k_proj_size`,
     /// `v_proj_size`, `sm_scaler`.
-    ///
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         mode: AttnModeFlags,
         n_heads: i32,
diff --git a/crates/cudnn/src/attention/mod.rs b/crates/cudnn/src/attention/mod.rs
index 3cbcb796..0c0ca57d 100644
--- a/crates/cudnn/src/attention/mod.rs
+++ b/crates/cudnn/src/attention/mod.rs
@@ -111,6 +111,7 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve space buffer in device memory. This argument should be `None` in
     /// inference mode.
+    #[allow(clippy::too_many_arguments)]
     pub fn multi_head_attn_forward<T, U, D1, D2>(
         &self,
         attn_desc: &AttentionDescriptor<T, U, D1, D2>,
@@ -256,6 +257,7 @@ impl CudnnContext {
     /// Returns errors if an invalid or incompatible input argument was encountered, an inconsistent
     /// internal state was encountered, a requested option or a combination of input arguments is
     /// not supported or in case of insufficient amount of shared memory to launch the kernel.
+    #[allow(clippy::too_many_arguments)]
     pub fn multi_head_attn_backward_data<T, U, D1, D2>(
         &self,
         attn_desc: &AttentionDescriptor<T, U, D1, D2>,
@@ -387,6 +389,7 @@ impl CudnnContext {
     /// Returns errors if an invalid or incompatible input argument was encountered, an inconsistent
     /// internal state was encountered, a requested option or a combination of input arguments is
     /// not supported or in case of insufficient amount of shared memory to launch the kernel.
+    #[allow(clippy::too_many_arguments)]
     pub fn multi_head_attn_backward_weights<T, U, D1, D2>(
         &self,
         attn_desc: &AttentionDescriptor<T, U, D1, D2>,
diff --git a/crates/cudnn/src/attention/seq_data_descriptor.rs b/crates/cudnn/src/attention/seq_data_descriptor.rs
index 9142d93a..d11b1671 100644
--- a/crates/cudnn/src/attention/seq_data_descriptor.rs
+++ b/crates/cudnn/src/attention/seq_data_descriptor.rs
@@ -127,7 +127,7 @@ where
             sys::cudnnSetSeqDataDescriptor(
                 raw,
                 T::into_raw(),
-                4 as i32,
+                4_i32,
                 dims.as_ptr(),
                 raw_axes.as_ptr(),
                 seq_lengths.len(),
diff --git a/crates/cudnn/src/convolution/mod.rs b/crates/cudnn/src/convolution/mod.rs
index 1757f768..ebfa7ae8 100644
--- a/crates/cudnn/src/convolution/mod.rs
+++ b/crates/cudnn/src/convolution/mod.rs
@@ -126,7 +126,7 @@ impl CudnnContext {
                     let algo = results[0];
                     Ok(algo)
                 }
-                _ => return Err(CudnnError::BadParam),
+                _ => Err(CudnnError::BadParam),
             }
         }
     }
@@ -238,7 +238,7 @@ impl CudnnContext {
                     let algo = results[0];
                     Ok(algo)
                 }
-                _ => return Err(CudnnError::BadParam),
+                _ => Err(CudnnError::BadParam),
             }
         }
     }
@@ -350,7 +350,7 @@ impl CudnnContext {
                     let algo = results[0];
                     Ok(algo)
                 }
-                _ => return Err(CudnnError::BadParam),
+                _ => Err(CudnnError::BadParam),
             }
         }
     }
@@ -457,7 +457,7 @@ impl CudnnContext {
 
             Ok(match size.assume_init() {
                 0 => None,
-                size @ _ => Some(size),
+                size => Some(size),
             })
         }
     }
@@ -564,7 +564,7 @@ impl CudnnContext {
 
             Ok(match size.assume_init() {
                 0 => None,
-                size @ _ => Some(size),
+                size => Some(size),
             })
         }
     }
@@ -671,7 +671,7 @@ impl CudnnContext {
 
             Ok(match size.assume_init() {
                 0 => None,
-                size @ _ => Some(size),
+                size => Some(size),
             })
         }
     }
@@ -779,6 +779,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn convolution_forward<T1, T2, CompT, T3, W>(
         &self,
         alpha: CompT,
@@ -963,6 +964,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn convolution_bias_act_forward<T1, T2, CompT, T3, W>(
         &self,
         alpha: CompT,
@@ -1129,6 +1131,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn convolution_backward_data<T1, T2, CompT, T3, W>(
         &self,
         alpha: CompT,
@@ -1283,6 +1286,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn convolution_backward_filter<T1, T2, CompT, T3, W>(
         &self,
         alpha: CompT,
diff --git a/crates/cudnn/src/op/mod.rs b/crates/cudnn/src/op/mod.rs
index c01a72e6..dd51315b 100644
--- a/crates/cudnn/src/op/mod.rs
+++ b/crates/cudnn/src/op/mod.rs
@@ -85,6 +85,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn binary_tensor_op<CompT, T1, T2, T3>(
         &self,
         op_desc: &BinaryOpTensorDescriptor<CompT>,
@@ -192,6 +193,7 @@ impl CudnnContext {
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn unary_tensor_op<CompT, T1, T2>(
         &self,
         op_desc: &UnaryOpTensorDescriptor<CompT>,
diff --git a/crates/cudnn/src/pooling/mod.rs b/crates/cudnn/src/pooling/mod.rs
index f2d07c6d..ad91aa8f 100644
--- a/crates/cudnn/src/pooling/mod.rs
+++ b/crates/cudnn/src/pooling/mod.rs
@@ -33,6 +33,7 @@ impl CudnnContext {
     ///
     /// Returns errors if the batch size or channels dimensions of the two tensor differ or an
     /// invalid combination of arguments is detected.
+    #[allow(clippy::too_many_arguments)]
     pub fn pooling_forward<CompT, T>(
         &self,
         pooling_desc: &PoolingDescriptor,
@@ -99,6 +100,7 @@ impl CudnnContext {
     /// Returns errors if the dimensions or the strides of `y` and `dy` tensors differ or if the
     /// dimensions or the strides of `x` and `dx` tensors differ or if an unsupported combination
     /// of arguments is detected.
+    #[allow(clippy::too_many_arguments)]
     pub fn pooling_backward<CompT, T>(
         &self,
         pooling_desc: &PoolingDescriptor,
diff --git a/crates/cudnn/src/rnn/mod.rs b/crates/cudnn/src/rnn/mod.rs
index 1e13a92f..2f563f6f 100644
--- a/crates/cudnn/src/rnn/mod.rs
+++ b/crates/cudnn/src/rnn/mod.rs
@@ -74,7 +74,7 @@ impl CudnnContext {
                 workspace_size.assume_init(),
                 match reserve_space_size.assume_init() {
                     0 => None,
-                    size @ _ => Some(size),
+                    size => Some(size),
                 },
             ))
         }
@@ -197,6 +197,7 @@ impl CudnnContext {
     ///
     /// Returns errors is an unsupported arguments combination is detected or if the supplied
     /// buffers are too small.
+    #[allow(clippy::too_many_arguments)]
     pub fn rnn_forward<T1, T2>(
         &self,
         rnn_desc: &RnnDescriptor<T1, T2>,
@@ -398,6 +399,7 @@ impl CudnnContext {
     /// # Errors
     ///
     /// Returns errors if an invalid or incompatible input argument was encountered.
+    #[allow(clippy::too_many_arguments)]
     pub fn rnn_backward_data<T1, T2>(
         &self,
         rnn_desc: &RnnDescriptor<T1, T2>,
@@ -543,6 +545,7 @@ impl CudnnContext {
     /// # Errors
     ///
     /// Returns errors if an invalid or incompatible input argument combinations was encountered.
+    #[allow(clippy::too_many_arguments)]
     pub fn rnn_backward_weights<T1, T2>(
         &self,
         rnn_desc: &RnnDescriptor<T1, T2>,
diff --git a/crates/cudnn/src/rnn/rnn_descriptor.rs b/crates/cudnn/src/rnn/rnn_descriptor.rs
index 01bd14e9..db2f4523 100644
--- a/crates/cudnn/src/rnn/rnn_descriptor.rs
+++ b/crates/cudnn/src/rnn/rnn_descriptor.rs
@@ -138,6 +138,7 @@ where
     /// # Ok(())
     /// # }
     /// ```
+    #[allow(clippy::too_many_arguments)]
     pub fn new<S>(
         algo: RnnAlgo,
         cell_mode: RnnMode,
diff --git a/crates/cudnn/src/softmax/mod.rs b/crates/cudnn/src/softmax/mod.rs
index ce2aff9c..bba60b44 100644
--- a/crates/cudnn/src/softmax/mod.rs
+++ b/crates/cudnn/src/softmax/mod.rs
@@ -32,6 +32,7 @@ impl CudnnContext {
     ///
     /// Returns errors if the configuration in input is not supported, the tensor shapes differ or
     /// the data types of the input and destination tensor are not the same.
+    #[allow(clippy::too_many_arguments)]
     pub fn softmax_forward<T, CompT>(
         &self,
         algo: SoftmaxAlgo,
@@ -97,6 +98,7 @@ impl CudnnContext {
     ///
     /// Returns errors if the configuration in input is not supported, the tensor shapes differ or
     /// the data types of the input and differential tensor are not the same.
+    #[allow(clippy::too_many_arguments)]
     pub fn softmax_backward<T, CompT>(
         &self,
         algo: SoftmaxAlgo,

From a2016dc5c6b2d94d7ba757c3961b1a55f2fcc57e Mon Sep 17 00:00:00 2001
From: frjnn <f.iannelli.francesco229@gmail.com>
Date: Wed, 16 Mar 2022 19:41:28 +0100
Subject: [PATCH 2/5] Chore: Add link to individual cuDNN docs for each wrapped
 item and function

---
 .../src/activation/activation_descriptor.rs   |  3 ++
 .../cudnn/src/activation/activation_mode.rs   |  3 ++
 crates/cudnn/src/activation/mod.rs            |  7 ++++
 .../src/attention/attention_descriptor.rs     |  3 ++
 .../src/attention/attention_weights_kind.rs   |  3 ++
 crates/cudnn/src/attention/mod.rs             | 12 ++++++
 crates/cudnn/src/attention/seq_data_axis.rs   |  3 ++
 .../src/attention/seq_data_descriptor.rs      |  3 ++
 crates/cudnn/src/context.rs                   | 12 ++++++
 .../src/convolution/convolution_config.rs     |  3 ++
 .../src/convolution/convolution_descriptor.rs |  9 +++++
 .../cudnn/src/convolution/convolution_mode.rs | 12 ++----
 .../src/convolution/filter_descriptor.rs      |  3 ++
 crates/cudnn/src/convolution/mod.rs           | 30 +++++++++++++--
 crates/cudnn/src/determinism.rs               |  3 ++
 crates/cudnn/src/dropout/mod.rs               | 38 +++++++++++++++----
 crates/cudnn/src/error.rs                     |  3 ++
 crates/cudnn/src/math_type.rs                 |  3 ++
 crates/cudnn/src/nan_propagation.rs           | 12 ++----
 crates/cudnn/src/op/mod.rs                    | 15 ++++++++
 crates/cudnn/src/op/op_tensor_descriptor.rs   |  7 +++-
 crates/cudnn/src/op/op_tensor_op.rs           |  6 +++
 crates/cudnn/src/pooling/mod.rs               |  9 ++++-
 .../cudnn/src/pooling/pooling_descriptor.rs   |  4 +-
 crates/cudnn/src/pooling/pooling_mode.rs      |  3 ++
 crates/cudnn/src/rnn/forward_mode.rs          |  5 ++-
 crates/cudnn/src/rnn/mod.rs                   | 15 ++++++++
 crates/cudnn/src/rnn/rnn_algo.rs              |  3 ++
 crates/cudnn/src/rnn/rnn_bias_mode.rs         | 14 ++-----
 crates/cudnn/src/rnn/rnn_clip_mode.rs         |  3 ++
 crates/cudnn/src/rnn/rnn_data_descriptor.rs   |  3 ++
 crates/cudnn/src/rnn/rnn_data_layout.rs       |  3 ++
 crates/cudnn/src/rnn/rnn_descriptor.rs        |  6 +++
 crates/cudnn/src/rnn/rnn_direction_mode.rs    | 12 ++----
 crates/cudnn/src/rnn/rnn_input_mode.rs        | 12 ++----
 crates/cudnn/src/rnn/rnn_mode.rs              | 14 ++-----
 crates/cudnn/src/softmax/mod.rs               |  6 +++
 crates/cudnn/src/softmax/softmax_algo.rs      |  3 ++
 crates/cudnn/src/softmax/softmax_mode.rs      |  3 ++
 crates/cudnn/src/tensor/tensor_descriptor.rs  |  6 +++
 crates/cudnn/src/tensor/tensor_format.rs      |  3 ++
 crates/cudnn/src/w_grad_mode.rs               |  3 ++
 42 files changed, 250 insertions(+), 73 deletions(-)

diff --git a/crates/cudnn/src/activation/activation_descriptor.rs b/crates/cudnn/src/activation/activation_descriptor.rs
index 5026f37b..ef33b099 100644
--- a/crates/cudnn/src/activation/activation_descriptor.rs
+++ b/crates/cudnn/src/activation/activation_descriptor.rs
@@ -19,6 +19,9 @@ impl ActivationDescriptor {
     /// * `coefficient` - optional coefficient for the given function. It specifies the clipping
     /// threshold for `ActivationMode::ClippedRelu`.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetActivationDescriptor)
+    /// may offer additional information about the API behavior.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/crates/cudnn/src/activation/activation_mode.rs b/crates/cudnn/src/activation/activation_mode.rs
index 94c23ed3..01c43f4e 100644
--- a/crates/cudnn/src/activation/activation_mode.rs
+++ b/crates/cudnn/src/activation/activation_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies a neuron activation function.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnActivationMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum ActivationMode {
     /// Selects the sigmoid function.
diff --git a/crates/cudnn/src/activation/mod.rs b/crates/cudnn/src/activation/mod.rs
index 48fda88c..8674f8bd 100644
--- a/crates/cudnn/src/activation/mod.rs
+++ b/crates/cudnn/src/activation/mod.rs
@@ -28,6 +28,10 @@ impl CudnnContext {
     ///
     /// * `y` - data for the output.
     ///
+    ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnActivationForward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the shapes of the `y` and `x` tensors do not match or an unsupported
@@ -128,6 +132,9 @@ impl CudnnContext {
     ///
     /// * `dx` - data for the input differential.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnActivationBackward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the shapes of the `dx` and `x` tensors do not match, the strides of the
diff --git a/crates/cudnn/src/attention/attention_descriptor.rs b/crates/cudnn/src/attention/attention_descriptor.rs
index 49fe23bc..c229765b 100644
--- a/crates/cudnn/src/attention/attention_descriptor.rs
+++ b/crates/cudnn/src/attention/attention_descriptor.rs
@@ -85,6 +85,9 @@ where
     ///
     /// * `max_bream_size` - largest beam expected in any sequential data descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetAttnDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an unsupported combination of arguments is detected. Some examples
diff --git a/crates/cudnn/src/attention/attention_weights_kind.rs b/crates/cudnn/src/attention/attention_weights_kind.rs
index 11c5ad76..48ef86af 100644
--- a/crates/cudnn/src/attention/attention_weights_kind.rs
+++ b/crates/cudnn/src/attention/attention_weights_kind.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies a group of weights or biases for the multi-head attention layer.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMultiHeadAttnWeightKind_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum AttnWeight {
     /// Selects the input projection weights for queries.
diff --git a/crates/cudnn/src/attention/mod.rs b/crates/cudnn/src/attention/mod.rs
index 0c0ca57d..569b2174 100644
--- a/crates/cudnn/src/attention/mod.rs
+++ b/crates/cudnn/src/attention/mod.rs
@@ -26,6 +26,9 @@ impl CudnnContext {
     ///
     /// `desc` - multi-head attention descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetMultiHeadAttnBuffers)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if invalid arguments are detected.
@@ -111,6 +114,9 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve space buffer in device memory. This argument should be `None` in
     /// inference mode.
+    ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMultiHeadAttnForward)
+    /// may offer additional information about the APi behavior.
     #[allow(clippy::too_many_arguments)]
     pub fn multi_head_attn_forward<T, U, D1, D2>(
         &self,
@@ -252,6 +258,9 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve space buffer in device memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMultiHeadAttnBackwardData)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid or incompatible input argument was encountered, an inconsistent
@@ -384,6 +393,9 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve space buffer in device memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMultiHeadAttnBackwardWeights)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid or incompatible input argument was encountered, an inconsistent
diff --git a/crates/cudnn/src/attention/seq_data_axis.rs b/crates/cudnn/src/attention/seq_data_axis.rs
index 8e8b7ec0..57612635 100644
--- a/crates/cudnn/src/attention/seq_data_axis.rs
+++ b/crates/cudnn/src/attention/seq_data_axis.rs
@@ -3,6 +3,9 @@ use crate::sys;
 /// Describes and indexes active dimensions in the `SeqDataDescriptor` `dim` field. This enum is
 /// also used in the `axis` argument of the `SeqDataDescriptor` constructor to  define the layout
 /// of the sequence data buffer in memory.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSeqDataAxis_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum SeqDataAxis {
     /// Identifies the time (sequence length) dimension or specifies the time in the data layout.
diff --git a/crates/cudnn/src/attention/seq_data_descriptor.rs b/crates/cudnn/src/attention/seq_data_descriptor.rs
index d11b1671..b34003b5 100644
--- a/crates/cudnn/src/attention/seq_data_descriptor.rs
+++ b/crates/cudnn/src/attention/seq_data_descriptor.rs
@@ -71,6 +71,9 @@ where
     ///
     /// * `seq_lengths` - array that defines all sequence lengths of the underlying container.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetSeqDataDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the innermost dimension as specified in the `axes` array is not
diff --git a/crates/cudnn/src/context.rs b/crates/cudnn/src/context.rs
index 704e52fa..5429049e 100644
--- a/crates/cudnn/src/context.rs
+++ b/crates/cudnn/src/context.rs
@@ -30,6 +30,9 @@ pub struct CudnnContext {
 impl CudnnContext {
     /// Creates a new cuDNN context, allocating the required memory on both host and device.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnCreate)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
@@ -54,6 +57,9 @@ impl CudnnContext {
     }
 
     /// Returns the version number of the underlying cuDNN library.
+    ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetVersion)
+    /// may offer additional information about the APi behavior.
     pub fn version(&self) -> (u32, u32, u32) {
         unsafe {
             // cudnnGetVersion does not return a state as it never fails.
@@ -69,6 +75,9 @@ impl CudnnContext {
     /// Since The same version of a given cuDNN library can be compiled against different CUDA
     /// toolkit versions, this routine returns the CUDA toolkit version that the currently used
     /// cuDNN library has been compiled against.
+    ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetCudartVersion)
+    /// may offer additional information about the APi behavior.
     pub fn cuda_version(&self) -> (u32, u32, u32) {
         unsafe {
             // cudnnGetCudartVersion does not return a state as it never fails.
@@ -94,6 +103,9 @@ impl CudnnContext {
     ///
     /// `stream` - the CUDA stream to be written to the cuDNN handle.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetStream)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns error if the supplied stream in invalid or a mismatch if found between the user
diff --git a/crates/cudnn/src/convolution/convolution_config.rs b/crates/cudnn/src/convolution/convolution_config.rs
index 834b4bd0..c5186e53 100644
--- a/crates/cudnn/src/convolution/convolution_config.rs
+++ b/crates/cudnn/src/convolution/convolution_config.rs
@@ -1,6 +1,9 @@
 use crate::{private, DataType};
 
 /// Supported data types configurations for convolution operations.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionForward)
+/// may offer additional information about the APi behavior.
 pub trait SupportedConv<X, W, Y>: private::Sealed + DataType
 where
     X: DataType,
diff --git a/crates/cudnn/src/convolution/convolution_descriptor.rs b/crates/cudnn/src/convolution/convolution_descriptor.rs
index e0271a9c..83b50c1e 100644
--- a/crates/cudnn/src/convolution/convolution_descriptor.rs
+++ b/crates/cudnn/src/convolution/convolution_descriptor.rs
@@ -36,6 +36,9 @@ impl<T: DataType> ConvDescriptor<T> {
     /// * `math_type` - indicates whether or not the use of tensor op is permitted in the library
     /// routines associated with a given convolution descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetConvolutionNdDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// This function returns an error if any element of stride and dilation is negative or 0, if
@@ -123,6 +126,9 @@ impl<T: DataType> ConvDescriptor<T> {
     ///
     /// **Do note** that tensor core operations may not be available on all device architectures.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetConvolutionMathType)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the math type was not set successfully.
@@ -155,6 +161,9 @@ impl<T: DataType> ConvDescriptor<T> {
     ///
     /// `groups` - group count.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetConvolutionGroupCount)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the argument passed is invalid.
diff --git a/crates/cudnn/src/convolution/convolution_mode.rs b/crates/cudnn/src/convolution/convolution_mode.rs
index 842663c8..477bf41e 100644
--- a/crates/cudnn/src/convolution/convolution_mode.rs
+++ b/crates/cudnn/src/convolution/convolution_mode.rs
@@ -6,6 +6,9 @@ use crate::sys;
 /// mathematically to a convolution or to a cross-correlation.
 ///
 /// A cross-correlation is equivalent to a convolution with its filter rotated by 180 degrees.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum ConvMode {
     /// Convolution operation.
@@ -14,15 +17,6 @@ pub enum ConvMode {
     CrossCorrelation,
 }
 
-impl From<sys::cudnnConvolutionMode_t> for ConvMode {
-    fn from(raw: sys::cudnnConvolutionMode_t) -> Self {
-        match raw {
-            sys::cudnnConvolutionMode_t::CUDNN_CONVOLUTION => Self::Convolution,
-            sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION => Self::CrossCorrelation,
-        }
-    }
-}
-
 impl From<ConvMode> for sys::cudnnConvolutionMode_t {
     fn from(convolution_mode: ConvMode) -> sys::cudnnConvolutionMode_t {
         match convolution_mode {
diff --git a/crates/cudnn/src/convolution/filter_descriptor.rs b/crates/cudnn/src/convolution/filter_descriptor.rs
index 1185c570..bbce59b9 100644
--- a/crates/cudnn/src/convolution/filter_descriptor.rs
+++ b/crates/cudnn/src/convolution/filter_descriptor.rs
@@ -28,6 +28,9 @@ where
     /// a 3D filter descriptor, the number S (number of columns per filter) is omitted. For N = 5
     /// and greater, the layout of the higher dimensions immediately follows RS.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetFilterNdDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns an error if at least one of the elements of the array shape was negative or zero,
diff --git a/crates/cudnn/src/convolution/mod.rs b/crates/cudnn/src/convolution/mod.rs
index ebfa7ae8..673ab766 100644
--- a/crates/cudnn/src/convolution/mod.rs
+++ b/crates/cudnn/src/convolution/mod.rs
@@ -37,6 +37,9 @@ impl CudnnContext {
     /// math type of the convolution descriptor according to the one of the returned algorithm to
     /// get the best possible performance.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionForwardAlgorithm_v7)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid combination of arguments is passed.
@@ -149,6 +152,9 @@ impl CudnnContext {
     /// **Do note** that the best found algorithm `MathType` must be set manually on the
     /// convolution descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionBackwardDataAlgorithm_v7)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid combination of arguments is passed.
@@ -261,6 +267,9 @@ impl CudnnContext {
     /// **Do note** that the best found algorithm `MathType` must be set manually on the
     /// convolution descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionBackwardFilterAlgorithm_v7)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid combination of arguments is passed.
@@ -380,6 +389,9 @@ impl CudnnContext {
     /// **Do note** that not every algorithm is available for every configuration of the input
     /// tensor and/or every configuration of the convolution descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionForwardWorkspaceSize)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid combination of arguments is passed or the combination of the
@@ -487,6 +499,9 @@ impl CudnnContext {
     ///  **Do note** that not every algorithm is available for every configuration of the input
     /// tensor and/or every configuration of the convolution descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionBackwardDataWorkspaceSize)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid combination of arguments is passed or the combination of the
@@ -594,6 +609,9 @@ impl CudnnContext {
     /// **Do note** that not every algorithm is available for every configuration of the input
     /// tensor and/or every configuration of the convolution descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionBackwardFilterWorkspaceSize)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid combination of arguments is passed or the combination of the
@@ -713,7 +731,8 @@ impl CudnnContext {
     ///
     /// **Do note** than not all possible configurations of layouts and data types for the operands
     /// are supported by cuDNN. Refer to the following link for the
-    /// [complete list](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionForward).
+    /// [complete list](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionForward)
+    /// and for in-depth explanation of the API behavior.
     ///
     /// # Errors
     ///
@@ -884,6 +903,9 @@ impl CudnnContext {
     ///
     /// **Do note** that `y_desc` and `z_desc` should match.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionBiasActivationForward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid or unsupported combination of argument is passed.
@@ -1070,7 +1092,8 @@ impl CudnnContext {
     ///
     /// **Do note** than not all possible configurations of layouts and data types for the operands
     /// are supported by cuDNN. Refer to the following link for the
-    /// [complete list](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionBackwardData).
+    /// [complete list](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionBackwardData) and
+    /// for an in-depth explanation of the API behavior.
     ///
     /// # Errors
     ///
@@ -1225,7 +1248,8 @@ impl CudnnContext {
     ///
     /// **Do note** than not all possible configurations of layouts and data types for the operands
     /// are supported by cuDNN. Refer to the following link for the
-    /// [complete list](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionBackwardFilter).
+    /// [complete list](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionBackwardFilter)
+    /// and for an in-depth explanation of the API behavior.
     ///
     /// # Errors
     ///
diff --git a/crates/cudnn/src/determinism.rs b/crates/cudnn/src/determinism.rs
index 2ec1692c..f62c05d7 100644
--- a/crates/cudnn/src/determinism.rs
+++ b/crates/cudnn/src/determinism.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Enum stating whether or not the computed results are deterministic (reproducible).
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDeterminism_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum Determinism {
     /// Results are guaranteed to be reproducible.
diff --git a/crates/cudnn/src/dropout/mod.rs b/crates/cudnn/src/dropout/mod.rs
index 82043460..9b57831a 100644
--- a/crates/cudnn/src/dropout/mod.rs
+++ b/crates/cudnn/src/dropout/mod.rs
@@ -10,6 +10,9 @@ impl CudnnContext {
     /// This function is used to query the amount of space required to store the states of the
     /// random number generators.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDropoutGetStatesSize)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns an error if the query was not successful.
@@ -50,8 +53,10 @@ impl CudnnContext {
     ///
     /// # Arguments
     ///
-    /// `x_desc` - a previously initialized tensor descriptor, describing input to a dropout
-    /// operation.
+    /// `desc` - tensor descriptor.
+    ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDropoutGetReserveSpaceSize)
+    /// may offer additional information about the APi behavior.
     ///
     /// # Errors
     ///
@@ -63,19 +68,27 @@ impl CudnnContext {
     /// # use std::error::Error;
     /// #
     /// # fn main() -> Result<(), Box<dyn Error>> {
-    /// use cudnn::{CudnnContext, TensorDescriptor};
+    /// use cudnn::{CudnnContext, ScalarC, TensorDescriptor};
     ///
     /// let ctx = CudnnContext::new()?;
     ///
-    /// let size = ctx.get_dropout_reserved_space_size()?;
+    /// let desc = TensorDescriptor::<f32>::new_format(&[4, 5, 20, 20], ScalarC::Nchw)?;
+    ///
+    /// let size = ctx.get_dropout_reserve_space_size(&desc)?;
     /// # Ok(())
     /// # }
     /// ```
-    pub fn get_dropout_reserved_space_size(&self) -> Result<usize, CudnnError> {
+    pub fn get_dropout_reserve_space_size<T>(
+        &self,
+        desc: &TensorDescriptor<T>,
+    ) -> Result<usize, CudnnError>
+    where
+        T: DataType,
+    {
         let mut size = MaybeUninit::uninit();
 
         unsafe {
-            sys::cudnnDropoutGetStatesSize(self.raw, size.as_mut_ptr()).into_result()?;
+            sys::cudnnDropoutGetReserveSpaceSize(desc.raw, size.as_mut_ptr()).into_result()?;
 
             Ok(size.assume_init())
         }
@@ -95,6 +108,9 @@ impl CudnnContext {
     /// **Do note** that the exact amount of memory can be obtained with
     /// [`get_dropout_states_size()`](CudnnContext::get_dropout_states_sizes).
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetDropoutDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Return errors if `states` size is less than that returned by `get_dropout_states_size`.
@@ -167,6 +183,9 @@ impl CudnnContext {
     /// the contents of `reserved_space` does not change between the `dropout_forward()` and
     /// `dropout_backward()` calls.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDropoutForward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns an error if the number of elements in `x_data` and `y_data` differs and if
@@ -200,7 +219,7 @@ impl CudnnContext {
     /// let dropout_desc = ctx.create_dropout_descriptor(dropout, states, seed)?;
     ///
     /// let mut reserved_space = {
-    ///     let size = ctx.get_dropout_reserved_space_size()?;
+    ///     let size = ctx.get_dropout_reserve_space_size(&x_desc)?;
     ///     unsafe { DeviceBuffer::uninitialized(size)? }
     /// };
     ///
@@ -263,6 +282,9 @@ impl CudnnContext {
     /// contents of reserveSpace does not change between `dropout_forward()` and
     /// `dropout_backward()` calls.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDropoutBackward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns an error if the number of elements in `dx_data` and `dy_data` differs and if
@@ -299,7 +321,7 @@ impl CudnnContext {
     /// # let seed = 123;
     /// # let dropout_desc = ctx.create_dropout_descriptor(dropout, states, seed)?;
     /// # let mut reserved_space = {
-    /// #     let size = ctx.get_dropout_reserved_space_size()?;
+    /// #     let size = ctx.get_dropout_reserve_space_size(&x_desc)?;
     /// #     unsafe { DeviceBuffer::uninitialized(size)? }
     /// # };
     /// # ctx.dropout_forward(&dropout_desc, &x_desc, &x, &y_desc, &mut y, &mut reserved_space)?;
diff --git a/crates/cudnn/src/error.rs b/crates/cudnn/src/error.rs
index 2d4abc38..448a717b 100644
--- a/crates/cudnn/src/error.rs
+++ b/crates/cudnn/src/error.rs
@@ -2,6 +2,9 @@ use crate::sys;
 use std::{error::Error, ffi::CStr, fmt::Display};
 
 /// Enum encapsulating function status returns. All cuDNN library functions return their status.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnStatus_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum CudnnError {
diff --git a/crates/cudnn/src/math_type.rs b/crates/cudnn/src/math_type.rs
index c13e59d8..e0dbdf33 100644
--- a/crates/cudnn/src/math_type.rs
+++ b/crates/cudnn/src/math_type.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Enum stating whether the use of tensor core operations is permitted in a given library routine.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMathType_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum MathType {
diff --git a/crates/cudnn/src/nan_propagation.rs b/crates/cudnn/src/nan_propagation.rs
index 40f829cb..474e41ed 100644
--- a/crates/cudnn/src/nan_propagation.rs
+++ b/crates/cudnn/src/nan_propagation.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Indicates whether a given cuDNN routine should propagate Nan numbers.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnNanPropagation_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum NanPropagation {
     /// NaN numbers are not propagated.
@@ -17,12 +20,3 @@ impl From<NanPropagation> for sys::cudnnNanPropagation_t {
         }
     }
 }
-
-impl From<sys::cudnnNanPropagation_t> for NanPropagation {
-    fn from(raw: sys::cudnnNanPropagation_t) -> Self {
-        match raw {
-            sys::cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN => NanPropagation::NotPropagateNaN,
-            sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN => NanPropagation::PropagateNaN,
-        }
-    }
-}
diff --git a/crates/cudnn/src/op/mod.rs b/crates/cudnn/src/op/mod.rs
index dd51315b..bf6cbc81 100644
--- a/crates/cudnn/src/op/mod.rs
+++ b/crates/cudnn/src/op/mod.rs
@@ -49,6 +49,9 @@ impl CudnnContext {
     /// to dimension five (5) are supported. This routine does not support tensor formats beyond
     /// these dimensions.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnOpTensor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
@@ -161,6 +164,9 @@ impl CudnnContext {
     /// to dimension five (5) are supported. This routine does not support tensor formats beyond
     /// these dimensions.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnOpTensor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
@@ -259,6 +265,9 @@ impl CudnnContext {
     /// to dimension five (5) are supported. This routine does not support tensor formats beyond
     /// these dimensions.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnAddTensor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns error if an unsupported configurations of arguments is detected.
@@ -327,6 +336,9 @@ impl CudnnContext {
     ///
     /// * `value` - value to set. Must be stored in host memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetTensor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns error if an unsupported configurations of arguments is detected.
@@ -382,6 +394,9 @@ impl CudnnContext {
     /// * `value` - value in the host memory to a single value that all elements of the tensor will
     /// be scaled with.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnScaleTensor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns error if an unsupported configurations of arguments is detected.
diff --git a/crates/cudnn/src/op/op_tensor_descriptor.rs b/crates/cudnn/src/op/op_tensor_descriptor.rs
index 7d4cf2b1..a93e6895 100644
--- a/crates/cudnn/src/op/op_tensor_descriptor.rs
+++ b/crates/cudnn/src/op/op_tensor_descriptor.rs
@@ -26,7 +26,6 @@ unsafe fn init_raw_op_descriptor<T: DataType>(
 
 /// The description of a unary Tensor Core operation.
 ///
-///
 /// As specified in the cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#scaling-parameters),
 /// admissible types for scaling parameters are `f32` and `f64` for `f32` and `f64` tensors
 /// respectively.
@@ -49,6 +48,9 @@ where
     ///
     /// * `nan_opt` - a NaN propagation policy.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetOpTensorDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
@@ -111,6 +113,9 @@ where
     ///
     /// * `nan_opt` - a NaN propagation policy.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetOpTensorDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/crates/cudnn/src/op/op_tensor_op.rs b/crates/cudnn/src/op/op_tensor_op.rs
index d787cacd..55774b0f 100644
--- a/crates/cudnn/src/op/op_tensor_op.rs
+++ b/crates/cudnn/src/op/op_tensor_op.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// A unary tensor core operation.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnOpTensorOp_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum UnaryOp {
@@ -18,6 +21,9 @@ impl From<UnaryOp> for sys::cudnnOpTensorOp_t {
 }
 
 /// A binary tensor core operation.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnOpTensorOp_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum BinaryOp {
diff --git a/crates/cudnn/src/pooling/mod.rs b/crates/cudnn/src/pooling/mod.rs
index ad91aa8f..633cd6f1 100644
--- a/crates/cudnn/src/pooling/mod.rs
+++ b/crates/cudnn/src/pooling/mod.rs
@@ -29,6 +29,9 @@ impl CudnnContext {
     ///
     /// * `y` - data for the destination tensor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingForward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the batch size or channels dimensions of the two tensor differ or an
@@ -95,6 +98,9 @@ impl CudnnContext {
     ///
     /// * `dx` - data for the input differential.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingBackward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the dimensions or the strides of `y` and `dy` tensors differ or if the
@@ -149,7 +155,8 @@ impl CudnnContext {
     }
 }
 
-/// Supported type configurations for the pooling backward operation.
+/// Supported type configurations for the pooling backward operation as specified in the cuDNN
+/// [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingBackward).
 pub trait SupportedPoolBwd<T>: DataType + private::Sealed
 where
     T: DataType,
diff --git a/crates/cudnn/src/pooling/pooling_descriptor.rs b/crates/cudnn/src/pooling/pooling_descriptor.rs
index 654e467b..ecf57239 100644
--- a/crates/cudnn/src/pooling/pooling_descriptor.rs
+++ b/crates/cudnn/src/pooling/pooling_descriptor.rs
@@ -22,13 +22,15 @@ impl PoolingDescriptor {
     ///
     /// * `stride` - stride for each dimension.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetPoolingNdDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid configuration of arguments is detected.
     ///
     /// # Examples
     ///
-    ///
     /// ```
     /// # use std::error::Error;
     /// #
diff --git a/crates/cudnn/src/pooling/pooling_mode.rs b/crates/cudnn/src/pooling/pooling_mode.rs
index 58d786c2..013f9e97 100644
--- a/crates/cudnn/src/pooling/pooling_mode.rs
+++ b/crates/cudnn/src/pooling/pooling_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies the pooling method.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum PoolingMode {
     /// The maximum value inside the pooling window is used.
diff --git a/crates/cudnn/src/rnn/forward_mode.rs b/crates/cudnn/src/rnn/forward_mode.rs
index 9ecb2d2e..e0e68c74 100644
--- a/crates/cudnn/src/rnn/forward_mode.rs
+++ b/crates/cudnn/src/rnn/forward_mode.rs
@@ -3,7 +3,10 @@ use crate::sys;
 /// Specifies inference or training mode in RNN API.
 ///
 /// This parameter allows the cuDNN library to tune more precisely the size of the workspace buffer
-/// that could be different in inference and training regimens.
+/// that could be different in inference and training regimes.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnForwardMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum ForwardMode {
     /// Selects the inference mode.
diff --git a/crates/cudnn/src/rnn/mod.rs b/crates/cudnn/src/rnn/mod.rs
index 2f563f6f..97606c9c 100644
--- a/crates/cudnn/src/rnn/mod.rs
+++ b/crates/cudnn/src/rnn/mod.rs
@@ -42,6 +42,9 @@ impl CudnnContext {
     ///
     /// * `x_desc` - a RNN data descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetRNNTempSpaceSizes)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns an error is an incompatible or unsupported combination of input arguments was
@@ -86,6 +89,9 @@ impl CudnnContext {
     /// # Arguments
     ///
     /// `rnn_desc` - an RNN descriptor.
+    ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetRNNWeightSpaceSize)
+    /// may offer additional information about the APi behavior.
     pub fn get_rnn_weight_space_size<T1, T2>(
         &self,
         rnn_desc: &RnnDescriptor<T1, T2>,
@@ -193,6 +199,9 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve-space buffer in GPU memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNForward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors is an unsupported arguments combination is detected or if the supplied
@@ -396,6 +405,9 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve-space buffer in GPU memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNBackwardData_v8)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid or incompatible input argument was encountered.
@@ -542,6 +554,9 @@ impl CudnnContext {
     ///
     /// * `reserve_space` - reserve-space buffer in GPU memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNBackwardWeights_v8)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an invalid or incompatible input argument combinations was encountered.
diff --git a/crates/cudnn/src/rnn/rnn_algo.rs b/crates/cudnn/src/rnn/rnn_algo.rs
index 531cfcd9..50e78876 100644
--- a/crates/cudnn/src/rnn/rnn_algo.rs
+++ b/crates/cudnn/src/rnn/rnn_algo.rs
@@ -3,6 +3,9 @@ use crate::sys;
 /// A recurrent neural network algorithm.
 ///
 /// **Do note** that double precision is only supported by `RnnAlgo::Standard`.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNAlgo_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnAlgo {
diff --git a/crates/cudnn/src/rnn/rnn_bias_mode.rs b/crates/cudnn/src/rnn/rnn_bias_mode.rs
index 5fc1db8c..1c234b0b 100644
--- a/crates/cudnn/src/rnn/rnn_bias_mode.rs
+++ b/crates/cudnn/src/rnn/rnn_bias_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies the number of bias vectors for a recurrent neural network function.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNBiasMode_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnBiasMode {
@@ -16,17 +19,6 @@ pub enum RnnBiasMode {
     SingleRecurrentBias,
 }
 
-impl From<sys::cudnnRNNBiasMode_t> for RnnBiasMode {
-    fn from(raw: sys::cudnnRNNBiasMode_t) -> Self {
-        match raw {
-            sys::cudnnRNNBiasMode_t::CUDNN_RNN_NO_BIAS => Self::NoBias,
-            sys::cudnnRNNBiasMode_t::CUDNN_RNN_SINGLE_INP_BIAS => Self::SingleInpBias,
-            sys::cudnnRNNBiasMode_t::CUDNN_RNN_DOUBLE_BIAS => Self::DoubleBias,
-            sys::cudnnRNNBiasMode_t::CUDNN_RNN_SINGLE_REC_BIAS => Self::SingleRecurrentBias,
-        }
-    }
-}
-
 impl From<RnnBiasMode> for sys::cudnnRNNBiasMode_t {
     fn from(mode: RnnBiasMode) -> Self {
         match mode {
diff --git a/crates/cudnn/src/rnn/rnn_clip_mode.rs b/crates/cudnn/src/rnn/rnn_clip_mode.rs
index 18848ea1..ab5917b4 100644
--- a/crates/cudnn/src/rnn/rnn_clip_mode.rs
+++ b/crates/cudnn/src/rnn/rnn_clip_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Selects the LSTM cell clipping mode.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNClipMode_t)
+/// may offer additional information about the APi behavior.
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnClipMode {
diff --git a/crates/cudnn/src/rnn/rnn_data_descriptor.rs b/crates/cudnn/src/rnn/rnn_data_descriptor.rs
index dc064440..ae1340a6 100644
--- a/crates/cudnn/src/rnn/rnn_data_descriptor.rs
+++ b/crates/cudnn/src/rnn/rnn_data_descriptor.rs
@@ -51,6 +51,9 @@ where
     /// layout is specified. The symbol should be in the host memory, and if a `None` is passed in,
     /// then the padding position in the output will be undefined.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetRNNDataDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Panics
     ///
     /// If the length of `seq_lengths` doesn't match `batch_size`.
diff --git a/crates/cudnn/src/rnn/rnn_data_layout.rs b/crates/cudnn/src/rnn/rnn_data_layout.rs
index 41157508..8a6bf558 100644
--- a/crates/cudnn/src/rnn/rnn_data_layout.rs
+++ b/crates/cudnn/src/rnn/rnn_data_layout.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// The data layout for input and output of a recurrent neural network.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNDataLayout_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnDataLayout {
     /// Data layout is padded, with outer stride from one time-step to the next.
diff --git a/crates/cudnn/src/rnn/rnn_descriptor.rs b/crates/cudnn/src/rnn/rnn_descriptor.rs
index db2f4523..8ac3bf3a 100644
--- a/crates/cudnn/src/rnn/rnn_descriptor.rs
+++ b/crates/cudnn/src/rnn/rnn_descriptor.rs
@@ -89,6 +89,9 @@ where
     /// is enabled, layouts `SeqMajorUnpacked` and `BatchMajorUnpacked` are permitted in RNN data
     /// descriptors.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetRNNDescriptor_v8)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if an incompatible or unsupported combination of input arguments was
@@ -214,6 +217,9 @@ where
     /// **Do note** that cell clipping is only available if the cell mode associated to this
     /// descriptor is `RnnMode::Lstm`.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNSetClip_v8)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if either `left_clip` or `right_clip` is NaN or if `right_clip` <
diff --git a/crates/cudnn/src/rnn/rnn_direction_mode.rs b/crates/cudnn/src/rnn/rnn_direction_mode.rs
index 1268a9d0..cf1cfc59 100644
--- a/crates/cudnn/src/rnn/rnn_direction_mode.rs
+++ b/crates/cudnn/src/rnn/rnn_direction_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies the recurrence pattern for a recurrent neural network.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDirectionMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnDirectionMode {
     /// The network iterates recurrently from the first input to the last.
@@ -11,15 +14,6 @@ pub enum RnnDirectionMode {
     Bidirectional,
 }
 
-impl From<sys::cudnnDirectionMode_t> for RnnDirectionMode {
-    fn from(raw: sys::cudnnDirectionMode_t) -> Self {
-        match raw {
-            sys::cudnnDirectionMode_t::CUDNN_UNIDIRECTIONAL => Self::Unidirectional,
-            sys::cudnnDirectionMode_t::CUDNN_BIDIRECTIONAL => Self::Bidirectional,
-        }
-    }
-}
-
 impl From<RnnDirectionMode> for sys::cudnnDirectionMode_t {
     fn from(mode: RnnDirectionMode) -> Self {
         match mode {
diff --git a/crates/cudnn/src/rnn/rnn_input_mode.rs b/crates/cudnn/src/rnn/rnn_input_mode.rs
index eeab89c3..9eb83348 100644
--- a/crates/cudnn/src/rnn/rnn_input_mode.rs
+++ b/crates/cudnn/src/rnn/rnn_input_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies the behavior of the first layer in a recurrent neural network.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNInputMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnInputMode {
     /// A biased matrix multiplication is performed at the input of the first recurrent layer.
@@ -11,15 +14,6 @@ pub enum RnnInputMode {
     SkipInput,
 }
 
-impl From<sys::cudnnRNNInputMode_t> for RnnInputMode {
-    fn from(raw: sys::cudnnRNNInputMode_t) -> Self {
-        match raw {
-            sys::cudnnRNNInputMode_t::CUDNN_LINEAR_INPUT => Self::LinearInput,
-            sys::cudnnRNNInputMode_t::CUDNN_SKIP_INPUT => Self::SkipInput,
-        }
-    }
-}
-
 impl From<RnnInputMode> for sys::cudnnRNNInputMode_t {
     fn from(mode: RnnInputMode) -> Self {
         match mode {
diff --git a/crates/cudnn/src/rnn/rnn_mode.rs b/crates/cudnn/src/rnn/rnn_mode.rs
index 50a36c38..91e7c4bb 100644
--- a/crates/cudnn/src/rnn/rnn_mode.rs
+++ b/crates/cudnn/src/rnn/rnn_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies the type of recurrent neural network used.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RnnMode {
     /// A single-gate recurrent neural network with a ReLU activation function.
@@ -13,17 +16,6 @@ pub enum RnnMode {
     Gru,
 }
 
-impl From<sys::cudnnRNNMode_t> for RnnMode {
-    fn from(raw: sys::cudnnRNNMode_t) -> Self {
-        match raw {
-            sys::cudnnRNNMode_t::CUDNN_RNN_RELU => Self::RnnReLu,
-            sys::cudnnRNNMode_t::CUDNN_RNN_TANH => Self::RnnTanh,
-            sys::cudnnRNNMode_t::CUDNN_LSTM => Self::Lstm,
-            sys::cudnnRNNMode_t::CUDNN_GRU => Self::Gru,
-        }
-    }
-}
-
 impl From<RnnMode> for sys::cudnnRNNMode_t {
     fn from(mode: RnnMode) -> Self {
         match mode {
diff --git a/crates/cudnn/src/softmax/mod.rs b/crates/cudnn/src/softmax/mod.rs
index bba60b44..387bd71c 100644
--- a/crates/cudnn/src/softmax/mod.rs
+++ b/crates/cudnn/src/softmax/mod.rs
@@ -28,6 +28,9 @@ impl CudnnContext {
     ///
     /// * `y` - output data in device memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSoftmaxForward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the configuration in input is not supported, the tensor shapes differ or
@@ -94,6 +97,9 @@ impl CudnnContext {
     ///
     /// * `dx` - differential data in device memory.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSoftmaxBackward)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Errors
     ///
     /// Returns errors if the configuration in input is not supported, the tensor shapes differ or
diff --git a/crates/cudnn/src/softmax/softmax_algo.rs b/crates/cudnn/src/softmax/softmax_algo.rs
index b3bb8337..99652f4c 100644
--- a/crates/cudnn/src/softmax/softmax_algo.rs
+++ b/crates/cudnn/src/softmax/softmax_algo.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies the implementation of the softmax function.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSoftmaxAlgorithm_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum SoftmaxAlgo {
     /// This implementation applies the straightforward softmax operation.
diff --git a/crates/cudnn/src/softmax/softmax_mode.rs b/crates/cudnn/src/softmax/softmax_mode.rs
index 16f641c1..8d730bf9 100644
--- a/crates/cudnn/src/softmax/softmax_mode.rs
+++ b/crates/cudnn/src/softmax/softmax_mode.rs
@@ -1,6 +1,9 @@
 use crate::sys;
 
 /// Specifies how the softmax input must be processed.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSoftmaxMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum SoftmaxMode {
     /// The softmax operation is computed per image (N) across the dimensions C,H,W.
diff --git a/crates/cudnn/src/tensor/tensor_descriptor.rs b/crates/cudnn/src/tensor/tensor_descriptor.rs
index 37ada676..3d141b92 100644
--- a/crates/cudnn/src/tensor/tensor_descriptor.rs
+++ b/crates/cudnn/src/tensor/tensor_descriptor.rs
@@ -23,6 +23,9 @@ where
     ///
     /// * `strides` - strides for the tensor descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetTensorNdDescriptor)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
@@ -77,6 +80,9 @@ where
     ///
     /// * `format` - format for the tensor descriptor.
     ///
+    /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetTensorNdDescriptorEx)
+    /// may offer additional information about the APi behavior.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/crates/cudnn/src/tensor/tensor_format.rs b/crates/cudnn/src/tensor/tensor_format.rs
index 1ee2622d..707b201d 100644
--- a/crates/cudnn/src/tensor/tensor_format.rs
+++ b/crates/cudnn/src/tensor/tensor_format.rs
@@ -25,6 +25,9 @@ impl From<ScalarC> for sys::cudnnTensorFormat_t {
 }
 
 /// Predefined layouts for tensors.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnTensorFormat_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum TensorFormat {
     /// Scalar valued formats.
diff --git a/crates/cudnn/src/w_grad_mode.rs b/crates/cudnn/src/w_grad_mode.rs
index d3c6211f..e6466bb4 100644
--- a/crates/cudnn/src/w_grad_mode.rs
+++ b/crates/cudnn/src/w_grad_mode.rs
@@ -2,6 +2,9 @@ use crate::sys;
 
 /// Selects how buffers holding gradients of the loss function, computed with respect to trainable
 /// parameters, are updated.
+///
+/// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnWgradMode_t)
+/// may offer additional information about the APi behavior.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum WGradMode {
     /// A weight gradient component, corresponding to a new batch of inputs, overwrites previously

From be1c977a3af85d97e48ae41571281e709590c59b Mon Sep 17 00:00:00 2001
From: frjnn <f.iannelli.francesco229@gmail.com>
Date: Wed, 16 Mar 2022 21:42:49 +0100
Subject: [PATCH 3/5] Chore: Add primer to README.md

---
 crates/cudnn/README.md                        | 82 +++++++++++++++++++
 crates/cudnn/build.rs                         |  1 -
 .../src/convolution/filter_descriptor.rs      | 55 ++++++++++++-
 crates/cudnn/src/lib.rs                       |  2 +
 4 files changed, 138 insertions(+), 2 deletions(-)

diff --git a/crates/cudnn/README.md b/crates/cudnn/README.md
index 62d853f6..1d9a0d1a 100644
--- a/crates/cudnn/README.md
+++ b/crates/cudnn/README.md
@@ -1,2 +1,84 @@
 # cudnn
 Type safe cuDNN wrapper for the Rust programming language.
+
+## Project status
+The current version of cuDNN targeted by this wrapper is the 8.3.2. You can refer to the official [release notes](https://docs.nvidia.com/deeplearning/cudnn/release-notes/index.html) and to the [support matrix](https://docs.nvidia.com/deeplearning/cudnn/support-matrix/index.html) by NVIDIA.
+
+The legacy API is somewhat complete and it is usable but the backend API is still to be considered a work in progress and its usage is therefore much discouraged. Both APIs are still being developed so expect bugs and reasonable breaking changes whilst using this crate. 
+
+The project is part of the Rust CUDA ecosystem and is actively maintained by @frjnn.
+
+## Primer 
+
+Here follows a list of useful concepts that should be taken as a handbook for the users of the crate. This is not intended to be the full documentation, as each wrapped struct, enum and function has its own docs, but rather a quick sum up of the key points of the API. As a matter of fact, for a deeper view, you should refer both to the docs of each item and to the [official ones](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#overview) by NVIDIA. Furthermore, if you are new to cuDNN we strongly suggest reading the [official developer guide](https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#overview).
+
+### Device buffers
+
+This crate is built around [`cust`](https://docs.rs/cust/latest/cust/memory/index.html) own memory routines and transfer functions.
+
+### cuDNN statuses and Result
+
+All cuDNN library functions return their status. This crate uses [`Result`](https://doc.rust-lang.org/std/result/enum.Result.html) to achieve a leaner, idiomatic and easier to manage API.
+
+### cuDNN handles and RAII
+
+The main entry point of the cuDNN library is the `CudnnContext` struct. This handle is tied to a device and it is explicitly passed to every subsequent library function that operates on GPU data. It manages resources allocations both on the host and the device and takes care of the synchronization of all the the cuDNN primitives. 
+
+The handles, and the other cuDNN structs wrapped by this crate, are implementors of the [`Drop`](https://doc.rust-lang.org/std/ops/trait.Drop.html) trait which implicitly calls their destructors on the cuDNN side when they go out of scope. 
+
+cuDNN contexts can be created as shown in the following snippet:
+
+```rust
+use cudnn::CudnnContext;
+
+let ctx = CudnnContext::new().unwrap();
+```
+
+### cuDNN data types
+
+In order to enforce type safety as much as possible at compile time, we shifted away from the original cuDNN enumerated data types and instead opted to leverage Rust's generics. In practice, this means that specifying the data type of a cuDNN tensor descriptor is done as follows:
+
+```rust
+use cudnn::TensorDescriptor;
+
+let shape = &[5, 5, 10, 25];
+let strides = &[1250, 250, 25, 1];
+
+// f32 tensor
+let desc = TensorDescriptor::<f32>::new_strides(shape, strides).unwrap();
+```
+
+This API also allows for using Rust own types as cuDNN data types, which we see as a desirable property. 
+
+Safely manipulating cuDNN data types that do not have any such direct match, such as vectorized ones, whilst still performing compile time compatibility checks can be done as follows:
+
+```rust
+use cudnn::{TensorDescriptor, Vec4};
+
+let shape = &[4, 32, 32, 32];
+
+// in cuDNN this is equal to the INT8x4 data type and CUDNN_TENSOR_NCHW_VECT_C format
+let desc = TensorDescriptor::<i8>::new_vectorized::<Vec4>(shape).unwrap();
+```
+
+The previous tensor descriptor can be used together with a `i8` device buffer and cuDNN will see it as being a tensor of `CUDNN_TENSOR_NCHW_VECT_C` format and `CUDNN_DATA_INT8x4` data type.
+
+Currently this crate does not support `f16` and `bf16` data types.
+
+### Tensor formats
+
+We decided not to check tensor format configurations at compile time, since it is too strong of a requirement. As a consequence, should you mess up, the program will fail at run-time. A proper understanding of the cuDNN API mechanics is thus fundamental to properly use this crate. 
+
+You can refer to this [extract](https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#data-layout-formats) from the cuDNN developer guide to learn more about tensor formats.
+
+We split the original cuDNN tensor format enum, which counts 3 variants, in 2 parts: the `ScalarC` enum and the `TensorFormat::NchwVectC` enum variant. The former stands for "scalar channel" and it encapsulates the `Nchw` and `Nhwc` formats. Scalar channel formats can be both converted to the `TensorFormat` enum with [`.into()`](https://doc.rust-lang.org/std/convert/trait.Into.html).
+
+```rust
+use cudnn::{TensorFormat, ScalarC};
+
+let sc_fmt = ScalarC::Nchw;
+
+let vc_fmt = TensorFormat::NchwVectC;
+
+let sc_to_tf: TensorFormat = sc_fmt.into();
+``` 
\ No newline at end of file
diff --git a/crates/cudnn/build.rs b/crates/cudnn/build.rs
index 9d137533..e1f2c284 100644
--- a/crates/cudnn/build.rs
+++ b/crates/cudnn/build.rs
@@ -1,5 +1,4 @@
 fn main() {
-    println!("cargo:include=/usr/local/cuda/include");
     println!("cargo:rustc-link-lib=dylib=cudnn");
     println!("cargo:rerun-if-changed=build.rs");
 }
diff --git a/crates/cudnn/src/convolution/filter_descriptor.rs b/crates/cudnn/src/convolution/filter_descriptor.rs
index bbce59b9..c4767959 100644
--- a/crates/cudnn/src/convolution/filter_descriptor.rs
+++ b/crates/cudnn/src/convolution/filter_descriptor.rs
@@ -1,4 +1,4 @@
-use crate::{sys, CudnnError, DataType, IntoResult, ScalarC};
+use crate::{sys, CudnnError, DataType, IntoResult, ScalarC, TensorFormat, VecType};
 use std::{marker::PhantomData, mem::MaybeUninit};
 
 /// A generic description of an n-dimensional filter dataset.
@@ -76,6 +76,59 @@ where
             })
         }
     }
+
+    /// Creates a generic filter descriptor with the given shape and vectorized memory format.
+    ///
+    /// # Arguments
+    ///
+    /// `shape` - slice containing the size of the filter for every dimension.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if at least one of the elements of the array shape was negative or zero,
+    /// the dimension was smaller than 3 or larger than `CUDNN_DIM_MAX`, or the total size of the
+    /// filter descriptor exceeds the maximum limit of 2 Giga-elements.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use std::error::Error;
+    /// #
+    /// # fn main() -> Result<(), Box<dyn Error>> {
+    /// use cudnn::{FilterDescriptor, Vec4};
+    ///
+    /// let shape = &[4, 32, 32, 32];
+    ///
+    /// let desc = FilterDescriptor::<i8>::new_vectorized::<Vec4>(shape)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn new_vectorized<V: VecType<T>>(shape: &[i32]) -> Result<Self, CudnnError> {
+        let mut raw = MaybeUninit::uninit();
+
+        let ndims = shape.len();
+        let format = TensorFormat::NchwVectC;
+
+        unsafe {
+            sys::cudnnCreateFilterDescriptor(raw.as_mut_ptr()).into_result()?;
+
+            let raw = raw.assume_init();
+
+            sys::cudnnSetFilterNdDescriptor(
+                raw,
+                V::into_raw(),
+                format.into(),
+                ndims as i32,
+                shape.as_ptr(),
+            )
+            .into_result()?;
+
+            Ok(Self {
+                raw,
+                data_type: PhantomData,
+            })
+        }
+    }
 }
 
 impl<T> Drop for FilterDescriptor<T>
diff --git a/crates/cudnn/src/lib.rs b/crates/cudnn/src/lib.rs
index 11c14d2d..232ed3a4 100644
--- a/crates/cudnn/src/lib.rs
+++ b/crates/cudnn/src/lib.rs
@@ -1,3 +1,5 @@
+#![deny(rustdoc::broken_intra_doc_links)]
+#[doc = include_str!("../README.md")]
 mod activation;
 mod attention;
 mod backend;

From 37548eedc4b03592517c6676c65d661aa944955a Mon Sep 17 00:00:00 2001
From: Francesco Iannelli <54247008+frjnn@users.noreply.github.com>
Date: Wed, 16 Mar 2022 21:47:13 +0100
Subject: [PATCH 4/5] Fix README.md

---
 crates/cudnn/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/cudnn/README.md b/crates/cudnn/README.md
index 1d9a0d1a..35828844 100644
--- a/crates/cudnn/README.md
+++ b/crates/cudnn/README.md
@@ -6,7 +6,7 @@ The current version of cuDNN targeted by this wrapper is the 8.3.2. You can refe
 
 The legacy API is somewhat complete and it is usable but the backend API is still to be considered a work in progress and its usage is therefore much discouraged. Both APIs are still being developed so expect bugs and reasonable breaking changes whilst using this crate. 
 
-The project is part of the Rust CUDA ecosystem and is actively maintained by @frjnn.
+The project is part of the Rust CUDA ecosystem and is actively maintained by [frjnn](https://github.com/frjnn).
 
 ## Primer 
 
@@ -81,4 +81,4 @@ let sc_fmt = ScalarC::Nchw;
 let vc_fmt = TensorFormat::NchwVectC;
 
 let sc_to_tf: TensorFormat = sc_fmt.into();
-``` 
\ No newline at end of file
+``` 

From 85d2007ee566394745a92b0a75e0f006a596eda3 Mon Sep 17 00:00:00 2001
From: frjnn <f.iannelli.francesco229@gmail.com>
Date: Wed, 16 Mar 2022 22:17:37 +0100
Subject: [PATCH 5/5] Fix: Broken docs links

---
 crates/cudnn/README.md                           | 12 ++++++++----
 crates/cudnn/src/activation/activation_mode.rs   |  2 +-
 crates/cudnn/src/activation/mod.rs               |  1 -
 crates/cudnn/src/convolution/convolution_algo.rs |  6 +++---
 crates/cudnn/src/convolution/mod.rs              |  4 ++--
 crates/cudnn/src/dropout/mod.rs                  |  2 +-
 crates/cudnn/src/error.rs                        |  8 ++++----
 7 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/crates/cudnn/README.md b/crates/cudnn/README.md
index 1d9a0d1a..1a6cd321 100644
--- a/crates/cudnn/README.md
+++ b/crates/cudnn/README.md
@@ -14,7 +14,7 @@ Here follows a list of useful concepts that should be taken as a handbook for th
 
 ### Device buffers
 
-This crate is built around [`cust`](https://docs.rs/cust/latest/cust/memory/index.html) own memory routines and transfer functions.
+This crate is built around [`cust`](https://docs.rs/cust/latest/cust/index.html) which is the core wrapper for interfacing with the CUDA driver API of our choice.
 
 ### cuDNN statuses and Result
 
@@ -39,7 +39,9 @@ let ctx = CudnnContext::new().unwrap();
 In order to enforce type safety as much as possible at compile time, we shifted away from the original cuDNN enumerated data types and instead opted to leverage Rust's generics. In practice, this means that specifying the data type of a cuDNN tensor descriptor is done as follows:
 
 ```rust
-use cudnn::TensorDescriptor;
+use cudnn::{CudnnContext, TensorDescriptor};
+
+let ctx = CudnnContext::new().unwrap();
 
 let shape = &[5, 5, 10, 25];
 let strides = &[1250, 250, 25, 1];
@@ -53,7 +55,9 @@ This API also allows for using Rust own types as cuDNN data types, which we see
 Safely manipulating cuDNN data types that do not have any such direct match, such as vectorized ones, whilst still performing compile time compatibility checks can be done as follows:
 
 ```rust
-use cudnn::{TensorDescriptor, Vec4};
+use cudnn::{CudnnContext, TensorDescriptor, Vec4};
+
+let ctx = CudnnContext::new().unwrap();
 
 let shape = &[4, 32, 32, 32];
 
@@ -74,7 +78,7 @@ You can refer to this [extract](https://docs.nvidia.com/deeplearning/cudnn/devel
 We split the original cuDNN tensor format enum, which counts 3 variants, in 2 parts: the `ScalarC` enum and the `TensorFormat::NchwVectC` enum variant. The former stands for "scalar channel" and it encapsulates the `Nchw` and `Nhwc` formats. Scalar channel formats can be both converted to the `TensorFormat` enum with [`.into()`](https://doc.rust-lang.org/std/convert/trait.Into.html).
 
 ```rust
-use cudnn::{TensorFormat, ScalarC};
+use cudnn::{ScalarC, TensorFormat};
 
 let sc_fmt = ScalarC::Nchw;
 
diff --git a/crates/cudnn/src/activation/activation_mode.rs b/crates/cudnn/src/activation/activation_mode.rs
index 01c43f4e..621bc98e 100644
--- a/crates/cudnn/src/activation/activation_mode.rs
+++ b/crates/cudnn/src/activation/activation_mode.rs
@@ -21,7 +21,7 @@ pub enum ActivationMode {
     /// Selects no activation.
     ///
     /// **Do note** that this is only valid for an activation descriptor passed to
-    /// [`convolution_bias_act_forward()`](CudnnContext::convolution_bias_act_fwd).
+    /// [`convolution_bias_act_forward()`](crate::CudnnContext::convolution_bias_act_forward).
     Identity,
 }
 
diff --git a/crates/cudnn/src/activation/mod.rs b/crates/cudnn/src/activation/mod.rs
index 8674f8bd..b75b7917 100644
--- a/crates/cudnn/src/activation/mod.rs
+++ b/crates/cudnn/src/activation/mod.rs
@@ -28,7 +28,6 @@ impl CudnnContext {
     ///
     /// * `y` - data for the output.
     ///
-    ///
     /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnActivationForward)
     /// may offer additional information about the APi behavior.
     ///
diff --git a/crates/cudnn/src/convolution/convolution_algo.rs b/crates/cudnn/src/convolution/convolution_algo.rs
index 0d9b065b..bf5f312f 100644
--- a/crates/cudnn/src/convolution/convolution_algo.rs
+++ b/crates/cudnn/src/convolution/convolution_algo.rs
@@ -63,7 +63,7 @@ pub enum ConvFwdAlgo {
     Fft,
     /// This algorithm uses the Fast-Fourier Transform approach but splits the inputs into tiles.
     /// A significant memory workspace is needed to store intermediate results, but less than
-    /// [`Fft`], for large size images.
+    /// [`ConvFwdAlgo::Fft`], for large size images.
     FftTiling,
     /// This algorithm uses the Winograd Transform approach to compute the convolution. A reasonably
     /// sized workspace is needed to store intermediate results.
@@ -158,7 +158,7 @@ pub enum ConvBwdDataAlgo {
     Fft,
     /// This algorithm uses the Fast-Fourier Transform approach but splits the inputs into tiles.
     /// A significant memory workspace is needed to store intermediate results, but less than
-    /// [`Fft`], for large size images.
+    /// [`ConvBwdDataAlgo::Fft`], for large size images.
     FftTiling,
     /// This algorithm uses the Winograd Transform approach to compute the convolution. A reasonably
     /// sized workspace is needed to store intermediate results.
@@ -240,7 +240,7 @@ pub enum ConvBwdFilterAlgo {
     Fft,
     /// This algorithm uses the Fast-Fourier Transform approach but splits the inputs into tiles.
     /// A significant memory workspace is needed to store intermediate results, but less than
-    /// [`Fft`], for large size images.
+    /// [`ConvBwdFilterAlgo::Fft`], for large size images.
     FftTiling,
     /// This algorithm uses the Winograd Transform approach to compute the convolution. A reasonably
     /// sized workspace is needed to store intermediate results.
diff --git a/crates/cudnn/src/convolution/mod.rs b/crates/cudnn/src/convolution/mod.rs
index 673ab766..109a9859 100644
--- a/crates/cudnn/src/convolution/mod.rs
+++ b/crates/cudnn/src/convolution/mod.rs
@@ -1082,7 +1082,7 @@ impl CudnnContext {
     ///
     /// * `work_space` -  a buffer to GPU memory to a workspace needed to be able to execute the
     /// specified algorithm. Must be left to `None` if the algorithm works in-place. The workspace
-    /// dimension can be obtained with [`get_convolution_backward_data_workspace_size`].
+    /// dimension can be obtained with [`get_convolution_backward_data_workspace_size()`](crate::CudnnContext::get_convolution_backward_data_workspace_size).
     ///
     /// * `beta` - scaling parameter.
     ///
@@ -1238,7 +1238,7 @@ impl CudnnContext {
     ///
     /// * `work_space` -  a buffer to GPU memory to a workspace needed to be able to execute the
     /// specified algorithm. Must be left to `None` if the algorithm works in-place. The workspace
-    /// dimension can be obtained with [`get_convolution_backward_filter_workspace_size()`].
+    /// dimension can be obtained with [`get_convolution_backward_data_workspace_size()`](crate::CudnnContext::get_convolution_backward_data_workspace_size).
     ///
     /// * `beta` - scaling parameter.
     ///
diff --git a/crates/cudnn/src/dropout/mod.rs b/crates/cudnn/src/dropout/mod.rs
index 9b57831a..f8b03bf9 100644
--- a/crates/cudnn/src/dropout/mod.rs
+++ b/crates/cudnn/src/dropout/mod.rs
@@ -106,7 +106,7 @@ impl CudnnContext {
     /// * `seed` - seed used to initialize random number generator states.
     ///
     /// **Do note** that the exact amount of memory can be obtained with
-    /// [`get_dropout_states_size()`](CudnnContext::get_dropout_states_sizes).
+    /// [`get_dropout_states_size()`](crate::CudnnContext::get_dropout_states_size).
     ///
     /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetDropoutDescriptor)
     /// may offer additional information about the APi behavior.
diff --git a/crates/cudnn/src/error.rs b/crates/cudnn/src/error.rs
index 448a717b..6305e03d 100644
--- a/crates/cudnn/src/error.rs
+++ b/crates/cudnn/src/error.rs
@@ -10,10 +10,10 @@ use std::{error::Error, ffi::CStr, fmt::Display};
 pub enum CudnnError {
     /// The cuDNN library was not initialized properly.
     ///
-    /// This error is usually returned when a call to [`CudnnContext::new()`] fails or when `CudnnContext::new()`
-    /// has not been called prior to calling another cuDNN routine. In the former case, it is
-    /// usually due to an error in the CUDA Runtime API called by such a function or by an error in
-    /// the hardware setup.
+    /// This error is usually returned when a call to [`crate::CudnnContext::new()`] fails or when
+    /// `CudnnContext::new()` has not been called prior to calling another cuDNN routine. In the
+    /// former case, it is usually due to an error in the CUDA Runtime API called by such a function
+    /// or by an error in the hardware setup.
     NotInitialized,
     /// Resource allocation failed inside the cuDNN library. This is usually caused by an internal
     /// `cudaMalloc()` failure.