Skip to content

Commit 54978ca

Browse files
authored
Feat: Add multi-head attention backward passes and convolution fused bias-act (#62)
* Feat: Add `AttnWeight` enum and `ScalingDataType` trait
* Feat: Add multi-head attention backward passes
* Feat: Add convolution fused bias activation
1 parent 820b5cf commit 54978ca

File tree

11 files changed

+680
-149
lines changed

11 files changed

+680
-149
lines changed

crates/cudnn/src/activation/activation_mode.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ pub enum ActivationMode {
1515
Elu,
1616
/// Selects the swish function.
1717
Swish,
18+
/// Selects no activation.
19+
///
20+
/// **Do note** that this is only valid for an activation descriptor passed to
21+
/// [`convolution_bias_act_forward()`](CudnnContext::convolution_bias_act_fwd).
22+
Identity,
1823
}
1924

2025
impl From<ActivationMode> for sys::cudnnActivationMode_t {
@@ -26,6 +31,7 @@ impl From<ActivationMode> for sys::cudnnActivationMode_t {
2631
ActivationMode::ClippedRelu => Self::CUDNN_ACTIVATION_CLIPPED_RELU,
2732
ActivationMode::Elu => Self::CUDNN_ACTIVATION_ELU,
2833
ActivationMode::Swish => Self::CUDNN_ACTIVATION_SWISH,
34+
ActivationMode::Identity => Self::CUDNN_ACTIVATION_IDENTITY,
2935
}
3036
}
3137
}

crates/cudnn/src/activation/mod.rs

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ mod activation_mode;
44
pub use activation_descriptor::*;
55
pub use activation_mode::*;
66

7-
use crate::{private, sys, CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor};
7+
use crate::{
8+
private, sys, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor,
9+
};
810
use cust::memory::GpuBuffer;
911
use std::mem::MaybeUninit;
1012

@@ -49,11 +51,11 @@ impl CudnnContext {
4951
///
5052
/// let desc = ActivationDescriptor::new(mode, nan_opt, coefficient)?;
5153
///
52-
/// let alpha: f32 = 1.0;
54+
/// let alpha = 1.0;
5355
/// let x_desc = TensorDescriptor::<i8>::new_strides(&[1, 1, 1, 5], &[5, 5, 5, 1])?;
5456
/// let x = DeviceBuffer::<i8>::from_slice(&[10, 10, 10, 10, 10])?;
5557
///
56-
/// let beta: f32 = 0.0;
58+
/// let beta = 0.0;
5759
/// let y_desc = TensorDescriptor::<i8>::new_strides(&[1, 1, 1, 5], &[5, 5, 5, 1])?;
5860
/// let mut y = DeviceBuffer::<i8>::from_slice(&[0, 0, 0, 0, 0])?;
5961
///
@@ -76,7 +78,7 @@ impl CudnnContext {
7678
y: &mut impl GpuBuffer<T>,
7779
) -> Result<(), CudnnError>
7880
where
79-
CompT: SupportedActFwd<T>,
81+
CompT: ScalingDataType<T>,
8082
T: DataType,
8183
{
8284
let alpha_ptr = &alpha as *const CompT as *const _;
@@ -179,27 +181,6 @@ impl CudnnContext {
179181
}
180182
}
181183

182-
/// Supported data type configurations for the activation forward operation.
183-
pub trait SupportedActFwd<T>: DataType + private::Sealed
184-
where
185-
T: DataType,
186-
{
187-
}
188-
189-
impl SupportedActFwd<i8> for f32 {}
190-
impl SupportedActFwd<u8> for f32 {}
191-
impl SupportedActFwd<i32> for f32 {}
192-
impl SupportedActFwd<i64> for f32 {}
193-
impl SupportedActFwd<f32> for f32 {}
194-
impl SupportedActFwd<f64> for f32 {}
195-
196-
impl SupportedActFwd<i8> for f64 {}
197-
impl SupportedActFwd<u8> for f64 {}
198-
impl SupportedActFwd<i32> for f64 {}
199-
impl SupportedActFwd<i64> for f64 {}
200-
impl SupportedActFwd<f32> for f64 {}
201-
impl SupportedActFwd<f64> for f64 {}
202-
203184
/// Supported type configurations for the activation backward operation.
204185
pub trait SupportedActBwd<T>: DataType + private::Sealed
205186
where
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
use crate::sys;
2+
3+
/// Identifies one group of multi-head attention parameters: the projection
/// weights or biases associated with the queries, keys, values, or output.
///
/// Passed to weight-retrieval functions to select which parameter group of
/// the attention layer to address.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum AttnWeight {
    /// Input projection weights applied to the queries.
    QWeights,
    /// Input projection weights applied to the keys.
    KWeights,
    /// Input projection weights applied to the values.
    VWeights,
    /// Output projection weights.
    OWeights,
    /// Input projection biases applied to the queries.
    QBiases,
    /// Input projection biases applied to the keys.
    KBiases,
    /// Input projection biases applied to the values.
    VBiases,
    /// Output projection biases.
    OBiases,
}
23+
24+
impl From<AttnWeight> for sys::cudnnMultiHeadAttnWeightKind_t {
25+
fn from(kind: AttnWeight) -> Self {
26+
match kind {
27+
AttnWeight::QWeights => Self::CUDNN_MH_ATTN_Q_WEIGHTS,
28+
AttnWeight::KWeights => Self::CUDNN_MH_ATTN_K_WEIGHTS,
29+
AttnWeight::VWeights => Self::CUDNN_MH_ATTN_V_WEIGHTS,
30+
AttnWeight::OWeights => Self::CUDNN_MH_ATTN_O_WEIGHTS,
31+
AttnWeight::QBiases => Self::CUDNN_MH_ATTN_Q_BIASES,
32+
AttnWeight::KBiases => Self::CUDNN_MH_ATTN_K_BIASES,
33+
AttnWeight::VBiases => Self::CUDNN_MH_ATTN_V_BIASES,
34+
AttnWeight::OBiases => Self::CUDNN_MH_ATTN_O_BIASES,
35+
}
36+
}
37+
}

0 commit comments

Comments
 (0)