Skip to content

Commit 3253aba

Browse files
metaspaceaxboe
authored andcommitted
rust: block: introduce kernel::block::mq module
Add initial abstractions for working with blk-mq. This patch is a maintained, refactored subset of code originally published by Wedson Almeida Filho <[email protected]> [1]. [1] https://github.com/wedsonaf/linux/tree/f2cfd2fe0e2ca4e90994f96afe268bbd4382a891/rust/kernel/blk/mq.rs Cc: Wedson Almeida Filho <[email protected]> Signed-off-by: Andreas Hindborg <[email protected]> Reviewed-by: Benno Lossin <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent c2670cf commit 3253aba

File tree

11 files changed

+984
-0
lines changed

11 files changed

+984
-0
lines changed

rust/bindings/bindings_helper.h

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
*/
88

99
#include <kunit/test.h>
10+
#include <linux/blk_types.h>
11+
#include <linux/blk-mq.h>
1012
#include <linux/errname.h>
1113
#include <linux/ethtool.h>
1214
#include <linux/jiffies.h>
@@ -20,6 +22,7 @@
2022

2123
/* `bindgen` gets confused at certain things. */
2224
const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
25+
const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE;
2326
const gfp_t RUST_CONST_HELPER_GFP_ATOMIC = GFP_ATOMIC;
2427
const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL;
2528
const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT;

rust/helpers.c

+16
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,19 @@ static_assert(
186186
__alignof__(size_t) == __alignof__(uintptr_t),
187187
"Rust code expects C `size_t` to match Rust `usize`"
188188
);
189+
190+
// This will soon be moved to a separate file, so no need to merge with above.
191+
#include <linux/blk-mq.h>
192+
#include <linux/blkdev.h>
193+
194+
/*
 * Out-of-line wrapper around blk_mq_rq_to_pdu(): forwards @rq unchanged and
 * returns whatever blk_mq_rq_to_pdu() returns. Exported (GPL-only) under the
 * rust_helper_ prefix.
 *
 * NOTE(review): presumably needed because blk_mq_rq_to_pdu() is an inline
 * function in <linux/blk-mq.h> that Rust code cannot call directly - confirm.
 */
void *rust_helper_blk_mq_rq_to_pdu(struct request *rq)
195+
{
196+
return blk_mq_rq_to_pdu(rq);
197+
}
198+
EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_to_pdu);
199+
200+
/*
 * Out-of-line wrapper around blk_mq_rq_from_pdu(): forwards @pdu unchanged
 * and returns the `struct request *` that blk_mq_rq_from_pdu() yields.
 * Exported (GPL-only) under the rust_helper_ prefix.
 *
 * NOTE(review): presumably @pdu must be a pointer previously obtained from
 * blk_mq_rq_to_pdu() - confirm against <linux/blk-mq.h>.
 */
struct request *rust_helper_blk_mq_rq_from_pdu(void *pdu)
201+
{
202+
return blk_mq_rq_from_pdu(pdu);
203+
}
204+
EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_from_pdu);

rust/kernel/block.rs

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
//! Types for working with the block layer.
4+
5+
pub mod mq;

rust/kernel/block/mq.rs

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
//! This module provides types for implementing block drivers that interface the
4+
//! blk-mq subsystem.
5+
//!
6+
//! To implement a block device driver, a Rust module must do the following:
7+
//!
8+
//! - Implement [`Operations`] for a type `T`.
9+
//! - Create a [`TagSet<T>`].
10+
//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`].
11+
//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in
12+
//! the `TagSet` reference.
13+
//!
14+
//! The types available in this module that have direct C counterparts are:
15+
//!
16+
//! - The [`TagSet`] type that abstracts the C type `struct blk_mq_tag_set`.
17+
//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`.
18+
//! - The [`Request`] type that abstracts the C type `struct request`.
19+
//!
20+
//! The kernel will interface with the block device driver by calling the method
21+
//! implementations of the `Operations` trait.
22+
//!
23+
//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`]
24+
//! instances. The `Request` type is a wrapper around the C `struct request`.
25+
//! The driver must mark end of processing by calling one of the
26+
//! `Request::end` methods. Failure to do so can lead to deadlock or timeout
27+
//! errors. Please note that the C function `blk_mq_start_request` is implicitly
28+
//! called when the request is queued with the driver.
29+
//!
30+
//! The `TagSet` is responsible for creating and maintaining a mapping between
31+
//! `Request`s and integer ids as well as carrying a pointer to the vtable
32+
//! generated by `Operations`. This mapping is useful for associating
33+
//! completions from hardware with the correct `Request` instance. The `TagSet`
34+
//! determines the maximum queue depth by setting the number of `Request`
35+
//! instances available to the driver, and it determines the number of queues to
36+
//! instantiate for the driver. If possible, a driver should allocate one queue
37+
//! per core, to keep queue data local to a core.
38+
//!
39+
//! One `TagSet` instance can be shared between multiple `GenDisk` instances.
40+
//! This can be useful when implementing drivers where one piece of hardware
41+
//! with one set of IO resources is represented to the user as multiple disks.
42+
//!
43+
//! One significant difference between block device drivers implemented with
44+
//! these Rust abstractions and drivers implemented in C, is that the Rust
45+
//! drivers have to own a reference count on the `Request` type when the IO is
46+
//! in flight. This is to ensure that the C `struct request` instances backing
47+
//! the Rust `Request` instances are live while the Rust driver holds a
48+
//! reference to the `Request`. In addition, the conversion of an integer tag to
49+
//! a `Request` via the `TagSet` would not be sound without this bookkeeping.
50+
//!
51+
//! [`GenDisk`]: gen_disk::GenDisk
52+
//! [`GenDisk<T>`]: gen_disk::GenDisk
53+
//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder
54+
//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build
55+
//!
56+
//! # Example
57+
//!
58+
//! ```rust
59+
//! use kernel::{
60+
//! alloc::flags,
61+
//! block::mq::*,
62+
//! new_mutex,
63+
//! prelude::*,
64+
//! sync::{Arc, Mutex},
65+
//! types::{ARef, ForeignOwnable},
66+
//! };
67+
//!
68+
//! struct MyBlkDevice;
69+
//!
70+
//! #[vtable]
71+
//! impl Operations for MyBlkDevice {
72+
//!
73+
//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
74+
//! Request::end_ok(rq);
75+
//! Ok(())
76+
//! }
77+
//!
78+
//! fn commit_rqs() {}
79+
//! }
80+
//!
81+
//! let tagset: Arc<TagSet<MyBlkDevice>> =
82+
//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
83+
//! let mut disk = gen_disk::GenDiskBuilder::new()
84+
//! .capacity_sectors(4096)
85+
//! .build(format_args!("myblk"), tagset)?;
86+
//!
87+
//! # Ok::<(), kernel::error::Error>(())
88+
//! ```
89+
90+
pub mod gen_disk;
91+
mod operations;
92+
mod raw_writer;
93+
mod request;
94+
mod tag_set;
95+
96+
pub use operations::Operations;
97+
pub use request::Request;
98+
pub use tag_set::TagSet;

rust/kernel/block/mq/gen_disk.rs

+215
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
//! Generic disk abstraction.
4+
//!
5+
//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
6+
//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
7+
8+
use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
9+
use crate::error;
10+
use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
11+
use core::fmt::{self, Write};
12+
13+
/// A builder for [`GenDisk`].
14+
///
15+
/// Use this struct to configure and add new [`GenDisk`] to the VFS.
16+
pub struct GenDiskBuilder {
17+
// Rotational media attribute of the device to be built; `build` sets
// `QUEUE_FLAG_NONROT` on the queue when this is `false` and clears it
// otherwise.
rotational: bool,
18+
// Logical block size handed to `blk_queue_logical_block_size` in `build`.
logical_block_size: u32,
19+
// Physical block size handed to `blk_queue_physical_block_size` in `build`.
physical_block_size: u32,
20+
// Device capacity in sectors (512 bytes), handed to `set_capacity` in
// `build`.
capacity_sectors: u64,
21+
}
22+
23+
// Default configuration: non-rotational media, logical and physical block
// size both equal to `PAGE_SIZE`, and a capacity of zero sectors.
impl Default for GenDiskBuilder {
24+
fn default() -> Self {
25+
Self {
26+
rotational: false,
27+
// `PAGE_SIZE` is also the upper bound accepted by
// `validate_block_size`, so the defaults are the largest valid sizes.
logical_block_size: bindings::PAGE_SIZE as u32,
28+
physical_block_size: bindings::PAGE_SIZE as u32,
29+
capacity_sectors: 0,
30+
}
31+
}
32+
}
33+
34+
impl GenDiskBuilder {
35+
/// Create a new instance.
36+
pub fn new() -> Self {
37+
Self::default()
38+
}
39+
40+
/// Set the rotational media attribute for the device to be built.
41+
pub fn rotational(mut self, rotational: bool) -> Self {
42+
self.rotational = rotational;
43+
self
44+
}
45+
46+
/// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
47+
/// and that it is a power of two.
48+
fn validate_block_size(size: u32) -> Result<()> {
49+
if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
50+
Err(error::code::EINVAL)
51+
} else {
52+
Ok(())
53+
}
54+
}
55+
56+
/// Set the logical block size of the device to be built.
57+
///
58+
/// This method will check that block size is a power of two and between 512
59+
/// and 4096. If not, an error is returned and the block size is not set.
60+
///
61+
/// This is the smallest unit the storage device can address. It is
62+
/// typically 4096 bytes.
63+
pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> {
64+
Self::validate_block_size(block_size)?;
65+
self.logical_block_size = block_size;
66+
Ok(self)
67+
}
68+
69+
/// Set the physical block size of the device to be built.
70+
///
71+
/// This method will check that block size is a power of two and between 512
72+
/// and 4096. If not, an error is returned and the block size is not set.
73+
///
74+
/// This is the smallest unit a physical storage device can write
75+
/// atomically. It is usually the same as the logical block size but may be
76+
/// bigger. One example is SATA drives with 4096 byte physical block size
77+
/// that expose a 512 byte logical block size to the operating system.
78+
pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> {
79+
Self::validate_block_size(block_size)?;
80+
self.physical_block_size = block_size;
81+
Ok(self)
82+
}
83+
84+
/// Set the capacity of the device to be built, in sectors (512 bytes).
85+
pub fn capacity_sectors(mut self, capacity: u64) -> Self {
86+
self.capacity_sectors = capacity;
87+
self
88+
}
89+
90+
/// Build a new `GenDisk` and add it to the VFS.
91+
pub fn build<T: Operations>(
92+
self,
93+
name: fmt::Arguments<'_>,
94+
tagset: Arc<TagSet<T>>,
95+
) -> Result<GenDisk<T>> {
96+
let lock_class_key = crate::sync::LockClassKey::new();
97+
98+
// SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
99+
let gendisk = from_err_ptr(unsafe {
100+
bindings::__blk_mq_alloc_disk(
101+
tagset.raw_tag_set(),
102+
core::ptr::null_mut(), // TODO: We can pass queue limits right here
103+
core::ptr::null_mut(),
104+
lock_class_key.as_ptr(),
105+
)
106+
})?;
107+
108+
const TABLE: bindings::block_device_operations = bindings::block_device_operations {
109+
submit_bio: None,
110+
open: None,
111+
release: None,
112+
ioctl: None,
113+
compat_ioctl: None,
114+
check_events: None,
115+
unlock_native_capacity: None,
116+
getgeo: None,
117+
set_read_only: None,
118+
swap_slot_free_notify: None,
119+
report_zones: None,
120+
devnode: None,
121+
alternative_gpt_sector: None,
122+
get_unique_id: None,
123+
// TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
124+
// be merged (unstable in rustc 1.78 which is staged for linux 6.10)
125+
// https://github.com/rust-lang/rust/issues/119618
126+
owner: core::ptr::null_mut(),
127+
pr_ops: core::ptr::null_mut(),
128+
free_disk: None,
129+
poll_bio: None,
130+
};
131+
132+
// SAFETY: `gendisk` is a valid pointer as we initialized it above
133+
unsafe { (*gendisk).fops = &TABLE };
134+
135+
let mut raw_writer = RawWriter::from_array(
136+
// SAFETY: `gendisk` points to a valid and initialized instance. We
137+
// have exclusive access, since the disk is not added to the VFS
138+
// yet.
139+
unsafe { &mut (*gendisk).disk_name },
140+
)?;
141+
raw_writer.write_fmt(name)?;
142+
raw_writer.write_char('\0')?;
143+
144+
// SAFETY: `gendisk` points to a valid and initialized instance of
145+
// `struct gendisk`. We have exclusive access, so we cannot race.
146+
unsafe {
147+
bindings::blk_queue_logical_block_size((*gendisk).queue, self.logical_block_size)
148+
};
149+
150+
// SAFETY: `gendisk` points to a valid and initialized instance of
151+
// `struct gendisk`. We have exclusive access, so we cannot race.
152+
unsafe {
153+
bindings::blk_queue_physical_block_size((*gendisk).queue, self.physical_block_size)
154+
};
155+
156+
// SAFETY: `gendisk` points to a valid and initialized instance of
157+
// `struct gendisk`. `set_capacity` takes a lock to synchronize this
158+
// operation, so we will not race.
159+
unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };
160+
161+
if !self.rotational {
162+
// SAFETY: `gendisk` points to a valid and initialized instance of
163+
// `struct gendisk`. This operation uses a relaxed atomic bit flip
164+
// operation, so there is no race on this field.
165+
unsafe { bindings::blk_queue_flag_set(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue) };
166+
} else {
167+
// SAFETY: `gendisk` points to a valid and initialized instance of
168+
// `struct gendisk`. This operation uses a relaxed atomic bit flip
169+
// operation, so there is no race on this field.
170+
unsafe {
171+
bindings::blk_queue_flag_clear(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue)
172+
};
173+
}
174+
175+
crate::error::to_result(
176+
// SAFETY: `gendisk` points to a valid and initialized instance of
177+
// `struct gendisk`.
178+
unsafe {
179+
bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
180+
},
181+
)?;
182+
183+
// INVARIANT: `gendisk` was initialized above.
184+
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
185+
Ok(GenDisk {
186+
_tagset: tagset,
187+
gendisk,
188+
})
189+
}
190+
}
191+
192+
/// A generic block device.
193+
///
194+
/// # Invariants
195+
///
196+
/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
197+
/// - `gendisk` was added to the VFS through a call to
198+
/// `bindings::device_add_disk`.
199+
pub struct GenDisk<T: Operations> {
200+
// Reference to the tag set the disk was allocated from. The field is never
// read; presumably it is held so the `TagSet` outlives the disk - confirm.
_tagset: Arc<TagSet<T>>,
201+
// Raw pointer to the C `struct gendisk`; see the type invariants.
gendisk: *mut bindings::gendisk,
202+
}
203+
204+
// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
205+
// `TagSet`. It is safe to send this to other threads as long as `T` is `Send`.
206+
unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
207+
208+
// Dropping a `GenDisk` removes the wrapped disk again by calling
// `del_gendisk` on it.
//
// NOTE(review): only `del_gendisk` is called here; no `put_disk` is issued
// for the reference obtained at allocation time - confirm whether that
// reference is released elsewhere.
impl<T: Operations> Drop for GenDisk<T> {
209+
fn drop(&mut self) {
210+
// SAFETY: By type invariant, `self.gendisk` points to a valid and
211+
// initialized instance of `struct gendisk`, and it was previously added
212+
// to the VFS.
213+
unsafe { bindings::del_gendisk(self.gendisk) };
214+
}
215+
}

0 commit comments

Comments
 (0)