Skip to content

Commit dc14abd

Browse files
committed
Merge patch series "pidfs: implement file handle support"
Christian Brauner <[email protected]> says:

Now that we have the preliminaries to look up a struct pid based on its inode number alone, we can implement file handle support.

This is based on custom export operation methods, which allow pidfs to implement permission checking and opening of pidfs file handles cleanly without hacking around in the core file handle code too much.

* patches from https://lore.kernel.org/r/[email protected]:
  pidfs: implement file handle support
  exportfs: add permission method
  fhandle: pull CAP_DAC_READ_SEARCH check into may_decode_fh()
  exportfs: add open method
  fhandle: simplify error handling
  pseudofs: add support for export_ops

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
2 parents d8b47d0 + b3caba8 commit dc14abd

File tree

5 files changed

+192
-59
lines changed

5 files changed

+192
-59
lines changed

fs/fhandle.c

+56-59
Original file line numberDiff line numberDiff line change
@@ -187,17 +187,6 @@ static int get_path_from_fd(int fd, struct path *root)
187187
return 0;
188188
}
189189

190-
enum handle_to_path_flags {
191-
HANDLE_CHECK_PERMS = (1 << 0),
192-
HANDLE_CHECK_SUBTREE = (1 << 1),
193-
};
194-
195-
struct handle_to_path_ctx {
196-
struct path root;
197-
enum handle_to_path_flags flags;
198-
unsigned int fh_flags;
199-
};
200-
201190
static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
202191
{
203192
struct handle_to_path_ctx *ctx = context;
@@ -261,50 +250,55 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
261250
{
262251
int handle_dwords;
263252
struct vfsmount *mnt = ctx->root.mnt;
253+
struct dentry *dentry;
264254

265255
/* change the handle size to multiple of sizeof(u32) */
266256
handle_dwords = handle->handle_bytes >> 2;
267-
path->dentry = exportfs_decode_fh_raw(mnt,
268-
(struct fid *)handle->f_handle,
269-
handle_dwords, handle->handle_type,
270-
ctx->fh_flags,
271-
vfs_dentry_acceptable, ctx);
272-
if (IS_ERR_OR_NULL(path->dentry)) {
273-
if (path->dentry == ERR_PTR(-ENOMEM))
257+
dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle,
258+
handle_dwords, handle->handle_type,
259+
ctx->fh_flags, vfs_dentry_acceptable,
260+
ctx);
261+
if (IS_ERR_OR_NULL(dentry)) {
262+
if (dentry == ERR_PTR(-ENOMEM))
274263
return -ENOMEM;
275264
return -ESTALE;
276265
}
266+
path->dentry = dentry;
277267
path->mnt = mntget(mnt);
278268
return 0;
279269
}
280270

281-
/*
282-
* Allow relaxed permissions of file handles if the caller has the
283-
* ability to mount the filesystem or create a bind-mount of the
284-
* provided @mountdirfd.
285-
*
286-
* In both cases the caller may be able to get an unobstructed way to
287-
* the encoded file handle. If the caller is only able to create a
288-
* bind-mount we need to verify that there are no locked mounts on top
289-
* of it that could prevent us from getting to the encoded file.
290-
*
291-
* In principle, locked mounts can prevent the caller from mounting the
292-
* filesystem but that only applies to procfs and sysfs neither of which
293-
* support decoding file handles.
294-
*/
295-
static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
296-
unsigned int o_flags)
271+
static inline int may_decode_fh(struct handle_to_path_ctx *ctx,
272+
unsigned int o_flags)
297273
{
298274
struct path *root = &ctx->root;
299275

276+
if (capable(CAP_DAC_READ_SEARCH))
277+
return 0;
278+
300279
/*
301-
* Restrict to O_DIRECTORY to provide a deterministic API that avoids a
302-
* confusing api in the face of disconnected non-dir dentries.
280+
* Allow relaxed permissions of file handles if the caller has
281+
* the ability to mount the filesystem or create a bind-mount of
282+
* the provided @mountdirfd.
283+
*
284+
* In both cases the caller may be able to get an unobstructed
285+
* way to the encoded file handle. If the caller is only able to
286+
* create a bind-mount we need to verify that there are no
287+
* locked mounts on top of it that could prevent us from getting
288+
* to the encoded file.
289+
*
290+
* In principle, locked mounts can prevent the caller from
291+
* mounting the filesystem but that only applies to procfs and
292+
* sysfs neither of which support decoding file handles.
293+
*
294+
* Restrict to O_DIRECTORY to provide a deterministic API that
295+
* avoids a confusing api in the face of disconnected non-dir
296+
* dentries.
303297
*
304298
* There's only one dentry for each directory inode (VFS rule)...
305299
*/
306300
if (!(o_flags & O_DIRECTORY))
307-
return false;
301+
return -EPERM;
308302

309303
if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
310304
ctx->flags = HANDLE_CHECK_PERMS;
@@ -314,14 +308,14 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
314308
!has_locked_children(real_mount(root->mnt), root->dentry))
315309
ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
316310
else
317-
return false;
311+
return -EPERM;
318312

319313
/* Are we able to override DAC permissions? */
320314
if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
321-
return false;
315+
return -EPERM;
322316

323317
ctx->fh_flags = EXPORT_FH_DIR_ONLY;
324-
return true;
318+
return 0;
325319
}
326320

327321
static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
@@ -331,15 +325,19 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
331325
struct file_handle f_handle;
332326
struct file_handle *handle = NULL;
333327
struct handle_to_path_ctx ctx = {};
328+
const struct export_operations *eops;
334329

335330
retval = get_path_from_fd(mountdirfd, &ctx.root);
336331
if (retval)
337332
goto out_err;
338333

339-
if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
340-
retval = -EPERM;
334+
eops = ctx.root.mnt->mnt_sb->s_export_op;
335+
if (eops && eops->permission)
336+
retval = eops->permission(&ctx, o_flags);
337+
else
338+
retval = may_decode_fh(&ctx, o_flags);
339+
if (retval)
341340
goto out_path;
342-
}
343341

344342
if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
345343
retval = -EFAULT;
@@ -398,29 +396,28 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
398396
int open_flag)
399397
{
400398
long retval = 0;
401-
struct path path;
399+
struct path path __free(path_put) = {};
402400
struct file *file;
403-
int fd;
401+
const struct export_operations *eops;
404402

405403
retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
406404
if (retval)
407405
return retval;
408406

409-
fd = get_unused_fd_flags(open_flag);
410-
if (fd < 0) {
411-
path_put(&path);
407+
CLASS(get_unused_fd, fd)(O_CLOEXEC);
408+
if (fd < 0)
412409
return fd;
413-
}
414-
file = file_open_root(&path, "", open_flag, 0);
415-
if (IS_ERR(file)) {
416-
put_unused_fd(fd);
417-
retval = PTR_ERR(file);
418-
} else {
419-
retval = fd;
420-
fd_install(fd, file);
421-
}
422-
path_put(&path);
423-
return retval;
410+
411+
eops = path.mnt->mnt_sb->s_export_op;
412+
if (eops->open)
413+
file = eops->open(&path, open_flag);
414+
else
415+
file = file_open_root(&path, "", open_flag, 0);
416+
if (IS_ERR(file))
417+
return PTR_ERR(file);
418+
419+
fd_install(fd, file);
420+
return take_fd(fd);
424421
}
425422

426423
/**

fs/libfs.c

+1
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
673673
s->s_blocksize_bits = PAGE_SHIFT;
674674
s->s_magic = ctx->magic;
675675
s->s_op = ctx->ops ?: &simple_super_operations;
676+
s->s_export_op = ctx->eops;
676677
s->s_xattr = ctx->xattr;
677678
s->s_time_gran = 1;
678679
root = new_inode(s);

fs/pidfs.c

+114
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#include <linux/anon_inodes.h>
3+
#include <linux/exportfs.h>
34
#include <linux/file.h>
45
#include <linux/fs.h>
56
#include <linux/cgroup.h>
@@ -473,6 +474,118 @@ static const struct dentry_operations pidfs_dentry_operations = {
473474
.d_prune = stashed_dentry_prune,
474475
};
475476

477+
static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
478+
struct inode *parent)
479+
{
480+
const struct pid *pid = inode->i_private;
481+
482+
if (*max_len < 2) {
483+
*max_len = 2;
484+
return FILEID_INVALID;
485+
}
486+
487+
*max_len = 2;
488+
*(u64 *)fh = pid->ino;
489+
return FILEID_KERNFS;
490+
}
491+
492+
/* Find a struct pid based on the inode number. */
493+
static struct pid *pidfs_ino_get_pid(u64 ino)
494+
{
495+
unsigned long pid_ino = pidfs_ino(ino);
496+
u32 gen = pidfs_gen(ino);
497+
struct pid *pid;
498+
499+
guard(rcu)();
500+
501+
pid = idr_find(&pidfs_ino_idr, lower_32_bits(pid_ino));
502+
if (!pid)
503+
return NULL;
504+
505+
if (pidfs_ino(pid->ino) != pid_ino)
506+
return NULL;
507+
508+
if (pidfs_gen(pid->ino) != gen)
509+
return NULL;
510+
511+
/* Within our pid namespace hierarchy? */
512+
if (pid_vnr(pid) == 0)
513+
return NULL;
514+
515+
return get_pid(pid);
516+
}
517+
518+
static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
519+
struct fid *fid, int fh_len,
520+
int fh_type)
521+
{
522+
int ret;
523+
u64 pid_ino;
524+
struct path path;
525+
struct pid *pid;
526+
527+
if (fh_len < 2)
528+
return NULL;
529+
530+
switch (fh_type) {
531+
case FILEID_KERNFS:
532+
pid_ino = *(u64 *)fid;
533+
break;
534+
default:
535+
return NULL;
536+
}
537+
538+
pid = pidfs_ino_get_pid(pid_ino);
539+
if (!pid)
540+
return NULL;
541+
542+
ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
543+
if (ret < 0)
544+
return ERR_PTR(ret);
545+
546+
mntput(path.mnt);
547+
return path.dentry;
548+
}
549+
550+
/*
551+
* Make sure that we reject any nonsensical flags that users pass via
552+
* open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and
553+
* PIDFD_NONBLOCK as O_NONBLOCK.
554+
*/
555+
#define VALID_FILE_HANDLE_OPEN_FLAGS \
556+
(O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL)
557+
558+
static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
559+
unsigned int oflags)
560+
{
561+
if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE))
562+
return -EINVAL;
563+
564+
/*
565+
* pidfs_ino_get_pid() will verify that the struct pid is part
566+
* of the caller's pid namespace hierarchy. No further
567+
* permission checks are needed.
568+
*/
569+
return 0;
570+
}
571+
572+
static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
573+
{
574+
/*
575+
* Clear O_LARGEFILE as open_by_handle_at() forces it and raise
576+
* O_RDWR as pidfds always are.
577+
*/
578+
oflags &= ~O_LARGEFILE;
579+
return dentry_open(path, oflags | O_RDWR, current_cred());
580+
}
581+
582+
static const struct export_operations pidfs_export_operations = {
583+
.encode_fh = pidfs_encode_fh,
584+
.fh_to_dentry = pidfs_fh_to_dentry,
585+
.open = pidfs_export_open,
586+
.permission = pidfs_export_permission,
587+
};
588+
476589
static int pidfs_init_inode(struct inode *inode, void *data)
477590
{
478591
const struct pid *pid = data;
@@ -507,6 +620,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
507620
return -ENOMEM;
508621

509622
ctx->ops = &pidfs_sops;
623+
ctx->eops = &pidfs_export_operations;
510624
ctx->dops = &pidfs_dentry_operations;
511625
fc->s_fs_info = (void *)&pidfs_stashed_ops;
512626
return 0;

include/linux/exportfs.h

+20
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define LINUX_EXPORTFS_H 1
44

55
#include <linux/types.h>
6+
#include <linux/path.h>
67

78
struct dentry;
89
struct iattr;
@@ -156,6 +157,17 @@ struct fid {
156157
};
157158
};
158159

160+
enum handle_to_path_flags {
161+
HANDLE_CHECK_PERMS = (1 << 0),
162+
HANDLE_CHECK_SUBTREE = (1 << 1),
163+
};
164+
165+
struct handle_to_path_ctx {
166+
struct path root;
167+
enum handle_to_path_flags flags;
168+
unsigned int fh_flags;
169+
};
170+
159171
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
160172
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
161173
#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
@@ -225,6 +237,12 @@ struct fid {
225237
* is also a directory. In the event that it cannot be found, or storage
226238
* space cannot be allocated, a %ERR_PTR should be returned.
227239
*
240+
* permission:
241+
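* Allow filesystems to specify a custom permission function. If set, handle_to_path() calls it in place of the default may_decode_fh() check; it returns 0 to allow decoding or a negative errno to refuse.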
242+
*
243+
* open:
244+
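* Allow filesystems to specify a custom open function. If set, do_handle_open() calls it in place of file_open_root() to open the decoded path; it returns the opened file or an ERR_PTR() on failure.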
245+
*
228246
* commit_metadata:
229247
* @commit_metadata should commit metadata changes to stable storage.
230248
*
@@ -251,6 +269,8 @@ struct export_operations {
251269
bool write, u32 *device_generation);
252270
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
253271
int nr_iomaps, struct iattr *iattr);
272+
int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags);
273+
struct file * (*open)(struct path *path, unsigned int oflags);
254274
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
255275
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
256276
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */

include/linux/pseudo_fs.h

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
struct pseudo_fs_context {
77
const struct super_operations *ops;
8+
const struct export_operations *eops;
89
const struct xattr_handler * const *xattr;
910
const struct dentry_operations *dops;
1011
unsigned long magic;

0 commit comments

Comments
 (0)