Skip to content

Commit

Permalink
vdev_file: unify FreeBSD and Linux implementations (REVIEW ONLY)
Browse files Browse the repository at this point in the history
This commit is here to make it easier to review the diffs for the
individual platform files and confirm they make sense for each platform.
It should be squashed into the next commit before merge.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Signed-off-by: Rob Norris <[email protected]>
  • Loading branch information
robn committed Feb 12, 2025
1 parent b901d4a commit ed76948
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 57 deletions.
76 changes: 51 additions & 25 deletions module/os/freebsd/zfs/vdev_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/file.h>
#include <sys/vdev_file.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
Expand All @@ -40,14 +39,24 @@

static taskq_t *vdev_file_taskq;

/*
* By default, the logical/physical ashift for file vdevs is set to
* SPA_MINBLOCKSHIFT (9). This allows all file vdevs to use 512B (1 << 9)
* blocksizes. Users may opt to change one or both of these for testing
* or performance reasons. Care should be taken as these values will
* impact the vdev_ashift setting which can only be set at vdev creation
* time.
*/
static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;

void
vdev_file_init(void)
{
vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16),
minclsyspri, max_ncpus, INT_MAX, 0);
minclsyspri, max_ncpus, INT_MAX, TASKQ_DYNAMIC);

VERIFY(vdev_file_taskq);
}

void
Expand Down Expand Up @@ -138,7 +147,7 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
* to local zone users, so the underlying devices should be as well.
*/
ASSERT3P(vd->vdev_path, !=, NULL);
ASSERT(vd->vdev_path[0] == '/');
ASSERT3S(vd->vdev_path[0], ==, '/');

error = zfs_file_open(vd->vdev_path,
vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
Expand Down Expand Up @@ -186,32 +195,20 @@ vdev_file_close(vdev_t *vd)
return;

if (vf->vf_file != NULL) {
zfs_file_close(vf->vf_file);
(void) zfs_file_close(vf->vf_file);
}

vd->vdev_delayed_close = B_FALSE;
kmem_free(vf, sizeof (vdev_file_t));
vd->vdev_tsd = NULL;
}

/*
 * Implements the interrupt side for file vdev types. This routine will be
 * called when the I/O completes, allowing us to transfer the I/O to the
 * interrupt taskqs. For consistency, the code structure mimics disk vdev
 * types.
 *
 * The body is a single call that hands the zio to zio_delay_interrupt();
 * no other state is touched here.
 */
static void
vdev_file_io_intr(zio_t *zio)
{
zio_delay_interrupt(zio);
}

static void
vdev_file_io_strategy(void *arg)
{
zio_t *zio = arg;
zio_t *zio = (zio_t *)arg;
vdev_t *vd = zio->io_vd;
vdev_file_t *vf;
vdev_file_t *vf = vd->vdev_tsd;
void *buf;
ssize_t resid;
loff_t off;
Expand All @@ -222,8 +219,6 @@ vdev_file_io_strategy(void *arg)
size = zio->io_size;
resid = 0;

vf = vd->vdev_tsd;

ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
if (zio->io_type == ZIO_TYPE_READ) {
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
Expand All @@ -236,9 +231,20 @@ vdev_file_io_strategy(void *arg)
}
zio->io_error = err;
if (resid != 0 && zio->io_error == 0)
zio->io_error = ENOSPC;
zio->io_error = SET_ERROR(ENOSPC);

zio_delay_interrupt(zio);
}

vdev_file_io_intr(zio);
/*
 * Flush the file backing a file vdev on behalf of a zio.
 *
 * 'arg' is the zio_t being serviced; it is passed as a void pointer so
 * that this function can be used as a taskq callback as well as being
 * called directly. The result of zfs_file_fsync() on the vdev's open file
 * is stored in zio->io_error, and the zio is then passed to
 * zio_interrupt().
 */
static void
vdev_file_io_fsync(void *arg)
{
zio_t *zio = (zio_t *)arg;
/* Per-vdev file state (holds the open file handle, vf_file). */
vdev_file_t *vf = zio->io_vd->vdev_tsd;

zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);

zio_interrupt(zio);
}

static void
Expand All @@ -255,9 +261,28 @@ vdev_file_io_start(zio_t *zio)
return;
}

zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC|O_DSYNC);
if (zfs_nocacheflush) {
zio_execute(zio);
return;
}

#ifdef __linux__
/*
* We cannot safely call vfs_fsync() when PF_FSTRANS
* is set in the current context. Filesystems like
* XFS include sanity checks to verify it is not
* already set, see xfs_vm_writepage(). Therefore
* the sync must be dispatched to a different context.
*/
if (__spl_pf_fstrans_check()) {
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_fsync, zio, TQ_SLEEP), !=,
TASKQID_INVALID);
return;
}
#endif

zio_execute(zio);
vdev_file_io_fsync(zio);
return;
} else if (zio->io_type == ZIO_TYPE_TRIM) {
ASSERT3U(zio->io_size, !=, 0);
Expand All @@ -266,11 +291,12 @@ vdev_file_io_start(zio_t *zio)
zio_execute(zio);
return;
}

ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
zio->io_target_timestamp = zio_handle_io_delay(zio);

VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
TQ_SLEEP), !=, 0);
TQ_SLEEP), !=, TASKQID_INVALID);
}

static void
Expand Down
59 changes: 27 additions & 32 deletions module/os/linux/zfs/vdev_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,14 @@

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_file.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_trim.h>
#include <sys/zio.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
#include <sys/abd.h>
#include <sys/vnode.h>
#include <sys/zfs_file.h>
#ifdef _KERNEL
#include <linux/falloc.h>
#include <sys/fcntl.h>
#else
#include <fcntl.h>
#endif
#include <sys/stat.h>

/*
* Virtual device vector for files.
*/
Expand All @@ -58,16 +50,31 @@ static taskq_t *vdev_file_taskq;
static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;

/*
 * Create the "z_vdev_file" taskq and store it in vdev_file_taskq. This is
 * the taskq that file vdev I/O is dispatched to.
 *
 * Creation is checked with VERIFY(), so a NULL taskq is treated as fatal
 * rather than being reported to the caller.
 */
void
vdev_file_init(void)
{
vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16),
minclsyspri, max_ncpus, INT_MAX, TASKQ_DYNAMIC);

VERIFY(vdev_file_taskq);
}

/*
 * Destroy the taskq stored in vdev_file_taskq (the counterpart of
 * vdev_file_init()).
 */
void
vdev_file_fini(void)
{
taskq_destroy(vdev_file_taskq);
}

/*
 * Hold routine for file vdevs. Nothing is acquired here; the body only
 * asserts that the vdev has a path (vd->vdev_path is non-NULL).
 */
static void
vdev_file_hold(vdev_t *vd)
{
ASSERT(vd->vdev_path != NULL);
ASSERT3P(vd->vdev_path, !=, NULL);
}

/*
 * Release routine for file vdevs. Nothing is released here; the body only
 * asserts that the vdev has a path (vd->vdev_path is non-NULL).
 */
static void
vdev_file_rele(vdev_t *vd)
{
ASSERT(vd->vdev_path != NULL);
ASSERT3P(vd->vdev_path, !=, NULL);
}

static mode_t
Expand Down Expand Up @@ -139,7 +146,8 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
* administrator has already decided that the pool should be available
* to local zone users, so the underlying devices should be as well.
*/
ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
ASSERT3P(vd->vdev_path, !=, NULL);
ASSERT3S(vd->vdev_path[0], ==, '/');

error = zfs_file_open(vd->vdev_path,
vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
Expand Down Expand Up @@ -201,8 +209,8 @@ vdev_file_io_strategy(void *arg)
zio_t *zio = (zio_t *)arg;
vdev_t *vd = zio->io_vd;
vdev_file_t *vf = vd->vdev_tsd;
ssize_t resid;
void *buf;
ssize_t resid;
loff_t off;
ssize_t size;
int err;
Expand All @@ -211,6 +219,7 @@ vdev_file_io_strategy(void *arg)
size = zio->io_size;
resid = 0;

ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
if (zio->io_type == ZIO_TYPE_READ) {
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
Expand Down Expand Up @@ -257,6 +266,7 @@ vdev_file_io_start(zio_t *zio)
return;
}

#ifdef __linux__
/*
* We cannot safely call vfs_fsync() when PF_FSTRANS
* is set in the current context. Filesystems like
Expand All @@ -270,10 +280,9 @@ vdev_file_io_start(zio_t *zio)
TASKQID_INVALID);
return;
}
#endif

zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);

zio_execute(zio);
vdev_file_io_fsync(zio);
return;
} else if (zio->io_type == ZIO_TYPE_TRIM) {
ASSERT3U(zio->io_size, !=, 0);
Expand All @@ -283,6 +292,7 @@ vdev_file_io_start(zio_t *zio)
return;
}

ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
zio->io_target_timestamp = zio_handle_io_delay(zio);

VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
Expand Down Expand Up @@ -320,21 +330,6 @@ vdev_ops_t vdev_file_ops = {
.vdev_op_leaf = B_TRUE /* leaf vdev */
};

/*
 * Create the "z_vdev_file" taskq and store it in vdev_file_taskq. The
 * thread-count arguments are derived from boot_ncpus.
 *
 * Creation is checked with VERIFY(), so a NULL taskq is treated as fatal
 * rather than being reported to the caller.
 */
void
vdev_file_init(void)
{
vdev_file_taskq = taskq_create("z_vdev_file", MAX(boot_ncpus, 16),
minclsyspri, boot_ncpus, INT_MAX, TASKQ_DYNAMIC);

VERIFY(vdev_file_taskq);
}

/*
 * Destroy the taskq stored in vdev_file_taskq (the counterpart of
 * vdev_file_init()).
 */
void
vdev_file_fini(void)
{
taskq_destroy(vdev_file_taskq);
}

/*
* From userland we access disks just like files.
*/
Expand Down

0 comments on commit ed76948

Please sign in to comment.