From 55a1613b4416d09c115b9c60e9d44b4ef06b6289 Mon Sep 17 00:00:00 2001
From: Rob Norris
Date: Wed, 12 Feb 2025 15:52:54 +1100
Subject: [PATCH] vdev_file: make FLUSH and TRIM asynchronous

zfs_file_fsync() and zfs_file_deallocate() are both blocking ops, so the
zio_taskq thread is active and blocked both while waiting for the IO
call and then while calling zio_execute() for the next stage.

This is a particular issue for FLUSH, as the z_flush_iss queue typically
only has one thread; multiple flushes arriving at once can cause long
delays if the underlying fsync() response is particularly slow.

To fix this, we dispatch both FLUSH and TRIM to the z_vdev_file taskq,
just as we do for reads and writes. Further, we return all results
through zio_interrupt(), so neither the issue nor the file taskqs are
blocked.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Signed-off-by: Rob Norris
---
 module/zfs/vdev_file.c | 45 +++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
index 224340405d70..66997f0e7e8e 100644
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -248,11 +248,22 @@ vdev_file_io_fsync(void *arg)
 	zio_interrupt(zio);
 }
 
+static void
+vdev_file_io_deallocate(void *arg)
+{
+	zio_t *zio = (zio_t *)arg;
+	vdev_file_t *vf = zio->io_vd->vdev_tsd;
+
+	zio->io_error = zfs_file_deallocate(vf->vf_file,
+	    zio->io_offset, zio->io_size);
+
+	zio_interrupt(zio);
+}
+
 static void
 vdev_file_io_start(zio_t *zio)
 {
 	vdev_t *vd = zio->io_vd;
-	vdev_file_t *vf = vd->vdev_tsd;
 
 	if (zio->io_type == ZIO_TYPE_FLUSH) {
 		/* XXPOLICY */
@@ -263,33 +274,23 @@ vdev_file_io_start(zio_t *zio)
 		}
 
 		if (zfs_nocacheflush) {
-			zio_execute(zio);
+			zio_interrupt(zio);
 			return;
 		}
 
-#ifdef __linux__
-		/*
-		 * We cannot safely call vfs_fsync() when PF_FSTRANS
-		 * is set in the current context.  Filesystems like
-		 * XFS include sanity checks to verify it is not
-		 * already set, see xfs_vm_writepage().  Therefore
-		 * the sync must be dispatched to a different context.
-		 */
-		if (__spl_pf_fstrans_check()) {
-			VERIFY3U(taskq_dispatch(vdev_file_taskq,
-			    vdev_file_io_fsync, zio, TQ_SLEEP), !=,
-			    TASKQID_INVALID);
-			return;
-		}
-#endif
+		VERIFY3U(taskq_dispatch(vdev_file_taskq,
+		    vdev_file_io_fsync, zio, TQ_SLEEP), !=, TASKQID_INVALID);
 
-		vdev_file_io_fsync(zio);
 		return;
-	} else if (zio->io_type == ZIO_TYPE_TRIM) {
+	}
+
+	if (zio->io_type == ZIO_TYPE_TRIM) {
 		ASSERT3U(zio->io_size, !=, 0);
-		zio->io_error = zfs_file_deallocate(vf->vf_file,
-		    zio->io_offset, zio->io_size);
-		zio_execute(zio);
+
+		VERIFY3U(taskq_dispatch(vdev_file_taskq,
+		    vdev_file_io_deallocate, zio, TQ_SLEEP), !=,
+		    TASKQID_INVALID);
+
 		return;
 	}
 