From 31a96c95e1d73429d891047a67035a4de6bd38e8 Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Fri, 12 Jun 2026 13:02:37 +0200 Subject: [PATCH 1/2] fuse: use writethrough for writes matching the server alignment Writes that already match the alignment advertised via FUSE_ALIGN_PG_ORDER gain nothing from the writeback cache and can degrade into page-sized WRITE requests under dirty throttling. Send them through fuse_perform_write() instead, which packs requests up to max_write and keeps them stripe-aligned for the backend. They create no dirty pages, so no DLM write lock needs to be cached for them. Unaligned writes keep using the writeback cache. Also clarify in the uapi header that align_page_order is the log2 of the alignment in bytes, not in pages. Signed-off-by: Horst Birthelmer --- fs/fuse/file.c | 47 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/fuse.h | 7 +++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 55ab5e9cf61715..3d60770a1cc3bc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1464,6 +1464,50 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive) } } + +/* + * With writeback caching the request size seen by the server depends on + * how many contiguous dirty pages the flusher finds, which is bounded by + * dirty throttling: with BDI_CAP_STRICTLIMIT the dirty window can degrade + * to a single page under streaming writes, turning large application + * writes into page-sized requests. + * + * Writes that already match the server's preferred alignment gain + * nothing from accumulating in the page cache, so send them through + * fuse_perform_write() instead, which packs requests up to max_write. + * They create no dirty pages, hence no DLM write lock needs to be cached + * for them. Unaligned writes keep using the writeback cache, where they + * can merge with neighbouring data. 
+ */
+static bool fuse_use_writeback_cache(struct fuse_conn *fc, struct kiocb *iocb,
+				     struct iov_iter *from)
+{
+	size_t count = iov_iter_count(from);
+	u64 align;
+	bool ret;
+
+	if (!fc->big_writes) {
+		pr_debug("%s: wbc=1 no big_writes pos=%lld count=%zu\n",
+			 __func__, iocb->ki_pos, count);
+		return true;
+	}
+
+	/* direct, append and nowait writes rely on their current paths */
+	if (iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND | IOCB_NOWAIT)) {
+		pr_debug("%s: wbc=1 ki_flags=0x%x pos=%lld count=%zu\n",
+			 __func__, iocb->ki_flags, iocb->ki_pos, count);
+		return true;
+	}
+
+	align = fc->alignment_pages ?
+		(u64)fc->alignment_pages << PAGE_SHIFT : PAGE_SIZE;
+
+	ret = !IS_ALIGNED(iocb->ki_pos | (u64)count, align);
+	pr_debug("%s: wbc=%d pos=%lld count=%zu align=%llu alignment_pages=%u\n",
+		 __func__, ret, iocb->ki_pos, count, align, fc->alignment_pages);
+	return ret;
+}
+
 static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
@@ -1486,6 +1530,9 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			goto writethrough;
 		}
 
+		if (!fuse_use_writeback_cache(fc, iocb, from))
+			goto writethrough;
+
 		/* if we have dlm support acquire the lock for the area
 		 * we are writing into */
 		if (fc->dlm) {
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index d472df6370a400..e2b2c68b162d43 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -429,8 +429,8 @@ struct fuse_file_lock {
  * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  * FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation
  * FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation
- * FUSE_ALIGN_PG_ORDER: page order (power of 2 exponent for number of pages) for
- *			optimal io-size alignment
+ * FUSE_ALIGN_PG_ORDER: alignment order (power of 2 exponent of the IO size
+ *			in bytes) for optimal io-size alignment
  * FUSE_URING_REDUCED_Q: Client (kernel) 
supports less queues - Server is free * to register between 1 and nr-core io-uring queues */ @@ -926,7 +926,8 @@ struct fuse_init_in { #define FUSE_COMPAT_22_INIT_OUT_SIZE 24 /* - * align_page_order: Number of pages for optimal IO, or a multiple of that + * align_page_order: log2 of the optimal IO size in bytes; IO is optimal + * when sized and aligned to (1 << align_page_order) or a multiple of it */ struct fuse_init_out { uint32_t major; From 3a1ed389b30382782cb8e8ab0c33e6722bf4d5a2 Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Fri, 12 Jun 2026 13:03:04 +0200 Subject: [PATCH 2/2] fuse: drop BDI_CAP_STRICTLIMIT from the fuse bdi Signed-off-by: Horst Birthelmer --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a2d496160da4c5..259daca77475fe 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1745,7 +1745,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) /* fuse does it's own writeback accounting */ sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; - sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; + sb->s_bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; /* * For a single fuse filesystem use max 1% of dirty +