diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 55ab5e9cf61715..3d60770a1cc3bc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1464,6 +1464,51 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
 	}
 }
 
+/*
+ * With writeback caching the request size seen by the server depends on
+ * how many contiguous dirty pages the flusher finds, which is bounded by
+ * dirty throttling: with BDI_CAP_STRICTLIMIT the dirty window can degrade
+ * to a single page under streaming writes, turning large application
+ * writes into page-sized requests.
+ *
+ * Writes that already match the server's preferred alignment gain
+ * nothing from accumulating in the page cache, so send them through
+ * fuse_perform_write() instead, which packs requests up to max_write.
+ * They create no dirty pages, hence no DLM write lock needs to be cached
+ * for them. Unaligned writes keep using the writeback cache, where they
+ * can merge with neighbouring data.
+ *
+ * Return: true to use the writeback cache, false to write through.
+ */
+static bool fuse_use_writeback_cache(struct fuse_conn *fc, struct kiocb *iocb,
+				     struct iov_iter *from)
+{
+	size_t count = iov_iter_count(from);
+	u64 align;
+	bool ret;
+
+	if (!fc->big_writes) {
+		pr_debug("%s: wbc=1 no big_writes pos=%lld count=%zu\n",
+			 __func__, iocb->ki_pos, count);
+		return true;
+	}
+
+	/* these rely on the semantics of their current paths */
+	if (iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND | IOCB_NOWAIT)) {
+		pr_debug("%s: wbc=1 ki_flags=0x%x pos=%lld count=%zu\n",
+			 __func__, iocb->ki_flags, iocb->ki_pos, count);
+		return true;
+	}
+
+	align = fc->alignment_pages ?
+		(u64)fc->alignment_pages << PAGE_SHIFT : PAGE_SIZE;
+
+	ret = !IS_ALIGNED(iocb->ki_pos | (u64)count, align);
+	pr_debug("%s: wbc=%d pos=%lld count=%zu align=%llu alignment_pages=%u\n",
+		 __func__, ret, iocb->ki_pos, count, align, fc->alignment_pages);
+	return ret;
+}
+
 static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
@@ -1486,6 +1531,9 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			goto writethrough;
 		}
 
+		if (!fuse_use_writeback_cache(fc, iocb, from))
+			goto writethrough;
+
 		/* if we have dlm support acquire the lock for the area
 		 * we are writing into */
 		if (fc->dlm) {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a2d496160da4c5..259daca77475fe 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1745,7 +1745,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 
 	/* fuse does it's own writeback accounting */
 	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
-	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
+	sb->s_bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
 
 	/*
 	 * For a single fuse filesystem use max 1% of dirty +
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index d472df6370a400..e2b2c68b162d43 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -429,8 +429,8 @@ struct fuse_file_lock {
  * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  * FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation
  * FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation
- * FUSE_ALIGN_PG_ORDER: page order (power of 2 exponent for number of pages) for
- *			optimal io-size alignment
+ * FUSE_ALIGN_PG_ORDER: alignment order (power of 2 exponent of the IO size
+ *			in bytes) for optimal io-size alignment
  * FUSE_URING_REDUCED_Q: Client (kernel) supports less queues - Server is free
  *			 to register between 1 and nr-core io-uring queues
  */
@@ -926,7 +926,8 @@ struct fuse_init_in {
 #define FUSE_COMPAT_22_INIT_OUT_SIZE 24
 
 /*
- * align_page_order: Number of pages for optimal IO, or a multiple of that
+ * align_page_order: log2 of the optimal IO size in bytes; IO is optimal
+ * when sized and aligned to (1 << align_page_order) or a multiple of it
  */
 struct fuse_init_out {
 	uint32_t	major;