fuse: add a per-connection writethrough_threshold control

Summary of what the hunks below do:

 * fs/fuse/control.c: adds a "writethrough_threshold" file (S_IFREG | 0600)
   to each connection directory of the fuse control filesystem.  Reads
   report fc->writethrough_threshold through fuse_conn_limit_read().
   Writes must start at offset 0, be shorter than 32 bytes, begin with a
   digit, parse with memparse(), be followed only by whitespace and fit in
   an unsigned int; otherwise -EINVAL is returned (-EFAULT on a failed
   copy, -ENOENT when the connection is gone).

 * fs/fuse/file.c: on the writeback-cache path, fuse_cache_write_iter()
   now calls fuse_use_writeback_cache() and jumps to the writethrough
   path when it returns false.  It returns false when the write size is
   at least writethrough_threshold (if non-zero), or when both the file
   position and the size are aligned to alignment_pages pages (PAGE_SIZE
   when alignment_pages is 0).  Connections without big_writes and writes
   with IOCB_DIRECT, IOCB_APPEND or IOCB_NOWAIT keep their current path.

 * fs/fuse/fuse_i.h: adds the writethrough_threshold field and bumps
   FUSE_CTL_NUM_DENTRIES from 5 to 6 for the new control file.

 * include/uapi/linux/fuse.h: rewords the FUSE_ALIGN_PG_ORDER and
   align_page_order comments.

Review notes:

 * The fs/fuse/inode.c hunk that replaced
       sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
   with
       sb->s_bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
   in fuse_bdi_init() has been dropped from this patch.  It disables
   strict dirty limiting for every fuse mount, is unrelated to the new
   knob, and contradicts the comment added in fs/fuse/file.c, which
   describes BDI_CAP_STRICTLIMIT as still being in effect.  If that change
   is wanted it should be posted and justified separately.

 * NOTE(review): memparse() does not appear to report 64-bit overflow, so
   an absurdly large input could wrap before the UINT_MAX check - confirm
   whether that needs explicit handling.

diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 284a3500646296..39ef00e127b37a 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -184,6 +184,68 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
 	return ret;
 }
 
+static ssize_t fuse_conn_writethrough_threshold_read(struct file *file,
+						     char __user *buf,
+						     size_t len, loff_t *ppos)
+{
+	struct fuse_conn *fc;
+	unsigned val;
+
+	fc = fuse_ctl_file_conn_get(file);
+	if (!fc)
+		return 0;
+
+	val = READ_ONCE(fc->writethrough_threshold);
+	fuse_conn_put(fc);
+
+	return fuse_conn_limit_read(file, buf, len, ppos, val);
+}
+
+static ssize_t fuse_conn_writethrough_threshold_write(struct file *file,
+						      const char __user *buf,
+						      size_t count, loff_t *ppos)
+{
+	struct fuse_conn *fc;
+	char kbuf[32];
+	unsigned long long val;
+	char *end;
+
+	if (*ppos)
+		return -EINVAL;
+	if (count == 0 || count >= sizeof(kbuf))
+		return -EINVAL;
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+	kbuf[count] = '\0';
+
+	/* memparse accepts a bare suffix without a digit; require a digit */
+	if (kbuf[0] < '0' || kbuf[0] > '9')
+		return -EINVAL;
+
+	val = memparse(kbuf, &end);
+	end = skip_spaces(end);
+	if (*end)
+		return -EINVAL;
+	if (val > UINT_MAX)
+		return -EINVAL;
+
+	fc = fuse_ctl_file_conn_get(file);
+	if (!fc)
+		return -ENOENT;
+
+	WRITE_ONCE(fc->writethrough_threshold, (unsigned int)val);
+	fuse_conn_put(fc);
+
+	return count;
+}
+
+static const struct file_operations fuse_conn_writethrough_threshold_ops = {
+	.open = nonseekable_open,
+	.read = fuse_conn_writethrough_threshold_read,
+	.write = fuse_conn_writethrough_threshold_write,
+	.llseek = no_llseek,
+};
+
 static const struct file_operations fuse_ctl_abort_ops = {
 	.open = nonseekable_open,
 	.write = fuse_conn_abort_write,
@@ -278,7 +340,10 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
 				 1, NULL, &fuse_conn_max_background_ops) ||
 	    !fuse_ctl_add_dentry(parent, fc, "congestion_threshold",
 				 S_IFREG | 0600, 1, NULL,
-				 &fuse_conn_congestion_threshold_ops))
+				 &fuse_conn_congestion_threshold_ops) ||
+	    !fuse_ctl_add_dentry(parent, fc, "writethrough_threshold",
+				 S_IFREG | 0600, 1, NULL,
+				 &fuse_conn_writethrough_threshold_ops))
 		goto err;
 
 	return 0;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 55ab5e9cf61715..b782778eecccd4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1464,6 +1464,47 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
 	}
 }
 
+
+/*
+ * With writeback caching the request size seen by the server depends on
+ * how many contiguous dirty pages the flusher finds, which is bounded by
+ * dirty throttling: with BDI_CAP_STRICTLIMIT the dirty window can degrade
+ * to a single page under streaming writes, turning large application
+ * writes into page-sized requests.
+ *
+ * Writes that already match the server's preferred alignment gain
+ * nothing from accumulating in the page cache, so send them through
+ * fuse_perform_write() instead, which packs requests up to max_write.
+ * They create no dirty pages, hence no DLM write lock needs to be cached
+ * for them. Unaligned writes keep using the writeback cache, where they
+ * can merge with neighbouring data.
+ */
+static bool fuse_use_writeback_cache(struct fuse_conn *fc, struct kiocb *iocb,
+				     struct iov_iter *from)
+{
+	size_t count = iov_iter_count(from);
+	unsigned int wt;
+	u64 align;
+	bool ret;
+
+	if (!fc->big_writes)
+		return true;
+
+	/* these rely on the semantics of their current paths */
+	if (iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND | IOCB_NOWAIT))
+		return true;
+
+	wt = READ_ONCE(fc->writethrough_threshold);
+	if (wt && count >= wt)
+		return false;
+
+	align = fc->alignment_pages ?
+		(u64)fc->alignment_pages << PAGE_SHIFT : PAGE_SIZE;
+
+	ret = !IS_ALIGNED(iocb->ki_pos, align) || !IS_ALIGNED((u64)count, align);
+	return ret;
+}
+
 static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
@@ -1486,6 +1527,9 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			goto writethrough;
 		}
 
+		if (!fuse_use_writeback_cache(fc, iocb, from))
+			goto writethrough;
+
 		/* if we have dlm support acquire the lock for the area
 		 * we are writing into */
 		if (fc->dlm) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 72aabf8dd5ff31..146ebf7243be23 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -47,7 +47,7 @@
 #define FUSE_NAME_MAX (PATH_MAX - 1)
 
 /** Number of dentries for each connection in the control filesystem */
-#define FUSE_CTL_NUM_DENTRIES 5
+#define FUSE_CTL_NUM_DENTRIES 6
 
 /** Maximum of max_pages received in init_out */
 extern unsigned int fuse_max_pages_limit;
@@ -952,6 +952,9 @@ struct fuse_conn {
 	/* The foffset alignment in PAGE */
 	unsigned int alignment_pages;
 
+	/* Buffered writes >= this size bypass the writeback cache (0 = off) */
+	unsigned int writethrough_threshold;
+
 };
 
 /*
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index d472df6370a400..e2b2c68b162d43 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -429,8 +429,8 @@ struct fuse_file_lock {
  * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  * FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation
  * FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation
- * FUSE_ALIGN_PG_ORDER: page order (power of 2 exponent for number of pages) for
- *			optimal io-size alignment
+ * FUSE_ALIGN_PG_ORDER: alignment order (power of 2 exponent of the IO size
+ *			in bytes) for optimal io-size alignment
  * FUSE_URING_REDUCED_Q: Client (kernel) supports less queues - Server is free
  *			 to register between 1 and nr-core io-uring queues
  */
@@ -926,7 +926,8 @@ struct fuse_init_in {
 #define FUSE_COMPAT_22_INIT_OUT_SIZE 24
 
 /*
- * align_page_order: Number of pages for optimal IO, or a multiple of that
+ * align_page_order: log2 of the optimal IO size in bytes; IO is optimal
+ * when sized and aligned to (1 << align_page_order) or a multiple of it
  */
 struct fuse_init_out {
 	uint32_t major;