Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 66 additions & 1 deletion fs/fuse/control.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,68 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
return ret;
}

/*
 * Read handler for the per-connection writethrough_threshold control file.
 *
 * Samples fc->writethrough_threshold once and hands the value to
 * fuse_conn_limit_read() to be copied out to user space.
 *
 * Returns whatever fuse_conn_limit_read() returns, or 0 when no connection
 * can be obtained for @file.
 */
static ssize_t fuse_conn_writethrough_threshold_read(struct file *file,
						     char __user *buf,
						     size_t len, loff_t *ppos)
{
	struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
	unsigned int threshold;

	if (!conn)
		return 0;

	/* Take a single snapshot; the connection is not touched afterwards. */
	threshold = READ_ONCE(conn->writethrough_threshold);
	fuse_conn_put(conn);

	return fuse_conn_limit_read(file, buf, len, ppos, threshold);
}

/*
 * Write handler for the per-connection writethrough_threshold control file.
 *
 * The value must be written in one call at offset 0, must start with a
 * digit, may carry a size suffix understood by memparse(), may be followed
 * by whitespace only, and must not exceed UINT_MAX.
 *
 * Returns @count on success, -EINVAL for malformed or out-of-range input,
 * -EFAULT when the user buffer cannot be copied, and -ENOENT when no
 * connection can be obtained for @file.
 *
 * NOTE(review): the result of memparse() is only range-checked after
 * parsing; confirm whether very large inputs can wrap inside memparse()
 * and slip past the UINT_MAX check.
 */
static ssize_t fuse_conn_writethrough_threshold_write(struct file *file,
						      const char __user *buf,
						      size_t count, loff_t *ppos)
{
	char input[32];
	char *tail;
	unsigned long long threshold;
	struct fuse_conn *conn;

	/* Only a single, non-empty write that leaves room for the NUL. */
	if (*ppos || count == 0 || count >= sizeof(input))
		return -EINVAL;

	if (copy_from_user(input, buf, count))
		return -EFAULT;
	input[count] = '\0';

	/* memparse accepts a bare suffix without a digit; require a digit */
	if (!(input[0] >= '0' && input[0] <= '9'))
		return -EINVAL;

	threshold = memparse(input, &tail);
	tail = skip_spaces(tail);

	/* Reject trailing garbage and values that do not fit the field. */
	if (*tail || threshold > UINT_MAX)
		return -EINVAL;

	conn = fuse_ctl_file_conn_get(file);
	if (!conn)
		return -ENOENT;

	WRITE_ONCE(conn->writethrough_threshold, (unsigned int)threshold);
	fuse_conn_put(conn);

	return count;
}

/*
 * File operations for the per-connection writethrough_threshold control
 * file: non-seekable, readable and writable.
 */
static const struct file_operations fuse_conn_writethrough_threshold_ops = {
	.open	= nonseekable_open,
	.llseek	= no_llseek,
	.read	= fuse_conn_writethrough_threshold_read,
	.write	= fuse_conn_writethrough_threshold_write,
};

static const struct file_operations fuse_ctl_abort_ops = {
.open = nonseekable_open,
.write = fuse_conn_abort_write,
Expand Down Expand Up @@ -278,7 +340,10 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
1, NULL, &fuse_conn_max_background_ops) ||
!fuse_ctl_add_dentry(parent, fc, "congestion_threshold",
S_IFREG | 0600, 1, NULL,
&fuse_conn_congestion_threshold_ops))
&fuse_conn_congestion_threshold_ops) ||
!fuse_ctl_add_dentry(parent, fc, "writethrough_threshold",
S_IFREG | 0600, 1, NULL,
&fuse_conn_writethrough_threshold_ops))
goto err;

return 0;
Expand Down
44 changes: 44 additions & 0 deletions fs/fuse/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,47 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
}
}


/*
* With writeback caching the request size seen by the server depends on
* how many contiguous dirty pages the flusher finds, which is bounded by
* dirty throttling: with BDI_CAP_STRICTLIMIT the dirty window can degrade
* to a single page under streaming writes, turning large application
* writes into page-sized requests.
*
* Writes that already match the server's preferred alignment gain
* nothing from accumulating in the page cache, so send them through
* fuse_perform_write() instead, which packs requests up to max_write.
* They create no dirty pages, hence no DLM write lock needs to be cached
* for them. Unaligned writes keep using the writeback cache, where they
* can merge with neighbouring data.
*/
static bool fuse_use_writeback_cache(struct fuse_conn *fc, struct kiocb *iocb,

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about small aligned writes, such as 4K or 8K? What if we had a threshold equal to our bucket size (2M), so that writes which are aligned but smaller than 2M still go through the write-back cache?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, still writeback, in the hope that more writes will follow and we can write aligned later. If we get a flush or invalidate during this delay, then we haven't lost anything.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will fc->big_writes be true?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a parameter from the fuse server

@hbirth hbirth Jun 15, 2026

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is false, FUSE will write every page separately.

struct iov_iter *from)
{
/*
 * Returns true when this write should go through the writeback cache and
 * false when the caller should take its writethrough path instead.
 */
size_t count = iov_iter_count(from);
unsigned int wt;
u64 align;
bool ret;

/*
 * Without big_writes the writeback cache is always used.
 * NOTE(review): presumably because writethrough would then not be able
 * to send large requests - confirm.
 */
if (!fc->big_writes)
return true;

/* these rely on the semantics of their current paths */
if (iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND | IOCB_NOWAIT))
return true;

/*
 * Size-based bypass: a non-zero threshold sends every write of at least
 * that many bytes to writethrough, regardless of alignment. Zero turns
 * this check off.
 */
wt = READ_ONCE(fc->writethrough_threshold);
if (wt && count >= wt)
return false;

/*
 * alignment_pages is expressed in pages; convert it to bytes and fall
 * back to a single page when it is zero.
 * NOTE(review): IS_ALIGNED() presumably needs a power-of-two alignment -
 * confirm alignment_pages is always one.
 */
align = fc->alignment_pages ?
(u64)fc->alignment_pages << PAGE_SHIFT : PAGE_SIZE;

/* Keep the cache only when the offset or the length is not aligned. */
ret = !IS_ALIGNED(iocb->ki_pos, align) || !IS_ALIGNED((u64)count, align);
return ret;
}

static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
Expand All @@ -1486,6 +1527,9 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto writethrough;
}

if (!fuse_use_writeback_cache(fc, iocb, from))
goto writethrough;

/* if we have dlm support acquire the lock for the area
* we are writing into */
if (fc->dlm) {
Expand Down
5 changes: 4 additions & 1 deletion fs/fuse/fuse_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
#define FUSE_NAME_MAX (PATH_MAX - 1)

/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
#define FUSE_CTL_NUM_DENTRIES 6

/** Maximum of max_pages received in init_out */
extern unsigned int fuse_max_pages_limit;
Expand Down Expand Up @@ -952,6 +952,9 @@ struct fuse_conn {
/* The foffset alignment in PAGE */
unsigned int alignment_pages;

/* Buffered writes >= this size bypass the writeback cache (0 = off) */
unsigned int writethrough_threshold;

};

/*
Expand Down
2 changes: 1 addition & 1 deletion fs/fuse/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1745,7 +1745,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)

/* fuse does it's own writeback accounting */
sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
sb->s_bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;

/*
* For a single fuse filesystem use max 1% of dirty +
Expand Down
7 changes: 4 additions & 3 deletions include/uapi/linux/fuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,8 @@ struct fuse_file_lock {
* FUSE_OVER_IO_URING: Indicate that client supports io-uring
* FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation
* FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation
* FUSE_ALIGN_PG_ORDER: page order (power of 2 exponent for number of pages) for
* optimal io-size alignment
* FUSE_ALIGN_PG_ORDER: alignment order (power of 2 exponent of the IO size
* in bytes) for optimal io-size alignment
* FUSE_URING_REDUCED_Q: Client (kernel) supports less queues - Server is free
* to register between 1 and nr-core io-uring queues
*/
Expand Down Expand Up @@ -926,7 +926,8 @@ struct fuse_init_in {
#define FUSE_COMPAT_22_INIT_OUT_SIZE 24

/*
* align_page_order: Number of pages for optimal IO, or a multiple of that
* align_page_order: log2 of the optimal IO size in bytes; IO is optimal
* when sized and aligned to (1 << align_page_order) or a multiple of it
*/
struct fuse_init_out {
uint32_t major;
Expand Down