forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/fs/fuse/file.c
....@@ -20,9 +20,19 @@
2020 #include <linux/uio.h>
2121 #include <linux/fs.h>
2222
23
-static const struct file_operations fuse_direct_io_file_operations;
23
+static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
24
+ struct fuse_page_desc **desc)
25
+{
26
+ struct page **pages;
2427
25
-static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
28
+ pages = kzalloc(npages * (sizeof(struct page *) +
29
+ sizeof(struct fuse_page_desc)), flags);
30
+ *desc = (void *) (pages + npages);
31
+
32
+ return pages;
33
+}
34
+
35
+static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
2636 int opcode, struct fuse_open_out *outargp)
2737 {
2838 struct fuse_open_in inarg;
....@@ -30,50 +40,57 @@
3040
3141 memset(&inarg, 0, sizeof(inarg));
3242 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
33
- if (!fc->atomic_o_trunc)
43
+ if (!fm->fc->atomic_o_trunc)
3444 inarg.flags &= ~O_TRUNC;
35
- args.in.h.opcode = opcode;
36
- args.in.h.nodeid = nodeid;
37
- args.in.numargs = 1;
38
- args.in.args[0].size = sizeof(inarg);
39
- args.in.args[0].value = &inarg;
40
- args.out.numargs = 1;
41
- args.out.args[0].size = sizeof(*outargp);
42
- args.out.args[0].value = outargp;
45
+ args.opcode = opcode;
46
+ args.nodeid = nodeid;
47
+ args.in_numargs = 1;
48
+ args.in_args[0].size = sizeof(inarg);
49
+ args.in_args[0].value = &inarg;
50
+ args.out_numargs = 1;
51
+ args.out_args[0].size = sizeof(*outargp);
52
+ args.out_args[0].value = outargp;
4353
44
- return fuse_simple_request(fc, &args);
54
+ return fuse_simple_request(fm, &args);
4555 }
4656
47
-struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57
+struct fuse_release_args {
58
+ struct fuse_args args;
59
+ struct fuse_release_in inarg;
60
+ struct inode *inode;
61
+};
62
+
63
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
4864 {
4965 struct fuse_file *ff;
5066
51
- ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
67
+ ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
5268 if (unlikely(!ff))
5369 return NULL;
5470
55
- ff->fc = fc;
56
- ff->reserved_req = fuse_request_alloc(0);
57
- if (unlikely(!ff->reserved_req)) {
71
+ ff->fm = fm;
72
+ ff->release_args = kzalloc(sizeof(*ff->release_args),
73
+ GFP_KERNEL_ACCOUNT);
74
+ if (!ff->release_args) {
5875 kfree(ff);
5976 return NULL;
6077 }
6178
6279 INIT_LIST_HEAD(&ff->write_entry);
80
+ mutex_init(&ff->readdir.lock);
6381 refcount_set(&ff->count, 1);
6482 RB_CLEAR_NODE(&ff->polled_node);
6583 init_waitqueue_head(&ff->poll_wait);
6684
67
- spin_lock(&fc->lock);
68
- ff->kh = ++fc->khctr;
69
- spin_unlock(&fc->lock);
85
+ ff->kh = atomic64_inc_return(&fm->fc->khctr);
7086
7187 return ff;
7288 }
7389
7490 void fuse_file_free(struct fuse_file *ff)
7591 {
76
- fuse_request_free(ff->reserved_req);
92
+ kfree(ff->release_args);
93
+ mutex_destroy(&ff->readdir.lock);
7794 kfree(ff);
7895 }
7996
....@@ -83,65 +100,67 @@
83100 return ff;
84101 }
85102
86
-static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
103
+static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
104
+ int error)
87105 {
88
- iput(req->misc.release.inode);
106
+ struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
107
+
108
+ iput(ra->inode);
109
+ kfree(ra);
89110 }
90111
91112 static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
92113 {
93114 if (refcount_dec_and_test(&ff->count)) {
94
- struct fuse_req *req = ff->reserved_req;
115
+ struct fuse_args *args = &ff->release_args->args;
95116
96
- if (ff->fc->no_open && !isdir) {
97
- /*
98
- * Drop the release request when client does not
99
- * implement 'open'
100
- */
101
- __clear_bit(FR_BACKGROUND, &req->flags);
102
- iput(req->misc.release.inode);
103
- fuse_put_request(ff->fc, req);
117
+ if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
118
+ /* Do nothing when client does not implement 'open' */
119
+ fuse_release_end(ff->fm, args, 0);
104120 } else if (sync) {
105
- __set_bit(FR_FORCE, &req->flags);
106
- __clear_bit(FR_BACKGROUND, &req->flags);
107
- fuse_request_send(ff->fc, req);
108
- iput(req->misc.release.inode);
109
- fuse_put_request(ff->fc, req);
121
+ fuse_simple_request(ff->fm, args);
122
+ fuse_release_end(ff->fm, args, 0);
110123 } else {
111
- req->end = fuse_release_end;
112
- __set_bit(FR_BACKGROUND, &req->flags);
113
- fuse_request_send_background(ff->fc, req);
124
+ args->end = fuse_release_end;
125
+ if (fuse_simple_background(ff->fm, args,
126
+ GFP_KERNEL | __GFP_NOFAIL))
127
+ fuse_release_end(ff->fm, args, -ENOTCONN);
114128 }
115129 kfree(ff);
116130 }
117131 }
118132
119
-int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
133
+int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
120134 bool isdir)
121135 {
136
+ struct fuse_conn *fc = fm->fc;
122137 struct fuse_file *ff;
123138 int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
124139
125
- ff = fuse_file_alloc(fc);
140
+ ff = fuse_file_alloc(fm);
126141 if (!ff)
127142 return -ENOMEM;
128143
129144 ff->fh = 0;
130
- ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
131
- if (!fc->no_open || isdir) {
145
+ /* Default for no-open */
146
+ ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
147
+ if (isdir ? !fc->no_opendir : !fc->no_open) {
132148 struct fuse_open_out outarg;
133149 int err;
134150
135
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
151
+ err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
136152 if (!err) {
137153 ff->fh = outarg.fh;
138154 ff->open_flags = outarg.open_flags;
139
-
140
- } else if (err != -ENOSYS || isdir) {
155
+ fuse_passthrough_setup(fc, ff, &outarg);
156
+ } else if (err != -ENOSYS) {
141157 fuse_file_free(ff);
142158 return err;
143159 } else {
144
- fc->no_open = 1;
160
+ if (isdir)
161
+ fc->no_opendir = 1;
162
+ else
163
+ fc->no_open = 1;
145164 }
146165 }
147166
....@@ -158,17 +177,16 @@
158177 static void fuse_link_write_file(struct file *file)
159178 {
160179 struct inode *inode = file_inode(file);
161
- struct fuse_conn *fc = get_fuse_conn(inode);
162180 struct fuse_inode *fi = get_fuse_inode(inode);
163181 struct fuse_file *ff = file->private_data;
164182 /*
165183 * file may be written through mmap, so chain it onto the
166184 * inode's write_files list
167185 */
168
- spin_lock(&fc->lock);
186
+ spin_lock(&fi->lock);
169187 if (list_empty(&ff->write_entry))
170188 list_add(&ff->write_entry, &fi->write_files);
171
- spin_unlock(&fc->lock);
189
+ spin_unlock(&fi->lock);
172190 }
173191
174192 void fuse_finish_open(struct inode *inode, struct file *file)
....@@ -176,8 +194,6 @@
176194 struct fuse_file *ff = file->private_data;
177195 struct fuse_conn *fc = get_fuse_conn(inode);
178196
179
- if (ff->open_flags & FOPEN_DIRECT_IO)
180
- file->f_op = &fuse_direct_io_file_operations;
181197 if (ff->open_flags & FOPEN_STREAM)
182198 stream_open(inode, file);
183199 else if (ff->open_flags & FOPEN_NONSEEKABLE)
....@@ -186,10 +202,10 @@
186202 if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
187203 struct fuse_inode *fi = get_fuse_inode(inode);
188204
189
- spin_lock(&fc->lock);
190
- fi->attr_version = ++fc->attr_version;
205
+ spin_lock(&fi->lock);
206
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
191207 i_size_write(inode, 0);
192
- spin_unlock(&fc->lock);
208
+ spin_unlock(&fi->lock);
193209 truncate_pagecache(inode, 0);
194210 fuse_invalidate_attr(inode);
195211 if (fc->writeback_cache)
....@@ -204,11 +220,14 @@
204220
205221 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
206222 {
207
- struct fuse_conn *fc = get_fuse_conn(inode);
223
+ struct fuse_mount *fm = get_fuse_mount(inode);
224
+ struct fuse_conn *fc = fm->fc;
208225 int err;
209226 bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
210227 fc->atomic_o_trunc &&
211228 fc->writeback_cache;
229
+ bool dax_truncate = (file->f_flags & O_TRUNC) &&
230
+ fc->atomic_o_trunc && FUSE_IS_DAX(inode);
212231
213232 if (fuse_is_bad(inode))
214233 return -EIO;
....@@ -217,17 +236,27 @@
217236 if (err)
218237 return err;
219238
220
- if (is_wb_truncate) {
239
+ if (is_wb_truncate || dax_truncate) {
221240 inode_lock(inode);
222241 fuse_set_nowrite(inode);
223242 }
224243
225
- err = fuse_do_open(fc, get_node_id(inode), file, isdir);
244
+ if (dax_truncate) {
245
+ down_write(&get_fuse_inode(inode)->i_mmap_sem);
246
+ err = fuse_dax_break_layouts(inode, 0, 0);
247
+ if (err)
248
+ goto out;
249
+ }
226250
251
+ err = fuse_do_open(fm, get_node_id(inode), file, isdir);
227252 if (!err)
228253 fuse_finish_open(inode, file);
229254
230
- if (is_wb_truncate) {
255
+out:
256
+ if (dax_truncate)
257
+ up_write(&get_fuse_inode(inode)->i_mmap_sem);
258
+
259
+ if (is_wb_truncate | dax_truncate) {
231260 fuse_release_nowrite(inode);
232261 inode_unlock(inode);
233262 }
....@@ -235,45 +264,54 @@
235264 return err;
236265 }
237266
238
-static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
267
+static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
268
+ int flags, int opcode)
239269 {
240
- struct fuse_conn *fc = ff->fc;
241
- struct fuse_req *req = ff->reserved_req;
242
- struct fuse_release_in *inarg = &req->misc.release.in;
270
+ struct fuse_conn *fc = ff->fm->fc;
271
+ struct fuse_release_args *ra = ff->release_args;
243272
273
+ /* Inode is NULL on error path of fuse_create_open() */
274
+ if (likely(fi)) {
275
+ spin_lock(&fi->lock);
276
+ list_del(&ff->write_entry);
277
+ spin_unlock(&fi->lock);
278
+ }
244279 spin_lock(&fc->lock);
245
- list_del(&ff->write_entry);
246280 if (!RB_EMPTY_NODE(&ff->polled_node))
247281 rb_erase(&ff->polled_node, &fc->polled_files);
248282 spin_unlock(&fc->lock);
249283
250284 wake_up_interruptible_all(&ff->poll_wait);
251285
252
- inarg->fh = ff->fh;
253
- inarg->flags = flags;
254
- req->in.h.opcode = opcode;
255
- req->in.h.nodeid = ff->nodeid;
256
- req->in.numargs = 1;
257
- req->in.args[0].size = sizeof(struct fuse_release_in);
258
- req->in.args[0].value = inarg;
286
+ ra->inarg.fh = ff->fh;
287
+ ra->inarg.flags = flags;
288
+ ra->args.in_numargs = 1;
289
+ ra->args.in_args[0].size = sizeof(struct fuse_release_in);
290
+ ra->args.in_args[0].value = &ra->inarg;
291
+ ra->args.opcode = opcode;
292
+ ra->args.nodeid = ff->nodeid;
293
+ ra->args.force = true;
294
+ ra->args.nocreds = true;
259295 }
260296
261297 void fuse_release_common(struct file *file, bool isdir)
262298 {
299
+ struct fuse_inode *fi = get_fuse_inode(file_inode(file));
263300 struct fuse_file *ff = file->private_data;
264
- struct fuse_req *req = ff->reserved_req;
301
+ struct fuse_release_args *ra = ff->release_args;
265302 int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
266303
267
- fuse_prepare_release(ff, file->f_flags, opcode);
304
+ fuse_passthrough_release(&ff->passthrough);
305
+
306
+ fuse_prepare_release(fi, ff, file->f_flags, opcode);
268307
269308 if (ff->flock) {
270
- struct fuse_release_in *inarg = &req->misc.release.in;
271
- inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
272
- inarg->lock_owner = fuse_lock_owner_id(ff->fc,
273
- (fl_owner_t) file);
309
+ ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
310
+ ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
311
+ (fl_owner_t) file);
274312 }
275313 /* Hold inode until release is finished */
276
- req->misc.release.inode = igrab(file_inode(file));
314
+ ra->inode = igrab(file_inode(file));
277315
278316 /*
279317 * Normally this will send the RELEASE request, however if
....@@ -284,7 +322,7 @@
284322 * synchronous RELEASE is allowed (and desirable) in this case
285323 * because the server can be trusted not to screw up.
286324 */
287
- fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
325
+ fuse_file_put(ff, ff->fm->fc->destroy, isdir);
288326 }
289327
290328 static int fuse_open(struct inode *inode, struct file *file)
....@@ -306,10 +344,10 @@
306344 return 0;
307345 }
308346
309
-void fuse_sync_release(struct fuse_file *ff, int flags)
347
+void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags)
310348 {
311349 WARN_ON(refcount_read(&ff->count) > 1);
312
- fuse_prepare_release(ff, flags, FUSE_RELEASE);
350
+ fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
313351 /*
314352 * iput(NULL) is a no-op and since the refcount is 1 and everything's
315353 * synchronous, we are fine with not doing igrab() here.
....@@ -340,6 +378,38 @@
340378 return (u64) v0 + ((u64) v1 << 32);
341379 }
342380
381
+struct fuse_writepage_args {
382
+ struct fuse_io_args ia;
383
+ struct rb_node writepages_entry;
384
+ struct list_head queue_entry;
385
+ struct fuse_writepage_args *next;
386
+ struct inode *inode;
387
+};
388
+
389
+static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
390
+ pgoff_t idx_from, pgoff_t idx_to)
391
+{
392
+ struct rb_node *n;
393
+
394
+ n = fi->writepages.rb_node;
395
+
396
+ while (n) {
397
+ struct fuse_writepage_args *wpa;
398
+ pgoff_t curr_index;
399
+
400
+ wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
401
+ WARN_ON(get_fuse_inode(wpa->inode) != fi);
402
+ curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
403
+ if (idx_from >= curr_index + wpa->ia.ap.num_pages)
404
+ n = n->rb_right;
405
+ else if (idx_to < curr_index)
406
+ n = n->rb_left;
407
+ else
408
+ return wpa;
409
+ }
410
+ return NULL;
411
+}
412
+
343413 /*
344414 * Check if any page in a range is under writeback
345415 *
....@@ -349,24 +419,12 @@
349419 static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
350420 pgoff_t idx_to)
351421 {
352
- struct fuse_conn *fc = get_fuse_conn(inode);
353422 struct fuse_inode *fi = get_fuse_inode(inode);
354
- struct fuse_req *req;
355
- bool found = false;
423
+ bool found;
356424
357
- spin_lock(&fc->lock);
358
- list_for_each_entry(req, &fi->writepages, writepages_entry) {
359
- pgoff_t curr_index;
360
-
361
- BUG_ON(req->inode != inode);
362
- curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
363
- if (idx_from < curr_index + req->num_pages &&
364
- curr_index <= idx_to) {
365
- found = true;
366
- break;
367
- }
368
- }
369
- spin_unlock(&fc->lock);
425
+ spin_lock(&fi->lock);
426
+ found = fuse_find_writeback(fi, idx_from, idx_to);
427
+ spin_unlock(&fi->lock);
370428
371429 return found;
372430 }
....@@ -382,12 +440,11 @@
382440 * Since fuse doesn't rely on the VM writeback tracking, this has to
383441 * use some other means.
384442 */
385
-static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
443
+static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
386444 {
387445 struct fuse_inode *fi = get_fuse_inode(inode);
388446
389447 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
390
- return 0;
391448 }
392449
393450 /*
....@@ -408,17 +465,14 @@
408465 static int fuse_flush(struct file *file, fl_owner_t id)
409466 {
410467 struct inode *inode = file_inode(file);
411
- struct fuse_conn *fc = get_fuse_conn(inode);
468
+ struct fuse_mount *fm = get_fuse_mount(inode);
412469 struct fuse_file *ff = file->private_data;
413
- struct fuse_req *req;
414470 struct fuse_flush_in inarg;
471
+ FUSE_ARGS(args);
415472 int err;
416473
417474 if (fuse_is_bad(inode))
418475 return -EIO;
419
-
420
- if (fc->no_flush)
421
- return 0;
422476
423477 err = write_inode_now(inode, 1);
424478 if (err)
....@@ -432,34 +486,61 @@
432486 if (err)
433487 return err;
434488
435
- req = fuse_get_req_nofail_nopages(fc, file);
489
+ err = 0;
490
+ if (fm->fc->no_flush)
491
+ goto inval_attr_out;
492
+
436493 memset(&inarg, 0, sizeof(inarg));
437494 inarg.fh = ff->fh;
438
- inarg.lock_owner = fuse_lock_owner_id(fc, id);
439
- req->in.h.opcode = FUSE_FLUSH;
440
- req->in.h.nodeid = get_node_id(inode);
441
- req->in.numargs = 1;
442
- req->in.args[0].size = sizeof(inarg);
443
- req->in.args[0].value = &inarg;
444
- __set_bit(FR_FORCE, &req->flags);
445
- fuse_request_send(fc, req);
446
- err = req->out.h.error;
447
- fuse_put_request(fc, req);
495
+ inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
496
+ args.opcode = FUSE_FLUSH;
497
+ args.nodeid = get_node_id(inode);
498
+ args.in_numargs = 1;
499
+ args.in_args[0].size = sizeof(inarg);
500
+ args.in_args[0].value = &inarg;
501
+ args.force = true;
502
+
503
+ err = fuse_simple_request(fm, &args);
448504 if (err == -ENOSYS) {
449
- fc->no_flush = 1;
505
+ fm->fc->no_flush = 1;
450506 err = 0;
451507 }
508
+
509
+inval_attr_out:
510
+ /*
511
+ * In memory i_blocks is not maintained by fuse, if writeback cache is
512
+ * enabled, i_blocks from cached attr may not be accurate.
513
+ */
514
+ if (!err && fm->fc->writeback_cache)
515
+ fuse_invalidate_attr(inode);
452516 return err;
453517 }
454518
455519 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
456
- int datasync, int isdir)
520
+ int datasync, int opcode)
457521 {
458522 struct inode *inode = file->f_mapping->host;
459
- struct fuse_conn *fc = get_fuse_conn(inode);
523
+ struct fuse_mount *fm = get_fuse_mount(inode);
460524 struct fuse_file *ff = file->private_data;
461525 FUSE_ARGS(args);
462526 struct fuse_fsync_in inarg;
527
+
528
+ memset(&inarg, 0, sizeof(inarg));
529
+ inarg.fh = ff->fh;
530
+ inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
531
+ args.opcode = opcode;
532
+ args.nodeid = get_node_id(inode);
533
+ args.in_numargs = 1;
534
+ args.in_args[0].size = sizeof(inarg);
535
+ args.in_args[0].value = &inarg;
536
+ return fuse_simple_request(fm, &args);
537
+}
538
+
539
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
540
+ int datasync)
541
+{
542
+ struct inode *inode = file->f_mapping->host;
543
+ struct fuse_conn *fc = get_fuse_conn(inode);
463544 int err;
464545
465546 if (fuse_is_bad(inode))
....@@ -491,65 +572,49 @@
491572 if (err)
492573 goto out;
493574
494
- if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
575
+ if (fc->no_fsync)
495576 goto out;
496577
497
- memset(&inarg, 0, sizeof(inarg));
498
- inarg.fh = ff->fh;
499
- inarg.fsync_flags = datasync ? 1 : 0;
500
- args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
501
- args.in.h.nodeid = get_node_id(inode);
502
- args.in.numargs = 1;
503
- args.in.args[0].size = sizeof(inarg);
504
- args.in.args[0].value = &inarg;
505
- err = fuse_simple_request(fc, &args);
578
+ err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
506579 if (err == -ENOSYS) {
507
- if (isdir)
508
- fc->no_fsyncdir = 1;
509
- else
510
- fc->no_fsync = 1;
580
+ fc->no_fsync = 1;
511581 err = 0;
512582 }
513583 out:
514584 inode_unlock(inode);
585
+
515586 return err;
516587 }
517588
518
-static int fuse_fsync(struct file *file, loff_t start, loff_t end,
519
- int datasync)
589
+void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
590
+ size_t count, int opcode)
520591 {
521
- return fuse_fsync_common(file, start, end, datasync, 0);
522
-}
523
-
524
-void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
525
- size_t count, int opcode)
526
-{
527
- struct fuse_read_in *inarg = &req->misc.read.in;
528592 struct fuse_file *ff = file->private_data;
593
+ struct fuse_args *args = &ia->ap.args;
529594
530
- inarg->fh = ff->fh;
531
- inarg->offset = pos;
532
- inarg->size = count;
533
- inarg->flags = file->f_flags;
534
- req->in.h.opcode = opcode;
535
- req->in.h.nodeid = ff->nodeid;
536
- req->in.numargs = 1;
537
- req->in.args[0].size = sizeof(struct fuse_read_in);
538
- req->in.args[0].value = inarg;
539
- req->out.argvar = 1;
540
- req->out.numargs = 1;
541
- req->out.args[0].size = count;
595
+ ia->read.in.fh = ff->fh;
596
+ ia->read.in.offset = pos;
597
+ ia->read.in.size = count;
598
+ ia->read.in.flags = file->f_flags;
599
+ args->opcode = opcode;
600
+ args->nodeid = ff->nodeid;
601
+ args->in_numargs = 1;
602
+ args->in_args[0].size = sizeof(ia->read.in);
603
+ args->in_args[0].value = &ia->read.in;
604
+ args->out_argvar = true;
605
+ args->out_numargs = 1;
606
+ args->out_args[0].size = count;
542607 }
543608
544
-static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
609
+static void fuse_release_user_pages(struct fuse_args_pages *ap,
610
+ bool should_dirty)
545611 {
546
- unsigned i;
612
+ unsigned int i;
547613
548
- for (i = 0; i < req->num_pages; i++) {
549
- struct page *page = req->pages[i];
614
+ for (i = 0; i < ap->num_pages; i++) {
550615 if (should_dirty)
551
- set_page_dirty_lock(page);
552
- put_page(page);
616
+ set_page_dirty_lock(ap->pages[i]);
617
+ put_page(ap->pages[i]);
553618 }
554619 }
555620
....@@ -608,9 +673,9 @@
608673 struct fuse_conn *fc = get_fuse_conn(inode);
609674 struct fuse_inode *fi = get_fuse_inode(inode);
610675
611
- spin_lock(&fc->lock);
612
- fi->attr_version = ++fc->attr_version;
613
- spin_unlock(&fc->lock);
676
+ spin_lock(&fi->lock);
677
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
678
+ spin_unlock(&fi->lock);
614679 }
615680
616681 io->iocb->ki_complete(io->iocb, res, 0);
....@@ -619,64 +684,97 @@
619684 kref_put(&io->refcnt, fuse_io_release);
620685 }
621686
622
-static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
687
+static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
688
+ unsigned int npages)
623689 {
624
- struct fuse_io_priv *io = req->io;
625
- ssize_t pos = -1;
690
+ struct fuse_io_args *ia;
626691
627
- fuse_release_user_pages(req, io->should_dirty);
628
-
629
- if (io->write) {
630
- if (req->misc.write.in.size != req->misc.write.out.size)
631
- pos = req->misc.write.in.offset - io->offset +
632
- req->misc.write.out.size;
633
- } else {
634
- if (req->misc.read.in.size != req->out.args[0].size)
635
- pos = req->misc.read.in.offset - io->offset +
636
- req->out.args[0].size;
692
+ ia = kzalloc(sizeof(*ia), GFP_KERNEL);
693
+ if (ia) {
694
+ ia->io = io;
695
+ ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
696
+ &ia->ap.descs);
697
+ if (!ia->ap.pages) {
698
+ kfree(ia);
699
+ ia = NULL;
700
+ }
637701 }
638
-
639
- fuse_aio_complete(io, req->out.h.error, pos);
702
+ return ia;
640703 }
641704
642
-static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
643
- size_t num_bytes, struct fuse_io_priv *io)
705
+static void fuse_io_free(struct fuse_io_args *ia)
644706 {
707
+ kfree(ia->ap.pages);
708
+ kfree(ia);
709
+}
710
+
711
+static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
712
+ int err)
713
+{
714
+ struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
715
+ struct fuse_io_priv *io = ia->io;
716
+ ssize_t pos = -1;
717
+
718
+ fuse_release_user_pages(&ia->ap, io->should_dirty);
719
+
720
+ if (err) {
721
+ /* Nothing */
722
+ } else if (io->write) {
723
+ if (ia->write.out.size > ia->write.in.size) {
724
+ err = -EIO;
725
+ } else if (ia->write.in.size != ia->write.out.size) {
726
+ pos = ia->write.in.offset - io->offset +
727
+ ia->write.out.size;
728
+ }
729
+ } else {
730
+ u32 outsize = args->out_args[0].size;
731
+
732
+ if (ia->read.in.size != outsize)
733
+ pos = ia->read.in.offset - io->offset + outsize;
734
+ }
735
+
736
+ fuse_aio_complete(io, err, pos);
737
+ fuse_io_free(ia);
738
+}
739
+
740
+static ssize_t fuse_async_req_send(struct fuse_mount *fm,
741
+ struct fuse_io_args *ia, size_t num_bytes)
742
+{
743
+ ssize_t err;
744
+ struct fuse_io_priv *io = ia->io;
745
+
645746 spin_lock(&io->lock);
646747 kref_get(&io->refcnt);
647748 io->size += num_bytes;
648749 io->reqs++;
649750 spin_unlock(&io->lock);
650751
651
- req->io = io;
652
- req->end = fuse_aio_complete_req;
653
-
654
- __fuse_get_request(req);
655
- fuse_request_send_background(fc, req);
752
+ ia->ap.args.end = fuse_aio_complete_req;
753
+ ia->ap.args.may_block = io->should_dirty;
754
+ err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
755
+ if (err)
756
+ fuse_aio_complete_req(fm, &ia->ap.args, err);
656757
657758 return num_bytes;
658759 }
659760
660
-static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
661
- loff_t pos, size_t count, fl_owner_t owner)
761
+static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
762
+ fl_owner_t owner)
662763 {
663
- struct file *file = io->iocb->ki_filp;
764
+ struct file *file = ia->io->iocb->ki_filp;
664765 struct fuse_file *ff = file->private_data;
665
- struct fuse_conn *fc = ff->fc;
766
+ struct fuse_mount *fm = ff->fm;
666767
667
- fuse_read_fill(req, file, pos, count, FUSE_READ);
768
+ fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
668769 if (owner != NULL) {
669
- struct fuse_read_in *inarg = &req->misc.read.in;
670
-
671
- inarg->read_flags |= FUSE_READ_LOCKOWNER;
672
- inarg->lock_owner = fuse_lock_owner_id(fc, owner);
770
+ ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
771
+ ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
673772 }
674773
675
- if (io->async)
676
- return fuse_async_req_send(fc, req, count, io);
774
+ if (ia->io->async)
775
+ return fuse_async_req_send(fm, ia, count);
677776
678
- fuse_request_send(fc, req);
679
- return req->out.args[0].size;
777
+ return fuse_simple_request(fm, &ia->ap.args);
680778 }
681779
682780 static void fuse_read_update_size(struct inode *inode, loff_t size,
....@@ -685,19 +783,18 @@
685783 struct fuse_conn *fc = get_fuse_conn(inode);
686784 struct fuse_inode *fi = get_fuse_inode(inode);
687785
688
- spin_lock(&fc->lock);
786
+ spin_lock(&fi->lock);
689787 if (attr_ver == fi->attr_version && size < inode->i_size &&
690788 !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
691
- fi->attr_version = ++fc->attr_version;
789
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
692790 i_size_write(inode, size);
693791 }
694
- spin_unlock(&fc->lock);
792
+ spin_unlock(&fi->lock);
695793 }
696794
697
-static void fuse_short_read(struct fuse_req *req, struct inode *inode,
698
- u64 attr_ver)
795
+static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
796
+ struct fuse_args_pages *ap)
699797 {
700
- size_t num_read = req->out.args[0].size;
701798 struct fuse_conn *fc = get_fuse_conn(inode);
702799
703800 if (fc->writeback_cache) {
....@@ -710,28 +807,31 @@
710807 int start_idx = num_read >> PAGE_SHIFT;
711808 size_t off = num_read & (PAGE_SIZE - 1);
712809
713
- for (i = start_idx; i < req->num_pages; i++) {
714
- zero_user_segment(req->pages[i], off, PAGE_SIZE);
810
+ for (i = start_idx; i < ap->num_pages; i++) {
811
+ zero_user_segment(ap->pages[i], off, PAGE_SIZE);
715812 off = 0;
716813 }
717814 } else {
718
- loff_t pos = page_offset(req->pages[0]) + num_read;
815
+ loff_t pos = page_offset(ap->pages[0]) + num_read;
719816 fuse_read_update_size(inode, pos, attr_ver);
720817 }
721818 }
722819
723820 static int fuse_do_readpage(struct file *file, struct page *page)
724821 {
725
- struct kiocb iocb;
726
- struct fuse_io_priv io;
727822 struct inode *inode = page->mapping->host;
728
- struct fuse_conn *fc = get_fuse_conn(inode);
729
- struct fuse_req *req;
730
- size_t num_read;
823
+ struct fuse_mount *fm = get_fuse_mount(inode);
731824 loff_t pos = page_offset(page);
732
- size_t count = PAGE_SIZE;
825
+ struct fuse_page_desc desc = { .length = PAGE_SIZE };
826
+ struct fuse_io_args ia = {
827
+ .ap.args.page_zeroing = true,
828
+ .ap.args.out_pages = true,
829
+ .ap.num_pages = 1,
830
+ .ap.pages = &page,
831
+ .ap.descs = &desc,
832
+ };
833
+ ssize_t res;
733834 u64 attr_ver;
734
- int err;
735835
736836 /*
737837 * Page writeback can extend beyond the lifetime of the
....@@ -740,35 +840,25 @@
740840 */
741841 fuse_wait_on_page_writeback(inode, page->index);
742842
743
- req = fuse_get_req(fc, 1);
744
- if (IS_ERR(req))
745
- return PTR_ERR(req);
843
+ attr_ver = fuse_get_attr_version(fm->fc);
746844
747
- attr_ver = fuse_get_attr_version(fc);
845
+ /* Don't overflow end offset */
846
+ if (pos + (desc.length - 1) == LLONG_MAX)
847
+ desc.length--;
748848
749
- req->out.page_zeroing = 1;
750
- req->out.argpages = 1;
751
- req->num_pages = 1;
752
- req->pages[0] = page;
753
- req->page_descs[0].length = count;
754
- init_sync_kiocb(&iocb, file);
755
- io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb);
756
- num_read = fuse_send_read(req, &io, pos, count, NULL);
757
- err = req->out.h.error;
849
+ fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
850
+ res = fuse_simple_request(fm, &ia.ap.args);
851
+ if (res < 0)
852
+ return res;
853
+ /*
854
+ * Short read means EOF. If file size is larger, truncate it
855
+ */
856
+ if (res < desc.length)
857
+ fuse_short_read(inode, attr_ver, res, &ia.ap);
758858
759
- if (!err) {
760
- /*
761
- * Short read means EOF. If file size is larger, truncate it
762
- */
763
- if (num_read < count)
764
- fuse_short_read(req, inode, attr_ver);
859
+ SetPageUptodate(page);
765860
766
- SetPageUptodate(page);
767
- }
768
-
769
- fuse_put_request(fc, req);
770
-
771
- return err;
861
+ return 0;
772862 }
773863
774864 static int fuse_readpage(struct file *file, struct page *page)
....@@ -787,15 +877,18 @@
787877 return err;
788878 }
789879
790
-static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
880
+static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
881
+ int err)
791882 {
792883 int i;
793
- size_t count = req->misc.read.in.size;
794
- size_t num_read = req->out.args[0].size;
884
+ struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
885
+ struct fuse_args_pages *ap = &ia->ap;
886
+ size_t count = ia->read.in.size;
887
+ size_t num_read = args->out_args[0].size;
795888 struct address_space *mapping = NULL;
796889
797
- for (i = 0; mapping == NULL && i < req->num_pages; i++)
798
- mapping = req->pages[i]->mapping;
890
+ for (i = 0; mapping == NULL && i < ap->num_pages; i++)
891
+ mapping = ap->pages[i]->mapping;
799892
800893 if (mapping) {
801894 struct inode *inode = mapping->host;
....@@ -803,139 +896,104 @@
803896 /*
804897 * Short read means EOF. If file size is larger, truncate it
805898 */
806
- if (!req->out.h.error && num_read < count)
807
- fuse_short_read(req, inode, req->misc.read.attr_ver);
899
+ if (!err && num_read < count)
900
+ fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
808901
809902 fuse_invalidate_atime(inode);
810903 }
811904
812
- for (i = 0; i < req->num_pages; i++) {
813
- struct page *page = req->pages[i];
814
- if (!req->out.h.error)
905
+ for (i = 0; i < ap->num_pages; i++) {
906
+ struct page *page = ap->pages[i];
907
+
908
+ if (!err)
815909 SetPageUptodate(page);
816910 else
817911 SetPageError(page);
818912 unlock_page(page);
819913 put_page(page);
820914 }
821
- if (req->ff)
822
- fuse_file_put(req->ff, false, false);
915
+ if (ia->ff)
916
+ fuse_file_put(ia->ff, false, false);
917
+
918
+ fuse_io_free(ia);
823919 }
824920
825
-static void fuse_send_readpages(struct fuse_req *req, struct file *file)
921
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
826922 {
827923 struct fuse_file *ff = file->private_data;
828
- struct fuse_conn *fc = ff->fc;
829
- loff_t pos = page_offset(req->pages[0]);
830
- size_t count = req->num_pages << PAGE_SHIFT;
831
-
832
- req->out.argpages = 1;
833
- req->out.page_zeroing = 1;
834
- req->out.page_replace = 1;
835
- fuse_read_fill(req, file, pos, count, FUSE_READ);
836
- req->misc.read.attr_ver = fuse_get_attr_version(fc);
837
- if (fc->async_read) {
838
- req->ff = fuse_file_get(ff);
839
- req->end = fuse_readpages_end;
840
- fuse_request_send_background(fc, req);
841
- } else {
842
- fuse_request_send(fc, req);
843
- fuse_readpages_end(fc, req);
844
- fuse_put_request(fc, req);
845
- }
846
-}
847
-
848
-struct fuse_fill_data {
849
- struct fuse_req *req;
850
- struct file *file;
851
- struct inode *inode;
852
- unsigned nr_pages;
853
-};
854
-
855
-static int fuse_readpages_fill(void *_data, struct page *page)
856
-{
857
- struct fuse_fill_data *data = _data;
858
- struct fuse_req *req = data->req;
859
- struct inode *inode = data->inode;
860
- struct fuse_conn *fc = get_fuse_conn(inode);
861
-
862
- fuse_wait_on_page_writeback(inode, page->index);
863
-
864
- if (req->num_pages &&
865
- (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
866
- (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
867
- req->pages[req->num_pages - 1]->index + 1 != page->index)) {
868
- int nr_alloc = min_t(unsigned, data->nr_pages,
869
- FUSE_MAX_PAGES_PER_REQ);
870
- fuse_send_readpages(req, data->file);
871
- if (fc->async_read)
872
- req = fuse_get_req_for_background(fc, nr_alloc);
873
- else
874
- req = fuse_get_req(fc, nr_alloc);
875
-
876
- data->req = req;
877
- if (IS_ERR(req)) {
878
- unlock_page(page);
879
- return PTR_ERR(req);
880
- }
881
- }
882
-
883
- if (WARN_ON(req->num_pages >= req->max_pages)) {
884
- unlock_page(page);
885
- fuse_put_request(fc, req);
886
- return -EIO;
887
- }
888
-
889
- get_page(page);
890
- req->pages[req->num_pages] = page;
891
- req->page_descs[req->num_pages].length = PAGE_SIZE;
892
- req->num_pages++;
893
- data->nr_pages--;
894
- return 0;
895
-}
896
-
897
-static int fuse_readpages(struct file *file, struct address_space *mapping,
898
- struct list_head *pages, unsigned nr_pages)
899
-{
900
- struct inode *inode = mapping->host;
901
- struct fuse_conn *fc = get_fuse_conn(inode);
902
- struct fuse_fill_data data;
924
+ struct fuse_mount *fm = ff->fm;
925
+ struct fuse_args_pages *ap = &ia->ap;
926
+ loff_t pos = page_offset(ap->pages[0]);
927
+ size_t count = ap->num_pages << PAGE_SHIFT;
928
+ ssize_t res;
903929 int err;
904
- int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
905930
906
- err = -EIO;
907
- if (fuse_is_bad(inode))
908
- goto out;
931
+ ap->args.out_pages = true;
932
+ ap->args.page_zeroing = true;
933
+ ap->args.page_replace = true;
909934
910
- data.file = file;
911
- data.inode = inode;
912
- if (fc->async_read)
913
- data.req = fuse_get_req_for_background(fc, nr_alloc);
914
- else
915
- data.req = fuse_get_req(fc, nr_alloc);
916
- data.nr_pages = nr_pages;
917
- err = PTR_ERR(data.req);
918
- if (IS_ERR(data.req))
919
- goto out;
920
-
921
- err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
922
- if (!err) {
923
- if (data.req->num_pages)
924
- fuse_send_readpages(data.req, file);
925
- else
926
- fuse_put_request(fc, data.req);
935
+ /* Don't overflow end offset */
936
+ if (pos + (count - 1) == LLONG_MAX) {
937
+ count--;
938
+ ap->descs[ap->num_pages - 1].length--;
927939 }
928
-out:
929
- return err;
940
+ WARN_ON((loff_t) (pos + count) < 0);
941
+
942
+ fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
943
+ ia->read.attr_ver = fuse_get_attr_version(fm->fc);
944
+ if (fm->fc->async_read) {
945
+ ia->ff = fuse_file_get(ff);
946
+ ap->args.end = fuse_readpages_end;
947
+ err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
948
+ if (!err)
949
+ return;
950
+ } else {
951
+ res = fuse_simple_request(fm, &ap->args);
952
+ err = res < 0 ? res : 0;
953
+ }
954
+ fuse_readpages_end(fm, &ap->args, err);
930955 }
931956
932
-static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
957
+static void fuse_readahead(struct readahead_control *rac)
958
+{
959
+ struct inode *inode = rac->mapping->host;
960
+ struct fuse_conn *fc = get_fuse_conn(inode);
961
+ unsigned int i, max_pages, nr_pages = 0;
962
+
963
+ if (fuse_is_bad(inode))
964
+ return;
965
+
966
+ max_pages = min_t(unsigned int, fc->max_pages,
967
+ fc->max_read / PAGE_SIZE);
968
+
969
+ for (;;) {
970
+ struct fuse_io_args *ia;
971
+ struct fuse_args_pages *ap;
972
+
973
+ nr_pages = readahead_count(rac) - nr_pages;
974
+ if (nr_pages > max_pages)
975
+ nr_pages = max_pages;
976
+ if (nr_pages == 0)
977
+ break;
978
+ ia = fuse_io_alloc(NULL, nr_pages);
979
+ if (!ia)
980
+ return;
981
+ ap = &ia->ap;
982
+ nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
983
+ for (i = 0; i < nr_pages; i++) {
984
+ fuse_wait_on_page_writeback(inode,
985
+ readahead_index(rac) + i);
986
+ ap->descs[i].length = PAGE_SIZE;
987
+ }
988
+ ap->num_pages = nr_pages;
989
+ fuse_send_readpages(ia, rac->file);
990
+ }
991
+}
992
+
993
+static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
933994 {
934995 struct inode *inode = iocb->ki_filp->f_mapping->host;
935996 struct fuse_conn *fc = get_fuse_conn(inode);
936
-
937
- if (fuse_is_bad(inode))
938
- return -EIO;
939997
940998 /*
941999 * In auto invalidate mode, always update attributes on read.
....@@ -953,54 +1011,65 @@
9531011 return generic_file_read_iter(iocb, to);
9541012 }
9551013
956
-static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
957
- loff_t pos, size_t count)
1014
+static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
1015
+ loff_t pos, size_t count)
9581016 {
959
- struct fuse_write_in *inarg = &req->misc.write.in;
960
- struct fuse_write_out *outarg = &req->misc.write.out;
1017
+ struct fuse_args *args = &ia->ap.args;
9611018
962
- inarg->fh = ff->fh;
963
- inarg->offset = pos;
964
- inarg->size = count;
965
- req->in.h.opcode = FUSE_WRITE;
966
- req->in.h.nodeid = ff->nodeid;
967
- req->in.numargs = 2;
968
- if (ff->fc->minor < 9)
969
- req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
1019
+ ia->write.in.fh = ff->fh;
1020
+ ia->write.in.offset = pos;
1021
+ ia->write.in.size = count;
1022
+ args->opcode = FUSE_WRITE;
1023
+ args->nodeid = ff->nodeid;
1024
+ args->in_numargs = 2;
1025
+ if (ff->fm->fc->minor < 9)
1026
+ args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
9701027 else
971
- req->in.args[0].size = sizeof(struct fuse_write_in);
972
- req->in.args[0].value = inarg;
973
- req->in.args[1].size = count;
974
- req->out.numargs = 1;
975
- req->out.args[0].size = sizeof(struct fuse_write_out);
976
- req->out.args[0].value = outarg;
1028
+ args->in_args[0].size = sizeof(ia->write.in);
1029
+ args->in_args[0].value = &ia->write.in;
1030
+ args->in_args[1].size = count;
1031
+ args->out_numargs = 1;
1032
+ args->out_args[0].size = sizeof(ia->write.out);
1033
+ args->out_args[0].value = &ia->write.out;
9771034 }
9781035
979
-static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
980
- loff_t pos, size_t count, fl_owner_t owner)
1036
+static unsigned int fuse_write_flags(struct kiocb *iocb)
9811037 {
982
- struct kiocb *iocb = io->iocb;
1038
+ unsigned int flags = iocb->ki_filp->f_flags;
1039
+
1040
+ if (iocb->ki_flags & IOCB_DSYNC)
1041
+ flags |= O_DSYNC;
1042
+ if (iocb->ki_flags & IOCB_SYNC)
1043
+ flags |= O_SYNC;
1044
+
1045
+ return flags;
1046
+}
1047
+
1048
+static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
1049
+ size_t count, fl_owner_t owner)
1050
+{
1051
+ struct kiocb *iocb = ia->io->iocb;
9831052 struct file *file = iocb->ki_filp;
9841053 struct fuse_file *ff = file->private_data;
985
- struct fuse_conn *fc = ff->fc;
986
- struct fuse_write_in *inarg = &req->misc.write.in;
1054
+ struct fuse_mount *fm = ff->fm;
1055
+ struct fuse_write_in *inarg = &ia->write.in;
1056
+ ssize_t err;
9871057
988
- fuse_write_fill(req, ff, pos, count);
989
- inarg->flags = file->f_flags;
990
- if (iocb->ki_flags & IOCB_DSYNC)
991
- inarg->flags |= O_DSYNC;
992
- if (iocb->ki_flags & IOCB_SYNC)
993
- inarg->flags |= O_SYNC;
1058
+ fuse_write_args_fill(ia, ff, pos, count);
1059
+ inarg->flags = fuse_write_flags(iocb);
9941060 if (owner != NULL) {
9951061 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
996
- inarg->lock_owner = fuse_lock_owner_id(fc, owner);
1062
+ inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
9971063 }
9981064
999
- if (io->async)
1000
- return fuse_async_req_send(fc, req, count, io);
1065
+ if (ia->io->async)
1066
+ return fuse_async_req_send(fm, ia, count);
10011067
1002
- fuse_request_send(fc, req);
1003
- return req->misc.write.out.size;
1068
+ err = fuse_simple_request(fm, &ia->ap.args);
1069
+ if (!err && ia->write.out.size > count)
1070
+ err = -EIO;
1071
+
1072
+ return err ?: ia->write.out.size;
10041073 }
10051074
10061075 bool fuse_write_update_size(struct inode *inode, loff_t pos)
....@@ -1009,63 +1078,78 @@
10091078 struct fuse_inode *fi = get_fuse_inode(inode);
10101079 bool ret = false;
10111080
1012
- spin_lock(&fc->lock);
1013
- fi->attr_version = ++fc->attr_version;
1081
+ spin_lock(&fi->lock);
1082
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
10141083 if (pos > inode->i_size) {
10151084 i_size_write(inode, pos);
10161085 ret = true;
10171086 }
1018
- spin_unlock(&fc->lock);
1087
+ spin_unlock(&fi->lock);
10191088
10201089 return ret;
10211090 }
10221091
1023
-static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
1024
- struct inode *inode, loff_t pos,
1025
- size_t count)
1092
+static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
1093
+ struct kiocb *iocb, struct inode *inode,
1094
+ loff_t pos, size_t count)
10261095 {
1027
- size_t res;
1028
- unsigned offset;
1029
- unsigned i;
1030
- struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1096
+ struct fuse_args_pages *ap = &ia->ap;
1097
+ struct file *file = iocb->ki_filp;
1098
+ struct fuse_file *ff = file->private_data;
1099
+ struct fuse_mount *fm = ff->fm;
1100
+ unsigned int offset, i;
1101
+ bool short_write;
1102
+ int err;
10311103
1032
- for (i = 0; i < req->num_pages; i++)
1033
- fuse_wait_on_page_writeback(inode, req->pages[i]->index);
1104
+ for (i = 0; i < ap->num_pages; i++)
1105
+ fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
10341106
1035
- res = fuse_send_write(req, &io, pos, count, NULL);
1107
+ fuse_write_args_fill(ia, ff, pos, count);
1108
+ ia->write.in.flags = fuse_write_flags(iocb);
10361109
1037
- offset = req->page_descs[0].offset;
1038
- count = res;
1039
- for (i = 0; i < req->num_pages; i++) {
1040
- struct page *page = req->pages[i];
1110
+ err = fuse_simple_request(fm, &ap->args);
1111
+ if (!err && ia->write.out.size > count)
1112
+ err = -EIO;
10411113
1042
- if (!req->out.h.error && !offset && count >= PAGE_SIZE)
1043
- SetPageUptodate(page);
1114
+ short_write = ia->write.out.size < count;
1115
+ offset = ap->descs[0].offset;
1116
+ count = ia->write.out.size;
1117
+ for (i = 0; i < ap->num_pages; i++) {
1118
+ struct page *page = ap->pages[i];
10441119
1045
- if (count > PAGE_SIZE - offset)
1046
- count -= PAGE_SIZE - offset;
1047
- else
1048
- count = 0;
1049
- offset = 0;
1050
-
1051
- unlock_page(page);
1120
+ if (err) {
1121
+ ClearPageUptodate(page);
1122
+ } else {
1123
+ if (count >= PAGE_SIZE - offset)
1124
+ count -= PAGE_SIZE - offset;
1125
+ else {
1126
+ if (short_write)
1127
+ ClearPageUptodate(page);
1128
+ count = 0;
1129
+ }
1130
+ offset = 0;
1131
+ }
1132
+ if (ia->write.page_locked && (i == ap->num_pages - 1))
1133
+ unlock_page(page);
10521134 put_page(page);
10531135 }
10541136
1055
- return res;
1137
+ return err;
10561138 }
10571139
1058
-static ssize_t fuse_fill_write_pages(struct fuse_req *req,
1059
- struct address_space *mapping,
1060
- struct iov_iter *ii, loff_t pos)
1140
+static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
1141
+ struct address_space *mapping,
1142
+ struct iov_iter *ii, loff_t pos,
1143
+ unsigned int max_pages)
10611144 {
1145
+ struct fuse_args_pages *ap = &ia->ap;
10621146 struct fuse_conn *fc = get_fuse_conn(mapping->host);
10631147 unsigned offset = pos & (PAGE_SIZE - 1);
10641148 size_t count = 0;
10651149 int err;
10661150
1067
- req->in.argpages = 1;
1068
- req->page_descs[0].offset = offset;
1151
+ ap->args.in_pages = true;
1152
+ ap->descs[0].offset = offset;
10691153
10701154 do {
10711155 size_t tmp;
....@@ -1101,9 +1185,9 @@
11011185 }
11021186
11031187 err = 0;
1104
- req->pages[req->num_pages] = page;
1105
- req->page_descs[req->num_pages].length = tmp;
1106
- req->num_pages++;
1188
+ ap->pages[ap->num_pages] = page;
1189
+ ap->descs[ap->num_pages].length = tmp;
1190
+ ap->num_pages++;
11071191
11081192 count += tmp;
11091193 pos += tmp;
....@@ -1111,20 +1195,31 @@
11111195 if (offset == PAGE_SIZE)
11121196 offset = 0;
11131197
1198
+ /* If we copied full page, mark it uptodate */
1199
+ if (tmp == PAGE_SIZE)
1200
+ SetPageUptodate(page);
1201
+
1202
+ if (PageUptodate(page)) {
1203
+ unlock_page(page);
1204
+ } else {
1205
+ ia->write.page_locked = true;
1206
+ break;
1207
+ }
11141208 if (!fc->big_writes)
11151209 break;
11161210 } while (iov_iter_count(ii) && count < fc->max_write &&
1117
- req->num_pages < req->max_pages && offset == 0);
1211
+ ap->num_pages < max_pages && offset == 0);
11181212
11191213 return count > 0 ? count : err;
11201214 }
11211215
1122
-static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
1216
+static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
1217
+ unsigned int max_pages)
11231218 {
1124
- return min_t(unsigned,
1219
+ return min_t(unsigned int,
11251220 ((pos + len - 1) >> PAGE_SHIFT) -
11261221 (pos >> PAGE_SHIFT) + 1,
1127
- FUSE_MAX_PAGES_PER_REQ);
1222
+ max_pages);
11281223 }
11291224
11301225 static ssize_t fuse_perform_write(struct kiocb *iocb,
....@@ -1137,33 +1232,31 @@
11371232 int err = 0;
11381233 ssize_t res = 0;
11391234
1140
- if (fuse_is_bad(inode))
1141
- return -EIO;
1142
-
11431235 if (inode->i_size < pos + iov_iter_count(ii))
11441236 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
11451237
11461238 do {
1147
- struct fuse_req *req;
11481239 ssize_t count;
1149
- unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
1240
+ struct fuse_io_args ia = {};
1241
+ struct fuse_args_pages *ap = &ia.ap;
1242
+ unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
1243
+ fc->max_pages);
11501244
1151
- req = fuse_get_req(fc, nr_pages);
1152
- if (IS_ERR(req)) {
1153
- err = PTR_ERR(req);
1245
+ ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
1246
+ if (!ap->pages) {
1247
+ err = -ENOMEM;
11541248 break;
11551249 }
11561250
1157
- count = fuse_fill_write_pages(req, mapping, ii, pos);
1251
+ count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
11581252 if (count <= 0) {
11591253 err = count;
11601254 } else {
1161
- size_t num_written;
1162
-
1163
- num_written = fuse_send_write_pages(req, iocb, inode,
1164
- pos, count);
1165
- err = req->out.h.error;
1255
+ err = fuse_send_write_pages(&ia, iocb, inode,
1256
+ pos, count);
11661257 if (!err) {
1258
+ size_t num_written = ia.write.out.size;
1259
+
11671260 res += num_written;
11681261 pos += num_written;
11691262
....@@ -1172,7 +1265,7 @@
11721265 err = -EIO;
11731266 }
11741267 }
1175
- fuse_put_request(fc, req);
1268
+ kfree(ap->pages);
11761269 } while (!err && iov_iter_count(ii));
11771270
11781271 if (res > 0)
....@@ -1184,7 +1277,7 @@
11841277 return res > 0 ? res : err;
11851278 }
11861279
1187
-static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1280
+static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
11881281 {
11891282 struct file *file = iocb->ki_filp;
11901283 struct address_space *mapping = file->f_mapping;
....@@ -1193,9 +1286,6 @@
11931286 struct inode *inode = mapping->host;
11941287 ssize_t err;
11951288 loff_t endbyte = 0;
1196
-
1197
- if (fuse_is_bad(inode))
1198
- return -EIO;
11991289
12001290 if (get_fuse_conn(inode)->writeback_cache) {
12011291 /* Update size (EOF optimization) and mode (SUID clearing) */
....@@ -1263,14 +1353,14 @@
12631353 return written ? written : err;
12641354 }
12651355
1266
-static inline void fuse_page_descs_length_init(struct fuse_req *req,
1267
- unsigned index, unsigned nr_pages)
1356
+static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
1357
+ unsigned int index,
1358
+ unsigned int nr_pages)
12681359 {
12691360 int i;
12701361
12711362 for (i = index; i < index + nr_pages; i++)
1272
- req->page_descs[i].length = PAGE_SIZE -
1273
- req->page_descs[i].offset;
1363
+ descs[i].length = PAGE_SIZE - descs[i].offset;
12741364 }
12751365
12761366 static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
....@@ -1284,33 +1374,34 @@
12841374 return min(iov_iter_single_seg_count(ii), max_size);
12851375 }
12861376
1287
-static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1288
- size_t *nbytesp, int write)
1377
+static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
1378
+ size_t *nbytesp, int write,
1379
+ unsigned int max_pages)
12891380 {
12901381 size_t nbytes = 0; /* # bytes already packed in req */
12911382 ssize_t ret = 0;
12921383
12931384 /* Special case for kernel I/O: can copy directly into the buffer */
1294
- if (ii->type & ITER_KVEC) {
1385
+ if (iov_iter_is_kvec(ii)) {
12951386 unsigned long user_addr = fuse_get_user_addr(ii);
12961387 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
12971388
12981389 if (write)
1299
- req->in.args[1].value = (void *) user_addr;
1390
+ ap->args.in_args[1].value = (void *) user_addr;
13001391 else
1301
- req->out.args[0].value = (void *) user_addr;
1392
+ ap->args.out_args[0].value = (void *) user_addr;
13021393
13031394 iov_iter_advance(ii, frag_size);
13041395 *nbytesp = frag_size;
13051396 return 0;
13061397 }
13071398
1308
- while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1399
+ while (nbytes < *nbytesp && ap->num_pages < max_pages) {
13091400 unsigned npages;
13101401 size_t start;
1311
- ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
1402
+ ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
13121403 *nbytesp - nbytes,
1313
- req->max_pages - req->num_pages,
1404
+ max_pages - ap->num_pages,
13141405 &start);
13151406 if (ret < 0)
13161407 break;
....@@ -1321,27 +1412,23 @@
13211412 ret += start;
13221413 npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
13231414
1324
- req->page_descs[req->num_pages].offset = start;
1325
- fuse_page_descs_length_init(req, req->num_pages, npages);
1415
+ ap->descs[ap->num_pages].offset = start;
1416
+ fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
13261417
1327
- req->num_pages += npages;
1328
- req->page_descs[req->num_pages - 1].length -=
1418
+ ap->num_pages += npages;
1419
+ ap->descs[ap->num_pages - 1].length -=
13291420 (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
13301421 }
13311422
1423
+ ap->args.user_pages = true;
13321424 if (write)
1333
- req->in.argpages = 1;
1425
+ ap->args.in_pages = true;
13341426 else
1335
- req->out.argpages = 1;
1427
+ ap->args.out_pages = true;
13361428
13371429 *nbytesp = nbytes;
13381430
13391431 return ret < 0 ? ret : 0;
1340
-}
1341
-
1342
-static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1343
-{
1344
- return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
13451432 }
13461433
13471434 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
....@@ -1352,23 +1439,23 @@
13521439 struct file *file = io->iocb->ki_filp;
13531440 struct inode *inode = file->f_mapping->host;
13541441 struct fuse_file *ff = file->private_data;
1355
- struct fuse_conn *fc = ff->fc;
1442
+ struct fuse_conn *fc = ff->fm->fc;
13561443 size_t nmax = write ? fc->max_write : fc->max_read;
13571444 loff_t pos = *ppos;
13581445 size_t count = iov_iter_count(iter);
13591446 pgoff_t idx_from = pos >> PAGE_SHIFT;
13601447 pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
13611448 ssize_t res = 0;
1362
- struct fuse_req *req;
13631449 int err = 0;
1450
+ struct fuse_io_args *ia;
1451
+ unsigned int max_pages;
13641452
1365
- if (io->async)
1366
- req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
1367
- else
1368
- req = fuse_get_req(fc, fuse_iter_npages(iter));
1369
- if (IS_ERR(req))
1370
- return PTR_ERR(req);
1453
+ max_pages = iov_iter_npages(iter, fc->max_pages);
1454
+ ia = fuse_io_alloc(io, max_pages);
1455
+ if (!ia)
1456
+ return -ENOMEM;
13711457
1458
+ ia->io = io;
13721459 if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
13731460 if (!write)
13741461 inode_lock(inode);
....@@ -1379,46 +1466,52 @@
13791466
13801467 io->should_dirty = !write && iter_is_iovec(iter);
13811468 while (count) {
1382
- size_t nres;
1469
+ ssize_t nres;
13831470 fl_owner_t owner = current->files;
13841471 size_t nbytes = min(count, nmax);
1385
- err = fuse_get_user_pages(req, iter, &nbytes, write);
1472
+
1473
+ err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
1474
+ max_pages);
13861475 if (err && !nbytes)
13871476 break;
13881477
1389
- if (write)
1390
- nres = fuse_send_write(req, io, pos, nbytes, owner);
1391
- else
1392
- nres = fuse_send_read(req, io, pos, nbytes, owner);
1478
+ if (write) {
1479
+ if (!capable(CAP_FSETID))
1480
+ ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
13931481
1394
- if (!io->async)
1395
- fuse_release_user_pages(req, io->should_dirty);
1396
- if (req->out.h.error) {
1397
- err = req->out.h.error;
1398
- break;
1399
- } else if (nres > nbytes) {
1400
- res = 0;
1401
- err = -EIO;
1482
+ nres = fuse_send_write(ia, pos, nbytes, owner);
1483
+ } else {
1484
+ nres = fuse_send_read(ia, pos, nbytes, owner);
1485
+ }
1486
+
1487
+ if (!io->async || nres < 0) {
1488
+ fuse_release_user_pages(&ia->ap, io->should_dirty);
1489
+ fuse_io_free(ia);
1490
+ }
1491
+ ia = NULL;
1492
+ if (nres < 0) {
1493
+ iov_iter_revert(iter, nbytes);
1494
+ err = nres;
14021495 break;
14031496 }
1497
+ WARN_ON(nres > nbytes);
1498
+
14041499 count -= nres;
14051500 res += nres;
14061501 pos += nres;
1407
- if (nres != nbytes)
1502
+ if (nres != nbytes) {
1503
+ iov_iter_revert(iter, nbytes - nres);
14081504 break;
1505
+ }
14091506 if (count) {
1410
- fuse_put_request(fc, req);
1411
- if (io->async)
1412
- req = fuse_get_req_for_background(fc,
1413
- fuse_iter_npages(iter));
1414
- else
1415
- req = fuse_get_req(fc, fuse_iter_npages(iter));
1416
- if (IS_ERR(req))
1507
+ max_pages = iov_iter_npages(iter, fc->max_pages);
1508
+ ia = fuse_io_alloc(io, max_pages);
1509
+ if (!ia)
14171510 break;
14181511 }
14191512 }
1420
- if (!IS_ERR(req))
1421
- fuse_put_request(fc, req);
1513
+ if (ia)
1514
+ fuse_io_free(ia);
14221515 if (res > 0)
14231516 *ppos = pos;
14241517
....@@ -1433,20 +1526,28 @@
14331526 ssize_t res;
14341527 struct inode *inode = file_inode(io->iocb->ki_filp);
14351528
1436
- if (fuse_is_bad(inode))
1437
- return -EIO;
1438
-
14391529 res = fuse_direct_io(io, iter, ppos, 0);
14401530
1441
- fuse_invalidate_attr(inode);
1531
+ fuse_invalidate_atime(inode);
14421532
14431533 return res;
14441534 }
14451535
1536
+static ssize_t fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
1537
+
14461538 static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
14471539 {
1448
- struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1449
- return __fuse_direct_read(&io, to, &iocb->ki_pos);
1540
+ ssize_t res;
1541
+
1542
+ if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1543
+ res = fuse_direct_IO(iocb, to);
1544
+ } else {
1545
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1546
+
1547
+ res = __fuse_direct_read(&io, to, &iocb->ki_pos);
1548
+ }
1549
+
1550
+ return res;
14501551 }
14511552
14521553 static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
....@@ -1455,14 +1556,17 @@
14551556 struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
14561557 ssize_t res;
14571558
1458
- if (fuse_is_bad(inode))
1459
- return -EIO;
1460
-
14611559 /* Don't allow parallel writes to the same file */
14621560 inode_lock(inode);
14631561 res = generic_write_checks(iocb, from);
1464
- if (res > 0)
1465
- res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
1562
+ if (res > 0) {
1563
+ if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1564
+ res = fuse_direct_IO(iocb, from);
1565
+ } else {
1566
+ res = fuse_direct_io(&io, from, &iocb->ki_pos,
1567
+ FUSE_DIO_WRITE);
1568
+ }
1569
+ }
14661570 fuse_invalidate_attr(inode);
14671571 if (res > 0)
14681572 fuse_write_update_size(inode, iocb->ki_pos);
....@@ -1471,46 +1575,92 @@
14711575 return res;
14721576 }
14731577
1474
-static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1578
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
14751579 {
1476
- int i;
1580
+ struct file *file = iocb->ki_filp;
1581
+ struct fuse_file *ff = file->private_data;
1582
+ struct inode *inode = file_inode(file);
14771583
1478
- for (i = 0; i < req->num_pages; i++)
1479
- __free_page(req->pages[i]);
1584
+ if (fuse_is_bad(inode))
1585
+ return -EIO;
14801586
1481
- if (req->ff)
1482
- fuse_file_put(req->ff, false, false);
1587
+ if (FUSE_IS_DAX(inode))
1588
+ return fuse_dax_read_iter(iocb, to);
1589
+
1590
+ if (ff->passthrough.filp)
1591
+ return fuse_passthrough_read_iter(iocb, to);
1592
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
1593
+ return fuse_cache_read_iter(iocb, to);
1594
+ else
1595
+ return fuse_direct_read_iter(iocb, to);
14831596 }
14841597
1485
-static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1598
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
14861599 {
1487
- struct inode *inode = req->inode;
1600
+ struct file *file = iocb->ki_filp;
1601
+ struct fuse_file *ff = file->private_data;
1602
+ struct inode *inode = file_inode(file);
1603
+
1604
+ if (fuse_is_bad(inode))
1605
+ return -EIO;
1606
+
1607
+ if (FUSE_IS_DAX(inode))
1608
+ return fuse_dax_write_iter(iocb, from);
1609
+
1610
+ if (ff->passthrough.filp)
1611
+ return fuse_passthrough_write_iter(iocb, from);
1612
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
1613
+ return fuse_cache_write_iter(iocb, from);
1614
+ else
1615
+ return fuse_direct_write_iter(iocb, from);
1616
+}
1617
+
1618
+static void fuse_writepage_free(struct fuse_writepage_args *wpa)
1619
+{
1620
+ struct fuse_args_pages *ap = &wpa->ia.ap;
1621
+ int i;
1622
+
1623
+ for (i = 0; i < ap->num_pages; i++)
1624
+ __free_page(ap->pages[i]);
1625
+
1626
+ if (wpa->ia.ff)
1627
+ fuse_file_put(wpa->ia.ff, false, false);
1628
+
1629
+ kfree(ap->pages);
1630
+ kfree(wpa);
1631
+}
1632
+
1633
+static void fuse_writepage_finish(struct fuse_mount *fm,
1634
+ struct fuse_writepage_args *wpa)
1635
+{
1636
+ struct fuse_args_pages *ap = &wpa->ia.ap;
1637
+ struct inode *inode = wpa->inode;
14881638 struct fuse_inode *fi = get_fuse_inode(inode);
14891639 struct backing_dev_info *bdi = inode_to_bdi(inode);
14901640 int i;
14911641
1492
- list_del(&req->writepages_entry);
1493
- for (i = 0; i < req->num_pages; i++) {
1642
+ for (i = 0; i < ap->num_pages; i++) {
14941643 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1495
- dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
1644
+ dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
14961645 wb_writeout_inc(&bdi->wb);
14971646 }
14981647 wake_up(&fi->page_waitq);
14991648 }
15001649
1501
-/* Called under fc->lock, may release and reacquire it */
1502
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
1503
- loff_t size)
1504
-__releases(fc->lock)
1505
-__acquires(fc->lock)
1650
+/* Called under fi->lock, may release and reacquire it */
1651
+static void fuse_send_writepage(struct fuse_mount *fm,
1652
+ struct fuse_writepage_args *wpa, loff_t size)
1653
+__releases(fi->lock)
1654
+__acquires(fi->lock)
15061655 {
1507
- struct fuse_inode *fi = get_fuse_inode(req->inode);
1508
- struct fuse_write_in *inarg = &req->misc.write.in;
1509
- __u64 data_size = req->num_pages * PAGE_SIZE;
1656
+ struct fuse_writepage_args *aux, *next;
1657
+ struct fuse_inode *fi = get_fuse_inode(wpa->inode);
1658
+ struct fuse_write_in *inarg = &wpa->ia.write.in;
1659
+ struct fuse_args *args = &wpa->ia.ap.args;
1660
+ __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
1661
+ int err;
15101662
1511
- if (!fc->connected)
1512
- goto out_free;
1513
-
1663
+ fi->writectr++;
15141664 if (inarg->offset + data_size <= size) {
15151665 inarg->size = data_size;
15161666 } else if (inarg->offset < size) {
....@@ -1520,56 +1670,129 @@
15201670 goto out_free;
15211671 }
15221672
1523
- req->in.args[1].size = inarg->size;
1524
- fi->writectr++;
1525
- fuse_request_send_background_locked(fc, req);
1673
+ args->in_args[1].size = inarg->size;
1674
+ args->force = true;
1675
+ args->nocreds = true;
1676
+
1677
+ err = fuse_simple_background(fm, args, GFP_ATOMIC);
1678
+ if (err == -ENOMEM) {
1679
+ spin_unlock(&fi->lock);
1680
+ err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL);
1681
+ spin_lock(&fi->lock);
1682
+ }
1683
+
1684
+ /* Fails on broken connection only */
1685
+ if (unlikely(err))
1686
+ goto out_free;
1687
+
15261688 return;
15271689
15281690 out_free:
1529
- fuse_writepage_finish(fc, req);
1530
- spin_unlock(&fc->lock);
1531
- fuse_writepage_free(fc, req);
1532
- fuse_put_request(fc, req);
1533
- spin_lock(&fc->lock);
1691
+ fi->writectr--;
1692
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
1693
+ fuse_writepage_finish(fm, wpa);
1694
+ spin_unlock(&fi->lock);
1695
+
1696
+ /* After fuse_writepage_finish() aux request list is private */
1697
+ for (aux = wpa->next; aux; aux = next) {
1698
+ next = aux->next;
1699
+ aux->next = NULL;
1700
+ fuse_writepage_free(aux);
1701
+ }
1702
+
1703
+ fuse_writepage_free(wpa);
1704
+ spin_lock(&fi->lock);
15341705 }
15351706
15361707 /*
15371708 * If fi->writectr is positive (no truncate or fsync going on) send
15381709 * all queued writepage requests.
15391710 *
1540
- * Called with fc->lock
1711
+ * Called with fi->lock
15411712 */
15421713 void fuse_flush_writepages(struct inode *inode)
1543
-__releases(fc->lock)
1544
-__acquires(fc->lock)
1714
+__releases(fi->lock)
1715
+__acquires(fi->lock)
15451716 {
1546
- struct fuse_conn *fc = get_fuse_conn(inode);
1717
+ struct fuse_mount *fm = get_fuse_mount(inode);
15471718 struct fuse_inode *fi = get_fuse_inode(inode);
15481719 loff_t crop = i_size_read(inode);
1549
- struct fuse_req *req;
1720
+ struct fuse_writepage_args *wpa;
15501721
15511722 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1552
- req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1553
- list_del_init(&req->list);
1554
- fuse_send_writepage(fc, req, crop);
1723
+ wpa = list_entry(fi->queued_writes.next,
1724
+ struct fuse_writepage_args, queue_entry);
1725
+ list_del_init(&wpa->queue_entry);
1726
+ fuse_send_writepage(fm, wpa, crop);
15551727 }
15561728 }
15571729
1558
-static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1730
+static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
1731
+ struct fuse_writepage_args *wpa)
15591732 {
1560
- struct inode *inode = req->inode;
1561
- struct fuse_inode *fi = get_fuse_inode(inode);
1733
+ pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
1734
+ pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
1735
+ struct rb_node **p = &root->rb_node;
1736
+ struct rb_node *parent = NULL;
15621737
1563
- mapping_set_error(inode->i_mapping, req->out.h.error);
1564
- spin_lock(&fc->lock);
1565
- while (req->misc.write.next) {
1566
- struct fuse_conn *fc = get_fuse_conn(inode);
1567
- struct fuse_write_in *inarg = &req->misc.write.in;
1568
- struct fuse_req *next = req->misc.write.next;
1569
- req->misc.write.next = next->misc.write.next;
1570
- next->misc.write.next = NULL;
1571
- next->ff = fuse_file_get(req->ff);
1572
- list_add(&next->writepages_entry, &fi->writepages);
1738
+ WARN_ON(!wpa->ia.ap.num_pages);
1739
+ while (*p) {
1740
+ struct fuse_writepage_args *curr;
1741
+ pgoff_t curr_index;
1742
+
1743
+ parent = *p;
1744
+ curr = rb_entry(parent, struct fuse_writepage_args,
1745
+ writepages_entry);
1746
+ WARN_ON(curr->inode != wpa->inode);
1747
+ curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
1748
+
1749
+ if (idx_from >= curr_index + curr->ia.ap.num_pages)
1750
+ p = &(*p)->rb_right;
1751
+ else if (idx_to < curr_index)
1752
+ p = &(*p)->rb_left;
1753
+ else
1754
+ return curr;
1755
+ }
1756
+
1757
+ rb_link_node(&wpa->writepages_entry, parent, p);
1758
+ rb_insert_color(&wpa->writepages_entry, root);
1759
+ return NULL;
1760
+}
1761
+
1762
+static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
1763
+{
1764
+ WARN_ON(fuse_insert_writeback(root, wpa));
1765
+}
1766
+
1767
+static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
1768
+ int error)
1769
+{
1770
+ struct fuse_writepage_args *wpa =
1771
+ container_of(args, typeof(*wpa), ia.ap.args);
1772
+ struct inode *inode = wpa->inode;
1773
+ struct fuse_inode *fi = get_fuse_inode(inode);
1774
+ struct fuse_conn *fc = get_fuse_conn(inode);
1775
+
1776
+ mapping_set_error(inode->i_mapping, error);
1777
+ /*
1778
+ * A writeback finished and this might have updated mtime/ctime on
1779
+ * server making local mtime/ctime stale. Hence invalidate attrs.
1780
+ * Do this only if writeback_cache is not enabled. If writeback_cache
1781
+ * is enabled, we trust local ctime/mtime.
1782
+ */
1783
+ if (!fc->writeback_cache)
1784
+ fuse_invalidate_attr(inode);
1785
+ spin_lock(&fi->lock);
1786
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
1787
+ while (wpa->next) {
1788
+ struct fuse_mount *fm = get_fuse_mount(inode);
1789
+ struct fuse_write_in *inarg = &wpa->ia.write.in;
1790
+ struct fuse_writepage_args *next = wpa->next;
1791
+
1792
+ wpa->next = next->next;
1793
+ next->next = NULL;
1794
+ next->ia.ff = fuse_file_get(wpa->ia.ff);
1795
+ tree_insert(&fi->writepages, next);
15731796
15741797 /*
15751798 * Skip fuse_flush_writepages() to make it easy to crop requests
....@@ -1594,12 +1817,12 @@
15941817 * no invocations of fuse_writepage_end() while we're in
15951818 * fuse_set_nowrite..fuse_release_nowrite section.
15961819 */
1597
- fuse_send_writepage(fc, next, inarg->offset + inarg->size);
1820
+ fuse_send_writepage(fm, next, inarg->offset + inarg->size);
15981821 }
15991822 fi->writectr--;
1600
- fuse_writepage_finish(fc, req);
1601
- spin_unlock(&fc->lock);
1602
- fuse_writepage_free(fc, req);
1823
+ fuse_writepage_finish(fm, wpa);
1824
+ spin_unlock(&fi->lock);
1825
+ fuse_writepage_free(wpa);
16031826 }
16041827
16051828 static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
....@@ -1607,13 +1830,13 @@
16071830 {
16081831 struct fuse_file *ff = NULL;
16091832
1610
- spin_lock(&fc->lock);
1833
+ spin_lock(&fi->lock);
16111834 if (!list_empty(&fi->write_files)) {
16121835 ff = list_entry(fi->write_files.next, struct fuse_file,
16131836 write_entry);
16141837 fuse_file_get(ff);
16151838 }
1616
- spin_unlock(&fc->lock);
1839
+ spin_unlock(&fi->lock);
16171840
16181841 return ff;
16191842 }
....@@ -1633,6 +1856,17 @@
16331856 struct fuse_file *ff;
16341857 int err;
16351858
1859
+ /*
1860
+ * Inode is always written before the last reference is dropped and
1861
+ * hence this should not be reached from reclaim.
1862
+ *
1863
+ * Writing back the inode from reclaim can deadlock if the request
1864
+ * processing itself needs an allocation. Allocations triggering
1865
+ * reclaim while serving a request can't be prevented, because it can
1866
+ * involve any number of unrelated userspace processes.
1867
+ */
1868
+ WARN_ON(wbc->for_reclaim);
1869
+
16361870 ff = __fuse_write_file_get(fc, fi);
16371871 err = fuse_flush_times(inode, ff);
16381872 if (ff)
....@@ -1641,54 +1875,73 @@
16411875 return err;
16421876 }
16431877
1878
+static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
1879
+{
1880
+ struct fuse_writepage_args *wpa;
1881
+ struct fuse_args_pages *ap;
1882
+
1883
+ wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
1884
+ if (wpa) {
1885
+ ap = &wpa->ia.ap;
1886
+ ap->num_pages = 0;
1887
+ ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
1888
+ if (!ap->pages) {
1889
+ kfree(wpa);
1890
+ wpa = NULL;
1891
+ }
1892
+ }
1893
+ return wpa;
1894
+
1895
+}
1896
+
16441897 static int fuse_writepage_locked(struct page *page)
16451898 {
16461899 struct address_space *mapping = page->mapping;
16471900 struct inode *inode = mapping->host;
16481901 struct fuse_conn *fc = get_fuse_conn(inode);
16491902 struct fuse_inode *fi = get_fuse_inode(inode);
1650
- struct fuse_req *req;
1903
+ struct fuse_writepage_args *wpa;
1904
+ struct fuse_args_pages *ap;
16511905 struct page *tmp_page;
16521906 int error = -ENOMEM;
16531907
16541908 set_page_writeback(page);
16551909
1656
- req = fuse_request_alloc_nofs(1);
1657
- if (!req)
1910
+ wpa = fuse_writepage_args_alloc();
1911
+ if (!wpa)
16581912 goto err;
1913
+ ap = &wpa->ia.ap;
16591914
1660
- /* writeback always goes to bg_queue */
1661
- __set_bit(FR_BACKGROUND, &req->flags);
16621915 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
16631916 if (!tmp_page)
16641917 goto err_free;
16651918
16661919 error = -EIO;
1667
- req->ff = fuse_write_file_get(fc, fi);
1668
- if (!req->ff)
1920
+ wpa->ia.ff = fuse_write_file_get(fc, fi);
1921
+ if (!wpa->ia.ff)
16691922 goto err_nofile;
16701923
1671
- fuse_write_fill(req, req->ff, page_offset(page), 0);
1924
+ fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
16721925
16731926 copy_highpage(tmp_page, page);
1674
- req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1675
- req->misc.write.next = NULL;
1676
- req->in.argpages = 1;
1677
- req->num_pages = 1;
1678
- req->pages[0] = tmp_page;
1679
- req->page_descs[0].offset = 0;
1680
- req->page_descs[0].length = PAGE_SIZE;
1681
- req->end = fuse_writepage_end;
1682
- req->inode = inode;
1927
+ wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
1928
+ wpa->next = NULL;
1929
+ ap->args.in_pages = true;
1930
+ ap->num_pages = 1;
1931
+ ap->pages[0] = tmp_page;
1932
+ ap->descs[0].offset = 0;
1933
+ ap->descs[0].length = PAGE_SIZE;
1934
+ ap->args.end = fuse_writepage_end;
1935
+ wpa->inode = inode;
16831936
16841937 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
16851938 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
16861939
1687
- spin_lock(&fc->lock);
1688
- list_add(&req->writepages_entry, &fi->writepages);
1689
- list_add_tail(&req->list, &fi->queued_writes);
1940
+ spin_lock(&fi->lock);
1941
+ tree_insert(&fi->writepages, wpa);
1942
+ list_add_tail(&wpa->queue_entry, &fi->queued_writes);
16901943 fuse_flush_writepages(inode);
1691
- spin_unlock(&fc->lock);
1944
+ spin_unlock(&fi->lock);
16921945
16931946 end_page_writeback(page);
16941947
....@@ -1697,7 +1950,7 @@
16971950 err_nofile:
16981951 __free_page(tmp_page);
16991952 err_free:
1700
- fuse_request_free(req);
1953
+ kfree(wpa);
17011954 err:
17021955 mapping_set_error(page->mapping, error);
17031956 end_page_writeback(page);
....@@ -1728,108 +1981,118 @@
17281981 }
17291982
17301983 struct fuse_fill_wb_data {
1731
- struct fuse_req *req;
1984
+ struct fuse_writepage_args *wpa;
17321985 struct fuse_file *ff;
17331986 struct inode *inode;
17341987 struct page **orig_pages;
1988
+ unsigned int max_pages;
17351989 };
1990
+
1991
+static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
1992
+{
1993
+ struct fuse_args_pages *ap = &data->wpa->ia.ap;
1994
+ struct fuse_conn *fc = get_fuse_conn(data->inode);
1995
+ struct page **pages;
1996
+ struct fuse_page_desc *descs;
1997
+ unsigned int npages = min_t(unsigned int,
1998
+ max_t(unsigned int, data->max_pages * 2,
1999
+ FUSE_DEFAULT_MAX_PAGES_PER_REQ),
2000
+ fc->max_pages);
2001
+ WARN_ON(npages <= data->max_pages);
2002
+
2003
+ pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
2004
+ if (!pages)
2005
+ return false;
2006
+
2007
+ memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
2008
+ memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
2009
+ kfree(ap->pages);
2010
+ ap->pages = pages;
2011
+ ap->descs = descs;
2012
+ data->max_pages = npages;
2013
+
2014
+ return true;
2015
+}
17362016
17372017 static void fuse_writepages_send(struct fuse_fill_wb_data *data)
17382018 {
1739
- struct fuse_req *req = data->req;
2019
+ struct fuse_writepage_args *wpa = data->wpa;
17402020 struct inode *inode = data->inode;
1741
- struct fuse_conn *fc = get_fuse_conn(inode);
17422021 struct fuse_inode *fi = get_fuse_inode(inode);
1743
- int num_pages = req->num_pages;
2022
+ int num_pages = wpa->ia.ap.num_pages;
17442023 int i;
17452024
1746
- req->ff = fuse_file_get(data->ff);
1747
- spin_lock(&fc->lock);
1748
- list_add_tail(&req->list, &fi->queued_writes);
2025
+ wpa->ia.ff = fuse_file_get(data->ff);
2026
+ spin_lock(&fi->lock);
2027
+ list_add_tail(&wpa->queue_entry, &fi->queued_writes);
17492028 fuse_flush_writepages(inode);
1750
- spin_unlock(&fc->lock);
2029
+ spin_unlock(&fi->lock);
17512030
17522031 for (i = 0; i < num_pages; i++)
17532032 end_page_writeback(data->orig_pages[i]);
17542033 }
17552034
1756
-static bool fuse_writepage_in_flight(struct fuse_req *new_req,
1757
- struct page *page)
2035
+/*
2036
+ * Check under fi->lock if the page is under writeback, and insert it onto the
2037
+ * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
2038
+ * one already added for a page at this offset. If there's none, then insert
2039
+ * this new request onto the auxiliary list, otherwise reuse the existing one by
2040
+ * swapping the new temp page with the old one.
2041
+ */
2042
+static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
2043
+ struct page *page)
17582044 {
1759
- struct fuse_conn *fc = get_fuse_conn(new_req->inode);
1760
- struct fuse_inode *fi = get_fuse_inode(new_req->inode);
1761
- struct fuse_req *tmp;
1762
- struct fuse_req *old_req;
1763
- bool found = false;
1764
- pgoff_t curr_index;
2045
+ struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
2046
+ struct fuse_writepage_args *tmp;
2047
+ struct fuse_writepage_args *old_wpa;
2048
+ struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
17652049
1766
- BUG_ON(new_req->num_pages != 0);
2050
+ WARN_ON(new_ap->num_pages != 0);
2051
+ new_ap->num_pages = 1;
17672052
1768
- spin_lock(&fc->lock);
1769
- list_del(&new_req->writepages_entry);
1770
- list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
1771
- BUG_ON(old_req->inode != new_req->inode);
1772
- curr_index = old_req->misc.write.in.offset >> PAGE_SHIFT;
1773
- if (curr_index <= page->index &&
1774
- page->index < curr_index + old_req->num_pages) {
1775
- found = true;
2053
+ spin_lock(&fi->lock);
2054
+ old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
2055
+ if (!old_wpa) {
2056
+ spin_unlock(&fi->lock);
2057
+ return true;
2058
+ }
2059
+
2060
+ for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
2061
+ pgoff_t curr_index;
2062
+
2063
+ WARN_ON(tmp->inode != new_wpa->inode);
2064
+ curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
2065
+ if (curr_index == page->index) {
2066
+ WARN_ON(tmp->ia.ap.num_pages != 1);
2067
+ swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
17762068 break;
17772069 }
17782070 }
1779
- if (!found) {
1780
- list_add(&new_req->writepages_entry, &fi->writepages);
1781
- goto out_unlock;
2071
+
2072
+ if (!tmp) {
2073
+ new_wpa->next = old_wpa->next;
2074
+ old_wpa->next = new_wpa;
17822075 }
17832076
1784
- new_req->num_pages = 1;
1785
- for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
1786
- BUG_ON(tmp->inode != new_req->inode);
1787
- curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
1788
- if (tmp->num_pages == 1 &&
1789
- curr_index == page->index) {
1790
- old_req = tmp;
1791
- }
1792
- }
2077
+ spin_unlock(&fi->lock);
17932078
1794
- if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
1795
- struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
1796
-
1797
- copy_highpage(old_req->pages[0], page);
1798
- spin_unlock(&fc->lock);
2079
+ if (tmp) {
2080
+ struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
17992081
18002082 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1801
- dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
2083
+ dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
18022084 wb_writeout_inc(&bdi->wb);
1803
- fuse_writepage_free(fc, new_req);
1804
- fuse_request_free(new_req);
1805
- goto out;
1806
- } else {
1807
- new_req->misc.write.next = old_req->misc.write.next;
1808
- old_req->misc.write.next = new_req;
2085
+ fuse_writepage_free(new_wpa);
18092086 }
1810
-out_unlock:
1811
- spin_unlock(&fc->lock);
1812
-out:
1813
- return found;
2087
+
2088
+ return false;
18142089 }
18152090
1816
-static int fuse_writepages_fill(struct page *page,
1817
- struct writeback_control *wbc, void *_data)
2091
+static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
2092
+ struct fuse_args_pages *ap,
2093
+ struct fuse_fill_wb_data *data)
18182094 {
1819
- struct fuse_fill_wb_data *data = _data;
1820
- struct fuse_req *req = data->req;
1821
- struct inode *inode = data->inode;
1822
- struct fuse_conn *fc = get_fuse_conn(inode);
1823
- struct page *tmp_page;
1824
- bool is_writeback;
1825
- int err;
1826
-
1827
- if (!data->ff) {
1828
- err = -EIO;
1829
- data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
1830
- if (!data->ff)
1831
- goto out_unlock;
1832
- }
2095
+ WARN_ON(!ap->num_pages);
18332096
18342097 /*
18352098 * Being under writeback is unlikely but possible. For example direct
....@@ -1837,15 +2100,52 @@
18372100 * the pages are faulted with get_user_pages(), and then after the read
18382101 * completed.
18392102 */
1840
- is_writeback = fuse_page_is_writeback(inode, page->index);
2103
+ if (fuse_page_is_writeback(data->inode, page->index))
2104
+ return true;
18412105
1842
- if (req && req->num_pages &&
1843
- (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
1844
- (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
1845
- data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
1846
- fuse_writepages_send(data);
1847
- data->req = NULL;
2106
+ /* Reached max pages */
2107
+ if (ap->num_pages == fc->max_pages)
2108
+ return true;
2109
+
2110
+ /* Reached max write bytes */
2111
+ if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
2112
+ return true;
2113
+
2114
+ /* Discontinuity */
2115
+ if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
2116
+ return true;
2117
+
2118
+ /* Need to grow the pages array? If so, did the expansion fail? */
2119
+ if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
2120
+ return true;
2121
+
2122
+ return false;
2123
+}
2124
+
2125
+static int fuse_writepages_fill(struct page *page,
2126
+ struct writeback_control *wbc, void *_data)
2127
+{
2128
+ struct fuse_fill_wb_data *data = _data;
2129
+ struct fuse_writepage_args *wpa = data->wpa;
2130
+ struct fuse_args_pages *ap = &wpa->ia.ap;
2131
+ struct inode *inode = data->inode;
2132
+ struct fuse_inode *fi = get_fuse_inode(inode);
2133
+ struct fuse_conn *fc = get_fuse_conn(inode);
2134
+ struct page *tmp_page;
2135
+ int err;
2136
+
2137
+ if (!data->ff) {
2138
+ err = -EIO;
2139
+ data->ff = fuse_write_file_get(fc, fi);
2140
+ if (!data->ff)
2141
+ goto out_unlock;
18482142 }
2143
+
2144
+ if (wpa && fuse_writepage_need_send(fc, page, ap, data)) {
2145
+ fuse_writepages_send(data);
2146
+ data->wpa = NULL;
2147
+ }
2148
+
18492149 err = -ENOMEM;
18502150 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
18512151 if (!tmp_page)
....@@ -1860,61 +2160,53 @@
18602160 * This is ensured by holding the page lock in page_mkwrite() while
18612161 * checking fuse_page_is_writeback(). We already hold the page lock
18622162 * since clear_page_dirty_for_io() and keep it held until we add the
1863
- * request to the fi->writepages list and increment req->num_pages.
2163
+ * request to the fi->writepages list and increment ap->num_pages.
18642164 * After this fuse_page_is_writeback() will indicate that the page is
18652165 * under writeback, so we can release the page lock.
18662166 */
1867
- if (data->req == NULL) {
1868
- struct fuse_inode *fi = get_fuse_inode(inode);
1869
-
2167
+ if (data->wpa == NULL) {
18702168 err = -ENOMEM;
1871
- req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
1872
- if (!req) {
2169
+ wpa = fuse_writepage_args_alloc();
2170
+ if (!wpa) {
18732171 __free_page(tmp_page);
18742172 goto out_unlock;
18752173 }
2174
+ data->max_pages = 1;
18762175
1877
- fuse_write_fill(req, data->ff, page_offset(page), 0);
1878
- req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1879
- req->misc.write.next = NULL;
1880
- req->in.argpages = 1;
1881
- __set_bit(FR_BACKGROUND, &req->flags);
1882
- req->num_pages = 0;
1883
- req->end = fuse_writepage_end;
1884
- req->inode = inode;
1885
-
1886
- spin_lock(&fc->lock);
1887
- list_add(&req->writepages_entry, &fi->writepages);
1888
- spin_unlock(&fc->lock);
1889
-
1890
- data->req = req;
2176
+ ap = &wpa->ia.ap;
2177
+ fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0);
2178
+ wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
2179
+ wpa->next = NULL;
2180
+ ap->args.in_pages = true;
2181
+ ap->args.end = fuse_writepage_end;
2182
+ ap->num_pages = 0;
2183
+ wpa->inode = inode;
18912184 }
18922185 set_page_writeback(page);
18932186
18942187 copy_highpage(tmp_page, page);
1895
- req->pages[req->num_pages] = tmp_page;
1896
- req->page_descs[req->num_pages].offset = 0;
1897
- req->page_descs[req->num_pages].length = PAGE_SIZE;
2188
+ ap->pages[ap->num_pages] = tmp_page;
2189
+ ap->descs[ap->num_pages].offset = 0;
2190
+ ap->descs[ap->num_pages].length = PAGE_SIZE;
2191
+ data->orig_pages[ap->num_pages] = page;
18982192
18992193 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
19002194 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
19012195
19022196 err = 0;
1903
- if (is_writeback && fuse_writepage_in_flight(req, page)) {
2197
+ if (data->wpa) {
2198
+ /*
2199
+ * Protected by fi->lock against concurrent access by
2200
+ * fuse_page_is_writeback().
2201
+ */
2202
+ spin_lock(&fi->lock);
2203
+ ap->num_pages++;
2204
+ spin_unlock(&fi->lock);
2205
+ } else if (fuse_writepage_add(wpa, page)) {
2206
+ data->wpa = wpa;
2207
+ } else {
19042208 end_page_writeback(page);
1905
- data->req = NULL;
1906
- goto out_unlock;
19072209 }
1908
- data->orig_pages[req->num_pages] = page;
1909
-
1910
- /*
1911
- * Protected by fc->lock against concurrent access by
1912
- * fuse_page_is_writeback().
1913
- */
1914
- spin_lock(&fc->lock);
1915
- req->num_pages++;
1916
- spin_unlock(&fc->lock);
1917
-
19182210 out_unlock:
19192211 unlock_page(page);
19202212
....@@ -1925,6 +2217,7 @@
19252217 struct writeback_control *wbc)
19262218 {
19272219 struct inode *inode = mapping->host;
2220
+ struct fuse_conn *fc = get_fuse_conn(inode);
19282221 struct fuse_fill_wb_data data;
19292222 int err;
19302223
....@@ -1933,22 +2226,20 @@
19332226 goto out;
19342227
19352228 data.inode = inode;
1936
- data.req = NULL;
2229
+ data.wpa = NULL;
19372230 data.ff = NULL;
19382231
19392232 err = -ENOMEM;
1940
- data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
2233
+ data.orig_pages = kcalloc(fc->max_pages,
19412234 sizeof(struct page *),
19422235 GFP_NOFS);
19432236 if (!data.orig_pages)
19442237 goto out;
19452238
19462239 err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
1947
- if (data.req) {
1948
- /* Ignore errors if we can write at least one page */
1949
- BUG_ON(!data.req->num_pages);
2240
+ if (data.wpa) {
2241
+ WARN_ON(!data.wpa->ia.ap.num_pages);
19502242 fuse_writepages_send(&data);
1951
- err = 0;
19522243 }
19532244 if (data.ff)
19542245 fuse_file_put(data.ff, false, false);
....@@ -2096,23 +2387,31 @@
20962387
20972388 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
20982389 {
2390
+ struct fuse_file *ff = file->private_data;
2391
+
2392
+ /* DAX mmap is superior to direct_io mmap */
2393
+ if (FUSE_IS_DAX(file_inode(file)))
2394
+ return fuse_dax_mmap(file, vma);
2395
+
2396
+ if (ff->passthrough.filp)
2397
+ return fuse_passthrough_mmap(file, vma);
2398
+
2399
+ if (ff->open_flags & FOPEN_DIRECT_IO) {
2400
+ /* Can't provide the coherency needed for MAP_SHARED */
2401
+ if (vma->vm_flags & VM_MAYSHARE)
2402
+ return -ENODEV;
2403
+
2404
+ invalidate_inode_pages2(file->f_mapping);
2405
+
2406
+ return generic_file_mmap(file, vma);
2407
+ }
2408
+
20992409 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
21002410 fuse_link_write_file(file);
21012411
21022412 file_accessed(file);
21032413 vma->vm_ops = &fuse_file_vm_ops;
21042414 return 0;
2105
-}
2106
-
2107
-static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
2108
-{
2109
- /* Can't provide the coherency needed for MAP_SHARED */
2110
- if (vma->vm_flags & VM_MAYSHARE)
2111
- return -ENODEV;
2112
-
2113
- invalidate_inode_pages2(file->f_mapping);
2114
-
2115
- return generic_file_mmap(file, vma);
21162415 }
21172416
21182417 static int convert_fuse_file_lock(struct fuse_conn *fc,
....@@ -2165,29 +2464,29 @@
21652464 inarg->lk.pid = pid;
21662465 if (flock)
21672466 inarg->lk_flags |= FUSE_LK_FLOCK;
2168
- args->in.h.opcode = opcode;
2169
- args->in.h.nodeid = get_node_id(inode);
2170
- args->in.numargs = 1;
2171
- args->in.args[0].size = sizeof(*inarg);
2172
- args->in.args[0].value = inarg;
2467
+ args->opcode = opcode;
2468
+ args->nodeid = get_node_id(inode);
2469
+ args->in_numargs = 1;
2470
+ args->in_args[0].size = sizeof(*inarg);
2471
+ args->in_args[0].value = inarg;
21732472 }
21742473
21752474 static int fuse_getlk(struct file *file, struct file_lock *fl)
21762475 {
21772476 struct inode *inode = file_inode(file);
2178
- struct fuse_conn *fc = get_fuse_conn(inode);
2477
+ struct fuse_mount *fm = get_fuse_mount(inode);
21792478 FUSE_ARGS(args);
21802479 struct fuse_lk_in inarg;
21812480 struct fuse_lk_out outarg;
21822481 int err;
21832482
21842483 fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
2185
- args.out.numargs = 1;
2186
- args.out.args[0].size = sizeof(outarg);
2187
- args.out.args[0].value = &outarg;
2188
- err = fuse_simple_request(fc, &args);
2484
+ args.out_numargs = 1;
2485
+ args.out_args[0].size = sizeof(outarg);
2486
+ args.out_args[0].value = &outarg;
2487
+ err = fuse_simple_request(fm, &args);
21892488 if (!err)
2190
- err = convert_fuse_file_lock(fc, &outarg.lk, fl);
2489
+ err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl);
21912490
21922491 return err;
21932492 }
....@@ -2195,12 +2494,12 @@
21952494 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
21962495 {
21972496 struct inode *inode = file_inode(file);
2198
- struct fuse_conn *fc = get_fuse_conn(inode);
2497
+ struct fuse_mount *fm = get_fuse_mount(inode);
21992498 FUSE_ARGS(args);
22002499 struct fuse_lk_in inarg;
22012500 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
22022501 struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
2203
- pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns);
2502
+ pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
22042503 int err;
22052504
22062505 if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
....@@ -2213,7 +2512,7 @@
22132512 return 0;
22142513
22152514 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
2216
- err = fuse_simple_request(fc, &args);
2515
+ err = fuse_simple_request(fm, &args);
22172516
22182517 /* locking is restartable */
22192518 if (err == -EINTR)
....@@ -2267,29 +2566,29 @@
22672566 static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
22682567 {
22692568 struct inode *inode = mapping->host;
2270
- struct fuse_conn *fc = get_fuse_conn(inode);
2569
+ struct fuse_mount *fm = get_fuse_mount(inode);
22712570 FUSE_ARGS(args);
22722571 struct fuse_bmap_in inarg;
22732572 struct fuse_bmap_out outarg;
22742573 int err;
22752574
2276
- if (!inode->i_sb->s_bdev || fc->no_bmap)
2575
+ if (!inode->i_sb->s_bdev || fm->fc->no_bmap)
22772576 return 0;
22782577
22792578 memset(&inarg, 0, sizeof(inarg));
22802579 inarg.block = block;
22812580 inarg.blocksize = inode->i_sb->s_blocksize;
2282
- args.in.h.opcode = FUSE_BMAP;
2283
- args.in.h.nodeid = get_node_id(inode);
2284
- args.in.numargs = 1;
2285
- args.in.args[0].size = sizeof(inarg);
2286
- args.in.args[0].value = &inarg;
2287
- args.out.numargs = 1;
2288
- args.out.args[0].size = sizeof(outarg);
2289
- args.out.args[0].value = &outarg;
2290
- err = fuse_simple_request(fc, &args);
2581
+ args.opcode = FUSE_BMAP;
2582
+ args.nodeid = get_node_id(inode);
2583
+ args.in_numargs = 1;
2584
+ args.in_args[0].size = sizeof(inarg);
2585
+ args.in_args[0].value = &inarg;
2586
+ args.out_numargs = 1;
2587
+ args.out_args[0].size = sizeof(outarg);
2588
+ args.out_args[0].value = &outarg;
2589
+ err = fuse_simple_request(fm, &args);
22912590 if (err == -ENOSYS)
2292
- fc->no_bmap = 1;
2591
+ fm->fc->no_bmap = 1;
22932592
22942593 return err ? 0 : outarg.block;
22952594 }
....@@ -2297,7 +2596,7 @@
22972596 static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
22982597 {
22992598 struct inode *inode = file->f_mapping->host;
2300
- struct fuse_conn *fc = get_fuse_conn(inode);
2599
+ struct fuse_mount *fm = get_fuse_mount(inode);
23012600 struct fuse_file *ff = file->private_data;
23022601 FUSE_ARGS(args);
23032602 struct fuse_lseek_in inarg = {
....@@ -2308,21 +2607,21 @@
23082607 struct fuse_lseek_out outarg;
23092608 int err;
23102609
2311
- if (fc->no_lseek)
2610
+ if (fm->fc->no_lseek)
23122611 goto fallback;
23132612
2314
- args.in.h.opcode = FUSE_LSEEK;
2315
- args.in.h.nodeid = ff->nodeid;
2316
- args.in.numargs = 1;
2317
- args.in.args[0].size = sizeof(inarg);
2318
- args.in.args[0].value = &inarg;
2319
- args.out.numargs = 1;
2320
- args.out.args[0].size = sizeof(outarg);
2321
- args.out.args[0].value = &outarg;
2322
- err = fuse_simple_request(fc, &args);
2613
+ args.opcode = FUSE_LSEEK;
2614
+ args.nodeid = ff->nodeid;
2615
+ args.in_numargs = 1;
2616
+ args.in_args[0].size = sizeof(inarg);
2617
+ args.in_args[0].value = &inarg;
2618
+ args.out_numargs = 1;
2619
+ args.out_args[0].size = sizeof(outarg);
2620
+ args.out_args[0].value = &outarg;
2621
+ err = fuse_simple_request(fm, &args);
23232622 if (err) {
23242623 if (err == -ENOSYS) {
2325
- fc->no_lseek = 1;
2624
+ fm->fc->no_lseek = 1;
23262625 goto fallback;
23272626 }
23282627 return err;
....@@ -2408,10 +2707,11 @@
24082707 }
24092708
24102709 /* Make sure iov_length() won't overflow */
2411
-static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
2710
+static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
2711
+ size_t count)
24122712 {
24132713 size_t n;
2414
- u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
2714
+ u32 max = fc->max_pages << PAGE_SHIFT;
24152715
24162716 for (n = 0; n < count; n++, iov++) {
24172717 if (iov->iov_len > (size_t) max)
....@@ -2507,7 +2807,7 @@
25072807 unsigned int flags)
25082808 {
25092809 struct fuse_file *ff = file->private_data;
2510
- struct fuse_conn *fc = ff->fc;
2810
+ struct fuse_mount *fm = ff->fm;
25112811 struct fuse_ioctl_in inarg = {
25122812 .fh = ff->fh,
25132813 .cmd = cmd,
....@@ -2515,30 +2815,37 @@
25152815 .flags = flags
25162816 };
25172817 struct fuse_ioctl_out outarg;
2518
- struct fuse_req *req = NULL;
2519
- struct page **pages = NULL;
25202818 struct iovec *iov_page = NULL;
25212819 struct iovec *in_iov = NULL, *out_iov = NULL;
2522
- unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
2523
- size_t in_size, out_size, transferred, c;
2820
+ unsigned int in_iovs = 0, out_iovs = 0, max_pages;
2821
+ size_t in_size, out_size, c;
2822
+ ssize_t transferred;
25242823 int err, i;
25252824 struct iov_iter ii;
2825
+ struct fuse_args_pages ap = {};
25262826
25272827 #if BITS_PER_LONG == 32
25282828 inarg.flags |= FUSE_IOCTL_32BIT;
25292829 #else
2530
- if (flags & FUSE_IOCTL_COMPAT)
2830
+ if (flags & FUSE_IOCTL_COMPAT) {
25312831 inarg.flags |= FUSE_IOCTL_32BIT;
2832
+#ifdef CONFIG_X86_X32
2833
+ if (in_x32_syscall())
2834
+ inarg.flags |= FUSE_IOCTL_COMPAT_X32;
2835
+#endif
2836
+ }
25322837 #endif
25332838
25342839 /* assume all the iovs returned by client always fits in a page */
25352840 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
25362841
25372842 err = -ENOMEM;
2538
- pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
2843
+ ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
25392844 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
2540
- if (!pages || !iov_page)
2845
+ if (!ap.pages || !iov_page)
25412846 goto out;
2847
+
2848
+ fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
25422849
25432850 /*
25442851 * If restricted, initialize IO parameters as encoded in @cmd.
....@@ -2583,58 +2890,46 @@
25832890
25842891 /* make sure there are enough buffer pages and init request with them */
25852892 err = -ENOMEM;
2586
- if (max_pages > FUSE_MAX_PAGES_PER_REQ)
2893
+ if (max_pages > fm->fc->max_pages)
25872894 goto out;
2588
- while (num_pages < max_pages) {
2589
- pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
2590
- if (!pages[num_pages])
2895
+ while (ap.num_pages < max_pages) {
2896
+ ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
2897
+ if (!ap.pages[ap.num_pages])
25912898 goto out;
2592
- num_pages++;
2899
+ ap.num_pages++;
25932900 }
25942901
2595
- req = fuse_get_req(fc, num_pages);
2596
- if (IS_ERR(req)) {
2597
- err = PTR_ERR(req);
2598
- req = NULL;
2599
- goto out;
2600
- }
2601
- memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
2602
- req->num_pages = num_pages;
2603
- fuse_page_descs_length_init(req, 0, req->num_pages);
26042902
26052903 /* okay, let's send it to the client */
2606
- req->in.h.opcode = FUSE_IOCTL;
2607
- req->in.h.nodeid = ff->nodeid;
2608
- req->in.numargs = 1;
2609
- req->in.args[0].size = sizeof(inarg);
2610
- req->in.args[0].value = &inarg;
2904
+ ap.args.opcode = FUSE_IOCTL;
2905
+ ap.args.nodeid = ff->nodeid;
2906
+ ap.args.in_numargs = 1;
2907
+ ap.args.in_args[0].size = sizeof(inarg);
2908
+ ap.args.in_args[0].value = &inarg;
26112909 if (in_size) {
2612
- req->in.numargs++;
2613
- req->in.args[1].size = in_size;
2614
- req->in.argpages = 1;
2910
+ ap.args.in_numargs++;
2911
+ ap.args.in_args[1].size = in_size;
2912
+ ap.args.in_pages = true;
26152913
26162914 err = -EFAULT;
26172915 iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
2618
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
2619
- c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
2916
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
2917
+ c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
26202918 if (c != PAGE_SIZE && iov_iter_count(&ii))
26212919 goto out;
26222920 }
26232921 }
26242922
2625
- req->out.numargs = 2;
2626
- req->out.args[0].size = sizeof(outarg);
2627
- req->out.args[0].value = &outarg;
2628
- req->out.args[1].size = out_size;
2629
- req->out.argpages = 1;
2630
- req->out.argvar = 1;
2923
+ ap.args.out_numargs = 2;
2924
+ ap.args.out_args[0].size = sizeof(outarg);
2925
+ ap.args.out_args[0].value = &outarg;
2926
+ ap.args.out_args[1].size = out_size;
2927
+ ap.args.out_pages = true;
2928
+ ap.args.out_argvar = true;
26312929
2632
- fuse_request_send(fc, req);
2633
- err = req->out.h.error;
2634
- transferred = req->out.args[1].size;
2635
- fuse_put_request(fc, req);
2636
- req = NULL;
2637
- if (err)
2930
+ transferred = fuse_simple_request(fm, &ap.args);
2931
+ err = transferred;
2932
+ if (transferred < 0)
26382933 goto out;
26392934
26402935 /* did it ask for retry? */
....@@ -2659,8 +2954,8 @@
26592954 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
26602955 goto out;
26612956
2662
- vaddr = kmap_atomic(pages[0]);
2663
- err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
2957
+ vaddr = kmap_atomic(ap.pages[0]);
2958
+ err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
26642959 transferred, in_iovs + out_iovs,
26652960 (flags & FUSE_IOCTL_COMPAT) != 0);
26662961 kunmap_atomic(vaddr);
....@@ -2670,11 +2965,11 @@
26702965 in_iov = iov_page;
26712966 out_iov = in_iov + in_iovs;
26722967
2673
- err = fuse_verify_ioctl_iov(in_iov, in_iovs);
2968
+ err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs);
26742969 if (err)
26752970 goto out;
26762971
2677
- err = fuse_verify_ioctl_iov(out_iov, out_iovs);
2972
+ err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs);
26782973 if (err)
26792974 goto out;
26802975
....@@ -2687,19 +2982,17 @@
26872982
26882983 err = -EFAULT;
26892984 iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
2690
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
2691
- c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
2985
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
2986
+ c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
26922987 if (c != PAGE_SIZE && iov_iter_count(&ii))
26932988 goto out;
26942989 }
26952990 err = 0;
26962991 out:
2697
- if (req)
2698
- fuse_put_request(fc, req);
26992992 free_page((unsigned long) iov_page);
2700
- while (num_pages)
2701
- __free_page(pages[--num_pages]);
2702
- kfree(pages);
2993
+ while (ap.num_pages)
2994
+ __free_page(ap.pages[--ap.num_pages]);
2995
+ kfree(ap.pages);
27032996
27042997 return err ? err : outarg.result;
27052998 }
....@@ -2773,7 +3066,7 @@
27733066 {
27743067 spin_lock(&fc->lock);
27753068 if (RB_EMPTY_NODE(&ff->polled_node)) {
2776
- struct rb_node **link, *uninitialized_var(parent);
3069
+ struct rb_node **link, *parent;
27773070
27783071 link = fuse_find_polled_node(fc, ff->kh, &parent);
27793072 BUG_ON(*link);
....@@ -2786,13 +3079,13 @@
27863079 __poll_t fuse_file_poll(struct file *file, poll_table *wait)
27873080 {
27883081 struct fuse_file *ff = file->private_data;
2789
- struct fuse_conn *fc = ff->fc;
3082
+ struct fuse_mount *fm = ff->fm;
27903083 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
27913084 struct fuse_poll_out outarg;
27923085 FUSE_ARGS(args);
27933086 int err;
27943087
2795
- if (fc->no_poll)
3088
+ if (fm->fc->no_poll)
27963089 return DEFAULT_POLLMASK;
27973090
27983091 poll_wait(file, &ff->poll_wait, wait);
....@@ -2804,23 +3097,23 @@
28043097 */
28053098 if (waitqueue_active(&ff->poll_wait)) {
28063099 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
2807
- fuse_register_polled_file(fc, ff);
3100
+ fuse_register_polled_file(fm->fc, ff);
28083101 }
28093102
2810
- args.in.h.opcode = FUSE_POLL;
2811
- args.in.h.nodeid = ff->nodeid;
2812
- args.in.numargs = 1;
2813
- args.in.args[0].size = sizeof(inarg);
2814
- args.in.args[0].value = &inarg;
2815
- args.out.numargs = 1;
2816
- args.out.args[0].size = sizeof(outarg);
2817
- args.out.args[0].value = &outarg;
2818
- err = fuse_simple_request(fc, &args);
3103
+ args.opcode = FUSE_POLL;
3104
+ args.nodeid = ff->nodeid;
3105
+ args.in_numargs = 1;
3106
+ args.in_args[0].size = sizeof(inarg);
3107
+ args.in_args[0].value = &inarg;
3108
+ args.out_numargs = 1;
3109
+ args.out_args[0].size = sizeof(outarg);
3110
+ args.out_args[0].value = &outarg;
3111
+ err = fuse_simple_request(fm, &args);
28193112
28203113 if (!err)
28213114 return demangle_poll(outarg.revents);
28223115 if (err == -ENOSYS) {
2823
- fc->no_poll = 1;
3116
+ fm->fc->no_poll = 1;
28243117 return DEFAULT_POLLMASK;
28253118 }
28263119 return EPOLLERR;
....@@ -2865,9 +3158,9 @@
28653158 fuse_do_setattr(file_dentry(file), &attr, file);
28663159 }
28673160
2868
-static inline loff_t fuse_round_up(loff_t off)
3161
+static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
28693162 {
2870
- return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
3163
+ return round_up(off, fc->max_pages << PAGE_SHIFT);
28713164 }
28723165
28733166 static ssize_t
....@@ -2877,11 +3170,10 @@
28773170 ssize_t ret = 0;
28783171 struct file *file = iocb->ki_filp;
28793172 struct fuse_file *ff = file->private_data;
2880
- bool async_dio = ff->fc->async_dio;
28813173 loff_t pos = 0;
28823174 struct inode *inode;
28833175 loff_t i_size;
2884
- size_t count = iov_iter_count(iter);
3176
+ size_t count = iov_iter_count(iter), shortened = 0;
28853177 loff_t offset = iocb->ki_pos;
28863178 struct fuse_io_priv *io;
28873179
....@@ -2889,16 +3181,8 @@
28893181 inode = file->f_mapping->host;
28903182 i_size = i_size_read(inode);
28913183
2892
- if ((iov_iter_rw(iter) == READ) && (offset > i_size))
3184
+ if ((iov_iter_rw(iter) == READ) && (offset >= i_size))
28933185 return 0;
2894
-
2895
- /* optimization for short read */
2896
- if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
2897
- if (offset >= i_size)
2898
- return 0;
2899
- iov_iter_truncate(iter, fuse_round_up(i_size - offset));
2900
- count = iov_iter_count(iter);
2901
- }
29023186
29033187 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
29043188 if (!io)
....@@ -2915,15 +3199,22 @@
29153199 * By default, we want to optimize all I/Os with async request
29163200 * submission to the client filesystem if supported.
29173201 */
2918
- io->async = async_dio;
3202
+ io->async = ff->fm->fc->async_dio;
29193203 io->iocb = iocb;
29203204 io->blocking = is_sync_kiocb(iocb);
3205
+
3206
+ /* optimization for short read */
3207
+ if (io->async && !io->write && offset + count > i_size) {
3208
+ iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset));
3209
+ shortened = count - iov_iter_count(iter);
3210
+ count -= shortened;
3211
+ }
29213212
29223213 /*
29233214 * We cannot asynchronously extend the size of a file.
29243215 * In such case the aio will behave exactly like sync io.
29253216 */
2926
- if ((offset + count > i_size) && iov_iter_rw(iter) == WRITE)
3217
+ if ((offset + count > i_size) && io->write)
29273218 io->blocking = true;
29283219
29293220 if (io->async && io->blocking) {
....@@ -2941,6 +3232,7 @@
29413232 } else {
29423233 ret = __fuse_direct_read(io, iter, &pos);
29433234 }
3235
+ iov_iter_reexpand(iter, iov_iter_count(iter) + shortened);
29443236
29453237 if (io->async) {
29463238 bool blocking = io->blocking;
....@@ -2967,13 +3259,23 @@
29673259 return ret;
29683260 }
29693261
3262
+static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
3263
+{
3264
+ int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX);
3265
+
3266
+ if (!err)
3267
+ fuse_sync_writes(inode);
3268
+
3269
+ return err;
3270
+}
3271
+
29703272 static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
29713273 loff_t length)
29723274 {
29733275 struct fuse_file *ff = file->private_data;
29743276 struct inode *inode = file_inode(file);
29753277 struct fuse_inode *fi = get_fuse_inode(inode);
2976
- struct fuse_conn *fc = ff->fc;
3278
+ struct fuse_mount *fm = ff->fm;
29773279 FUSE_ARGS(args);
29783280 struct fuse_fallocate_in inarg = {
29793281 .fh = ff->fh,
....@@ -2982,26 +3284,30 @@
29823284 .mode = mode
29833285 };
29843286 int err;
2985
- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
2986
- (mode & FALLOC_FL_PUNCH_HOLE);
3287
+ bool block_faults = FUSE_IS_DAX(inode) &&
3288
+ (!(mode & FALLOC_FL_KEEP_SIZE) ||
3289
+ (mode & FALLOC_FL_PUNCH_HOLE));
29873290
29883291 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
29893292 return -EOPNOTSUPP;
29903293
2991
- if (fc->no_fallocate)
3294
+ if (fm->fc->no_fallocate)
29923295 return -EOPNOTSUPP;
29933296
2994
- if (lock_inode) {
2995
- inode_lock(inode);
2996
- if (mode & FALLOC_FL_PUNCH_HOLE) {
2997
- loff_t endbyte = offset + length - 1;
2998
- err = filemap_write_and_wait_range(inode->i_mapping,
2999
- offset, endbyte);
3000
- if (err)
3001
- goto out;
3297
+ inode_lock(inode);
3298
+ if (block_faults) {
3299
+ down_write(&fi->i_mmap_sem);
3300
+ err = fuse_dax_break_layouts(inode, 0, 0);
3301
+ if (err)
3302
+ goto out;
3303
+ }
30023304
3003
- fuse_sync_writes(inode);
3004
- }
3305
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
3306
+ loff_t endbyte = offset + length - 1;
3307
+
3308
+ err = fuse_writeback_range(inode, offset, endbyte);
3309
+ if (err)
3310
+ goto out;
30053311 }
30063312
30073313 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
....@@ -3011,17 +3317,21 @@
30113317 goto out;
30123318 }
30133319
3320
+ err = file_modified(file);
3321
+ if (err)
3322
+ goto out;
3323
+
30143324 if (!(mode & FALLOC_FL_KEEP_SIZE))
30153325 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
30163326
3017
- args.in.h.opcode = FUSE_FALLOCATE;
3018
- args.in.h.nodeid = ff->nodeid;
3019
- args.in.numargs = 1;
3020
- args.in.args[0].size = sizeof(inarg);
3021
- args.in.args[0].value = &inarg;
3022
- err = fuse_simple_request(fc, &args);
3327
+ args.opcode = FUSE_FALLOCATE;
3328
+ args.nodeid = ff->nodeid;
3329
+ args.in_numargs = 1;
3330
+ args.in_args[0].size = sizeof(inarg);
3331
+ args.in_args[0].value = &inarg;
3332
+ err = fuse_simple_request(fm, &args);
30233333 if (err == -ENOSYS) {
3024
- fc->no_fallocate = 1;
3334
+ fm->fc->no_fallocate = 1;
30253335 err = -EOPNOTSUPP;
30263336 }
30273337 if (err)
....@@ -3031,7 +3341,7 @@
30313341 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
30323342 bool changed = fuse_write_update_size(inode, offset + length);
30333343
3034
- if (changed && fc->writeback_cache)
3344
+ if (changed && fm->fc->writeback_cache)
30353345 file_update_time(file);
30363346 }
30373347
....@@ -3044,10 +3354,140 @@
30443354 if (!(mode & FALLOC_FL_KEEP_SIZE))
30453355 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
30463356
3047
- if (lock_inode)
3048
- inode_unlock(inode);
3357
+ if (block_faults)
3358
+ up_write(&fi->i_mmap_sem);
3359
+
3360
+ inode_unlock(inode);
3361
+
3362
+ fuse_flush_time_update(inode);
30493363
30503364 return err;
3365
+}
3366
+
3367
+static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
3368
+ struct file *file_out, loff_t pos_out,
3369
+ size_t len, unsigned int flags)
3370
+{
3371
+ struct fuse_file *ff_in = file_in->private_data;
3372
+ struct fuse_file *ff_out = file_out->private_data;
3373
+ struct inode *inode_in = file_inode(file_in);
3374
+ struct inode *inode_out = file_inode(file_out);
3375
+ struct fuse_inode *fi_out = get_fuse_inode(inode_out);
3376
+ struct fuse_mount *fm = ff_in->fm;
3377
+ struct fuse_conn *fc = fm->fc;
3378
+ FUSE_ARGS(args);
3379
+ struct fuse_copy_file_range_in inarg = {
3380
+ .fh_in = ff_in->fh,
3381
+ .off_in = pos_in,
3382
+ .nodeid_out = ff_out->nodeid,
3383
+ .fh_out = ff_out->fh,
3384
+ .off_out = pos_out,
3385
+ .len = len,
3386
+ .flags = flags
3387
+ };
3388
+ struct fuse_write_out outarg;
3389
+ ssize_t err;
3390
+ /* mark unstable when write-back is not used, and file_out gets
3391
+ * extended */
3392
+ bool is_unstable = (!fc->writeback_cache) &&
3393
+ ((pos_out + len) > inode_out->i_size);
3394
+
3395
+ if (fc->no_copy_file_range)
3396
+ return -EOPNOTSUPP;
3397
+
3398
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
3399
+ return -EXDEV;
3400
+
3401
+ inode_lock(inode_in);
3402
+ err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
3403
+ inode_unlock(inode_in);
3404
+ if (err)
3405
+ return err;
3406
+
3407
+ inode_lock(inode_out);
3408
+
3409
+ err = file_modified(file_out);
3410
+ if (err)
3411
+ goto out;
3412
+
3413
+ /*
3414
+ * Write out dirty pages in the destination file before sending the COPY
3415
+ * request to userspace. After the request is completed, truncate off
3416
+ * pages (including partial ones) from the cache that have been copied,
3417
+ * since these contain stale data at that point.
3418
+ *
3419
+ * This should be mostly correct, but if the COPY writes to partial
3420
+ * pages (at the start or end) and the parts not covered by the COPY are
3421
+ * written through a memory map after calling fuse_writeback_range(),
3422
+ * then these partial page modifications will be lost on truncation.
3423
+ *
3424
+ * It is unlikely that someone would rely on such mixed style
3425
+ * modifications. Yet this does give less guarantees than if the
3426
+ * copying was performed with write(2).
3427
+ *
3428
+ * To fix this a i_mmap_sem style lock could be used to prevent new
3429
+ * faults while the copy is ongoing.
3430
+ */
3431
+ err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
3432
+ if (err)
3433
+ goto out;
3434
+
3435
+ if (is_unstable)
3436
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
3437
+
3438
+ args.opcode = FUSE_COPY_FILE_RANGE;
3439
+ args.nodeid = ff_in->nodeid;
3440
+ args.in_numargs = 1;
3441
+ args.in_args[0].size = sizeof(inarg);
3442
+ args.in_args[0].value = &inarg;
3443
+ args.out_numargs = 1;
3444
+ args.out_args[0].size = sizeof(outarg);
3445
+ args.out_args[0].value = &outarg;
3446
+ err = fuse_simple_request(fm, &args);
3447
+ if (err == -ENOSYS) {
3448
+ fc->no_copy_file_range = 1;
3449
+ err = -EOPNOTSUPP;
3450
+ }
3451
+ if (err)
3452
+ goto out;
3453
+
3454
+ truncate_inode_pages_range(inode_out->i_mapping,
3455
+ ALIGN_DOWN(pos_out, PAGE_SIZE),
3456
+ ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
3457
+
3458
+ if (fc->writeback_cache) {
3459
+ fuse_write_update_size(inode_out, pos_out + outarg.size);
3460
+ file_update_time(file_out);
3461
+ }
3462
+
3463
+ fuse_invalidate_attr(inode_out);
3464
+
3465
+ err = outarg.size;
3466
+out:
3467
+ if (is_unstable)
3468
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
3469
+
3470
+ inode_unlock(inode_out);
3471
+ file_accessed(file_in);
3472
+
3473
+ fuse_flush_time_update(inode_out);
3474
+
3475
+ return err;
3476
+}
3477
+
3478
+static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
3479
+ struct file *dst_file, loff_t dst_off,
3480
+ size_t len, unsigned int flags)
3481
+{
3482
+ ssize_t ret;
3483
+
3484
+ ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
3485
+ len, flags);
3486
+
3487
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
3488
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
3489
+ dst_off, len, flags);
3490
+ return ret;
30513491 }
30523492
30533493 static const struct file_operations fuse_file_operations = {
....@@ -3060,38 +3500,23 @@
30603500 .release = fuse_release,
30613501 .fsync = fuse_fsync,
30623502 .lock = fuse_file_lock,
3503
+ .get_unmapped_area = thp_get_unmapped_area,
30633504 .flock = fuse_file_flock,
30643505 .splice_read = generic_file_splice_read,
3506
+ .splice_write = iter_file_splice_write,
30653507 .unlocked_ioctl = fuse_file_ioctl,
30663508 .compat_ioctl = fuse_file_compat_ioctl,
30673509 .poll = fuse_file_poll,
30683510 .fallocate = fuse_file_fallocate,
3069
-};
3070
-
3071
-static const struct file_operations fuse_direct_io_file_operations = {
3072
- .llseek = fuse_file_llseek,
3073
- .read_iter = fuse_direct_read_iter,
3074
- .write_iter = fuse_direct_write_iter,
3075
- .mmap = fuse_direct_mmap,
3076
- .open = fuse_open,
3077
- .flush = fuse_flush,
3078
- .release = fuse_release,
3079
- .fsync = fuse_fsync,
3080
- .lock = fuse_file_lock,
3081
- .flock = fuse_file_flock,
3082
- .unlocked_ioctl = fuse_file_ioctl,
3083
- .compat_ioctl = fuse_file_compat_ioctl,
3084
- .poll = fuse_file_poll,
3085
- .fallocate = fuse_file_fallocate,
3086
- /* no splice_read */
3511
+ .copy_file_range = fuse_copy_file_range,
30873512 };
30883513
30893514 static const struct address_space_operations fuse_file_aops = {
30903515 .readpage = fuse_readpage,
3516
+ .readahead = fuse_readahead,
30913517 .writepage = fuse_writepage,
30923518 .writepages = fuse_writepages,
30933519 .launder_page = fuse_launder_page,
3094
- .readpages = fuse_readpages,
30953520 .set_page_dirty = __set_page_dirty_nobuffers,
30963521 .bmap = fuse_bmap,
30973522 .direct_IO = fuse_direct_IO,
....@@ -3101,6 +3526,17 @@
31013526
31023527 void fuse_init_file_inode(struct inode *inode)
31033528 {
3529
+ struct fuse_inode *fi = get_fuse_inode(inode);
3530
+
31043531 inode->i_fop = &fuse_file_operations;
31053532 inode->i_data.a_ops = &fuse_file_aops;
3533
+
3534
+ INIT_LIST_HEAD(&fi->write_files);
3535
+ INIT_LIST_HEAD(&fi->queued_writes);
3536
+ fi->writectr = 0;
3537
+ init_waitqueue_head(&fi->page_waitq);
3538
+ fi->writepages = RB_ROOT;
3539
+
3540
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
3541
+ fuse_dax_inode_init(inode);
31063542 }