hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/fuse/file.c
....@@ -20,9 +20,19 @@
2020 #include <linux/uio.h>
2121 #include <linux/fs.h>
2222
23
-static const struct file_operations fuse_direct_io_file_operations;
23
+static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
24
+ struct fuse_page_desc **desc)
25
+{
26
+ struct page **pages;
2427
25
-static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
28
+ pages = kzalloc(npages * (sizeof(struct page *) +
29
+ sizeof(struct fuse_page_desc)), flags);
30
+ *desc = (void *) (pages + npages);
31
+
32
+ return pages;
33
+}
34
+
35
+static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
2636 int opcode, struct fuse_open_out *outargp)
2737 {
2838 struct fuse_open_in inarg;
....@@ -30,50 +40,57 @@
3040
3141 memset(&inarg, 0, sizeof(inarg));
3242 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
33
- if (!fc->atomic_o_trunc)
43
+ if (!fm->fc->atomic_o_trunc)
3444 inarg.flags &= ~O_TRUNC;
35
- args.in.h.opcode = opcode;
36
- args.in.h.nodeid = nodeid;
37
- args.in.numargs = 1;
38
- args.in.args[0].size = sizeof(inarg);
39
- args.in.args[0].value = &inarg;
40
- args.out.numargs = 1;
41
- args.out.args[0].size = sizeof(*outargp);
42
- args.out.args[0].value = outargp;
45
+ args.opcode = opcode;
46
+ args.nodeid = nodeid;
47
+ args.in_numargs = 1;
48
+ args.in_args[0].size = sizeof(inarg);
49
+ args.in_args[0].value = &inarg;
50
+ args.out_numargs = 1;
51
+ args.out_args[0].size = sizeof(*outargp);
52
+ args.out_args[0].value = outargp;
4353
44
- return fuse_simple_request(fc, &args);
54
+ return fuse_simple_request(fm, &args);
4555 }
4656
47
-struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57
+struct fuse_release_args {
58
+ struct fuse_args args;
59
+ struct fuse_release_in inarg;
60
+ struct inode *inode;
61
+};
62
+
63
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
4864 {
4965 struct fuse_file *ff;
5066
51
- ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
67
+ ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
5268 if (unlikely(!ff))
5369 return NULL;
5470
55
- ff->fc = fc;
56
- ff->reserved_req = fuse_request_alloc(0);
57
- if (unlikely(!ff->reserved_req)) {
71
+ ff->fm = fm;
72
+ ff->release_args = kzalloc(sizeof(*ff->release_args),
73
+ GFP_KERNEL_ACCOUNT);
74
+ if (!ff->release_args) {
5875 kfree(ff);
5976 return NULL;
6077 }
6178
6279 INIT_LIST_HEAD(&ff->write_entry);
80
+ mutex_init(&ff->readdir.lock);
6381 refcount_set(&ff->count, 1);
6482 RB_CLEAR_NODE(&ff->polled_node);
6583 init_waitqueue_head(&ff->poll_wait);
6684
67
- spin_lock(&fc->lock);
68
- ff->kh = ++fc->khctr;
69
- spin_unlock(&fc->lock);
85
+ ff->kh = atomic64_inc_return(&fm->fc->khctr);
7086
7187 return ff;
7288 }
7389
7490 void fuse_file_free(struct fuse_file *ff)
7591 {
76
- fuse_request_free(ff->reserved_req);
92
+ kfree(ff->release_args);
93
+ mutex_destroy(&ff->readdir.lock);
7794 kfree(ff);
7895 }
7996
....@@ -83,65 +100,67 @@
83100 return ff;
84101 }
85102
86
-static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
103
+static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
104
+ int error)
87105 {
88
- iput(req->misc.release.inode);
106
+ struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
107
+
108
+ iput(ra->inode);
109
+ kfree(ra);
89110 }
90111
91112 static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
92113 {
93114 if (refcount_dec_and_test(&ff->count)) {
94
- struct fuse_req *req = ff->reserved_req;
115
+ struct fuse_args *args = &ff->release_args->args;
95116
96
- if (ff->fc->no_open && !isdir) {
97
- /*
98
- * Drop the release request when client does not
99
- * implement 'open'
100
- */
101
- __clear_bit(FR_BACKGROUND, &req->flags);
102
- iput(req->misc.release.inode);
103
- fuse_put_request(ff->fc, req);
117
+ if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
118
+ /* Do nothing when client does not implement 'open' */
119
+ fuse_release_end(ff->fm, args, 0);
104120 } else if (sync) {
105
- __set_bit(FR_FORCE, &req->flags);
106
- __clear_bit(FR_BACKGROUND, &req->flags);
107
- fuse_request_send(ff->fc, req);
108
- iput(req->misc.release.inode);
109
- fuse_put_request(ff->fc, req);
121
+ fuse_simple_request(ff->fm, args);
122
+ fuse_release_end(ff->fm, args, 0);
110123 } else {
111
- req->end = fuse_release_end;
112
- __set_bit(FR_BACKGROUND, &req->flags);
113
- fuse_request_send_background(ff->fc, req);
124
+ args->end = fuse_release_end;
125
+ if (fuse_simple_background(ff->fm, args,
126
+ GFP_KERNEL | __GFP_NOFAIL))
127
+ fuse_release_end(ff->fm, args, -ENOTCONN);
114128 }
115129 kfree(ff);
116130 }
117131 }
118132
119
-int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
133
+int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
120134 bool isdir)
121135 {
136
+ struct fuse_conn *fc = fm->fc;
122137 struct fuse_file *ff;
123138 int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
124139
125
- ff = fuse_file_alloc(fc);
140
+ ff = fuse_file_alloc(fm);
126141 if (!ff)
127142 return -ENOMEM;
128143
129144 ff->fh = 0;
130
- ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
131
- if (!fc->no_open || isdir) {
145
+ /* Default for no-open */
146
+ ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
147
+ if (isdir ? !fc->no_opendir : !fc->no_open) {
132148 struct fuse_open_out outarg;
133149 int err;
134150
135
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
151
+ err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
136152 if (!err) {
137153 ff->fh = outarg.fh;
138154 ff->open_flags = outarg.open_flags;
139
-
140
- } else if (err != -ENOSYS || isdir) {
155
+ fuse_passthrough_setup(fc, ff, &outarg);
156
+ } else if (err != -ENOSYS) {
141157 fuse_file_free(ff);
142158 return err;
143159 } else {
144
- fc->no_open = 1;
160
+ if (isdir)
161
+ fc->no_opendir = 1;
162
+ else
163
+ fc->no_open = 1;
145164 }
146165 }
147166
....@@ -158,17 +177,16 @@
158177 static void fuse_link_write_file(struct file *file)
159178 {
160179 struct inode *inode = file_inode(file);
161
- struct fuse_conn *fc = get_fuse_conn(inode);
162180 struct fuse_inode *fi = get_fuse_inode(inode);
163181 struct fuse_file *ff = file->private_data;
164182 /*
165183 * file may be written through mmap, so chain it onto the
166184 * inodes's write_file list
167185 */
168
- spin_lock(&fc->lock);
186
+ spin_lock(&fi->lock);
169187 if (list_empty(&ff->write_entry))
170188 list_add(&ff->write_entry, &fi->write_files);
171
- spin_unlock(&fc->lock);
189
+ spin_unlock(&fi->lock);
172190 }
173191
174192 void fuse_finish_open(struct inode *inode, struct file *file)
....@@ -176,8 +194,6 @@
176194 struct fuse_file *ff = file->private_data;
177195 struct fuse_conn *fc = get_fuse_conn(inode);
178196
179
- if (ff->open_flags & FOPEN_DIRECT_IO)
180
- file->f_op = &fuse_direct_io_file_operations;
181197 if (ff->open_flags & FOPEN_STREAM)
182198 stream_open(inode, file);
183199 else if (ff->open_flags & FOPEN_NONSEEKABLE)
....@@ -186,29 +202,28 @@
186202 if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
187203 struct fuse_inode *fi = get_fuse_inode(inode);
188204
189
- spin_lock(&fc->lock);
190
- fi->attr_version = ++fc->attr_version;
205
+ spin_lock(&fi->lock);
206
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
191207 i_size_write(inode, 0);
192
- spin_unlock(&fc->lock);
193
- truncate_pagecache(inode, 0);
208
+ spin_unlock(&fi->lock);
194209 fuse_invalidate_attr(inode);
195210 if (fc->writeback_cache)
196211 file_update_time(file);
197
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
198
- invalidate_inode_pages2(inode->i_mapping);
199212 }
200
-
201213 if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
202214 fuse_link_write_file(file);
203215 }
204216
205217 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
206218 {
207
- struct fuse_conn *fc = get_fuse_conn(inode);
219
+ struct fuse_mount *fm = get_fuse_mount(inode);
220
+ struct fuse_conn *fc = fm->fc;
208221 int err;
209222 bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
210223 fc->atomic_o_trunc &&
211224 fc->writeback_cache;
225
+ bool dax_truncate = (file->f_flags & O_TRUNC) &&
226
+ fc->atomic_o_trunc && FUSE_IS_DAX(inode);
212227
213228 if (fuse_is_bad(inode))
214229 return -EIO;
....@@ -217,63 +232,91 @@
217232 if (err)
218233 return err;
219234
220
- if (is_wb_truncate) {
235
+ if (is_wb_truncate || dax_truncate)
221236 inode_lock(inode);
222
- fuse_set_nowrite(inode);
237
+
238
+ if (dax_truncate) {
239
+ down_write(&get_fuse_inode(inode)->i_mmap_sem);
240
+ err = fuse_dax_break_layouts(inode, 0, 0);
241
+ if (err)
242
+ goto out_inode_unlock;
223243 }
224244
225
- err = fuse_do_open(fc, get_node_id(inode), file, isdir);
245
+ if (is_wb_truncate || dax_truncate)
246
+ fuse_set_nowrite(inode);
226247
248
+ err = fuse_do_open(fm, get_node_id(inode), file, isdir);
227249 if (!err)
228250 fuse_finish_open(inode, file);
229251
230
- if (is_wb_truncate) {
252
+ if (is_wb_truncate || dax_truncate)
231253 fuse_release_nowrite(inode);
232
- inode_unlock(inode);
254
+ if (!err) {
255
+ struct fuse_file *ff = file->private_data;
256
+
257
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
258
+ truncate_pagecache(inode, 0);
259
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
260
+ invalidate_inode_pages2(inode->i_mapping);
233261 }
262
+ if (dax_truncate)
263
+ up_write(&get_fuse_inode(inode)->i_mmap_sem);
264
+
265
+out_inode_unlock:
266
+ if (is_wb_truncate || dax_truncate)
267
+ inode_unlock(inode);
234268
235269 return err;
236270 }
237271
238
-static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
272
+static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
273
+ int flags, int opcode)
239274 {
240
- struct fuse_conn *fc = ff->fc;
241
- struct fuse_req *req = ff->reserved_req;
242
- struct fuse_release_in *inarg = &req->misc.release.in;
275
+ struct fuse_conn *fc = ff->fm->fc;
276
+ struct fuse_release_args *ra = ff->release_args;
243277
278
+ /* Inode is NULL on error path of fuse_create_open() */
279
+ if (likely(fi)) {
280
+ spin_lock(&fi->lock);
281
+ list_del(&ff->write_entry);
282
+ spin_unlock(&fi->lock);
283
+ }
244284 spin_lock(&fc->lock);
245
- list_del(&ff->write_entry);
246285 if (!RB_EMPTY_NODE(&ff->polled_node))
247286 rb_erase(&ff->polled_node, &fc->polled_files);
248287 spin_unlock(&fc->lock);
249288
250289 wake_up_interruptible_all(&ff->poll_wait);
251290
252
- inarg->fh = ff->fh;
253
- inarg->flags = flags;
254
- req->in.h.opcode = opcode;
255
- req->in.h.nodeid = ff->nodeid;
256
- req->in.numargs = 1;
257
- req->in.args[0].size = sizeof(struct fuse_release_in);
258
- req->in.args[0].value = inarg;
291
+ ra->inarg.fh = ff->fh;
292
+ ra->inarg.flags = flags;
293
+ ra->args.in_numargs = 1;
294
+ ra->args.in_args[0].size = sizeof(struct fuse_release_in);
295
+ ra->args.in_args[0].value = &ra->inarg;
296
+ ra->args.opcode = opcode;
297
+ ra->args.nodeid = ff->nodeid;
298
+ ra->args.force = true;
299
+ ra->args.nocreds = true;
259300 }
260301
261302 void fuse_release_common(struct file *file, bool isdir)
262303 {
304
+ struct fuse_inode *fi = get_fuse_inode(file_inode(file));
263305 struct fuse_file *ff = file->private_data;
264
- struct fuse_req *req = ff->reserved_req;
306
+ struct fuse_release_args *ra = ff->release_args;
265307 int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
266308
267
- fuse_prepare_release(ff, file->f_flags, opcode);
309
+ fuse_passthrough_release(&ff->passthrough);
310
+
311
+ fuse_prepare_release(fi, ff, file->f_flags, opcode);
268312
269313 if (ff->flock) {
270
- struct fuse_release_in *inarg = &req->misc.release.in;
271
- inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
272
- inarg->lock_owner = fuse_lock_owner_id(ff->fc,
273
- (fl_owner_t) file);
314
+ ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
315
+ ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
316
+ (fl_owner_t) file);
274317 }
275318 /* Hold inode until release is finished */
276
- req->misc.release.inode = igrab(file_inode(file));
319
+ ra->inode = igrab(file_inode(file));
277320
278321 /*
279322 * Normally this will send the RELEASE request, however if
....@@ -284,7 +327,7 @@
284327 * synchronous RELEASE is allowed (and desirable) in this case
285328 * because the server can be trusted not to screw up.
286329 */
287
- fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
330
+ fuse_file_put(ff, ff->fm->fc->destroy, isdir);
288331 }
289332
290333 static int fuse_open(struct inode *inode, struct file *file)
....@@ -306,10 +349,10 @@
306349 return 0;
307350 }
308351
309
-void fuse_sync_release(struct fuse_file *ff, int flags)
352
+void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags)
310353 {
311354 WARN_ON(refcount_read(&ff->count) > 1);
312
- fuse_prepare_release(ff, flags, FUSE_RELEASE);
355
+ fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
313356 /*
314357 * iput(NULL) is a no-op and since the refcount is 1 and everything's
315358 * synchronous, we are fine with not doing igrab() here"
....@@ -340,6 +383,38 @@
340383 return (u64) v0 + ((u64) v1 << 32);
341384 }
342385
386
+struct fuse_writepage_args {
387
+ struct fuse_io_args ia;
388
+ struct rb_node writepages_entry;
389
+ struct list_head queue_entry;
390
+ struct fuse_writepage_args *next;
391
+ struct inode *inode;
392
+};
393
+
394
+static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
395
+ pgoff_t idx_from, pgoff_t idx_to)
396
+{
397
+ struct rb_node *n;
398
+
399
+ n = fi->writepages.rb_node;
400
+
401
+ while (n) {
402
+ struct fuse_writepage_args *wpa;
403
+ pgoff_t curr_index;
404
+
405
+ wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
406
+ WARN_ON(get_fuse_inode(wpa->inode) != fi);
407
+ curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
408
+ if (idx_from >= curr_index + wpa->ia.ap.num_pages)
409
+ n = n->rb_right;
410
+ else if (idx_to < curr_index)
411
+ n = n->rb_left;
412
+ else
413
+ return wpa;
414
+ }
415
+ return NULL;
416
+}
417
+
343418 /*
344419 * Check if any page in a range is under writeback
345420 *
....@@ -349,24 +424,12 @@
349424 static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
350425 pgoff_t idx_to)
351426 {
352
- struct fuse_conn *fc = get_fuse_conn(inode);
353427 struct fuse_inode *fi = get_fuse_inode(inode);
354
- struct fuse_req *req;
355
- bool found = false;
428
+ bool found;
356429
357
- spin_lock(&fc->lock);
358
- list_for_each_entry(req, &fi->writepages, writepages_entry) {
359
- pgoff_t curr_index;
360
-
361
- BUG_ON(req->inode != inode);
362
- curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
363
- if (idx_from < curr_index + req->num_pages &&
364
- curr_index <= idx_to) {
365
- found = true;
366
- break;
367
- }
368
- }
369
- spin_unlock(&fc->lock);
430
+ spin_lock(&fi->lock);
431
+ found = fuse_find_writeback(fi, idx_from, idx_to);
432
+ spin_unlock(&fi->lock);
370433
371434 return found;
372435 }
....@@ -382,12 +445,11 @@
382445 * Since fuse doesn't rely on the VM writeback tracking, this has to
383446 * use some other means.
384447 */
385
-static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
448
+static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
386449 {
387450 struct fuse_inode *fi = get_fuse_inode(inode);
388451
389452 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
390
- return 0;
391453 }
392454
393455 /*
....@@ -408,17 +470,14 @@
408470 static int fuse_flush(struct file *file, fl_owner_t id)
409471 {
410472 struct inode *inode = file_inode(file);
411
- struct fuse_conn *fc = get_fuse_conn(inode);
473
+ struct fuse_mount *fm = get_fuse_mount(inode);
412474 struct fuse_file *ff = file->private_data;
413
- struct fuse_req *req;
414475 struct fuse_flush_in inarg;
476
+ FUSE_ARGS(args);
415477 int err;
416478
417479 if (fuse_is_bad(inode))
418480 return -EIO;
419
-
420
- if (fc->no_flush)
421
- return 0;
422481
423482 err = write_inode_now(inode, 1);
424483 if (err)
....@@ -432,34 +491,61 @@
432491 if (err)
433492 return err;
434493
435
- req = fuse_get_req_nofail_nopages(fc, file);
494
+ err = 0;
495
+ if (fm->fc->no_flush)
496
+ goto inval_attr_out;
497
+
436498 memset(&inarg, 0, sizeof(inarg));
437499 inarg.fh = ff->fh;
438
- inarg.lock_owner = fuse_lock_owner_id(fc, id);
439
- req->in.h.opcode = FUSE_FLUSH;
440
- req->in.h.nodeid = get_node_id(inode);
441
- req->in.numargs = 1;
442
- req->in.args[0].size = sizeof(inarg);
443
- req->in.args[0].value = &inarg;
444
- __set_bit(FR_FORCE, &req->flags);
445
- fuse_request_send(fc, req);
446
- err = req->out.h.error;
447
- fuse_put_request(fc, req);
500
+ inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
501
+ args.opcode = FUSE_FLUSH;
502
+ args.nodeid = get_node_id(inode);
503
+ args.in_numargs = 1;
504
+ args.in_args[0].size = sizeof(inarg);
505
+ args.in_args[0].value = &inarg;
506
+ args.force = true;
507
+
508
+ err = fuse_simple_request(fm, &args);
448509 if (err == -ENOSYS) {
449
- fc->no_flush = 1;
510
+ fm->fc->no_flush = 1;
450511 err = 0;
451512 }
513
+
514
+inval_attr_out:
515
+ /*
516
+ * In memory i_blocks is not maintained by fuse, if writeback cache is
517
+ * enabled, i_blocks from cached attr may not be accurate.
518
+ */
519
+ if (!err && fm->fc->writeback_cache)
520
+ fuse_invalidate_attr(inode);
452521 return err;
453522 }
454523
455524 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
456
- int datasync, int isdir)
525
+ int datasync, int opcode)
457526 {
458527 struct inode *inode = file->f_mapping->host;
459
- struct fuse_conn *fc = get_fuse_conn(inode);
528
+ struct fuse_mount *fm = get_fuse_mount(inode);
460529 struct fuse_file *ff = file->private_data;
461530 FUSE_ARGS(args);
462531 struct fuse_fsync_in inarg;
532
+
533
+ memset(&inarg, 0, sizeof(inarg));
534
+ inarg.fh = ff->fh;
535
+ inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
536
+ args.opcode = opcode;
537
+ args.nodeid = get_node_id(inode);
538
+ args.in_numargs = 1;
539
+ args.in_args[0].size = sizeof(inarg);
540
+ args.in_args[0].value = &inarg;
541
+ return fuse_simple_request(fm, &args);
542
+}
543
+
544
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
545
+ int datasync)
546
+{
547
+ struct inode *inode = file->f_mapping->host;
548
+ struct fuse_conn *fc = get_fuse_conn(inode);
463549 int err;
464550
465551 if (fuse_is_bad(inode))
....@@ -491,65 +577,49 @@
491577 if (err)
492578 goto out;
493579
494
- if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
580
+ if (fc->no_fsync)
495581 goto out;
496582
497
- memset(&inarg, 0, sizeof(inarg));
498
- inarg.fh = ff->fh;
499
- inarg.fsync_flags = datasync ? 1 : 0;
500
- args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
501
- args.in.h.nodeid = get_node_id(inode);
502
- args.in.numargs = 1;
503
- args.in.args[0].size = sizeof(inarg);
504
- args.in.args[0].value = &inarg;
505
- err = fuse_simple_request(fc, &args);
583
+ err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
506584 if (err == -ENOSYS) {
507
- if (isdir)
508
- fc->no_fsyncdir = 1;
509
- else
510
- fc->no_fsync = 1;
585
+ fc->no_fsync = 1;
511586 err = 0;
512587 }
513588 out:
514589 inode_unlock(inode);
590
+
515591 return err;
516592 }
517593
518
-static int fuse_fsync(struct file *file, loff_t start, loff_t end,
519
- int datasync)
594
+void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
595
+ size_t count, int opcode)
520596 {
521
- return fuse_fsync_common(file, start, end, datasync, 0);
522
-}
523
-
524
-void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
525
- size_t count, int opcode)
526
-{
527
- struct fuse_read_in *inarg = &req->misc.read.in;
528597 struct fuse_file *ff = file->private_data;
598
+ struct fuse_args *args = &ia->ap.args;
529599
530
- inarg->fh = ff->fh;
531
- inarg->offset = pos;
532
- inarg->size = count;
533
- inarg->flags = file->f_flags;
534
- req->in.h.opcode = opcode;
535
- req->in.h.nodeid = ff->nodeid;
536
- req->in.numargs = 1;
537
- req->in.args[0].size = sizeof(struct fuse_read_in);
538
- req->in.args[0].value = inarg;
539
- req->out.argvar = 1;
540
- req->out.numargs = 1;
541
- req->out.args[0].size = count;
600
+ ia->read.in.fh = ff->fh;
601
+ ia->read.in.offset = pos;
602
+ ia->read.in.size = count;
603
+ ia->read.in.flags = file->f_flags;
604
+ args->opcode = opcode;
605
+ args->nodeid = ff->nodeid;
606
+ args->in_numargs = 1;
607
+ args->in_args[0].size = sizeof(ia->read.in);
608
+ args->in_args[0].value = &ia->read.in;
609
+ args->out_argvar = true;
610
+ args->out_numargs = 1;
611
+ args->out_args[0].size = count;
542612 }
543613
544
-static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
614
+static void fuse_release_user_pages(struct fuse_args_pages *ap,
615
+ bool should_dirty)
545616 {
546
- unsigned i;
617
+ unsigned int i;
547618
548
- for (i = 0; i < req->num_pages; i++) {
549
- struct page *page = req->pages[i];
619
+ for (i = 0; i < ap->num_pages; i++) {
550620 if (should_dirty)
551
- set_page_dirty_lock(page);
552
- put_page(page);
621
+ set_page_dirty_lock(ap->pages[i]);
622
+ put_page(ap->pages[i]);
553623 }
554624 }
555625
....@@ -608,9 +678,9 @@
608678 struct fuse_conn *fc = get_fuse_conn(inode);
609679 struct fuse_inode *fi = get_fuse_inode(inode);
610680
611
- spin_lock(&fc->lock);
612
- fi->attr_version = ++fc->attr_version;
613
- spin_unlock(&fc->lock);
681
+ spin_lock(&fi->lock);
682
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
683
+ spin_unlock(&fi->lock);
614684 }
615685
616686 io->iocb->ki_complete(io->iocb, res, 0);
....@@ -619,64 +689,97 @@
619689 kref_put(&io->refcnt, fuse_io_release);
620690 }
621691
622
-static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
692
+static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
693
+ unsigned int npages)
623694 {
624
- struct fuse_io_priv *io = req->io;
625
- ssize_t pos = -1;
695
+ struct fuse_io_args *ia;
626696
627
- fuse_release_user_pages(req, io->should_dirty);
628
-
629
- if (io->write) {
630
- if (req->misc.write.in.size != req->misc.write.out.size)
631
- pos = req->misc.write.in.offset - io->offset +
632
- req->misc.write.out.size;
633
- } else {
634
- if (req->misc.read.in.size != req->out.args[0].size)
635
- pos = req->misc.read.in.offset - io->offset +
636
- req->out.args[0].size;
697
+ ia = kzalloc(sizeof(*ia), GFP_KERNEL);
698
+ if (ia) {
699
+ ia->io = io;
700
+ ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
701
+ &ia->ap.descs);
702
+ if (!ia->ap.pages) {
703
+ kfree(ia);
704
+ ia = NULL;
705
+ }
637706 }
638
-
639
- fuse_aio_complete(io, req->out.h.error, pos);
707
+ return ia;
640708 }
641709
642
-static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
643
- size_t num_bytes, struct fuse_io_priv *io)
710
+static void fuse_io_free(struct fuse_io_args *ia)
644711 {
712
+ kfree(ia->ap.pages);
713
+ kfree(ia);
714
+}
715
+
716
+static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
717
+ int err)
718
+{
719
+ struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
720
+ struct fuse_io_priv *io = ia->io;
721
+ ssize_t pos = -1;
722
+
723
+ fuse_release_user_pages(&ia->ap, io->should_dirty);
724
+
725
+ if (err) {
726
+ /* Nothing */
727
+ } else if (io->write) {
728
+ if (ia->write.out.size > ia->write.in.size) {
729
+ err = -EIO;
730
+ } else if (ia->write.in.size != ia->write.out.size) {
731
+ pos = ia->write.in.offset - io->offset +
732
+ ia->write.out.size;
733
+ }
734
+ } else {
735
+ u32 outsize = args->out_args[0].size;
736
+
737
+ if (ia->read.in.size != outsize)
738
+ pos = ia->read.in.offset - io->offset + outsize;
739
+ }
740
+
741
+ fuse_aio_complete(io, err, pos);
742
+ fuse_io_free(ia);
743
+}
744
+
745
+static ssize_t fuse_async_req_send(struct fuse_mount *fm,
746
+ struct fuse_io_args *ia, size_t num_bytes)
747
+{
748
+ ssize_t err;
749
+ struct fuse_io_priv *io = ia->io;
750
+
645751 spin_lock(&io->lock);
646752 kref_get(&io->refcnt);
647753 io->size += num_bytes;
648754 io->reqs++;
649755 spin_unlock(&io->lock);
650756
651
- req->io = io;
652
- req->end = fuse_aio_complete_req;
653
-
654
- __fuse_get_request(req);
655
- fuse_request_send_background(fc, req);
757
+ ia->ap.args.end = fuse_aio_complete_req;
758
+ ia->ap.args.may_block = io->should_dirty;
759
+ err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
760
+ if (err)
761
+ fuse_aio_complete_req(fm, &ia->ap.args, err);
656762
657763 return num_bytes;
658764 }
659765
660
-static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
661
- loff_t pos, size_t count, fl_owner_t owner)
766
+static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
767
+ fl_owner_t owner)
662768 {
663
- struct file *file = io->iocb->ki_filp;
769
+ struct file *file = ia->io->iocb->ki_filp;
664770 struct fuse_file *ff = file->private_data;
665
- struct fuse_conn *fc = ff->fc;
771
+ struct fuse_mount *fm = ff->fm;
666772
667
- fuse_read_fill(req, file, pos, count, FUSE_READ);
773
+ fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
668774 if (owner != NULL) {
669
- struct fuse_read_in *inarg = &req->misc.read.in;
670
-
671
- inarg->read_flags |= FUSE_READ_LOCKOWNER;
672
- inarg->lock_owner = fuse_lock_owner_id(fc, owner);
775
+ ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
776
+ ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
673777 }
674778
675
- if (io->async)
676
- return fuse_async_req_send(fc, req, count, io);
779
+ if (ia->io->async)
780
+ return fuse_async_req_send(fm, ia, count);
677781
678
- fuse_request_send(fc, req);
679
- return req->out.args[0].size;
782
+ return fuse_simple_request(fm, &ia->ap.args);
680783 }
681784
682785 static void fuse_read_update_size(struct inode *inode, loff_t size,
....@@ -685,19 +788,18 @@
685788 struct fuse_conn *fc = get_fuse_conn(inode);
686789 struct fuse_inode *fi = get_fuse_inode(inode);
687790
688
- spin_lock(&fc->lock);
689
- if (attr_ver == fi->attr_version && size < inode->i_size &&
791
+ spin_lock(&fi->lock);
792
+ if (attr_ver >= fi->attr_version && size < inode->i_size &&
690793 !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
691
- fi->attr_version = ++fc->attr_version;
794
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
692795 i_size_write(inode, size);
693796 }
694
- spin_unlock(&fc->lock);
797
+ spin_unlock(&fi->lock);
695798 }
696799
697
-static void fuse_short_read(struct fuse_req *req, struct inode *inode,
698
- u64 attr_ver)
800
+static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
801
+ struct fuse_args_pages *ap)
699802 {
700
- size_t num_read = req->out.args[0].size;
701803 struct fuse_conn *fc = get_fuse_conn(inode);
702804
703805 if (fc->writeback_cache) {
....@@ -710,28 +812,31 @@
710812 int start_idx = num_read >> PAGE_SHIFT;
711813 size_t off = num_read & (PAGE_SIZE - 1);
712814
713
- for (i = start_idx; i < req->num_pages; i++) {
714
- zero_user_segment(req->pages[i], off, PAGE_SIZE);
815
+ for (i = start_idx; i < ap->num_pages; i++) {
816
+ zero_user_segment(ap->pages[i], off, PAGE_SIZE);
715817 off = 0;
716818 }
717819 } else {
718
- loff_t pos = page_offset(req->pages[0]) + num_read;
820
+ loff_t pos = page_offset(ap->pages[0]) + num_read;
719821 fuse_read_update_size(inode, pos, attr_ver);
720822 }
721823 }
722824
723825 static int fuse_do_readpage(struct file *file, struct page *page)
724826 {
725
- struct kiocb iocb;
726
- struct fuse_io_priv io;
727827 struct inode *inode = page->mapping->host;
728
- struct fuse_conn *fc = get_fuse_conn(inode);
729
- struct fuse_req *req;
730
- size_t num_read;
828
+ struct fuse_mount *fm = get_fuse_mount(inode);
731829 loff_t pos = page_offset(page);
732
- size_t count = PAGE_SIZE;
830
+ struct fuse_page_desc desc = { .length = PAGE_SIZE };
831
+ struct fuse_io_args ia = {
832
+ .ap.args.page_zeroing = true,
833
+ .ap.args.out_pages = true,
834
+ .ap.num_pages = 1,
835
+ .ap.pages = &page,
836
+ .ap.descs = &desc,
837
+ };
838
+ ssize_t res;
733839 u64 attr_ver;
734
- int err;
735840
736841 /*
737842 * Page writeback can extend beyond the lifetime of the
....@@ -740,35 +845,25 @@
740845 */
741846 fuse_wait_on_page_writeback(inode, page->index);
742847
743
- req = fuse_get_req(fc, 1);
744
- if (IS_ERR(req))
745
- return PTR_ERR(req);
848
+ attr_ver = fuse_get_attr_version(fm->fc);
746849
747
- attr_ver = fuse_get_attr_version(fc);
850
+ /* Don't overflow end offset */
851
+ if (pos + (desc.length - 1) == LLONG_MAX)
852
+ desc.length--;
748853
749
- req->out.page_zeroing = 1;
750
- req->out.argpages = 1;
751
- req->num_pages = 1;
752
- req->pages[0] = page;
753
- req->page_descs[0].length = count;
754
- init_sync_kiocb(&iocb, file);
755
- io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb);
756
- num_read = fuse_send_read(req, &io, pos, count, NULL);
757
- err = req->out.h.error;
854
+ fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
855
+ res = fuse_simple_request(fm, &ia.ap.args);
856
+ if (res < 0)
857
+ return res;
858
+ /*
859
+ * Short read means EOF. If file size is larger, truncate it
860
+ */
861
+ if (res < desc.length)
862
+ fuse_short_read(inode, attr_ver, res, &ia.ap);
758863
759
- if (!err) {
760
- /*
761
- * Short read means EOF. If file size is larger, truncate it
762
- */
763
- if (num_read < count)
764
- fuse_short_read(req, inode, attr_ver);
864
+ SetPageUptodate(page);
765865
766
- SetPageUptodate(page);
767
- }
768
-
769
- fuse_put_request(fc, req);
770
-
771
- return err;
866
+ return 0;
772867 }
773868
774869 static int fuse_readpage(struct file *file, struct page *page)
....@@ -787,15 +882,18 @@
787882 return err;
788883 }
789884
790
-static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
885
+static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
886
+ int err)
791887 {
792888 int i;
793
- size_t count = req->misc.read.in.size;
794
- size_t num_read = req->out.args[0].size;
889
+ struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
890
+ struct fuse_args_pages *ap = &ia->ap;
891
+ size_t count = ia->read.in.size;
892
+ size_t num_read = args->out_args[0].size;
795893 struct address_space *mapping = NULL;
796894
797
- for (i = 0; mapping == NULL && i < req->num_pages; i++)
798
- mapping = req->pages[i]->mapping;
895
+ for (i = 0; mapping == NULL && i < ap->num_pages; i++)
896
+ mapping = ap->pages[i]->mapping;
799897
800898 if (mapping) {
801899 struct inode *inode = mapping->host;
....@@ -803,139 +901,104 @@
803901 /*
804902 * Short read means EOF. If file size is larger, truncate it
805903 */
806
- if (!req->out.h.error && num_read < count)
807
- fuse_short_read(req, inode, req->misc.read.attr_ver);
904
+ if (!err && num_read < count)
905
+ fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
808906
809907 fuse_invalidate_atime(inode);
810908 }
811909
812
- for (i = 0; i < req->num_pages; i++) {
813
- struct page *page = req->pages[i];
814
- if (!req->out.h.error)
910
+ for (i = 0; i < ap->num_pages; i++) {
911
+ struct page *page = ap->pages[i];
912
+
913
+ if (!err)
815914 SetPageUptodate(page);
816915 else
817916 SetPageError(page);
818917 unlock_page(page);
819918 put_page(page);
820919 }
821
- if (req->ff)
822
- fuse_file_put(req->ff, false, false);
920
+ if (ia->ff)
921
+ fuse_file_put(ia->ff, false, false);
922
+
923
+ fuse_io_free(ia);
823924 }
824925
825
-static void fuse_send_readpages(struct fuse_req *req, struct file *file)
926
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
826927 {
827928 struct fuse_file *ff = file->private_data;
828
- struct fuse_conn *fc = ff->fc;
829
- loff_t pos = page_offset(req->pages[0]);
830
- size_t count = req->num_pages << PAGE_SHIFT;
831
-
832
- req->out.argpages = 1;
833
- req->out.page_zeroing = 1;
834
- req->out.page_replace = 1;
835
- fuse_read_fill(req, file, pos, count, FUSE_READ);
836
- req->misc.read.attr_ver = fuse_get_attr_version(fc);
837
- if (fc->async_read) {
838
- req->ff = fuse_file_get(ff);
839
- req->end = fuse_readpages_end;
840
- fuse_request_send_background(fc, req);
841
- } else {
842
- fuse_request_send(fc, req);
843
- fuse_readpages_end(fc, req);
844
- fuse_put_request(fc, req);
845
- }
846
-}
847
-
848
-struct fuse_fill_data {
849
- struct fuse_req *req;
850
- struct file *file;
851
- struct inode *inode;
852
- unsigned nr_pages;
853
-};
854
-
855
-static int fuse_readpages_fill(void *_data, struct page *page)
856
-{
857
- struct fuse_fill_data *data = _data;
858
- struct fuse_req *req = data->req;
859
- struct inode *inode = data->inode;
860
- struct fuse_conn *fc = get_fuse_conn(inode);
861
-
862
- fuse_wait_on_page_writeback(inode, page->index);
863
-
864
- if (req->num_pages &&
865
- (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
866
- (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
867
- req->pages[req->num_pages - 1]->index + 1 != page->index)) {
868
- int nr_alloc = min_t(unsigned, data->nr_pages,
869
- FUSE_MAX_PAGES_PER_REQ);
870
- fuse_send_readpages(req, data->file);
871
- if (fc->async_read)
872
- req = fuse_get_req_for_background(fc, nr_alloc);
873
- else
874
- req = fuse_get_req(fc, nr_alloc);
875
-
876
- data->req = req;
877
- if (IS_ERR(req)) {
878
- unlock_page(page);
879
- return PTR_ERR(req);
880
- }
881
- }
882
-
883
- if (WARN_ON(req->num_pages >= req->max_pages)) {
884
- unlock_page(page);
885
- fuse_put_request(fc, req);
886
- return -EIO;
887
- }
888
-
889
- get_page(page);
890
- req->pages[req->num_pages] = page;
891
- req->page_descs[req->num_pages].length = PAGE_SIZE;
892
- req->num_pages++;
893
- data->nr_pages--;
894
- return 0;
895
-}
896
-
897
-static int fuse_readpages(struct file *file, struct address_space *mapping,
898
- struct list_head *pages, unsigned nr_pages)
899
-{
900
- struct inode *inode = mapping->host;
901
- struct fuse_conn *fc = get_fuse_conn(inode);
902
- struct fuse_fill_data data;
929
+ struct fuse_mount *fm = ff->fm;
930
+ struct fuse_args_pages *ap = &ia->ap;
931
+ loff_t pos = page_offset(ap->pages[0]);
932
+ size_t count = ap->num_pages << PAGE_SHIFT;
933
+ ssize_t res;
903934 int err;
904
- int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
905935
906
- err = -EIO;
907
- if (fuse_is_bad(inode))
908
- goto out;
936
+ ap->args.out_pages = true;
937
+ ap->args.page_zeroing = true;
938
+ ap->args.page_replace = true;
909939
910
- data.file = file;
911
- data.inode = inode;
912
- if (fc->async_read)
913
- data.req = fuse_get_req_for_background(fc, nr_alloc);
914
- else
915
- data.req = fuse_get_req(fc, nr_alloc);
916
- data.nr_pages = nr_pages;
917
- err = PTR_ERR(data.req);
918
- if (IS_ERR(data.req))
919
- goto out;
920
-
921
- err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
922
- if (!err) {
923
- if (data.req->num_pages)
924
- fuse_send_readpages(data.req, file);
925
- else
926
- fuse_put_request(fc, data.req);
940
+ /* Don't overflow end offset */
941
+ if (pos + (count - 1) == LLONG_MAX) {
942
+ count--;
943
+ ap->descs[ap->num_pages - 1].length--;
927944 }
928
-out:
929
- return err;
945
+ WARN_ON((loff_t) (pos + count) < 0);
946
+
947
+ fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
948
+ ia->read.attr_ver = fuse_get_attr_version(fm->fc);
949
+ if (fm->fc->async_read) {
950
+ ia->ff = fuse_file_get(ff);
951
+ ap->args.end = fuse_readpages_end;
952
+ err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
953
+ if (!err)
954
+ return;
955
+ } else {
956
+ res = fuse_simple_request(fm, &ap->args);
957
+ err = res < 0 ? res : 0;
958
+ }
959
+ fuse_readpages_end(fm, &ap->args, err);
930960 }
931961
932
-static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
962
+static void fuse_readahead(struct readahead_control *rac)
963
+{
964
+ struct inode *inode = rac->mapping->host;
965
+ struct fuse_conn *fc = get_fuse_conn(inode);
966
+ unsigned int i, max_pages, nr_pages = 0;
967
+
968
+ if (fuse_is_bad(inode))
969
+ return;
970
+
971
+ max_pages = min_t(unsigned int, fc->max_pages,
972
+ fc->max_read / PAGE_SIZE);
973
+
974
+ for (;;) {
975
+ struct fuse_io_args *ia;
976
+ struct fuse_args_pages *ap;
977
+
978
+ nr_pages = readahead_count(rac) - nr_pages;
979
+ if (nr_pages > max_pages)
980
+ nr_pages = max_pages;
981
+ if (nr_pages == 0)
982
+ break;
983
+ ia = fuse_io_alloc(NULL, nr_pages);
984
+ if (!ia)
985
+ return;
986
+ ap = &ia->ap;
987
+ nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
988
+ for (i = 0; i < nr_pages; i++) {
989
+ fuse_wait_on_page_writeback(inode,
990
+ readahead_index(rac) + i);
991
+ ap->descs[i].length = PAGE_SIZE;
992
+ }
993
+ ap->num_pages = nr_pages;
994
+ fuse_send_readpages(ia, rac->file);
995
+ }
996
+}
997
+
998
+static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
933999 {
9341000 struct inode *inode = iocb->ki_filp->f_mapping->host;
9351001 struct fuse_conn *fc = get_fuse_conn(inode);
936
-
937
- if (fuse_is_bad(inode))
938
- return -EIO;
9391002
9401003 /*
9411004 * In auto invalidate mode, always update attributes on read.
....@@ -953,54 +1016,65 @@
9531016 return generic_file_read_iter(iocb, to);
9541017 }
9551018
956
-static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
957
- loff_t pos, size_t count)
1019
+static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
1020
+ loff_t pos, size_t count)
9581021 {
959
- struct fuse_write_in *inarg = &req->misc.write.in;
960
- struct fuse_write_out *outarg = &req->misc.write.out;
1022
+ struct fuse_args *args = &ia->ap.args;
9611023
962
- inarg->fh = ff->fh;
963
- inarg->offset = pos;
964
- inarg->size = count;
965
- req->in.h.opcode = FUSE_WRITE;
966
- req->in.h.nodeid = ff->nodeid;
967
- req->in.numargs = 2;
968
- if (ff->fc->minor < 9)
969
- req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
1024
+ ia->write.in.fh = ff->fh;
1025
+ ia->write.in.offset = pos;
1026
+ ia->write.in.size = count;
1027
+ args->opcode = FUSE_WRITE;
1028
+ args->nodeid = ff->nodeid;
1029
+ args->in_numargs = 2;
1030
+ if (ff->fm->fc->minor < 9)
1031
+ args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
9701032 else
971
- req->in.args[0].size = sizeof(struct fuse_write_in);
972
- req->in.args[0].value = inarg;
973
- req->in.args[1].size = count;
974
- req->out.numargs = 1;
975
- req->out.args[0].size = sizeof(struct fuse_write_out);
976
- req->out.args[0].value = outarg;
1033
+ args->in_args[0].size = sizeof(ia->write.in);
1034
+ args->in_args[0].value = &ia->write.in;
1035
+ args->in_args[1].size = count;
1036
+ args->out_numargs = 1;
1037
+ args->out_args[0].size = sizeof(ia->write.out);
1038
+ args->out_args[0].value = &ia->write.out;
9771039 }
9781040
979
-static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
980
- loff_t pos, size_t count, fl_owner_t owner)
1041
+static unsigned int fuse_write_flags(struct kiocb *iocb)
9811042 {
982
- struct kiocb *iocb = io->iocb;
1043
+ unsigned int flags = iocb->ki_filp->f_flags;
1044
+
1045
+ if (iocb->ki_flags & IOCB_DSYNC)
1046
+ flags |= O_DSYNC;
1047
+ if (iocb->ki_flags & IOCB_SYNC)
1048
+ flags |= O_SYNC;
1049
+
1050
+ return flags;
1051
+}
1052
+
1053
+static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
1054
+ size_t count, fl_owner_t owner)
1055
+{
1056
+ struct kiocb *iocb = ia->io->iocb;
9831057 struct file *file = iocb->ki_filp;
9841058 struct fuse_file *ff = file->private_data;
985
- struct fuse_conn *fc = ff->fc;
986
- struct fuse_write_in *inarg = &req->misc.write.in;
1059
+ struct fuse_mount *fm = ff->fm;
1060
+ struct fuse_write_in *inarg = &ia->write.in;
1061
+ ssize_t err;
9871062
988
- fuse_write_fill(req, ff, pos, count);
989
- inarg->flags = file->f_flags;
990
- if (iocb->ki_flags & IOCB_DSYNC)
991
- inarg->flags |= O_DSYNC;
992
- if (iocb->ki_flags & IOCB_SYNC)
993
- inarg->flags |= O_SYNC;
1063
+ fuse_write_args_fill(ia, ff, pos, count);
1064
+ inarg->flags = fuse_write_flags(iocb);
9941065 if (owner != NULL) {
9951066 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
996
- inarg->lock_owner = fuse_lock_owner_id(fc, owner);
1067
+ inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
9971068 }
9981069
999
- if (io->async)
1000
- return fuse_async_req_send(fc, req, count, io);
1070
+ if (ia->io->async)
1071
+ return fuse_async_req_send(fm, ia, count);
10011072
1002
- fuse_request_send(fc, req);
1003
- return req->misc.write.out.size;
1073
+ err = fuse_simple_request(fm, &ia->ap.args);
1074
+ if (!err && ia->write.out.size > count)
1075
+ err = -EIO;
1076
+
1077
+ return err ?: ia->write.out.size;
10041078 }
10051079
10061080 bool fuse_write_update_size(struct inode *inode, loff_t pos)
....@@ -1009,63 +1083,78 @@
10091083 struct fuse_inode *fi = get_fuse_inode(inode);
10101084 bool ret = false;
10111085
1012
- spin_lock(&fc->lock);
1013
- fi->attr_version = ++fc->attr_version;
1086
+ spin_lock(&fi->lock);
1087
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
10141088 if (pos > inode->i_size) {
10151089 i_size_write(inode, pos);
10161090 ret = true;
10171091 }
1018
- spin_unlock(&fc->lock);
1092
+ spin_unlock(&fi->lock);
10191093
10201094 return ret;
10211095 }
10221096
1023
-static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
1024
- struct inode *inode, loff_t pos,
1025
- size_t count)
1097
+static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
1098
+ struct kiocb *iocb, struct inode *inode,
1099
+ loff_t pos, size_t count)
10261100 {
1027
- size_t res;
1028
- unsigned offset;
1029
- unsigned i;
1030
- struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1101
+ struct fuse_args_pages *ap = &ia->ap;
1102
+ struct file *file = iocb->ki_filp;
1103
+ struct fuse_file *ff = file->private_data;
1104
+ struct fuse_mount *fm = ff->fm;
1105
+ unsigned int offset, i;
1106
+ bool short_write;
1107
+ int err;
10311108
1032
- for (i = 0; i < req->num_pages; i++)
1033
- fuse_wait_on_page_writeback(inode, req->pages[i]->index);
1109
+ for (i = 0; i < ap->num_pages; i++)
1110
+ fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
10341111
1035
- res = fuse_send_write(req, &io, pos, count, NULL);
1112
+ fuse_write_args_fill(ia, ff, pos, count);
1113
+ ia->write.in.flags = fuse_write_flags(iocb);
10361114
1037
- offset = req->page_descs[0].offset;
1038
- count = res;
1039
- for (i = 0; i < req->num_pages; i++) {
1040
- struct page *page = req->pages[i];
1115
+ err = fuse_simple_request(fm, &ap->args);
1116
+ if (!err && ia->write.out.size > count)
1117
+ err = -EIO;
10411118
1042
- if (!req->out.h.error && !offset && count >= PAGE_SIZE)
1043
- SetPageUptodate(page);
1119
+ short_write = ia->write.out.size < count;
1120
+ offset = ap->descs[0].offset;
1121
+ count = ia->write.out.size;
1122
+ for (i = 0; i < ap->num_pages; i++) {
1123
+ struct page *page = ap->pages[i];
10441124
1045
- if (count > PAGE_SIZE - offset)
1046
- count -= PAGE_SIZE - offset;
1047
- else
1048
- count = 0;
1049
- offset = 0;
1050
-
1051
- unlock_page(page);
1125
+ if (err) {
1126
+ ClearPageUptodate(page);
1127
+ } else {
1128
+ if (count >= PAGE_SIZE - offset)
1129
+ count -= PAGE_SIZE - offset;
1130
+ else {
1131
+ if (short_write)
1132
+ ClearPageUptodate(page);
1133
+ count = 0;
1134
+ }
1135
+ offset = 0;
1136
+ }
1137
+ if (ia->write.page_locked && (i == ap->num_pages - 1))
1138
+ unlock_page(page);
10521139 put_page(page);
10531140 }
10541141
1055
- return res;
1142
+ return err;
10561143 }
10571144
1058
-static ssize_t fuse_fill_write_pages(struct fuse_req *req,
1059
- struct address_space *mapping,
1060
- struct iov_iter *ii, loff_t pos)
1145
+static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
1146
+ struct address_space *mapping,
1147
+ struct iov_iter *ii, loff_t pos,
1148
+ unsigned int max_pages)
10611149 {
1150
+ struct fuse_args_pages *ap = &ia->ap;
10621151 struct fuse_conn *fc = get_fuse_conn(mapping->host);
10631152 unsigned offset = pos & (PAGE_SIZE - 1);
10641153 size_t count = 0;
10651154 int err;
10661155
1067
- req->in.argpages = 1;
1068
- req->page_descs[0].offset = offset;
1156
+ ap->args.in_pages = true;
1157
+ ap->descs[0].offset = offset;
10691158
10701159 do {
10711160 size_t tmp;
....@@ -1101,9 +1190,9 @@
11011190 }
11021191
11031192 err = 0;
1104
- req->pages[req->num_pages] = page;
1105
- req->page_descs[req->num_pages].length = tmp;
1106
- req->num_pages++;
1193
+ ap->pages[ap->num_pages] = page;
1194
+ ap->descs[ap->num_pages].length = tmp;
1195
+ ap->num_pages++;
11071196
11081197 count += tmp;
11091198 pos += tmp;
....@@ -1111,20 +1200,31 @@
11111200 if (offset == PAGE_SIZE)
11121201 offset = 0;
11131202
1203
+ /* If we copied full page, mark it uptodate */
1204
+ if (tmp == PAGE_SIZE)
1205
+ SetPageUptodate(page);
1206
+
1207
+ if (PageUptodate(page)) {
1208
+ unlock_page(page);
1209
+ } else {
1210
+ ia->write.page_locked = true;
1211
+ break;
1212
+ }
11141213 if (!fc->big_writes)
11151214 break;
11161215 } while (iov_iter_count(ii) && count < fc->max_write &&
1117
- req->num_pages < req->max_pages && offset == 0);
1216
+ ap->num_pages < max_pages && offset == 0);
11181217
11191218 return count > 0 ? count : err;
11201219 }
11211220
1122
-static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
1221
+static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
1222
+ unsigned int max_pages)
11231223 {
1124
- return min_t(unsigned,
1224
+ return min_t(unsigned int,
11251225 ((pos + len - 1) >> PAGE_SHIFT) -
11261226 (pos >> PAGE_SHIFT) + 1,
1127
- FUSE_MAX_PAGES_PER_REQ);
1227
+ max_pages);
11281228 }
11291229
11301230 static ssize_t fuse_perform_write(struct kiocb *iocb,
....@@ -1137,33 +1237,31 @@
11371237 int err = 0;
11381238 ssize_t res = 0;
11391239
1140
- if (fuse_is_bad(inode))
1141
- return -EIO;
1142
-
11431240 if (inode->i_size < pos + iov_iter_count(ii))
11441241 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
11451242
11461243 do {
1147
- struct fuse_req *req;
11481244 ssize_t count;
1149
- unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
1245
+ struct fuse_io_args ia = {};
1246
+ struct fuse_args_pages *ap = &ia.ap;
1247
+ unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
1248
+ fc->max_pages);
11501249
1151
- req = fuse_get_req(fc, nr_pages);
1152
- if (IS_ERR(req)) {
1153
- err = PTR_ERR(req);
1250
+ ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
1251
+ if (!ap->pages) {
1252
+ err = -ENOMEM;
11541253 break;
11551254 }
11561255
1157
- count = fuse_fill_write_pages(req, mapping, ii, pos);
1256
+ count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
11581257 if (count <= 0) {
11591258 err = count;
11601259 } else {
1161
- size_t num_written;
1162
-
1163
- num_written = fuse_send_write_pages(req, iocb, inode,
1164
- pos, count);
1165
- err = req->out.h.error;
1260
+ err = fuse_send_write_pages(&ia, iocb, inode,
1261
+ pos, count);
11661262 if (!err) {
1263
+ size_t num_written = ia.write.out.size;
1264
+
11671265 res += num_written;
11681266 pos += num_written;
11691267
....@@ -1172,7 +1270,7 @@
11721270 err = -EIO;
11731271 }
11741272 }
1175
- fuse_put_request(fc, req);
1273
+ kfree(ap->pages);
11761274 } while (!err && iov_iter_count(ii));
11771275
11781276 if (res > 0)
....@@ -1184,7 +1282,7 @@
11841282 return res > 0 ? res : err;
11851283 }
11861284
1187
-static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1285
+static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
11881286 {
11891287 struct file *file = iocb->ki_filp;
11901288 struct address_space *mapping = file->f_mapping;
....@@ -1193,9 +1291,6 @@
11931291 struct inode *inode = mapping->host;
11941292 ssize_t err;
11951293 loff_t endbyte = 0;
1196
-
1197
- if (fuse_is_bad(inode))
1198
- return -EIO;
11991294
12001295 if (get_fuse_conn(inode)->writeback_cache) {
12011296 /* Update size (EOF optimization) and mode (SUID clearing) */
....@@ -1263,14 +1358,14 @@
12631358 return written ? written : err;
12641359 }
12651360
1266
-static inline void fuse_page_descs_length_init(struct fuse_req *req,
1267
- unsigned index, unsigned nr_pages)
1361
+static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
1362
+ unsigned int index,
1363
+ unsigned int nr_pages)
12681364 {
12691365 int i;
12701366
12711367 for (i = index; i < index + nr_pages; i++)
1272
- req->page_descs[i].length = PAGE_SIZE -
1273
- req->page_descs[i].offset;
1368
+ descs[i].length = PAGE_SIZE - descs[i].offset;
12741369 }
12751370
12761371 static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
....@@ -1284,33 +1379,34 @@
12841379 return min(iov_iter_single_seg_count(ii), max_size);
12851380 }
12861381
1287
-static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1288
- size_t *nbytesp, int write)
1382
+static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
1383
+ size_t *nbytesp, int write,
1384
+ unsigned int max_pages)
12891385 {
12901386 size_t nbytes = 0; /* # bytes already packed in req */
12911387 ssize_t ret = 0;
12921388
12931389 /* Special case for kernel I/O: can copy directly into the buffer */
1294
- if (ii->type & ITER_KVEC) {
1390
+ if (iov_iter_is_kvec(ii)) {
12951391 unsigned long user_addr = fuse_get_user_addr(ii);
12961392 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
12971393
12981394 if (write)
1299
- req->in.args[1].value = (void *) user_addr;
1395
+ ap->args.in_args[1].value = (void *) user_addr;
13001396 else
1301
- req->out.args[0].value = (void *) user_addr;
1397
+ ap->args.out_args[0].value = (void *) user_addr;
13021398
13031399 iov_iter_advance(ii, frag_size);
13041400 *nbytesp = frag_size;
13051401 return 0;
13061402 }
13071403
1308
- while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1404
+ while (nbytes < *nbytesp && ap->num_pages < max_pages) {
13091405 unsigned npages;
13101406 size_t start;
1311
- ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
1407
+ ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
13121408 *nbytesp - nbytes,
1313
- req->max_pages - req->num_pages,
1409
+ max_pages - ap->num_pages,
13141410 &start);
13151411 if (ret < 0)
13161412 break;
....@@ -1321,27 +1417,23 @@
13211417 ret += start;
13221418 npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
13231419
1324
- req->page_descs[req->num_pages].offset = start;
1325
- fuse_page_descs_length_init(req, req->num_pages, npages);
1420
+ ap->descs[ap->num_pages].offset = start;
1421
+ fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
13261422
1327
- req->num_pages += npages;
1328
- req->page_descs[req->num_pages - 1].length -=
1423
+ ap->num_pages += npages;
1424
+ ap->descs[ap->num_pages - 1].length -=
13291425 (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
13301426 }
13311427
1428
+ ap->args.user_pages = true;
13321429 if (write)
1333
- req->in.argpages = 1;
1430
+ ap->args.in_pages = true;
13341431 else
1335
- req->out.argpages = 1;
1432
+ ap->args.out_pages = true;
13361433
13371434 *nbytesp = nbytes;
13381435
13391436 return ret < 0 ? ret : 0;
1340
-}
1341
-
1342
-static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1343
-{
1344
- return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
13451437 }
13461438
13471439 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
....@@ -1352,23 +1444,23 @@
13521444 struct file *file = io->iocb->ki_filp;
13531445 struct inode *inode = file->f_mapping->host;
13541446 struct fuse_file *ff = file->private_data;
1355
- struct fuse_conn *fc = ff->fc;
1447
+ struct fuse_conn *fc = ff->fm->fc;
13561448 size_t nmax = write ? fc->max_write : fc->max_read;
13571449 loff_t pos = *ppos;
13581450 size_t count = iov_iter_count(iter);
13591451 pgoff_t idx_from = pos >> PAGE_SHIFT;
13601452 pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
13611453 ssize_t res = 0;
1362
- struct fuse_req *req;
13631454 int err = 0;
1455
+ struct fuse_io_args *ia;
1456
+ unsigned int max_pages;
13641457
1365
- if (io->async)
1366
- req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
1367
- else
1368
- req = fuse_get_req(fc, fuse_iter_npages(iter));
1369
- if (IS_ERR(req))
1370
- return PTR_ERR(req);
1458
+ max_pages = iov_iter_npages(iter, fc->max_pages);
1459
+ ia = fuse_io_alloc(io, max_pages);
1460
+ if (!ia)
1461
+ return -ENOMEM;
13711462
1463
+ ia->io = io;
13721464 if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
13731465 if (!write)
13741466 inode_lock(inode);
....@@ -1379,46 +1471,52 @@
13791471
13801472 io->should_dirty = !write && iter_is_iovec(iter);
13811473 while (count) {
1382
- size_t nres;
1474
+ ssize_t nres;
13831475 fl_owner_t owner = current->files;
13841476 size_t nbytes = min(count, nmax);
1385
- err = fuse_get_user_pages(req, iter, &nbytes, write);
1477
+
1478
+ err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
1479
+ max_pages);
13861480 if (err && !nbytes)
13871481 break;
13881482
1389
- if (write)
1390
- nres = fuse_send_write(req, io, pos, nbytes, owner);
1391
- else
1392
- nres = fuse_send_read(req, io, pos, nbytes, owner);
1483
+ if (write) {
1484
+ if (!capable(CAP_FSETID))
1485
+ ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
13931486
1394
- if (!io->async)
1395
- fuse_release_user_pages(req, io->should_dirty);
1396
- if (req->out.h.error) {
1397
- err = req->out.h.error;
1398
- break;
1399
- } else if (nres > nbytes) {
1400
- res = 0;
1401
- err = -EIO;
1487
+ nres = fuse_send_write(ia, pos, nbytes, owner);
1488
+ } else {
1489
+ nres = fuse_send_read(ia, pos, nbytes, owner);
1490
+ }
1491
+
1492
+ if (!io->async || nres < 0) {
1493
+ fuse_release_user_pages(&ia->ap, io->should_dirty);
1494
+ fuse_io_free(ia);
1495
+ }
1496
+ ia = NULL;
1497
+ if (nres < 0) {
1498
+ iov_iter_revert(iter, nbytes);
1499
+ err = nres;
14021500 break;
14031501 }
1502
+ WARN_ON(nres > nbytes);
1503
+
14041504 count -= nres;
14051505 res += nres;
14061506 pos += nres;
1407
- if (nres != nbytes)
1507
+ if (nres != nbytes) {
1508
+ iov_iter_revert(iter, nbytes - nres);
14081509 break;
1510
+ }
14091511 if (count) {
1410
- fuse_put_request(fc, req);
1411
- if (io->async)
1412
- req = fuse_get_req_for_background(fc,
1413
- fuse_iter_npages(iter));
1414
- else
1415
- req = fuse_get_req(fc, fuse_iter_npages(iter));
1416
- if (IS_ERR(req))
1512
+ max_pages = iov_iter_npages(iter, fc->max_pages);
1513
+ ia = fuse_io_alloc(io, max_pages);
1514
+ if (!ia)
14171515 break;
14181516 }
14191517 }
1420
- if (!IS_ERR(req))
1421
- fuse_put_request(fc, req);
1518
+ if (ia)
1519
+ fuse_io_free(ia);
14221520 if (res > 0)
14231521 *ppos = pos;
14241522
....@@ -1433,20 +1531,28 @@
14331531 ssize_t res;
14341532 struct inode *inode = file_inode(io->iocb->ki_filp);
14351533
1436
- if (fuse_is_bad(inode))
1437
- return -EIO;
1438
-
14391534 res = fuse_direct_io(io, iter, ppos, 0);
14401535
1441
- fuse_invalidate_attr(inode);
1536
+ fuse_invalidate_atime(inode);
14421537
14431538 return res;
14441539 }
14451540
1541
+static ssize_t fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
1542
+
14461543 static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
14471544 {
1448
- struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1449
- return __fuse_direct_read(&io, to, &iocb->ki_pos);
1545
+ ssize_t res;
1546
+
1547
+ if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1548
+ res = fuse_direct_IO(iocb, to);
1549
+ } else {
1550
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1551
+
1552
+ res = __fuse_direct_read(&io, to, &iocb->ki_pos);
1553
+ }
1554
+
1555
+ return res;
14501556 }
14511557
14521558 static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
....@@ -1455,14 +1561,17 @@
14551561 struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
14561562 ssize_t res;
14571563
1458
- if (fuse_is_bad(inode))
1459
- return -EIO;
1460
-
14611564 /* Don't allow parallel writes to the same file */
14621565 inode_lock(inode);
14631566 res = generic_write_checks(iocb, from);
1464
- if (res > 0)
1465
- res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
1567
+ if (res > 0) {
1568
+ if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1569
+ res = fuse_direct_IO(iocb, from);
1570
+ } else {
1571
+ res = fuse_direct_io(&io, from, &iocb->ki_pos,
1572
+ FUSE_DIO_WRITE);
1573
+ }
1574
+ }
14661575 fuse_invalidate_attr(inode);
14671576 if (res > 0)
14681577 fuse_write_update_size(inode, iocb->ki_pos);
....@@ -1471,46 +1580,92 @@
14711580 return res;
14721581 }
14731582
1474
-static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1583
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
14751584 {
1476
- int i;
1585
+ struct file *file = iocb->ki_filp;
1586
+ struct fuse_file *ff = file->private_data;
1587
+ struct inode *inode = file_inode(file);
14771588
1478
- for (i = 0; i < req->num_pages; i++)
1479
- __free_page(req->pages[i]);
1589
+ if (fuse_is_bad(inode))
1590
+ return -EIO;
14801591
1481
- if (req->ff)
1482
- fuse_file_put(req->ff, false, false);
1592
+ if (FUSE_IS_DAX(inode))
1593
+ return fuse_dax_read_iter(iocb, to);
1594
+
1595
+ if (ff->passthrough.filp)
1596
+ return fuse_passthrough_read_iter(iocb, to);
1597
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
1598
+ return fuse_cache_read_iter(iocb, to);
1599
+ else
1600
+ return fuse_direct_read_iter(iocb, to);
14831601 }
14841602
1485
-static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1603
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
14861604 {
1487
- struct inode *inode = req->inode;
1605
+ struct file *file = iocb->ki_filp;
1606
+ struct fuse_file *ff = file->private_data;
1607
+ struct inode *inode = file_inode(file);
1608
+
1609
+ if (fuse_is_bad(inode))
1610
+ return -EIO;
1611
+
1612
+ if (FUSE_IS_DAX(inode))
1613
+ return fuse_dax_write_iter(iocb, from);
1614
+
1615
+ if (ff->passthrough.filp)
1616
+ return fuse_passthrough_write_iter(iocb, from);
1617
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
1618
+ return fuse_cache_write_iter(iocb, from);
1619
+ else
1620
+ return fuse_direct_write_iter(iocb, from);
1621
+}
1622
+
1623
+static void fuse_writepage_free(struct fuse_writepage_args *wpa)
1624
+{
1625
+ struct fuse_args_pages *ap = &wpa->ia.ap;
1626
+ int i;
1627
+
1628
+ for (i = 0; i < ap->num_pages; i++)
1629
+ __free_page(ap->pages[i]);
1630
+
1631
+ if (wpa->ia.ff)
1632
+ fuse_file_put(wpa->ia.ff, false, false);
1633
+
1634
+ kfree(ap->pages);
1635
+ kfree(wpa);
1636
+}
1637
+
1638
+static void fuse_writepage_finish(struct fuse_mount *fm,
1639
+ struct fuse_writepage_args *wpa)
1640
+{
1641
+ struct fuse_args_pages *ap = &wpa->ia.ap;
1642
+ struct inode *inode = wpa->inode;
14881643 struct fuse_inode *fi = get_fuse_inode(inode);
14891644 struct backing_dev_info *bdi = inode_to_bdi(inode);
14901645 int i;
14911646
1492
- list_del(&req->writepages_entry);
1493
- for (i = 0; i < req->num_pages; i++) {
1647
+ for (i = 0; i < ap->num_pages; i++) {
14941648 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1495
- dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
1649
+ dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
14961650 wb_writeout_inc(&bdi->wb);
14971651 }
14981652 wake_up(&fi->page_waitq);
14991653 }
15001654
1501
-/* Called under fc->lock, may release and reacquire it */
1502
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
1503
- loff_t size)
1504
-__releases(fc->lock)
1505
-__acquires(fc->lock)
1655
+/* Called under fi->lock, may release and reacquire it */
1656
+static void fuse_send_writepage(struct fuse_mount *fm,
1657
+ struct fuse_writepage_args *wpa, loff_t size)
1658
+__releases(fi->lock)
1659
+__acquires(fi->lock)
15061660 {
1507
- struct fuse_inode *fi = get_fuse_inode(req->inode);
1508
- struct fuse_write_in *inarg = &req->misc.write.in;
1509
- __u64 data_size = req->num_pages * PAGE_SIZE;
1661
+ struct fuse_writepage_args *aux, *next;
1662
+ struct fuse_inode *fi = get_fuse_inode(wpa->inode);
1663
+ struct fuse_write_in *inarg = &wpa->ia.write.in;
1664
+ struct fuse_args *args = &wpa->ia.ap.args;
1665
+ __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
1666
+ int err;
15101667
1511
- if (!fc->connected)
1512
- goto out_free;
1513
-
1668
+ fi->writectr++;
15141669 if (inarg->offset + data_size <= size) {
15151670 inarg->size = data_size;
15161671 } else if (inarg->offset < size) {
....@@ -1520,56 +1675,129 @@
15201675 goto out_free;
15211676 }
15221677
1523
- req->in.args[1].size = inarg->size;
1524
- fi->writectr++;
1525
- fuse_request_send_background_locked(fc, req);
1678
+ args->in_args[1].size = inarg->size;
1679
+ args->force = true;
1680
+ args->nocreds = true;
1681
+
1682
+ err = fuse_simple_background(fm, args, GFP_ATOMIC);
1683
+ if (err == -ENOMEM) {
1684
+ spin_unlock(&fi->lock);
1685
+ err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL);
1686
+ spin_lock(&fi->lock);
1687
+ }
1688
+
1689
+ /* Fails on broken connection only */
1690
+ if (unlikely(err))
1691
+ goto out_free;
1692
+
15261693 return;
15271694
15281695 out_free:
1529
- fuse_writepage_finish(fc, req);
1530
- spin_unlock(&fc->lock);
1531
- fuse_writepage_free(fc, req);
1532
- fuse_put_request(fc, req);
1533
- spin_lock(&fc->lock);
1696
+ fi->writectr--;
1697
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
1698
+ fuse_writepage_finish(fm, wpa);
1699
+ spin_unlock(&fi->lock);
1700
+
1701
+ /* After fuse_writepage_finish() aux request list is private */
1702
+ for (aux = wpa->next; aux; aux = next) {
1703
+ next = aux->next;
1704
+ aux->next = NULL;
1705
+ fuse_writepage_free(aux);
1706
+ }
1707
+
1708
+ fuse_writepage_free(wpa);
1709
+ spin_lock(&fi->lock);
15341710 }
15351711
15361712 /*
15371713 * If fi->writectr is positive (no truncate or fsync going on) send
15381714 * all queued writepage requests.
15391715 *
1540
- * Called with fc->lock
1716
+ * Called with fi->lock
15411717 */
15421718 void fuse_flush_writepages(struct inode *inode)
1543
-__releases(fc->lock)
1544
-__acquires(fc->lock)
1719
+__releases(fi->lock)
1720
+__acquires(fi->lock)
15451721 {
1546
- struct fuse_conn *fc = get_fuse_conn(inode);
1722
+ struct fuse_mount *fm = get_fuse_mount(inode);
15471723 struct fuse_inode *fi = get_fuse_inode(inode);
15481724 loff_t crop = i_size_read(inode);
1549
- struct fuse_req *req;
1725
+ struct fuse_writepage_args *wpa;
15501726
15511727 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1552
- req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1553
- list_del_init(&req->list);
1554
- fuse_send_writepage(fc, req, crop);
1728
+ wpa = list_entry(fi->queued_writes.next,
1729
+ struct fuse_writepage_args, queue_entry);
1730
+ list_del_init(&wpa->queue_entry);
1731
+ fuse_send_writepage(fm, wpa, crop);
15551732 }
15561733 }
15571734
1558
-static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1735
+static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
1736
+ struct fuse_writepage_args *wpa)
15591737 {
1560
- struct inode *inode = req->inode;
1561
- struct fuse_inode *fi = get_fuse_inode(inode);
1738
+ pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
1739
+ pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
1740
+ struct rb_node **p = &root->rb_node;
1741
+ struct rb_node *parent = NULL;
15621742
1563
- mapping_set_error(inode->i_mapping, req->out.h.error);
1564
- spin_lock(&fc->lock);
1565
- while (req->misc.write.next) {
1566
- struct fuse_conn *fc = get_fuse_conn(inode);
1567
- struct fuse_write_in *inarg = &req->misc.write.in;
1568
- struct fuse_req *next = req->misc.write.next;
1569
- req->misc.write.next = next->misc.write.next;
1570
- next->misc.write.next = NULL;
1571
- next->ff = fuse_file_get(req->ff);
1572
- list_add(&next->writepages_entry, &fi->writepages);
1743
+ WARN_ON(!wpa->ia.ap.num_pages);
1744
+ while (*p) {
1745
+ struct fuse_writepage_args *curr;
1746
+ pgoff_t curr_index;
1747
+
1748
+ parent = *p;
1749
+ curr = rb_entry(parent, struct fuse_writepage_args,
1750
+ writepages_entry);
1751
+ WARN_ON(curr->inode != wpa->inode);
1752
+ curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
1753
+
1754
+ if (idx_from >= curr_index + curr->ia.ap.num_pages)
1755
+ p = &(*p)->rb_right;
1756
+ else if (idx_to < curr_index)
1757
+ p = &(*p)->rb_left;
1758
+ else
1759
+ return curr;
1760
+ }
1761
+
1762
+ rb_link_node(&wpa->writepages_entry, parent, p);
1763
+ rb_insert_color(&wpa->writepages_entry, root);
1764
+ return NULL;
1765
+}
1766
+
1767
+static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
1768
+{
1769
+ WARN_ON(fuse_insert_writeback(root, wpa));
1770
+}
1771
+
1772
+static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
1773
+ int error)
1774
+{
1775
+ struct fuse_writepage_args *wpa =
1776
+ container_of(args, typeof(*wpa), ia.ap.args);
1777
+ struct inode *inode = wpa->inode;
1778
+ struct fuse_inode *fi = get_fuse_inode(inode);
1779
+ struct fuse_conn *fc = get_fuse_conn(inode);
1780
+
1781
+ mapping_set_error(inode->i_mapping, error);
1782
+ /*
1783
+ * A writeback finished and this might have updated mtime/ctime on
1784
+ * server making local mtime/ctime stale. Hence invalidate attrs.
1785
+ * Do this only if writeback_cache is not enabled. If writeback_cache
1786
+ * is enabled, we trust local ctime/mtime.
1787
+ */
1788
+ if (!fc->writeback_cache)
1789
+ fuse_invalidate_attr(inode);
1790
+ spin_lock(&fi->lock);
1791
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
1792
+ while (wpa->next) {
1793
+ struct fuse_mount *fm = get_fuse_mount(inode);
1794
+ struct fuse_write_in *inarg = &wpa->ia.write.in;
1795
+ struct fuse_writepage_args *next = wpa->next;
1796
+
1797
+ wpa->next = next->next;
1798
+ next->next = NULL;
1799
+ next->ia.ff = fuse_file_get(wpa->ia.ff);
1800
+ tree_insert(&fi->writepages, next);
15731801
15741802 /*
15751803 * Skip fuse_flush_writepages() to make it easy to crop requests
....@@ -1594,12 +1822,12 @@
15941822 * no invocations of fuse_writepage_end() while we're in
15951823 * fuse_set_nowrite..fuse_release_nowrite section.
15961824 */
1597
- fuse_send_writepage(fc, next, inarg->offset + inarg->size);
1825
+ fuse_send_writepage(fm, next, inarg->offset + inarg->size);
15981826 }
15991827 fi->writectr--;
1600
- fuse_writepage_finish(fc, req);
1601
- spin_unlock(&fc->lock);
1602
- fuse_writepage_free(fc, req);
1828
+ fuse_writepage_finish(fm, wpa);
1829
+ spin_unlock(&fi->lock);
1830
+ fuse_writepage_free(wpa);
16031831 }
16041832
16051833 static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
....@@ -1607,13 +1835,13 @@
16071835 {
16081836 struct fuse_file *ff = NULL;
16091837
1610
- spin_lock(&fc->lock);
1838
+ spin_lock(&fi->lock);
16111839 if (!list_empty(&fi->write_files)) {
16121840 ff = list_entry(fi->write_files.next, struct fuse_file,
16131841 write_entry);
16141842 fuse_file_get(ff);
16151843 }
1616
- spin_unlock(&fc->lock);
1844
+ spin_unlock(&fi->lock);
16171845
16181846 return ff;
16191847 }
....@@ -1633,6 +1861,17 @@
16331861 struct fuse_file *ff;
16341862 int err;
16351863
1864
+ /*
1865
+ * Inode is always written before the last reference is dropped and
1866
+ * hence this should not be reached from reclaim.
1867
+ *
1868
+ * Writing back the inode from reclaim can deadlock if the request
1869
+ * processing itself needs an allocation. Allocations triggering
1870
+ * reclaim while serving a request can't be prevented, because it can
1871
+ * involve any number of unrelated userspace processes.
1872
+ */
1873
+ WARN_ON(wbc->for_reclaim);
1874
+
16361875 ff = __fuse_write_file_get(fc, fi);
16371876 err = fuse_flush_times(inode, ff);
16381877 if (ff)
....@@ -1641,54 +1880,73 @@
16411880 return err;
16421881 }
16431882
1883
+static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
1884
+{
1885
+ struct fuse_writepage_args *wpa;
1886
+ struct fuse_args_pages *ap;
1887
+
1888
+ wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
1889
+ if (wpa) {
1890
+ ap = &wpa->ia.ap;
1891
+ ap->num_pages = 0;
1892
+ ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
1893
+ if (!ap->pages) {
1894
+ kfree(wpa);
1895
+ wpa = NULL;
1896
+ }
1897
+ }
1898
+ return wpa;
1899
+
1900
+}
1901
+
16441902 static int fuse_writepage_locked(struct page *page)
16451903 {
16461904 struct address_space *mapping = page->mapping;
16471905 struct inode *inode = mapping->host;
16481906 struct fuse_conn *fc = get_fuse_conn(inode);
16491907 struct fuse_inode *fi = get_fuse_inode(inode);
1650
- struct fuse_req *req;
1908
+ struct fuse_writepage_args *wpa;
1909
+ struct fuse_args_pages *ap;
16511910 struct page *tmp_page;
16521911 int error = -ENOMEM;
16531912
16541913 set_page_writeback(page);
16551914
1656
- req = fuse_request_alloc_nofs(1);
1657
- if (!req)
1915
+ wpa = fuse_writepage_args_alloc();
1916
+ if (!wpa)
16581917 goto err;
1918
+ ap = &wpa->ia.ap;
16591919
1660
- /* writeback always goes to bg_queue */
1661
- __set_bit(FR_BACKGROUND, &req->flags);
16621920 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
16631921 if (!tmp_page)
16641922 goto err_free;
16651923
16661924 error = -EIO;
1667
- req->ff = fuse_write_file_get(fc, fi);
1668
- if (!req->ff)
1925
+ wpa->ia.ff = fuse_write_file_get(fc, fi);
1926
+ if (!wpa->ia.ff)
16691927 goto err_nofile;
16701928
1671
- fuse_write_fill(req, req->ff, page_offset(page), 0);
1929
+ fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
16721930
16731931 copy_highpage(tmp_page, page);
1674
- req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1675
- req->misc.write.next = NULL;
1676
- req->in.argpages = 1;
1677
- req->num_pages = 1;
1678
- req->pages[0] = tmp_page;
1679
- req->page_descs[0].offset = 0;
1680
- req->page_descs[0].length = PAGE_SIZE;
1681
- req->end = fuse_writepage_end;
1682
- req->inode = inode;
1932
+ wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
1933
+ wpa->next = NULL;
1934
+ ap->args.in_pages = true;
1935
+ ap->num_pages = 1;
1936
+ ap->pages[0] = tmp_page;
1937
+ ap->descs[0].offset = 0;
1938
+ ap->descs[0].length = PAGE_SIZE;
1939
+ ap->args.end = fuse_writepage_end;
1940
+ wpa->inode = inode;
16831941
16841942 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
16851943 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
16861944
1687
- spin_lock(&fc->lock);
1688
- list_add(&req->writepages_entry, &fi->writepages);
1689
- list_add_tail(&req->list, &fi->queued_writes);
1945
+ spin_lock(&fi->lock);
1946
+ tree_insert(&fi->writepages, wpa);
1947
+ list_add_tail(&wpa->queue_entry, &fi->queued_writes);
16901948 fuse_flush_writepages(inode);
1691
- spin_unlock(&fc->lock);
1949
+ spin_unlock(&fi->lock);
16921950
16931951 end_page_writeback(page);
16941952
....@@ -1697,7 +1955,7 @@
16971955 err_nofile:
16981956 __free_page(tmp_page);
16991957 err_free:
1700
- fuse_request_free(req);
1958
+ kfree(wpa);
17011959 err:
17021960 mapping_set_error(page->mapping, error);
17031961 end_page_writeback(page);
....@@ -1728,108 +1986,118 @@
17281986 }
17291987
17301988 struct fuse_fill_wb_data {
1731
- struct fuse_req *req;
1989
+ struct fuse_writepage_args *wpa;
17321990 struct fuse_file *ff;
17331991 struct inode *inode;
17341992 struct page **orig_pages;
1993
+ unsigned int max_pages;
17351994 };
1995
+
1996
+static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
1997
+{
1998
+ struct fuse_args_pages *ap = &data->wpa->ia.ap;
1999
+ struct fuse_conn *fc = get_fuse_conn(data->inode);
2000
+ struct page **pages;
2001
+ struct fuse_page_desc *descs;
2002
+ unsigned int npages = min_t(unsigned int,
2003
+ max_t(unsigned int, data->max_pages * 2,
2004
+ FUSE_DEFAULT_MAX_PAGES_PER_REQ),
2005
+ fc->max_pages);
2006
+ WARN_ON(npages <= data->max_pages);
2007
+
2008
+ pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
2009
+ if (!pages)
2010
+ return false;
2011
+
2012
+ memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
2013
+ memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
2014
+ kfree(ap->pages);
2015
+ ap->pages = pages;
2016
+ ap->descs = descs;
2017
+ data->max_pages = npages;
2018
+
2019
+ return true;
2020
+}
17362021
17372022 static void fuse_writepages_send(struct fuse_fill_wb_data *data)
17382023 {
1739
- struct fuse_req *req = data->req;
2024
+ struct fuse_writepage_args *wpa = data->wpa;
17402025 struct inode *inode = data->inode;
1741
- struct fuse_conn *fc = get_fuse_conn(inode);
17422026 struct fuse_inode *fi = get_fuse_inode(inode);
1743
- int num_pages = req->num_pages;
2027
+ int num_pages = wpa->ia.ap.num_pages;
17442028 int i;
17452029
1746
- req->ff = fuse_file_get(data->ff);
1747
- spin_lock(&fc->lock);
1748
- list_add_tail(&req->list, &fi->queued_writes);
2030
+ wpa->ia.ff = fuse_file_get(data->ff);
2031
+ spin_lock(&fi->lock);
2032
+ list_add_tail(&wpa->queue_entry, &fi->queued_writes);
17492033 fuse_flush_writepages(inode);
1750
- spin_unlock(&fc->lock);
2034
+ spin_unlock(&fi->lock);
17512035
17522036 for (i = 0; i < num_pages; i++)
17532037 end_page_writeback(data->orig_pages[i]);
17542038 }
17552039
1756
-static bool fuse_writepage_in_flight(struct fuse_req *new_req,
1757
- struct page *page)
2040
+/*
2041
+ * Check under fi->lock if the page is under writeback, and insert it onto the
2042
+ * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
2043
+ * one already added for a page at this offset. If there's none, then insert
2044
+ * this new request onto the auxiliary list, otherwise reuse the existing one by
2045
+ * swapping the new temp page with the old one.
2046
+ */
2047
+static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
2048
+ struct page *page)
17582049 {
1759
- struct fuse_conn *fc = get_fuse_conn(new_req->inode);
1760
- struct fuse_inode *fi = get_fuse_inode(new_req->inode);
1761
- struct fuse_req *tmp;
1762
- struct fuse_req *old_req;
1763
- bool found = false;
1764
- pgoff_t curr_index;
2050
+ struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
2051
+ struct fuse_writepage_args *tmp;
2052
+ struct fuse_writepage_args *old_wpa;
2053
+ struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
17652054
1766
- BUG_ON(new_req->num_pages != 0);
2055
+ WARN_ON(new_ap->num_pages != 0);
2056
+ new_ap->num_pages = 1;
17672057
1768
- spin_lock(&fc->lock);
1769
- list_del(&new_req->writepages_entry);
1770
- list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
1771
- BUG_ON(old_req->inode != new_req->inode);
1772
- curr_index = old_req->misc.write.in.offset >> PAGE_SHIFT;
1773
- if (curr_index <= page->index &&
1774
- page->index < curr_index + old_req->num_pages) {
1775
- found = true;
2058
+ spin_lock(&fi->lock);
2059
+ old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
2060
+ if (!old_wpa) {
2061
+ spin_unlock(&fi->lock);
2062
+ return true;
2063
+ }
2064
+
2065
+ for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
2066
+ pgoff_t curr_index;
2067
+
2068
+ WARN_ON(tmp->inode != new_wpa->inode);
2069
+ curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
2070
+ if (curr_index == page->index) {
2071
+ WARN_ON(tmp->ia.ap.num_pages != 1);
2072
+ swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
17762073 break;
17772074 }
17782075 }
1779
- if (!found) {
1780
- list_add(&new_req->writepages_entry, &fi->writepages);
1781
- goto out_unlock;
2076
+
2077
+ if (!tmp) {
2078
+ new_wpa->next = old_wpa->next;
2079
+ old_wpa->next = new_wpa;
17822080 }
17832081
1784
- new_req->num_pages = 1;
1785
- for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
1786
- BUG_ON(tmp->inode != new_req->inode);
1787
- curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
1788
- if (tmp->num_pages == 1 &&
1789
- curr_index == page->index) {
1790
- old_req = tmp;
1791
- }
1792
- }
2082
+ spin_unlock(&fi->lock);
17932083
1794
- if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
1795
- struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
1796
-
1797
- copy_highpage(old_req->pages[0], page);
1798
- spin_unlock(&fc->lock);
2084
+ if (tmp) {
2085
+ struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
17992086
18002087 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1801
- dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
2088
+ dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
18022089 wb_writeout_inc(&bdi->wb);
1803
- fuse_writepage_free(fc, new_req);
1804
- fuse_request_free(new_req);
1805
- goto out;
1806
- } else {
1807
- new_req->misc.write.next = old_req->misc.write.next;
1808
- old_req->misc.write.next = new_req;
2090
+ fuse_writepage_free(new_wpa);
18092091 }
1810
-out_unlock:
1811
- spin_unlock(&fc->lock);
1812
-out:
1813
- return found;
2092
+
2093
+ return false;
18142094 }
18152095
1816
-static int fuse_writepages_fill(struct page *page,
1817
- struct writeback_control *wbc, void *_data)
2096
+static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
2097
+ struct fuse_args_pages *ap,
2098
+ struct fuse_fill_wb_data *data)
18182099 {
1819
- struct fuse_fill_wb_data *data = _data;
1820
- struct fuse_req *req = data->req;
1821
- struct inode *inode = data->inode;
1822
- struct fuse_conn *fc = get_fuse_conn(inode);
1823
- struct page *tmp_page;
1824
- bool is_writeback;
1825
- int err;
1826
-
1827
- if (!data->ff) {
1828
- err = -EIO;
1829
- data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
1830
- if (!data->ff)
1831
- goto out_unlock;
1832
- }
2100
+ WARN_ON(!ap->num_pages);
18332101
18342102 /*
18352103 * Being under writeback is unlikely but possible. For example direct
....@@ -1837,15 +2105,52 @@
18372105 * the pages are faulted with get_user_pages(), and then after the read
18382106 * completed.
18392107 */
1840
- is_writeback = fuse_page_is_writeback(inode, page->index);
2108
+ if (fuse_page_is_writeback(data->inode, page->index))
2109
+ return true;
18412110
1842
- if (req && req->num_pages &&
1843
- (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
1844
- (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
1845
- data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
1846
- fuse_writepages_send(data);
1847
- data->req = NULL;
2111
+ /* Reached max pages */
2112
+ if (ap->num_pages == fc->max_pages)
2113
+ return true;
2114
+
2115
+ /* Reached max write bytes */
2116
+ if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
2117
+ return true;
2118
+
2119
+ /* Discontinuity */
2120
+ if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
2121
+ return true;
2122
+
2123
+ /* Need to grow the pages array? If so, did the expansion fail? */
2124
+ if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
2125
+ return true;
2126
+
2127
+ return false;
2128
+}
2129
+
2130
+static int fuse_writepages_fill(struct page *page,
2131
+ struct writeback_control *wbc, void *_data)
2132
+{
2133
+ struct fuse_fill_wb_data *data = _data;
2134
+ struct fuse_writepage_args *wpa = data->wpa;
2135
+ struct fuse_args_pages *ap = &wpa->ia.ap;
2136
+ struct inode *inode = data->inode;
2137
+ struct fuse_inode *fi = get_fuse_inode(inode);
2138
+ struct fuse_conn *fc = get_fuse_conn(inode);
2139
+ struct page *tmp_page;
2140
+ int err;
2141
+
2142
+ if (!data->ff) {
2143
+ err = -EIO;
2144
+ data->ff = fuse_write_file_get(fc, fi);
2145
+ if (!data->ff)
2146
+ goto out_unlock;
18482147 }
2148
+
2149
+ if (wpa && fuse_writepage_need_send(fc, page, ap, data)) {
2150
+ fuse_writepages_send(data);
2151
+ data->wpa = NULL;
2152
+ }
2153
+
18492154 err = -ENOMEM;
18502155 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
18512156 if (!tmp_page)
....@@ -1860,61 +2165,53 @@
18602165 * This is ensured by holding the page lock in page_mkwrite() while
18612166 * checking fuse_page_is_writeback(). We already hold the page lock
18622167 * since clear_page_dirty_for_io() and keep it held until we add the
1863
- * request to the fi->writepages list and increment req->num_pages.
2168
+ * request to the fi->writepages list and increment ap->num_pages.
18642169 * After this fuse_page_is_writeback() will indicate that the page is
18652170 * under writeback, so we can release the page lock.
18662171 */
1867
- if (data->req == NULL) {
1868
- struct fuse_inode *fi = get_fuse_inode(inode);
1869
-
2172
+ if (data->wpa == NULL) {
18702173 err = -ENOMEM;
1871
- req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
1872
- if (!req) {
2174
+ wpa = fuse_writepage_args_alloc();
2175
+ if (!wpa) {
18732176 __free_page(tmp_page);
18742177 goto out_unlock;
18752178 }
2179
+ data->max_pages = 1;
18762180
1877
- fuse_write_fill(req, data->ff, page_offset(page), 0);
1878
- req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1879
- req->misc.write.next = NULL;
1880
- req->in.argpages = 1;
1881
- __set_bit(FR_BACKGROUND, &req->flags);
1882
- req->num_pages = 0;
1883
- req->end = fuse_writepage_end;
1884
- req->inode = inode;
1885
-
1886
- spin_lock(&fc->lock);
1887
- list_add(&req->writepages_entry, &fi->writepages);
1888
- spin_unlock(&fc->lock);
1889
-
1890
- data->req = req;
2181
+ ap = &wpa->ia.ap;
2182
+ fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0);
2183
+ wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
2184
+ wpa->next = NULL;
2185
+ ap->args.in_pages = true;
2186
+ ap->args.end = fuse_writepage_end;
2187
+ ap->num_pages = 0;
2188
+ wpa->inode = inode;
18912189 }
18922190 set_page_writeback(page);
18932191
18942192 copy_highpage(tmp_page, page);
1895
- req->pages[req->num_pages] = tmp_page;
1896
- req->page_descs[req->num_pages].offset = 0;
1897
- req->page_descs[req->num_pages].length = PAGE_SIZE;
2193
+ ap->pages[ap->num_pages] = tmp_page;
2194
+ ap->descs[ap->num_pages].offset = 0;
2195
+ ap->descs[ap->num_pages].length = PAGE_SIZE;
2196
+ data->orig_pages[ap->num_pages] = page;
18982197
18992198 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
19002199 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
19012200
19022201 err = 0;
1903
- if (is_writeback && fuse_writepage_in_flight(req, page)) {
2202
+ if (data->wpa) {
2203
+ /*
2204
+ * Protected by fi->lock against concurrent access by
2205
+ * fuse_page_is_writeback().
2206
+ */
2207
+ spin_lock(&fi->lock);
2208
+ ap->num_pages++;
2209
+ spin_unlock(&fi->lock);
2210
+ } else if (fuse_writepage_add(wpa, page)) {
2211
+ data->wpa = wpa;
2212
+ } else {
19042213 end_page_writeback(page);
1905
- data->req = NULL;
1906
- goto out_unlock;
19072214 }
1908
- data->orig_pages[req->num_pages] = page;
1909
-
1910
- /*
1911
- * Protected by fc->lock against concurrent access by
1912
- * fuse_page_is_writeback().
1913
- */
1914
- spin_lock(&fc->lock);
1915
- req->num_pages++;
1916
- spin_unlock(&fc->lock);
1917
-
19182215 out_unlock:
19192216 unlock_page(page);
19202217
....@@ -1925,6 +2222,7 @@
19252222 struct writeback_control *wbc)
19262223 {
19272224 struct inode *inode = mapping->host;
2225
+ struct fuse_conn *fc = get_fuse_conn(inode);
19282226 struct fuse_fill_wb_data data;
19292227 int err;
19302228
....@@ -1933,22 +2231,20 @@
19332231 goto out;
19342232
19352233 data.inode = inode;
1936
- data.req = NULL;
2234
+ data.wpa = NULL;
19372235 data.ff = NULL;
19382236
19392237 err = -ENOMEM;
1940
- data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
2238
+ data.orig_pages = kcalloc(fc->max_pages,
19412239 sizeof(struct page *),
19422240 GFP_NOFS);
19432241 if (!data.orig_pages)
19442242 goto out;
19452243
19462244 err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
1947
- if (data.req) {
1948
- /* Ignore errors if we can write at least one page */
1949
- BUG_ON(!data.req->num_pages);
2245
+ if (data.wpa) {
2246
+ WARN_ON(!data.wpa->ia.ap.num_pages);
19502247 fuse_writepages_send(&data);
1951
- err = 0;
19522248 }
19532249 if (data.ff)
19542250 fuse_file_put(data.ff, false, false);
....@@ -2096,23 +2392,31 @@
20962392
20972393 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
20982394 {
2395
+ struct fuse_file *ff = file->private_data;
2396
+
2397
+ /* DAX mmap is superior to direct_io mmap */
2398
+ if (FUSE_IS_DAX(file_inode(file)))
2399
+ return fuse_dax_mmap(file, vma);
2400
+
2401
+ if (ff->passthrough.filp)
2402
+ return fuse_passthrough_mmap(file, vma);
2403
+
2404
+ if (ff->open_flags & FOPEN_DIRECT_IO) {
2405
+ /* Can't provide the coherency needed for MAP_SHARED */
2406
+ if (vma->vm_flags & VM_MAYSHARE)
2407
+ return -ENODEV;
2408
+
2409
+ invalidate_inode_pages2(file->f_mapping);
2410
+
2411
+ return generic_file_mmap(file, vma);
2412
+ }
2413
+
20992414 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
21002415 fuse_link_write_file(file);
21012416
21022417 file_accessed(file);
21032418 vma->vm_ops = &fuse_file_vm_ops;
21042419 return 0;
2105
-}
2106
-
2107
-static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
2108
-{
2109
- /* Can't provide the coherency needed for MAP_SHARED */
2110
- if (vma->vm_flags & VM_MAYSHARE)
2111
- return -ENODEV;
2112
-
2113
- invalidate_inode_pages2(file->f_mapping);
2114
-
2115
- return generic_file_mmap(file, vma);
21162420 }
21172421
21182422 static int convert_fuse_file_lock(struct fuse_conn *fc,
....@@ -2165,29 +2469,29 @@
21652469 inarg->lk.pid = pid;
21662470 if (flock)
21672471 inarg->lk_flags |= FUSE_LK_FLOCK;
2168
- args->in.h.opcode = opcode;
2169
- args->in.h.nodeid = get_node_id(inode);
2170
- args->in.numargs = 1;
2171
- args->in.args[0].size = sizeof(*inarg);
2172
- args->in.args[0].value = inarg;
2472
+ args->opcode = opcode;
2473
+ args->nodeid = get_node_id(inode);
2474
+ args->in_numargs = 1;
2475
+ args->in_args[0].size = sizeof(*inarg);
2476
+ args->in_args[0].value = inarg;
21732477 }
21742478
21752479 static int fuse_getlk(struct file *file, struct file_lock *fl)
21762480 {
21772481 struct inode *inode = file_inode(file);
2178
- struct fuse_conn *fc = get_fuse_conn(inode);
2482
+ struct fuse_mount *fm = get_fuse_mount(inode);
21792483 FUSE_ARGS(args);
21802484 struct fuse_lk_in inarg;
21812485 struct fuse_lk_out outarg;
21822486 int err;
21832487
21842488 fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
2185
- args.out.numargs = 1;
2186
- args.out.args[0].size = sizeof(outarg);
2187
- args.out.args[0].value = &outarg;
2188
- err = fuse_simple_request(fc, &args);
2489
+ args.out_numargs = 1;
2490
+ args.out_args[0].size = sizeof(outarg);
2491
+ args.out_args[0].value = &outarg;
2492
+ err = fuse_simple_request(fm, &args);
21892493 if (!err)
2190
- err = convert_fuse_file_lock(fc, &outarg.lk, fl);
2494
+ err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl);
21912495
21922496 return err;
21932497 }
....@@ -2195,12 +2499,12 @@
21952499 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
21962500 {
21972501 struct inode *inode = file_inode(file);
2198
- struct fuse_conn *fc = get_fuse_conn(inode);
2502
+ struct fuse_mount *fm = get_fuse_mount(inode);
21992503 FUSE_ARGS(args);
22002504 struct fuse_lk_in inarg;
22012505 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
22022506 struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
2203
- pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns);
2507
+ pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
22042508 int err;
22052509
22062510 if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
....@@ -2213,7 +2517,7 @@
22132517 return 0;
22142518
22152519 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
2216
- err = fuse_simple_request(fc, &args);
2520
+ err = fuse_simple_request(fm, &args);
22172521
22182522 /* locking is restartable */
22192523 if (err == -EINTR)
....@@ -2267,29 +2571,29 @@
22672571 static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
22682572 {
22692573 struct inode *inode = mapping->host;
2270
- struct fuse_conn *fc = get_fuse_conn(inode);
2574
+ struct fuse_mount *fm = get_fuse_mount(inode);
22712575 FUSE_ARGS(args);
22722576 struct fuse_bmap_in inarg;
22732577 struct fuse_bmap_out outarg;
22742578 int err;
22752579
2276
- if (!inode->i_sb->s_bdev || fc->no_bmap)
2580
+ if (!inode->i_sb->s_bdev || fm->fc->no_bmap)
22772581 return 0;
22782582
22792583 memset(&inarg, 0, sizeof(inarg));
22802584 inarg.block = block;
22812585 inarg.blocksize = inode->i_sb->s_blocksize;
2282
- args.in.h.opcode = FUSE_BMAP;
2283
- args.in.h.nodeid = get_node_id(inode);
2284
- args.in.numargs = 1;
2285
- args.in.args[0].size = sizeof(inarg);
2286
- args.in.args[0].value = &inarg;
2287
- args.out.numargs = 1;
2288
- args.out.args[0].size = sizeof(outarg);
2289
- args.out.args[0].value = &outarg;
2290
- err = fuse_simple_request(fc, &args);
2586
+ args.opcode = FUSE_BMAP;
2587
+ args.nodeid = get_node_id(inode);
2588
+ args.in_numargs = 1;
2589
+ args.in_args[0].size = sizeof(inarg);
2590
+ args.in_args[0].value = &inarg;
2591
+ args.out_numargs = 1;
2592
+ args.out_args[0].size = sizeof(outarg);
2593
+ args.out_args[0].value = &outarg;
2594
+ err = fuse_simple_request(fm, &args);
22912595 if (err == -ENOSYS)
2292
- fc->no_bmap = 1;
2596
+ fm->fc->no_bmap = 1;
22932597
22942598 return err ? 0 : outarg.block;
22952599 }
....@@ -2297,7 +2601,7 @@
22972601 static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
22982602 {
22992603 struct inode *inode = file->f_mapping->host;
2300
- struct fuse_conn *fc = get_fuse_conn(inode);
2604
+ struct fuse_mount *fm = get_fuse_mount(inode);
23012605 struct fuse_file *ff = file->private_data;
23022606 FUSE_ARGS(args);
23032607 struct fuse_lseek_in inarg = {
....@@ -2308,21 +2612,21 @@
23082612 struct fuse_lseek_out outarg;
23092613 int err;
23102614
2311
- if (fc->no_lseek)
2615
+ if (fm->fc->no_lseek)
23122616 goto fallback;
23132617
2314
- args.in.h.opcode = FUSE_LSEEK;
2315
- args.in.h.nodeid = ff->nodeid;
2316
- args.in.numargs = 1;
2317
- args.in.args[0].size = sizeof(inarg);
2318
- args.in.args[0].value = &inarg;
2319
- args.out.numargs = 1;
2320
- args.out.args[0].size = sizeof(outarg);
2321
- args.out.args[0].value = &outarg;
2322
- err = fuse_simple_request(fc, &args);
2618
+ args.opcode = FUSE_LSEEK;
2619
+ args.nodeid = ff->nodeid;
2620
+ args.in_numargs = 1;
2621
+ args.in_args[0].size = sizeof(inarg);
2622
+ args.in_args[0].value = &inarg;
2623
+ args.out_numargs = 1;
2624
+ args.out_args[0].size = sizeof(outarg);
2625
+ args.out_args[0].value = &outarg;
2626
+ err = fuse_simple_request(fm, &args);
23232627 if (err) {
23242628 if (err == -ENOSYS) {
2325
- fc->no_lseek = 1;
2629
+ fm->fc->no_lseek = 1;
23262630 goto fallback;
23272631 }
23282632 return err;
....@@ -2408,10 +2712,11 @@
24082712 }
24092713
24102714 /* Make sure iov_length() won't overflow */
2411
-static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
2715
+static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
2716
+ size_t count)
24122717 {
24132718 size_t n;
2414
- u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
2719
+ u32 max = fc->max_pages << PAGE_SHIFT;
24152720
24162721 for (n = 0; n < count; n++, iov++) {
24172722 if (iov->iov_len > (size_t) max)
....@@ -2507,7 +2812,7 @@
25072812 unsigned int flags)
25082813 {
25092814 struct fuse_file *ff = file->private_data;
2510
- struct fuse_conn *fc = ff->fc;
2815
+ struct fuse_mount *fm = ff->fm;
25112816 struct fuse_ioctl_in inarg = {
25122817 .fh = ff->fh,
25132818 .cmd = cmd,
....@@ -2515,30 +2820,37 @@
25152820 .flags = flags
25162821 };
25172822 struct fuse_ioctl_out outarg;
2518
- struct fuse_req *req = NULL;
2519
- struct page **pages = NULL;
25202823 struct iovec *iov_page = NULL;
25212824 struct iovec *in_iov = NULL, *out_iov = NULL;
2522
- unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
2523
- size_t in_size, out_size, transferred, c;
2825
+ unsigned int in_iovs = 0, out_iovs = 0, max_pages;
2826
+ size_t in_size, out_size, c;
2827
+ ssize_t transferred;
25242828 int err, i;
25252829 struct iov_iter ii;
2830
+ struct fuse_args_pages ap = {};
25262831
25272832 #if BITS_PER_LONG == 32
25282833 inarg.flags |= FUSE_IOCTL_32BIT;
25292834 #else
2530
- if (flags & FUSE_IOCTL_COMPAT)
2835
+ if (flags & FUSE_IOCTL_COMPAT) {
25312836 inarg.flags |= FUSE_IOCTL_32BIT;
2837
+#ifdef CONFIG_X86_X32
2838
+ if (in_x32_syscall())
2839
+ inarg.flags |= FUSE_IOCTL_COMPAT_X32;
2840
+#endif
2841
+ }
25322842 #endif
25332843
25342844 /* assume all the iovs returned by client always fits in a page */
25352845 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
25362846
25372847 err = -ENOMEM;
2538
- pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
2848
+ ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
25392849 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
2540
- if (!pages || !iov_page)
2850
+ if (!ap.pages || !iov_page)
25412851 goto out;
2852
+
2853
+ fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
25422854
25432855 /*
25442856 * If restricted, initialize IO parameters as encoded in @cmd.
....@@ -2583,58 +2895,46 @@
25832895
25842896 /* make sure there are enough buffer pages and init request with them */
25852897 err = -ENOMEM;
2586
- if (max_pages > FUSE_MAX_PAGES_PER_REQ)
2898
+ if (max_pages > fm->fc->max_pages)
25872899 goto out;
2588
- while (num_pages < max_pages) {
2589
- pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
2590
- if (!pages[num_pages])
2900
+ while (ap.num_pages < max_pages) {
2901
+ ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
2902
+ if (!ap.pages[ap.num_pages])
25912903 goto out;
2592
- num_pages++;
2904
+ ap.num_pages++;
25932905 }
25942906
2595
- req = fuse_get_req(fc, num_pages);
2596
- if (IS_ERR(req)) {
2597
- err = PTR_ERR(req);
2598
- req = NULL;
2599
- goto out;
2600
- }
2601
- memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
2602
- req->num_pages = num_pages;
2603
- fuse_page_descs_length_init(req, 0, req->num_pages);
26042907
26052908 /* okay, let's send it to the client */
2606
- req->in.h.opcode = FUSE_IOCTL;
2607
- req->in.h.nodeid = ff->nodeid;
2608
- req->in.numargs = 1;
2609
- req->in.args[0].size = sizeof(inarg);
2610
- req->in.args[0].value = &inarg;
2909
+ ap.args.opcode = FUSE_IOCTL;
2910
+ ap.args.nodeid = ff->nodeid;
2911
+ ap.args.in_numargs = 1;
2912
+ ap.args.in_args[0].size = sizeof(inarg);
2913
+ ap.args.in_args[0].value = &inarg;
26112914 if (in_size) {
2612
- req->in.numargs++;
2613
- req->in.args[1].size = in_size;
2614
- req->in.argpages = 1;
2915
+ ap.args.in_numargs++;
2916
+ ap.args.in_args[1].size = in_size;
2917
+ ap.args.in_pages = true;
26152918
26162919 err = -EFAULT;
26172920 iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
2618
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
2619
- c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
2921
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
2922
+ c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
26202923 if (c != PAGE_SIZE && iov_iter_count(&ii))
26212924 goto out;
26222925 }
26232926 }
26242927
2625
- req->out.numargs = 2;
2626
- req->out.args[0].size = sizeof(outarg);
2627
- req->out.args[0].value = &outarg;
2628
- req->out.args[1].size = out_size;
2629
- req->out.argpages = 1;
2630
- req->out.argvar = 1;
2928
+ ap.args.out_numargs = 2;
2929
+ ap.args.out_args[0].size = sizeof(outarg);
2930
+ ap.args.out_args[0].value = &outarg;
2931
+ ap.args.out_args[1].size = out_size;
2932
+ ap.args.out_pages = true;
2933
+ ap.args.out_argvar = true;
26312934
2632
- fuse_request_send(fc, req);
2633
- err = req->out.h.error;
2634
- transferred = req->out.args[1].size;
2635
- fuse_put_request(fc, req);
2636
- req = NULL;
2637
- if (err)
2935
+ transferred = fuse_simple_request(fm, &ap.args);
2936
+ err = transferred;
2937
+ if (transferred < 0)
26382938 goto out;
26392939
26402940 /* did it ask for retry? */
....@@ -2659,8 +2959,8 @@
26592959 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
26602960 goto out;
26612961
2662
- vaddr = kmap_atomic(pages[0]);
2663
- err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
2962
+ vaddr = kmap_atomic(ap.pages[0]);
2963
+ err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
26642964 transferred, in_iovs + out_iovs,
26652965 (flags & FUSE_IOCTL_COMPAT) != 0);
26662966 kunmap_atomic(vaddr);
....@@ -2670,11 +2970,11 @@
26702970 in_iov = iov_page;
26712971 out_iov = in_iov + in_iovs;
26722972
2673
- err = fuse_verify_ioctl_iov(in_iov, in_iovs);
2973
+ err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs);
26742974 if (err)
26752975 goto out;
26762976
2677
- err = fuse_verify_ioctl_iov(out_iov, out_iovs);
2977
+ err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs);
26782978 if (err)
26792979 goto out;
26802980
....@@ -2687,19 +2987,17 @@
26872987
26882988 err = -EFAULT;
26892989 iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
2690
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
2691
- c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
2990
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
2991
+ c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
26922992 if (c != PAGE_SIZE && iov_iter_count(&ii))
26932993 goto out;
26942994 }
26952995 err = 0;
26962996 out:
2697
- if (req)
2698
- fuse_put_request(fc, req);
26992997 free_page((unsigned long) iov_page);
2700
- while (num_pages)
2701
- __free_page(pages[--num_pages]);
2702
- kfree(pages);
2998
+ while (ap.num_pages)
2999
+ __free_page(ap.pages[--ap.num_pages]);
3000
+ kfree(ap.pages);
27033001
27043002 return err ? err : outarg.result;
27053003 }
....@@ -2773,7 +3071,7 @@
27733071 {
27743072 spin_lock(&fc->lock);
27753073 if (RB_EMPTY_NODE(&ff->polled_node)) {
2776
- struct rb_node **link, *uninitialized_var(parent);
3074
+ struct rb_node **link, *parent;
27773075
27783076 link = fuse_find_polled_node(fc, ff->kh, &parent);
27793077 BUG_ON(*link);
....@@ -2786,13 +3084,13 @@
27863084 __poll_t fuse_file_poll(struct file *file, poll_table *wait)
27873085 {
27883086 struct fuse_file *ff = file->private_data;
2789
- struct fuse_conn *fc = ff->fc;
3087
+ struct fuse_mount *fm = ff->fm;
27903088 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
27913089 struct fuse_poll_out outarg;
27923090 FUSE_ARGS(args);
27933091 int err;
27943092
2795
- if (fc->no_poll)
3093
+ if (fm->fc->no_poll)
27963094 return DEFAULT_POLLMASK;
27973095
27983096 poll_wait(file, &ff->poll_wait, wait);
....@@ -2804,23 +3102,23 @@
28043102 */
28053103 if (waitqueue_active(&ff->poll_wait)) {
28063104 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
2807
- fuse_register_polled_file(fc, ff);
3105
+ fuse_register_polled_file(fm->fc, ff);
28083106 }
28093107
2810
- args.in.h.opcode = FUSE_POLL;
2811
- args.in.h.nodeid = ff->nodeid;
2812
- args.in.numargs = 1;
2813
- args.in.args[0].size = sizeof(inarg);
2814
- args.in.args[0].value = &inarg;
2815
- args.out.numargs = 1;
2816
- args.out.args[0].size = sizeof(outarg);
2817
- args.out.args[0].value = &outarg;
2818
- err = fuse_simple_request(fc, &args);
3108
+ args.opcode = FUSE_POLL;
3109
+ args.nodeid = ff->nodeid;
3110
+ args.in_numargs = 1;
3111
+ args.in_args[0].size = sizeof(inarg);
3112
+ args.in_args[0].value = &inarg;
3113
+ args.out_numargs = 1;
3114
+ args.out_args[0].size = sizeof(outarg);
3115
+ args.out_args[0].value = &outarg;
3116
+ err = fuse_simple_request(fm, &args);
28193117
28203118 if (!err)
28213119 return demangle_poll(outarg.revents);
28223120 if (err == -ENOSYS) {
2823
- fc->no_poll = 1;
3121
+ fm->fc->no_poll = 1;
28243122 return DEFAULT_POLLMASK;
28253123 }
28263124 return EPOLLERR;
....@@ -2865,9 +3163,9 @@
28653163 fuse_do_setattr(file_dentry(file), &attr, file);
28663164 }
28673165
2868
-static inline loff_t fuse_round_up(loff_t off)
3166
+static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
28693167 {
2870
- return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
3168
+ return round_up(off, fc->max_pages << PAGE_SHIFT);
28713169 }
28723170
28733171 static ssize_t
....@@ -2877,11 +3175,10 @@
28773175 ssize_t ret = 0;
28783176 struct file *file = iocb->ki_filp;
28793177 struct fuse_file *ff = file->private_data;
2880
- bool async_dio = ff->fc->async_dio;
28813178 loff_t pos = 0;
28823179 struct inode *inode;
28833180 loff_t i_size;
2884
- size_t count = iov_iter_count(iter);
3181
+ size_t count = iov_iter_count(iter), shortened = 0;
28853182 loff_t offset = iocb->ki_pos;
28863183 struct fuse_io_priv *io;
28873184
....@@ -2889,16 +3186,8 @@
28893186 inode = file->f_mapping->host;
28903187 i_size = i_size_read(inode);
28913188
2892
- if ((iov_iter_rw(iter) == READ) && (offset > i_size))
3189
+ if ((iov_iter_rw(iter) == READ) && (offset >= i_size))
28933190 return 0;
2894
-
2895
- /* optimization for short read */
2896
- if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
2897
- if (offset >= i_size)
2898
- return 0;
2899
- iov_iter_truncate(iter, fuse_round_up(i_size - offset));
2900
- count = iov_iter_count(iter);
2901
- }
29023191
29033192 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
29043193 if (!io)
....@@ -2915,15 +3204,22 @@
29153204 * By default, we want to optimize all I/Os with async request
29163205 * submission to the client filesystem if supported.
29173206 */
2918
- io->async = async_dio;
3207
+ io->async = ff->fm->fc->async_dio;
29193208 io->iocb = iocb;
29203209 io->blocking = is_sync_kiocb(iocb);
3210
+
3211
+ /* optimization for short read */
3212
+ if (io->async && !io->write && offset + count > i_size) {
3213
+ iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset));
3214
+ shortened = count - iov_iter_count(iter);
3215
+ count -= shortened;
3216
+ }
29213217
29223218 /*
29233219 * We cannot asynchronously extend the size of a file.
29243220 * In such case the aio will behave exactly like sync io.
29253221 */
2926
- if ((offset + count > i_size) && iov_iter_rw(iter) == WRITE)
3222
+ if ((offset + count > i_size) && io->write)
29273223 io->blocking = true;
29283224
29293225 if (io->async && io->blocking) {
....@@ -2941,6 +3237,7 @@
29413237 } else {
29423238 ret = __fuse_direct_read(io, iter, &pos);
29433239 }
3240
+ iov_iter_reexpand(iter, iov_iter_count(iter) + shortened);
29443241
29453242 if (io->async) {
29463243 bool blocking = io->blocking;
....@@ -2967,13 +3264,23 @@
29673264 return ret;
29683265 }
29693266
3267
+static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
3268
+{
3269
+ int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX);
3270
+
3271
+ if (!err)
3272
+ fuse_sync_writes(inode);
3273
+
3274
+ return err;
3275
+}
3276
+
29703277 static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
29713278 loff_t length)
29723279 {
29733280 struct fuse_file *ff = file->private_data;
29743281 struct inode *inode = file_inode(file);
29753282 struct fuse_inode *fi = get_fuse_inode(inode);
2976
- struct fuse_conn *fc = ff->fc;
3283
+ struct fuse_mount *fm = ff->fm;
29773284 FUSE_ARGS(args);
29783285 struct fuse_fallocate_in inarg = {
29793286 .fh = ff->fh,
....@@ -2982,26 +3289,30 @@
29823289 .mode = mode
29833290 };
29843291 int err;
2985
- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
2986
- (mode & FALLOC_FL_PUNCH_HOLE);
3292
+ bool block_faults = FUSE_IS_DAX(inode) &&
3293
+ (!(mode & FALLOC_FL_KEEP_SIZE) ||
3294
+ (mode & FALLOC_FL_PUNCH_HOLE));
29873295
29883296 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
29893297 return -EOPNOTSUPP;
29903298
2991
- if (fc->no_fallocate)
3299
+ if (fm->fc->no_fallocate)
29923300 return -EOPNOTSUPP;
29933301
2994
- if (lock_inode) {
2995
- inode_lock(inode);
2996
- if (mode & FALLOC_FL_PUNCH_HOLE) {
2997
- loff_t endbyte = offset + length - 1;
2998
- err = filemap_write_and_wait_range(inode->i_mapping,
2999
- offset, endbyte);
3000
- if (err)
3001
- goto out;
3302
+ inode_lock(inode);
3303
+ if (block_faults) {
3304
+ down_write(&fi->i_mmap_sem);
3305
+ err = fuse_dax_break_layouts(inode, 0, 0);
3306
+ if (err)
3307
+ goto out;
3308
+ }
30023309
3003
- fuse_sync_writes(inode);
3004
- }
3310
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
3311
+ loff_t endbyte = offset + length - 1;
3312
+
3313
+ err = fuse_writeback_range(inode, offset, endbyte);
3314
+ if (err)
3315
+ goto out;
30053316 }
30063317
30073318 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
....@@ -3011,17 +3322,21 @@
30113322 goto out;
30123323 }
30133324
3325
+ err = file_modified(file);
3326
+ if (err)
3327
+ goto out;
3328
+
30143329 if (!(mode & FALLOC_FL_KEEP_SIZE))
30153330 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
30163331
3017
- args.in.h.opcode = FUSE_FALLOCATE;
3018
- args.in.h.nodeid = ff->nodeid;
3019
- args.in.numargs = 1;
3020
- args.in.args[0].size = sizeof(inarg);
3021
- args.in.args[0].value = &inarg;
3022
- err = fuse_simple_request(fc, &args);
3332
+ args.opcode = FUSE_FALLOCATE;
3333
+ args.nodeid = ff->nodeid;
3334
+ args.in_numargs = 1;
3335
+ args.in_args[0].size = sizeof(inarg);
3336
+ args.in_args[0].value = &inarg;
3337
+ err = fuse_simple_request(fm, &args);
30233338 if (err == -ENOSYS) {
3024
- fc->no_fallocate = 1;
3339
+ fm->fc->no_fallocate = 1;
30253340 err = -EOPNOTSUPP;
30263341 }
30273342 if (err)
....@@ -3031,7 +3346,7 @@
30313346 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
30323347 bool changed = fuse_write_update_size(inode, offset + length);
30333348
3034
- if (changed && fc->writeback_cache)
3349
+ if (changed && fm->fc->writeback_cache)
30353350 file_update_time(file);
30363351 }
30373352
....@@ -3044,10 +3359,140 @@
30443359 if (!(mode & FALLOC_FL_KEEP_SIZE))
30453360 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
30463361
3047
- if (lock_inode)
3048
- inode_unlock(inode);
3362
+ if (block_faults)
3363
+ up_write(&fi->i_mmap_sem);
3364
+
3365
+ inode_unlock(inode);
3366
+
3367
+ fuse_flush_time_update(inode);
30493368
30503369 return err;
3370
+}
3371
+
3372
+static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
3373
+ struct file *file_out, loff_t pos_out,
3374
+ size_t len, unsigned int flags)
3375
+{
3376
+ struct fuse_file *ff_in = file_in->private_data;
3377
+ struct fuse_file *ff_out = file_out->private_data;
3378
+ struct inode *inode_in = file_inode(file_in);
3379
+ struct inode *inode_out = file_inode(file_out);
3380
+ struct fuse_inode *fi_out = get_fuse_inode(inode_out);
3381
+ struct fuse_mount *fm = ff_in->fm;
3382
+ struct fuse_conn *fc = fm->fc;
3383
+ FUSE_ARGS(args);
3384
+ struct fuse_copy_file_range_in inarg = {
3385
+ .fh_in = ff_in->fh,
3386
+ .off_in = pos_in,
3387
+ .nodeid_out = ff_out->nodeid,
3388
+ .fh_out = ff_out->fh,
3389
+ .off_out = pos_out,
3390
+ .len = len,
3391
+ .flags = flags
3392
+ };
3393
+ struct fuse_write_out outarg;
3394
+ ssize_t err;
3395
+ /* mark unstable when write-back is not used, and file_out gets
3396
+ * extended */
3397
+ bool is_unstable = (!fc->writeback_cache) &&
3398
+ ((pos_out + len) > inode_out->i_size);
3399
+
3400
+ if (fc->no_copy_file_range)
3401
+ return -EOPNOTSUPP;
3402
+
3403
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
3404
+ return -EXDEV;
3405
+
3406
+ inode_lock(inode_in);
3407
+ err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
3408
+ inode_unlock(inode_in);
3409
+ if (err)
3410
+ return err;
3411
+
3412
+ inode_lock(inode_out);
3413
+
3414
+ err = file_modified(file_out);
3415
+ if (err)
3416
+ goto out;
3417
+
3418
+ /*
3419
+ * Write out dirty pages in the destination file before sending the COPY
3420
+ * request to userspace. After the request is completed, truncate off
3421
+ * pages (including partial ones) from the cache that have been copied,
3422
+ * since these contain stale data at that point.
3423
+ *
3424
+ * This should be mostly correct, but if the COPY writes to partial
3425
+ * pages (at the start or end) and the parts not covered by the COPY are
3426
+ * written through a memory map after calling fuse_writeback_range(),
3427
+ * then these partial page modifications will be lost on truncation.
3428
+ *
3429
+ * It is unlikely that someone would rely on such mixed style
3430
+ * modifications. Yet this does give less guarantees than if the
3431
+ * copying was performed with write(2).
3432
+ *
3433
+ * To fix this a i_mmap_sem style lock could be used to prevent new
3434
+ * faults while the copy is ongoing.
3435
+ */
3436
+ err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
3437
+ if (err)
3438
+ goto out;
3439
+
3440
+ if (is_unstable)
3441
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
3442
+
3443
+ args.opcode = FUSE_COPY_FILE_RANGE;
3444
+ args.nodeid = ff_in->nodeid;
3445
+ args.in_numargs = 1;
3446
+ args.in_args[0].size = sizeof(inarg);
3447
+ args.in_args[0].value = &inarg;
3448
+ args.out_numargs = 1;
3449
+ args.out_args[0].size = sizeof(outarg);
3450
+ args.out_args[0].value = &outarg;
3451
+ err = fuse_simple_request(fm, &args);
3452
+ if (err == -ENOSYS) {
3453
+ fc->no_copy_file_range = 1;
3454
+ err = -EOPNOTSUPP;
3455
+ }
3456
+ if (err)
3457
+ goto out;
3458
+
3459
+ truncate_inode_pages_range(inode_out->i_mapping,
3460
+ ALIGN_DOWN(pos_out, PAGE_SIZE),
3461
+ ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
3462
+
3463
+ if (fc->writeback_cache) {
3464
+ fuse_write_update_size(inode_out, pos_out + outarg.size);
3465
+ file_update_time(file_out);
3466
+ }
3467
+
3468
+ fuse_invalidate_attr(inode_out);
3469
+
3470
+ err = outarg.size;
3471
+out:
3472
+ if (is_unstable)
3473
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
3474
+
3475
+ inode_unlock(inode_out);
3476
+ file_accessed(file_in);
3477
+
3478
+ fuse_flush_time_update(inode_out);
3479
+
3480
+ return err;
3481
+}
3482
+
3483
+static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
3484
+ struct file *dst_file, loff_t dst_off,
3485
+ size_t len, unsigned int flags)
3486
+{
3487
+ ssize_t ret;
3488
+
3489
+ ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
3490
+ len, flags);
3491
+
3492
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
3493
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
3494
+ dst_off, len, flags);
3495
+ return ret;
30513496 }
30523497
30533498 static const struct file_operations fuse_file_operations = {
....@@ -3060,38 +3505,23 @@
30603505 .release = fuse_release,
30613506 .fsync = fuse_fsync,
30623507 .lock = fuse_file_lock,
3508
+ .get_unmapped_area = thp_get_unmapped_area,
30633509 .flock = fuse_file_flock,
30643510 .splice_read = generic_file_splice_read,
3511
+ .splice_write = iter_file_splice_write,
30653512 .unlocked_ioctl = fuse_file_ioctl,
30663513 .compat_ioctl = fuse_file_compat_ioctl,
30673514 .poll = fuse_file_poll,
30683515 .fallocate = fuse_file_fallocate,
3069
-};
3070
-
3071
-static const struct file_operations fuse_direct_io_file_operations = {
3072
- .llseek = fuse_file_llseek,
3073
- .read_iter = fuse_direct_read_iter,
3074
- .write_iter = fuse_direct_write_iter,
3075
- .mmap = fuse_direct_mmap,
3076
- .open = fuse_open,
3077
- .flush = fuse_flush,
3078
- .release = fuse_release,
3079
- .fsync = fuse_fsync,
3080
- .lock = fuse_file_lock,
3081
- .flock = fuse_file_flock,
3082
- .unlocked_ioctl = fuse_file_ioctl,
3083
- .compat_ioctl = fuse_file_compat_ioctl,
3084
- .poll = fuse_file_poll,
3085
- .fallocate = fuse_file_fallocate,
3086
- /* no splice_read */
3516
+ .copy_file_range = fuse_copy_file_range,
30873517 };
30883518
30893519 static const struct address_space_operations fuse_file_aops = {
30903520 .readpage = fuse_readpage,
3521
+ .readahead = fuse_readahead,
30913522 .writepage = fuse_writepage,
30923523 .writepages = fuse_writepages,
30933524 .launder_page = fuse_launder_page,
3094
- .readpages = fuse_readpages,
30953525 .set_page_dirty = __set_page_dirty_nobuffers,
30963526 .bmap = fuse_bmap,
30973527 .direct_IO = fuse_direct_IO,
....@@ -3101,6 +3531,17 @@
31013531
31023532 void fuse_init_file_inode(struct inode *inode)
31033533 {
3534
+ struct fuse_inode *fi = get_fuse_inode(inode);
3535
+
31043536 inode->i_fop = &fuse_file_operations;
31053537 inode->i_data.a_ops = &fuse_file_aops;
3538
+
3539
+ INIT_LIST_HEAD(&fi->write_files);
3540
+ INIT_LIST_HEAD(&fi->queued_writes);
3541
+ fi->writectr = 0;
3542
+ init_waitqueue_head(&fi->page_waitq);
3543
+ fi->writepages = RB_ROOT;
3544
+
3545
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
3546
+ fuse_dax_inode_init(inode);
31063547 }