2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/fuse/dev.c
@@ -26,6 +26,10 @@
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
 
+/* Ordinary requests have even IDs, while interrupt IDs are odd */
+#define FUSE_INT_REQ_BIT (1ULL << 0)
+#define FUSE_REQ_ID_STEP (1ULL << 1)
+
 static struct kmem_cache *fuse_req_cachep;
 
 static struct fuse_dev *fuse_get_dev(struct file *file)
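[Note: a minimal sketch, not part of the patch, of how the two new constants interact; FUSE_PQ_HASH_BITS comes from fuse_i.h:]

	/* Ordinary IDs advance by 2, so they stay even: 2, 4, 6, ... */
	u64 unique = fiq->reqctr += FUSE_REQ_ID_STEP;

	/* The INTERRUPT for a request reuses its ID with the low bit
	 * set, giving the odd IDs: 3, 5, 7, ... */
	u64 intr_id = unique | FUSE_INT_REQ_BIT;

	/* A reply carrying either ID resolves to the same processing
	 * bucket, because the hash masks the interrupt bit off */
	unsigned int bucket = hash_long(unique & ~FUSE_INT_REQ_BIT,
					FUSE_PQ_HASH_BITS);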
@@ -37,75 +41,31 @@
 	return READ_ONCE(file->private_data);
 }
 
-static void fuse_request_init(struct fuse_req *req, struct page **pages,
-			      struct fuse_page_desc *page_descs,
-			      unsigned npages)
+static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
 {
-	memset(req, 0, sizeof(*req));
-	memset(pages, 0, sizeof(*pages) * npages);
-	memset(page_descs, 0, sizeof(*page_descs) * npages);
 	INIT_LIST_HEAD(&req->list);
 	INIT_LIST_HEAD(&req->intr_entry);
 	init_waitqueue_head(&req->waitq);
 	refcount_set(&req->count, 1);
-	req->pages = pages;
-	req->page_descs = page_descs;
-	req->max_pages = npages;
 	__set_bit(FR_PENDING, &req->flags);
+	req->fm = fm;
 }
 
-static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
+static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
 {
-	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
-	if (req) {
-		struct page **pages;
-		struct fuse_page_desc *page_descs;
+	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
+	if (req)
+		fuse_request_init(fm, req);
 
-		if (npages <= FUSE_REQ_INLINE_PAGES) {
-			pages = req->inline_pages;
-			page_descs = req->inline_page_descs;
-		} else {
-			pages = kmalloc_array(npages, sizeof(struct page *),
-					      flags);
-			page_descs =
-				kmalloc_array(npages,
-					      sizeof(struct fuse_page_desc),
-					      flags);
-		}
-
-		if (!pages || !page_descs) {
-			kfree(pages);
-			kfree(page_descs);
-			kmem_cache_free(fuse_req_cachep, req);
-			return NULL;
-		}
-
-		fuse_request_init(req, pages, page_descs, npages);
-	}
 	return req;
 }
 
-struct fuse_req *fuse_request_alloc(unsigned npages)
+static void fuse_request_free(struct fuse_req *req)
 {
-	return __fuse_request_alloc(npages, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(fuse_request_alloc);
-
-struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
-{
-	return __fuse_request_alloc(npages, GFP_NOFS);
-}
-
-void fuse_request_free(struct fuse_req *req)
-{
-	if (req->pages != req->inline_pages) {
-		kfree(req->pages);
-		kfree(req->page_descs);
-	}
 	kmem_cache_free(fuse_req_cachep, req);
 }
 
-void __fuse_get_request(struct fuse_req *req)
+static void __fuse_get_request(struct fuse_req *req)
 {
 	refcount_inc(&req->count);
 }
@@ -142,9 +102,11 @@
 	}
 }
 
-static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
-				       bool for_background)
+static void fuse_put_request(struct fuse_req *req);
+
+static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
 {
+	struct fuse_conn *fc = fm->fc;
 	struct fuse_req *req;
 	int err;
 	atomic_inc(&fc->num_waiting);
@@ -166,7 +128,7 @@
 	if (fc->conn_error)
 		goto out;
 
-	req = fuse_request_alloc(npages);
+	req = fuse_request_alloc(fm, GFP_KERNEL);
 	err = -ENOMEM;
 	if (!req) {
 		if (for_background)
@@ -184,7 +146,7 @@
 
 	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
 		     req->in.h.gid == ((gid_t)-1))) {
-		fuse_put_request(fc, req);
+		fuse_put_request(req);
 		return ERR_PTR(-EOVERFLOW);
 	}
 	return req;
@@ -194,108 +156,20 @@
 	return ERR_PTR(err);
 }
 
-struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
+static void fuse_put_request(struct fuse_req *req)
 {
-	return __fuse_get_req(fc, npages, false);
-}
-EXPORT_SYMBOL_GPL(fuse_get_req);
+	struct fuse_conn *fc = req->fm->fc;
 
-struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
-					     unsigned npages)
-{
-	return __fuse_get_req(fc, npages, true);
-}
-EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
-
-/*
- * Return request in fuse_file->reserved_req.  However that may
- * currently be in use.  If that is the case, wait for it to become
- * available.
- */
-static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
-					 struct file *file)
-{
-	struct fuse_req *req = NULL;
-	struct fuse_file *ff = file->private_data;
-
-	do {
-		wait_event(fc->reserved_req_waitq, ff->reserved_req);
-		spin_lock(&fc->lock);
-		if (ff->reserved_req) {
-			req = ff->reserved_req;
-			ff->reserved_req = NULL;
-			req->stolen_file = get_file(file);
-		}
-		spin_unlock(&fc->lock);
-	} while (!req);
-
-	return req;
-}
-
-/*
- * Put stolen request back into fuse_file->reserved_req
- */
-static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
-{
-	struct file *file = req->stolen_file;
-	struct fuse_file *ff = file->private_data;
-
-	spin_lock(&fc->lock);
-	fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
-	BUG_ON(ff->reserved_req);
-	ff->reserved_req = req;
-	wake_up_all(&fc->reserved_req_waitq);
-	spin_unlock(&fc->lock);
-	fput(file);
-}
-
-/*
- * Gets a requests for a file operation, always succeeds
- *
- * This is used for sending the FLUSH request, which must get to
- * userspace, due to POSIX locks which may need to be unlocked.
- *
- * If allocation fails due to OOM, use the reserved request in
- * fuse_file.
- *
- * This is very unlikely to deadlock accidentally, since the
- * filesystem should not have it's own file open.  If deadlock is
- * intentional, it can still be broken by "aborting" the filesystem.
- */
-struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
-					     struct file *file)
-{
-	struct fuse_req *req;
-
-	atomic_inc(&fc->num_waiting);
-	wait_event(fc->blocked_waitq, fc->initialized);
-	/* Matches smp_wmb() in fuse_set_initialized() */
-	smp_rmb();
-	req = fuse_request_alloc(0);
-	if (!req)
-		req = get_reserved_req(fc, file);
-
-	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
-	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
-	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
-
-	__set_bit(FR_WAITING, &req->flags);
-	__clear_bit(FR_BACKGROUND, &req->flags);
-	return req;
-}
-
-void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
-{
 	if (refcount_dec_and_test(&req->count)) {
 		if (test_bit(FR_BACKGROUND, &req->flags)) {
 			/*
 			 * We get here in the unlikely case that a background
 			 * request was allocated but not sent
 			 */
-			spin_lock(&fc->lock);
+			spin_lock(&fc->bg_lock);
 			if (!fc->blocked)
 				wake_up(&fc->blocked_waitq);
-			spin_unlock(&fc->lock);
+			spin_unlock(&fc->bg_lock);
 		}
 
 		if (test_bit(FR_WAITING, &req->flags)) {
@@ -303,15 +177,11 @@
 			fuse_drop_waiting(fc);
 		}
 
-		if (req->stolen_file)
-			put_reserved_req(fc, req);
-		else
-			fuse_request_free(req);
+		fuse_request_free(req);
 	}
 }
-EXPORT_SYMBOL_GPL(fuse_put_request);
 
-static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 {
 	unsigned nbytes = 0;
 	unsigned i;
@@ -321,19 +191,50 @@
 
 	return nbytes;
 }
+EXPORT_SYMBOL_GPL(fuse_len_args);
 
-static u64 fuse_get_unique(struct fuse_iqueue *fiq)
+u64 fuse_get_unique(struct fuse_iqueue *fiq)
 {
-	return ++fiq->reqctr;
+	fiq->reqctr += FUSE_REQ_ID_STEP;
+	return fiq->reqctr;
+}
+EXPORT_SYMBOL_GPL(fuse_get_unique);
+
+static unsigned int fuse_req_hash(u64 unique)
+{
+	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
 }
 
-static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
+/**
+ * A new request is available, wake fiq->waitq
+ */
+static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq, bool sync)
+__releases(fiq->lock)
+{
+	if (sync)
+		wake_up_sync(&fiq->waitq);
+	else
+		wake_up(&fiq->waitq);
+	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+	spin_unlock(&fiq->lock);
+}
+
+const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
+	.wake_forget_and_unlock		= fuse_dev_wake_and_unlock,
+	.wake_interrupt_and_unlock	= fuse_dev_wake_and_unlock,
+	.wake_pending_and_unlock	= fuse_dev_wake_and_unlock,
+};
+EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
+
+static void queue_request_and_unlock(struct fuse_iqueue *fiq,
+				     struct fuse_req *req, bool sync)
+__releases(fiq->lock)
 {
 	req->in.h.len = sizeof(struct fuse_in_header) +
-		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+		fuse_len_args(req->args->in_numargs,
			      (struct fuse_arg *) req->args->in_args);
 	list_add_tail(&req->list, &fiq->pending);
-	wake_up(&fiq->waitq);
-	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+	fiq->ops->wake_pending_and_unlock(fiq, sync);
 }
 
 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
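[Note: the fuse_iqueue_ops indirection above replaces the direct wake_up()/kill_fasync() calls so that a transport other than /dev/fuse could supply its own wakeup hooks. A hypothetical sketch, with all names invented for illustration:]

	static void my_transport_wake_and_unlock(struct fuse_iqueue *fiq,
						 bool sync)
	__releases(fiq->lock)
	{
		/* hand the head of fiq->pending to the transport's own
		 * queue here instead of waking a /dev/fuse reader */
		spin_unlock(&fiq->lock);
	}

	static const struct fuse_iqueue_ops my_transport_fiq_ops = {
		.wake_forget_and_unlock		= my_transport_wake_and_unlock,
		.wake_interrupt_and_unlock	= my_transport_wake_and_unlock,
		.wake_pending_and_unlock	= my_transport_wake_and_unlock,
	};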
@@ -348,28 +249,27 @@
 	if (fiq->connected) {
 		fiq->forget_list_tail->next = forget;
 		fiq->forget_list_tail = forget;
-		wake_up(&fiq->waitq);
-		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+		fiq->ops->wake_forget_and_unlock(fiq, false);
 	} else {
 		kfree(forget);
+		spin_unlock(&fiq->lock);
 	}
-	spin_unlock(&fiq->lock);
 }
 
 static void flush_bg_queue(struct fuse_conn *fc)
 {
+	struct fuse_iqueue *fiq = &fc->iq;
+
 	while (fc->active_background < fc->max_background &&
 	       !list_empty(&fc->bg_queue)) {
 		struct fuse_req *req;
-		struct fuse_iqueue *fiq = &fc->iq;
 
-		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
+		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
 		list_del(&req->list);
 		fc->active_background++;
 		spin_lock(&fiq->lock);
 		req->in.h.unique = fuse_get_unique(fiq);
-		queue_request(fiq, req);
-		spin_unlock(&fiq->lock);
+		queue_request_and_unlock(fiq, req, false);
 	}
 }
 
@@ -381,20 +281,29 @@
  * the 'end' callback is called if given, else the reference to the
  * request is released
  */
-static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_end(struct fuse_req *req)
 {
+	struct fuse_mount *fm = req->fm;
+	struct fuse_conn *fc = fm->fc;
 	struct fuse_iqueue *fiq = &fc->iq;
 
 	if (test_and_set_bit(FR_FINISHED, &req->flags))
 		goto put_request;
 
-	spin_lock(&fiq->lock);
-	list_del_init(&req->intr_entry);
-	spin_unlock(&fiq->lock);
+	/*
+	 * test_and_set_bit() implies smp_mb() between bit
+	 * changing and below FR_INTERRUPTED check. Pairs with
+	 * smp_mb() from queue_interrupt().
	 */
+	if (test_bit(FR_INTERRUPTED, &req->flags)) {
+		spin_lock(&fiq->lock);
+		list_del_init(&req->intr_entry);
+		spin_unlock(&fiq->lock);
+	}
 	WARN_ON(test_bit(FR_PENDING, &req->flags));
 	WARN_ON(test_bit(FR_SENT, &req->flags));
 	if (test_bit(FR_BACKGROUND, &req->flags)) {
-		spin_lock(&fc->lock);
+		spin_lock(&fc->bg_lock);
 		clear_bit(FR_BACKGROUND, &req->flags);
 		if (fc->num_background == fc->max_background) {
 			fc->blocked = 0;
@@ -410,39 +319,59 @@
 			wake_up(&fc->blocked_waitq);
 		}
 
-		if (fc->num_background == fc->congestion_threshold && fc->sb) {
-			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
-			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+		if (fc->num_background == fc->congestion_threshold && fm->sb) {
+			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
 		}
 		fc->num_background--;
 		fc->active_background--;
 		flush_bg_queue(fc);
-		spin_unlock(&fc->lock);
+		spin_unlock(&fc->bg_lock);
+	} else {
+		/* Wake up waiter sleeping in request_wait_answer() */
+		wake_up(&req->waitq);
 	}
-	wake_up(&req->waitq);
-	if (req->end)
-		req->end(fc, req);
-put_request:
-	fuse_put_request(fc, req);
-}
 
-static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
+	if (test_bit(FR_ASYNC, &req->flags))
+		req->args->end(fm, req->args, req->out.h.error);
+put_request:
+	fuse_put_request(req);
+}
+EXPORT_SYMBOL_GPL(fuse_request_end);
+
+static int queue_interrupt(struct fuse_req *req)
 {
+	struct fuse_iqueue *fiq = &req->fm->fc->iq;
+
 	spin_lock(&fiq->lock);
-	if (test_bit(FR_FINISHED, &req->flags)) {
+	/* Check that we've sent a request to interrupt this req */
+	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
 		spin_unlock(&fiq->lock);
-		return;
+		return -EINVAL;
 	}
+
 	if (list_empty(&req->intr_entry)) {
 		list_add_tail(&req->intr_entry, &fiq->interrupts);
-		wake_up(&fiq->waitq);
+		/*
+		 * Pairs with smp_mb() implied by test_and_set_bit()
+		 * from fuse_request_end().
		 */
+		smp_mb();
+		if (test_bit(FR_FINISHED, &req->flags)) {
+			list_del_init(&req->intr_entry);
+			spin_unlock(&fiq->lock);
+			return 0;
+		}
+		fiq->ops->wake_interrupt_and_unlock(fiq, false);
+	} else {
+		spin_unlock(&fiq->lock);
 	}
-	spin_unlock(&fiq->lock);
-	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+	return 0;
 }
 
-static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+static void request_wait_answer(struct fuse_req *req)
 {
+	struct fuse_conn *fc = req->fm->fc;
 	struct fuse_iqueue *fiq = &fc->iq;
 	int err;
 
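[Note: the barrier pairing between queue_interrupt() and fuse_request_end() above is the classic store/load pattern; restated as a sketch:]

	/*
	 * queue_interrupt()                 fuse_request_end()
	 * -----------------                 ------------------
	 * list_add_tail(&req->intr_entry)   test_and_set_bit(FR_FINISHED)
	 * smp_mb()                              (implies a full barrier)
	 * if (test_bit(FR_FINISHED))        if (test_bit(FR_INTERRUPTED))
	 *         list_del_init(...)                list_del_init(...)
	 *
	 * At least one side must observe the other's store, so the
	 * interrupt entry cannot linger on fiq->interrupts after the
	 * request has finished.
	 */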
@@ -457,7 +386,7 @@
 		/* matches barrier in fuse_dev_do_read() */
 		smp_mb__after_atomic();
 		if (test_bit(FR_SENT, &req->flags))
-			queue_interrupt(fiq, req);
+			queue_interrupt(req);
 	}
 
 	if (!test_bit(FR_FORCE, &req->flags)) {
@@ -486,9 +415,9 @@
 	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
 }
 
-static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+static void __fuse_request_send(struct fuse_req *req)
 {
-	struct fuse_iqueue *fiq = &fc->iq;
+	struct fuse_iqueue *fiq = &req->fm->fc->iq;
 
 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
 	spin_lock(&fiq->lock);
@@ -497,173 +426,192 @@
 		req->out.h.error = -ENOTCONN;
 	} else {
 		req->in.h.unique = fuse_get_unique(fiq);
-		queue_request(fiq, req);
 		/* acquire extra reference, since request is still needed
-		   after request_end() */
+		   after fuse_request_end() */
 		__fuse_get_request(req);
-		spin_unlock(&fiq->lock);
+		queue_request_and_unlock(fiq, req, true);
 
-		request_wait_answer(fc, req);
-		/* Pairs with smp_wmb() in request_end() */
+		request_wait_answer(req);
+		/* Pairs with smp_wmb() in fuse_request_end() */
 		smp_rmb();
 	}
 }
 
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
-{
-	__set_bit(FR_ISREPLY, &req->flags);
-	if (!test_bit(FR_WAITING, &req->flags)) {
-		__set_bit(FR_WAITING, &req->flags);
-		atomic_inc(&fc->num_waiting);
-	}
-	__fuse_request_send(fc, req);
-}
-EXPORT_SYMBOL_GPL(fuse_request_send);
-
 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
 {
-	if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
-		args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
+	if (fc->minor < 4 && args->opcode == FUSE_STATFS)
+		args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
 
 	if (fc->minor < 9) {
-		switch (args->in.h.opcode) {
+		switch (args->opcode) {
 		case FUSE_LOOKUP:
 		case FUSE_CREATE:
 		case FUSE_MKNOD:
 		case FUSE_MKDIR:
 		case FUSE_SYMLINK:
 		case FUSE_LINK:
-			args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
 			break;
 		case FUSE_GETATTR:
 		case FUSE_SETATTR:
-			args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
 			break;
 		}
 	}
 	if (fc->minor < 12) {
-		switch (args->in.h.opcode) {
+		switch (args->opcode) {
 		case FUSE_CREATE:
-			args->in.args[0].size = sizeof(struct fuse_open_in);
+			args->in_args[0].size = sizeof(struct fuse_open_in);
 			break;
 		case FUSE_MKNOD:
-			args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
+			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
 			break;
 		}
 	}
 }
 
-ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+static void fuse_force_creds(struct fuse_req *req)
 {
+	struct fuse_conn *fc = req->fm->fc;
+
+	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
+	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
+	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+}
+
+static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
+{
+	req->in.h.opcode = args->opcode;
+	req->in.h.nodeid = args->nodeid;
+	req->args = args;
+	if (args->end)
+		__set_bit(FR_ASYNC, &req->flags);
+}
+
+ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
+{
+	struct fuse_conn *fc = fm->fc;
 	struct fuse_req *req;
 	ssize_t ret;
 
-	req = fuse_get_req(fc, 0);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	if (args->force) {
+		atomic_inc(&fc->num_waiting);
+		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
+
+		if (!args->nocreds)
+			fuse_force_creds(req);
+
+		__set_bit(FR_WAITING, &req->flags);
+		__set_bit(FR_FORCE, &req->flags);
+	} else {
+		WARN_ON(args->nocreds);
+		req = fuse_get_req(fm, false);
+		if (IS_ERR(req))
+			return PTR_ERR(req);
+	}
 
 	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
 	fuse_adjust_compat(fc, args);
+	fuse_args_to_req(req, args);
 
-	req->in.h.opcode = args->in.h.opcode;
-	req->in.h.nodeid = args->in.h.nodeid;
-	req->in.numargs = args->in.numargs;
-	memcpy(req->in.args, args->in.args,
-	       args->in.numargs * sizeof(struct fuse_in_arg));
-	req->out.argvar = args->out.argvar;
-	req->out.numargs = args->out.numargs;
-	memcpy(req->out.args, args->out.args,
-	       args->out.numargs * sizeof(struct fuse_arg));
-	req->out.canonical_path = args->out.canonical_path;
-	fuse_request_send(fc, req);
+	if (!args->noreply)
+		__set_bit(FR_ISREPLY, &req->flags);
+	__fuse_request_send(req);
 	ret = req->out.h.error;
-	if (!ret && args->out.argvar) {
-		BUG_ON(args->out.numargs != 1);
-		ret = req->out.args[0].size;
+	if (!ret && args->out_argvar) {
+		BUG_ON(args->out_numargs == 0);
+		ret = args->out_args[args->out_numargs - 1].size;
 	}
-	fuse_put_request(fc, req);
+	fuse_put_request(req);
 
 	return ret;
 }
 
-/*
- * Called under fc->lock
- *
- * fc->connected must have been checked previously
- */
-void fuse_request_send_background_locked(struct fuse_conn *fc,
-					 struct fuse_req *req)
+static bool fuse_request_queue_background(struct fuse_req *req)
 {
-	BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
+	struct fuse_mount *fm = req->fm;
+	struct fuse_conn *fc = fm->fc;
+	bool queued = false;
+
+	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
 	if (!test_bit(FR_WAITING, &req->flags)) {
 		__set_bit(FR_WAITING, &req->flags);
 		atomic_inc(&fc->num_waiting);
 	}
 	__set_bit(FR_ISREPLY, &req->flags);
-	fc->num_background++;
-	if (fc->num_background == fc->max_background)
-		fc->blocked = 1;
-	if (fc->num_background == fc->congestion_threshold && fc->sb) {
-		set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
-		set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+	spin_lock(&fc->bg_lock);
+	if (likely(fc->connected)) {
+		fc->num_background++;
+		if (fc->num_background == fc->max_background)
+			fc->blocked = 1;
+		if (fc->num_background == fc->congestion_threshold && fm->sb) {
+			set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+			set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
+		}
+		list_add_tail(&req->list, &fc->bg_queue);
+		flush_bg_queue(fc);
+		queued = true;
 	}
-	list_add_tail(&req->list, &fc->bg_queue);
-	flush_bg_queue(fc);
+	spin_unlock(&fc->bg_lock);
+
+	return queued;
 }
 
-void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
+			   gfp_t gfp_flags)
 {
-	BUG_ON(!req->end);
-	spin_lock(&fc->lock);
-	if (fc->connected) {
-		fuse_request_send_background_locked(fc, req);
-		spin_unlock(&fc->lock);
+	struct fuse_req *req;
+
+	if (args->force) {
+		WARN_ON(!args->nocreds);
+		req = fuse_request_alloc(fm, gfp_flags);
+		if (!req)
+			return -ENOMEM;
+		__set_bit(FR_BACKGROUND, &req->flags);
 	} else {
-		spin_unlock(&fc->lock);
-		req->out.h.error = -ENOTCONN;
-		req->end(fc, req);
-		fuse_put_request(fc, req);
+		WARN_ON(args->nocreds);
+		req = fuse_get_req(fm, true);
+		if (IS_ERR(req))
+			return PTR_ERR(req);
 	}
-}
-EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
-					  struct fuse_req *req, u64 unique)
+	fuse_args_to_req(req, args);
+
+	if (!fuse_request_queue_background(req)) {
+		fuse_put_request(req);
+		return -ENOTCONN;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fuse_simple_background);
+
+static int fuse_simple_notify_reply(struct fuse_mount *fm,
+				    struct fuse_args *args, u64 unique)
 {
-	int err = -ENODEV;
-	struct fuse_iqueue *fiq = &fc->iq;
+	struct fuse_req *req;
+	struct fuse_iqueue *fiq = &fm->fc->iq;
+	int err = 0;
+
+	req = fuse_get_req(fm, false);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
 	__clear_bit(FR_ISREPLY, &req->flags);
 	req->in.h.unique = unique;
+
+	fuse_args_to_req(req, args);
+
 	spin_lock(&fiq->lock);
 	if (fiq->connected) {
-		queue_request(fiq, req);
-		err = 0;
+		queue_request_and_unlock(fiq, req, false);
+	} else {
+		err = -ENODEV;
+		spin_unlock(&fiq->lock);
+		fuse_put_request(req);
 	}
-	spin_unlock(&fiq->lock);
 
 	return err;
-}
-
-void fuse_force_forget(struct file *file, u64 nodeid)
-{
-	struct inode *inode = file_inode(file);
-	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
-	struct fuse_forget_in inarg;
-
-	memset(&inarg, 0, sizeof(inarg));
-	inarg.nlookup = 1;
-	req = fuse_get_req_nofail_nopages(fc, file);
-	req->in.h.opcode = FUSE_FORGET;
-	req->in.h.nodeid = nodeid;
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	__clear_bit(FR_ISREPLY, &req->flags);
-	__fuse_request_send(fc, req);
-	/* ignore errors */
-	fuse_put_request(fc, req);
 }
 
 /*
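[Note: a caller-side sketch of the reworked API. Operations now describe a request in a fuse_args and hand it to fuse_simple_request(); FUSE_ARGS() is the initializer from fuse_i.h, while the opcode and argument layout below are illustrative, not taken from this patch:]

	static int example_getattr(struct fuse_mount *fm, u64 nodeid,
				   struct fuse_attr_out *outarg)
	{
		struct fuse_getattr_in inarg = {};
		FUSE_ARGS(args);

		args.opcode = FUSE_GETATTR;
		args.nodeid = nodeid;
		args.in_numargs = 1;
		args.in_args[0].size = sizeof(inarg);
		args.in_args[0].value = &inarg;
		args.out_numargs = 1;
		args.out_args[0].size = sizeof(*outarg);
		args.out_args[0].value = outarg;

		/* blocks until the daemon replies; returns out.h.error */
		return fuse_simple_request(fm, &args);
	}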
@@ -739,7 +687,11 @@
 			flush_dcache_page(cs->pg);
 			set_page_dirty_lock(cs->pg);
 		}
-		put_page(cs->pg);
+		/*
+		 * The page could be a GUP page (see iov_iter_get_pages in
+		 * fuse_copy_fill) so use put_user_page to release it.
		 */
+		put_user_page(cs->pg);
 	}
 	cs->pg = NULL;
 }
@@ -774,7 +726,7 @@
 		cs->pipebufs++;
 		cs->nr_segs--;
 	} else {
-		if (cs->nr_segs == cs->pipe->buffers)
+		if (cs->nr_segs >= cs->pipe->max_usage)
 			return -EIO;
 
 		page = alloc_page(GFP_HIGHUSER);
@@ -839,10 +791,10 @@
 	       1 << PG_uptodate |
 	       1 << PG_lru |
 	       1 << PG_active |
+	       1 << PG_workingset |
 	       1 << PG_reclaim |
 	       1 << PG_waiters))) {
-		printk(KERN_WARNING "fuse: trying to steal weird page\n");
-		printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+		dump_page(page, "fuse: trying to steal weird page");
 		return 1;
 	}
 	return 0;
@@ -875,7 +827,7 @@
 	if (cs->len != PAGE_SIZE)
 		goto out_fallback;
 
-	if (pipe_buf_steal(cs->pipe, buf) != 0)
+	if (!pipe_buf_try_steal(cs->pipe, buf))
 		goto out_fallback;
 
 	newpage = buf->page;
@@ -910,7 +862,7 @@
 	get_page(newpage);
 
 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-		lru_cache_add_file(newpage);
+		lru_cache_add(newpage);
 
 	/*
 	 * Release while we have extra ref on stolen page.  Otherwise
@@ -962,7 +914,7 @@
 	struct pipe_buffer *buf;
 	int err;
 
-	if (cs->nr_segs == cs->pipe->buffers)
+	if (cs->nr_segs >= cs->pipe->max_usage)
 		return -EIO;
 
 	get_page(page);
@@ -1001,7 +953,17 @@
 
 	while (count) {
 		if (cs->write && cs->pipebufs && page) {
-			return fuse_ref_page(cs, page, offset, count);
+			/*
+			 * Can't control lifetime of pipe buffers, so always
+			 * copy user pages.
			 */
+			if (cs->req->args->user_pages) {
+				err = fuse_copy_fill(cs);
+				if (err)
+					return err;
+			} else {
+				return fuse_ref_page(cs, page, offset, count);
+			}
 		} else if (!cs->len) {
 			if (cs->move_pages && page &&
 			    offset == 0 && count == PAGE_SIZE) {
@@ -1033,14 +995,15 @@
 {
 	unsigned i;
 	struct fuse_req *req = cs->req;
+	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
 
-	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+
+	for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
 		int err;
-		unsigned offset = req->page_descs[i].offset;
-		unsigned count = min(nbytes, req->page_descs[i].length);
+		unsigned int offset = ap->descs[i].offset;
+		unsigned int count = min(nbytes, ap->descs[i].length);
 
-		err = fuse_copy_page(cs, &req->pages[i], offset, count,
-				     zeroing);
+		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
 		if (err)
 			return err;
 
@@ -1111,12 +1074,11 @@
 	int err;
 
 	list_del_init(&req->intr_entry);
-	req->intr_unique = fuse_get_unique(fiq);
 	memset(&ih, 0, sizeof(ih));
 	memset(&arg, 0, sizeof(arg));
 	ih.len = reqsize;
 	ih.opcode = FUSE_INTERRUPT;
-	ih.unique = req->intr_unique;
+	ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
 	arg.unique = req->in.h.unique;
 
 	spin_unlock(&fiq->lock);
@@ -1131,9 +1093,9 @@
 	return err ? err : reqsize;
 }
 
-static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
-					       unsigned max,
-					       unsigned *countp)
+struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
					     unsigned int max,
					     unsigned int *countp)
 {
 	struct fuse_forget_link *head = fiq->forget_list_head.next;
 	struct fuse_forget_link **newhead = &head;
@@ -1152,6 +1114,7 @@
 
 	return head;
 }
+EXPORT_SYMBOL(fuse_dequeue_forget);
 
 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
				   struct fuse_copy_state *cs,
@@ -1159,7 +1122,7 @@
 __releases(fiq->lock)
 {
 	int err;
-	struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
+	struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
 	struct fuse_forget_in arg = {
 		.nlookup = forget->forget_one.nlookup,
 	};
@@ -1207,7 +1170,7 @@
 	}
 
 	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
-	head = dequeue_forget(fiq, max_forgets, &count);
+	head = fuse_dequeue_forget(fiq, max_forgets, &count);
 	spin_unlock(&fiq->lock);
 
 	arg.count = count;
@@ -1252,7 +1215,7 @@
 * the pending list and copies request data to userspace buffer.  If
 * no reply is needed (FORGET) or request has been aborted or there
 * was an error during the copying then it's finished by calling
- * request_end().  Otherwise add it to the processing list, and set
+ * fuse_request_end().  Otherwise add it to the processing list, and set
 * the 'sent' flag.
 */
 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
@@ -1263,8 +1226,27 @@
 	struct fuse_iqueue *fiq = &fc->iq;
 	struct fuse_pqueue *fpq = &fud->pq;
 	struct fuse_req *req;
-	struct fuse_in *in;
+	struct fuse_args *args;
 	unsigned reqsize;
+	unsigned int hash;
+
+	/*
+	 * Require sane minimum read buffer - that has capacity for fixed part
+	 * of any request header + negotiated max_write room for data.
	 *
+	 * Historically libfuse reserves 4K for fixed header room, but e.g.
+	 * GlusterFS reserves only 80 bytes
	 *
+	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
	 *
+	 * which is the absolute minimum any sane filesystem should be using
+	 * for header room.
	 */
+	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
+			   sizeof(struct fuse_in_header) +
+			   sizeof(struct fuse_write_in) +
+			   fc->max_write))
+		return -EINVAL;
 
 restart:
 	for (;;) {
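[Note: working the check above through with concrete numbers, assuming the common 128 KiB negotiated max_write and 64-bit layouts (fuse_in_header and fuse_write_in are 40 bytes each):]

	/* max(FUSE_MIN_READ_BUFFER, 40 + 40 + 131072) = 131152 bytes,
	 * so a reader of /dev/fuse must supply roughly 128 KiB + 80 */
	size_t min_buf = max_t(size_t, FUSE_MIN_READ_BUFFER,
			       sizeof(struct fuse_in_header) +	/*     40 */
			       sizeof(struct fuse_write_in) +	/*     40 */
			       fc->max_write);			/* 131072 */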
@@ -1282,7 +1264,7 @@
 	}
 
 	if (!fiq->connected) {
-		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
+		err = fc->aborted ? -ECONNABORTED : -ENODEV;
 		goto err_unlock;
 	}
 
@@ -1305,16 +1287,16 @@
 	list_del_init(&req->list);
 	spin_unlock(&fiq->lock);
 
-	in = &req->in;
-	reqsize = in->h.len;
+	args = req->args;
+	reqsize = req->in.h.len;
 
 	/* If request is too large, reply with an error and restart the read */
 	if (nbytes < reqsize) {
 		req->out.h.error = -EIO;
 		/* SETXATTR is special, since it may contain too large data */
-		if (in->h.opcode == FUSE_SETXATTR)
+		if (args->opcode == FUSE_SETXATTR)
 			req->out.h.error = -E2BIG;
-		request_end(fc, req);
+		fuse_request_end(req);
 		goto restart;
 	}
 	spin_lock(&fpq->lock);
@@ -1330,15 +1312,15 @@
 	list_add(&req->list, &fpq->io);
 	spin_unlock(&fpq->lock);
 	cs->req = req;
-	err = fuse_copy_one(cs, &in->h, sizeof(in->h));
+	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
 	if (!err)
-		err = fuse_copy_args(cs, in->numargs, in->argpages,
-				     (struct fuse_arg *) in->args, 0);
+		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
				     (struct fuse_arg *) args->in_args, 0);
 	fuse_copy_finish(cs);
 	spin_lock(&fpq->lock);
 	clear_bit(FR_LOCKED, &req->flags);
 	if (!fpq->connected) {
-		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
+		err = fc->aborted ? -ECONNABORTED : -ENODEV;
 		goto out_end;
 	}
 	if (err) {
@@ -1349,15 +1331,16 @@
 		err = reqsize;
 		goto out_end;
 	}
-	list_move_tail(&req->list, &fpq->processing);
+	hash = fuse_req_hash(req->in.h.unique);
+	list_move_tail(&req->list, &fpq->processing[hash]);
 	__fuse_get_request(req);
 	set_bit(FR_SENT, &req->flags);
 	spin_unlock(&fpq->lock);
 	/* matches barrier in request_wait_answer() */
 	smp_mb__after_atomic();
 	if (test_bit(FR_INTERRUPTED, &req->flags))
-		queue_interrupt(fiq, req);
-	fuse_put_request(fc, req);
+		queue_interrupt(req);
+	fuse_put_request(req);
 
 	return reqsize;
 
@@ -1365,7 +1348,7 @@
 	if (!test_bit(FR_PRIVATE, &req->flags))
 		list_del_init(&req->list);
 	spin_unlock(&fpq->lock);
-	request_end(fc, req);
+	fuse_request_end(req);
 	return err;
 
 err_unlock:
@@ -1414,7 +1397,7 @@
 	if (!fud)
 		return -EPERM;
 
-	bufs = kvmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
+	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
			      GFP_KERNEL);
 	if (!bufs)
 		return -ENOMEM;
@@ -1426,7 +1409,7 @@
 	if (ret < 0)
 		goto out;
 
-	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
+	if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
 		ret = -EIO;
 		goto out;
 	}
@@ -1488,11 +1471,8 @@
 	fuse_copy_finish(cs);
 
 	down_read(&fc->killsb);
-	err = -ENOENT;
-	if (fc->sb) {
-		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
-					       outarg.off, outarg.len);
-	}
+	err = fuse_reverse_inval_inode(fc, outarg.ino,
				       outarg.off, outarg.len);
 	up_read(&fc->killsb);
 	return err;
 
@@ -1538,9 +1518,7 @@
 	buf[outarg.namelen] = 0;
 
 	down_read(&fc->killsb);
-	err = -ENOENT;
-	if (fc->sb)
-		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
+	err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
 	up_read(&fc->killsb);
 	kfree(buf);
 	return err;
@@ -1588,10 +1566,7 @@
 	buf[outarg.namelen] = 0;
 
 	down_read(&fc->killsb);
-	err = -ENOENT;
-	if (fc->sb)
-		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
-					       outarg.child, &name);
+	err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
 	up_read(&fc->killsb);
 	kfree(buf);
 	return err;
@@ -1633,10 +1608,7 @@
 	down_read(&fc->killsb);
 
 	err = -ENOENT;
-	if (!fc->sb)
-		goto out_up_killsb;
-
-	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	inode = fuse_ilookup(fc, nodeid, NULL);
 	if (!inode)
 		goto out_up_killsb;
 
@@ -1688,23 +1660,37 @@
 	return err;
 }
 
-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_retrieve_args {
+	struct fuse_args_pages ap;
+	struct fuse_notify_retrieve_in inarg;
+};
+
+static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
			      int error)
 {
-	release_pages(req->pages, req->num_pages);
+	struct fuse_retrieve_args *ra =
+		container_of(args, typeof(*ra), ap.args);
+
+	release_pages(ra->ap.pages, ra->ap.num_pages);
+	kfree(ra);
 }
 
-static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
			 struct fuse_notify_retrieve_out *outarg)
 {
 	int err;
 	struct address_space *mapping = inode->i_mapping;
-	struct fuse_req *req;
 	pgoff_t index;
 	loff_t file_size;
 	unsigned int num;
 	unsigned int offset;
 	size_t total_len = 0;
-	int num_pages;
+	unsigned int num_pages;
+	struct fuse_conn *fc = fm->fc;
+	struct fuse_retrieve_args *ra;
+	size_t args_size = sizeof(*ra);
+	struct fuse_args_pages *ap;
+	struct fuse_args *args;
 
 	offset = outarg->offset & ~PAGE_MASK;
 	file_size = i_size_read(inode);
@@ -1716,21 +1702,28 @@
 		num = file_size - outarg->offset;
 
 	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+	num_pages = min(num_pages, fc->max_pages);
 
-	req = fuse_get_req(fc, num_pages);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
 
-	req->in.h.opcode = FUSE_NOTIFY_REPLY;
-	req->in.h.nodeid = outarg->nodeid;
-	req->in.numargs = 2;
-	req->in.argpages = 1;
-	req->end = fuse_retrieve_end;
+	ra = kzalloc(args_size, GFP_KERNEL);
+	if (!ra)
+		return -ENOMEM;
+
+	ap = &ra->ap;
+	ap->pages = (void *) (ra + 1);
+	ap->descs = (void *) (ap->pages + num_pages);
+
+	args = &ap->args;
+	args->nodeid = outarg->nodeid;
+	args->opcode = FUSE_NOTIFY_REPLY;
+	args->in_numargs = 2;
+	args->in_pages = true;
+	args->end = fuse_retrieve_end;
 
 	index = outarg->offset >> PAGE_SHIFT;
 
-	while (num && req->num_pages < num_pages) {
+	while (num && ap->num_pages < num_pages) {
 		struct page *page;
 		unsigned int this_num;
 
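[Note: the kzalloc() above packs the argument struct and both per-page arrays into a single allocation; the resulting layout, restated:]

	/*
	 *  ra = kzalloc(sizeof(struct fuse_retrieve_args)
	 *               + num_pages * sizeof(struct page *)
	 *               + num_pages * sizeof(struct fuse_page_desc),
	 *               GFP_KERNEL);
	 *
	 *  +---------------------------+------------------+------------------+
	 *  | struct fuse_retrieve_args | pages[num_pages] | descs[num_pages] |
	 *  +---------------------------+------------------+------------------+
	 *  ^ ra                        ^ ap->pages         ^ ap->descs
	 *
	 * so the single kfree(ra) in fuse_retrieve_end() releases everything.
	 */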
@@ -1739,27 +1732,25 @@
 			break;
 
 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
-		req->pages[req->num_pages] = page;
-		req->page_descs[req->num_pages].offset = offset;
-		req->page_descs[req->num_pages].length = this_num;
-		req->num_pages++;
+		ap->pages[ap->num_pages] = page;
+		ap->descs[ap->num_pages].offset = offset;
+		ap->descs[ap->num_pages].length = this_num;
+		ap->num_pages++;
 
 		offset = 0;
 		num -= this_num;
 		total_len += this_num;
 		index++;
 	}
-	req->misc.retrieve_in.offset = outarg->offset;
-	req->misc.retrieve_in.size = total_len;
-	req->in.args[0].size = sizeof(req->misc.retrieve_in);
-	req->in.args[0].value = &req->misc.retrieve_in;
-	req->in.args[1].size = total_len;
+	ra->inarg.offset = outarg->offset;
+	ra->inarg.size = total_len;
+	args->in_args[0].size = sizeof(ra->inarg);
+	args->in_args[0].value = &ra->inarg;
+	args->in_args[1].size = total_len;
 
-	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-	if (err) {
-		fuse_retrieve_end(fc, req);
-		fuse_put_request(fc, req);
-	}
+	err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
+	if (err)
+		fuse_retrieve_end(fm, args, err);
 
 	return err;
 }
@@ -1768,7 +1759,9 @@
			     struct fuse_copy_state *cs)
 {
 	struct fuse_notify_retrieve_out outarg;
+	struct fuse_mount *fm;
 	struct inode *inode;
+	u64 nodeid;
 	int err;
 
 	err = -EINVAL;
@@ -1783,14 +1776,12 @@
 
 	down_read(&fc->killsb);
 	err = -ENOENT;
-	if (fc->sb) {
-		u64 nodeid = outarg.nodeid;
+	nodeid = outarg.nodeid;
 
-		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
-		if (inode) {
-			err = fuse_retrieve(fc, inode, &outarg);
-			iput(inode);
-		}
+	inode = fuse_ilookup(fc, nodeid, &fm);
+	if (inode) {
+		err = fuse_retrieve(fm, inode, &outarg);
+		iput(inode);
 	}
 	up_read(&fc->killsb);
 
@@ -1835,36 +1826,35 @@
 /* Look up request on processing list by unique ID */
 static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
 {
+	unsigned int hash = fuse_req_hash(unique);
 	struct fuse_req *req;
 
-	list_for_each_entry(req, &fpq->processing, list) {
-		if (req->in.h.unique == unique || req->intr_unique == unique)
+	list_for_each_entry(req, &fpq->processing[hash], list) {
+		if (req->in.h.unique == unique)
 			return req;
 	}
 	return NULL;
 }
 
-static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
			 unsigned nbytes)
 {
 	unsigned reqsize = sizeof(struct fuse_out_header);
 
-	if (out->h.error)
-		return nbytes != reqsize ? -EINVAL : 0;
+	reqsize += fuse_len_args(args->out_numargs, args->out_args);
 
-	reqsize += len_args(out->numargs, out->args);
-
-	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+	if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
 		return -EINVAL;
 	else if (reqsize > nbytes) {
-		struct fuse_arg *lastarg = &out->args[out->numargs-1];
+		struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
 		unsigned diffsize = reqsize - nbytes;
+
 		if (diffsize > lastarg->size)
 			return -EINVAL;
 		lastarg->size -= diffsize;
 	}
-	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
-			      out->page_zeroing);
+	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
			      args->out_args, args->page_zeroing);
 }
 
 /*
@@ -1872,7 +1862,7 @@
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
- * The request is finished by calling request_end()
+ * The request is finished by calling fuse_request_end().
 */
 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
				 struct fuse_copy_state *cs, size_t nbytes)
@@ -1883,16 +1873,17 @@
 	struct fuse_req *req;
 	struct fuse_out_header oh;
 
+	err = -EINVAL;
 	if (nbytes < sizeof(struct fuse_out_header))
-		return -EINVAL;
+		goto out;
 
 	err = fuse_copy_one(cs, &oh, sizeof(oh));
 	if (err)
-		goto err_finish;
+		goto copy_finish;
 
 	err = -EINVAL;
 	if (oh.len != nbytes)
-		goto err_finish;
+		goto copy_finish;
 
 	/*
	 * Zero oh.unique indicates unsolicited notification message
@@ -1900,41 +1891,40 @@
	 */
 	if (!oh.unique) {
 		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
-		return err ? err : nbytes;
+		goto out;
 	}
 
 	err = -EINVAL;
 	if (oh.error <= -512 || oh.error > 0)
-		goto err_finish;
+		goto copy_finish;
 
 	spin_lock(&fpq->lock);
+	req = NULL;
+	if (fpq->connected)
+		req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
+
 	err = -ENOENT;
-	if (!fpq->connected)
-		goto err_unlock_pq;
+	if (!req) {
+		spin_unlock(&fpq->lock);
+		goto copy_finish;
+	}
 
-	req = request_find(fpq, oh.unique);
-	if (!req)
-		goto err_unlock_pq;
-
-	/* Is it an interrupt reply? */
-	if (req->intr_unique == oh.unique) {
+	/* Is it an interrupt reply ID? */
+	if (oh.unique & FUSE_INT_REQ_BIT) {
 		__fuse_get_request(req);
 		spin_unlock(&fpq->lock);
 
-		err = -EINVAL;
-		if (nbytes != sizeof(struct fuse_out_header)) {
-			fuse_put_request(fc, req);
-			goto err_finish;
-		}
-
-		if (oh.error == -ENOSYS)
+		err = 0;
+		if (nbytes != sizeof(struct fuse_out_header))
+			err = -EINVAL;
+		else if (oh.error == -ENOSYS)
 			fc->no_interrupt = 1;
 		else if (oh.error == -EAGAIN)
-			queue_interrupt(&fc->iq, req);
-		fuse_put_request(fc, req);
+			err = queue_interrupt(req);
 
-		fuse_copy_finish(cs);
-		return nbytes;
+		fuse_put_request(req);
+
+		goto copy_finish;
 	}
 
 	clear_bit(FR_SENT, &req->flags);
@@ -1943,18 +1933,21 @@
 	set_bit(FR_LOCKED, &req->flags);
 	spin_unlock(&fpq->lock);
 	cs->req = req;
-	if (!req->out.page_replace)
+	if (!req->args->page_replace)
 		cs->move_pages = 0;
 
-	err = copy_out_args(cs, &req->out, nbytes);
+	if (oh.error)
+		err = nbytes != sizeof(oh) ? -EINVAL : 0;
+	else
+		err = copy_out_args(cs, req->args, nbytes);
 	fuse_copy_finish(cs);
 
 	if (!err && req->in.h.opcode == FUSE_CANONICAL_PATH) {
-		char *path = (char *)req->out.args[0].value;
+		char *path = (char *)req->args->out_args[0].value;
 
-		path[req->out.args[0].size - 1] = 0;
+		path[req->args->out_args[0].size - 1] = 0;
 		if (req->out.h.error != -ENOSYS)
-			req->out.h.error = kern_path(path, 0, req->out.canonical_path);
+			req->out.h.error = kern_path(path, 0, req->args->canonical_path);
 	}
 
 	spin_lock(&fpq->lock);
@@ -1967,15 +1960,13 @@
 	list_del_init(&req->list);
 	spin_unlock(&fpq->lock);
 
-	request_end(fc, req);
-
+	fuse_request_end(req);
+out:
 	return err ? err : nbytes;
 
- err_unlock_pq:
-	spin_unlock(&fpq->lock);
- err_finish:
+copy_finish:
 	fuse_copy_finish(cs);
-	return err;
+	goto out;
 }
 
 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
@@ -1998,6 +1989,7 @@
				struct file *out, loff_t *ppos,
				size_t len, unsigned int flags)
 {
+	unsigned int head, tail, mask, count;
 	unsigned nbuf;
 	unsigned idx;
 	struct pipe_buffer *bufs;
@@ -2012,8 +2004,12 @@
 
 	pipe_lock(pipe);
 
-	bufs = kvmalloc_array(pipe->nrbufs, sizeof(struct pipe_buffer),
-			      GFP_KERNEL);
+	head = pipe->head;
+	tail = pipe->tail;
+	mask = pipe->ring_size - 1;
+	count = head - tail;
+
+	bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
 	if (!bufs) {
 		pipe_unlock(pipe);
 		return -ENOMEM;
@@ -2021,8 +2017,8 @@
 
 	nbuf = 0;
 	rem = 0;
-	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
-		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
+	for (idx = tail; idx != head && rem < len; idx++)
+		rem += pipe->bufs[idx & mask].len;
 
 	ret = -EINVAL;
 	if (rem < len)
@@ -2033,16 +2029,17 @@
 		struct pipe_buffer *ibuf;
 		struct pipe_buffer *obuf;
 
-		BUG_ON(nbuf >= pipe->buffers);
-		BUG_ON(!pipe->nrbufs);
-		ibuf = &pipe->bufs[pipe->curbuf];
+		if (WARN_ON(nbuf >= count || tail == head))
+			goto out_free;
+
+		ibuf = &pipe->bufs[tail & mask];
 		obuf = &bufs[nbuf];
 
 		if (rem >= ibuf->len) {
 			*obuf = *ibuf;
 			ibuf->ops = NULL;
-			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
-			pipe->nrbufs--;
+			tail++;
+			pipe->tail = tail;
 		} else {
 			if (!pipe_buf_get(pipe, ibuf))
 				goto out_free;
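[Note: the splice path above follows the newer pipe ring representation, where head and tail are free-running counters over a power-of-two ring. In sketch form:]

	unsigned int head = pipe->head;
	unsigned int tail = pipe->tail;
	unsigned int mask = pipe->ring_size - 1;	/* ring_size is 2^n */

	unsigned int occupancy = head - tail;		/* buffers in flight */
	struct pipe_buffer *next = &pipe->bufs[tail & mask];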
@@ -2104,12 +2101,8 @@
 	return mask;
 }
 
-/*
- * Abort all requests on the given list (pending or processing)
- *
- * This function releases and reacquires fc->lock
- */
-static void end_requests(struct fuse_conn *fc, struct list_head *head)
+/* Abort all requests on the given list (pending or processing) */
+static void end_requests(struct list_head *head)
 {
 	while (!list_empty(head)) {
 		struct fuse_req *req;
@@ -2117,7 +2110,7 @@
 		req->out.h.error = -ECONNABORTED;
 		clear_bit(FR_SENT, &req->flags);
 		list_del_init(&req->list);
-		request_end(fc, req);
+		fuse_request_end(req);
 	}
 }
 
@@ -2145,7 +2138,7 @@
 * The same effect is usually achievable through killing the filesystem daemon
 * and all users of the filesystem.  The exception is the combination of an
 * asynchronous request and the tricky deadlock (see
- * Documentation/filesystems/fuse.txt).
+ * Documentation/filesystems/fuse.rst).
 *
 * Aborting requests under I/O goes as follows: 1: Separate out unlocked
 * requests, they should be finished off immediately.  Locked requests will be
@@ -2154,7 +2147,7 @@
 * is OK, the request will in that case be removed from the list before we touch
 * it.
 */
-void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
+void fuse_abort_conn(struct fuse_conn *fc)
 {
 	struct fuse_iqueue *fiq = &fc->iq;
 
@@ -2163,10 +2156,13 @@
 		struct fuse_dev *fud;
 		struct fuse_req *req, *next;
 		LIST_HEAD(to_end);
+		unsigned int i;
 
+		/* Background queuing checks fc->connected under bg_lock */
+		spin_lock(&fc->bg_lock);
 		fc->connected = 0;
-		fc->blocked = 0;
-		fc->aborted = is_abort;
+		spin_unlock(&fc->bg_lock);
+
 		fuse_set_initialized(fc);
 		list_for_each_entry(fud, &fc->devices, entry) {
 			struct fuse_pqueue *fpq = &fud->pq;
@@ -2184,11 +2180,16 @@
 				}
 				spin_unlock(&req->waitq.lock);
 			}
-			list_splice_tail_init(&fpq->processing, &to_end);
+			for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+				list_splice_tail_init(&fpq->processing[i],
						      &to_end);
 			spin_unlock(&fpq->lock);
 		}
+		spin_lock(&fc->bg_lock);
+		fc->blocked = 0;
 		fc->max_background = UINT_MAX;
 		flush_bg_queue(fc);
+		spin_unlock(&fc->bg_lock);
 
 		spin_lock(&fiq->lock);
 		fiq->connected = 0;
@@ -2196,7 +2197,7 @@
 		clear_bit(FR_PENDING, &req->flags);
 		list_splice_tail_init(&fiq->pending, &to_end);
 		while (forget_pending(fiq))
-			kfree(dequeue_forget(fiq, 1, NULL));
+			kfree(fuse_dequeue_forget(fiq, 1, NULL));
 		wake_up_all(&fiq->waitq);
 		spin_unlock(&fiq->lock);
 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
@@ -2204,7 +2205,7 @@
 		wake_up_all(&fc->blocked_waitq);
 		spin_unlock(&fc->lock);
 
-		end_requests(fc, &to_end);
+		end_requests(&to_end);
 	} else {
 		spin_unlock(&fc->lock);
 	}
@@ -2226,18 +2227,20 @@
 	struct fuse_conn *fc = fud->fc;
 	struct fuse_pqueue *fpq = &fud->pq;
 	LIST_HEAD(to_end);
+	unsigned int i;
 
 	spin_lock(&fpq->lock);
 	WARN_ON(!list_empty(&fpq->io));
-	list_splice_init(&fpq->processing, &to_end);
+	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+		list_splice_init(&fpq->processing[i], &to_end);
 	spin_unlock(&fpq->lock);
 
-	end_requests(fc, &to_end);
+	end_requests(&to_end);
 
 	/* Are we the last open device? */
 	if (atomic_dec_and_test(&fc->dev_count)) {
 		WARN_ON(fc->iq.fasync != NULL);
-		fuse_abort_conn(fc, false);
+		fuse_abort_conn(fc);
 	}
 	fuse_dev_free(fud);
 }
@@ -2263,7 +2266,7 @@
 	if (new->private_data)
 		return -EINVAL;
 
-	fud = fuse_dev_alloc(fc);
+	fud = fuse_dev_alloc_install(fc);
 	if (!fud)
 		return -ENOMEM;
 
@@ -2276,37 +2279,50 @@
 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
			   unsigned long arg)
 {
-	int err = -ENOTTY;
+	int res;
+	int oldfd;
+	struct fuse_dev *fud = NULL;
 
-	if (cmd == FUSE_DEV_IOC_CLONE) {
-		int oldfd;
-
-		err = -EFAULT;
-		if (!get_user(oldfd, (__u32 __user *) arg)) {
+	switch (cmd) {
+	case FUSE_DEV_IOC_CLONE:
+		res = -EFAULT;
+		if (!get_user(oldfd, (__u32 __user *)arg)) {
 			struct file *old = fget(oldfd);
 
-			err = -EINVAL;
+			res = -EINVAL;
 			if (old) {
-				struct fuse_dev *fud = NULL;
-
 				/*
				 * Check against file->f_op because CUSE
				 * uses the same ioctl handler.
				 */
 				if (old->f_op == file->f_op &&
-				    old->f_cred->user_ns == file->f_cred->user_ns)
+				    old->f_cred->user_ns ==
+				    file->f_cred->user_ns)
					fud = fuse_get_dev(old);
 
 				if (fud) {
					mutex_lock(&fuse_mutex);
-					err = fuse_device_clone(fud->fc, file);
+					res = fuse_device_clone(fud->fc, file);
					mutex_unlock(&fuse_mutex);
				}
				fput(old);
			}
		}
+		break;
+	case FUSE_DEV_IOC_PASSTHROUGH_OPEN:
+		res = -EFAULT;
+		if (!get_user(oldfd, (__u32 __user *)arg)) {
+			res = -EINVAL;
+			fud = fuse_get_dev(file);
+			if (fud)
+				res = fuse_passthrough_open(fud, oldfd);
+		}
+		break;
+	default:
+		res = -ENOTTY;
+		break;
 	}
-	return err;
+	return res;
 }
 
 const struct file_operations fuse_dev_operations = {
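[Note: both ioctls above take the fd argument by pointer. A hedged userspace sketch of a daemon cloning its session fd — FUSE_DEV_IOC_CLONE is the long-standing upstream ioctl from linux/fuse.h, while FUSE_DEV_IOC_PASSTHROUGH_OPEN is the Android-specific addition and the header providing it may vary:]

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fuse.h>

	int clone_session_fd(int session_fd)
	{
		int clone_fd = open("/dev/fuse", O_RDWR);

		if (clone_fd < 0)
			return -1;
		/* associate the new fd with the existing session */
		if (ioctl(clone_fd, FUSE_DEV_IOC_CLONE, &session_fd) < 0) {
			close(clone_fd);
			return -1;
		}
		return clone_fd;	/* can now read/write requests too */
	}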
@@ -2321,7 +2337,7 @@
	.release	= fuse_dev_release,
	.fasync		= fuse_dev_fasync,
	.unlocked_ioctl = fuse_dev_ioctl,
-	.compat_ioctl   = fuse_dev_ioctl,
+	.compat_ioctl   = compat_ptr_ioctl,
 };
 EXPORT_SYMBOL_GPL(fuse_dev_operations);
 