hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/splice.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * "splice": joining two ropes together by interweaving their strands.
34 *
....@@ -32,7 +33,6 @@
3233 #include <linux/security.h>
3334 #include <linux/gfp.h>
3435 #include <linux/socket.h>
35
-#include <linux/compat.h>
3636 #include <linux/sched/signal.h>
3737
3838 #include "internal.h"
....@@ -43,8 +43,8 @@
4343 * addition of remove_mapping(). If success is returned, the caller may
4444 * attempt to reuse this page for another destination.
4545 */
46
-static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
47
- struct pipe_buffer *buf)
46
+static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
47
+ struct pipe_buffer *buf)
4848 {
4949 struct page *page = buf->page;
5050 struct address_space *mapping;
....@@ -75,7 +75,7 @@
7575 */
7676 if (remove_mapping(mapping, page)) {
7777 buf->flags |= PIPE_BUF_FLAG_LRU;
78
- return 0;
78
+ return true;
7979 }
8080 }
8181
....@@ -85,7 +85,7 @@
8585 */
8686 out_unlock:
8787 unlock_page(page);
88
- return 1;
88
+ return false;
8989 }
9090
9191 static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
....@@ -138,36 +138,33 @@
138138 }
139139
140140 const struct pipe_buf_operations page_cache_pipe_buf_ops = {
141
- .can_merge = 0,
142
- .confirm = page_cache_pipe_buf_confirm,
143
- .release = page_cache_pipe_buf_release,
144
- .steal = page_cache_pipe_buf_steal,
145
- .get = generic_pipe_buf_get,
141
+ .confirm = page_cache_pipe_buf_confirm,
142
+ .release = page_cache_pipe_buf_release,
143
+ .try_steal = page_cache_pipe_buf_try_steal,
144
+ .get = generic_pipe_buf_get,
146145 };
147146
148
-static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
149
- struct pipe_buffer *buf)
147
+static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe,
148
+ struct pipe_buffer *buf)
150149 {
151150 if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
152
- return 1;
151
+ return false;
153152
154153 buf->flags |= PIPE_BUF_FLAG_LRU;
155
- return generic_pipe_buf_steal(pipe, buf);
154
+ return generic_pipe_buf_try_steal(pipe, buf);
156155 }
157156
158157 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
159
- .can_merge = 0,
160
- .confirm = generic_pipe_buf_confirm,
161
- .release = page_cache_pipe_buf_release,
162
- .steal = user_page_pipe_buf_steal,
163
- .get = generic_pipe_buf_get,
158
+ .release = page_cache_pipe_buf_release,
159
+ .try_steal = user_page_pipe_buf_try_steal,
160
+ .get = generic_pipe_buf_get,
164161 };
165162
166163 static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
167164 {
168165 smp_mb();
169
- if (waitqueue_active(&pipe->wait))
170
- wake_up_interruptible(&pipe->wait);
166
+ if (waitqueue_active(&pipe->rd_wait))
167
+ wake_up_interruptible(&pipe->rd_wait);
171168 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
172169 }
173170
....@@ -186,6 +183,9 @@
186183 struct splice_pipe_desc *spd)
187184 {
188185 unsigned int spd_pages = spd->nr_pages;
186
+ unsigned int tail = pipe->tail;
187
+ unsigned int head = pipe->head;
188
+ unsigned int mask = pipe->ring_size - 1;
189189 int ret = 0, page_nr = 0;
190190
191191 if (!spd_pages)
....@@ -197,9 +197,8 @@
197197 goto out;
198198 }
199199
200
- while (pipe->nrbufs < pipe->buffers) {
201
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
202
- struct pipe_buffer *buf = pipe->bufs + newbuf;
200
+ while (!pipe_full(head, tail, pipe->max_usage)) {
201
+ struct pipe_buffer *buf = &pipe->bufs[head & mask];
203202
204203 buf->page = spd->pages[page_nr];
205204 buf->offset = spd->partial[page_nr].offset;
....@@ -208,7 +207,8 @@
208207 buf->ops = spd->ops;
209208 buf->flags = 0;
210209
211
- pipe->nrbufs++;
210
+ head++;
211
+ pipe->head = head;
212212 page_nr++;
213213 ret += buf->len;
214214
....@@ -229,17 +229,19 @@
229229
230230 ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
231231 {
232
+ unsigned int head = pipe->head;
233
+ unsigned int tail = pipe->tail;
234
+ unsigned int mask = pipe->ring_size - 1;
232235 int ret;
233236
234237 if (unlikely(!pipe->readers)) {
235238 send_sig(SIGPIPE, current, 0);
236239 ret = -EPIPE;
237
- } else if (pipe->nrbufs == pipe->buffers) {
240
+ } else if (pipe_full(head, tail, pipe->max_usage)) {
238241 ret = -EAGAIN;
239242 } else {
240
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
241
- pipe->bufs[newbuf] = *buf;
242
- pipe->nrbufs++;
243
+ pipe->bufs[head & mask] = *buf;
244
+ pipe->head = head + 1;
243245 return buf->len;
244246 }
245247 pipe_buf_release(pipe, buf);
....@@ -253,14 +255,14 @@
253255 */
254256 int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
255257 {
256
- unsigned int buffers = READ_ONCE(pipe->buffers);
258
+ unsigned int max_usage = READ_ONCE(pipe->max_usage);
257259
258
- spd->nr_pages_max = buffers;
259
- if (buffers <= PIPE_DEF_BUFFERS)
260
+ spd->nr_pages_max = max_usage;
261
+ if (max_usage <= PIPE_DEF_BUFFERS)
260262 return 0;
261263
262
- spd->pages = kmalloc_array(buffers, sizeof(struct page *), GFP_KERNEL);
263
- spd->partial = kmalloc_array(buffers, sizeof(struct partial_page),
264
+ spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL);
265
+ spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page),
264266 GFP_KERNEL);
265267
266268 if (spd->pages && spd->partial)
....@@ -299,10 +301,11 @@
299301 {
300302 struct iov_iter to;
301303 struct kiocb kiocb;
302
- int idx, ret;
304
+ unsigned int i_head;
305
+ int ret;
303306
304
- iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
305
- idx = to.idx;
307
+ iov_iter_pipe(&to, READ, pipe, len);
308
+ i_head = to.head;
306309 init_sync_kiocb(&kiocb, in);
307310 kiocb.ki_pos = *ppos;
308311 ret = call_read_iter(in, &kiocb, &to);
....@@ -310,7 +313,7 @@
310313 *ppos = kiocb.ki_pos;
311314 file_accessed(in);
312315 } else if (ret < 0) {
313
- to.idx = idx;
316
+ to.head = i_head;
314317 to.iov_offset = 0;
315318 iov_iter_advance(&to, 0); /* to free what was emitted */
316319 /*
....@@ -323,112 +326,20 @@
323326
324327 return ret;
325328 }
326
-EXPORT_SYMBOL(generic_file_splice_read);
329
+EXPORT_SYMBOL_NS(generic_file_splice_read, ANDROID_GKI_VFS_EXPORT_ONLY);
327330
328331 const struct pipe_buf_operations default_pipe_buf_ops = {
329
- .can_merge = 0,
330
- .confirm = generic_pipe_buf_confirm,
331
- .release = generic_pipe_buf_release,
332
- .steal = generic_pipe_buf_steal,
333
- .get = generic_pipe_buf_get,
332
+ .release = generic_pipe_buf_release,
333
+ .try_steal = generic_pipe_buf_try_steal,
334
+ .get = generic_pipe_buf_get,
334335 };
335
-
336
-int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
337
- struct pipe_buffer *buf)
338
-{
339
- return 1;
340
-}
341336
342337 /* Pipe buffer operations for a socket and similar. */
343338 const struct pipe_buf_operations nosteal_pipe_buf_ops = {
344
- .can_merge = 0,
345
- .confirm = generic_pipe_buf_confirm,
346
- .release = generic_pipe_buf_release,
347
- .steal = generic_pipe_buf_nosteal,
348
- .get = generic_pipe_buf_get,
339
+ .release = generic_pipe_buf_release,
340
+ .get = generic_pipe_buf_get,
349341 };
350342 EXPORT_SYMBOL(nosteal_pipe_buf_ops);
351
-
352
-static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
353
- unsigned long vlen, loff_t offset)
354
-{
355
- mm_segment_t old_fs;
356
- loff_t pos = offset;
357
- ssize_t res;
358
-
359
- old_fs = get_fs();
360
- set_fs(get_ds());
361
- /* The cast to a user pointer is valid due to the set_fs() */
362
- res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
363
- set_fs(old_fs);
364
-
365
- return res;
366
-}
367
-
368
-static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
369
- struct pipe_inode_info *pipe, size_t len,
370
- unsigned int flags)
371
-{
372
- struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
373
- struct iov_iter to;
374
- struct page **pages;
375
- unsigned int nr_pages;
376
- size_t offset, base, copied = 0;
377
- ssize_t res;
378
- int i;
379
-
380
- if (pipe->nrbufs == pipe->buffers)
381
- return -EAGAIN;
382
-
383
- /*
384
- * Try to keep page boundaries matching to source pagecache ones -
385
- * it probably won't be much help, but...
386
- */
387
- offset = *ppos & ~PAGE_MASK;
388
-
389
- iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
390
-
391
- res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
392
- if (res <= 0)
393
- return -ENOMEM;
394
-
395
- nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
396
-
397
- vec = __vec;
398
- if (nr_pages > PIPE_DEF_BUFFERS) {
399
- vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
400
- if (unlikely(!vec)) {
401
- res = -ENOMEM;
402
- goto out;
403
- }
404
- }
405
-
406
- pipe->bufs[to.idx].offset = offset;
407
- pipe->bufs[to.idx].len -= offset;
408
-
409
- for (i = 0; i < nr_pages; i++) {
410
- size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
411
- vec[i].iov_base = page_address(pages[i]) + offset;
412
- vec[i].iov_len = this_len;
413
- len -= this_len;
414
- offset = 0;
415
- }
416
-
417
- res = kernel_readv(in, vec, nr_pages, *ppos);
418
- if (res > 0) {
419
- copied = res;
420
- *ppos += res;
421
- }
422
-
423
- if (vec != __vec)
424
- kfree(vec);
425
-out:
426
- for (i = 0; i < nr_pages; i++)
427
- put_page(pages[i]);
428
- kvfree(pages);
429
- iov_iter_advance(&to, copied); /* truncates and discards */
430
- return res;
431
-}
432343
433344 /*
434345 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
....@@ -446,7 +357,8 @@
446357
447358 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
448359
449
- if (sd->len < sd->total_len && pipe->nrbufs > 1)
360
+ if (sd->len < sd->total_len &&
361
+ pipe_occupancy(pipe->head, pipe->tail) > 1)
450362 more |= MSG_SENDPAGE_NOTLAST;
451363
452364 return file->f_op->sendpage(file, buf->page, buf->offset,
....@@ -456,8 +368,8 @@
456368 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
457369 {
458370 smp_mb();
459
- if (waitqueue_active(&pipe->wait))
460
- wake_up_interruptible(&pipe->wait);
371
+ if (waitqueue_active(&pipe->wr_wait))
372
+ wake_up_interruptible(&pipe->wr_wait);
461373 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
462374 }
463375
....@@ -484,10 +396,13 @@
484396 static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
485397 splice_actor *actor)
486398 {
399
+ unsigned int head = pipe->head;
400
+ unsigned int tail = pipe->tail;
401
+ unsigned int mask = pipe->ring_size - 1;
487402 int ret;
488403
489
- while (pipe->nrbufs) {
490
- struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
404
+ while (!pipe_empty(head, tail)) {
405
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
491406
492407 sd->len = buf->len;
493408 if (sd->len > sd->total_len)
....@@ -514,8 +429,8 @@
514429
515430 if (!buf->len) {
516431 pipe_buf_release(pipe, buf);
517
- pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
518
- pipe->nrbufs--;
432
+ tail++;
433
+ pipe->tail = tail;
519434 if (pipe->files)
520435 sd->need_wakeup = true;
521436 }
....@@ -525,6 +440,22 @@
525440 }
526441
527442 return 1;
443
+}
444
+
445
+/* We know we have a pipe buffer, but maybe it's empty? */
446
+static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
447
+{
448
+ unsigned int tail = pipe->tail;
449
+ unsigned int mask = pipe->ring_size - 1;
450
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
451
+
452
+ if (unlikely(!buf->len)) {
453
+ pipe_buf_release(pipe, buf);
454
+ pipe->tail = tail+1;
455
+ return true;
456
+ }
457
+
458
+ return false;
528459 }
529460
530461 /**
....@@ -546,11 +477,12 @@
546477 if (signal_pending(current))
547478 return -ERESTARTSYS;
548479
549
- while (!pipe->nrbufs) {
480
+repeat:
481
+ while (pipe_empty(pipe->head, pipe->tail)) {
550482 if (!pipe->writers)
551483 return 0;
552484
553
- if (!pipe->waiting_writers && sd->num_spliced)
485
+ if (sd->num_spliced)
554486 return 0;
555487
556488 if (sd->flags & SPLICE_F_NONBLOCK)
....@@ -564,8 +496,11 @@
564496 sd->need_wakeup = false;
565497 }
566498
567
- pipe_wait(pipe);
499
+ pipe_wait_readable(pipe);
568500 }
501
+
502
+ if (eat_empty_buffer(pipe))
503
+ goto repeat;
569504
570505 return 1;
571506 }
....@@ -689,7 +624,7 @@
689624 .pos = *ppos,
690625 .u.file = out,
691626 };
692
- int nbufs = pipe->buffers;
627
+ int nbufs = pipe->max_usage;
693628 struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
694629 GFP_KERNEL);
695630 ssize_t ret;
....@@ -702,16 +637,17 @@
702637 splice_from_pipe_begin(&sd);
703638 while (sd.total_len) {
704639 struct iov_iter from;
640
+ unsigned int head, tail, mask;
705641 size_t left;
706
- int n, idx;
642
+ int n;
707643
708644 ret = splice_from_pipe_next(pipe, &sd);
709645 if (ret <= 0)
710646 break;
711647
712
- if (unlikely(nbufs < pipe->buffers)) {
648
+ if (unlikely(nbufs < pipe->max_usage)) {
713649 kfree(array);
714
- nbufs = pipe->buffers;
650
+ nbufs = pipe->max_usage;
715651 array = kcalloc(nbufs, sizeof(struct bio_vec),
716652 GFP_KERNEL);
717653 if (!array) {
....@@ -720,17 +656,18 @@
720656 }
721657 }
722658
659
+ head = pipe->head;
660
+ tail = pipe->tail;
661
+ mask = pipe->ring_size - 1;
662
+
723663 /* build the vector */
724664 left = sd.total_len;
725
- for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
726
- struct pipe_buffer *buf = pipe->bufs + idx;
665
+ for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++, n++) {
666
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
727667 size_t this_len = buf->len;
728668
729669 if (this_len > left)
730670 this_len = left;
731
-
732
- if (idx == pipe->buffers - 1)
733
- idx = -1;
734671
735672 ret = pipe_buf_confirm(pipe, buf);
736673 if (unlikely(ret)) {
....@@ -745,8 +682,7 @@
745682 left -= this_len;
746683 }
747684
748
- iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
749
- sd.total_len - left);
685
+ iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
750686 ret = vfs_iter_write(out, &from, &sd.pos, 0);
751687 if (ret <= 0)
752688 break;
....@@ -756,14 +692,15 @@
756692 *ppos = sd.pos;
757693
758694 /* dismiss the fully eaten buffers, adjust the partial one */
695
+ tail = pipe->tail;
759696 while (ret) {
760
- struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
697
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
761698 if (ret >= buf->len) {
762699 ret -= buf->len;
763700 buf->len = 0;
764701 pipe_buf_release(pipe, buf);
765
- pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
766
- pipe->nrbufs--;
702
+ tail++;
703
+ pipe->tail = tail;
767704 if (pipe->files)
768705 sd.need_wakeup = true;
769706 } else {
....@@ -785,34 +722,7 @@
785722 return ret;
786723 }
787724
788
-EXPORT_SYMBOL(iter_file_splice_write);
789
-
790
-static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
791
- struct splice_desc *sd)
792
-{
793
- int ret;
794
- void *data;
795
- loff_t tmp = sd->pos;
796
-
797
- data = kmap(buf->page);
798
- ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
799
- kunmap(buf->page);
800
-
801
- return ret;
802
-}
803
-
804
-static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
805
- struct file *out, loff_t *ppos,
806
- size_t len, unsigned int flags)
807
-{
808
- ssize_t ret;
809
-
810
- ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
811
- if (ret > 0)
812
- *ppos += ret;
813
-
814
- return ret;
815
-}
725
+EXPORT_SYMBOL_NS(iter_file_splice_write, ANDROID_GKI_VFS_EXPORT_ONLY);
816726
817727 /**
818728 * generic_splice_sendpage - splice data from a pipe to a socket
....@@ -835,21 +745,23 @@
835745
836746 EXPORT_SYMBOL(generic_splice_sendpage);
837747
748
+static int warn_unsupported(struct file *file, const char *op)
749
+{
750
+ pr_debug_ratelimited(
751
+ "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
752
+ op, file, current->pid, current->comm);
753
+ return -EINVAL;
754
+}
755
+
838756 /*
839757 * Attempt to initiate a splice from pipe to file.
840758 */
841759 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
842760 loff_t *ppos, size_t len, unsigned int flags)
843761 {
844
- ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
845
- loff_t *, size_t, unsigned int);
846
-
847
- if (out->f_op->splice_write)
848
- splice_write = out->f_op->splice_write;
849
- else
850
- splice_write = default_file_splice_write;
851
-
852
- return splice_write(pipe, out, ppos, len, flags);
762
+ if (unlikely(!out->f_op->splice_write))
763
+ return warn_unsupported(out, "write");
764
+ return out->f_op->splice_write(pipe, out, ppos, len, flags);
853765 }
854766
855767 /*
....@@ -859,8 +771,6 @@
859771 struct pipe_inode_info *pipe, size_t len,
860772 unsigned int flags)
861773 {
862
- ssize_t (*splice_read)(struct file *, loff_t *,
863
- struct pipe_inode_info *, size_t, unsigned int);
864774 int ret;
865775
866776 if (unlikely(!(in->f_mode & FMODE_READ)))
....@@ -873,12 +783,9 @@
873783 if (unlikely(len > MAX_RW_COUNT))
874784 len = MAX_RW_COUNT;
875785
876
- if (in->f_op->splice_read)
877
- splice_read = in->f_op->splice_read;
878
- else
879
- splice_read = default_file_splice_read;
880
-
881
- return splice_read(in, ppos, pipe, len, flags);
786
+ if (unlikely(!in->f_op->splice_read))
787
+ return warn_unsupported(in, "read");
788
+ return in->f_op->splice_read(in, ppos, pipe, len, flags);
882789 }
883790
884791 /**
....@@ -946,16 +853,17 @@
946853 sd->flags &= ~SPLICE_F_NONBLOCK;
947854 more = sd->flags & SPLICE_F_MORE;
948855
949
- WARN_ON_ONCE(pipe->nrbufs != 0);
856
+ WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
950857
951858 while (len) {
952
- unsigned int pipe_pages;
859
+ unsigned int p_space;
953860 size_t read_len;
954861 loff_t pos = sd->pos, prev_pos = pos;
955862
956863 /* Don't try to read more the pipe has space for. */
957
- pipe_pages = pipe->buffers - pipe->nrbufs;
958
- read_len = min(len, (size_t)pipe_pages << PAGE_SHIFT);
864
+ p_space = pipe->max_usage -
865
+ pipe_occupancy(pipe->head, pipe->tail);
866
+ read_len = min_t(size_t, len, p_space << PAGE_SHIFT);
959867 ret = do_splice_to(in, &pos, pipe, read_len, flags);
960868 if (unlikely(ret <= 0))
961869 goto out_release;
....@@ -994,7 +902,7 @@
994902 }
995903
996904 done:
997
- pipe->nrbufs = pipe->curbuf = 0;
905
+ pipe->tail = pipe->head = 0;
998906 file_accessed(in);
999907 return bytes;
1000908
....@@ -1003,8 +911,8 @@
1003911 * If we did an incomplete transfer we must release
1004912 * the pipe buffers in question:
1005913 */
1006
- for (i = 0; i < pipe->buffers; i++) {
1007
- struct pipe_buffer *buf = pipe->bufs + i;
914
+ for (i = 0; i < pipe->ring_size; i++) {
915
+ struct pipe_buffer *buf = &pipe->bufs[i];
1008916
1009917 if (buf->ops)
1010918 pipe_buf_release(pipe, buf);
....@@ -1080,15 +988,13 @@
1080988 send_sig(SIGPIPE, current, 0);
1081989 return -EPIPE;
1082990 }
1083
- if (pipe->nrbufs != pipe->buffers)
991
+ if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
1084992 return 0;
1085993 if (flags & SPLICE_F_NONBLOCK)
1086994 return -EAGAIN;
1087995 if (signal_pending(current))
1088996 return -ERESTARTSYS;
1089
- pipe->waiting_writers++;
1090
- pipe_wait(pipe);
1091
- pipe->waiting_writers--;
997
+ pipe_wait_writable(pipe);
1092998 }
1093999 }
10941000
....@@ -1099,31 +1005,31 @@
10991005 /*
11001006 * Determine where to splice to/from.
11011007 */
1102
-static long do_splice(struct file *in, loff_t __user *off_in,
1103
- struct file *out, loff_t __user *off_out,
1104
- size_t len, unsigned int flags)
1008
+long do_splice(struct file *in, loff_t *off_in, struct file *out,
1009
+ loff_t *off_out, size_t len, unsigned int flags)
11051010 {
11061011 struct pipe_inode_info *ipipe;
11071012 struct pipe_inode_info *opipe;
11081013 loff_t offset;
11091014 long ret;
11101015
1111
- ipipe = get_pipe_info(in);
1112
- opipe = get_pipe_info(out);
1016
+ if (unlikely(!(in->f_mode & FMODE_READ) ||
1017
+ !(out->f_mode & FMODE_WRITE)))
1018
+ return -EBADF;
1019
+
1020
+ ipipe = get_pipe_info(in, true);
1021
+ opipe = get_pipe_info(out, true);
11131022
11141023 if (ipipe && opipe) {
11151024 if (off_in || off_out)
11161025 return -ESPIPE;
11171026
1118
- if (!(in->f_mode & FMODE_READ))
1119
- return -EBADF;
1120
-
1121
- if (!(out->f_mode & FMODE_WRITE))
1122
- return -EBADF;
1123
-
11241027 /* Splicing to self would be fun, but... */
11251028 if (ipipe == opipe)
11261029 return -EINVAL;
1030
+
1031
+ if ((in->f_flags | out->f_flags) & O_NONBLOCK)
1032
+ flags |= SPLICE_F_NONBLOCK;
11271033
11281034 return splice_pipe_to_pipe(ipipe, opipe, len, flags);
11291035 }
....@@ -1134,14 +1040,10 @@
11341040 if (off_out) {
11351041 if (!(out->f_mode & FMODE_PWRITE))
11361042 return -EINVAL;
1137
- if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1138
- return -EFAULT;
1043
+ offset = *off_out;
11391044 } else {
11401045 offset = out->f_pos;
11411046 }
1142
-
1143
- if (unlikely(!(out->f_mode & FMODE_WRITE)))
1144
- return -EBADF;
11451047
11461048 if (unlikely(out->f_flags & O_APPEND))
11471049 return -EINVAL;
....@@ -1150,14 +1052,17 @@
11501052 if (unlikely(ret < 0))
11511053 return ret;
11521054
1055
+ if (in->f_flags & O_NONBLOCK)
1056
+ flags |= SPLICE_F_NONBLOCK;
1057
+
11531058 file_start_write(out);
11541059 ret = do_splice_from(ipipe, out, &offset, len, flags);
11551060 file_end_write(out);
11561061
11571062 if (!off_out)
11581063 out->f_pos = offset;
1159
- else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
1160
- ret = -EFAULT;
1064
+ else
1065
+ *off_out = offset;
11611066
11621067 return ret;
11631068 }
....@@ -1168,20 +1073,22 @@
11681073 if (off_in) {
11691074 if (!(in->f_mode & FMODE_PREAD))
11701075 return -EINVAL;
1171
- if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1172
- return -EFAULT;
1076
+ offset = *off_in;
11731077 } else {
11741078 offset = in->f_pos;
11751079 }
11761080
1081
+ if (out->f_flags & O_NONBLOCK)
1082
+ flags |= SPLICE_F_NONBLOCK;
1083
+
11771084 pipe_lock(opipe);
11781085 ret = wait_for_space(opipe, flags);
11791086 if (!ret) {
1180
- unsigned int pipe_pages;
1087
+ unsigned int p_space;
11811088
11821089 /* Don't try to read more the pipe has space for. */
1183
- pipe_pages = opipe->buffers - opipe->nrbufs;
1184
- len = min(len, (size_t)pipe_pages << PAGE_SHIFT);
1090
+ p_space = opipe->max_usage - pipe_occupancy(opipe->head, opipe->tail);
1091
+ len = min_t(size_t, len, p_space << PAGE_SHIFT);
11851092
11861093 ret = do_splice_to(in, &offset, opipe, len, flags);
11871094 }
....@@ -1190,13 +1097,53 @@
11901097 wakeup_pipe_readers(opipe);
11911098 if (!off_in)
11921099 in->f_pos = offset;
1193
- else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
1194
- ret = -EFAULT;
1100
+ else
1101
+ *off_in = offset;
11951102
11961103 return ret;
11971104 }
11981105
11991106 return -EINVAL;
1107
+}
1108
+
1109
+static long __do_splice(struct file *in, loff_t __user *off_in,
1110
+ struct file *out, loff_t __user *off_out,
1111
+ size_t len, unsigned int flags)
1112
+{
1113
+ struct pipe_inode_info *ipipe;
1114
+ struct pipe_inode_info *opipe;
1115
+ loff_t offset, *__off_in = NULL, *__off_out = NULL;
1116
+ long ret;
1117
+
1118
+ ipipe = get_pipe_info(in, true);
1119
+ opipe = get_pipe_info(out, true);
1120
+
1121
+ if (ipipe && off_in)
1122
+ return -ESPIPE;
1123
+ if (opipe && off_out)
1124
+ return -ESPIPE;
1125
+
1126
+ if (off_out) {
1127
+ if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1128
+ return -EFAULT;
1129
+ __off_out = &offset;
1130
+ }
1131
+ if (off_in) {
1132
+ if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1133
+ return -EFAULT;
1134
+ __off_in = &offset;
1135
+ }
1136
+
1137
+ ret = do_splice(in, __off_in, out, __off_out, len, flags);
1138
+ if (ret < 0)
1139
+ return ret;
1140
+
1141
+ if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
1142
+ return -EFAULT;
1143
+ if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
1144
+ return -EFAULT;
1145
+
1146
+ return ret;
12001147 }
12011148
12021149 static int iter_to_pipe(struct iov_iter *from,
....@@ -1259,7 +1206,7 @@
12591206 static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
12601207 unsigned int flags)
12611208 {
1262
- struct pipe_inode_info *pipe = get_pipe_info(file);
1209
+ struct pipe_inode_info *pipe = get_pipe_info(file, true);
12631210 struct splice_desc sd = {
12641211 .total_len = iov_iter_count(iter),
12651212 .flags = flags,
....@@ -1294,7 +1241,7 @@
12941241 if (flags & SPLICE_F_GIFT)
12951242 buf_flag = PIPE_BUF_FLAG_GIFT;
12961243
1297
- pipe = get_pipe_info(file);
1244
+ pipe = get_pipe_info(file, true);
12981245 if (!pipe)
12991246 return -EBADF;
13001247
....@@ -1339,29 +1286,18 @@
13391286 * Currently we punt and implement it as a normal copy, see pipe_to_user().
13401287 *
13411288 */
1342
-static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags)
1343
-{
1344
- if (unlikely(flags & ~SPLICE_F_ALL))
1345
- return -EINVAL;
1346
-
1347
- if (!iov_iter_count(iter))
1348
- return 0;
1349
-
1350
- if (iov_iter_rw(iter) == WRITE)
1351
- return vmsplice_to_pipe(f, iter, flags);
1352
- else
1353
- return vmsplice_to_user(f, iter, flags);
1354
-}
1355
-
13561289 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
13571290 unsigned long, nr_segs, unsigned int, flags)
13581291 {
13591292 struct iovec iovstack[UIO_FASTIOV];
13601293 struct iovec *iov = iovstack;
13611294 struct iov_iter iter;
1362
- long error;
1295
+ ssize_t error;
13631296 struct fd f;
13641297 int type;
1298
+
1299
+ if (unlikely(flags & ~SPLICE_F_ALL))
1300
+ return -EINVAL;
13651301
13661302 f = fdget(fd);
13671303 error = vmsplice_type(f, &type);
....@@ -1370,40 +1306,21 @@
13701306
13711307 error = import_iovec(type, uiov, nr_segs,
13721308 ARRAY_SIZE(iovstack), &iov, &iter);
1373
- if (!error) {
1374
- error = do_vmsplice(f.file, &iter, flags);
1375
- kfree(iov);
1376
- }
1309
+ if (error < 0)
1310
+ goto out_fdput;
1311
+
1312
+ if (!iov_iter_count(&iter))
1313
+ error = 0;
1314
+ else if (iov_iter_rw(&iter) == WRITE)
1315
+ error = vmsplice_to_pipe(f.file, &iter, flags);
1316
+ else
1317
+ error = vmsplice_to_user(f.file, &iter, flags);
1318
+
1319
+ kfree(iov);
1320
+out_fdput:
13771321 fdput(f);
13781322 return error;
13791323 }
1380
-
1381
-#ifdef CONFIG_COMPAT
1382
-COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
1383
- unsigned int, nr_segs, unsigned int, flags)
1384
-{
1385
- struct iovec iovstack[UIO_FASTIOV];
1386
- struct iovec *iov = iovstack;
1387
- struct iov_iter iter;
1388
- long error;
1389
- struct fd f;
1390
- int type;
1391
-
1392
- f = fdget(fd);
1393
- error = vmsplice_type(f, &type);
1394
- if (error)
1395
- return error;
1396
-
1397
- error = compat_import_iovec(type, iov32, nr_segs,
1398
- ARRAY_SIZE(iovstack), &iov, &iter);
1399
- if (!error) {
1400
- error = do_vmsplice(f.file, &iter, flags);
1401
- kfree(iov);
1402
- }
1403
- fdput(f);
1404
- return error;
1405
-}
1406
-#endif
14071324
14081325 SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
14091326 int, fd_out, loff_t __user *, off_out,
....@@ -1421,15 +1338,11 @@
14211338 error = -EBADF;
14221339 in = fdget(fd_in);
14231340 if (in.file) {
1424
- if (in.file->f_mode & FMODE_READ) {
1425
- out = fdget(fd_out);
1426
- if (out.file) {
1427
- if (out.file->f_mode & FMODE_WRITE)
1428
- error = do_splice(in.file, off_in,
1429
- out.file, off_out,
1430
- len, flags);
1431
- fdput(out);
1432
- }
1341
+ out = fdget(fd_out);
1342
+ if (out.file) {
1343
+ error = __do_splice(in.file, off_in, out.file, off_out,
1344
+ len, flags);
1345
+ fdput(out);
14331346 }
14341347 fdput(in);
14351348 }
....@@ -1445,29 +1358,27 @@
14451358 int ret;
14461359
14471360 /*
1448
- * Check ->nrbufs without the inode lock first. This function
1361
+ * Check the pipe occupancy without the inode lock first. This function
14491362 * is speculative anyways, so missing one is ok.
14501363 */
1451
- if (pipe->nrbufs)
1364
+ if (!pipe_empty(pipe->head, pipe->tail))
14521365 return 0;
14531366
14541367 ret = 0;
14551368 pipe_lock(pipe);
14561369
1457
- while (!pipe->nrbufs) {
1370
+ while (pipe_empty(pipe->head, pipe->tail)) {
14581371 if (signal_pending(current)) {
14591372 ret = -ERESTARTSYS;
14601373 break;
14611374 }
14621375 if (!pipe->writers)
14631376 break;
1464
- if (!pipe->waiting_writers) {
1465
- if (flags & SPLICE_F_NONBLOCK) {
1466
- ret = -EAGAIN;
1467
- break;
1468
- }
1377
+ if (flags & SPLICE_F_NONBLOCK) {
1378
+ ret = -EAGAIN;
1379
+ break;
14691380 }
1470
- pipe_wait(pipe);
1381
+ pipe_wait_readable(pipe);
14711382 }
14721383
14731384 pipe_unlock(pipe);
....@@ -1483,16 +1394,16 @@
14831394 int ret;
14841395
14851396 /*
1486
- * Check ->nrbufs without the inode lock first. This function
1397
+ * Check pipe occupancy without the inode lock first. This function
14871398 * is speculative anyways, so missing one is ok.
14881399 */
1489
- if (pipe->nrbufs < pipe->buffers)
1400
+ if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
14901401 return 0;
14911402
14921403 ret = 0;
14931404 pipe_lock(pipe);
14941405
1495
- while (pipe->nrbufs >= pipe->buffers) {
1406
+ while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
14961407 if (!pipe->readers) {
14971408 send_sig(SIGPIPE, current, 0);
14981409 ret = -EPIPE;
....@@ -1506,9 +1417,7 @@
15061417 ret = -ERESTARTSYS;
15071418 break;
15081419 }
1509
- pipe->waiting_writers++;
1510
- pipe_wait(pipe);
1511
- pipe->waiting_writers--;
1420
+ pipe_wait_writable(pipe);
15121421 }
15131422
15141423 pipe_unlock(pipe);
....@@ -1523,7 +1432,10 @@
15231432 size_t len, unsigned int flags)
15241433 {
15251434 struct pipe_buffer *ibuf, *obuf;
1526
- int ret = 0, nbuf;
1435
+ unsigned int i_head, o_head;
1436
+ unsigned int i_tail, o_tail;
1437
+ unsigned int i_mask, o_mask;
1438
+ int ret = 0;
15271439 bool input_wakeup = false;
15281440
15291441
....@@ -1543,7 +1455,14 @@
15431455 */
15441456 pipe_double_lock(ipipe, opipe);
15451457
1458
+ i_tail = ipipe->tail;
1459
+ i_mask = ipipe->ring_size - 1;
1460
+ o_head = opipe->head;
1461
+ o_mask = opipe->ring_size - 1;
1462
+
15461463 do {
1464
+ size_t o_len;
1465
+
15471466 if (!opipe->readers) {
15481467 send_sig(SIGPIPE, current, 0);
15491468 if (!ret)
....@@ -1551,14 +1470,18 @@
15511470 break;
15521471 }
15531472
1554
- if (!ipipe->nrbufs && !ipipe->writers)
1473
+ i_head = ipipe->head;
1474
+ o_tail = opipe->tail;
1475
+
1476
+ if (pipe_empty(i_head, i_tail) && !ipipe->writers)
15551477 break;
15561478
15571479 /*
15581480 * Cannot make any progress, because either the input
15591481 * pipe is empty or the output pipe is full.
15601482 */
1561
- if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
1483
+ if (pipe_empty(i_head, i_tail) ||
1484
+ pipe_full(o_head, o_tail, opipe->max_usage)) {
15621485 /* Already processed some buffers, break */
15631486 if (ret)
15641487 break;
....@@ -1578,9 +1501,8 @@
15781501 goto retry;
15791502 }
15801503
1581
- ibuf = ipipe->bufs + ipipe->curbuf;
1582
- nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
1583
- obuf = opipe->bufs + nbuf;
1504
+ ibuf = &ipipe->bufs[i_tail & i_mask];
1505
+ obuf = &opipe->bufs[o_head & o_mask];
15841506
15851507 if (len >= ibuf->len) {
15861508 /*
....@@ -1588,10 +1510,12 @@
15881510 */
15891511 *obuf = *ibuf;
15901512 ibuf->ops = NULL;
1591
- opipe->nrbufs++;
1592
- ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
1593
- ipipe->nrbufs--;
1513
+ i_tail++;
1514
+ ipipe->tail = i_tail;
15941515 input_wakeup = true;
1516
+ o_len = obuf->len;
1517
+ o_head++;
1518
+ opipe->head = o_head;
15951519 } else {
15961520 /*
15971521 * Get a reference to this pipe buffer,
....@@ -1605,20 +1529,21 @@
16051529 *obuf = *ibuf;
16061530
16071531 /*
1608
- * Don't inherit the gift flag, we need to
1532
+ * Don't inherit the gift and merge flags, we need to
16091533 * prevent multiple steals of this page.
16101534 */
16111535 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1612
-
1613
- pipe_buf_mark_unmergeable(obuf);
1536
+ obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
16141537
16151538 obuf->len = len;
1616
- opipe->nrbufs++;
1617
- ibuf->offset += obuf->len;
1618
- ibuf->len -= obuf->len;
1539
+ ibuf->offset += len;
1540
+ ibuf->len -= len;
1541
+ o_len = len;
1542
+ o_head++;
1543
+ opipe->head = o_head;
16191544 }
1620
- ret += obuf->len;
1621
- len -= obuf->len;
1545
+ ret += o_len;
1546
+ len -= o_len;
16221547 } while (len);
16231548
16241549 pipe_unlock(ipipe);
....@@ -1644,7 +1569,10 @@
16441569 size_t len, unsigned int flags)
16451570 {
16461571 struct pipe_buffer *ibuf, *obuf;
1647
- int ret = 0, i = 0, nbuf;
1572
+ unsigned int i_head, o_head;
1573
+ unsigned int i_tail, o_tail;
1574
+ unsigned int i_mask, o_mask;
1575
+ int ret = 0;
16481576
16491577 /*
16501578 * Potential ABBA deadlock, work around it by ordering lock
....@@ -1652,6 +1580,11 @@
16521580 * could deadlock (one doing tee from A -> B, the other from B -> A).
16531581 */
16541582 pipe_double_lock(ipipe, opipe);
1583
+
1584
+ i_tail = ipipe->tail;
1585
+ i_mask = ipipe->ring_size - 1;
1586
+ o_head = opipe->head;
1587
+ o_mask = opipe->ring_size - 1;
16551588
16561589 do {
16571590 if (!opipe->readers) {
....@@ -1661,15 +1594,19 @@
16611594 break;
16621595 }
16631596
1597
+ i_head = ipipe->head;
1598
+ o_tail = opipe->tail;
1599
+
16641600 /*
1665
- * If we have iterated all input buffers or ran out of
1601
+ * If we have iterated all input buffers or run out of
16661602 * output room, break.
16671603 */
1668
- if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
1604
+ if (pipe_empty(i_head, i_tail) ||
1605
+ pipe_full(o_head, o_tail, opipe->max_usage))
16691606 break;
16701607
1671
- ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1));
1672
- nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
1608
+ ibuf = &ipipe->bufs[i_tail & i_mask];
1609
+ obuf = &opipe->bufs[o_head & o_mask];
16731610
16741611 /*
16751612 * Get a reference to this pipe buffer,
....@@ -1681,32 +1618,24 @@
16811618 break;
16821619 }
16831620
1684
- obuf = opipe->bufs + nbuf;
16851621 *obuf = *ibuf;
16861622
16871623 /*
1688
- * Don't inherit the gift flag, we need to
1689
- * prevent multiple steals of this page.
1624
+ * Don't inherit the gift and merge flag, we need to prevent
1625
+ * multiple steals of this page.
16901626 */
16911627 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1692
-
1693
- pipe_buf_mark_unmergeable(obuf);
1628
+ obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
16941629
16951630 if (obuf->len > len)
16961631 obuf->len = len;
1697
-
1698
- opipe->nrbufs++;
16991632 ret += obuf->len;
17001633 len -= obuf->len;
1701
- i++;
1702
- } while (len);
17031634
1704
- /*
1705
- * return EAGAIN if we have the potential of some data in the
1706
- * future, otherwise just return 0
1707
- */
1708
- if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
1709
- ret = -EAGAIN;
1635
+ o_head++;
1636
+ opipe->head = o_head;
1637
+ i_tail++;
1638
+ } while (len);
17101639
17111640 pipe_unlock(ipipe);
17121641 pipe_unlock(opipe);
....@@ -1726,18 +1655,24 @@
17261655 * The 'flags' used are the SPLICE_F_* variants, currently the only
17271656 * applicable one is SPLICE_F_NONBLOCK.
17281657 */
1729
-static long do_tee(struct file *in, struct file *out, size_t len,
1730
- unsigned int flags)
1658
+long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
17311659 {
1732
- struct pipe_inode_info *ipipe = get_pipe_info(in);
1733
- struct pipe_inode_info *opipe = get_pipe_info(out);
1660
+ struct pipe_inode_info *ipipe = get_pipe_info(in, true);
1661
+ struct pipe_inode_info *opipe = get_pipe_info(out, true);
17341662 int ret = -EINVAL;
1663
+
1664
+ if (unlikely(!(in->f_mode & FMODE_READ) ||
1665
+ !(out->f_mode & FMODE_WRITE)))
1666
+ return -EBADF;
17351667
17361668 /*
17371669 * Duplicate the contents of ipipe to opipe without actually
17381670 * copying the data.
17391671 */
17401672 if (ipipe && opipe && ipipe != opipe) {
1673
+ if ((in->f_flags | out->f_flags) & O_NONBLOCK)
1674
+ flags |= SPLICE_F_NONBLOCK;
1675
+
17411676 /*
17421677 * Keep going, unless we encounter an error. The ipipe/opipe
17431678 * ordering doesn't really matter.
....@@ -1755,7 +1690,7 @@
17551690
17561691 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
17571692 {
1758
- struct fd in;
1693
+ struct fd in, out;
17591694 int error;
17601695
17611696 if (unlikely(flags & ~SPLICE_F_ALL))
....@@ -1767,14 +1702,10 @@
17671702 error = -EBADF;
17681703 in = fdget(fdin);
17691704 if (in.file) {
1770
- if (in.file->f_mode & FMODE_READ) {
1771
- struct fd out = fdget(fdout);
1772
- if (out.file) {
1773
- if (out.file->f_mode & FMODE_WRITE)
1774
- error = do_tee(in.file, out.file,
1775
- len, flags);
1776
- fdput(out);
1777
- }
1705
+ out = fdget(fdout);
1706
+ if (out.file) {
1707
+ error = do_tee(in.file, out.file, len, flags);
1708
+ fdput(out);
17781709 }
17791710 fdput(in);
17801711 }