From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/splice.c | 657 ++++++++++++++++++++++++++--------------------------------- 1 files changed, 294 insertions(+), 363 deletions(-) diff --git a/kernel/fs/splice.c b/kernel/fs/splice.c index fd28c7d..036a479 100644 --- a/kernel/fs/splice.c +++ b/kernel/fs/splice.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * "splice": joining two ropes together by interweaving their strands. * @@ -32,7 +33,6 @@ #include <linux/security.h> #include <linux/gfp.h> #include <linux/socket.h> -#include <linux/compat.h> #include <linux/sched/signal.h> #include "internal.h" @@ -43,8 +43,8 @@ * addition of remove_mapping(). If success is returned, the caller may * attempt to reuse this page for another destination. */ -static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; struct address_space *mapping; @@ -75,7 +75,7 @@ */ if (remove_mapping(mapping, page)) { buf->flags |= PIPE_BUF_FLAG_LRU; - return 0; + return true; } } @@ -85,7 +85,7 @@ */ out_unlock: unlock_page(page); - return 1; + return false; } static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, @@ -138,36 +138,33 @@ } const struct pipe_buf_operations page_cache_pipe_buf_ops = { - .can_merge = 0, - .confirm = page_cache_pipe_buf_confirm, - .release = page_cache_pipe_buf_release, - .steal = page_cache_pipe_buf_steal, - .get = generic_pipe_buf_get, + .confirm = page_cache_pipe_buf_confirm, + .release = page_cache_pipe_buf_release, + .try_steal = page_cache_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; -static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) - return 1; + return false; buf->flags |= PIPE_BUF_FLAG_LRU; - return generic_pipe_buf_steal(pipe, buf); + return generic_pipe_buf_try_steal(pipe, buf); } static const struct pipe_buf_operations user_page_pipe_buf_ops = { - .can_merge = 0, - .confirm = generic_pipe_buf_confirm, - .release = page_cache_pipe_buf_release, - .steal = user_page_pipe_buf_steal, - .get = generic_pipe_buf_get, + .release = page_cache_pipe_buf_release, + .try_steal = user_page_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; static void wakeup_pipe_readers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->rd_wait)) + wake_up_interruptible(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -186,6 +183,9 @@ struct splice_pipe_desc *spd) { unsigned int spd_pages = spd->nr_pages; + unsigned int tail = pipe->tail; + unsigned int head = pipe->head; + unsigned int mask = pipe->ring_size - 1; int ret = 0, page_nr = 0; if (!spd_pages) @@ -197,9 +197,8 @@ goto out; } - while (pipe->nrbufs < pipe->buffers) { - int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - struct pipe_buffer *buf = pipe->bufs + newbuf; + while (!pipe_full(head, tail, pipe->max_usage)) { + struct pipe_buffer *buf = &pipe->bufs[head & mask]; buf->page = spd->pages[page_nr]; buf->offset = spd->partial[page_nr].offset; @@ -208,7 +207,8 @@ buf->ops = spd->ops; buf->flags = 0; - pipe->nrbufs++; + head++; + pipe->head = head; page_nr++; ret += buf->len; @@ -229,17 +229,19 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { + unsigned int head = pipe->head; + unsigned int tail = pipe->tail; + unsigned int mask = pipe->ring_size - 1; int ret; if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; - } else if (pipe->nrbufs == pipe->buffers) { + } else if (pipe_full(head, tail, pipe->max_usage)) { ret = -EAGAIN; } else { - int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - pipe->bufs[newbuf] = *buf; - pipe->nrbufs++; + pipe->bufs[head & mask] = *buf; + pipe->head = head + 1; return buf->len; } pipe_buf_release(pipe, buf); @@ -253,14 +255,14 @@ */ int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - unsigned int buffers = READ_ONCE(pipe->buffers); + unsigned int max_usage = READ_ONCE(pipe->max_usage); - spd->nr_pages_max = buffers; - if (buffers <= PIPE_DEF_BUFFERS) + spd->nr_pages_max = max_usage; + if (max_usage <= PIPE_DEF_BUFFERS) return 0; - spd->pages = kmalloc_array(buffers, sizeof(struct page *), GFP_KERNEL); - spd->partial = kmalloc_array(buffers, sizeof(struct partial_page), + spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) @@ -299,10 +301,11 @@ { struct iov_iter to; struct kiocb kiocb; - int idx, ret; + unsigned int i_head; + int ret; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); - idx = to.idx; + iov_iter_pipe(&to, READ, pipe, len); + i_head = to.head; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; ret = call_read_iter(in, &kiocb, &to); @@ -310,7 +313,7 @@ *ppos = kiocb.ki_pos; file_accessed(in); } else if (ret < 0) { - to.idx = idx; + to.head = i_head; to.iov_offset = 0; iov_iter_advance(&to, 0); /* to free what was emitted */ /* @@ -323,112 +326,20 @@ return ret; } -EXPORT_SYMBOL(generic_file_splice_read); +EXPORT_SYMBOL_NS(generic_file_splice_read, ANDROID_GKI_VFS_EXPORT_ONLY); const struct pipe_buf_operations default_pipe_buf_ops = { - .can_merge = 0, - .confirm = generic_pipe_buf_confirm, - .release = generic_pipe_buf_release, - .steal = generic_pipe_buf_steal, - .get = generic_pipe_buf_get, + .release = generic_pipe_buf_release, + .try_steal = generic_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; - -int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { - .can_merge = 0, - .confirm = generic_pipe_buf_confirm, - .release = generic_pipe_buf_release, - .steal = generic_pipe_buf_nosteal, - .get = generic_pipe_buf_get, + .release = generic_pipe_buf_release, + .get = generic_pipe_buf_get, }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); - -static ssize_t kernel_readv(struct file *file, const struct kvec *vec, - unsigned long vlen, loff_t offset) -{ - mm_segment_t old_fs; - loff_t pos = offset; - ssize_t res; - - old_fs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0); - set_fs(old_fs); - - return res; -} - -static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - struct kvec *vec, __vec[PIPE_DEF_BUFFERS]; - struct iov_iter to; - struct page **pages; - unsigned int nr_pages; - size_t offset, base, copied = 0; - ssize_t res; - int i; - - if (pipe->nrbufs == pipe->buffers) - return -EAGAIN; - - /* - * Try to keep page boundaries matching to source pagecache ones - - * it probably won't be much help, but... - */ - offset = *ppos & ~PAGE_MASK; - - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); - - res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base); - if (res <= 0) - return -ENOMEM; - - nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE); - - vec = __vec; - if (nr_pages > PIPE_DEF_BUFFERS) { - vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL); - if (unlikely(!vec)) { - res = -ENOMEM; - goto out; - } - } - - pipe->bufs[to.idx].offset = offset; - pipe->bufs[to.idx].len -= offset; - - for (i = 0; i < nr_pages; i++) { - size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); - vec[i].iov_base = page_address(pages[i]) + offset; - vec[i].iov_len = this_len; - len -= this_len; - offset = 0; - } - - res = kernel_readv(in, vec, nr_pages, *ppos); - if (res > 0) { - copied = res; - *ppos += res; - } - - if (vec != __vec) - kfree(vec); -out: - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); - kvfree(pages); - iov_iter_advance(&to, copied); /* truncates and discards */ - return res; -} /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' @@ -446,7 +357,8 @@ more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; - if (sd->len < sd->total_len && pipe->nrbufs > 1) + if (sd->len < sd->total_len && + pipe_occupancy(pipe->head, pipe->tail) > 1) more |= MSG_SENDPAGE_NOTLAST; return file->f_op->sendpage(file, buf->page, buf->offset, @@ -456,8 +368,8 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->wr_wait)) + wake_up_interruptible(&pipe->wr_wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -484,10 +396,13 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) { + unsigned int head = pipe->head; + unsigned int tail = pipe->tail; + unsigned int mask = pipe->ring_size - 1; int ret; - while (pipe->nrbufs) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + while (!pipe_empty(head, tail)) { + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; sd->len = buf->len; if (sd->len > sd->total_len) @@ -514,8 +429,8 @@ if (!buf->len) { pipe_buf_release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); - pipe->nrbufs--; + tail++; + pipe->tail = tail; if (pipe->files) sd->need_wakeup = true; } @@ -525,6 +440,22 @@ } return 1; +} + +/* We know we have a pipe buffer, but maybe it's empty? */ +static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) +{ + unsigned int tail = pipe->tail; + unsigned int mask = pipe->ring_size - 1; + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + + if (unlikely(!buf->len)) { + pipe_buf_release(pipe, buf); + pipe->tail = tail+1; + return true; + } + + return false; } /** @@ -546,11 +477,12 @@ if (signal_pending(current)) return -ERESTARTSYS; - while (!pipe->nrbufs) { +repeat: + while (pipe_empty(pipe->head, pipe->tail)) { if (!pipe->writers) return 0; - if (!pipe->waiting_writers && sd->num_spliced) + if (sd->num_spliced) return 0; if (sd->flags & SPLICE_F_NONBLOCK) @@ -564,8 +496,11 @@ sd->need_wakeup = false; } - pipe_wait(pipe); + pipe_wait_readable(pipe); } + + if (eat_empty_buffer(pipe)) + goto repeat; return 1; } @@ -689,7 +624,7 @@ .pos = *ppos, .u.file = out, }; - int nbufs = pipe->buffers; + int nbufs = pipe->max_usage; struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); ssize_t ret; @@ -702,16 +637,17 @@ splice_from_pipe_begin(&sd); while (sd.total_len) { struct iov_iter from; + unsigned int head, tail, mask; size_t left; - int n, idx; + int n; ret = splice_from_pipe_next(pipe, &sd); if (ret <= 0) break; - if (unlikely(nbufs < pipe->buffers)) { + if (unlikely(nbufs < pipe->max_usage)) { kfree(array); - nbufs = pipe->buffers; + nbufs = pipe->max_usage; array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); if (!array) { @@ -720,17 +656,18 @@ } } + head = pipe->head; + tail = pipe->tail; + mask = pipe->ring_size - 1; + /* build the vector */ left = sd.total_len; - for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) { - struct pipe_buffer *buf = pipe->bufs + idx; + for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++, n++) { + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t this_len = buf->len; if (this_len > left) this_len = left; - - if (idx == pipe->buffers - 1) - idx = -1; ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { @@ -745,8 +682,7 @@ left -= this_len; } - iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n, - sd.total_len - left); + iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); ret = vfs_iter_write(out, &from, &sd.pos, 0); if (ret <= 0) break; @@ -756,14 +692,15 @@ *ppos = sd.pos; /* dismiss the fully eaten buffers, adjust the partial one */ + tail = pipe->tail; while (ret) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; if (ret >= buf->len) { ret -= buf->len; buf->len = 0; pipe_buf_release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); - pipe->nrbufs--; + tail++; + pipe->tail = tail; if (pipe->files) sd.need_wakeup = true; } else { @@ -785,34 +722,7 @@ return ret; } -EXPORT_SYMBOL(iter_file_splice_write); - -static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - int ret; - void *data; - loff_t tmp = sd->pos; - - data = kmap(buf->page); - ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); - kunmap(buf->page); - - return ret; -} - -static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, - size_t len, unsigned int flags) -{ - ssize_t ret; - - ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); - if (ret > 0) - *ppos += ret; - - return ret; -} +EXPORT_SYMBOL_NS(iter_file_splice_write, ANDROID_GKI_VFS_EXPORT_ONLY); /** * generic_splice_sendpage - splice data from a pipe to a socket @@ -835,21 +745,23 @@ EXPORT_SYMBOL(generic_splice_sendpage); +static int warn_unsupported(struct file *file, const char *op) +{ + pr_debug_ratelimited( + "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", + op, file, current->pid, current->comm); + return -EINVAL; +} + /* * Attempt to initiate a splice from pipe to file. */ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int); - - if (out->f_op->splice_write) - splice_write = out->f_op->splice_write; - else - splice_write = default_file_splice_write; - - return splice_write(pipe, out, ppos, len, flags); + if (unlikely(!out->f_op->splice_write)) + return warn_unsupported(out, "write"); + return out->f_op->splice_write(pipe, out, ppos, len, flags); } /* @@ -859,8 +771,6 @@ struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - ssize_t (*splice_read)(struct file *, loff_t *, - struct pipe_inode_info *, size_t, unsigned int); int ret; if (unlikely(!(in->f_mode & FMODE_READ))) @@ -873,12 +783,9 @@ if (unlikely(len > MAX_RW_COUNT)) len = MAX_RW_COUNT; - if (in->f_op->splice_read) - splice_read = in->f_op->splice_read; - else - splice_read = default_file_splice_read; - - return splice_read(in, ppos, pipe, len, flags); + if (unlikely(!in->f_op->splice_read)) + return warn_unsupported(in, "read"); + return in->f_op->splice_read(in, ppos, pipe, len, flags); } /** @@ -946,16 +853,17 @@ sd->flags &= ~SPLICE_F_NONBLOCK; more = sd->flags & SPLICE_F_MORE; - WARN_ON_ONCE(pipe->nrbufs != 0); + WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); while (len) { - unsigned int pipe_pages; + unsigned int p_space; size_t read_len; loff_t pos = sd->pos, prev_pos = pos; /* Don't try to read more the pipe has space for. */ - pipe_pages = pipe->buffers - pipe->nrbufs; - read_len = min(len, (size_t)pipe_pages << PAGE_SHIFT); + p_space = pipe->max_usage - + pipe_occupancy(pipe->head, pipe->tail); + read_len = min_t(size_t, len, p_space << PAGE_SHIFT); ret = do_splice_to(in, &pos, pipe, read_len, flags); if (unlikely(ret <= 0)) goto out_release; @@ -994,7 +902,7 @@ } done: - pipe->nrbufs = pipe->curbuf = 0; + pipe->tail = pipe->head = 0; file_accessed(in); return bytes; @@ -1003,8 +911,8 @@ * If we did an incomplete transfer we must release * the pipe buffers in question: */ - for (i = 0; i < pipe->buffers; i++) { - struct pipe_buffer *buf = pipe->bufs + i; + for (i = 0; i < pipe->ring_size; i++) { + struct pipe_buffer *buf = &pipe->bufs[i]; if (buf->ops) pipe_buf_release(pipe, buf); @@ -1080,15 +988,13 @@ send_sig(SIGPIPE, current, 0); return -EPIPE; } - if (pipe->nrbufs != pipe->buffers) + if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; - pipe->waiting_writers++; - pipe_wait(pipe); - pipe->waiting_writers--; + pipe_wait_writable(pipe); } } @@ -1099,31 +1005,31 @@ /* * Determine where to splice to/from. */ -static long do_splice(struct file *in, loff_t __user *off_in, - struct file *out, loff_t __user *off_out, - size_t len, unsigned int flags) +long do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset; long ret; - ipipe = get_pipe_info(in); - opipe = get_pipe_info(out); + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + + ipipe = get_pipe_info(in, true); + opipe = get_pipe_info(out, true); if (ipipe && opipe) { if (off_in || off_out) return -ESPIPE; - if (!(in->f_mode & FMODE_READ)) - return -EBADF; - - if (!(out->f_mode & FMODE_WRITE)) - return -EBADF; - /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; + + if ((in->f_flags | out->f_flags) & O_NONBLOCK) + flags |= SPLICE_F_NONBLOCK; return splice_pipe_to_pipe(ipipe, opipe, len, flags); } @@ -1134,14 +1040,10 @@ if (off_out) { if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; - if (copy_from_user(&offset, off_out, sizeof(loff_t))) - return -EFAULT; + offset = *off_out; } else { offset = out->f_pos; } - - if (unlikely(!(out->f_mode & FMODE_WRITE))) - return -EBADF; if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; @@ -1150,14 +1052,17 @@ if (unlikely(ret < 0)) return ret; + if (in->f_flags & O_NONBLOCK) + flags |= SPLICE_F_NONBLOCK; + file_start_write(out); ret = do_splice_from(ipipe, out, &offset, len, flags); file_end_write(out); if (!off_out) out->f_pos = offset; - else if (copy_to_user(off_out, &offset, sizeof(loff_t))) - ret = -EFAULT; + else + *off_out = offset; return ret; } @@ -1168,20 +1073,22 @@ if (off_in) { if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; - if (copy_from_user(&offset, off_in, sizeof(loff_t))) - return -EFAULT; + offset = *off_in; } else { offset = in->f_pos; } + if (out->f_flags & O_NONBLOCK) + flags |= SPLICE_F_NONBLOCK; + pipe_lock(opipe); ret = wait_for_space(opipe, flags); if (!ret) { - unsigned int pipe_pages; + unsigned int p_space; /* Don't try to read more the pipe has space for. */ - pipe_pages = opipe->buffers - opipe->nrbufs; - len = min(len, (size_t)pipe_pages << PAGE_SHIFT); + p_space = opipe->max_usage - pipe_occupancy(opipe->head, opipe->tail); + len = min_t(size_t, len, p_space << PAGE_SHIFT); ret = do_splice_to(in, &offset, opipe, len, flags); } @@ -1190,13 +1097,53 @@ wakeup_pipe_readers(opipe); if (!off_in) in->f_pos = offset; - else if (copy_to_user(off_in, &offset, sizeof(loff_t))) - ret = -EFAULT; + else + *off_in = offset; return ret; } return -EINVAL; +} + +static long __do_splice(struct file *in, loff_t __user *off_in, + struct file *out, loff_t __user *off_out, + size_t len, unsigned int flags) +{ + struct pipe_inode_info *ipipe; + struct pipe_inode_info *opipe; + loff_t offset, *__off_in = NULL, *__off_out = NULL; + long ret; + + ipipe = get_pipe_info(in, true); + opipe = get_pipe_info(out, true); + + if (ipipe && off_in) + return -ESPIPE; + if (opipe && off_out) + return -ESPIPE; + + if (off_out) { + if (copy_from_user(&offset, off_out, sizeof(loff_t))) + return -EFAULT; + __off_out = &offset; + } + if (off_in) { + if (copy_from_user(&offset, off_in, sizeof(loff_t))) + return -EFAULT; + __off_in = &offset; + } + + ret = do_splice(in, __off_in, out, __off_out, len, flags); + if (ret < 0) + return ret; + + if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) + return -EFAULT; + if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) + return -EFAULT; + + return ret; } static int iter_to_pipe(struct iov_iter *from, @@ -1259,7 +1206,7 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, unsigned int flags) { - struct pipe_inode_info *pipe = get_pipe_info(file); + struct pipe_inode_info *pipe = get_pipe_info(file, true); struct splice_desc sd = { .total_len = iov_iter_count(iter), .flags = flags, @@ -1294,7 +1241,7 @@ if (flags & SPLICE_F_GIFT) buf_flag = PIPE_BUF_FLAG_GIFT; - pipe = get_pipe_info(file); + pipe = get_pipe_info(file, true); if (!pipe) return -EBADF; @@ -1339,29 +1286,18 @@ * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ -static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags) -{ - if (unlikely(flags & ~SPLICE_F_ALL)) - return -EINVAL; - - if (!iov_iter_count(iter)) - return 0; - - if (iov_iter_rw(iter) == WRITE) - return vmsplice_to_pipe(f, iter, flags); - else - return vmsplice_to_user(f, iter, flags); -} - SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; + + if (unlikely(flags & ~SPLICE_F_ALL)) + return -EINVAL; f = fdget(fd); error = vmsplice_type(f, &type); @@ -1370,40 +1306,21 @@ error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { - error = do_vmsplice(f.file, &iter, flags); - kfree(iov); - } + if (error < 0) + goto out_fdput; + + if (!iov_iter_count(&iter)) + error = 0; + else if (iov_iter_rw(&iter) == WRITE) + error = vmsplice_to_pipe(f.file, &iter, flags); + else + error = vmsplice_to_user(f.file, &iter, flags); + + kfree(iov); +out_fdput: fdput(f); return error; } - -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, - unsigned int, nr_segs, unsigned int, flags) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - long error; - struct fd f; - int type; - - f = fdget(fd); - error = vmsplice_type(f, &type); - if (error) - return error; - - error = compat_import_iovec(type, iov32, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { - error = do_vmsplice(f.file, &iter, flags); - kfree(iov); - } - fdput(f); - return error; -} -#endif SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, @@ -1421,15 +1338,11 @@ error = -EBADF; in = fdget(fd_in); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - out = fdget(fd_out); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_splice(in.file, off_in, - out.file, off_out, - len, flags); - fdput(out); - } + out = fdget(fd_out); + if (out.file) { + error = __do_splice(in.file, off_in, out.file, off_out, + len, flags); + fdput(out); } fdput(in); } @@ -1445,29 +1358,27 @@ int ret; /* - * Check ->nrbufs without the inode lock first. This function + * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (pipe->nrbufs) + if (!pipe_empty(pipe->head, pipe->tail)) return 0; ret = 0; pipe_lock(pipe); - while (!pipe->nrbufs) { + while (pipe_empty(pipe->head, pipe->tail)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!pipe->writers) break; - if (!pipe->waiting_writers) { - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; } - pipe_wait(pipe); + pipe_wait_readable(pipe); } pipe_unlock(pipe); @@ -1483,16 +1394,16 @@ int ret; /* - * Check ->nrbufs without the inode lock first. This function + * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (pipe->nrbufs < pipe->buffers) + if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; ret = 0; pipe_lock(pipe); - while (pipe->nrbufs >= pipe->buffers) { + while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; @@ -1506,9 +1417,7 @@ ret = -ERESTARTSYS; break; } - pipe->waiting_writers++; - pipe_wait(pipe); - pipe->waiting_writers--; + pipe_wait_writable(pipe); } pipe_unlock(pipe); @@ -1523,7 +1432,10 @@ size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; - int ret = 0, nbuf; + unsigned int i_head, o_head; + unsigned int i_tail, o_tail; + unsigned int i_mask, o_mask; + int ret = 0; bool input_wakeup = false; @@ -1543,7 +1455,14 @@ */ pipe_double_lock(ipipe, opipe); + i_tail = ipipe->tail; + i_mask = ipipe->ring_size - 1; + o_head = opipe->head; + o_mask = opipe->ring_size - 1; + do { + size_t o_len; + if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) @@ -1551,14 +1470,18 @@ break; } - if (!ipipe->nrbufs && !ipipe->writers) + i_head = ipipe->head; + o_tail = opipe->tail; + + if (pipe_empty(i_head, i_tail) && !ipipe->writers) break; /* * Cannot make any progress, because either the input * pipe is empty or the output pipe is full. */ - if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { + if (pipe_empty(i_head, i_tail) || + pipe_full(o_head, o_tail, opipe->max_usage)) { /* Already processed some buffers, break */ if (ret) break; @@ -1578,9 +1501,8 @@ goto retry; } - ibuf = ipipe->bufs + ipipe->curbuf; - nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); - obuf = opipe->bufs + nbuf; + ibuf = &ipipe->bufs[i_tail & i_mask]; + obuf = &opipe->bufs[o_head & o_mask]; if (len >= ibuf->len) { /* @@ -1588,10 +1510,12 @@ */ *obuf = *ibuf; ibuf->ops = NULL; - opipe->nrbufs++; - ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); - ipipe->nrbufs--; + i_tail++; + ipipe->tail = i_tail; input_wakeup = true; + o_len = obuf->len; + o_head++; + opipe->head = o_head; } else { /* * Get a reference to this pipe buffer, @@ -1605,20 +1529,21 @@ *obuf = *ibuf; /* - * Don't inherit the gift flag, we need to + * Don't inherit the gift and merge flags, we need to * prevent multiple steals of this page. */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; - - pipe_buf_mark_unmergeable(obuf); + obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; obuf->len = len; - opipe->nrbufs++; - ibuf->offset += obuf->len; - ibuf->len -= obuf->len; + ibuf->offset += len; + ibuf->len -= len; + o_len = len; + o_head++; + opipe->head = o_head; } - ret += obuf->len; - len -= obuf->len; + ret += o_len; + len -= o_len; } while (len); pipe_unlock(ipipe); @@ -1644,7 +1569,10 @@ size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; - int ret = 0, i = 0, nbuf; + unsigned int i_head, o_head; + unsigned int i_tail, o_tail; + unsigned int i_mask, o_mask; + int ret = 0; /* * Potential ABBA deadlock, work around it by ordering lock @@ -1652,6 +1580,11 @@ * could deadlock (one doing tee from A -> B, the other from B -> A). */ pipe_double_lock(ipipe, opipe); + + i_tail = ipipe->tail; + i_mask = ipipe->ring_size - 1; + o_head = opipe->head; + o_mask = opipe->ring_size - 1; do { if (!opipe->readers) { @@ -1661,15 +1594,19 @@ break; } + i_head = ipipe->head; + o_tail = opipe->tail; + /* - * If we have iterated all input buffers or ran out of + * If we have iterated all input buffers or run out of * output room, break. */ - if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) + if (pipe_empty(i_head, i_tail) || + pipe_full(o_head, o_tail, opipe->max_usage)) break; - ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); - nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); + ibuf = &ipipe->bufs[i_tail & i_mask]; + obuf = &opipe->bufs[o_head & o_mask]; /* * Get a reference to this pipe buffer, @@ -1681,32 +1618,24 @@ break; } - obuf = opipe->bufs + nbuf; *obuf = *ibuf; /* - * Don't inherit the gift flag, we need to - * prevent multiple steals of this page. + * Don't inherit the gift and merge flag, we need to prevent + * multiple steals of this page. */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; - - pipe_buf_mark_unmergeable(obuf); + obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; if (obuf->len > len) obuf->len = len; - - opipe->nrbufs++; ret += obuf->len; len -= obuf->len; - i++; - } while (len); - /* - * return EAGAIN if we have the potential of some data in the - * future, otherwise just return 0 - */ - if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) - ret = -EAGAIN; + o_head++; + opipe->head = o_head; + i_tail++; + } while (len); pipe_unlock(ipipe); pipe_unlock(opipe); @@ -1726,18 +1655,24 @@ * The 'flags' used are the SPLICE_F_* variants, currently the only * applicable one is SPLICE_F_NONBLOCK. */ -static long do_tee(struct file *in, struct file *out, size_t len, - unsigned int flags) +long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { - struct pipe_inode_info *ipipe = get_pipe_info(in); - struct pipe_inode_info *opipe = get_pipe_info(out); + struct pipe_inode_info *ipipe = get_pipe_info(in, true); + struct pipe_inode_info *opipe = get_pipe_info(out, true); int ret = -EINVAL; + + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; /* * Duplicate the contents of ipipe to opipe without actually * copying the data. */ if (ipipe && opipe && ipipe != opipe) { + if ((in->f_flags | out->f_flags) & O_NONBLOCK) + flags |= SPLICE_F_NONBLOCK; + /* * Keep going, unless we encounter an error. The ipipe/opipe * ordering doesn't really matter. @@ -1755,7 +1690,7 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in; + struct fd in, out; int error; if (unlikely(flags & ~SPLICE_F_ALL)) @@ -1767,14 +1702,10 @@ error = -EBADF; in = fdget(fdin); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - struct fd out = fdget(fdout); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_tee(in.file, out.file, - len, flags); - fdput(out); - } + out = fdget(fdout); + if (out.file) { + error = do_tee(in.file, out.file, len, flags); + fdput(out); } fdput(in); } -- Gitblit v1.6.2