.. | ..
27 | 27 | #include <linux/file.h>
28 | 28 | #include <linux/mm.h>
29 | 29 | #include <linux/mman.h>
30 | | -#include <linux/mmu_context.h>
31 | 30 | #include <linux/percpu.h>
32 | 31 | #include <linux/slab.h>
33 | 32 | #include <linux/timer.h>
.. | ..
42 | 41 | #include <linux/ramfs.h>
43 | 42 | #include <linux/percpu-refcount.h>
44 | 43 | #include <linux/mount.h>
| 44 | +#include <linux/pseudo_fs.h>
45 | 45 |
46 | 46 | #include <asm/kmap_types.h>
47 | 47 | #include <linux/uaccess.h>
.. | ..
67 | 67 | unsigned header_length; /* size of aio_ring */
68 | 68 |
69 | 69 |
70 | | - struct io_event io_events[0];
| 70 | + struct io_event io_events[];
71 | 71 | }; /* 128 bytes + ring size */
| 72 | +
| 73 | +/*
| 74 | + * Plugging is meant to work with larger batches of IOs. If we don't
| 75 | + * have more than the below, then don't bother setting up a plug.
| 76 | + */
| 77 | +#define AIO_PLUG_THRESHOLD 2
72 | 78 |
73 | 79 | #define AIO_RING_PAGES 8
74 | 80 |
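The `io_events[0]` to `io_events[]` change swaps the GNU zero-length-array extension for a C99 flexible array member, which keeps the same layout but lets the compiler reject misuse (taking `sizeof` of the member, placing it mid-struct). A minimal sketch of how such a trailing array is sized and allocated; `example_ring` and `example_ring_alloc` are illustrative names, not code from this patch:

```c
#include <linux/aio_abi.h>	/* struct io_event */
#include <linux/overflow.h>	/* struct_size() */
#include <linux/slab.h>

struct example_ring {
	unsigned		nr;		/* events that fit below */
	struct io_event		io_events[];	/* C99 flexible array member */
};

static struct example_ring *example_ring_alloc(unsigned nr_events)
{
	struct example_ring *ring;

	/* struct_size() = sizeof(header) + nr_events trailing elements,
	 * with multiplication-overflow checking built in. */
	ring = kzalloc(struct_size(ring, io_events, nr_events), GFP_KERNEL);
	if (ring)
		ring->nr = nr_events;
	return ring;
}
```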
.. | ..
245 | 251 | return file;
246 | 252 | }
247 | 253 |
248 | | -static struct dentry *aio_mount(struct file_system_type *fs_type,
249 | | - int flags, const char *dev_name, void *data)
| 254 | +static int aio_init_fs_context(struct fs_context *fc)
250 | 255 | {
251 | | - struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
252 | | - AIO_RING_MAGIC);
253 | | -
254 | | - if (!IS_ERR(root))
255 | | - root->d_sb->s_iflags |= SB_I_NOEXEC;
256 | | - return root;
| 256 | + if (!init_pseudo(fc, AIO_RING_MAGIC))
| 257 | + return -ENOMEM;
| 258 | + fc->s_iflags |= SB_I_NOEXEC;
| 259 | + return 0;
257 | 260 | }
258 | 261 |
259 | 262 | /* aio_setup
.. | ..
264 | 267 | {
265 | 268 | static struct file_system_type aio_fs = {
266 | 269 | .name = "aio",
267 | | - .mount = aio_mount,
| 270 | + .init_fs_context = aio_init_fs_context,
268 | 271 | .kill_sb = kill_anon_super,
269 | 272 | };
270 | 273 | aio_mnt = kern_mount(&aio_fs);
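These two hunks convert the aio pseudo filesystem from the old `.mount`/mount_pseudo() interface to the fs_context API: init_pseudo() from `<linux/pseudo_fs.h>` builds the context, superblock flags are applied through `fc` rather than through the root dentry, and the old "aio:" name-prefix argument drops out entirely. A hedged sketch of the same conversion on a made-up filesystem (`example_fs` and `EXAMPLE_MAGIC` are assumptions):

```c
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/pseudo_fs.h>

#define EXAMPLE_MAGIC	0x6578616d	/* hypothetical magic number */

static int example_init_fs_context(struct fs_context *fc)
{
	/* init_pseudo() allocates a pseudo_fs_context and wires up default
	 * ops for an internal, never-user-mountable filesystem; it returns
	 * NULL on allocation failure. */
	if (!init_pseudo(fc, EXAMPLE_MAGIC))
		return -ENOMEM;
	fc->s_iflags |= SB_I_NOEXEC;	/* sb tweaks go through fc now */
	return 0;
}

static struct file_system_type example_fs = {
	.name			= "example",
	.init_fs_context	= example_init_fs_context,
	.kill_sb		= kill_anon_super,
};
```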
.. | ..
332 | 335 | spin_lock(&mm->ioctx_lock);
333 | 336 | rcu_read_lock();
334 | 337 | table = rcu_dereference(mm->ioctx_table);
| 338 | + if (!table)
| 339 | + goto out_unlock;
| 340 | +
335 | 341 | for (i = 0; i < table->nr; i++) {
336 | 342 | struct kioctx *ctx;
337 | 343 |
.. | ..
345 | 351 | }
346 | 352 | }
347 | 353 |
| 354 | +out_unlock:
348 | 355 | rcu_read_unlock();
349 | 356 | spin_unlock(&mm->ioctx_lock);
350 | 357 | return res;
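The added guard handles an mm whose `ioctx_table` was never allocated: rcu_dereference() legitimately returns NULL then, and the old loop would have dereferenced it. Stitching the two hunks together, the guarded function has roughly this shape (loop body elided; names follow the hunks):

```c
static unsigned long example_count_events(struct mm_struct *mm)
{
	struct kioctx_table *table;	/* RCU-published context array */
	unsigned long res = 0;
	int i;

	spin_lock(&mm->ioctx_lock);
	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);
	if (!table)			/* no ioctx was ever created */
		goto out_unlock;

	for (i = 0; i < table->nr; i++) {
		/* ... accumulate per-context event counts into res ... */
	}

out_unlock:
	rcu_read_unlock();
	spin_unlock(&mm->ioctx_lock);
	return res;
}
```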
.. | ..
421 | 428 | BUG_ON(PageWriteback(old));
422 | 429 | get_page(new);
423 | 430 |
424 | | - rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
| 431 | + rc = migrate_page_move_mapping(mapping, new, old, 1);
425 | 432 | if (rc != MIGRATEPAGE_SUCCESS) {
426 | 433 | put_page(new);
427 | 434 | goto out_unlock;
.. | ..
517 | 524 | ctx->mmap_size = nr_pages * PAGE_SIZE;
518 | 525 | pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
519 | 526 |
520 | | - if (down_write_killable(&mm->mmap_sem)) {
| 527 | + if (mmap_write_lock_killable(mm)) {
521 | 528 | ctx->mmap_size = 0;
522 | 529 | aio_free_ring(ctx);
523 | 530 | return -EINTR;
524 | 531 | }
525 | 532 |
526 | | - ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
527 | | - PROT_READ | PROT_WRITE,
528 | | - MAP_SHARED, 0, &unused, NULL);
529 | | - up_write(&mm->mmap_sem);
| 533 | + ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size,
| 534 | + PROT_READ | PROT_WRITE,
| 535 | + MAP_SHARED, 0, &unused, NULL);
| 536 | + mmap_write_unlock(mm);
530 | 537 | if (IS_ERR((void *)ctx->mmap_base)) {
531 | 538 | ctx->mmap_size = 0;
532 | 539 | aio_free_ring(ctx);
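Two API migrations land in this hunk: raw `mmap_sem` rwsem operations become the mmap_lock wrappers (`mmap_write_lock_killable()`/`mmap_write_unlock()`), and `do_mmap_pgoff()` is folded into `do_mmap()`. A hedged sketch of the locking pattern, with the mapping arguments mirroring the hunk (error handling trimmed; failures come back IS_ERR_VALUE-encoded, as with do_mmap() itself):

```c
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/mmap_lock.h>

static unsigned long example_map_ring(struct mm_struct *mm,
				      struct file *file, size_t size)
{
	unsigned long addr, unused;

	/* Killable variant: a fatal signal aborts the wait for the lock
	 * instead of blocking the task uninterruptibly. */
	if (mmap_write_lock_killable(mm))
		return -EINTR;
	addr = do_mmap(file, 0, size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, 0, &unused, NULL);
	mmap_write_unlock(mm);
	return addr;	/* caller checks with IS_ERR((void *)addr) */
}
```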
.. | ..
1029 | 1036 | if (unlikely(!req))
1030 | 1037 | return NULL;
1031 | 1038 |
| 1039 | + if (unlikely(!get_reqs_available(ctx))) {
| 1040 | + kmem_cache_free(kiocb_cachep, req);
| 1041 | + return NULL;
| 1042 | + }
| 1043 | +
1032 | 1044 | percpu_ref_get(&ctx->reqs);
1033 | 1045 | req->ki_ctx = ctx;
1034 | 1046 | INIT_LIST_HEAD(&req->ki_list);
.. | ..
1067 | 1079 |
1068 | 1080 | static inline void iocb_destroy(struct aio_kiocb *iocb)
1069 | 1081 | {
| 1082 | + if (iocb->ki_eventfd)
| 1083 | + eventfd_ctx_put(iocb->ki_eventfd);
1070 | 1084 | if (iocb->ki_filp)
1071 | 1085 | fput(iocb->ki_filp);
1072 | 1086 | percpu_ref_put(&iocb->ki_ctx->reqs);
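With this hunk the eventfd reference becomes part of the iocb's own lifetime: taken once at setup, signaled at completion, and dropped exactly once in iocb_destroy() instead of inside the completion path. A sketch of that lifecycle using the `<linux/eventfd.h>` API (the `example_req` wrapper is illustrative, not from the patch):

```c
#include <linux/eventfd.h>
#include <linux/err.h>

struct example_req {
	struct eventfd_ctx *ev;		/* held for the request lifetime */
};

static int example_req_setup(struct example_req *req, int resfd)
{
	struct eventfd_ctx *ev = eventfd_ctx_fdget(resfd);

	if (IS_ERR(ev))
		return PTR_ERR(ev);	/* bad fd, or not an eventfd */
	req->ev = ev;			/* ownership moves into the req */
	return 0;
}

static void example_req_complete(struct example_req *req)
{
	if (req->ev)
		eventfd_signal(req->ev, 1);	/* safe from IRQ context */
}

static void example_req_destroy(struct example_req *req)
{
	if (req->ev)
		eventfd_ctx_put(req->ev);	/* single teardown point */
}
```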
.. | ..
1134 | 1148 | * eventfd. The eventfd_signal() function is safe to be called
1135 | 1149 | * from IRQ context.
1136 | 1150 | */
1137 | | - if (iocb->ki_eventfd) {
| 1151 | + if (iocb->ki_eventfd)
1138 | 1152 | eventfd_signal(iocb->ki_eventfd, 1);
1139 | | - eventfd_ctx_put(iocb->ki_eventfd);
1140 | | - }
1141 | 1153 |
1142 | 1154 | /*
1143 | 1155 | * We have to order our ring_info tail store above and test
.. | ..
1460 | 1472 |
1461 | 1473 | req->ki_ioprio = iocb->aio_reqprio;
1462 | 1474 | } else
1463 | | - req->ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
| 1475 | + req->ki_ioprio = get_current_ioprio();
1464 | 1476 |
1465 | 1477 | ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
1466 | 1478 | if (unlikely(ret))
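Rather than pinning requests that carry no explicit priority to `IOPRIO_CLASS_NONE`, the request now inherits the submitting task's I/O priority. A rough sketch of what get_current_ioprio() does in kernels of this era (the real helper lives in `<linux/ioprio.h>` and has changed across versions, so treat this as an approximation):

```c
#include <linux/ioprio.h>
#include <linux/iocontext.h>
#include <linux/sched.h>

static inline int example_current_ioprio(void)
{
	struct io_context *ioc = current->io_context;

	/* Use the priority set via ioprio_set()/io_context if there is
	 * one; otherwise fall back to the old no-priority default. */
	if (ioc)
		return ioc->ioprio;
	return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
}
```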
.. | ..
1470 | 1482 | return 0;
1471 | 1483 | }
1472 | 1484 |
1473 | | -static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
1474 | | - bool vectored, bool compat, struct iov_iter *iter)
| 1485 | +static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
| 1486 | + struct iovec **iovec, bool vectored, bool compat,
| 1487 | + struct iov_iter *iter)
1475 | 1488 | {
1476 | 1489 | void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
1477 | 1490 | size_t len = iocb->aio_nbytes;
.. | ..
1481 | 1494 | *iovec = NULL;
1482 | 1495 | return ret;
1483 | 1496 | }
1484 | | -#ifdef CONFIG_COMPAT
1485 | | - if (compat)
1486 | | - return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
1487 | | - iter);
1488 | | -#endif
1489 | | - return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
| 1497 | +
| 1498 | + return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat);
1490 | 1499 | }
1491 | 1500 |
1492 | 1501 | static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
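The `#ifdef CONFIG_COMPAT` fork disappears because `__import_iovec()` takes compat-ness as a plain boolean, so native and compat iovec layouts share one entry point (and one return convention, which is why aio_setup_rw() goes back to returning `ssize_t`). A sketch of the call, assuming the consolidated helper from `<linux/uio.h>`:

```c
#include <linux/kernel.h>	/* READ */
#include <linux/uio.h>

static ssize_t example_setup_read(void __user *buf, size_t len, bool compat,
				  struct iovec **iovec, struct iov_iter *iter)
{
	/* Returns the total byte count (or a negative error). On return,
	 * *iovec is either a heap array the caller must kfree(), or NULL
	 * when the UIO_FASTIOV inline array sufficed or on error. */
	return __import_iovec(READ, buf, len, UIO_FASTIOV, iovec, iter,
			      compat);
}
```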
.. | ..
1503 | 1512 | * may be already running. Just fail this IO with EINTR.
1504 | 1513 | */
1505 | 1514 | ret = -EINTR;
1506 | | - /*FALLTHRU*/
| 1515 | + fallthrough;
1507 | 1516 | default:
1508 | 1517 | req->ki_complete(req, ret, 0);
1509 | 1518 | }
1510 | 1519 | }
1511 | 1520 |
1512 | | -static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
| 1521 | +static int aio_read(struct kiocb *req, const struct iocb *iocb,
1513 | 1522 | bool vectored, bool compat)
1514 | 1523 | {
1515 | 1524 | struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
1516 | 1525 | struct iov_iter iter;
1517 | 1526 | struct file *file;
1518 | | - ssize_t ret;
| 1527 | + int ret;
1519 | 1528 |
1520 | 1529 | ret = aio_prep_rw(req, iocb);
1521 | 1530 | if (ret)
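`fallthrough;` is the pseudo-keyword from `<linux/compiler_attributes.h>` (expanding to `__attribute__((fallthrough))` where the compiler supports it); unlike the old `/*FALLTHRU*/` comment, `-Wimplicit-fallthrough` can actually verify it. A minimal illustration with hypothetical error handling:

```c
#include <linux/compiler_attributes.h>
#include <linux/errno.h>

static int example_classify(int err)
{
	switch (err) {
	case -ERESTARTSYS:
		err = -EINTR;	/* can't restart: rewrite the error... */
		fallthrough;	/* ...then deliberately share the default */
	default:
		return err;
	}
}
```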
.. | ..
1528 | 1537 | return -EINVAL;
1529 | 1538 |
1530 | 1539 | ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
1531 | | - if (ret)
| 1540 | + if (ret < 0)
1532 | 1541 | return ret;
1533 | 1542 | ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
1534 | 1543 | if (!ret)
.. | ..
1537 | 1546 | return ret;
1538 | 1547 | }
1539 | 1548 |
1540 | | -static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
| 1549 | +static int aio_write(struct kiocb *req, const struct iocb *iocb,
1541 | 1550 | bool vectored, bool compat)
1542 | 1551 | {
1543 | 1552 | struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
1544 | 1553 | struct iov_iter iter;
1545 | 1554 | struct file *file;
1546 | | - ssize_t ret;
| 1555 | + int ret;
1547 | 1556 |
1548 | 1557 | ret = aio_prep_rw(req, iocb);
1549 | 1558 | if (ret)
.. | ..
1556 | 1565 | return -EINVAL;
1557 | 1566 |
1558 | 1567 | ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
1559 | | - if (ret)
| 1568 | + if (ret < 0)
1560 | 1569 | return ret;
1561 | 1570 | ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
1562 | 1571 | if (!ret) {
.. | ..
1568 | 1577 | * we return to userspace.
1569 | 1578 | */
1570 | 1579 | if (S_ISREG(file_inode(file)->i_mode)) {
1571 | | - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
| 1580 | + sb_start_write(file_inode(file)->i_sb);
1572 | 1581 | __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
1573 | 1582 | }
1574 | 1583 | req->ki_flags |= IOCB_WRITE;
.. | ..
1832 | 1841 | add_wait_queue(head, &pt->iocb->poll.wait);
1833 | 1842 | }
1834 | 1843 |
1835 | | -static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
| 1844 | +static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
1836 | 1845 | {
1837 | 1846 | struct kioctx *ctx = aiocb->ki_ctx;
1838 | 1847 | struct poll_iocb *req = &aiocb->poll;
.. | ..
1908 | 1917 | }
1909 | 1918 |
1910 | 1919 | static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
1911 | | - struct iocb __user *user_iocb, bool compat)
| 1920 | + struct iocb __user *user_iocb, struct aio_kiocb *req,
| 1921 | + bool compat)
1912 | 1922 | {
1913 | | - struct aio_kiocb *req;
1914 | | - ssize_t ret;
1915 | | -
1916 | | - /* enforce forwards compatibility on users */
1917 | | - if (unlikely(iocb->aio_reserved2)) {
1918 | | - pr_debug("EINVAL: reserve field set\n");
1919 | | - return -EINVAL;
1920 | | - }
1921 | | -
1922 | | - /* prevent overflows */
1923 | | - if (unlikely(
1924 | | - (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
1925 | | - (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
1926 | | - ((ssize_t)iocb->aio_nbytes < 0)
1927 | | - )) {
1928 | | - pr_debug("EINVAL: overflow check\n");
1929 | | - return -EINVAL;
1930 | | - }
1931 | | -
1932 | | - if (!get_reqs_available(ctx))
1933 | | - return -EAGAIN;
1934 | | -
1935 | | - ret = -EAGAIN;
1936 | | - req = aio_get_req(ctx);
1937 | | - if (unlikely(!req))
1938 | | - goto out_put_reqs_available;
1939 | | -
1940 | 1923 | req->ki_filp = fget(iocb->aio_fildes);
1941 | | - ret = -EBADF;
1942 | 1924 | if (unlikely(!req->ki_filp))
1943 | | - goto out_put_req;
| 1925 | + return -EBADF;
1944 | 1926 |
1945 | 1927 | if (iocb->aio_flags & IOCB_FLAG_RESFD) {
| 1928 | + struct eventfd_ctx *eventfd;
1946 | 1929 | /*
1947 | 1930 | * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
1948 | 1931 | * instance of the file* now. The file descriptor must be
1949 | 1932 | * an eventfd() fd, and will be signaled for each completed
1950 | 1933 | * event using the eventfd_signal() function.
1951 | 1934 | */
1952 | | - req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
1953 | | - if (IS_ERR(req->ki_eventfd)) {
1954 | | - ret = PTR_ERR(req->ki_eventfd);
1955 | | - req->ki_eventfd = NULL;
1956 | | - goto out_put_req;
1957 | | - }
| 1935 | + eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
| 1936 | + if (IS_ERR(eventfd))
| 1937 | + return PTR_ERR(eventfd);
| 1938 | +
| 1939 | + req->ki_eventfd = eventfd;
1958 | 1940 | }
1959 | 1941 |
1960 | | - ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
1961 | | - if (unlikely(ret)) {
| 1942 | + if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
1962 | 1943 | pr_debug("EFAULT: aio_key\n");
1963 | | - goto out_put_req;
| 1944 | + return -EFAULT;
1964 | 1945 | }
1965 | 1946 |
1966 | 1947 | req->ki_res.obj = (u64)(unsigned long)user_iocb;
.. | ..
1970 | 1951 |
1971 | 1952 | switch (iocb->aio_lio_opcode) {
1972 | 1953 | case IOCB_CMD_PREAD:
1973 | | - ret = aio_read(&req->rw, iocb, false, compat);
1974 | | - break;
| 1954 | + return aio_read(&req->rw, iocb, false, compat);
1975 | 1955 | case IOCB_CMD_PWRITE:
1976 | | - ret = aio_write(&req->rw, iocb, false, compat);
1977 | | - break;
| 1956 | + return aio_write(&req->rw, iocb, false, compat);
1978 | 1957 | case IOCB_CMD_PREADV:
1979 | | - ret = aio_read(&req->rw, iocb, true, compat);
1980 | | - break;
| 1958 | + return aio_read(&req->rw, iocb, true, compat);
1981 | 1959 | case IOCB_CMD_PWRITEV:
1982 | | - ret = aio_write(&req->rw, iocb, true, compat);
1983 | | - break;
| 1960 | + return aio_write(&req->rw, iocb, true, compat);
1984 | 1961 | case IOCB_CMD_FSYNC:
1985 | | - ret = aio_fsync(&req->fsync, iocb, false);
1986 | | - break;
| 1962 | + return aio_fsync(&req->fsync, iocb, false);
1987 | 1963 | case IOCB_CMD_FDSYNC:
1988 | | - ret = aio_fsync(&req->fsync, iocb, true);
1989 | | - break;
| 1964 | + return aio_fsync(&req->fsync, iocb, true);
1990 | 1965 | case IOCB_CMD_POLL:
1991 | | - ret = aio_poll(req, iocb);
1992 | | - break;
| 1966 | + return aio_poll(req, iocb);
1993 | 1967 | default:
1994 | 1968 | pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
1995 | | - ret = -EINVAL;
1996 | | - break;
| 1969 | + return -EINVAL;
1997 | 1970 | }
1998 | | -
1999 | | - /* Done with the synchronous reference */
2000 | | - iocb_put(req);
2001 | | -
2002 | | - /*
2003 | | - * If ret is 0, we'd either done aio_complete() ourselves or have
2004 | | - * arranged for that to be done asynchronously. Anything non-zero
2005 | | - * means that we need to destroy req ourselves.
2006 | | - */
2007 | | - if (!ret)
2008 | | - return 0;
2009 | | -
2010 | | -out_put_req:
2011 | | - if (req->ki_eventfd)
2012 | | - eventfd_ctx_put(req->ki_eventfd);
2013 | | - iocb_destroy(req);
2014 | | -out_put_reqs_available:
2015 | | - put_reqs_available(ctx, 1);
2016 | | - return ret;
2017 | 1971 | }
2018 | 1972 |
2019 | 1973 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
2020 | 1974 | bool compat)
2021 | 1975 | {
| 1976 | + struct aio_kiocb *req;
2022 | 1977 | struct iocb iocb;
| 1978 | + int err;
2023 | 1979 |
2024 | 1980 | if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
2025 | 1981 | return -EFAULT;
2026 | 1982 |
2027 | | - return __io_submit_one(ctx, &iocb, user_iocb, compat);
| 1983 | + /* enforce forwards compatibility on users */
| 1984 | + if (unlikely(iocb.aio_reserved2)) {
| 1985 | + pr_debug("EINVAL: reserve field set\n");
| 1986 | + return -EINVAL;
| 1987 | + }
| 1988 | +
| 1989 | + /* prevent overflows */
| 1990 | + if (unlikely(
| 1991 | + (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
| 1992 | + (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
| 1993 | + ((ssize_t)iocb.aio_nbytes < 0)
| 1994 | + )) {
| 1995 | + pr_debug("EINVAL: overflow check\n");
| 1996 | + return -EINVAL;
| 1997 | + }
| 1998 | +
| 1999 | + req = aio_get_req(ctx);
| 2000 | + if (unlikely(!req))
| 2001 | + return -EAGAIN;
| 2002 | +
| 2003 | + err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
| 2004 | +
| 2005 | + /* Done with the synchronous reference */
| 2006 | + iocb_put(req);
| 2007 | +
| 2008 | + /*
| 2009 | + * If err is 0, we'd either done aio_complete() ourselves or have
| 2010 | + * arranged for that to be done asynchronously. Anything non-zero
| 2011 | + * means that we need to destroy req ourselves.
| 2012 | + */
| 2013 | + if (unlikely(err)) {
| 2014 | + iocb_destroy(req);
| 2015 | + put_reqs_available(ctx, 1);
| 2016 | + }
| 2017 | + return err;
2028 | 2018 | }
2029 | 2019 |
2030 | 2020 | /* sys_io_submit:
.. | ..
2059 | 2049 | if (nr > ctx->nr_events)
2060 | 2050 | nr = ctx->nr_events;
2061 | 2051 |
2062 | | - blk_start_plug(&plug);
| 2052 | + if (nr > AIO_PLUG_THRESHOLD)
| 2053 | + blk_start_plug(&plug);
2063 | 2054 | for (i = 0; i < nr; i++) {
2064 | 2055 | struct iocb __user *user_iocb;
2065 | 2056 |
.. | ..
2072 | 2063 | if (ret)
2073 | 2064 | break;
2074 | 2065 | }
2075 | | - blk_finish_plug(&plug);
| 2066 | + if (nr > AIO_PLUG_THRESHOLD)
| 2067 | + blk_finish_plug(&plug);
2076 | 2068 |
2077 | 2069 | percpu_ref_put(&ctx->users);
2078 | 2070 | return i ? i : ret;
.. | ..
2099 | 2091 | if (nr > ctx->nr_events)
2100 | 2092 | nr = ctx->nr_events;
2101 | 2093 |
2102 | | - blk_start_plug(&plug);
| 2094 | + if (nr > AIO_PLUG_THRESHOLD)
| 2095 | + blk_start_plug(&plug);
2103 | 2096 | for (i = 0; i < nr; i++) {
2104 | 2097 | compat_uptr_t user_iocb;
2105 | 2098 |
.. | ..
2112 | 2105 | if (ret)
2113 | 2106 | break;
2114 | 2107 | }
2115 | | - blk_finish_plug(&plug);
| 2108 | + if (nr > AIO_PLUG_THRESHOLD)
| 2109 | + blk_finish_plug(&plug);
2116 | 2110 |
2117 | 2111 | percpu_ref_put(&ctx->users);
2118 | 2112 | return i ? i : ret;
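Both the native and compat io_submit() loops now plug the block layer only when the batch exceeds AIO_PLUG_THRESHOLD (defined as 2 earlier in this patch): plugging exists to merge and batch multiple requests, and is pure overhead for one or two iocbs. A sketch of the conditional-plug pattern, assuming `blk_start_plug()`/`blk_finish_plug()` from `<linux/blkdev.h>` (`example_submit_io` is a stand-in for the per-iocb submit):

```c
#include <linux/blkdev.h>

#define EXAMPLE_PLUG_THRESHOLD	2	/* mirrors AIO_PLUG_THRESHOLD */

static int example_submit_io(long i) { return 0; }	/* stub */

static long example_submit_batch(long nr)
{
	struct blk_plug plug;
	long i, ret = 0;

	/* A plug queues this task's block requests so they can be merged
	 * and dispatched together when the plug is released. */
	if (nr > EXAMPLE_PLUG_THRESHOLD)
		blk_start_plug(&plug);
	for (i = 0; i < nr; i++) {
		ret = example_submit_io(i);
		if (ret)
			break;
	}
	if (nr > EXAMPLE_PLUG_THRESHOLD)
		blk_finish_plug(&plug);

	return i ? i : ret;	/* partial success reports the count */
}
```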
.. | ..
2203 | 2197 | * specifies an infinite timeout. Note that the timeout pointed to by
2204 | 2198 | * timeout is relative. Will fail with -ENOSYS if not implemented.
2205 | 2199 | */
| 2200 | +#ifdef CONFIG_64BIT
| 2201 | +
2206 | 2202 | SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
2207 | 2203 | long, min_nr,
2208 | 2204 | long, nr,
2209 | 2205 | struct io_event __user *, events,
2210 | | - struct timespec __user *, timeout)
| 2206 | + struct __kernel_timespec __user *, timeout)
2211 | 2207 | {
2212 | 2208 | struct timespec64 ts;
2213 | 2209 | int ret;
.. | ..
2221 | 2217 | return ret;
2222 | 2218 | }
2223 | 2219 |
| 2220 | +#endif
| 2221 | +
2224 | 2222 | struct __aio_sigset {
2225 | 2223 | const sigset_t __user *sigmask;
2226 | 2224 | size_t sigsetsize;
.. | ..
2231 | 2229 | long, min_nr,
2232 | 2230 | long, nr,
2233 | 2231 | struct io_event __user *, events,
2234 | | - struct timespec __user *, timeout,
| 2232 | + struct __kernel_timespec __user *, timeout,
2235 | 2233 | const struct __aio_sigset __user *, usig)
2236 | 2234 | {
2237 | 2235 | struct __aio_sigset ksig = { NULL, };
2238 | | - sigset_t ksigmask, sigsaved;
2239 | 2236 | struct timespec64 ts;
| 2237 | + bool interrupted;
2240 | 2238 | int ret;
2241 | 2239 |
2242 | 2240 | if (timeout && unlikely(get_timespec64(&ts, timeout)))
.. | ..
2245 | 2243 | if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
2246 | 2244 | return -EFAULT;
2247 | 2245 |
2248 | | - if (ksig.sigmask) {
2249 | | - if (ksig.sigsetsize != sizeof(sigset_t))
2250 | | - return -EINVAL;
2251 | | - if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
2252 | | - return -EFAULT;
2253 | | - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
2254 | | - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
2255 | | - }
| 2246 | + ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
| 2247 | + if (ret)
| 2248 | + return ret;
2256 | 2249 |
2257 | 2250 | ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
2258 | | - if (signal_pending(current)) {
2259 | | - if (ksig.sigmask) {
2260 | | - current->saved_sigmask = sigsaved;
2261 | | - set_restore_sigmask();
2262 | | - }
2263 | 2251 |
2264 | | - if (!ret)
2265 | | - ret = -ERESTARTNOHAND;
2266 | | - } else {
2267 | | - if (ksig.sigmask)
2268 | | - sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2269 | | - }
| 2252 | + interrupted = signal_pending(current);
| 2253 | + restore_saved_sigmask_unless(interrupted);
| 2254 | + if (interrupted && !ret)
| 2255 | + ret = -ERESTARTNOHAND;
2270 | 2256 |
2271 | 2257 | return ret;
2272 | 2258 | }
2273 | 2259 |
2274 | | -#ifdef CONFIG_COMPAT
2275 | | -COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
2276 | | - compat_long_t, min_nr,
2277 | | - compat_long_t, nr,
2278 | | - struct io_event __user *, events,
2279 | | - struct compat_timespec __user *, timeout)
| 2260 | +#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
| 2261 | +
| 2262 | +SYSCALL_DEFINE6(io_pgetevents_time32,
| 2263 | + aio_context_t, ctx_id,
| 2264 | + long, min_nr,
| 2265 | + long, nr,
| 2266 | + struct io_event __user *, events,
| 2267 | + struct old_timespec32 __user *, timeout,
| 2268 | + const struct __aio_sigset __user *, usig)
| 2269 | +{
| 2270 | + struct __aio_sigset ksig = { NULL, };
| 2271 | + struct timespec64 ts;
| 2272 | + bool interrupted;
| 2273 | + int ret;
| 2274 | +
| 2275 | + if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
| 2276 | + return -EFAULT;
| 2277 | +
| 2278 | + if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
| 2279 | + return -EFAULT;
| 2280 | +
| 2281 | +
| 2282 | + ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
| 2283 | + if (ret)
| 2284 | + return ret;
| 2285 | +
| 2286 | + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
| 2287 | +
| 2288 | + interrupted = signal_pending(current);
| 2289 | + restore_saved_sigmask_unless(interrupted);
| 2290 | + if (interrupted && !ret)
| 2291 | + ret = -ERESTARTNOHAND;
| 2292 | +
| 2293 | + return ret;
| 2294 | +}
| 2295 | +
| 2296 | +#endif
| 2297 | +
| 2298 | +#if defined(CONFIG_COMPAT_32BIT_TIME)
| 2299 | +
| 2300 | +SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
| 2301 | + __s32, min_nr,
| 2302 | + __s32, nr,
| 2303 | + struct io_event __user *, events,
| 2304 | + struct old_timespec32 __user *, timeout)
2280 | 2305 | {
2281 | 2306 | struct timespec64 t;
2282 | 2307 | int ret;
2283 | 2308 |
2284 | | - if (timeout && compat_get_timespec64(&t, timeout))
| 2309 | + if (timeout && get_old_timespec32(&t, timeout))
2285 | 2310 | return -EFAULT;
2286 | 2311 |
2287 | 2312 | ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
.. | ..
2290 | 2315 | return ret;
2291 | 2316 | }
2292 | 2317 |
| 2318 | +#endif
| 2319 | +
| 2320 | +#ifdef CONFIG_COMPAT
2293 | 2321 |
2294 | 2322 | struct __compat_aio_sigset {
2295 | | - compat_sigset_t __user *sigmask;
| 2323 | + compat_uptr_t sigmask;
2296 | 2324 | compat_size_t sigsetsize;
2297 | 2325 | };
| 2326 | +
| 2327 | +#if defined(CONFIG_COMPAT_32BIT_TIME)
2298 | 2328 |
2299 | 2329 | COMPAT_SYSCALL_DEFINE6(io_pgetevents,
2300 | 2330 | compat_aio_context_t, ctx_id,
2301 | 2331 | compat_long_t, min_nr,
2302 | 2332 | compat_long_t, nr,
2303 | 2333 | struct io_event __user *, events,
2304 | | - struct compat_timespec __user *, timeout,
| 2334 | + struct old_timespec32 __user *, timeout,
2305 | 2335 | const struct __compat_aio_sigset __user *, usig)
2306 | 2336 | {
2307 | | - struct __compat_aio_sigset ksig = { NULL, };
2308 | | - sigset_t ksigmask, sigsaved;
| 2337 | + struct __compat_aio_sigset ksig = { 0, };
2309 | 2338 | struct timespec64 t;
| 2339 | + bool interrupted;
2310 | 2340 | int ret;
2311 | 2341 |
2312 | | - if (timeout && compat_get_timespec64(&t, timeout))
| 2342 | + if (timeout && get_old_timespec32(&t, timeout))
2313 | 2343 | return -EFAULT;
2314 | 2344 |
2315 | 2345 | if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
2316 | 2346 | return -EFAULT;
2317 | 2347 |
2318 | | - if (ksig.sigmask) {
2319 | | - if (ksig.sigsetsize != sizeof(compat_sigset_t))
2320 | | - return -EINVAL;
2321 | | - if (get_compat_sigset(&ksigmask, ksig.sigmask))
2322 | | - return -EFAULT;
2323 | | - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
2324 | | - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
2325 | | - }
| 2348 | + ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
| 2349 | + if (ret)
| 2350 | + return ret;
2326 | 2351 |
2327 | 2352 | ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
2328 | | - if (signal_pending(current)) {
2329 | | - if (ksig.sigmask) {
2330 | | - current->saved_sigmask = sigsaved;
2331 | | - set_restore_sigmask();
2332 | | - }
2333 | | - if (!ret)
2334 | | - ret = -ERESTARTNOHAND;
2335 | | - } else {
2336 | | - if (ksig.sigmask)
2337 | | - sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2338 | | - }
| 2353 | +
| 2354 | + interrupted = signal_pending(current);
| 2355 | + restore_saved_sigmask_unless(interrupted);
| 2356 | + if (interrupted && !ret)
| 2357 | + ret = -ERESTARTNOHAND;
| 2358 | +
| 2359 | + return ret;
| 2360 | +}
| 2361 | +
| 2362 | +#endif
| 2363 | +
| 2364 | +COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
| 2365 | + compat_aio_context_t, ctx_id,
| 2366 | + compat_long_t, min_nr,
| 2367 | + compat_long_t, nr,
| 2368 | + struct io_event __user *, events,
| 2369 | + struct __kernel_timespec __user *, timeout,
| 2370 | + const struct __compat_aio_sigset __user *, usig)
| 2371 | +{
| 2372 | + struct __compat_aio_sigset ksig = { 0, };
| 2373 | + struct timespec64 t;
| 2374 | + bool interrupted;
| 2375 | + int ret;
| 2376 | +
| 2377 | + if (timeout && get_timespec64(&t, timeout))
| 2378 | + return -EFAULT;
| 2379 | +
| 2380 | + if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
| 2381 | + return -EFAULT;
| 2382 | +
| 2383 | + ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
| 2384 | + if (ret)
| 2385 | + return ret;
| 2386 | +
| 2387 | + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
| 2388 | +
| 2389 | + interrupted = signal_pending(current);
| 2390 | + restore_saved_sigmask_unless(interrupted);
| 2391 | + if (interrupted && !ret)
| 2392 | + ret = -ERESTARTNOHAND;
2339 | 2393 |
2340 | 2394 | return ret;
2341 | 2395 | }
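All of the pgetevents variants above (native, time32, compat, compat time64) now share one signal-mask discipline instead of open-coding sigprocmask() juggling: set_user_sigmask() or set_compat_user_sigmask() validates the size, strips SIGKILL/SIGSTOP, installs the temporary mask and saves the old one in `current->saved_sigmask`; restore_saved_sigmask_unless() then restores it immediately unless a signal is pending, in which case the restore is deferred to signal delivery so the handler runs under the caller-supplied mask. A condensed sketch of that shared shape (`example_do_wait` is a hypothetical blocking wait):

```c
#include <linux/signal.h>
#include <linux/sched/signal.h>

static int example_do_wait(void) { return 0; }	/* hypothetical wait */

static int example_pwait(const sigset_t __user *umask, size_t sigsetsize)
{
	bool interrupted;
	int ret;

	/* Validate sigsetsize, mask out SIGKILL/SIGSTOP, install the
	 * temporary mask, and stash the old one in current->saved_sigmask.
	 * A NULL umask is accepted and leaves the mask untouched. */
	ret = set_user_sigmask(umask, sigsetsize);
	if (ret)
		return ret;

	ret = example_do_wait();

	/* Signal pending: keep the saved mask for signal delivery to
	 * restore; otherwise put it back right away. */
	interrupted = signal_pending(current);
	restore_saved_sigmask_unless(interrupted);
	if (interrupted && !ret)
		ret = -ERESTARTNOHAND;
	return ret;
}
```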