.. | ..
18 | 18 | #include <linux/pagemap.h>
19 | 19 | #include <linux/file.h>
20 | 20 | #include <linux/mount.h>
 | 21 | +#include <linux/fs_context.h>
21 | 22 | #include <linux/namei.h>
22 | 23 | #include <linux/sysctl.h>
23 | 24 | #include <linux/poll.h>
.. | ..
42 | 43 | #include <net/sock.h>
43 | 44 | #include "util.h"
44 | 45 |
 | 46 | +struct mqueue_fs_context {
 | 47 | +        struct ipc_namespace *ipc_ns;
 | 48 | +        bool newns;     /* Set if newly created ipc namespace */
 | 49 | +};
 | 50 | +
45 | 51 | #define MQUEUE_MAGIC 0x19800202
46 | 52 | #define DIRENT_SIZE 20
47 | 53 | #define FILENT_SIZE 80
.. | ..
58 | 64 |         int priority;
59 | 65 | };
60 | 66 |
 | 67 | +/*
 | 68 | + * Locking:
 | 69 | + *
 | 70 | + * Accesses to a message queue are synchronized by acquiring info->lock.
 | 71 | + *
 | 72 | + * There are two notable exceptions:
 | 73 | + * - The actual wakeup of a sleeping task is performed using the wake_q
 | 74 | + *   framework. info->lock is already released when wake_up_q is called.
 | 75 | + * - The exit codepaths after sleeping check ext_wait_queue->state without
 | 76 | + *   any locks. If it is STATE_READY, then the syscall is completed without
 | 77 | + *   acquiring info->lock.
 | 78 | + *
 | 79 | + * MQ_BARRIER:
 | 80 | + * To achieve proper release/acquire memory barrier pairing, the state is set to
 | 81 | + * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed
 | 82 | + * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used.
 | 83 | + *
 | 84 | + * This prevents the following races:
 | 85 | + *
 | 86 | + * 1) With the simple wake_q_add(), the task could be gone already before
 | 87 | + *    the increase of the reference happens:
 | 88 | + * Thread A
 | 89 | + *                              Thread B
 | 90 | + * WRITE_ONCE(wait.state, STATE_NONE);
 | 91 | + * schedule_hrtimeout()
 | 92 | + *                              wake_q_add(A)
 | 93 | + *                              if (cmpxchg()) // success
 | 94 | + *                                 ->state = STATE_READY (reordered)
 | 95 | + * <timeout returns>
 | 96 | + * if (wait.state == STATE_READY) return;
 | 97 | + * sysret to user space
 | 98 | + * sys_exit()
 | 99 | + *                              get_task_struct() // UaF
 | 100 | + *
 | 101 | + * Solution: Use wake_q_add_safe() and perform the get_task_struct() before
 | 102 | + * the smp_store_release() that does ->state = STATE_READY.
 | 103 | + *
 | 104 | + * 2) Without proper _release/_acquire barriers, the woken up task
 | 105 | + *    could read stale data:
 | 106 | + *
 | 107 | + * Thread A
 | 108 | + *                              Thread B
 | 109 | + * do_mq_timedreceive
 | 110 | + * WRITE_ONCE(wait.state, STATE_NONE);
 | 111 | + * schedule_hrtimeout()
 | 112 | + *                              state = STATE_READY;
 | 113 | + * <timeout returns>
 | 114 | + * if (wait.state == STATE_READY) return;
 | 115 | + * msg_ptr = wait.msg;          // Access to stale data!
 | 116 | + *                              receiver->msg = message; (reordered)
 | 117 | + *
 | 118 | + * Solution: use _release and _acquire barriers.
 | 119 | + *
 | 120 | + * 3) There is intentionally no barrier when setting current->state
 | 121 | + *    to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the
 | 122 | + *    release memory barrier, and the wakeup is triggered when holding
 | 123 | + *    info->lock, i.e. spin_lock(&info->lock) provided a pairing
 | 124 | + *    acquire memory barrier.
 | 125 | + */
 | 126 | +
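The MQ_BARRIER scheme above condenses to a small waker/sleeper handshake. The following is a hypothetical distillation for illustration only, not code from this patch: the `waiter` struct and the `do_wake()`/`woken()` names are invented, and STATE_NONE/STATE_READY stand in for the mqueue definitions.

```c
/* Hypothetical distillation of the MQ_BARRIER pattern; invented names. */
struct waiter {
        struct task_struct *task;
        struct msg_msg *msg;
        int state;              /* STATE_NONE or STATE_READY */
};

/* Waker side, called with info->lock held (cf. __pipelined_op() below). */
static void do_wake(struct wake_q_head *wake_q, struct waiter *w,
                    struct msg_msg *m)
{
        struct task_struct *task;

        /* 1) hand over the payload */
        w->msg = m;
        /* 2) take the task reference before the sleeper can possibly exit */
        task = get_task_struct(w->task);
        /* 3) publish STATE_READY; pairs with the acquire in woken() */
        smp_store_release(&w->state, STATE_READY);
        /* 4) wake without dereferencing w again; consumes the reference */
        wake_q_add_safe(wake_q, task);
}

/* Sleeper side, after schedule() returns and before retaking info->lock. */
static bool woken(struct waiter *w)
{
        if (READ_ONCE(w->state) == STATE_READY) {
                /* pairs with the smp_store_release() in do_wake() */
                smp_acquire__after_ctrl_dep();
                return true;    /* w->msg is now safe to read */
        }
        return false;
}
```

The ordering is the whole point: taking the reference before the release store means a sleeper that observes STATE_READY and exits cannot turn the later wake_q_add_safe() into a use-after-free (race 1), and the release/acquire pair makes the message visible before the state flip (race 2).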
61 | 127 | struct ext_wait_queue {         /* queue of sleeping tasks */
62 | 128 |         struct task_struct *task;
63 | 129 |         struct list_head list;
.. | ..
71 | 137 |         wait_queue_head_t wait_q;
72 | 138 |
73 | 139 |         struct rb_root msg_tree;
 | 140 | +        struct rb_node *msg_tree_rightmost;
74 | 141 |         struct posix_msg_tree_node *node_cache;
75 | 142 |         struct mq_attr attr;
76 | 143 |
.. | ..
88 | 155 |         unsigned long qsize; /* size of queue in memory (sum of all msgs) */
89 | 156 | };
90 | 157 |
 | 158 | +static struct file_system_type mqueue_fs_type;
91 | 159 | static const struct inode_operations mqueue_dir_inode_operations;
92 | 160 | static const struct file_operations mqueue_file_operations;
93 | 161 | static const struct super_operations mqueue_super_ops;
 | 162 | +static const struct fs_context_operations mqueue_fs_context_ops;
94 | 163 | static void remove_notification(struct mqueue_inode_info *info);
95 | 164 |
96 | 165 | static struct kmem_cache *mqueue_inode_cachep;
.. | ..
125 | 194 | {
126 | 195 |         struct rb_node **p, *parent = NULL;
127 | 196 |         struct posix_msg_tree_node *leaf;
 | 197 | +        bool rightmost = true;
128 | 198 |
129 | 199 |         p = &info->msg_tree.rb_node;
130 | 200 |         while (*p) {
.. | ..
133 | 203 |
134 | 204 |                 if (likely(leaf->priority == msg->m_type))
135 | 205 |                         goto insert_msg;
136 |  | -                else if (msg->m_type < leaf->priority)
 | 206 | +                else if (msg->m_type < leaf->priority) {
137 | 207 |                         p = &(*p)->rb_left;
138 |  | -                else
 | 208 | +                        rightmost = false;
 | 209 | +                } else
139 | 210 |                         p = &(*p)->rb_right;
140 | 211 |         }
141 | 212 |         if (info->node_cache) {
.. | ..
148 | 219 |                 INIT_LIST_HEAD(&leaf->msg_list);
149 | 220 |         }
150 | 221 |         leaf->priority = msg->m_type;
 | 222 | +
 | 223 | +        if (rightmost)
 | 224 | +                info->msg_tree_rightmost = &leaf->rb_node;
 | 225 | +
151 | 226 |         rb_link_node(&leaf->rb_node, parent, p);
152 | 227 |         rb_insert_color(&leaf->rb_node, &info->msg_tree);
153 | 228 | insert_msg:
.. | ..
157 | 232 |         return 0;
158 | 233 | }
159 | 234 |
 | 235 | +static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
 | 236 | +                                  struct mqueue_inode_info *info)
 | 237 | +{
 | 238 | +        struct rb_node *node = &leaf->rb_node;
 | 239 | +
 | 240 | +        if (info->msg_tree_rightmost == node)
 | 241 | +                info->msg_tree_rightmost = rb_prev(node);
 | 242 | +
 | 243 | +        rb_erase(node, &info->msg_tree);
 | 244 | +        if (info->node_cache)
 | 245 | +                kfree(leaf);
 | 246 | +        else
 | 247 | +                info->node_cache = leaf;
 | 248 | +}
 | 249 | +
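The rightmost-node cache added above rests on one invariant kept across all three hunks: `info->msg_tree_rightmost` always points at the highest-priority leaf. Insertion updates it only when the descent never took a left turn; erasure falls back to `rb_prev()`. A hypothetical stand-alone sketch of that invariant (the kernel's `rb_root_cached` provides the analogous cache for the *leftmost* node, not the rightmost, hence the hand-rolled pointer here; `prio_tree` and its helpers are invented names):

```c
/* Hypothetical sketch of a max-cached rbtree; invented names. */
struct prio_tree {
        struct rb_root root;
        struct rb_node *rightmost;      /* highest key, O(1) to find */
};

/* 'rightmost' is true iff the descent to the insert slot never went left. */
static void prio_tree_insert(struct prio_tree *t, struct rb_node *node,
                             struct rb_node *parent, struct rb_node **link,
                             bool rightmost)
{
        if (rightmost)
                t->rightmost = node;
        rb_link_node(node, parent, link);
        rb_insert_color(node, &t->root);
}

static void prio_tree_erase(struct prio_tree *t, struct rb_node *node)
{
        /* the predecessor of the old maximum becomes the new maximum */
        if (t->rightmost == node)
                t->rightmost = rb_prev(node);
        rb_erase(node, &t->root);
}
```

The payoff shows up in msg_get() below: every receive previously walked O(log n) nodes to reach the maximum; with the cache it is a single pointer read.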
160 | 250 | static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
161 | 251 | {
162 |  | -        struct rb_node **p, *parent = NULL;
 | 252 | +        struct rb_node *parent = NULL;
163 | 253 |         struct posix_msg_tree_node *leaf;
164 | 254 |         struct msg_msg *msg;
165 | 255 |
166 | 256 | try_again:
167 |  | -        p = &info->msg_tree.rb_node;
168 |  | -        while (*p) {
169 |  | -                parent = *p;
170 |  | -                /*
171 |  | -                 * During insert, low priorities go to the left and high to the
172 |  | -                 * right. On receive, we want the highest priorities first, so
173 |  | -                 * walk all the way to the right.
174 |  | -                 */
175 |  | -                p = &(*p)->rb_right;
176 |  | -        }
 | 257 | +        /*
 | 258 | +         * During insert, low priorities go to the left and high to the
 | 259 | +         * right. On receive, we want the highest priorities first, so
 | 260 | +         * walk all the way to the right.
 | 261 | +         */
 | 262 | +        parent = info->msg_tree_rightmost;
177 | 263 |         if (!parent) {
178 | 264 |                 if (info->attr.mq_curmsgs) {
179 | 265 |                         pr_warn_once("Inconsistency in POSIX message queue, "
.. | ..
188 | 274 |                         pr_warn_once("Inconsistency in POSIX message queue, "
189 | 275 |                                      "empty leaf node but we haven't implemented "
190 | 276 |                                      "lazy leaf delete!\n");
191 |  | -                        rb_erase(&leaf->rb_node, &info->msg_tree);
192 |  | -                        if (info->node_cache) {
193 |  | -                                kfree(leaf);
194 |  | -                        } else {
195 |  | -                                info->node_cache = leaf;
196 |  | -                        }
 | 277 | +                        msg_tree_erase(leaf, info);
197 | 278 |                         goto try_again;
198 | 279 |                 } else {
199 | 280 |                         msg = list_first_entry(&leaf->msg_list,
200 | 281 |                                                struct msg_msg, m_list);
201 | 282 |                         list_del(&msg->m_list);
202 | 283 |                         if (list_empty(&leaf->msg_list)) {
203 |  | -                                rb_erase(&leaf->rb_node, &info->msg_tree);
204 |  | -                                if (info->node_cache) {
205 |  | -                                        kfree(leaf);
206 |  | -                                } else {
207 |  | -                                        info->node_cache = leaf;
208 |  | -                                }
 | 284 | +                                msg_tree_erase(leaf, info);
209 | 285 |                         }
210 | 286 |                 }
211 | 287 |                 info->attr.mq_curmsgs--;
.. | ..
248 | 324 |                 info->qsize = 0;
249 | 325 |                 info->user = NULL;      /* set when all is ok */
250 | 326 |                 info->msg_tree = RB_ROOT;
 | 327 | +                info->msg_tree_rightmost = NULL;
251 | 328 |                 info->node_cache = NULL;
252 | 329 |                 memset(&info->attr, 0, sizeof(info->attr));
253 | 330 |                 info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
.. | ..
323 | 400 |         return ERR_PTR(ret);
324 | 401 | }
325 | 402 |
326 |  | -static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
 | 403 | +static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
327 | 404 | {
328 | 405 |         struct inode *inode;
329 | 406 |         struct ipc_namespace *ns = sb->s_fs_info;
.. | ..
344 | 421 |         return 0;
345 | 422 | }
346 | 423 |
347 |  | -static struct dentry *mqueue_mount(struct file_system_type *fs_type,
348 |  | -                                   int flags, const char *dev_name,
349 |  | -                                   void *data)
 | 424 | +static int mqueue_get_tree(struct fs_context *fc)
350 | 425 | {
351 |  | -        struct ipc_namespace *ns;
352 |  | -        if (flags & SB_KERNMOUNT) {
353 |  | -                ns = data;
354 |  | -                data = NULL;
355 |  | -        } else {
356 |  | -                ns = current->nsproxy->ipc_ns;
 | 426 | +        struct mqueue_fs_context *ctx = fc->fs_private;
 | 427 | +
 | 428 | +        /*
 | 429 | +         * With a newly created ipc namespace, we don't need to do a search
 | 430 | +         * for an ipc namespace match, but we still need to set s_fs_info.
 | 431 | +         */
 | 432 | +        if (ctx->newns) {
 | 433 | +                fc->s_fs_info = ctx->ipc_ns;
 | 434 | +                return get_tree_nodev(fc, mqueue_fill_super);
357 | 435 |         }
358 |  | -        return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super);
 | 436 | +        return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
 | 437 | +}
 | 438 | +
 | 439 | +static void mqueue_fs_context_free(struct fs_context *fc)
 | 440 | +{
 | 441 | +        struct mqueue_fs_context *ctx = fc->fs_private;
 | 442 | +
 | 443 | +        put_ipc_ns(ctx->ipc_ns);
 | 444 | +        kfree(ctx);
 | 445 | +}
 | 446 | +
 | 447 | +static int mqueue_init_fs_context(struct fs_context *fc)
 | 448 | +{
 | 449 | +        struct mqueue_fs_context *ctx;
 | 450 | +
 | 451 | +        ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
 | 452 | +        if (!ctx)
 | 453 | +                return -ENOMEM;
 | 454 | +
 | 455 | +        ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
 | 456 | +        put_user_ns(fc->user_ns);
 | 457 | +        fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
 | 458 | +        fc->fs_private = ctx;
 | 459 | +        fc->ops = &mqueue_fs_context_ops;
 | 460 | +        return 0;
 | 461 | +}
 | 462 | +
 | 463 | +/*
 | 464 | + * mq_init_ns() is currently the only caller of mq_create_mount().
 | 465 | + * So the ns parameter is always a newly created ipc namespace.
 | 466 | + */
 | 467 | +static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
 | 468 | +{
 | 469 | +        struct mqueue_fs_context *ctx;
 | 470 | +        struct fs_context *fc;
 | 471 | +        struct vfsmount *mnt;
 | 472 | +
 | 473 | +        fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
 | 474 | +        if (IS_ERR(fc))
 | 475 | +                return ERR_CAST(fc);
 | 476 | +
 | 477 | +        ctx = fc->fs_private;
 | 478 | +        ctx->newns = true;
 | 479 | +        put_ipc_ns(ctx->ipc_ns);
 | 480 | +        ctx->ipc_ns = get_ipc_ns(ns);
 | 481 | +        put_user_ns(fc->user_ns);
 | 482 | +        fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
 | 483 | +
 | 484 | +        mnt = fc_mount(fc);
 | 485 | +        put_fs_context(fc);
 | 486 | +        return mnt;
359 | 487 | }
360 | 488 |
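The block above is the whole fs_context conversion in miniature. For orientation, here is a hypothetical minimal skeleton (`examplefs` is an invented name, not part of this patch) showing how the same pieces fit together under the new mount API:

```c
/* Hypothetical skeleton of an fs_context-based filesystem. */
static int examplefs_fill_super(struct super_block *sb, struct fs_context *fc)
{
        /* set sb->s_magic, sb->s_op, create the root inode/dentry, ... */
        return 0;
}

static int examplefs_get_tree(struct fs_context *fc)
{
        /*
         * New superblock per mount. mqueue_get_tree() instead keys on the
         * ipc namespace via get_tree_keyed(), so repeated mounts within
         * one namespace share a single superblock.
         */
        return get_tree_nodev(fc, examplefs_fill_super);
}

static const struct fs_context_operations examplefs_context_ops = {
        .get_tree       = examplefs_get_tree,
};

static int examplefs_init_fs_context(struct fs_context *fc)
{
        /* allocate per-mount state and hang it off fc->fs_private here */
        fc->ops = &examplefs_context_ops;
        return 0;
}

static struct file_system_type examplefs_type = {
        .name                   = "examplefs",
        .init_fs_context        = examplefs_init_fs_context,
        .kill_sb                = kill_anon_super,
};
```

In mq_create_mount() the `ctx->newns` flag then simply short-circuits the superblock lookup: a namespace created a moment ago cannot have a preexisting superblock, so get_tree_nodev() is both correct and cheaper there.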
361 | 489 | static void init_once(void *foo)
.. | ..
375 | 503 |         return &ei->vfs_inode;
376 | 504 | }
377 | 505 |
378 |  | -static void mqueue_i_callback(struct rcu_head *head)
 | 506 | +static void mqueue_free_inode(struct inode *inode)
379 | 507 | {
380 |  | -        struct inode *inode = container_of(head, struct inode, i_rcu);
381 | 508 |         kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
382 |  | -}
383 |  | -
384 |  | -static void mqueue_destroy_inode(struct inode *inode)
385 |  | -{
386 |  | -        call_rcu(&inode->i_rcu, mqueue_i_callback);
387 | 509 | }
388 | 510 |
389 | 511 | static void mqueue_evict_inode(struct inode *inode)
.. | ..
574 | 696 | {
575 | 697 |         struct ext_wait_queue *walk;
576 | 698 |
577 |  | -        ewp->task = current;
578 |  | -
579 | 699 |         list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
580 | 700 |                 if (walk->task->prio <= current->prio) {
581 | 701 |                         list_add_tail(&ewp->list, &walk->list);
.. | ..
600 | 720 |         wq_add(info, sr, ewp);
601 | 721 |
602 | 722 |         for (;;) {
 | 723 | +                /* memory barrier not required, we hold info->lock */
603 | 724 |                 __set_current_state(TASK_INTERRUPTIBLE);
604 | 725 |
605 | 726 |                 spin_unlock(&info->lock);
606 | 727 |                 time = schedule_hrtimeout_range_clock(timeout, 0,
607 | 728 |                                 HRTIMER_MODE_ABS, CLOCK_REALTIME);
608 | 729 |
609 |  | -                if (ewp->state == STATE_READY) {
 | 730 | +                if (READ_ONCE(ewp->state) == STATE_READY) {
 | 731 | +                        /* see MQ_BARRIER for purpose/pairing */
 | 732 | +                        smp_acquire__after_ctrl_dep();
610 | 733 |                         retval = 0;
611 | 734 |                         goto out;
612 | 735 |                 }
613 | 736 |                 spin_lock(&info->lock);
614 |  | -                if (ewp->state == STATE_READY) {
 | 737 | +
 | 738 | +                /* we hold info->lock, so no memory barrier required */
 | 739 | +                if (READ_ONCE(ewp->state) == STATE_READY) {
615 | 740 |                         retval = 0;
616 | 741 |                         goto out_unlock;
617 | 742 |                 }
.. | ..
667 | 792 |         case SIGEV_NONE:
668 | 793 |                 break;
669 | 794 |         case SIGEV_SIGNAL: {
670 |  | -                struct siginfo sig_i;
 | 795 | +                struct kernel_siginfo sig_i;
671 | 796 |                 struct task_struct *task;
672 | 797 |
673 | 798 |                 /* do_mq_notify() accepts sigev_signo == 0, why?? */
.. | ..
738 | 863 |         info->notify_user_ns = NULL;
739 | 864 | }
740 | 865 |
741 |  | -static int prepare_open(struct vfsmount *mnt, struct dentry *dentry, int oflag, int ro,
 | 866 | +static int prepare_open(struct dentry *dentry, int oflag, int ro,
742 | 867 |                         umode_t mode, struct filename *name,
743 | 868 |                         struct mq_attr *attr)
744 | 869 | {
.. | ..
752 | 877 |                 if (ro)
753 | 878 |                         return ro;
754 | 879 |                 audit_inode_parent_hidden(name, dentry->d_parent);
755 |  | -                return vfs_mkobj2(mnt, dentry, mode & ~current_umask(),
 | 880 | +                return vfs_mkobj(dentry, mode & ~current_umask(),
756 | 881 |                                   mqueue_create_attr, attr);
757 | 882 |         }
758 | 883 |         /* it already existed */
.. | ..
762 | 887 |         if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
763 | 888 |                 return -EINVAL;
764 | 889 |         acc = oflag2acc[oflag & O_ACCMODE];
765 |  | -        return inode_permission2(mnt, d_inode(dentry), acc);
 | 890 | +        return inode_permission(d_inode(dentry), acc);
766 | 891 | }
767 | 892 |
768 | 893 | static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
.. | ..
786 | 911 |
787 | 912 |         ro = mnt_want_write(mnt);       /* we'll drop it in any case */
788 | 913 |         inode_lock(d_inode(root));
789 |  | -        path.dentry = lookup_one_len2(name->name, mnt, root, strlen(name->name));
 | 914 | +        path.dentry = lookup_one_len(name->name, root, strlen(name->name));
790 | 915 |         if (IS_ERR(path.dentry)) {
791 | 916 |                 error = PTR_ERR(path.dentry);
792 | 917 |                 goto out_putfd;
793 | 918 |         }
794 | 919 |         path.mnt = mntget(mnt);
795 |  | -        error = prepare_open(path.mnt, path.dentry, oflag, ro, mode, name, attr);
 | 920 | +        error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
796 | 921 |         if (!error) {
797 | 922 |                 struct file *file = dentry_open(&path, oflag, current_cred());
798 | 923 |                 if (!IS_ERR(file))
.. | ..
842 | 967 |         if (err)
843 | 968 |                 goto out_name;
844 | 969 |         inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
845 |  | -        dentry = lookup_one_len2(name->name, mnt, mnt->mnt_root,
 | 970 | +        dentry = lookup_one_len(name->name, mnt->mnt_root,
846 | 971 |                                 strlen(name->name));
847 | 972 |         if (IS_ERR(dentry)) {
848 | 973 |                 err = PTR_ERR(dentry);
.. | ..
854 | 979 |                 err = -ENOENT;
855 | 980 |         } else {
856 | 981 |                 ihold(inode);
857 |  | -                err = vfs_unlink2(mnt, d_inode(dentry->d_parent), dentry, NULL);
 | 982 | +                err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
858 | 983 |         }
859 | 984 |         dput(dentry);
860 | 985 |
.. | ..
888 | 1013 |  * The same algorithm is used for senders.
889 | 1014 |  */
890 | 1015 |
 | 1016 | +static inline void __pipelined_op(struct wake_q_head *wake_q,
 | 1017 | +                                  struct mqueue_inode_info *info,
 | 1018 | +                                  struct ext_wait_queue *this)
 | 1019 | +{
 | 1020 | +        struct task_struct *task;
 | 1021 | +
 | 1022 | +        list_del(&this->list);
 | 1023 | +        task = get_task_struct(this->task);
 | 1024 | +
 | 1025 | +        /* see MQ_BARRIER for purpose/pairing */
 | 1026 | +        smp_store_release(&this->state, STATE_READY);
 | 1027 | +        wake_q_add_safe(wake_q, task);
 | 1028 | +}
 | 1029 | +
891 | 1030 | /* pipelined_send() - send a message directly to the task waiting in
892 | 1031 |  * sys_mq_timedreceive() (without inserting message into a queue).
893 | 1032 |  */
.. | ..
897 | 1036 |                            struct ext_wait_queue *receiver)
898 | 1037 | {
899 | 1038 |         receiver->msg = message;
900 |  | -        list_del(&receiver->list);
901 |  | -        wake_q_add(wake_q, receiver->task);
902 |  | -        /*
903 |  | -         * Rely on the implicit cmpxchg barrier from wake_q_add such
904 |  | -         * that we can ensure that updating receiver->state is the last
905 |  | -         * write operation: As once set, the receiver can continue,
906 |  | -         * and if we don't have the reference count from the wake_q,
907 |  | -         * yet, at that point we can later have a use-after-free
908 |  | -         * condition and bogus wakeup.
909 |  | -         */
910 |  | -        receiver->state = STATE_READY;
 | 1039 | +        __pipelined_op(wake_q, info, receiver);
911 | 1040 | }
912 | 1041 |
913 | 1042 | /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
.. | ..
925 | 1054 |         if (msg_insert(sender->msg, info))
926 | 1055 |                 return;
927 | 1056 |
928 |  | -        list_del(&sender->list);
929 |  | -        wake_q_add(wake_q, sender->task);
930 |  | -        sender->state = STATE_READY;
 | 1057 | +        __pipelined_op(wake_q, info, sender);
931 | 1058 | }
932 | 1059 |
933 | 1060 | static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
.. | ..
1014 | 1141 |         } else {
1015 | 1142 |                 wait.task = current;
1016 | 1143 |                 wait.msg = (void *) msg_ptr;
1017 |  | -                wait.state = STATE_NONE;
 | 1144 | +
 | 1145 | +                /* memory barrier not required, we hold info->lock */
 | 1146 | +                WRITE_ONCE(wait.state, STATE_NONE);
1018 | 1147 |                 ret = wq_sleep(info, SEND, timeout, &wait);
1019 | 1148 |                 /*
1020 | 1149 |                  * wq_sleep must be called with info->lock held, and
.. | ..
1117 | 1246 |                 ret = -EAGAIN;
1118 | 1247 |         } else {
1119 | 1248 |                 wait.task = current;
1120 |  | -                wait.state = STATE_NONE;
 | 1249 | +
 | 1250 | +                /* memory barrier not required, we hold info->lock */
 | 1251 | +                WRITE_ONCE(wait.state, STATE_NONE);
1121 | 1252 |                 ret = wq_sleep(info, RECV, timeout, &wait);
1122 | 1253 |                 msg_ptr = wait.msg;
1123 | 1254 |         }
.. | ..
1210 | 1341 |
1211 | 1342 |                 /* create the notify skb */
1212 | 1343 |                 nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
1213 |  | -                if (!nc) {
1214 |  | -                        ret = -ENOMEM;
1215 |  | -                        goto out;
1216 |  | -                }
 | 1344 | +                if (!nc)
 | 1345 | +                        return -ENOMEM;
 | 1346 | +
1217 | 1347 |                 if (copy_from_user(nc->data,
1218 | 1348 |                                    notification->sigev_value.sival_ptr,
1219 | 1349 |                                    NOTIFY_COOKIE_LEN)) {
1220 | 1350 |                         ret = -EFAULT;
1221 |  | -                        goto out;
 | 1351 | +                        goto free_skb;
1222 | 1352 |                 }
1223 | 1353 |
1224 | 1354 |                 /* TODO: add a header? */
.. | ..
1234 | 1364 |                 fdput(f);
1235 | 1365 |                 if (IS_ERR(sock)) {
1236 | 1366 |                         ret = PTR_ERR(sock);
1237 |  | -                        sock = NULL;
1238 |  | -                        goto out;
 | 1367 | +                        goto free_skb;
1239 | 1368 |                 }
1240 | 1369 |
1241 | 1370 |                 timeo = MAX_SCHEDULE_TIMEOUT;
.. | ..
1244 | 1373 |                         sock = NULL;
1245 | 1374 |                         goto retry;
1246 | 1375 |                 }
1247 |  | -                if (ret) {
1248 |  | -                        sock = NULL;
1249 |  | -                        nc = NULL;
1250 |  | -                        goto out;
1251 |  | -                }
 | 1376 | +                if (ret)
 | 1377 | +                        return ret;
1252 | 1378 |         }
1253 | 1379 | }
1254 | 1380 |
.. | ..
1304 | 1430 | out:
1305 | 1431 |         if (sock)
1306 | 1432 |                 netlink_detachskb(sock, nc);
1307 |  | -        else if (nc)
 | 1433 | +        else
 | 1434 | +free_skb:
1308 | 1435 |                 dev_kfree_skb(nc);
1309 | 1436 |
1310 | 1437 |         return ret;
.. | ..
1486 | 1613 | #endif
1487 | 1614 |
1488 | 1615 | #ifdef CONFIG_COMPAT_32BIT_TIME
1489 |  | -static int compat_prepare_timeout(const struct compat_timespec __user *p,
 | 1616 | +static int compat_prepare_timeout(const struct old_timespec32 __user *p,
1490 | 1617 |                                   struct timespec64 *ts)
1491 | 1618 | {
1492 |  | -        if (compat_get_timespec64(ts, p))
 | 1619 | +        if (get_old_timespec32(ts, p))
1493 | 1620 |                 return -EFAULT;
1494 | 1621 |         if (!timespec64_valid(ts))
1495 | 1622 |                 return -EINVAL;
1496 | 1623 |         return 0;
1497 | 1624 | }
1498 | 1625 |
1499 |  | -COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
1500 |  | -                       const char __user *, u_msg_ptr,
1501 |  | -                       compat_size_t, msg_len, unsigned int, msg_prio,
1502 |  | -                       const struct compat_timespec __user *, u_abs_timeout)
 | 1626 | +SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
 | 1627 | +                const char __user *, u_msg_ptr,
 | 1628 | +                unsigned int, msg_len, unsigned int, msg_prio,
 | 1629 | +                const struct old_timespec32 __user *, u_abs_timeout)
1503 | 1630 | {
1504 | 1631 |         struct timespec64 ts, *p = NULL;
1505 | 1632 |         if (u_abs_timeout) {
.. | ..
1511 | 1638 |         return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
1512 | 1639 | }
1513 | 1640 |
1514 |  | -COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes,
1515 |  | -                       char __user *, u_msg_ptr,
1516 |  | -                       compat_size_t, msg_len, unsigned int __user *, u_msg_prio,
1517 |  | -                       const struct compat_timespec __user *, u_abs_timeout)
 | 1641 | +SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
 | 1642 | +                char __user *, u_msg_ptr,
 | 1643 | +                unsigned int, msg_len, unsigned int __user *, u_msg_prio,
 | 1644 | +                const struct old_timespec32 __user *, u_abs_timeout)
1518 | 1645 | {
1519 | 1646 |         struct timespec64 ts, *p = NULL;
1520 | 1647 |         if (u_abs_timeout) {
.. | ..
1542 | 1669 |
1543 | 1670 | static const struct super_operations mqueue_super_ops = {
1544 | 1671 |         .alloc_inode = mqueue_alloc_inode,
1545 |  | -        .destroy_inode = mqueue_destroy_inode,
 | 1672 | +        .free_inode = mqueue_free_inode,
1546 | 1673 |         .evict_inode = mqueue_evict_inode,
1547 | 1674 |         .statfs = simple_statfs,
1548 | 1675 | };
1549 | 1676 |
 | 1677 | +static const struct fs_context_operations mqueue_fs_context_ops = {
 | 1678 | +        .free = mqueue_fs_context_free,
 | 1679 | +        .get_tree = mqueue_get_tree,
 | 1680 | +};
 | 1681 | +
1550 | 1682 | static struct file_system_type mqueue_fs_type = {
1551 |  | -        .name = "mqueue",
1552 |  | -        .mount = mqueue_mount,
1553 |  | -        .kill_sb = kill_litter_super,
1554 |  | -        .fs_flags = FS_USERNS_MOUNT,
 | 1683 | +        .name                   = "mqueue",
 | 1684 | +        .init_fs_context        = mqueue_init_fs_context,
 | 1685 | +        .kill_sb                = kill_litter_super,
 | 1686 | +        .fs_flags               = FS_USERNS_MOUNT,
1555 | 1687 | };
1556 | 1688 |
---|
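Once `.init_fs_context` is wired up, the filesystem is reachable both through classic mount(2) and through the new mount syscalls that drive this fs_context machinery directly. A hypothetical userspace sketch follows (it assumes a kernel and headers exposing `SYS_fsopen` and friends plus `FSCONFIG_CMD_CREATE`/`MOVE_MOUNT_F_EMPTY_PATH` from `<linux/mount.h>`, and that /dev/mqueue exists; error paths abbreviated):

```c
/* Hypothetical sketch: mounting mqueue via the new mount API syscalls. */
#define _GNU_SOURCE
#include <fcntl.h>              /* AT_FDCWD */
#include <linux/mount.h>        /* FSCONFIG_CMD_CREATE, MOVE_MOUNT_F_EMPTY_PATH */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        /* fsopen() creates the fs_context: mqueue_init_fs_context() runs */
        int fsfd = syscall(SYS_fsopen, "mqueue", 0);
        if (fsfd < 0) { perror("fsopen"); return 1; }

        /* FSCONFIG_CMD_CREATE drives ->get_tree(), i.e. mqueue_get_tree() */
        if (syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
                perror("fsconfig"); return 1;
        }

        /* turn the configured context into a detached mount ... */
        int mntfd = syscall(SYS_fsmount, fsfd, 0, 0);
        if (mntfd < 0) { perror("fsmount"); return 1; }

        /* ... and attach it at /dev/mqueue */
        if (syscall(SYS_move_mount, mntfd, "", AT_FDCWD, "/dev/mqueue",
                    MOVE_MOUNT_F_EMPTY_PATH) < 0) {
                perror("move_mount"); return 1;
        }
        close(mntfd);
        close(fsfd);
        return 0;
}
```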
1557 | 1689 | int mq_init_ns(struct ipc_namespace *ns)
1558 | 1690 | {
 | 1691 | +        struct vfsmount *m;
 | 1692 | +
1559 | 1693 |         ns->mq_queues_count = 0;
1560 | 1694 |         ns->mq_queues_max = DFLT_QUEUESMAX;
1561 | 1695 |         ns->mq_msg_max = DFLT_MSGMAX;
.. | ..
1563 | 1697 |         ns->mq_msg_default = DFLT_MSG;
1564 | 1698 |         ns->mq_msgsize_default = DFLT_MSGSIZE;
1565 | 1699 |
1566 |  | -        ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
1567 |  | -        if (IS_ERR(ns->mq_mnt)) {
1568 |  | -                int err = PTR_ERR(ns->mq_mnt);
1569 |  | -                ns->mq_mnt = NULL;
1570 |  | -                return err;
1571 |  | -        }
 | 1700 | +        m = mq_create_mount(ns);
 | 1701 | +        if (IS_ERR(m))
 | 1702 | +                return PTR_ERR(m);
 | 1703 | +        ns->mq_mnt = m;
1572 | 1704 |         return 0;
1573 | 1705 | }
1574 | 1706 |