2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/ipc/sem.c
@@ -205,15 +205,38 @@
  *
  * Memory ordering:
  * Most ordering is enforced by using spin_lock() and spin_unlock().
- * The special case is use_global_lock:
+ *
+ * Exceptions:
+ * 1) use_global_lock: (SEM_BARRIER_1)
  * Setting it from non-zero to 0 is a RELEASE, this is ensured by
- * using smp_store_release().
+ * using smp_store_release(): Immediately after setting it to 0,
+ * a simple op can start.
  * Testing if it is non-zero is an ACQUIRE, this is ensured by using
  * smp_load_acquire().
  * Setting it from 0 to non-zero must be ordered with regards to
  * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
  * is inside a spin_lock() and after a write from 0 to non-zero a
  * spin_lock()+spin_unlock() is done.
+ *
+ * 2) queue.status: (SEM_BARRIER_2)
+ * Initialization is done while holding sem_lock(), so no further barrier is
+ * required.
+ * Setting it to a result code is a RELEASE, this is ensured by both a
+ * smp_store_release() (for case a) and while holding sem_lock()
+ * (for case b).
+ * The ACQUIRE when reading the result code without holding sem_lock() is
+ * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep()
+ * (case a above).
+ * Reading the result code while holding sem_lock() needs no further barriers,
+ * the locks inside sem_lock() enforce ordering (case b above).
+ *
+ * 3) current->state:
+ * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
+ * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
+ * happen immediately after calling wake_q_add. As wake_q_add_safe() is called
+ * when holding sem_lock(), no further barriers are required.
+ *
+ * See also ipc/mqueue.c for more details on the covered races.
  */
 
 #define sc_semmsl sem_ctls[0]
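
The SEM_BARRIER_1 pairing documented above can be illustrated outside the kernel. Below is a minimal userspace C11 sketch (all names are hypothetical, this is not kernel code): atomic_store_explicit() with memory_order_release stands in for smp_store_release(), and atomic_load_explicit() with memory_order_acquire for smp_load_acquire(). Every write made before the release store is guaranteed to be visible to a thread whose acquire load observes use_global_lock == 0.

#include <stdatomic.h>

struct sem_array_sketch {
        _Atomic int use_global_lock;
        int protected_state;            /* stands in for the semaphore data */
};

/* Complex-op side: all writes above the release store become visible to
 * any thread whose acquire load sees use_global_lock == 0. */
void finish_complex_op(struct sem_array_sketch *sma)
{
        sma->protected_state = 42;      /* work done under the global lock */
        atomic_store_explicit(&sma->use_global_lock, 0,
                              memory_order_release);  /* smp_store_release() */
}

/* Simple-op fast path: if the acquire load reads 0, the writes that
 * preceded the release store above are guaranteed to be visible. */
int try_fast_path(struct sem_array_sketch *sma)
{
        if (!atomic_load_explicit(&sma->use_global_lock,
                                  memory_order_acquire)) /* smp_load_acquire() */
                return sma->protected_state;    /* safe to read */
        return -1;                      /* fall back to the global lock */
}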
@@ -344,12 +367,8 @@
                return;
        }
        if (sma->use_global_lock == 1) {
-               /*
-                * Immediately after setting use_global_lock to 0,
-                * a simple op can start. Thus: all memory writes
-                * performed by the current operation must be visible
-                * before we set use_global_lock to 0.
-                */
+
+               /* See SEM_BARRIER_1 for purpose/pairing */
                smp_store_release(&sma->use_global_lock, 0);
        } else {
                sma->use_global_lock--;
@@ -400,7 +419,7 @@
         */
        spin_lock(&sem->lock);
 
-       /* pairs with smp_store_release() */
+       /* see SEM_BARRIER_1 for purpose/pairing */
        if (!smp_load_acquire(&sma->use_global_lock)) {
                /* fast path successful! */
                return sops->sem_num;
@@ -488,17 +507,13 @@
 static struct sem_array *sem_alloc(size_t nsems)
 {
        struct sem_array *sma;
-       size_t size;
 
        if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
                return NULL;
 
-       size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
-       sma = kvmalloc(size, GFP_KERNEL);
+       sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT);
        if (unlikely(!sma))
                return NULL;
-
-       memset(sma, 0, size);
 
        return sma;
 }
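
Three independent improvements are folded into this hunk: struct_size() computes sizeof(*sma) + nsems * sizeof(sma->sems[0]) with overflow checking (it saturates to SIZE_MAX, so the allocation fails cleanly instead of being undersized), kvzalloc() returns zeroed memory and makes the separate memset() unnecessary, and GFP_KERNEL_ACCOUNT charges the allocation to the caller's memory cgroup. A rough userspace sketch of the overflow-checked size computation (a hypothetical helper, not the kernel macro):

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for the kernel's struct_size(): returns SIZE_MAX
 * on overflow so a subsequent allocation fails instead of being short. */
static size_t flex_array_size_checked(size_t base, size_t elem, size_t n)
{
        size_t bytes;

        if (__builtin_mul_overflow(n, elem, &bytes) ||
            __builtin_add_overflow(bytes, base, &bytes))
                return SIZE_MAX;
        return bytes;
}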
@@ -570,8 +585,7 @@
 /*
  * Called with sem_ids.rwsem and ipcp locked.
  */
-static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
-                                 struct ipc_params *params)
+static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
 {
        struct sem_array *sma;
 
@@ -770,15 +784,14 @@
 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
                                             struct wake_q_head *wake_q)
 {
-       wake_q_add(wake_q, q->sleeper);
-       /*
-        * Rely on the above implicit barrier, such that we can
-        * ensure that we hold reference to the task before setting
-        * q->status. Otherwise we could race with do_exit if the
-        * task is awoken by an external event before calling
-        * wake_up_process().
-        */
-       WRITE_ONCE(q->status, error);
+       struct task_struct *sleeper;
+
+       sleeper = get_task_struct(q->sleeper);
+
+       /* see SEM_BARRIER_2 for purpose/pairing */
+       smp_store_release(&q->status, error);
+
+       wake_q_add_safe(wake_q, sleeper);
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
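
The ordering here is subtle: the old code relied on the barrier implied by wake_q_add() to pin the task before q->status was set, while the new code takes an explicit reference first, publishes the result with a release store, and hands the reference off to wake_q_add_safe(). A userspace C11 sketch of the pin-then-publish idea (hypothetical refcount API, not the kernel's):

#include <stdatomic.h>

struct task_sketch { _Atomic int refcount; };
struct queue_sketch {
        struct task_sketch *sleeper;
        _Atomic int status;
};

/* Pin the sleeper BEFORE publishing the result: once status != -EINTR is
 * visible, the sleeper may return, and its task could otherwise be freed
 * before the deferred wakeup runs. */
void wake_prepare_sketch(struct queue_sketch *q, int error)
{
        struct task_sketch *sleeper = q->sleeper;

        atomic_fetch_add(&sleeper->refcount, 1);        /* get_task_struct() */
        atomic_store_explicit(&q->status, error,
                              memory_order_release);    /* smp_store_release() */
        /* ... wake_q_add_safe() would then consume the pinned reference ... */
}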
@@ -1634,9 +1647,8 @@
        return err;
 }
 
-long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
+static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version)
 {
-       int version;
        struct ipc_namespace *ns;
        void __user *p = (void __user *)arg;
        struct semid64_ds semid64;
@@ -1645,7 +1657,6 @@
        if (semid < 0)
                return -EINVAL;
 
-       version = ipc_parse_version(&cmd);
        ns = current->nsproxy->ipc_ns;
 
        switch (cmd) {
@@ -1682,6 +1693,7 @@
        case IPC_SET:
                if (copy_semid_from_user(&semid64, p, version))
                        return -EFAULT;
+               fallthrough;
        case IPC_RMID:
                return semctl_down(ns, semid, cmd, &semid64);
        default:
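
fallthrough; is the kernel's pseudo-keyword from include/linux/compiler_attributes.h; on compilers that support it, it expands to __attribute__((__fallthrough__)), turning what used to be a comment annotation into something -Wimplicit-fallthrough can actually verify. A standalone sketch of the same annotation (GCC/Clang, hypothetical function):

/* Outside the kernel the same annotation can be spelled directly: */
#define fallthrough __attribute__((__fallthrough__))

int classify(int cmd, int *flags)
{
        switch (cmd) {
        case 1:
                *flags |= 0x1;  /* set up, then deliberately fall through */
                fallthrough;
        case 2:
                return *flags;
        default:
                return -1;
        }
}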
@@ -1691,15 +1703,29 @@
 
 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
 {
-       return ksys_semctl(semid, semnum, cmd, arg);
+       return ksys_semctl(semid, semnum, cmd, arg, IPC_64);
 }
+
+#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
+long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg)
+{
+       int version = ipc_parse_version(&cmd);
+
+       return ksys_semctl(semid, semnum, cmd, arg, version);
+}
+
+SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
+{
+       return ksys_old_semctl(semid, semnum, cmd, arg);
+}
+#endif
 
 #ifdef CONFIG_COMPAT
 
 struct compat_semid_ds {
        struct compat_ipc_perm sem_perm;
-       compat_time_t sem_otime;
-       compat_time_t sem_ctime;
+       old_time32_t sem_otime;
+       old_time32_t sem_ctime;
        compat_uptr_t sem_base;
        compat_uptr_t sem_pending;
        compat_uptr_t sem_pending_last;
@@ -1744,12 +1770,11 @@
        }
 }
 
-long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
+static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version)
 {
        void __user *p = compat_ptr(arg);
        struct ipc_namespace *ns;
        struct semid64_ds semid64;
-       int version = compat_ipc_parse_version(&cmd);
        int err;
 
        ns = current->nsproxy->ipc_ns;
@@ -1782,7 +1807,7 @@
        case IPC_SET:
                if (copy_compat_semid_from_user(&semid64, p, version))
                        return -EFAULT;
-               /* fallthru */
+               fallthrough;
        case IPC_RMID:
                return semctl_down(ns, semid, cmd, &semid64);
        default:
@@ -1792,8 +1817,22 @@
 
 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
 {
-       return compat_ksys_semctl(semid, semnum, cmd, arg);
+       return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64);
 }
+
+#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg)
+{
+       int version = compat_ipc_parse_version(&cmd);
+
+       return compat_ksys_semctl(semid, semnum, cmd, arg, version);
+}
+
+COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg)
+{
+       return compat_ksys_old_semctl(semid, semnum, cmd, arg);
+}
+#endif
 #endif
 
 /* If the task doesn't already have a undo_list, then allocate one
@@ -1813,7 +1852,7 @@
 
        undo_list = current->sysvsem.undo_list;
        if (!undo_list) {
-               undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
+               undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT);
                if (undo_list == NULL)
                        return -ENOMEM;
                spin_lock_init(&undo_list->lock);
@@ -1830,7 +1869,8 @@
 {
        struct sem_undo *un;
 
-       list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
+       list_for_each_entry_rcu(un, &ulp->list_proc, list_proc,
+                               spin_is_locked(&ulp->lock)) {
                if (un->semid == semid)
                        return un;
        }
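
The extra argument is the optional lockdep condition that list_for_each_entry_rcu() accepts since the consolidated RCU-checking work: the traversal is legal either inside an RCU read-side critical section or while holding ulp->lock, and the condition documents (and silences the RCU-lockdep splat for) the latter case. A sketch of the idiom in kernel context (illustrative only, not standalone; names are hypothetical):

struct entry {
        int id;
        struct list_head node;  /* linked under RCU or 'lock' */
};

static struct entry *find_entry(struct list_head *head, spinlock_t *lock, int id)
{
        struct entry *e;

        /* Safe under rcu_read_lock() OR with 'lock' held; the fourth
         * argument tells RCU's lockdep checking about the second case. */
        list_for_each_entry_rcu(e, head, node, spin_is_locked(lock)) {
                if (e->id == id)
                        return e;
        }
        return NULL;
}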
@@ -1897,7 +1937,7 @@
        rcu_read_unlock();
 
        /* step 2: allocate new undo structure */
-       new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+       new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL_ACCOUNT);
        if (!new) {
                ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                return ERR_PTR(-ENOMEM);
@@ -2125,9 +2165,11 @@
        }
 
        do {
+               /* memory ordering ensured by the lock in sem_lock() */
                WRITE_ONCE(queue.status, -EINTR);
                queue.sleeper = current;
 
+               /* memory ordering is ensured by the lock in sem_lock() */
                __set_current_state(TASK_INTERRUPTIBLE);
                sem_unlock(sma, locknum);
                rcu_read_unlock();
@@ -2148,24 +2190,23 @@
                 * scenarios where we were awakened externally, during the
                 * window between wake_q_add() and wake_up_q().
                 */
+               rcu_read_lock();
                error = READ_ONCE(queue.status);
                if (error != -EINTR) {
-                       /*
-                        * User space could assume that semop() is a memory
-                        * barrier: Without the mb(), the cpu could
-                        * speculatively read in userspace stale data that was
-                        * overwritten by the previous owner of the semaphore.
-                        */
-                       smp_mb();
+                       /* see SEM_BARRIER_2 for purpose/pairing */
+                       smp_acquire__after_ctrl_dep();
+                       rcu_read_unlock();
                        goto out_free;
                }
 
-               rcu_read_lock();
                locknum = sem_lock(sma, sops, nsops);
 
                if (!ipc_valid_object(&sma->sem_perm))
                        goto out_unlock_free;
 
+               /*
+                * No necessity for any barrier: we are protected by sem_lock().
+                */
                error = READ_ONCE(queue.status);
 
                /*
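
smp_acquire__after_ctrl_dep() upgrades the control dependency on the preceding READ_ONCE() into an ACQUIRE, which is what lets this lockless exit path (case a of SEM_BARRIER_2) pair with the smp_store_release() in wake_up_sem_queue_prepare(). In C11 terms it is roughly a relaxed load, a branch on the value, and then an acquire fence, as in this userspace sketch (hypothetical names):

#include <errno.h>
#include <stdatomic.h>

/* Lockless reader side of the SEM_BARRIER_2 pattern. */
int wait_result_sketch(_Atomic int *status)
{
        /* READ_ONCE() analogue */
        int error = atomic_load_explicit(status, memory_order_relaxed);

        if (error != -EINTR) {
                /* smp_acquire__after_ctrl_dep() analogue: after the branch,
                 * promote the read to an ACQUIRE so everything the waker
                 * wrote before its release store is now visible. */
                atomic_thread_fence(memory_order_acquire);
                return error;           /* fast path, no lock taken */
        }
        return -EINTR;                  /* fall back to taking the lock */
}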
@@ -2214,20 +2255,20 @@
 #ifdef CONFIG_COMPAT_32BIT_TIME
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
                            unsigned int nsops,
-                           const struct compat_timespec __user *timeout)
+                           const struct old_timespec32 __user *timeout)
 {
        if (timeout) {
                struct timespec64 ts;
-               if (compat_get_timespec64(&ts, timeout))
+               if (get_old_timespec32(&ts, timeout))
                        return -EFAULT;
                return do_semtimedop(semid, tsems, nsops, &ts);
        }
        return do_semtimedop(semid, tsems, nsops, NULL);
 }
 
-COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
+SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems,
                       unsigned int, nsops,
-                      const struct compat_timespec __user *, timeout)
+                      const struct old_timespec32 __user *, timeout)
 {
        return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
 }
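
compat_timespec/compat_get_timespec64 and old_timespec32/get_old_timespec32 describe the same 32-bit time representation under the y2038-aware naming; the helper copies the user structure and widens the seconds field to 64 bits. Roughly, in a userspace sketch (types approximated, names hypothetical):

#include <stdint.h>

struct old_timespec32_sketch { int32_t tv_sec; int32_t tv_nsec; };
struct timespec64_sketch     { int64_t tv_sec; long    tv_nsec; };

/* What get_old_timespec32() does after copying from user space:
 * sign-extend the 32-bit seconds so the value survives past 2038. */
static struct timespec64_sketch widen(struct old_timespec32_sketch ts32)
{
        struct timespec64_sketch ts64 = {
                .tv_sec  = ts32.tv_sec,
                .tv_nsec = ts32.tv_nsec,
        };
        return ts64;
}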