From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/ipc/msg.c | 94 ++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 76 insertions(+), 18 deletions(-) diff --git a/kernel/ipc/msg.c b/kernel/ipc/msg.c index ac4de3f..8ded6b8 100644 --- a/kernel/ipc/msg.c +++ b/kernel/ipc/msg.c @@ -61,6 +61,16 @@ struct list_head q_senders; } __randomize_layout; +/* + * MSG_BARRIER Locking: + * + * Similar to the optimization used in ipc/mqueue.c, one syscall return path + * does not acquire any locks when it sees that a message exists in + * msg_receiver.r_msg. Therefore r_msg is set using smp_store_release() + * and accessed using READ_ONCE()+smp_acquire__after_ctrl_dep(). In addition, + * wake_q_add_safe() is used. See ipc/mqueue.c for more details + */ + /* one msg_receiver structure for each sleeping receiver */ struct msg_receiver { struct list_head r_list; @@ -137,7 +147,7 @@ key_t key = params->key; int msgflg = params->flg; - msq = kvmalloc(sizeof(*msq), GFP_KERNEL); + msq = kvmalloc(sizeof(*msq), GFP_KERNEL_ACCOUNT); if (unlikely(!msq)) return -ENOMEM; @@ -184,6 +194,10 @@ { mss->tsk = current; mss->msgsz = msgsz; + /* + * No memory barrier required: we did ipc_lock_object(), + * and the waker obtains that lock before calling wake_q_add(). + */ __set_current_state(TASK_INTERRUPTIBLE); list_add_tail(&mss->list, &msq->q_senders); } @@ -237,8 +251,13 @@ struct msg_receiver *msr, *t; list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { - wake_q_add(wake_q, msr->r_tsk); - WRITE_ONCE(msr->r_msg, ERR_PTR(res)); + struct task_struct *r_tsk; + + r_tsk = get_task_struct(msr->r_tsk); + + /* see MSG_BARRIER for purpose/pairing */ + smp_store_release(&msr->r_msg, ERR_PTR(res)); + wake_q_add_safe(wake_q, r_tsk); } } @@ -251,6 +270,8 @@ * before freeque() is called. msg_ids.rwsem remains locked on exit. */ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) + __releases(RCU) + __releases(&msq->q_perm) { struct msg_msg *msg, *t; struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); @@ -567,9 +588,8 @@ return err; } -long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int version) { - int version; struct ipc_namespace *ns; struct msqid64_ds msqid64; int err; @@ -577,7 +597,6 @@ if (msqid < 0 || cmd < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -614,8 +633,22 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) { - return ksys_msgctl(msqid, cmd, buf); + return ksys_msgctl(msqid, cmd, buf, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +{ + int version = ipc_parse_version(&cmd); + + return ksys_msgctl(msqid, cmd, buf, version); +} + +SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + return ksys_old_msgctl(msqid, cmd, buf); +} +#endif #ifdef CONFIG_COMPAT @@ -623,9 +656,9 @@ struct compat_ipc_perm msg_perm; compat_uptr_t msg_first; compat_uptr_t msg_last; - compat_time_t msg_stime; - compat_time_t msg_rtime; - compat_time_t msg_ctime; + old_time32_t msg_stime; + old_time32_t msg_rtime; + old_time32_t msg_ctime; compat_ulong_t msg_lcbytes; compat_ulong_t msg_lqbytes; unsigned short msg_cbytes; @@ -690,12 +723,11 @@ } } -long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr) +static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int version) { struct ipc_namespace *ns; int err; struct msqid64_ds msqid64; - int version = compat_ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; @@ -735,8 +767,22 @@ COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) { - return compat_ksys_msgctl(msqid, cmd, uptr); + return compat_ksys_msgctl(msqid, cmd, uptr, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_msgctl(msqid, cmd, uptr, version); +} + +COMPAT_SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, void __user *, uptr) +{ + return compat_ksys_old_msgctl(msqid, cmd, uptr); +} +#endif #endif static int testmsg(struct msg_msg *msg, long type, int mode) @@ -774,13 +820,17 @@ list_del(&msr->r_list); if (msr->r_maxsize < msg->m_ts) { wake_q_add(wake_q, msr->r_tsk); - WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG)); + + /* See expunge_all regarding memory barrier */ + smp_store_release(&msr->r_msg, ERR_PTR(-E2BIG)); } else { ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk)); msq->q_rtime = ktime_get_real_seconds(); wake_q_add(wake_q, msr->r_tsk); - WRITE_ONCE(msr->r_msg, msg); + + /* See expunge_all regarding memory barrier */ + smp_store_release(&msr->r_msg, msg); return 1; } } @@ -1130,7 +1180,11 @@ msr_d.r_maxsize = INT_MAX; else msr_d.r_maxsize = bufsz; - msr_d.r_msg = ERR_PTR(-EAGAIN); + + /* memory barrier not require due to ipc_lock_object() */ + WRITE_ONCE(msr_d.r_msg, ERR_PTR(-EAGAIN)); + + /* memory barrier not required, we own ipc_lock_object() */ __set_current_state(TASK_INTERRUPTIBLE); ipc_unlock_object(&msq->q_perm); @@ -1159,8 +1213,12 @@ * signal) it will either see the message and continue ... */ msg = READ_ONCE(msr_d.r_msg); - if (msg != ERR_PTR(-EAGAIN)) + if (msg != ERR_PTR(-EAGAIN)) { + /* see MSG_BARRIER for purpose/pairing */ + smp_acquire__after_ctrl_dep(); + goto out_unlock1; + } /* * ... or see -EAGAIN, acquire the lock to check the message @@ -1168,7 +1226,7 @@ */ ipc_lock_object(&msq->q_perm); - msg = msr_d.r_msg; + msg = READ_ONCE(msr_d.r_msg); if (msg != ERR_PTR(-EAGAIN)) goto out_unlock0; -- Gitblit v1.6.2