hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/fs/eventfd.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * fs/eventfd.c
34 *
....@@ -21,8 +22,12 @@
2122 #include <linux/eventfd.h>
2223 #include <linux/proc_fs.h>
2324 #include <linux/seq_file.h>
25
+#include <linux/idr.h>
26
+#include <linux/uio.h>
2427
2528 DEFINE_PER_CPU(int, eventfd_wake_count);
29
+
30
+static DEFINE_IDA(eventfd_ida);
2631
2732 struct eventfd_ctx {
2833 struct kref kref;
....@@ -37,23 +42,10 @@
3742 */
3843 __u64 count;
3944 unsigned int flags;
45
+ int id;
4046 };
4147
42
-/**
43
- * eventfd_signal - Adds @n to the eventfd counter.
44
- * @ctx: [in] Pointer to the eventfd context.
45
- * @n: [in] Value of the counter to be added to the eventfd internal counter.
46
- * The value cannot be negative.
47
- *
48
- * This function is supposed to be called by the kernel in paths that do not
49
- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
50
- * value, and we signal this as overflow condition by returning a EPOLLERR
51
- * to poll(2).
52
- *
53
- * Returns the amount by which the counter was incremented. This will be less
54
- * than @n if the counter has overflowed.
55
- */
56
-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
48
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
5749 {
5850 unsigned long flags;
5951
....@@ -74,16 +66,37 @@
7466 n = ULLONG_MAX - ctx->count;
7567 ctx->count += n;
7668 if (waitqueue_active(&ctx->wqh))
77
- wake_up_locked_poll(&ctx->wqh, EPOLLIN);
69
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
7870 this_cpu_dec(eventfd_wake_count);
7971 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
8072
8173 return n;
8274 }
75
+
76
+/**
77
+ * eventfd_signal - Adds @n to the eventfd counter.
78
+ * @ctx: [in] Pointer to the eventfd context.
79
+ * @n: [in] Value of the counter to be added to the eventfd internal counter.
80
+ * The value cannot be negative.
81
+ *
82
+ * This function is supposed to be called by the kernel in paths that do not
83
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
84
+ * value, and we signal this as an overflow condition by returning an EPOLLERR
85
+ * to poll(2).
86
+ *
87
+ * Returns the amount by which the counter was incremented. This will be less
88
+ * than @n if the counter has overflowed.
89
+ */
90
+__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
91
+{
92
+ return eventfd_signal_mask(ctx, n, 0);
93
+}
8394 EXPORT_SYMBOL_GPL(eventfd_signal);
8495
8596 static void eventfd_free_ctx(struct eventfd_ctx *ctx)
8697 {
98
+ if (ctx->id >= 0)
99
+ ida_simple_remove(&eventfd_ida, ctx->id);
87100 kfree(ctx);
88101 }
89102
....@@ -174,11 +187,14 @@
174187 return events;
175188 }
176189
177
-static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
190
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
178191 {
179
- *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
192
+ lockdep_assert_held(&ctx->wqh.lock);
193
+
194
+ *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
180195 ctx->count -= *cnt;
181196 }
197
+EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
182198
183199 /**
184200 * eventfd_ctx_remove_wait_queue - Reads the current counter and removes the wait queue.
....@@ -209,32 +225,32 @@
209225 }
210226 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
211227
212
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
213
- loff_t *ppos)
228
+static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
214229 {
230
+ struct file *file = iocb->ki_filp;
215231 struct eventfd_ctx *ctx = file->private_data;
216
- ssize_t res;
217232 __u64 ucnt = 0;
218233 DECLARE_WAITQUEUE(wait, current);
219234
220
- if (count < sizeof(ucnt))
235
+ if (iov_iter_count(to) < sizeof(ucnt))
221236 return -EINVAL;
222
-
223237 spin_lock_irq(&ctx->wqh.lock);
224
- res = -EAGAIN;
225
- if (ctx->count > 0)
226
- res = sizeof(ucnt);
227
- else if (!(file->f_flags & O_NONBLOCK)) {
238
+ if (!ctx->count) {
239
+ if ((file->f_flags & O_NONBLOCK) ||
240
+ (iocb->ki_flags & IOCB_NOWAIT)) {
241
+ spin_unlock_irq(&ctx->wqh.lock);
242
+ return -EAGAIN;
243
+ }
228244 __add_wait_queue(&ctx->wqh, &wait);
229245 for (;;) {
230246 set_current_state(TASK_INTERRUPTIBLE);
231
- if (ctx->count > 0) {
232
- res = sizeof(ucnt);
247
+ if (ctx->count)
233248 break;
234
- }
235249 if (signal_pending(current)) {
236
- res = -ERESTARTSYS;
237
- break;
250
+ __remove_wait_queue(&ctx->wqh, &wait);
251
+ __set_current_state(TASK_RUNNING);
252
+ spin_unlock_irq(&ctx->wqh.lock);
253
+ return -ERESTARTSYS;
238254 }
239255 spin_unlock_irq(&ctx->wqh.lock);
240256 schedule();
....@@ -243,17 +259,14 @@
243259 __remove_wait_queue(&ctx->wqh, &wait);
244260 __set_current_state(TASK_RUNNING);
245261 }
246
- if (likely(res > 0)) {
247
- eventfd_ctx_do_read(ctx, &ucnt);
248
- if (waitqueue_active(&ctx->wqh))
249
- wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
250
- }
262
+ eventfd_ctx_do_read(ctx, &ucnt);
263
+ if (waitqueue_active(&ctx->wqh))
264
+ wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
251265 spin_unlock_irq(&ctx->wqh.lock);
252
-
253
- if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
266
+ if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
254267 return -EFAULT;
255268
256
- return res;
269
+ return sizeof(ucnt);
257270 }
258271
259272 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
....@@ -312,6 +325,7 @@
312325 seq_printf(m, "eventfd-count: %16llx\n",
313326 (unsigned long long)ctx->count);
314327 spin_unlock_irq(&ctx->wqh.lock);
328
+ seq_printf(m, "eventfd-id: %d\n", ctx->id);
315329 }
316330 #endif
317331
....@@ -321,7 +335,7 @@
321335 #endif
322336 .release = eventfd_release,
323337 .poll = eventfd_poll,
324
- .read = eventfd_read,
338
+ .read_iter = eventfd_read,
325339 .write = eventfd_write,
326340 .llseek = noop_llseek,
327341 };
....@@ -398,6 +412,7 @@
398412 static int do_eventfd(unsigned int count, int flags)
399413 {
400414 struct eventfd_ctx *ctx;
415
+ struct file *file;
401416 int fd;
402417
403418 /* Check the EFD_* constants for consistency. */
....@@ -415,12 +430,26 @@
415430 init_waitqueue_head(&ctx->wqh);
416431 ctx->count = count;
417432 ctx->flags = flags;
433
+ ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
418434
419
- fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
420
- O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
435
+ flags &= EFD_SHARED_FCNTL_FLAGS;
436
+ flags |= O_RDWR;
437
+ fd = get_unused_fd_flags(flags);
421438 if (fd < 0)
422
- eventfd_free_ctx(ctx);
439
+ goto err;
423440
441
+ file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
442
+ if (IS_ERR(file)) {
443
+ put_unused_fd(fd);
444
+ fd = PTR_ERR(file);
445
+ goto err;
446
+ }
447
+
448
+ file->f_mode |= FMODE_NOWAIT;
449
+ fd_install(fd, file);
450
+ return fd;
451
+err:
452
+ eventfd_free_ctx(ctx);
424453 return fd;
425454 }
426455