2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/base/devcoredump.c
@@ -29,6 +29,47 @@
         struct device devcd_dev;
         void *data;
         size_t datalen;
+        /*
+         * The mutex serializes accesses to the del_wk work between kernel
+         * and user space. When devcd is added with device_add(), a uevent
+         * is sent to user space; user space reads the uevent and calls
+         * devcd_data_write(), which may try to modify a work item that
+         * devcoredump has not yet initialized or queued:
+         *
+         *
+         *      cpu0(X)                            cpu1(Y)
+         *
+         *      dev_coredump() uevent sent to user space
+         *      device_add()  ==================> user space process Y reads
+         *                                        the uevents and writes to
+         *                                        the devcd fd, resulting in
+         *
+         *                                        devcd_data_write()
+         *                                          mod_delayed_work()
+         *                                            try_to_grab_pending()
+         *                                              del_timer()
+         *                                                debug_assert_init()
+         *      INIT_DELAYED_WORK()
+         *      schedule_delayed_work()
+         *
+         *
+         * The mutex alone is not enough to prevent del_wk from being
+         * scheduled again after it has been flushed by a call to
+         * devcd_free(), as shown below:
+         *
+         *      disabled_store()
+         *        devcd_free()
+         *          mutex_lock()             devcd_data_write()
+         *          flush_delayed_work()
+         *          mutex_unlock()
+         *                                   mutex_lock()
+         *                                   mod_delayed_work()
+         *                                   mutex_unlock()
+         *
+         * Hence the delete_work flag is also required.
+         */
+        struct mutex mutex;
+        bool delete_work;
         struct module *owner;
         ssize_t (*read)(char *buffer, loff_t offset, size_t count,
                         void *data, size_t datalen);
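
The pattern the comment describes can be studied in isolation. Below is a
userspace sketch (hypothetical names; pthreads stand in for the kernel mutex
API and a counter stands in for mod_delayed_work()): under the mutex, the
delete_work flag makes the deletion request idempotent no matter how many
writers race.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
        pthread_mutex_t mutex;
        bool delete_work;       /* set once deletion has been requested */
        int schedules;          /* counts "work scheduled" events */
};

static void schedule_deletion(struct entry *e)
{
        e->schedules++;         /* stand-in for mod_delayed_work() */
}

/* devcd_data_write() analogue: many of these may run concurrently. */
static void *writer(void *arg)
{
        struct entry *e = arg;

        pthread_mutex_lock(&e->mutex);
        if (!e->delete_work) {
                e->delete_work = true;
                schedule_deletion(e);
        }
        pthread_mutex_unlock(&e->mutex);
        return NULL;
}

int main(void)
{
        struct entry e = { PTHREAD_MUTEX_INITIALIZER, false, 0 };
        pthread_t t[8];

        for (int i = 0; i < 8; i++)
                pthread_create(&t[i], NULL, writer, &e);
        for (int i = 0; i < 8; i++)
                pthread_join(t[i], NULL);

        /* Always prints 1: the flag makes the request one-shot. */
        printf("deletion scheduled %d time(s)\n", e.schedules);
        return 0;
}
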
@@ -88,7 +129,12 @@
         struct device *dev = kobj_to_dev(kobj);
         struct devcd_entry *devcd = dev_to_devcd(dev);

-        mod_delayed_work(system_wq, &devcd->del_wk, 0);
+        mutex_lock(&devcd->mutex);
+        if (!devcd->delete_work) {
+                devcd->delete_work = true;
+                mod_delayed_work(system_wq, &devcd->del_wk, 0);
+        }
+        mutex_unlock(&devcd->mutex);

         return count;
 }
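
Pieced together from the context and added lines, devcd_data_write() now
reads as follows (the signature is not part of the hunk; it is quoted from
mainline devcoredump.c of this era):

static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
                                struct bin_attribute *bin_attr,
                                char *buffer, loff_t offset, size_t count)
{
        struct device *dev = kobj_to_dev(kobj);
        struct devcd_entry *devcd = dev_to_devcd(dev);

        mutex_lock(&devcd->mutex);
        if (!devcd->delete_work) {
                devcd->delete_work = true;
                mod_delayed_work(system_wq, &devcd->del_wk, 0);
        }
        mutex_unlock(&devcd->mutex);

        return count;
}
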
@@ -116,15 +162,44 @@
 {
         struct devcd_entry *devcd = dev_to_devcd(dev);

+        mutex_lock(&devcd->mutex);
+        if (!devcd->delete_work)
+                devcd->delete_work = true;
+
         flush_delayed_work(&devcd->del_wk);
+        mutex_unlock(&devcd->mutex);
         return 0;
 }

 static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
                              char *buf)
 {
-        return sprintf(buf, "%d\n", devcd_disabled);
+        return sysfs_emit(buf, "%d\n", devcd_disabled);
 }
+
+/*
+ *
+ *    disabled_store()                              worker()
+ *      class_for_each_device(&devcd_class,
+ *              NULL, NULL, devcd_free)
+ *        ...
+ *        ...
+ *        while ((dev = class_dev_iter_next(&iter))
+ *                                                  devcd_del()
+ *                                                    device_del()
+ *                                                      put_device() <- last reference
+ *          error = fn(dev, data)                   devcd_dev_release()
+ *            devcd_free(dev, data)                   kfree(devcd)
+ *              mutex_lock(&devcd->mutex);
+ *
+ *
+ * From the diagram above it looks as though disabled_store() could race
+ * with a concurrently running devcd_del() and fault in mutex_lock(),
+ * acquiring devcd->mutex after the devcd memory was kfree()d on its last
+ * put_device(). This cannot happen, however: fn(dev, data) runs while
+ * class_dev_iter_next() holds its own reference to the device via its
+ * klist_node, so the put_device() shown above is never the last one.
+ */

 static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
                               const char *buf, size_t count)
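
The comment's claim rests on how class_for_each_device() iterates. Condensed
from drivers/base/class.c (argument checks elided), the iterator takes its
own reference to each device via its klist_node before handing it to
fn(dev, data), so the put_device() in the diagram cannot be the last one:

int class_for_each_device(struct class *class, struct device *start,
                          void *data, int (*fn)(struct device *, void *))
{
        struct class_dev_iter iter;
        struct device *dev;
        int error = 0;

        class_dev_iter_init(&iter, class, start, NULL);
        /* class_dev_iter_next() pins 'dev' until the next iteration. */
        while ((dev = class_dev_iter_next(&iter))) {
                error = fn(dev, data);
                if (error)
                        break;
        }
        class_dev_iter_exit(&iter);
        return error;
}
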
@@ -164,16 +239,7 @@
 static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
                            void *data, size_t datalen)
 {
-        if (offset > datalen)
-                return -EINVAL;
-
-        if (offset + count > datalen)
-                count = datalen - offset;
-
-        if (count)
-                memcpy(buffer, ((u8 *)data) + offset, count);
-
-        return count;
+        return memory_read_from_buffer(buffer, count, &offset, data, datalen);
 }

 static void devcd_freev(void *data)
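
memory_read_from_buffer(), declared in linux/string.h, folds the open-coded
bounds handling into one helper. The userspace sketch below mirrors its
semantics (assuming the mainline implementation); note the one behavioural
change in this hunk: an offset past the end of the dump now reads as EOF (0)
rather than failing with -EINVAL.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Userspace mirror of memory_read_from_buffer() (off_t for loff_t). */
static ssize_t read_from_buffer(void *to, size_t count, off_t *ppos,
                                const void *from, size_t available)
{
        off_t pos = *ppos;

        if (pos < 0)
                return -EINVAL;
        if ((size_t)pos >= available)
                return 0;                       /* EOF, not an error */
        if (count > available - pos)
                count = available - pos;        /* clamp to what is left */
        memcpy(to, (const char *)from + pos, count);
        *ppos = pos + count;
        return count;
}

int main(void)
{
        const char dump[] = "coredump";         /* 8 payload bytes */
        char buf[16];
        off_t pos = 6;

        /* Asks for 8 bytes at offset 6 of 8: clamped to 2 ("mp"). */
        ssize_t n = read_from_buffer(buf, 8, &pos, dump, 8);
        printf("read %zd byte(s): \"%.*s\", pos now %lld\n",
               n, (int)n, buf, (long long)pos);
        return 0;
}
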
@@ -291,13 +357,16 @@
         devcd->read = read;
         devcd->free = free;
         devcd->failing_dev = get_device(dev);
+        devcd->delete_work = false;

+        mutex_init(&devcd->mutex);
         device_initialize(&devcd->devcd_dev);

         dev_set_name(&devcd->devcd_dev, "devcd%d",
                      atomic_inc_return(&devcd_count));
         devcd->devcd_dev.class = &devcd_class;

+        mutex_lock(&devcd->mutex);
         if (device_add(&devcd->devcd_dev))
                 goto put_device;

@@ -311,10 +380,11 @@

         INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
         schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
-
+        mutex_unlock(&devcd->mutex);
         return;
  put_device:
         put_device(&devcd->devcd_dev);
+        mutex_unlock(&devcd->mutex);
  put_module:
         module_put(owner);
  free:
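
Pieced together from the last two hunks, the lock scope in dev_coredumpm()
becomes the following (a sketch; unrelated lines elided). Holding
devcd->mutex from before device_add() until after schedule_delayed_work()
means a devcd_data_write() triggered by the uevent blocks until del_wk is
initialized and queued, which closes the race in the struct-level comment:

        devcd->delete_work = false;
        mutex_init(&devcd->mutex);
        device_initialize(&devcd->devcd_dev);
        /* ... device name and class set-up ... */
        mutex_lock(&devcd->mutex);
        if (device_add(&devcd->devcd_dev))
                goto put_device;
        /* ... sysfs links etc. ... */
        INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
        schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
        mutex_unlock(&devcd->mutex);
        return;
 put_device:
        put_device(&devcd->devcd_dev);
        mutex_unlock(&devcd->mutex);
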
@@ -323,7 +393,7 @@
 EXPORT_SYMBOL_GPL(dev_coredumpm);

 /**
- * dev_coredumpmsg - create device coredump that uses scatterlist as data
+ * dev_coredumpsg - create device coredump that uses scatterlist as data
  * parameter
  * @dev: the struct device for the crashed device
  * @table: the dump data