| .. | .. |
|---|
| 15 | 15 | #include <linux/poll.h> |
|---|
| 16 | 16 | #include <linux/uio.h> |
|---|
| 17 | 17 | #include <linux/wait.h> |
|---|
| 18 | +#include <linux/workqueue.h> |
|---|
| 18 | 19 | |
|---|
| 19 | 20 | #define DM_MSG_PREFIX "user" |
|---|
| 20 | 21 | |
|---|
| 21 | 22 | #define MAX_OUTSTANDING_MESSAGES 128 |
|---|
| 23 | + |
|---|
| 24 | +static unsigned int daemon_timeout_msec = 4000; |
|---|
| 25 | +module_param_named(dm_user_daemon_timeout_msec, daemon_timeout_msec, uint, |
|---|
| 26 | + 0644); |
|---|
| 27 | +MODULE_PARM_DESC(dm_user_daemon_timeout_msec, |
|---|
| 28 | + "IO timeout in msec if the daemon does not process requests"); |
|---|
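Because the parameter is registered with mode 0644, it should be tunable at runtime through sysfs — presumably `/sys/module/dm_user/parameters/dm_user_daemon_timeout_msec`, inferred from the standard `module_param_named()` layout and an assumed `dm_user` module name — as well as at load time. Note that the value is read when a message is armed (`msecs_to_jiffies()` below), so changing it does not retroactively adjust work already queued.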
| 22 | 29 | |
|---|
| 23 | 30 | /* |
|---|
| 24 | 31 | * dm-user uses four structures: |
|---|
| .. | .. |
|---|
| 80 | 87 | */ |
|---|
| 81 | 88 | u64 return_type; |
|---|
| 82 | 89 | u64 return_flags; |
|---|
| 90 | + |
|---|
| 91 | + struct delayed_work work; |
|---|
| 92 | + bool delayed; |
|---|
| 93 | + struct target *t; |
|---|
| 83 | 94 | }; |
|---|
| 84 | 95 | |
|---|
| 85 | 96 | struct target { |
|---|
| .. | .. |
|---|
| 132 | 143 | */ |
|---|
| 133 | 144 | struct kref references; |
|---|
| 134 | 145 | int dm_destroyed; |
|---|
| 146 | + bool daemon_terminated; |
|---|
| 135 | 147 | }; |
|---|
| 136 | 148 | |
|---|
| 137 | 149 | struct channel { |
|---|
| .. | .. |
|---|
| 171 | 183 | */ |
|---|
| 172 | 184 | struct message scratch_message_from_user; |
|---|
| 173 | 185 | }; |
|---|
| 186 | + |
|---|
| 187 | +static void message_kill(struct message *m, mempool_t *pool) |
|---|
| 188 | +{ |
|---|
| 189 | + m->bio->bi_status = BLK_STS_IOERR; |
|---|
| 190 | + bio_endio(m->bio); |
|---|
| 191 | + mempool_free(m, pool); |
|---|
| 192 | +} |
|---|
| 193 | + |
|---|
| 194 | +static inline bool is_user_space_thread_present(struct target *t) |
|---|
| 195 | +{ |
|---|
| 196 | + lockdep_assert_held(&t->lock); |
|---|
| 197 | + return (kref_read(&t->references) > 1); |
|---|
| 198 | +} |
|---|
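The `> 1` here relies on the target always holding one base reference for itself (the `kref_init()` further down), so any higher count must come from an attached user-space channel. A minimal sketch of that accounting, under the assumption (not shown in this hunk) that each open channel takes one reference:

```c
/* Sketch only; illustrative helpers, not the driver's actual code. */
static void sketch_channel_open(struct target *t)
{
	mutex_lock(&t->lock);
	kref_get(&t->references);	/* kref_read() is now >= 2 */
	mutex_unlock(&t->lock);
}

static void sketch_channel_close(struct target *t)
{
	mutex_lock(&t->lock);
	target_put(t);			/* drops the reference and t->lock */
}
```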
| 199 | + |
|---|
| 200 | +static void process_delayed_work(struct work_struct *work) |
|---|
| 201 | +{ |
|---|
| 202 | + struct delayed_work *del_work = to_delayed_work(work); |
|---|
| 203 | + struct message *msg = container_of(del_work, struct message, work); |
|---|
| 204 | + |
|---|
| 205 | + struct target *t = msg->t; |
|---|
| 206 | + |
|---|
| 207 | + mutex_lock(&t->lock); |
|---|
| 208 | + |
|---|
| 209 | + /* |
|---|
| 210 | + * There is at least one thread to process the IO. |
|---|
| 211 | + */ |
|---|
| 212 | + if (is_user_space_thread_present(t)) { |
|---|
| 213 | + mutex_unlock(&t->lock); |
|---|
| 214 | + return; |
|---|
| 215 | + } |
|---|
| 216 | + |
|---|
| 217 | + /* |
|---|
| 218 | + * Terminate the IO with an error |
|---|
| 219 | + */ |
|---|
| 220 | + list_del(&msg->to_user); |
|---|
| 221 | + pr_err("I/O error: sector %llu: no user-space daemon for %s target\n", |
|---|
| 222 | + msg->bio->bi_iter.bi_sector, |
|---|
| 223 | + t->miscdev.name); |
|---|
| 224 | + message_kill(msg, &t->message_pool); |
|---|
| 225 | + mutex_unlock(&t->lock); |
|---|
| 226 | +} |
|---|
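Note the ordering in this handler: it re-checks under `t->lock` whether a daemon attached while the work was pending, and if so it leaves the message on `to_user` for that thread to claim. The matching `cancel_delayed_work_sync()` in the dequeue hunk below is what keeps this handler and a daemon thread from each completing the same message.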
| 227 | + |
|---|
| 228 | +static void enqueue_delayed_work(struct message *m, bool is_delay) |
|---|
| 229 | +{ |
|---|
| 230 | + unsigned long delay = 0; |
|---|
| 231 | + |
|---|
| 232 | + m->delayed = true; |
|---|
| 233 | + INIT_DELAYED_WORK(&m->work, process_delayed_work); |
|---|
| 234 | + |
|---|
| 235 | + /* |
|---|
| 236 | + * The snapuserd daemon is the user-space process |
|---|
| 237 | + * which services IO requests from dm-user |
|---|
| 238 | + * while an OTA update is applied. Per the current |
|---|
| 239 | + * design, when a dm-user target is created, the |
|---|
| 240 | + * daemon attaches to the target and starts |
|---|
| 241 | + * processing the IOs. The daemon is terminated |
|---|
| 242 | + * only when the dm-user target is destroyed. |
|---|
| 243 | + * |
|---|
| 244 | + * If, for some reason, the daemon crashes or |
|---|
| 245 | + * terminates early without destroying the dm-user |
|---|
| 246 | + * target, there is no mechanism to restart the |
|---|
| 247 | + * daemon and resume processing IOs on the same |
|---|
| 248 | + * target. Theoretically it is possible, but that |
|---|
| 249 | + * infrastructure doesn't exist in the Android ecosystem. |
|---|
| 250 | + * |
|---|
| 251 | + * Thus, once the daemon terminates, the IOs issued |
|---|
| 252 | + * on that target will never be processed. Hence, |
|---|
| 253 | + * we set the delay to 0 and fail the IOs immediately. |
|---|
| 254 | + * |
|---|
| 255 | + * On the other hand, when a new dm-user target is |
|---|
| 256 | + * created, we wait for the daemon to attach for the |
|---|
| 257 | + * first time. This primarily happens when first-stage |
|---|
| 258 | + * init spins up the daemon. At this point, since the |
|---|
| 259 | + * snapshot device backs the root filesystem, the |
|---|
| 260 | + * dm-user target may receive IO requests even before |
|---|
| 261 | + * the daemon is fully launched. We don't want to fail |
|---|
| 262 | + * those requests immediately, so we queue them with a |
|---|
| 263 | + * timeout, giving the daemon time to become ready. If |
|---|
| 264 | + * it fails to launch within the timeout, the IOs fail. |
|---|
| 265 | + */ |
|---|
| 266 | + if (is_delay) |
|---|
| 267 | + delay = msecs_to_jiffies(daemon_timeout_msec); |
|---|
| 268 | + |
|---|
| 269 | + queue_delayed_work(system_wq, &m->work, delay); |
|---|
| 270 | +} |
|---|
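For readers unfamiliar with the workqueue API used here, a minimal self-contained sketch of the same arm-and-fire pattern (all names below are illustrative, not part of dm-user):

```c
/* Sketch of the delayed-work pattern used by enqueue_delayed_work(). */
#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct demo_msg {
	struct delayed_work work;
};

static void demo_timeout(struct work_struct *work)
{
	struct demo_msg *m = container_of(to_delayed_work(work),
					  struct demo_msg, work);

	/* Runs ~4s after demo_arm(), unless cancelled first. */
	(void)m;
}

static void demo_arm(struct demo_msg *m)
{
	INIT_DELAYED_WORK(&m->work, demo_timeout);
	queue_delayed_work(system_wq, &m->work, msecs_to_jiffies(4000));
}
```

`queue_delayed_work(system_wq, ..., 0)` — the `delay = 0` case above — is still asynchronous: the work runs from a kworker as soon as the scheduler gets to it, which is why the termination path can arm messages with zero delay without completing bios in the caller's context.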
| 174 | 271 | |
|---|
| 175 | 272 | static inline struct target *target_from_target(struct dm_target *target) |
|---|
| 176 | 273 | { |
|---|
| .. | .. |
|---|
| 500 | 597 | return NULL; |
|---|
| 501 | 598 | |
|---|
| 502 | 599 | m = list_first_entry(&t->to_user, struct message, to_user); |
|---|
| 600 | + |
|---|
| 503 | 601 | list_del(&m->to_user); |
|---|
| 602 | + |
|---|
| 603 | + /* |
|---|
| 604 | + * If the IO was queued on the workqueue because |
|---|
| 605 | + * there was no daemon to service it, we have to |
|---|
| 606 | + * cancel the delayed work, as the IO will now be |
|---|
| 607 | + * processed by this user-space thread. |
|---|
| 608 | + * |
|---|
| 609 | + * If the delayed work was already picked up for |
|---|
| 610 | + * processing, wait for it to complete. Note that |
|---|
| 611 | + * the IO will not be terminated by the workqueue |
|---|
| 612 | + * thread in that case. |
|---|
| 613 | + */ |
|---|
| 614 | + if (unlikely(m->delayed)) { |
|---|
| 615 | + mutex_unlock(&t->lock); |
|---|
| 616 | + cancel_delayed_work_sync(&m->work); |
|---|
| 617 | + mutex_lock(&t->lock); |
|---|
| 618 | + } |
|---|
| 504 | 619 | return m; |
|---|
| 505 | 620 | } |
|---|
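The drop-and-retake of `t->lock` around `cancel_delayed_work_sync()` is deliberate: the timeout handler itself takes `t->lock`, so waiting for it while holding the lock could deadlock. `cancel_delayed_work_sync()` only returns once the work is guaranteed not to run: either it was cancelled while still pending, or the already-running handler has finished.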
| 506 | 621 | |
|---|
| .. | .. |
|---|
| 522 | 637 | return NULL; |
|---|
| 523 | 638 | } |
|---|
| 524 | 639 | |
|---|
| 525 | | -static void message_kill(struct message *m, mempool_t *pool) |
|---|
| 526 | | -{ |
|---|
| 527 | | - m->bio->bi_status = BLK_STS_IOERR; |
|---|
| 528 | | - bio_endio(m->bio); |
|---|
| 529 | | - bio_put(m->bio); |
|---|
| 530 | | - mempool_free(m, pool); |
|---|
| 531 | | -} |
|---|
| 532 | | - |
|---|
| 533 | 640 | /* |
|---|
| 534 | 641 | * Returns 0 when there is no work left to do. This must be callable without |
|---|
| 535 | 642 | * holding the target lock, as it is part of the waitqueue's check expression. |
|---|
| 536 | 643 | * When called without the lock it may spuriously indicate there is remaining |
|---|
| 537 | 644 | * work, but when called with the lock it must be accurate. |
|---|
| 538 | 645 | */ |
|---|
| 539 | | -static int target_poll(struct target *t) |
|---|
| 646 | +int target_poll(struct target *t) |
|---|
| 540 | 647 | { |
|---|
| 541 | 648 | return !list_empty(&t->to_user) || t->dm_destroyed; |
|---|
| 542 | 649 | } |
|---|
| 543 | 650 | |
|---|
| 544 | | -static void target_release(struct kref *ref) |
|---|
| 651 | +void target_release(struct kref *ref) |
|---|
| 545 | 652 | { |
|---|
| 546 | 653 | struct target *t = container_of(ref, struct target, references); |
|---|
| 547 | 654 | struct list_head *cur, *tmp; |
|---|
| .. | .. |
|---|
| 552 | 659 | * there are and will never be any channels. |
|---|
| 553 | 660 | */ |
|---|
| 554 | 661 | list_for_each_safe (cur, tmp, &t->to_user) { |
|---|
| 555 | | - message_kill(list_entry(cur, struct message, to_user), |
|---|
| 556 | | - &t->message_pool); |
|---|
| 662 | + struct message *m = list_entry(cur, struct message, to_user); |
|---|
| 663 | + |
|---|
| 664 | + if (unlikely(m->delayed)) { |
|---|
| 665 | + bool ret; |
|---|
| 666 | + |
|---|
| 667 | + mutex_unlock(&t->lock); |
|---|
| 668 | + ret = cancel_delayed_work_sync(&m->work); |
|---|
| 669 | + mutex_lock(&t->lock); |
|---|
| 670 | + if (!ret) |
|---|
| 671 | + continue; |
|---|
| 672 | + } |
|---|
| 673 | + message_kill(m, &t->message_pool); |
|---|
| 557 | 674 | } |
|---|
| 558 | 675 | |
|---|
| 559 | 676 | mempool_exit(&t->message_pool); |
|---|
| .. | .. |
|---|
| 562 | 679 | kfree(t); |
|---|
| 563 | 680 | } |
|---|
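The `continue` on a failed cancel is the subtle case: a false return from `cancel_delayed_work_sync()` means the timeout handler already ran, and since no user-space thread can be present during release, it will have completed and freed this message via `message_kill()` itself. Killing it again here would be a double completion.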
| 564 | 681 | |
|---|
| 565 | | -static void target_put(struct target *t) |
|---|
| 682 | +void target_put(struct target *t) |
|---|
| 566 | 683 | { |
|---|
| 567 | 684 | /* |
|---|
| 568 | 685 | * This both releases a reference to the target and the lock. We leave |
|---|
| .. | .. |
|---|
| 571 | 688 | */ |
|---|
| 572 | 689 | lockdep_assert_held(&t->lock); |
|---|
| 573 | 690 | |
|---|
| 574 | | - if (!kref_put(&t->references, target_release)) |
|---|
| 691 | + if (!kref_put(&t->references, target_release)) { |
|---|
| 692 | + /* |
|---|
| 693 | + * A user-space thread is terminating. If it was |
|---|
| 694 | + * the last one, scan the list for all pending |
|---|
| 695 | + * IOs that were not yet processed and hand them |
|---|
| 696 | + * to the workqueue so they can be failed without |
|---|
| 697 | + * further delay. |
|---|
| 698 | + */ |
|---|
| 699 | + if (!is_user_space_thread_present(t)) { |
|---|
| 700 | + struct list_head *cur, *tmp; |
|---|
| 701 | + |
|---|
| 702 | + list_for_each_safe(cur, tmp, &t->to_user) { |
|---|
| 703 | + struct message *m = list_entry(cur, |
|---|
| 704 | + struct message, |
|---|
| 705 | + to_user); |
|---|
| 706 | + if (!m->delayed) |
|---|
| 707 | + enqueue_delayed_work(m, false); |
|---|
| 708 | + } |
|---|
| 709 | + /* |
|---|
| 710 | + * The daemon attached to this target has terminated. |
|---|
| 711 | + */ |
|---|
| 712 | + t->daemon_terminated = true; |
|---|
| 713 | + } |
|---|
| 575 | 714 | mutex_unlock(&t->lock); |
|---|
| 715 | + } |
|---|
| 576 | 716 | } |
|---|
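Messages already sitting on `to_user` when the last daemon thread detaches are not failed synchronously here; they are armed with zero delay, so the workqueue terminates them almost immediately unless a new daemon attaches first. Setting `daemon_terminated` then extends the same policy to IOs that arrive later (see the map path below).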
| 577 | 717 | |
|---|
| 578 | 718 | static struct channel *channel_alloc(struct target *t) |
|---|
| .. | .. |
|---|
| 593 | 733 | return c; |
|---|
| 594 | 734 | } |
|---|
| 595 | 735 | |
|---|
| 596 | | -static void channel_free(struct channel *c) |
|---|
| 736 | +void channel_free(struct channel *c) |
|---|
| 597 | 737 | { |
|---|
| 598 | 738 | struct list_head *cur, *tmp; |
|---|
| 599 | 739 | |
|---|
| .. | .. |
|---|
| 848 | 988 | */ |
|---|
| 849 | 989 | WARN_ON(bio_size(c->cur_from_user->bio) != 0); |
|---|
| 850 | 990 | bio_endio(c->cur_from_user->bio); |
|---|
| 851 | | - bio_put(c->cur_from_user->bio); |
|---|
| 852 | 991 | |
|---|
| 853 | 992 | /* |
|---|
| 854 | 993 | * We don't actually need to take the target lock here, as all |
|---|
| .. | .. |
|---|
| 917 | 1056 | */ |
|---|
| 918 | 1057 | kref_init(&t->references); |
|---|
| 919 | 1058 | |
|---|
| 1059 | + t->daemon_terminated = false; |
|---|
| 920 | 1060 | mutex_init(&t->lock); |
|---|
| 921 | 1061 | init_waitqueue_head(&t->wq); |
|---|
| 922 | 1062 | INIT_LIST_HEAD(&t->to_user); |
|---|
| .. | .. |
|---|
| 1085 | 1225 | return DM_MAPIO_REQUEUE; |
|---|
| 1086 | 1226 | } |
|---|
| 1087 | 1227 | |
|---|
| 1088 | | - bio_get(bio); |
|---|
| 1089 | 1228 | entry->msg.type = bio_type_to_user_type(bio); |
|---|
| 1090 | 1229 | entry->msg.flags = bio_flags_to_user_flags(bio); |
|---|
| 1091 | 1230 | entry->msg.sector = bio->bi_iter.bi_sector; |
|---|
| .. | .. |
|---|
| 1095 | 1234 | entry->total_to_user = bio_bytes_needed_to_user(bio); |
|---|
| 1096 | 1235 | entry->posn_from_user = 0; |
|---|
| 1097 | 1236 | entry->total_from_user = bio_bytes_needed_from_user(bio); |
|---|
| 1237 | + entry->delayed = false; |
|---|
| 1238 | + entry->t = t; |
|---|
| 1098 | 1239 | /* Pairs with the barrier in dev_read() */ |
|---|
| 1099 | 1240 | smp_wmb(); |
|---|
| 1100 | 1241 | list_add_tail(&entry->to_user, &t->to_user); |
|---|
| 1242 | + |
|---|
| 1243 | + /* |
|---|
| 1244 | + * If there is no daemon to process the IOs, |
|---|
| 1245 | + * queue this message on the workqueue with |
|---|
| 1246 | + * a timeout. |
|---|
| 1247 | + */ |
|---|
| 1248 | + if (!is_user_space_thread_present(t)) |
|---|
| 1249 | + enqueue_delayed_work(entry, !t->daemon_terminated); |
|---|
| 1250 | + |
|---|
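The second argument applies the two policies from the comment block in `enqueue_delayed_work()`: `!t->daemon_terminated` is true only before the first daemon ever attaches, so a fresh target waits up to `daemon_timeout_msec` for first-stage init to launch it, while a target whose daemon has died fails newly arriving IOs immediately.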
| 1101 | 1251 | wake_up_interruptible(&t->wq); |
|---|
| 1102 | 1252 | mutex_unlock(&t->lock); |
|---|
| 1103 | 1253 | return DM_MAPIO_SUBMITTED; |
|---|