.. | .. |
---|
65 | 65 | |
---|
66 | 66 | #include "hfi.h" |
---|
67 | 67 | #include "sdma.h" |
---|
68 | | -#include "mmu_rb.h" |
---|
69 | 68 | #include "user_sdma.h" |
---|
70 | 69 | #include "verbs.h" /* for the headers */ |
---|
71 | 70 | #include "common.h" /* for struct hfi1_tid_info */ |
---|
.. | .. |
---|
80 | 79 | static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts); |
---|
81 | 80 | static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); |
---|
82 | 81 | static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); |
---|
83 | | -static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); |
---|
84 | | -static int pin_vector_pages(struct user_sdma_request *req, |
---|
85 | | - struct user_sdma_iovec *iovec); |
---|
86 | | -static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, |
---|
87 | | - unsigned start, unsigned npages); |
---|
| 82 | +static void user_sdma_free_request(struct user_sdma_request *req); |
---|
88 | 83 | static int check_header_template(struct user_sdma_request *req, |
---|
89 | 84 | struct hfi1_pkt_header *hdr, u32 lrhlen, |
---|
90 | 85 | u32 datalen); |
---|
.. | .. |
---|
108 | 103 | static void activate_packet_queue(struct iowait *wait, int reason); |
---|
109 | 104 | static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, |
---|
110 | 105 | unsigned long len); |
---|
111 | | -static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode); |
---|
112 | 106 | static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, |
---|
113 | 107 | void *arg2, bool *stop); |
---|
114 | 108 | static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); |
---|
115 | | -static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode); |
---|
116 | 109 | |
---|
117 | 110 | static struct mmu_rb_ops sdma_rb_ops = { |
---|
118 | 111 | .filter = sdma_rb_filter, |
---|
119 | | - .insert = sdma_rb_insert, |
---|
120 | 112 | .evict = sdma_rb_evict, |
---|
121 | 113 | .remove = sdma_rb_remove, |
---|
122 | | - .invalidate = sdma_rb_invalidate |
---|
123 | 114 | }; |
---|
| 115 | + |
---|
| 116 | +static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, |
---|
| 117 | + struct user_sdma_txreq *tx, |
---|
| 118 | + struct user_sdma_iovec *iovec, |
---|
| 119 | + u32 *pkt_remaining); |
---|
124 | 120 | |
---|
125 | 121 | static int defer_packet_queue( |
---|
126 | 122 | struct sdma_engine *sde, |
---|
.. | .. |
---|
133 | 129 | container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); |
---|
134 | 130 | |
---|
135 | 131 | write_seqlock(&sde->waitlock); |
---|
| 132 | + trace_hfi1_usdma_defer(pq, sde, &pq->busy); |
---|
136 | 133 | if (sdma_progress(sde, seq, txreq)) |
---|
137 | 134 | goto eagain; |
---|
138 | 135 | /* |
---|
.. | .. |
---|
157 | 154 | { |
---|
158 | 155 | struct hfi1_user_sdma_pkt_q *pq = |
---|
159 | 156 | container_of(wait, struct hfi1_user_sdma_pkt_q, busy); |
---|
160 | | - pq->busy.lock = NULL; |
---|
| 157 | + |
---|
| 158 | + trace_hfi1_usdma_activate(pq, wait, reason); |
---|
161 | 159 | xchg(&pq->state, SDMA_PKT_Q_ACTIVE); |
---|
162 | 160 | wake_up(&wait->wait_dma); |
---|
163 | 161 | }; |
---|
.. | .. |
---|
200 | 198 | if (!pq->reqs) |
---|
201 | 199 | goto pq_reqs_nomem; |
---|
202 | 200 | |
---|
203 | | - pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), |
---|
204 | | - sizeof(*pq->req_in_use), |
---|
205 | | - GFP_KERNEL); |
---|
| 201 | + pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL); |
---|
206 | 202 | if (!pq->req_in_use) |
---|
207 | 203 | goto pq_reqs_no_in_use; |
---|
208 | 204 | |
---|
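Aside (not part of the patch): bitmap_zalloc() takes a bit count, here hfi1_sdma_comp_ring_size, rather than the BITS_TO_LONGS() byte arithmetic the old kcalloc() call needed, and it pairs with bitmap_free() as the later hunks show. A minimal sketch of the pattern under the usual kernel headers; ring_size and example_req_bitmap() are made-up names:

static int example_req_bitmap(unsigned int ring_size)
{
	unsigned long *in_use = bitmap_zalloc(ring_size, GFP_KERNEL);

	if (!in_use)
		return -ENOMEM;

	if (!test_and_set_bit(0, in_use))	/* atomically claim slot 0 */
		clear_bit(0, in_use);		/* ... and release it again */

	bitmap_free(in_use);			/* pairs with bitmap_zalloc() */
	return 0;
}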
.. | .. |
---|
249 | 245 | cq_nomem: |
---|
250 | 246 | kmem_cache_destroy(pq->txreq_cache); |
---|
251 | 247 | pq_txreq_nomem: |
---|
252 | | - kfree(pq->req_in_use); |
---|
| 248 | + bitmap_free(pq->req_in_use); |
---|
253 | 249 | pq_reqs_no_in_use: |
---|
254 | 250 | kfree(pq->reqs); |
---|
255 | 251 | pq_reqs_nomem: |
---|
.. | .. |
---|
288 | 284 | spin_unlock(&fd->pq_rcu_lock); |
---|
289 | 285 | synchronize_srcu(&fd->pq_srcu); |
---|
290 | 286 | /* at this point there can be no more new requests */ |
---|
291 | | - if (pq->handler) |
---|
292 | | - hfi1_mmu_rb_unregister(pq->handler); |
---|
293 | 287 | iowait_sdma_drain(&pq->busy); |
---|
294 | 288 | /* Wait until all requests have been freed. */ |
---|
295 | 289 | wait_event_interruptible( |
---|
296 | 290 | pq->wait, |
---|
297 | 291 | !atomic_read(&pq->n_reqs)); |
---|
298 | 292 | kfree(pq->reqs); |
---|
299 | | - kfree(pq->req_in_use); |
---|
| 293 | + if (pq->handler) |
---|
| 294 | + hfi1_mmu_rb_unregister(pq->handler); |
---|
| 295 | + bitmap_free(pq->req_in_use); |
---|
300 | 296 | kmem_cache_destroy(pq->txreq_cache); |
---|
301 | 297 | flush_pq_iowait(pq); |
---|
302 | 298 | kfree(pq); |
---|
.. | .. |
---|
451 | 447 | ret = -EINVAL; |
---|
452 | 448 | goto free_req; |
---|
453 | 449 | } |
---|
| 450 | + |
---|
454 | 451 | /* Copy the header from the user buffer */ |
---|
455 | 452 | ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info), |
---|
456 | 453 | sizeof(req->hdr)); |
---|
.. | .. |
---|
525 | 522 | memcpy(&req->iovs[i].iov, |
---|
526 | 523 | iovec + idx++, |
---|
527 | 524 | sizeof(req->iovs[i].iov)); |
---|
528 | | - ret = pin_vector_pages(req, &req->iovs[i]); |
---|
529 | | - if (ret) { |
---|
530 | | - req->data_iovs = i; |
---|
| 525 | + if (req->iovs[i].iov.iov_len == 0) { |
---|
| 526 | + ret = -EINVAL; |
---|
531 | 527 | goto free_req; |
---|
532 | 528 | } |
---|
533 | 529 | req->data_len += req->iovs[i].iov.iov_len; |
---|
.. | .. |
---|
599 | 595 | while (req->seqsubmitted != req->info.npkts) { |
---|
600 | 596 | ret = user_sdma_send_pkts(req, pcount); |
---|
601 | 597 | if (ret < 0) { |
---|
| 598 | + int we_ret; |
---|
| 599 | + |
---|
602 | 600 | if (ret != -EBUSY) |
---|
603 | 601 | goto free_req; |
---|
604 | | - if (wait_event_interruptible_timeout( |
---|
| 602 | + we_ret = wait_event_interruptible_timeout( |
---|
605 | 603 | pq->busy.wait_dma, |
---|
606 | 604 | pq->state == SDMA_PKT_Q_ACTIVE, |
---|
607 | 605 | msecs_to_jiffies( |
---|
608 | | - SDMA_IOWAIT_TIMEOUT)) <= 0) |
---|
| 606 | + SDMA_IOWAIT_TIMEOUT)); |
---|
| 607 | + trace_hfi1_usdma_we(pq, we_ret); |
---|
| 608 | + if (we_ret <= 0) |
---|
609 | 609 | flush_pq_iowait(pq); |
---|
610 | 610 | } |
---|
611 | 611 | } |
---|
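For context on the new we_ret handling (a reminder of the API, not part of the patch): wait_event_interruptible_timeout() returns the remaining jiffies (at least 1) when the condition became true, 0 on timeout, and -ERESTARTSYS when a signal interrupted the wait, so capturing the value lets trace_hfi1_usdma_we() record which case occurred before the <= 0 outcomes fall through to flush_pq_iowait(). A hedged sketch of that decoding; example_classify_wait() is a made-up name:

static void example_classify_wait(long we_ret)
{
	if (we_ret > 0)
		pr_debug("condition met with %ld jiffies to spare\n", we_ret);
	else if (we_ret == 0)
		pr_debug("timed out waiting for SDMA_PKT_Q_ACTIVE\n");
	else
		pr_debug("interrupted by a signal (%ld)\n", we_ret);
}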
.. | .. |
---|
621 | 621 | if (req->seqsubmitted) |
---|
622 | 622 | wait_event(pq->busy.wait_dma, |
---|
623 | 623 | (req->seqcomp == req->seqsubmitted - 1)); |
---|
624 | | - user_sdma_free_request(req, true); |
---|
| 624 | + user_sdma_free_request(req); |
---|
625 | 625 | pq_update(pq); |
---|
626 | 626 | set_comp_state(pq, cq, info.comp_idx, ERROR, ret); |
---|
627 | 627 | } |
---|
.. | .. |
---|
733 | 733 | return ret; |
---|
734 | 734 | } |
---|
735 | 735 | |
---|
736 | | -static int user_sdma_txadd(struct user_sdma_request *req, |
---|
737 | | - struct user_sdma_txreq *tx, |
---|
738 | | - struct user_sdma_iovec *iovec, u32 datalen, |
---|
739 | | - u32 *queued_ptr, u32 *data_sent_ptr, |
---|
740 | | - u64 *iov_offset_ptr) |
---|
741 | | -{ |
---|
742 | | - int ret; |
---|
743 | | - unsigned int pageidx, len; |
---|
744 | | - unsigned long base, offset; |
---|
745 | | - u64 iov_offset = *iov_offset_ptr; |
---|
746 | | - u32 queued = *queued_ptr, data_sent = *data_sent_ptr; |
---|
747 | | - struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
748 | | - |
---|
749 | | - base = (unsigned long)iovec->iov.iov_base; |
---|
750 | | - offset = offset_in_page(base + iovec->offset + iov_offset); |
---|
751 | | - pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >> |
---|
752 | | - PAGE_SHIFT); |
---|
753 | | - len = offset + req->info.fragsize > PAGE_SIZE ? |
---|
754 | | - PAGE_SIZE - offset : req->info.fragsize; |
---|
755 | | - len = min((datalen - queued), len); |
---|
756 | | - ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx], |
---|
757 | | - offset, len); |
---|
758 | | - if (ret) { |
---|
759 | | - SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret); |
---|
760 | | - return ret; |
---|
761 | | - } |
---|
762 | | - iov_offset += len; |
---|
763 | | - queued += len; |
---|
764 | | - data_sent += len; |
---|
765 | | - if (unlikely(queued < datalen && pageidx == iovec->npages && |
---|
766 | | - req->iov_idx < req->data_iovs - 1)) { |
---|
767 | | - iovec->offset += iov_offset; |
---|
768 | | - iovec = &req->iovs[++req->iov_idx]; |
---|
769 | | - iov_offset = 0; |
---|
770 | | - } |
---|
771 | | - |
---|
772 | | - *queued_ptr = queued; |
---|
773 | | - *data_sent_ptr = data_sent; |
---|
774 | | - *iov_offset_ptr = iov_offset; |
---|
775 | | - return ret; |
---|
776 | | -} |
---|
777 | | - |
---|
778 | 736 | static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) |
---|
779 | 737 | { |
---|
780 | 738 | int ret = 0; |
---|
.. | .. |
---|
806 | 764 | maxpkts = req->info.npkts - req->seqnum; |
---|
807 | 765 | |
---|
808 | 766 | while (npkts < maxpkts) { |
---|
809 | | - u32 datalen = 0, queued = 0, data_sent = 0; |
---|
810 | | - u64 iov_offset = 0; |
---|
| 767 | + u32 datalen = 0; |
---|
811 | 768 | |
---|
812 | 769 | /* |
---|
813 | 770 | * Check whether any of the completions have come back |
---|
.. | .. |
---|
900 | 857 | goto free_txreq; |
---|
901 | 858 | } |
---|
902 | 859 | |
---|
903 | | - /* |
---|
904 | | - * If the request contains any data vectors, add up to |
---|
905 | | - * fragsize bytes to the descriptor. |
---|
906 | | - */ |
---|
907 | | - while (queued < datalen && |
---|
908 | | - (req->sent + data_sent) < req->data_len) { |
---|
909 | | - ret = user_sdma_txadd(req, tx, iovec, datalen, |
---|
910 | | - &queued, &data_sent, &iov_offset); |
---|
911 | | - if (ret) |
---|
912 | | - goto free_txreq; |
---|
913 | | - } |
---|
914 | | - /* |
---|
915 | | - * The txreq was submitted successfully so we can update |
---|
916 | | - * the counters. |
---|
917 | | - */ |
---|
918 | 860 | req->koffset += datalen; |
---|
919 | 861 | if (req_opcode(req->info.ctrl) == EXPECTED) |
---|
920 | 862 | req->tidoffset += datalen; |
---|
921 | | - req->sent += data_sent; |
---|
922 | | - if (req->data_len) |
---|
923 | | - iovec->offset += iov_offset; |
---|
| 863 | + req->sent += datalen; |
---|
| 864 | + while (datalen) { |
---|
| 865 | + ret = add_system_pages_to_sdma_packet(req, tx, iovec, |
---|
| 866 | + &datalen); |
---|
| 867 | + if (ret) |
---|
| 868 | + goto free_txreq; |
---|
| 869 | + iovec = &req->iovs[req->iov_idx]; |
---|
| 870 | + } |
---|
924 | 871 | list_add_tail(&tx->txreq.list, &req->txps); |
---|
925 | 872 | /* |
---|
926 | 873 | * It is important to increment this here as it is used to |
---|
.. | .. |
---|
957 | 904 | static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) |
---|
958 | 905 | { |
---|
959 | 906 | struct evict_data evict_data; |
---|
| 907 | + struct mmu_rb_handler *handler = pq->handler; |
---|
960 | 908 | |
---|
961 | 909 | evict_data.cleared = 0; |
---|
962 | 910 | evict_data.target = npages; |
---|
963 | | - hfi1_mmu_rb_evict(pq->handler, &evict_data); |
---|
| 911 | + hfi1_mmu_rb_evict(handler, &evict_data); |
---|
964 | 912 | return evict_data.cleared; |
---|
965 | | -} |
---|
966 | | - |
---|
967 | | -static int pin_sdma_pages(struct user_sdma_request *req, |
---|
968 | | - struct user_sdma_iovec *iovec, |
---|
969 | | - struct sdma_mmu_node *node, |
---|
970 | | - int npages) |
---|
971 | | -{ |
---|
972 | | - int pinned, cleared; |
---|
973 | | - struct page **pages; |
---|
974 | | - struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
975 | | - |
---|
976 | | - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); |
---|
977 | | - if (!pages) |
---|
978 | | - return -ENOMEM; |
---|
979 | | - memcpy(pages, node->pages, node->npages * sizeof(*pages)); |
---|
980 | | - |
---|
981 | | - npages -= node->npages; |
---|
982 | | -retry: |
---|
983 | | - if (!hfi1_can_pin_pages(pq->dd, current->mm, |
---|
984 | | - atomic_read(&pq->n_locked), npages)) { |
---|
985 | | - cleared = sdma_cache_evict(pq, npages); |
---|
986 | | - if (cleared >= npages) |
---|
987 | | - goto retry; |
---|
988 | | - } |
---|
989 | | - pinned = hfi1_acquire_user_pages(current->mm, |
---|
990 | | - ((unsigned long)iovec->iov.iov_base + |
---|
991 | | - (node->npages * PAGE_SIZE)), npages, 0, |
---|
992 | | - pages + node->npages); |
---|
993 | | - if (pinned < 0) { |
---|
994 | | - kfree(pages); |
---|
995 | | - return pinned; |
---|
996 | | - } |
---|
997 | | - if (pinned != npages) { |
---|
998 | | - unpin_vector_pages(current->mm, pages, node->npages, pinned); |
---|
999 | | - return -EFAULT; |
---|
1000 | | - } |
---|
1001 | | - kfree(node->pages); |
---|
1002 | | - node->rb.len = iovec->iov.iov_len; |
---|
1003 | | - node->pages = pages; |
---|
1004 | | - atomic_add(pinned, &pq->n_locked); |
---|
1005 | | - return pinned; |
---|
1006 | | -} |
---|
1007 | | - |
---|
1008 | | -static void unpin_sdma_pages(struct sdma_mmu_node *node) |
---|
1009 | | -{ |
---|
1010 | | - if (node->npages) { |
---|
1011 | | - unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, |
---|
1012 | | - node->npages); |
---|
1013 | | - atomic_sub(node->npages, &node->pq->n_locked); |
---|
1014 | | - } |
---|
1015 | | -} |
---|
1016 | | - |
---|
1017 | | -static int pin_vector_pages(struct user_sdma_request *req, |
---|
1018 | | - struct user_sdma_iovec *iovec) |
---|
1019 | | -{ |
---|
1020 | | - int ret = 0, pinned, npages; |
---|
1021 | | - struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
1022 | | - struct sdma_mmu_node *node = NULL; |
---|
1023 | | - struct mmu_rb_node *rb_node; |
---|
1024 | | - struct iovec *iov; |
---|
1025 | | - bool extracted; |
---|
1026 | | - |
---|
1027 | | - extracted = |
---|
1028 | | - hfi1_mmu_rb_remove_unless_exact(pq->handler, |
---|
1029 | | - (unsigned long) |
---|
1030 | | - iovec->iov.iov_base, |
---|
1031 | | - iovec->iov.iov_len, &rb_node); |
---|
1032 | | - if (rb_node) { |
---|
1033 | | - node = container_of(rb_node, struct sdma_mmu_node, rb); |
---|
1034 | | - if (!extracted) { |
---|
1035 | | - atomic_inc(&node->refcount); |
---|
1036 | | - iovec->pages = node->pages; |
---|
1037 | | - iovec->npages = node->npages; |
---|
1038 | | - iovec->node = node; |
---|
1039 | | - return 0; |
---|
1040 | | - } |
---|
1041 | | - } |
---|
1042 | | - |
---|
1043 | | - if (!node) { |
---|
1044 | | - node = kzalloc(sizeof(*node), GFP_KERNEL); |
---|
1045 | | - if (!node) |
---|
1046 | | - return -ENOMEM; |
---|
1047 | | - |
---|
1048 | | - node->rb.addr = (unsigned long)iovec->iov.iov_base; |
---|
1049 | | - node->pq = pq; |
---|
1050 | | - atomic_set(&node->refcount, 0); |
---|
1051 | | - } |
---|
1052 | | - |
---|
1053 | | - iov = &iovec->iov; |
---|
1054 | | - npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len); |
---|
1055 | | - if (node->npages < npages) { |
---|
1056 | | - pinned = pin_sdma_pages(req, iovec, node, npages); |
---|
1057 | | - if (pinned < 0) { |
---|
1058 | | - ret = pinned; |
---|
1059 | | - goto bail; |
---|
1060 | | - } |
---|
1061 | | - node->npages += pinned; |
---|
1062 | | - npages = node->npages; |
---|
1063 | | - } |
---|
1064 | | - iovec->pages = node->pages; |
---|
1065 | | - iovec->npages = npages; |
---|
1066 | | - iovec->node = node; |
---|
1067 | | - |
---|
1068 | | - ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); |
---|
1069 | | - if (ret) { |
---|
1070 | | - iovec->node = NULL; |
---|
1071 | | - goto bail; |
---|
1072 | | - } |
---|
1073 | | - return 0; |
---|
1074 | | -bail: |
---|
1075 | | - unpin_sdma_pages(node); |
---|
1076 | | - kfree(node); |
---|
1077 | | - return ret; |
---|
1078 | | -} |
---|
1079 | | - |
---|
1080 | | -static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, |
---|
1081 | | - unsigned start, unsigned npages) |
---|
1082 | | -{ |
---|
1083 | | - hfi1_release_user_pages(mm, pages + start, npages, false); |
---|
1084 | | - kfree(pages); |
---|
1085 | 913 | } |
---|
1086 | 914 | |
---|
1087 | 915 | static int check_header_template(struct user_sdma_request *req, |
---|
.. | .. |
---|
1425 | 1253 | if (req->seqcomp != req->info.npkts - 1) |
---|
1426 | 1254 | return; |
---|
1427 | 1255 | |
---|
1428 | | - user_sdma_free_request(req, false); |
---|
| 1256 | + user_sdma_free_request(req); |
---|
1429 | 1257 | set_comp_state(pq, cq, req->info.comp_idx, state, status); |
---|
1430 | 1258 | pq_update(pq); |
---|
1431 | 1259 | } |
---|
.. | .. |
---|
1436 | 1264 | wake_up(&pq->wait); |
---|
1437 | 1265 | } |
---|
1438 | 1266 | |
---|
1439 | | -static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) |
---|
| 1267 | +static void user_sdma_free_request(struct user_sdma_request *req) |
---|
1440 | 1268 | { |
---|
1441 | | - int i; |
---|
1442 | | - |
---|
1443 | 1269 | if (!list_empty(&req->txps)) { |
---|
1444 | 1270 | struct sdma_txreq *t, *p; |
---|
1445 | 1271 | |
---|
.. | .. |
---|
1450 | 1276 | sdma_txclean(req->pq->dd, t); |
---|
1451 | 1277 | kmem_cache_free(req->pq->txreq_cache, tx); |
---|
1452 | 1278 | } |
---|
1453 | | - } |
---|
1454 | | - |
---|
1455 | | - for (i = 0; i < req->data_iovs; i++) { |
---|
1456 | | - struct sdma_mmu_node *node = req->iovs[i].node; |
---|
1457 | | - |
---|
1458 | | - if (!node) |
---|
1459 | | - continue; |
---|
1460 | | - |
---|
1461 | | - req->iovs[i].node = NULL; |
---|
1462 | | - |
---|
1463 | | - if (unpin) |
---|
1464 | | - hfi1_mmu_rb_remove(req->pq->handler, |
---|
1465 | | - &node->rb); |
---|
1466 | | - else |
---|
1467 | | - atomic_dec(&node->refcount); |
---|
1468 | 1279 | } |
---|
1469 | 1280 | |
---|
1470 | 1281 | kfree(req->tids); |
---|
.. | .. |
---|
1484 | 1295 | idx, state, ret); |
---|
1485 | 1296 | } |
---|
1486 | 1297 | |
---|
| 1298 | +static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, |
---|
| 1299 | + unsigned int start, unsigned int npages) |
---|
| 1300 | +{ |
---|
| 1301 | + hfi1_release_user_pages(mm, pages + start, npages, false); |
---|
| 1302 | + kfree(pages); |
---|
| 1303 | +} |
---|
| 1304 | + |
---|
| 1305 | +static void free_system_node(struct sdma_mmu_node *node) |
---|
| 1306 | +{ |
---|
| 1307 | + if (node->npages) { |
---|
| 1308 | + unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, |
---|
| 1309 | + node->npages); |
---|
| 1310 | + atomic_sub(node->npages, &node->pq->n_locked); |
---|
| 1311 | + } |
---|
| 1312 | + kfree(node); |
---|
| 1313 | +} |
---|
| 1314 | + |
---|
| 1315 | +/* |
---|
| 1316 | + * Takes an additional kref (via kref_get()) on the returned rb_node to prevent rb_node |
---|
| 1317 | + * from being released until after rb_node is assigned to an SDMA descriptor |
---|
| 1318 | + * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the |
---|
| 1319 | + * virtual address range for rb_node is invalidated between now and then. |
---|
| 1320 | + */ |
---|
| 1321 | +static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, |
---|
| 1322 | + unsigned long start, |
---|
| 1323 | + unsigned long end) |
---|
| 1324 | +{ |
---|
| 1325 | + struct mmu_rb_node *rb_node; |
---|
| 1326 | + unsigned long flags; |
---|
| 1327 | + |
---|
| 1328 | + spin_lock_irqsave(&handler->lock, flags); |
---|
| 1329 | + rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start)); |
---|
| 1330 | + if (!rb_node) { |
---|
| 1331 | + spin_unlock_irqrestore(&handler->lock, flags); |
---|
| 1332 | + return NULL; |
---|
| 1333 | + } |
---|
| 1334 | + |
---|
| 1335 | + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ |
---|
| 1336 | + kref_get(&rb_node->refcount); |
---|
| 1337 | + spin_unlock_irqrestore(&handler->lock, flags); |
---|
| 1338 | + |
---|
| 1339 | + return container_of(rb_node, struct sdma_mmu_node, rb); |
---|
| 1340 | +} |
---|
| 1341 | + |
---|
| 1342 | +static int pin_system_pages(struct user_sdma_request *req, |
---|
| 1343 | + uintptr_t start_address, size_t length, |
---|
| 1344 | + struct sdma_mmu_node *node, int npages) |
---|
| 1345 | +{ |
---|
| 1346 | + struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
| 1347 | + int pinned, cleared; |
---|
| 1348 | + struct page **pages; |
---|
| 1349 | + |
---|
| 1350 | + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); |
---|
| 1351 | + if (!pages) |
---|
| 1352 | + return -ENOMEM; |
---|
| 1353 | + |
---|
| 1354 | +retry: |
---|
| 1355 | + if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked), |
---|
| 1356 | + npages)) { |
---|
| 1357 | + SDMA_DBG(req, "Evicting: nlocked %u npages %u", |
---|
| 1358 | + atomic_read(&pq->n_locked), npages); |
---|
| 1359 | + cleared = sdma_cache_evict(pq, npages); |
---|
| 1360 | + if (cleared >= npages) |
---|
| 1361 | + goto retry; |
---|
| 1362 | + } |
---|
| 1363 | + |
---|
| 1364 | + SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u", |
---|
| 1365 | + start_address, node->npages, npages); |
---|
| 1366 | + pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0, |
---|
| 1367 | + pages); |
---|
| 1368 | + |
---|
| 1369 | + if (pinned < 0) { |
---|
| 1370 | + kfree(pages); |
---|
| 1371 | + SDMA_DBG(req, "pinned %d", pinned); |
---|
| 1372 | + return pinned; |
---|
| 1373 | + } |
---|
| 1374 | + if (pinned != npages) { |
---|
| 1375 | + unpin_vector_pages(current->mm, pages, node->npages, pinned); |
---|
| 1376 | + SDMA_DBG(req, "npages %u pinned %d", npages, pinned); |
---|
| 1377 | + return -EFAULT; |
---|
| 1378 | + } |
---|
| 1379 | + node->rb.addr = start_address; |
---|
| 1380 | + node->rb.len = length; |
---|
| 1381 | + node->pages = pages; |
---|
| 1382 | + node->npages = npages; |
---|
| 1383 | + atomic_add(pinned, &pq->n_locked); |
---|
| 1384 | + SDMA_DBG(req, "done. pinned %d", pinned); |
---|
| 1385 | + return 0; |
---|
| 1386 | +} |
---|
| 1387 | + |
---|
| 1388 | +/* |
---|
| 1389 | + * kref refcount on *node_p will be 2 on successful addition: one kref from |
---|
| 1390 | + * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being |
---|
| 1391 | + * released until after *node_p is assigned to an SDMA descriptor (struct |
---|
| 1392 | + * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual |
---|
| 1393 | + * address range for *node_p is invalidated between now and then. |
---|
| 1394 | + */ |
---|
| 1395 | +static int add_system_pinning(struct user_sdma_request *req, |
---|
| 1396 | + struct sdma_mmu_node **node_p, |
---|
| 1397 | + unsigned long start, unsigned long len) |
---|
| 1398 | + |
---|
| 1399 | +{ |
---|
| 1400 | + struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
| 1401 | + struct sdma_mmu_node *node; |
---|
| 1402 | + int ret; |
---|
| 1403 | + |
---|
| 1404 | + node = kzalloc(sizeof(*node), GFP_KERNEL); |
---|
| 1405 | + if (!node) |
---|
| 1406 | + return -ENOMEM; |
---|
| 1407 | + |
---|
| 1408 | + /* First kref "moves" to mmu_rb_handler */ |
---|
| 1409 | + kref_init(&node->rb.refcount); |
---|
| 1410 | + |
---|
| 1411 | + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ |
---|
| 1412 | + kref_get(&node->rb.refcount); |
---|
| 1413 | + |
---|
| 1414 | + node->pq = pq; |
---|
| 1415 | + ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); |
---|
| 1416 | + if (ret == 0) { |
---|
| 1417 | + ret = hfi1_mmu_rb_insert(pq->handler, &node->rb); |
---|
| 1418 | + if (ret) |
---|
| 1419 | + free_system_node(node); |
---|
| 1420 | + else |
---|
| 1421 | + *node_p = node; |
---|
| 1422 | + |
---|
| 1423 | + return ret; |
---|
| 1424 | + } |
---|
| 1425 | + |
---|
| 1426 | + kfree(node); |
---|
| 1427 | + return ret; |
---|
| 1428 | +} |
---|
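To make the comment above concrete: on success the node is referenced twice, once by the mmu_rb_handler (from kref_init()) and once by the caller's "safety" reference (from kref_get()), which the caller drops later with kref_put(). A minimal sketch of that counting, illustration only; example_node_refcounts() is a made-up helper:

static void example_node_refcounts(struct sdma_mmu_node *node)
{
	kref_init(&node->rb.refcount);	/* ref #1: owned by the mmu_rb_handler */
	kref_get(&node->rb.refcount);	/* ref #2: caller's "safety" reference */
	WARN_ON(kref_read(&node->rb.refcount) != 2);

	/* ... node inserted, pages attached to SDMA descriptors ... */

	kref_put(&node->rb.refcount, hfi1_mmu_rb_release);	/* drop ref #2 */
}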
| 1429 | + |
---|
| 1430 | +static int get_system_cache_entry(struct user_sdma_request *req, |
---|
| 1431 | + struct sdma_mmu_node **node_p, |
---|
| 1432 | + size_t req_start, size_t req_len) |
---|
| 1433 | +{ |
---|
| 1434 | + struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
| 1435 | + u64 start = ALIGN_DOWN(req_start, PAGE_SIZE); |
---|
| 1436 | + u64 end = PFN_ALIGN(req_start + req_len); |
---|
| 1437 | + struct mmu_rb_handler *handler = pq->handler; |
---|
| 1438 | + int ret; |
---|
| 1439 | + |
---|
| 1440 | + if ((end - start) == 0) { |
---|
| 1441 | + SDMA_DBG(req, |
---|
| 1442 | + "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx", |
---|
| 1443 | + req_start, req_len, start, end); |
---|
| 1444 | + return -EINVAL; |
---|
| 1445 | + } |
---|
| 1446 | + |
---|
| 1447 | + SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len); |
---|
| 1448 | + |
---|
| 1449 | + while (1) { |
---|
| 1450 | + struct sdma_mmu_node *node = |
---|
| 1451 | + find_system_node(handler, start, end); |
---|
| 1452 | + u64 prepend_len = 0; |
---|
| 1453 | + |
---|
| 1454 | + SDMA_DBG(req, "node %p start %llx end %llu", node, start, end); |
---|
| 1455 | + if (!node) { |
---|
| 1456 | + ret = add_system_pinning(req, node_p, start, |
---|
| 1457 | + end - start); |
---|
| 1458 | + if (ret == -EEXIST) { |
---|
| 1459 | + /* |
---|
| 1460 | + * Another execution context has inserted a |
---|
| 1461 | + * conflicting entry first. |
---|
| 1462 | + */ |
---|
| 1463 | + continue; |
---|
| 1464 | + } |
---|
| 1465 | + return ret; |
---|
| 1466 | + } |
---|
| 1467 | + |
---|
| 1468 | + if (node->rb.addr <= start) { |
---|
| 1469 | + /* |
---|
| 1470 | + * This entry covers at least part of the region. If it doesn't extend |
---|
| 1471 | + * to the end, then this will be called again for the next segment. |
---|
| 1472 | + */ |
---|
| 1473 | + *node_p = node; |
---|
| 1474 | + return 0; |
---|
| 1475 | + } |
---|
| 1476 | + |
---|
| 1477 | + SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", |
---|
| 1478 | + node->rb.addr, kref_read(&node->rb.refcount)); |
---|
| 1479 | + prepend_len = node->rb.addr - start; |
---|
| 1480 | + |
---|
| 1481 | + /* |
---|
| 1482 | + * This node will not be returned, instead a new node |
---|
| 1483 | + * will be. So release the reference. |
---|
| 1484 | + */ |
---|
| 1485 | + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); |
---|
| 1486 | + |
---|
| 1487 | + /* Prepend a node to cover the beginning of the allocation */ |
---|
| 1488 | + ret = add_system_pinning(req, node_p, start, prepend_len); |
---|
| 1489 | + if (ret == -EEXIST) { |
---|
| 1490 | + /* Another execution context has inserted a conflicting entry first. */ |
---|
| 1491 | + continue; |
---|
| 1492 | + } |
---|
| 1493 | + return ret; |
---|
| 1494 | + } |
---|
| 1495 | +} |
---|
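A worked example of the page rounding above (illustration only; example_page_rounding() is a made-up helper and 4 KiB pages are assumed): the requested range is widened to whole pages before it is looked up or pinned.

static void example_page_rounding(void)
{
	size_t req_start = 0x10e00, req_len = 0x1400;	/* 5 KiB, not page aligned */
	u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);	/* 0x10000 */
	u64 end = PFN_ALIGN(req_start + req_len);	/* 0x12200 rounds up to 0x13000 */

	/* Three pages are pinned even though only parts of them are used. */
	pr_debug("pin %llu pages for [%llx, %llx)\n",
		 PFN_DOWN(end - start), start, end);
}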
| 1496 | + |
---|
| 1497 | +static void sdma_mmu_rb_node_get(void *ctx) |
---|
| 1498 | +{ |
---|
| 1499 | + struct mmu_rb_node *node = ctx; |
---|
| 1500 | + |
---|
| 1501 | + kref_get(&node->refcount); |
---|
| 1502 | +} |
---|
| 1503 | + |
---|
| 1504 | +static void sdma_mmu_rb_node_put(void *ctx) |
---|
| 1505 | +{ |
---|
| 1506 | + struct sdma_mmu_node *node = ctx; |
---|
| 1507 | + |
---|
| 1508 | + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); |
---|
| 1509 | +} |
---|
| 1510 | + |
---|
| 1511 | +static int add_mapping_to_sdma_packet(struct user_sdma_request *req, |
---|
| 1512 | + struct user_sdma_txreq *tx, |
---|
| 1513 | + struct sdma_mmu_node *cache_entry, |
---|
| 1514 | + size_t start, |
---|
| 1515 | + size_t from_this_cache_entry) |
---|
| 1516 | +{ |
---|
| 1517 | + struct hfi1_user_sdma_pkt_q *pq = req->pq; |
---|
| 1518 | + unsigned int page_offset; |
---|
| 1519 | + unsigned int from_this_page; |
---|
| 1520 | + size_t page_index; |
---|
| 1521 | + void *ctx; |
---|
| 1522 | + int ret; |
---|
| 1523 | + |
---|
| 1524 | + /* |
---|
| 1525 | + * Because the cache may be more fragmented than the memory that is being accessed, |
---|
| 1526 | + * it's not strictly necessary to have a descriptor per cache entry. |
---|
| 1527 | + */ |
---|
| 1528 | + |
---|
| 1529 | + while (from_this_cache_entry) { |
---|
| 1530 | + page_index = PFN_DOWN(start - cache_entry->rb.addr); |
---|
| 1531 | + |
---|
| 1532 | + if (page_index >= cache_entry->npages) { |
---|
| 1533 | + SDMA_DBG(req, |
---|
| 1534 | + "Request for page_index %zu >= cache_entry->npages %u", |
---|
| 1535 | + page_index, cache_entry->npages); |
---|
| 1536 | + return -EINVAL; |
---|
| 1537 | + } |
---|
| 1538 | + |
---|
| 1539 | + page_offset = start - ALIGN_DOWN(start, PAGE_SIZE); |
---|
| 1540 | + from_this_page = PAGE_SIZE - page_offset; |
---|
| 1541 | + |
---|
| 1542 | + if (from_this_page < from_this_cache_entry) { |
---|
| 1543 | + ctx = NULL; |
---|
| 1544 | + } else { |
---|
| 1545 | + /* |
---|
| 1546 | + * In the case they are equal, the next line has no practical effect, |
---|
| 1547 | + * but it's better to do a register-to-register copy than a conditional |
---|
| 1548 | + * branch. |
---|
| 1549 | + */ |
---|
| 1550 | + from_this_page = from_this_cache_entry; |
---|
| 1551 | + ctx = cache_entry; |
---|
| 1552 | + } |
---|
| 1553 | + |
---|
| 1554 | + ret = sdma_txadd_page(pq->dd, &tx->txreq, |
---|
| 1555 | + cache_entry->pages[page_index], |
---|
| 1556 | + page_offset, from_this_page, |
---|
| 1557 | + ctx, |
---|
| 1558 | + sdma_mmu_rb_node_get, |
---|
| 1559 | + sdma_mmu_rb_node_put); |
---|
| 1560 | + if (ret) { |
---|
| 1561 | + /* |
---|
| 1562 | + * When there's a failure, the entire request is freed by |
---|
| 1563 | + * user_sdma_send_pkts(). |
---|
| 1564 | + */ |
---|
| 1565 | + SDMA_DBG(req, |
---|
| 1566 | + "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u", |
---|
| 1567 | + ret, page_index, page_offset, from_this_page); |
---|
| 1568 | + return ret; |
---|
| 1569 | + } |
---|
| 1570 | + start += from_this_page; |
---|
| 1571 | + from_this_cache_entry -= from_this_page; |
---|
| 1572 | + } |
---|
| 1573 | + return 0; |
---|
| 1574 | +} |
---|
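A small sketch of the per-page split in the loop above (illustration only; example_bytes_this_page() is a made-up helper): page_offset is where start lands within its page, and the fragment length is the smaller of "rest of this page" and "rest of this cache entry". For a page-aligned node->rb.addr, start = node->rb.addr + 0xf00 with from_this_cache_entry = 0x300 gives page_offset = 0xf00 and a first fragment of 0x100 bytes (ctx stays NULL because the cache entry is not finished), and the next iteration continues on the following page.

static unsigned int example_bytes_this_page(size_t start, size_t from_this_cache_entry)
{
	unsigned int page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);

	return min_t(size_t, PAGE_SIZE - page_offset, from_this_cache_entry);
}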
| 1575 | + |
---|
| 1576 | +static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, |
---|
| 1577 | + struct user_sdma_txreq *tx, |
---|
| 1578 | + struct user_sdma_iovec *iovec, |
---|
| 1579 | + size_t from_this_iovec) |
---|
| 1580 | +{ |
---|
| 1581 | + while (from_this_iovec > 0) { |
---|
| 1582 | + struct sdma_mmu_node *cache_entry; |
---|
| 1583 | + size_t from_this_cache_entry; |
---|
| 1584 | + size_t start; |
---|
| 1585 | + int ret; |
---|
| 1586 | + |
---|
| 1587 | + start = (uintptr_t)iovec->iov.iov_base + iovec->offset; |
---|
| 1588 | + ret = get_system_cache_entry(req, &cache_entry, start, |
---|
| 1589 | + from_this_iovec); |
---|
| 1590 | + if (ret) { |
---|
| 1591 | + SDMA_DBG(req, "pin system segment failed %d", ret); |
---|
| 1592 | + return ret; |
---|
| 1593 | + } |
---|
| 1594 | + |
---|
| 1595 | + from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr); |
---|
| 1596 | + if (from_this_cache_entry > from_this_iovec) |
---|
| 1597 | + from_this_cache_entry = from_this_iovec; |
---|
| 1598 | + |
---|
| 1599 | + ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, |
---|
| 1600 | + from_this_cache_entry); |
---|
| 1601 | + |
---|
| 1602 | + /* |
---|
| 1603 | + * Done adding cache_entry to zero or more sdma_desc. Can |
---|
| 1604 | + * kref_put() the "safety" kref taken under |
---|
| 1605 | + * get_system_cache_entry(). |
---|
| 1606 | + */ |
---|
| 1607 | + kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); |
---|
| 1608 | + |
---|
| 1609 | + if (ret) { |
---|
| 1610 | + SDMA_DBG(req, "add system segment failed %d", ret); |
---|
| 1611 | + return ret; |
---|
| 1612 | + } |
---|
| 1613 | + |
---|
| 1614 | + iovec->offset += from_this_cache_entry; |
---|
| 1615 | + from_this_iovec -= from_this_cache_entry; |
---|
| 1616 | + } |
---|
| 1617 | + |
---|
| 1618 | + return 0; |
---|
| 1619 | +} |
---|
| 1620 | + |
---|
| 1621 | +static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, |
---|
| 1622 | + struct user_sdma_txreq *tx, |
---|
| 1623 | + struct user_sdma_iovec *iovec, |
---|
| 1624 | + u32 *pkt_data_remaining) |
---|
| 1625 | +{ |
---|
| 1626 | + size_t remaining_to_add = *pkt_data_remaining; |
---|
| 1627 | + /* |
---|
| 1628 | + * Walk through iovec entries, ensure the associated pages |
---|
| 1629 | + * are pinned and mapped, and add data to the packet until no more |
---|
| 1630 | + * data remains to be added. |
---|
| 1631 | + */ |
---|
| 1632 | + while (remaining_to_add > 0) { |
---|
| 1633 | + struct user_sdma_iovec *cur_iovec; |
---|
| 1634 | + size_t from_this_iovec; |
---|
| 1635 | + int ret; |
---|
| 1636 | + |
---|
| 1637 | + cur_iovec = iovec; |
---|
| 1638 | + from_this_iovec = iovec->iov.iov_len - iovec->offset; |
---|
| 1639 | + |
---|
| 1640 | + if (from_this_iovec > remaining_to_add) { |
---|
| 1641 | + from_this_iovec = remaining_to_add; |
---|
| 1642 | + } else { |
---|
| 1643 | + /* The current iovec entry will be consumed by this pass. */ |
---|
| 1644 | + req->iov_idx++; |
---|
| 1645 | + iovec++; |
---|
| 1646 | + } |
---|
| 1647 | + |
---|
| 1648 | + ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec, |
---|
| 1649 | + from_this_iovec); |
---|
| 1650 | + if (ret) |
---|
| 1651 | + return ret; |
---|
| 1652 | + |
---|
| 1653 | + remaining_to_add -= from_this_iovec; |
---|
| 1654 | + } |
---|
| 1655 | + *pkt_data_remaining = remaining_to_add; |
---|
| 1656 | + |
---|
| 1657 | + return 0; |
---|
| 1658 | +} |
---|
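A worked example of the consumption logic above (illustration only, hypothetical sizes):

/*
 * One 8192-byte packet drawn from two iovec entries:
 *
 *   pass 1: iovec[n] has 3000 bytes left past its offset
 *           -> from_this_iovec = 3000, iovec[n] is consumed, req->iov_idx++
 *           -> remaining_to_add = 8192 - 3000 = 5192
 *   pass 2: iovec[n + 1] has more than 5192 bytes available
 *           -> from_this_iovec is clamped to remaining_to_add = 5192
 *           -> the offset of iovec[n + 1] becomes 5192, remaining_to_add = 0
 */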
| 1659 | + |
---|
1487 | 1660 | static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, |
---|
1488 | 1661 | unsigned long len) |
---|
1489 | 1662 | { |
---|
1490 | 1663 | return (bool)(node->addr == addr); |
---|
1491 | | -} |
---|
1492 | | - |
---|
1493 | | -static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode) |
---|
1494 | | -{ |
---|
1495 | | - struct sdma_mmu_node *node = |
---|
1496 | | - container_of(mnode, struct sdma_mmu_node, rb); |
---|
1497 | | - |
---|
1498 | | - atomic_inc(&node->refcount); |
---|
1499 | | - return 0; |
---|
1500 | 1664 | } |
---|
1501 | 1665 | |
---|
1502 | 1666 | /* |
---|
.. | .. |
---|
1510 | 1674 | struct sdma_mmu_node *node = |
---|
1511 | 1675 | container_of(mnode, struct sdma_mmu_node, rb); |
---|
1512 | 1676 | struct evict_data *evict_data = evict_arg; |
---|
1513 | | - |
---|
1514 | | - /* is this node still being used? */ |
---|
1515 | | - if (atomic_read(&node->refcount)) |
---|
1516 | | - return 0; /* keep this node */ |
---|
1517 | 1677 | |
---|
1518 | 1678 | /* this node will be evicted, add its pages to our count */ |
---|
1519 | 1679 | evict_data->cleared += node->npages; |
---|
.. | .. |
---|
1530 | 1690 | struct sdma_mmu_node *node = |
---|
1531 | 1691 | container_of(mnode, struct sdma_mmu_node, rb); |
---|
1532 | 1692 | |
---|
1533 | | - unpin_sdma_pages(node); |
---|
1534 | | - kfree(node); |
---|
1535 | | -} |
---|
1536 | | - |
---|
1537 | | -static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode) |
---|
1538 | | -{ |
---|
1539 | | - struct sdma_mmu_node *node = |
---|
1540 | | - container_of(mnode, struct sdma_mmu_node, rb); |
---|
1541 | | - |
---|
1542 | | - if (!atomic_read(&node->refcount)) |
---|
1543 | | - return 1; |
---|
1544 | | - return 0; |
---|
| 1693 | + free_system_node(node); |
---|
1545 | 1694 | } |
---|