| .. | .. |
|---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (c) 2014 Mellanox Technologies. All rights reserved. |
|---|
| 3 | | - * |
|---|
| 4 | | - * This software is available to you under a choice of one of two |
|---|
| 5 | | - * licenses. You may choose to be licensed under the terms of the GNU |
|---|
| 6 | | - * General Public License (GPL) Version 2, available from the file |
|---|
| 7 | | - * COPYING in the main directory of this source tree, or the |
|---|
| 8 | | - * OpenIB.org BSD license below: |
|---|
| 9 | | - * |
|---|
| 10 | | - * Redistribution and use in source and binary forms, with or |
|---|
| 11 | | - * without modification, are permitted provided that the following |
|---|
| 12 | | - * conditions are met: |
|---|
| 13 | | - * |
|---|
| 14 | | - * - Redistributions of source code must retain the above |
|---|
| 15 | | - * copyright notice, this list of conditions and the following |
|---|
| 16 | | - * disclaimer. |
|---|
| 17 | | - * |
|---|
| 18 | | - * - Redistributions in binary form must reproduce the above |
|---|
| 19 | | - * copyright notice, this list of conditions and the following |
|---|
| 20 | | - * disclaimer in the documentation and/or other materials |
|---|
| 21 | | - * provided with the distribution. |
|---|
| 22 | | - * |
|---|
| 23 | | - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|---|
| 24 | | - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|---|
| 25 | | - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|---|
| 26 | | - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|---|
| 27 | | - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|---|
| 28 | | - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|---|
| 29 | | - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|---|
| 30 | | - * SOFTWARE. |
|---|
| 31 | 4 | */ |
|---|
| 32 | 5 | |
|---|
| 33 | 6 | #ifndef IB_UMEM_ODP_H |
|---|
| .. | .. |
|---|
| 35 | 8 | |
|---|
| 36 | 9 | #include <rdma/ib_umem.h> |
|---|
| 37 | 10 | #include <rdma/ib_verbs.h> |
|---|
| 38 | | -#include <linux/interval_tree.h> |
|---|
| 39 | | - |
|---|
| 40 | | -struct umem_odp_node { |
|---|
| 41 | | - u64 __subtree_last; |
|---|
| 42 | | - struct rb_node rb; |
|---|
| 43 | | -}; |
|---|
| 44 | 11 | |
|---|
| 45 | 12 | struct ib_umem_odp { |
|---|
| 13 | + struct ib_umem umem; |
|---|
| 14 | + struct mmu_interval_notifier notifier; |
|---|
| 15 | + struct pid *tgid; |
|---|
| 16 | + |
|---|
| 17 | + /* An array of the pfns included in the on-demand paging umem. */ |
|---|
| 18 | + unsigned long *pfn_list; |
|---|
| 19 | + |
|---|
| 46 | 20 | /* |
|---|
| 47 | | - * An array of the pages included in the on-demand paging umem. |
|---|
| 48 | | - * Indices of pages that are currently not mapped into the device will |
|---|
| 49 | | - * contain NULL. |
|---|
| 50 | | - */ |
|---|
| 51 | | - struct page **page_list; |
|---|
| 52 | | - /* |
|---|
| 53 | | - * An array of the same size as page_list, with DMA addresses mapped |
|---|
| 54 | | - * for pages the pages in page_list. The lower two bits designate |
|---|
| 55 | | - * access permissions. See ODP_READ_ALLOWED_BIT and |
|---|
| 56 | | - * ODP_WRITE_ALLOWED_BIT. |
|---|
| 21 | + * An array with DMA addresses mapped for pfns in pfn_list. |
|---|
| 22 | + * The lower two bits designate access permissions. |
|---|
| 23 | + * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT. |
|---|
| 57 | 24 | */ |
|---|
| 58 | 25 | dma_addr_t *dma_list; |
|---|
| 59 | 26 | /* |
|---|
| .. | .. |
|---|
| 64 | 31 | struct mutex umem_mutex; |
|---|
| 65 | 32 | void *private; /* for the HW driver to use. */ |
|---|
| 66 | 33 | |
|---|
| 67 | | - /* When false, use the notifier counter in the ucontext struct. */ |
|---|
| 68 | | - bool mn_counters_active; |
|---|
| 69 | | - int notifiers_seq; |
|---|
| 70 | | - int notifiers_count; |
|---|
| 34 | + int npages; |
|---|
| 71 | 35 | |
|---|
| 72 | | - /* A linked list of umems that don't have private mmu notifier |
|---|
| 73 | | - * counters yet. */ |
|---|
| 74 | | - struct list_head no_private_counters; |
|---|
| 75 | | - struct ib_umem *umem; |
|---|
| 36 | + /* |
|---|
| 37 | + * An implicit odp umem cannot be DMA mapped, has 0 length, and serves |
|---|
| 38 | + * only as an anchor for the driver to hold onto the per_mm. FIXME: |
|---|
| 39 | + * This should be removed and drivers should work with the per_mm |
|---|
| 40 | + * directly. |
|---|
| 41 | + */ |
|---|
| 42 | + bool is_implicit_odp; |
|---|
| 76 | 43 | |
|---|
| 77 | | - /* Tree tracking */ |
|---|
| 78 | | - struct umem_odp_node interval_tree; |
|---|
| 79 | | - |
|---|
| 80 | | - struct completion notifier_completion; |
|---|
| 81 | | - int dying; |
|---|
| 82 | | - struct work_struct work; |
|---|
| 44 | + unsigned int page_shift; |
|---|
| 83 | 45 | }; |
|---|
| 84 | 46 | |
|---|
| 85 | | -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING |
|---|
| 47 | +static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) |
|---|
| 48 | +{ |
|---|
| 49 | + return container_of(umem, struct ib_umem_odp, umem); |
|---|
| 50 | +} |
|---|
| 86 | 51 | |
|---|
| 87 | | -int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, |
|---|
| 88 | | - int access); |
|---|
| 89 | | -struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, |
|---|
| 90 | | - unsigned long addr, |
|---|
| 91 | | - size_t size); |
|---|
| 52 | +/* Returns the address of the first page of an ODP umem. */ |
|---|
| 53 | +static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) |
|---|
| 54 | +{ |
|---|
| 55 | + return umem_odp->notifier.interval_tree.start; |
|---|
| 56 | +} |
|---|
| 92 | 57 | |
|---|
| 93 | | -void ib_umem_odp_release(struct ib_umem *umem); |
|---|
| 58 | +/* Returns the address of the page after the last one of an ODP umem. */ |
|---|
| 59 | +static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) |
|---|
| 60 | +{ |
|---|
| 61 | + return umem_odp->notifier.interval_tree.last + 1; |
|---|
| 62 | +} |
|---|
| 63 | + |
|---|
| 64 | +static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) |
|---|
| 65 | +{ |
|---|
| 66 | + return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >> |
|---|
| 67 | + umem_odp->page_shift; |
|---|
| 68 | +} |
|---|
| 94 | 69 | |
|---|
| 95 | 70 | /* |
|---|
| 96 | 71 | * The lower 2 bits of the DMA address signal the R/W permissions for |
|---|
| .. | .. |
|---|
| 105 | 80 | |
|---|
| 106 | 81 | #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) |
|---|
| 107 | 82 | |
|---|
| 108 | | -int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, |
|---|
| 109 | | - u64 access_mask, unsigned long current_seq); |
|---|
| 83 | +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING |
|---|
| 110 | 84 | |
|---|
| 111 | | -void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, |
|---|
| 85 | +struct ib_umem_odp * |
|---|
| 86 | +ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size, |
|---|
| 87 | + int access, const struct mmu_interval_notifier_ops *ops); |
|---|
| 88 | +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, |
|---|
| 89 | + int access); |
|---|
| 90 | +struct ib_umem_odp * |
|---|
| 91 | +ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr, |
|---|
| 92 | + size_t size, |
|---|
| 93 | + const struct mmu_interval_notifier_ops *ops); |
|---|
| 94 | +void ib_umem_odp_release(struct ib_umem_odp *umem_odp); |
|---|
| 95 | + |
|---|
| 96 | +int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset, |
|---|
| 97 | + u64 bcnt, u64 access_mask, bool fault); |
|---|
| 98 | + |
|---|
| 99 | +void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, |
|---|
| 112 | 100 | u64 bound); |
|---|
| 113 | | - |
|---|
| 114 | | -typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, |
|---|
| 115 | | - void *cookie); |
|---|
| 116 | | -/* |
|---|
| 117 | | - * Call the callback on each ib_umem in the range. Returns the logical or of |
|---|
| 118 | | - * the return values of the functions called. |
|---|
| 119 | | - */ |
|---|
| 120 | | -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, |
|---|
| 121 | | - u64 start, u64 end, |
|---|
| 122 | | - umem_call_back cb, |
|---|
| 123 | | - bool blockable, void *cookie); |
|---|
| 124 | | - |
|---|
| 125 | | -/* |
|---|
| 126 | | - * Find first region intersecting with address range. |
|---|
| 127 | | - * Return NULL if not found |
|---|
| 128 | | - */ |
|---|
| 129 | | -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, |
|---|
| 130 | | - u64 addr, u64 length); |
|---|
| 131 | | - |
|---|
| 132 | | -static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, |
|---|
| 133 | | - unsigned long mmu_seq) |
|---|
| 134 | | -{ |
|---|
| 135 | | - /* |
|---|
| 136 | | - * This code is strongly based on the KVM code from |
|---|
| 137 | | - * mmu_notifier_retry. Should be called with |
|---|
| 138 | | - * the relevant locks taken (item->odp_data->umem_mutex |
|---|
| 139 | | - * and the ucontext umem_mutex semaphore locked for read). |
|---|
| 140 | | - */ |
|---|
| 141 | | - |
|---|
| 142 | | - /* Do not allow page faults while the new ib_umem hasn't seen a state |
|---|
| 143 | | - * with zero notifiers yet, and doesn't have its own valid set of |
|---|
| 144 | | - * private counters. */ |
|---|
| 145 | | - if (!item->odp_data->mn_counters_active) |
|---|
| 146 | | - return 1; |
|---|
| 147 | | - |
|---|
| 148 | | - if (unlikely(item->odp_data->notifiers_count)) |
|---|
| 149 | | - return 1; |
|---|
| 150 | | - if (item->odp_data->notifiers_seq != mmu_seq) |
|---|
| 151 | | - return 1; |
|---|
| 152 | | - return 0; |
|---|
| 153 | | -} |
|---|
| 154 | 101 | |
|---|
| 155 | 102 | #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ |
|---|
| 156 | 103 | |
|---|
| 157 | | -static inline int ib_umem_odp_get(struct ib_ucontext *context, |
|---|
| 158 | | - struct ib_umem *umem, |
|---|
| 159 | | - int access) |
|---|
| 160 | | -{ |
|---|
| 161 | | - return -EINVAL; |
|---|
| 162 | | -} |
|---|
| 163 | | - |
|---|
| 164 | | -static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, |
|---|
| 165 | | - unsigned long addr, |
|---|
| 166 | | - size_t size) |
|---|
| 104 | +static inline struct ib_umem_odp * |
|---|
| 105 | +ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size, |
|---|
| 106 | + int access, const struct mmu_interval_notifier_ops *ops) |
|---|
| 167 | 107 | { |
|---|
| 168 | 108 | return ERR_PTR(-EINVAL); |
|---|
| 169 | 109 | } |
|---|
| 170 | 110 | |
|---|
| 171 | | -static inline void ib_umem_odp_release(struct ib_umem *umem) {} |
|---|
| 111 | +static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} |
|---|
| 172 | 112 | |
|---|
| 173 | 113 | #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ |
|---|
| 174 | 114 | |
|---|