| .. | .. |
|---|
| 37 | 37 | #include <linux/kernel.h> |
|---|
| 38 | 38 | #include <linux/slab.h> |
|---|
| 39 | 39 | #include <linux/init.h> |
|---|
| 40 | | -#include <linux/mutex.h> |
|---|
| 41 | 40 | #include <linux/netdevice.h> |
|---|
| 41 | +#include <net/net_namespace.h> |
|---|
| 42 | 42 | #include <linux/security.h> |
|---|
| 43 | 43 | #include <linux/notifier.h> |
|---|
| 44 | +#include <linux/hashtable.h> |
|---|
| 44 | 45 | #include <rdma/rdma_netlink.h> |
|---|
| 45 | 46 | #include <rdma/ib_addr.h> |
|---|
| 46 | 47 | #include <rdma/ib_cache.h> |
|---|
| 48 | +#include <rdma/rdma_counter.h> |
|---|
| 47 | 49 | |
|---|
| 48 | 50 | #include "core_priv.h" |
|---|
| 51 | +#include "restrack.h" |
|---|
| 49 | 52 | |
|---|
| 50 | 53 | MODULE_AUTHOR("Roland Dreier"); |
|---|
| 51 | 54 | MODULE_DESCRIPTION("core kernel InfiniBand API"); |
|---|
| 52 | 55 | MODULE_LICENSE("Dual BSD/GPL"); |
|---|
| 53 | | - |
|---|
| 54 | | -struct ib_client_data { |
|---|
| 55 | | - struct list_head list; |
|---|
| 56 | | - struct ib_client *client; |
|---|
| 57 | | - void * data; |
|---|
| 58 | | - /* The device or client is going down. Do not call client or device |
|---|
| 59 | | - * callbacks other than remove(). */ |
|---|
| 60 | | - bool going_down; |
|---|
| 61 | | -}; |
|---|
| 62 | 56 | |
|---|
| 63 | 57 | struct workqueue_struct *ib_comp_wq; |
|---|
| 64 | 58 | struct workqueue_struct *ib_comp_unbound_wq; |
|---|
| 65 | 59 | struct workqueue_struct *ib_wq; |
|---|
| 66 | 60 | EXPORT_SYMBOL_GPL(ib_wq); |
|---|
| 67 | 61 | |
|---|
| 68 | | -/* The device_list and client_list contain devices and clients after their |
|---|
| 69 | | - * registration has completed, and the devices and clients are removed |
|---|
| 70 | | - * during unregistration. */ |
|---|
| 71 | | -static LIST_HEAD(device_list); |
|---|
| 72 | | -static LIST_HEAD(client_list); |
|---|
| 62 | +/* |
|---|
| 63 | + * Each of the three rwsem locks (devices, clients, client_data) protects the |
|---|
| 64 | + * xarray of the same name. Specifically it allows the caller to assert that |
|---|
| 65 | + * the MARK will/will not be changing under the lock, and for devices and |
|---|
| 66 | + * clients, that the value in the xarray is still a valid pointer. Change of |
|---|
| 67 | + * the MARK is linked to the object state, so holding the lock and testing the |
|---|
| 68 | + * MARK also asserts that the contained object is in a certain state. |
|---|
| 69 | + * |
|---|
| 70 | + * This is used to build a two stage register/unregister flow where objects |
|---|
| 71 | + * can continue to be in the xarray even though they are still in the |
|---|
| 72 | + * process of registering/unregistering. |
|---|
| 73 | + * |
|---|
| 74 | + * The xarray itself provides additional locking, and restartable iteration, |
|---|
| 75 | + * which is also relied on. |
|---|
| 76 | + * |
|---|
| 77 | + * Locks should not be nested, with the exception of client_data, which is |
|---|
| 78 | + * allowed to nest under the read side of the other two locks. |
|---|
| 79 | + * |
|---|
| 80 | + * The devices_rwsem also protects the device name list; any change or |
|---|
| 81 | + * assignment of a device name must also hold the write side to guarantee |
|---|
| 82 | + * unique names. |
|---|
| 83 | + */ |
|---|
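
To make the locking comment above concrete, here is a minimal reader-side sketch (not part of the patch; the helper name is hypothetical) of how the rwsem plus xarray MARK combination is intended to be used: the mark is tested under the read lock, so a marked slot is guaranteed to point at an object in the corresponding state.

```c
/* Illustrative sketch only -- not part of this patch. */
static struct ib_device *sketch_lookup_registered(u32 index)
{
	struct ib_device *dev = NULL;

	down_read(&devices_rwsem);
	/*
	 * Under the read side of devices_rwsem the DEVICE_REGISTERED mark
	 * cannot change, so a marked slot points at a registered device.
	 */
	if (xa_get_mark(&devices, index, DEVICE_REGISTERED))
		dev = xa_load(&devices, index);
	up_read(&devices_rwsem);

	/*
	 * A caller that needs the device after dropping the lock must also
	 * take a reference, e.g. with ib_device_try_get().
	 */
	return dev;
}
```
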
| 73 | 84 | |
|---|
| 74 | 85 | /* |
|---|
| 75 | | - * device_mutex and lists_rwsem protect access to both device_list and |
|---|
| 76 | | - * client_list. device_mutex protects writer access by device and client |
|---|
| 77 | | - * registration / de-registration. lists_rwsem protects reader access to |
|---|
| 78 | | - * these lists. Iterators of these lists must lock it for read, while updates |
|---|
| 79 | | - * to the lists must be done with a write lock. A special case is when the |
|---|
| 80 | | - * device_mutex is locked. In this case locking the lists for read access is |
|---|
| 81 | | - * not necessary as the device_mutex implies it. |
|---|
| 86 | + * devices contains devices that have had their names assigned. The |
|---|
| 87 | + * devices may not be registered. Users that care about the registration |
|---|
| 88 | + * status need to call ib_device_try_get() on the device to ensure it is |
|---|
| 89 | + * registered, and keep it registered, for the required duration. |
|---|
| 82 | 90 | * |
|---|
| 83 | | - * lists_rwsem also protects access to the client data list. |
|---|
| 84 | 91 | */ |
|---|
| 85 | | -static DEFINE_MUTEX(device_mutex); |
|---|
| 86 | | -static DECLARE_RWSEM(lists_rwsem); |
|---|
| 92 | +static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); |
|---|
| 93 | +static DECLARE_RWSEM(devices_rwsem); |
|---|
| 94 | +#define DEVICE_REGISTERED XA_MARK_1 |
|---|
| 87 | 95 | |
|---|
| 96 | +static u32 highest_client_id; |
|---|
| 97 | +#define CLIENT_REGISTERED XA_MARK_1 |
|---|
| 98 | +static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); |
|---|
| 99 | +static DECLARE_RWSEM(clients_rwsem); |
|---|
| 100 | + |
|---|
| 101 | +static void ib_client_put(struct ib_client *client) |
|---|
| 102 | +{ |
|---|
| 103 | + if (refcount_dec_and_test(&client->uses)) |
|---|
| 104 | + complete(&client->uses_zero); |
|---|
| 105 | +} |
|---|
| 106 | + |
|---|
| 107 | +/* |
|---|
| 108 | + * If client_data is registered then the corresponding client must also still |
|---|
| 109 | + * be registered. |
|---|
| 110 | + */ |
|---|
| 111 | +#define CLIENT_DATA_REGISTERED XA_MARK_1 |
|---|
| 112 | + |
|---|
| 113 | +unsigned int rdma_dev_net_id; |
|---|
| 114 | + |
|---|
| 115 | +/* |
|---|
| 116 | + * A list of net namespaces is maintained in an xarray. This is necessary |
|---|
| 117 | + * because we can't get the locking right using the existing net ns list. We |
|---|
| 118 | + * would require a init_net callback after the list is updated. |
|---|
| 119 | + */ |
|---|
| 120 | +static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); |
|---|
| 121 | +/* |
|---|
| 122 | + * rwsem to protect accessing the rdma_nets xarray entries. |
|---|
| 123 | + */ |
|---|
| 124 | +static DECLARE_RWSEM(rdma_nets_rwsem); |
|---|
| 125 | + |
|---|
| 126 | +bool ib_devices_shared_netns = true; |
|---|
| 127 | +module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); |
|---|
| 128 | +MODULE_PARM_DESC(netns_mode, |
|---|
| 129 | + "Share device among net namespaces; default=1 (shared)"); |
|---|
| 130 | +/** |
|---|
| 131 | + * rdma_dev_access_netns() - Return whether an rdma device can be accessed |
|---|
| 132 | + * from a specified net namespace or not. |
|---|
| 133 | + * @dev: Pointer to rdma device which needs to be checked |
|---|
| 134 | + * @net: Pointer to net namespace for which access is to be checked |
|---|
| 135 | + * |
|---|
| 136 | + * When the rdma device is in shared mode, it ignores the net namespace. |
|---|
| 137 | + * When the rdma device is exclusive to a net namespace, rdma device net |
|---|
| 138 | + * namespace is checked against the specified one. |
|---|
| 139 | + */ |
|---|
| 140 | +bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) |
|---|
| 141 | +{ |
|---|
| 142 | + return (ib_devices_shared_netns || |
|---|
| 143 | + net_eq(read_pnet(&dev->coredev.rdma_net), net)); |
|---|
| 144 | +} |
|---|
| 145 | +EXPORT_SYMBOL(rdma_dev_access_netns); |
|---|
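
As a rough illustration (the caller below and its use of sock_net() are assumptions, not part of this patch), a netlink-style lookup path could use this helper to hide devices bound to a different namespace when sharing is disabled:

```c
/* Hypothetical caller; assumes <net/sock.h> is available for sock_net(). */
static bool sketch_dev_visible(const struct ib_device *dev, struct sock *sk)
{
	/* Always true in shared mode; the netns must match in exclusive mode. */
	return rdma_dev_access_netns(dev, sock_net(sk));
}
```
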
| 146 | + |
|---|
| 147 | +/* |
|---|
| 148 | + * xarray has this behavior where it won't iterate over NULL values stored in |
|---|
| 149 | + * allocated arrays. So we need our own iterator to see all values stored in |
|---|
| 150 | + * the array. This does the same thing as xa_for_each except that it also |
|---|
| 151 | + * returns NULL valued entries if the array is allocating. Simplified to only |
|---|
| 152 | + * work on simple xarrays. |
|---|
| 153 | + */ |
|---|
| 154 | +static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, |
|---|
| 155 | + xa_mark_t filter) |
|---|
| 156 | +{ |
|---|
| 157 | + XA_STATE(xas, xa, *indexp); |
|---|
| 158 | + void *entry; |
|---|
| 159 | + |
|---|
| 160 | + rcu_read_lock(); |
|---|
| 161 | + do { |
|---|
| 162 | + entry = xas_find_marked(&xas, ULONG_MAX, filter); |
|---|
| 163 | + if (xa_is_zero(entry)) |
|---|
| 164 | + break; |
|---|
| 165 | + } while (xas_retry(&xas, entry)); |
|---|
| 166 | + rcu_read_unlock(); |
|---|
| 167 | + |
|---|
| 168 | + if (entry) { |
|---|
| 169 | + *indexp = xas.xa_index; |
|---|
| 170 | + if (xa_is_zero(entry)) |
|---|
| 171 | + return NULL; |
|---|
| 172 | + return entry; |
|---|
| 173 | + } |
|---|
| 174 | + return XA_ERROR(-ENOENT); |
|---|
| 175 | +} |
|---|
| 176 | +#define xan_for_each_marked(xa, index, entry, filter) \ |
|---|
| 177 | + for (index = 0, entry = xan_find_marked(xa, &(index), filter); \ |
|---|
| 178 | + !xa_is_err(entry); \ |
|---|
| 179 | + (index)++, entry = xan_find_marked(xa, &(index), filter)) |
|---|
| 180 | + |
|---|
| 181 | +/* RCU hash table mapping netdevice pointers to struct ib_port_data */ |
|---|
| 182 | +static DEFINE_SPINLOCK(ndev_hash_lock); |
|---|
| 183 | +static DECLARE_HASHTABLE(ndev_hash, 5); |
|---|
| 184 | + |
|---|
| 185 | +static void free_netdevs(struct ib_device *ib_dev); |
|---|
| 186 | +static void ib_unregister_work(struct work_struct *work); |
|---|
| 187 | +static void __ib_unregister_device(struct ib_device *device); |
|---|
| 88 | 188 | static int ib_security_change(struct notifier_block *nb, unsigned long event, |
|---|
| 89 | 189 | void *lsm_data); |
|---|
| 90 | 190 | static void ib_policy_change_task(struct work_struct *work); |
|---|
| 91 | 191 | static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); |
|---|
| 92 | 192 | |
|---|
| 193 | +static void __ibdev_printk(const char *level, const struct ib_device *ibdev, |
|---|
| 194 | + struct va_format *vaf) |
|---|
| 195 | +{ |
|---|
| 196 | + if (ibdev && ibdev->dev.parent) |
|---|
| 197 | + dev_printk_emit(level[1] - '0', |
|---|
| 198 | + ibdev->dev.parent, |
|---|
| 199 | + "%s %s %s: %pV", |
|---|
| 200 | + dev_driver_string(ibdev->dev.parent), |
|---|
| 201 | + dev_name(ibdev->dev.parent), |
|---|
| 202 | + dev_name(&ibdev->dev), |
|---|
| 203 | + vaf); |
|---|
| 204 | + else if (ibdev) |
|---|
| 205 | + printk("%s%s: %pV", |
|---|
| 206 | + level, dev_name(&ibdev->dev), vaf); |
|---|
| 207 | + else |
|---|
| 208 | + printk("%s(NULL ib_device): %pV", level, vaf); |
|---|
| 209 | +} |
|---|
| 210 | + |
|---|
| 211 | +void ibdev_printk(const char *level, const struct ib_device *ibdev, |
|---|
| 212 | + const char *format, ...) |
|---|
| 213 | +{ |
|---|
| 214 | + struct va_format vaf; |
|---|
| 215 | + va_list args; |
|---|
| 216 | + |
|---|
| 217 | + va_start(args, format); |
|---|
| 218 | + |
|---|
| 219 | + vaf.fmt = format; |
|---|
| 220 | + vaf.va = &args; |
|---|
| 221 | + |
|---|
| 222 | + __ibdev_printk(level, ibdev, &vaf); |
|---|
| 223 | + |
|---|
| 224 | + va_end(args); |
|---|
| 225 | +} |
|---|
| 226 | +EXPORT_SYMBOL(ibdev_printk); |
|---|
| 227 | + |
|---|
| 228 | +#define define_ibdev_printk_level(func, level) \ |
|---|
| 229 | +void func(const struct ib_device *ibdev, const char *fmt, ...) \ |
|---|
| 230 | +{ \ |
|---|
| 231 | + struct va_format vaf; \ |
|---|
| 232 | + va_list args; \ |
|---|
| 233 | + \ |
|---|
| 234 | + va_start(args, fmt); \ |
|---|
| 235 | + \ |
|---|
| 236 | + vaf.fmt = fmt; \ |
|---|
| 237 | + vaf.va = &args; \ |
|---|
| 238 | + \ |
|---|
| 239 | + __ibdev_printk(level, ibdev, &vaf); \ |
|---|
| 240 | + \ |
|---|
| 241 | + va_end(args); \ |
|---|
| 242 | +} \ |
|---|
| 243 | +EXPORT_SYMBOL(func); |
|---|
| 244 | + |
|---|
| 245 | +define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); |
|---|
| 246 | +define_ibdev_printk_level(ibdev_alert, KERN_ALERT); |
|---|
| 247 | +define_ibdev_printk_level(ibdev_crit, KERN_CRIT); |
|---|
| 248 | +define_ibdev_printk_level(ibdev_err, KERN_ERR); |
|---|
| 249 | +define_ibdev_printk_level(ibdev_warn, KERN_WARNING); |
|---|
| 250 | +define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); |
|---|
| 251 | +define_ibdev_printk_level(ibdev_info, KERN_INFO); |
|---|
| 252 | + |
|---|
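
The generated helpers are used just like the dev_*() printk wrappers; a small hypothetical sketch:

```c
/* Illustrative only -- not part of this patch. */
static void sketch_report_failure(struct ib_device *ibdev, int err)
{
	ibdev_warn(ibdev, "sketch: request failed with error %d\n", err);
	ibdev_info(ibdev, "sketch: falling back to a slower path\n");
}
```
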
| 93 | 253 | static struct notifier_block ibdev_lsm_nb = { |
|---|
| 94 | 254 | .notifier_call = ib_security_change, |
|---|
| 95 | 255 | }; |
|---|
| 96 | 256 | |
|---|
| 97 | | -static int ib_device_check_mandatory(struct ib_device *device) |
|---|
| 257 | +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, |
|---|
| 258 | + struct net *net); |
|---|
| 259 | + |
|---|
| 260 | +/* Pointer to the RCU head at the start of the ib_port_data array */ |
|---|
| 261 | +struct ib_port_data_rcu { |
|---|
| 262 | + struct rcu_head rcu_head; |
|---|
| 263 | + struct ib_port_data pdata[]; |
|---|
| 264 | +}; |
|---|
| 265 | + |
|---|
| 266 | +static void ib_device_check_mandatory(struct ib_device *device) |
|---|
| 98 | 267 | { |
|---|
| 99 | | -#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } |
|---|
| 268 | +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } |
|---|
| 100 | 269 | static const struct { |
|---|
| 101 | 270 | size_t offset; |
|---|
| 102 | 271 | char *name; |
|---|
| 103 | 272 | } mandatory_table[] = { |
|---|
| 104 | 273 | IB_MANDATORY_FUNC(query_device), |
|---|
| 105 | 274 | IB_MANDATORY_FUNC(query_port), |
|---|
| 106 | | - IB_MANDATORY_FUNC(query_pkey), |
|---|
| 107 | 275 | IB_MANDATORY_FUNC(alloc_pd), |
|---|
| 108 | 276 | IB_MANDATORY_FUNC(dealloc_pd), |
|---|
| 109 | 277 | IB_MANDATORY_FUNC(create_qp), |
|---|
| .. | .. |
|---|
| 121 | 289 | }; |
|---|
| 122 | 290 | int i; |
|---|
| 123 | 291 | |
|---|
| 292 | + device->kverbs_provider = true; |
|---|
| 124 | 293 | for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { |
|---|
| 125 | | - if (!*(void **) ((void *) device + mandatory_table[i].offset)) { |
|---|
| 126 | | - pr_warn("Device %s is missing mandatory function %s\n", |
|---|
| 127 | | - device->name, mandatory_table[i].name); |
|---|
| 128 | | - return -EINVAL; |
|---|
| 294 | + if (!*(void **) ((void *) &device->ops + |
|---|
| 295 | + mandatory_table[i].offset)) { |
|---|
| 296 | + device->kverbs_provider = false; |
|---|
| 297 | + break; |
|---|
| 129 | 298 | } |
|---|
| 130 | 299 | } |
|---|
| 131 | | - |
|---|
| 132 | | - return 0; |
|---|
| 133 | | -} |
|---|
| 134 | | - |
|---|
| 135 | | -static struct ib_device *__ib_device_get_by_index(u32 index) |
|---|
| 136 | | -{ |
|---|
| 137 | | - struct ib_device *device; |
|---|
| 138 | | - |
|---|
| 139 | | - list_for_each_entry(device, &device_list, core_list) |
|---|
| 140 | | - if (device->index == index) |
|---|
| 141 | | - return device; |
|---|
| 142 | | - |
|---|
| 143 | | - return NULL; |
|---|
| 144 | 300 | } |
|---|
| 145 | 301 | |
|---|
| 146 | 302 | /* |
|---|
| 147 | | - * Caller is responsible to return refrerence count by calling put_device() |
|---|
| 303 | + * The caller must call ib_device_put() to drop the device reference count |
|---|
| 304 | + * when ib_device_get_by_index() returns a valid device pointer. |
|---|
| 148 | 305 | */ |
|---|
| 149 | | -struct ib_device *ib_device_get_by_index(u32 index) |
|---|
| 306 | +struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) |
|---|
| 150 | 307 | { |
|---|
| 151 | 308 | struct ib_device *device; |
|---|
| 152 | 309 | |
|---|
| 153 | | - down_read(&lists_rwsem); |
|---|
| 154 | | - device = __ib_device_get_by_index(index); |
|---|
| 155 | | - if (device) |
|---|
| 156 | | - get_device(&device->dev); |
|---|
| 310 | + down_read(&devices_rwsem); |
|---|
| 311 | + device = xa_load(&devices, index); |
|---|
| 312 | + if (device) { |
|---|
| 313 | + if (!rdma_dev_access_netns(device, net)) { |
|---|
| 314 | + device = NULL; |
|---|
| 315 | + goto out; |
|---|
| 316 | + } |
|---|
| 157 | 317 | |
|---|
| 158 | | - up_read(&lists_rwsem); |
|---|
| 318 | + if (!ib_device_try_get(device)) |
|---|
| 319 | + device = NULL; |
|---|
| 320 | + } |
|---|
| 321 | +out: |
|---|
| 322 | + up_read(&devices_rwsem); |
|---|
| 159 | 323 | return device; |
|---|
| 160 | 324 | } |
|---|
| 325 | + |
|---|
| 326 | +/** |
|---|
| 327 | + * ib_device_put - Release IB device reference |
|---|
| 328 | + * @device: device whose reference to be released |
|---|
| 329 | + * |
|---|
| 330 | + * ib_device_put() releases reference to the IB device to allow it to be |
|---|
| 331 | + * unregistered and eventually free. |
|---|
| 332 | + */ |
|---|
| 333 | +void ib_device_put(struct ib_device *device) |
|---|
| 334 | +{ |
|---|
| 335 | + if (refcount_dec_and_test(&device->refcount)) |
|---|
| 336 | + complete(&device->unreg_completion); |
|---|
| 337 | +} |
|---|
| 338 | +EXPORT_SYMBOL(ib_device_put); |
|---|
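
A minimal sketch of the expected caller pattern (the function below is hypothetical): the lookup takes a registration reference that must be balanced with ib_device_put() once the caller is done with the device.

```c
/* Illustrative only -- not part of this patch. */
static int sketch_use_device(struct net *net, u32 index)
{
	struct ib_device *dev;

	dev = ib_device_get_by_index(net, index);
	if (!dev)
		return -ENODEV;

	/* ... use dev; the reference keeps unregistration from completing ... */

	ib_device_put(dev);
	return 0;
}
```
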
| 161 | 339 | |
|---|
| 162 | 340 | static struct ib_device *__ib_device_get_by_name(const char *name) |
|---|
| 163 | 341 | { |
|---|
| 164 | 342 | struct ib_device *device; |
|---|
| 343 | + unsigned long index; |
|---|
| 165 | 344 | |
|---|
| 166 | | - list_for_each_entry(device, &device_list, core_list) |
|---|
| 167 | | - if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) |
|---|
| 345 | + xa_for_each (&devices, index, device) |
|---|
| 346 | + if (!strcmp(name, dev_name(&device->dev))) |
|---|
| 168 | 347 | return device; |
|---|
| 169 | 348 | |
|---|
| 170 | 349 | return NULL; |
|---|
| 171 | 350 | } |
|---|
| 172 | 351 | |
|---|
| 173 | | -static int alloc_name(char *name) |
|---|
| 352 | +/** |
|---|
| 353 | + * ib_device_get_by_name - Find an IB device by name |
|---|
| 354 | + * @name: The name to look for |
|---|
| 355 | + * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) |
|---|
| 356 | + * |
|---|
| 357 | + * Find and hold an ib_device by its name. The caller must call |
|---|
| 358 | + * ib_device_put() on the returned pointer. |
|---|
| 359 | + */ |
|---|
| 360 | +struct ib_device *ib_device_get_by_name(const char *name, |
|---|
| 361 | + enum rdma_driver_id driver_id) |
|---|
| 174 | 362 | { |
|---|
| 175 | | - unsigned long *inuse; |
|---|
| 176 | | - char buf[IB_DEVICE_NAME_MAX]; |
|---|
| 177 | 363 | struct ib_device *device; |
|---|
| 178 | | - int i; |
|---|
| 179 | 364 | |
|---|
| 180 | | - inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); |
|---|
| 181 | | - if (!inuse) |
|---|
| 182 | | - return -ENOMEM; |
|---|
| 365 | + down_read(&devices_rwsem); |
|---|
| 366 | + device = __ib_device_get_by_name(name); |
|---|
| 367 | + if (device && driver_id != RDMA_DRIVER_UNKNOWN && |
|---|
| 368 | + device->ops.driver_id != driver_id) |
|---|
| 369 | + device = NULL; |
|---|
| 183 | 370 | |
|---|
| 184 | | - list_for_each_entry(device, &device_list, core_list) { |
|---|
| 185 | | - if (!sscanf(device->name, name, &i)) |
|---|
| 186 | | - continue; |
|---|
| 187 | | - if (i < 0 || i >= PAGE_SIZE * 8) |
|---|
| 188 | | - continue; |
|---|
| 189 | | - snprintf(buf, sizeof buf, name, i); |
|---|
| 190 | | - if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) |
|---|
| 191 | | - set_bit(i, inuse); |
|---|
| 371 | + if (device) { |
|---|
| 372 | + if (!ib_device_try_get(device)) |
|---|
| 373 | + device = NULL; |
|---|
| 374 | + } |
|---|
| 375 | + up_read(&devices_rwsem); |
|---|
| 376 | + return device; |
|---|
| 377 | +} |
|---|
| 378 | +EXPORT_SYMBOL(ib_device_get_by_name); |
|---|
| 379 | + |
|---|
| 380 | +static int rename_compat_devs(struct ib_device *device) |
|---|
| 381 | +{ |
|---|
| 382 | + struct ib_core_device *cdev; |
|---|
| 383 | + unsigned long index; |
|---|
| 384 | + int ret = 0; |
|---|
| 385 | + |
|---|
| 386 | + mutex_lock(&device->compat_devs_mutex); |
|---|
| 387 | + xa_for_each (&device->compat_devs, index, cdev) { |
|---|
| 388 | + ret = device_rename(&cdev->dev, dev_name(&device->dev)); |
|---|
| 389 | + if (ret) { |
|---|
| 390 | + dev_warn(&cdev->dev, |
|---|
| 391 | + "Fail to rename compatdev to new name %s\n", |
|---|
| 392 | + dev_name(&device->dev)); |
|---|
| 393 | + break; |
|---|
| 394 | + } |
|---|
| 395 | + } |
|---|
| 396 | + mutex_unlock(&device->compat_devs_mutex); |
|---|
| 397 | + return ret; |
|---|
| 398 | +} |
|---|
| 399 | + |
|---|
| 400 | +int ib_device_rename(struct ib_device *ibdev, const char *name) |
|---|
| 401 | +{ |
|---|
| 402 | + unsigned long index; |
|---|
| 403 | + void *client_data; |
|---|
| 404 | + int ret; |
|---|
| 405 | + |
|---|
| 406 | + down_write(&devices_rwsem); |
|---|
| 407 | + if (!strcmp(name, dev_name(&ibdev->dev))) { |
|---|
| 408 | + up_write(&devices_rwsem); |
|---|
| 409 | + return 0; |
|---|
| 192 | 410 | } |
|---|
| 193 | 411 | |
|---|
| 194 | | - i = find_first_zero_bit(inuse, PAGE_SIZE * 8); |
|---|
| 195 | | - free_page((unsigned long) inuse); |
|---|
| 196 | | - snprintf(buf, sizeof buf, name, i); |
|---|
| 412 | + if (__ib_device_get_by_name(name)) { |
|---|
| 413 | + up_write(&devices_rwsem); |
|---|
| 414 | + return -EEXIST; |
|---|
| 415 | + } |
|---|
| 197 | 416 | |
|---|
| 198 | | - if (__ib_device_get_by_name(buf)) |
|---|
| 199 | | - return -ENFILE; |
|---|
| 417 | + ret = device_rename(&ibdev->dev, name); |
|---|
| 418 | + if (ret) { |
|---|
| 419 | + up_write(&devices_rwsem); |
|---|
| 420 | + return ret; |
|---|
| 421 | + } |
|---|
| 200 | 422 | |
|---|
| 201 | | - strlcpy(name, buf, IB_DEVICE_NAME_MAX); |
|---|
| 423 | + strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); |
|---|
| 424 | + ret = rename_compat_devs(ibdev); |
|---|
| 425 | + |
|---|
| 426 | + downgrade_write(&devices_rwsem); |
|---|
| 427 | + down_read(&ibdev->client_data_rwsem); |
|---|
| 428 | + xan_for_each_marked(&ibdev->client_data, index, client_data, |
|---|
| 429 | + CLIENT_DATA_REGISTERED) { |
|---|
| 430 | + struct ib_client *client = xa_load(&clients, index); |
|---|
| 431 | + |
|---|
| 432 | + if (!client || !client->rename) |
|---|
| 433 | + continue; |
|---|
| 434 | + |
|---|
| 435 | + client->rename(ibdev, client_data); |
|---|
| 436 | + } |
|---|
| 437 | + up_read(&ibdev->client_data_rwsem); |
|---|
| 438 | + up_read(&devices_rwsem); |
|---|
| 202 | 439 | return 0; |
|---|
| 440 | +} |
|---|
| 441 | + |
|---|
| 442 | +int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim) |
|---|
| 443 | +{ |
|---|
| 444 | + if (use_dim > 1) |
|---|
| 445 | + return -EINVAL; |
|---|
| 446 | + ibdev->use_cq_dim = use_dim; |
|---|
| 447 | + |
|---|
| 448 | + return 0; |
|---|
| 449 | +} |
|---|
| 450 | + |
|---|
| 451 | +static int alloc_name(struct ib_device *ibdev, const char *name) |
|---|
| 452 | +{ |
|---|
| 453 | + struct ib_device *device; |
|---|
| 454 | + unsigned long index; |
|---|
| 455 | + struct ida inuse; |
|---|
| 456 | + int rc; |
|---|
| 457 | + int i; |
|---|
| 458 | + |
|---|
| 459 | + lockdep_assert_held_write(&devices_rwsem); |
|---|
| 460 | + ida_init(&inuse); |
|---|
| 461 | + xa_for_each (&devices, index, device) { |
|---|
| 462 | + char buf[IB_DEVICE_NAME_MAX]; |
|---|
| 463 | + |
|---|
| 464 | + if (sscanf(dev_name(&device->dev), name, &i) != 1) |
|---|
| 465 | + continue; |
|---|
| 466 | + if (i < 0 || i >= INT_MAX) |
|---|
| 467 | + continue; |
|---|
| 468 | + snprintf(buf, sizeof buf, name, i); |
|---|
| 469 | + if (strcmp(buf, dev_name(&device->dev)) != 0) |
|---|
| 470 | + continue; |
|---|
| 471 | + |
|---|
| 472 | + rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL); |
|---|
| 473 | + if (rc < 0) |
|---|
| 474 | + goto out; |
|---|
| 475 | + } |
|---|
| 476 | + |
|---|
| 477 | + rc = ida_alloc(&inuse, GFP_KERNEL); |
|---|
| 478 | + if (rc < 0) |
|---|
| 479 | + goto out; |
|---|
| 480 | + |
|---|
| 481 | + rc = dev_set_name(&ibdev->dev, name, rc); |
|---|
| 482 | +out: |
|---|
| 483 | + ida_destroy(&inuse); |
|---|
| 484 | + return rc; |
|---|
| 203 | 485 | } |
|---|
| 204 | 486 | |
|---|
| 205 | 487 | static void ib_device_release(struct device *device) |
|---|
| 206 | 488 | { |
|---|
| 207 | 489 | struct ib_device *dev = container_of(device, struct ib_device, dev); |
|---|
| 208 | 490 | |
|---|
| 209 | | - WARN_ON(dev->reg_state == IB_DEV_REGISTERED); |
|---|
| 210 | | - if (dev->reg_state == IB_DEV_UNREGISTERED) { |
|---|
| 211 | | - /* |
|---|
| 212 | | - * In IB_DEV_UNINITIALIZED state, cache or port table |
|---|
| 213 | | - * is not even created. Free cache and port table only when |
|---|
| 214 | | - * device reaches UNREGISTERED state. |
|---|
| 215 | | - */ |
|---|
| 491 | + free_netdevs(dev); |
|---|
| 492 | + WARN_ON(refcount_read(&dev->refcount)); |
|---|
| 493 | + if (dev->port_data) { |
|---|
| 216 | 494 | ib_cache_release_one(dev); |
|---|
| 217 | | - kfree(dev->port_immutable); |
|---|
| 495 | + ib_security_release_port_pkey_list(dev); |
|---|
| 496 | + rdma_counter_release(dev); |
|---|
| 497 | + kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, |
|---|
| 498 | + pdata[0]), |
|---|
| 499 | + rcu_head); |
|---|
| 218 | 500 | } |
|---|
| 219 | | - kfree(dev); |
|---|
| 501 | + |
|---|
| 502 | + mutex_destroy(&dev->unregistration_lock); |
|---|
| 503 | + mutex_destroy(&dev->compat_devs_mutex); |
|---|
| 504 | + |
|---|
| 505 | + xa_destroy(&dev->compat_devs); |
|---|
| 506 | + xa_destroy(&dev->client_data); |
|---|
| 507 | + kfree_rcu(dev, rcu_head); |
|---|
| 220 | 508 | } |
|---|
| 221 | 509 | |
|---|
| 222 | 510 | static int ib_device_uevent(struct device *device, |
|---|
| 223 | 511 | struct kobj_uevent_env *env) |
|---|
| 224 | 512 | { |
|---|
| 225 | | - struct ib_device *dev = container_of(device, struct ib_device, dev); |
|---|
| 226 | | - |
|---|
| 227 | | - if (add_uevent_var(env, "NAME=%s", dev->name)) |
|---|
| 513 | + if (add_uevent_var(env, "NAME=%s", dev_name(device))) |
|---|
| 228 | 514 | return -ENOMEM; |
|---|
| 229 | 515 | |
|---|
| 230 | 516 | /* |
|---|
| .. | .. |
|---|
| 234 | 520 | return 0; |
|---|
| 235 | 521 | } |
|---|
| 236 | 522 | |
|---|
| 523 | +static const void *net_namespace(struct device *d) |
|---|
| 524 | +{ |
|---|
| 525 | + struct ib_core_device *coredev = |
|---|
| 526 | + container_of(d, struct ib_core_device, dev); |
|---|
| 527 | + |
|---|
| 528 | + return read_pnet(&coredev->rdma_net); |
|---|
| 529 | +} |
|---|
| 530 | + |
|---|
| 237 | 531 | static struct class ib_class = { |
|---|
| 238 | 532 | .name = "infiniband", |
|---|
| 239 | 533 | .dev_release = ib_device_release, |
|---|
| 240 | 534 | .dev_uevent = ib_device_uevent, |
|---|
| 535 | + .ns_type = &net_ns_type_operations, |
|---|
| 536 | + .namespace = net_namespace, |
|---|
| 241 | 537 | }; |
|---|
| 242 | 538 | |
|---|
| 539 | +static void rdma_init_coredev(struct ib_core_device *coredev, |
|---|
| 540 | + struct ib_device *dev, struct net *net) |
|---|
| 541 | +{ |
|---|
| 542 | + /* This BUILD_BUG_ON is intended to catch a layout change |
|---|
| 543 | + * of the union of ib_core_device and device. |
|---|
| 544 | + * dev must be the first element as ib_core and provider |
|---|
| 545 | + * drivers use it. Adding anything in ib_core_device before |
|---|
| 546 | + * device will break this assumption. |
|---|
| 547 | + */ |
|---|
| 548 | + BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != |
|---|
| 549 | + offsetof(struct ib_device, dev)); |
|---|
| 550 | + |
|---|
| 551 | + coredev->dev.class = &ib_class; |
|---|
| 552 | + coredev->dev.groups = dev->groups; |
|---|
| 553 | + device_initialize(&coredev->dev); |
|---|
| 554 | + coredev->owner = dev; |
|---|
| 555 | + INIT_LIST_HEAD(&coredev->port_list); |
|---|
| 556 | + write_pnet(&coredev->rdma_net, net); |
|---|
| 557 | +} |
|---|
| 558 | + |
|---|
| 243 | 559 | /** |
|---|
| 244 | | - * ib_alloc_device - allocate an IB device struct |
|---|
| 560 | + * _ib_alloc_device - allocate an IB device struct |
|---|
| 245 | 561 | * @size:size of structure to allocate |
|---|
| 246 | 562 | * |
|---|
| 247 | 563 | * Low-level drivers should use ib_alloc_device() to allocate &struct |
|---|
| .. | .. |
|---|
| 250 | 566 | * ib_dealloc_device() must be used to free structures allocated with |
|---|
| 251 | 567 | * ib_alloc_device(). |
|---|
| 252 | 568 | */ |
|---|
| 253 | | -struct ib_device *ib_alloc_device(size_t size) |
|---|
| 569 | +struct ib_device *_ib_alloc_device(size_t size) |
|---|
| 254 | 570 | { |
|---|
| 255 | 571 | struct ib_device *device; |
|---|
| 256 | 572 | |
|---|
| .. | .. |
|---|
| 261 | 577 | if (!device) |
|---|
| 262 | 578 | return NULL; |
|---|
| 263 | 579 | |
|---|
| 264 | | - rdma_restrack_init(&device->res); |
|---|
| 580 | + if (rdma_restrack_init(device)) { |
|---|
| 581 | + kfree(device); |
|---|
| 582 | + return NULL; |
|---|
| 583 | + } |
|---|
| 265 | 584 | |
|---|
| 266 | | - device->dev.class = &ib_class; |
|---|
| 267 | | - device_initialize(&device->dev); |
|---|
| 268 | | - |
|---|
| 269 | | - dev_set_drvdata(&device->dev, device); |
|---|
| 585 | + device->groups[0] = &ib_dev_attr_group; |
|---|
| 586 | + rdma_init_coredev(&device->coredev, device, &init_net); |
|---|
| 270 | 587 | |
|---|
| 271 | 588 | INIT_LIST_HEAD(&device->event_handler_list); |
|---|
| 272 | | - spin_lock_init(&device->event_handler_lock); |
|---|
| 273 | | - spin_lock_init(&device->client_data_lock); |
|---|
| 274 | | - INIT_LIST_HEAD(&device->client_data_list); |
|---|
| 275 | | - INIT_LIST_HEAD(&device->port_list); |
|---|
| 589 | + spin_lock_init(&device->qp_open_list_lock); |
|---|
| 590 | + init_rwsem(&device->event_handler_rwsem); |
|---|
| 591 | + mutex_init(&device->unregistration_lock); |
|---|
| 592 | + /* |
|---|
| 593 | + * client_data needs to be an allocating xarray because we don't want our |
|---|
| 594 | + * mark to be destroyed if the user stores NULL in the client data. |
|---|
| 595 | + */ |
|---|
| 596 | + xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); |
|---|
| 597 | + init_rwsem(&device->client_data_rwsem); |
|---|
| 598 | + xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); |
|---|
| 599 | + mutex_init(&device->compat_devs_mutex); |
|---|
| 600 | + init_completion(&device->unreg_completion); |
|---|
| 601 | + INIT_WORK(&device->unregistration_work, ib_unregister_work); |
|---|
| 602 | + |
|---|
| 603 | + device->uverbs_ex_cmd_mask = |
|---|
| 604 | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | |
|---|
| 605 | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | |
|---|
| 606 | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | |
|---|
| 607 | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | |
|---|
| 608 | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) | |
|---|
| 609 | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) | |
|---|
| 610 | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) | |
|---|
| 611 | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) | |
|---|
| 612 | + BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); |
|---|
| 276 | 613 | |
|---|
| 277 | 614 | return device; |
|---|
| 278 | 615 | } |
|---|
| 279 | | -EXPORT_SYMBOL(ib_alloc_device); |
|---|
| 616 | +EXPORT_SYMBOL(_ib_alloc_device); |
|---|
| 280 | 617 | |
|---|
| 281 | 618 | /** |
|---|
| 282 | 619 | * ib_dealloc_device - free an IB device struct |
|---|
| .. | .. |
|---|
| 286 | 623 | */ |
|---|
| 287 | 624 | void ib_dealloc_device(struct ib_device *device) |
|---|
| 288 | 625 | { |
|---|
| 289 | | - WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && |
|---|
| 290 | | - device->reg_state != IB_DEV_UNINITIALIZED); |
|---|
| 291 | | - rdma_restrack_clean(&device->res); |
|---|
| 626 | + if (device->ops.dealloc_driver) |
|---|
| 627 | + device->ops.dealloc_driver(device); |
|---|
| 628 | + |
|---|
| 629 | + /* |
|---|
| 630 | + * ib_unregister_driver() requires all devices to remain in the xarray |
|---|
| 631 | + * while their ops are callable. The last op we call is dealloc_driver |
|---|
| 632 | + * above. This is needed to create a fence on op callbacks prior to |
|---|
| 633 | + * allowing the driver module to unload. |
|---|
| 634 | + */ |
|---|
| 635 | + down_write(&devices_rwsem); |
|---|
| 636 | + if (xa_load(&devices, device->index) == device) |
|---|
| 637 | + xa_erase(&devices, device->index); |
|---|
| 638 | + up_write(&devices_rwsem); |
|---|
| 639 | + |
|---|
| 640 | + /* Expedite releasing netdev references */ |
|---|
| 641 | + free_netdevs(device); |
|---|
| 642 | + |
|---|
| 643 | + WARN_ON(!xa_empty(&device->compat_devs)); |
|---|
| 644 | + WARN_ON(!xa_empty(&device->client_data)); |
|---|
| 645 | + WARN_ON(refcount_read(&device->refcount)); |
|---|
| 646 | + rdma_restrack_clean(device); |
|---|
| 647 | + /* Balances with device_initialize */ |
|---|
| 292 | 648 | put_device(&device->dev); |
|---|
| 293 | 649 | } |
|---|
| 294 | 650 | EXPORT_SYMBOL(ib_dealloc_device); |
|---|
| 295 | 651 | |
|---|
| 296 | | -static int add_client_context(struct ib_device *device, struct ib_client *client) |
|---|
| 652 | +/* |
|---|
| 653 | + * add_client_context() and remove_client_context() must be safe against |
|---|
| 654 | + * parallel calls on the same device - registration/unregistration of both the |
|---|
| 655 | + * device and client can be occurring in parallel. |
|---|
| 656 | + * |
|---|
| 657 | + * The routines need to be a fence, any caller must not return until the add |
|---|
| 658 | + * or remove is fully completed. |
|---|
| 659 | + */ |
|---|
| 660 | +static int add_client_context(struct ib_device *device, |
|---|
| 661 | + struct ib_client *client) |
|---|
| 297 | 662 | { |
|---|
| 298 | | - struct ib_client_data *context; |
|---|
| 299 | | - unsigned long flags; |
|---|
| 663 | + int ret = 0; |
|---|
| 300 | 664 | |
|---|
| 301 | | - context = kmalloc(sizeof *context, GFP_KERNEL); |
|---|
| 302 | | - if (!context) |
|---|
| 665 | + if (!device->kverbs_provider && !client->no_kverbs_req) |
|---|
| 666 | + return 0; |
|---|
| 667 | + |
|---|
| 668 | + down_write(&device->client_data_rwsem); |
|---|
| 669 | + /* |
|---|
| 670 | + * So long as the client is registered hold both the client and device |
|---|
| 671 | + * unregistration locks. |
|---|
| 672 | + */ |
|---|
| 673 | + if (!refcount_inc_not_zero(&client->uses)) |
|---|
| 674 | + goto out_unlock; |
|---|
| 675 | + refcount_inc(&device->refcount); |
|---|
| 676 | + |
|---|
| 677 | + /* |
|---|
| 678 | + * Another caller to add_client_context got here first and has already |
|---|
| 679 | + * completely initialized context. |
|---|
| 680 | + */ |
|---|
| 681 | + if (xa_get_mark(&device->client_data, client->client_id, |
|---|
| 682 | + CLIENT_DATA_REGISTERED)) |
|---|
| 683 | + goto out; |
|---|
| 684 | + |
|---|
| 685 | + ret = xa_err(xa_store(&device->client_data, client->client_id, NULL, |
|---|
| 686 | + GFP_KERNEL)); |
|---|
| 687 | + if (ret) |
|---|
| 688 | + goto out; |
|---|
| 689 | + downgrade_write(&device->client_data_rwsem); |
|---|
| 690 | + if (client->add) { |
|---|
| 691 | + if (client->add(device)) { |
|---|
| 692 | + /* |
|---|
| 693 | + * If a client fails to add then the error code is |
|---|
| 694 | + * ignored, but we won't call any more ops on this |
|---|
| 695 | + * client. |
|---|
| 696 | + */ |
|---|
| 697 | + xa_erase(&device->client_data, client->client_id); |
|---|
| 698 | + up_read(&device->client_data_rwsem); |
|---|
| 699 | + ib_device_put(device); |
|---|
| 700 | + ib_client_put(client); |
|---|
| 701 | + return 0; |
|---|
| 702 | + } |
|---|
| 703 | + } |
|---|
| 704 | + |
|---|
| 705 | + /* Readers shall not see a client until add has been completed */ |
|---|
| 706 | + xa_set_mark(&device->client_data, client->client_id, |
|---|
| 707 | + CLIENT_DATA_REGISTERED); |
|---|
| 708 | + up_read(&device->client_data_rwsem); |
|---|
| 709 | + return 0; |
|---|
| 710 | + |
|---|
| 711 | +out: |
|---|
| 712 | + ib_device_put(device); |
|---|
| 713 | + ib_client_put(client); |
|---|
| 714 | +out_unlock: |
|---|
| 715 | + up_write(&device->client_data_rwsem); |
|---|
| 716 | + return ret; |
|---|
| 717 | +} |
|---|
| 718 | + |
|---|
| 719 | +static void remove_client_context(struct ib_device *device, |
|---|
| 720 | + unsigned int client_id) |
|---|
| 721 | +{ |
|---|
| 722 | + struct ib_client *client; |
|---|
| 723 | + void *client_data; |
|---|
| 724 | + |
|---|
| 725 | + down_write(&device->client_data_rwsem); |
|---|
| 726 | + if (!xa_get_mark(&device->client_data, client_id, |
|---|
| 727 | + CLIENT_DATA_REGISTERED)) { |
|---|
| 728 | + up_write(&device->client_data_rwsem); |
|---|
| 729 | + return; |
|---|
| 730 | + } |
|---|
| 731 | + client_data = xa_load(&device->client_data, client_id); |
|---|
| 732 | + xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); |
|---|
| 733 | + client = xa_load(&clients, client_id); |
|---|
| 734 | + up_write(&device->client_data_rwsem); |
|---|
| 735 | + |
|---|
| 736 | + /* |
|---|
| 737 | + * Notice we cannot be holding any exclusive locks when calling the |
|---|
| 738 | + * remove callback as the remove callback can recurse back into any |
|---|
| 739 | + * public functions in this module and thus try for any locks those |
|---|
| 740 | + * functions take. |
|---|
| 741 | + * |
|---|
| 742 | + * For this reason clients and drivers should not call the |
|---|
| 743 | + * unregistration functions while holding any locks. |
|---|
| 744 | + */ |
|---|
| 745 | + if (client->remove) |
|---|
| 746 | + client->remove(device, client_data); |
|---|
| 747 | + |
|---|
| 748 | + xa_erase(&device->client_data, client_id); |
|---|
| 749 | + ib_device_put(device); |
|---|
| 750 | + ib_client_put(client); |
|---|
| 751 | +} |
|---|
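
For context, a minimal hypothetical client shows the callbacks that add_client_context() and remove_client_context() invoke; in this version of the API the add() callback returns an int, and a non-zero return simply means no further ops will be called on that client for this device.

```c
/* Hypothetical client, for illustration only -- not part of this patch. */
static int sketch_client_add(struct ib_device *device)
{
	ibdev_info(device, "sketch client bound\n");
	return 0;
}

static void sketch_client_remove(struct ib_device *device, void *client_data)
{
	ibdev_info(device, "sketch client unbound\n");
}

static struct ib_client sketch_client = {
	.name	= "sketch",
	.add	= sketch_client_add,
	.remove	= sketch_client_remove,
};
```
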
| 752 | + |
|---|
| 753 | +static int alloc_port_data(struct ib_device *device) |
|---|
| 754 | +{ |
|---|
| 755 | + struct ib_port_data_rcu *pdata_rcu; |
|---|
| 756 | + unsigned int port; |
|---|
| 757 | + |
|---|
| 758 | + if (device->port_data) |
|---|
| 759 | + return 0; |
|---|
| 760 | + |
|---|
| 761 | + /* This can only be called once the physical port range is defined */ |
|---|
| 762 | + if (WARN_ON(!device->phys_port_cnt)) |
|---|
| 763 | + return -EINVAL; |
|---|
| 764 | + |
|---|
| 765 | + /* |
|---|
| 766 | + * device->port_data is indexed directly by the port number to make |
|---|
| 767 | + * access to this data as efficient as possible. |
|---|
| 768 | + * |
|---|
| 769 | + * Therefore port_data is declared as a 1 based array with potential |
|---|
| 770 | + * empty slots at the beginning. |
|---|
| 771 | + */ |
|---|
| 772 | + pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata, |
|---|
| 773 | + rdma_end_port(device) + 1), |
|---|
| 774 | + GFP_KERNEL); |
|---|
| 775 | + if (!pdata_rcu) |
|---|
| 303 | 776 | return -ENOMEM; |
|---|
| 777 | + /* |
|---|
| 778 | + * The rcu_head is put in front of the port data array and the stored |
|---|
| 779 | + * pointer is adjusted since we never need to see that member until |
|---|
| 780 | + * kfree_rcu. |
|---|
| 781 | + */ |
|---|
| 782 | + device->port_data = pdata_rcu->pdata; |
|---|
| 304 | 783 | |
|---|
| 305 | | - context->client = client; |
|---|
| 306 | | - context->data = NULL; |
|---|
| 307 | | - context->going_down = false; |
|---|
| 784 | + rdma_for_each_port (device, port) { |
|---|
| 785 | + struct ib_port_data *pdata = &device->port_data[port]; |
|---|
| 308 | 786 | |
|---|
| 309 | | - down_write(&lists_rwsem); |
|---|
| 310 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 311 | | - list_add(&context->list, &device->client_data_list); |
|---|
| 312 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 313 | | - up_write(&lists_rwsem); |
|---|
| 314 | | - |
|---|
| 787 | + pdata->ib_dev = device; |
|---|
| 788 | + spin_lock_init(&pdata->pkey_list_lock); |
|---|
| 789 | + INIT_LIST_HEAD(&pdata->pkey_list); |
|---|
| 790 | + spin_lock_init(&pdata->netdev_lock); |
|---|
| 791 | + INIT_HLIST_NODE(&pdata->ndev_hash_link); |
|---|
| 792 | + } |
|---|
| 315 | 793 | return 0; |
|---|
| 316 | 794 | } |
|---|
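
The rcu_head placement described above follows a general pattern: the head sits in front of a flexible array, only the array itself is published as a plain pointer, and the whole allocation is later reclaimed with kfree_rcu() via container_of(). A stand-alone sketch (type and function names are made up) mirroring the ib_port_data_rcu handling:

```c
/* Illustrative only -- not part of this patch. */
struct sketch_rcu_array {
	struct rcu_head rcu_head;
	int vals[];
};

static int *sketch_alloc_array(unsigned int n)
{
	struct sketch_rcu_array *a = kzalloc(struct_size(a, vals, n), GFP_KERNEL);

	/* Publish only the array; callers never see the rcu_head. */
	return a ? a->vals : NULL;
}

static void sketch_free_array(int *vals)
{
	if (!vals)
		return;
	kfree_rcu(container_of(vals, struct sketch_rcu_array, vals[0]),
		  rcu_head);
}
```
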
| 317 | 795 | |
|---|
| .. | .. |
|---|
| 321 | 799 | rdma_max_mad_size(dev, port) != 0); |
|---|
| 322 | 800 | } |
|---|
| 323 | 801 | |
|---|
| 324 | | -static int read_port_immutable(struct ib_device *device) |
|---|
| 802 | +static int setup_port_data(struct ib_device *device) |
|---|
| 325 | 803 | { |
|---|
| 804 | + unsigned int port; |
|---|
| 326 | 805 | int ret; |
|---|
| 327 | | - u8 start_port = rdma_start_port(device); |
|---|
| 328 | | - u8 end_port = rdma_end_port(device); |
|---|
| 329 | | - u8 port; |
|---|
| 330 | 806 | |
|---|
| 331 | | - /** |
|---|
| 332 | | - * device->port_immutable is indexed directly by the port number to make |
|---|
| 333 | | - * access to this data as efficient as possible. |
|---|
| 334 | | - * |
|---|
| 335 | | - * Therefore port_immutable is declared as a 1 based array with |
|---|
| 336 | | - * potential empty slots at the beginning. |
|---|
| 337 | | - */ |
|---|
| 338 | | - device->port_immutable = kcalloc(end_port + 1, |
|---|
| 339 | | - sizeof(*device->port_immutable), |
|---|
| 340 | | - GFP_KERNEL); |
|---|
| 341 | | - if (!device->port_immutable) |
|---|
| 342 | | - return -ENOMEM; |
|---|
| 807 | + ret = alloc_port_data(device); |
|---|
| 808 | + if (ret) |
|---|
| 809 | + return ret; |
|---|
| 343 | 810 | |
|---|
| 344 | | - for (port = start_port; port <= end_port; ++port) { |
|---|
| 345 | | - ret = device->get_port_immutable(device, port, |
|---|
| 346 | | - &device->port_immutable[port]); |
|---|
| 811 | + rdma_for_each_port (device, port) { |
|---|
| 812 | + struct ib_port_data *pdata = &device->port_data[port]; |
|---|
| 813 | + |
|---|
| 814 | + ret = device->ops.get_port_immutable(device, port, |
|---|
| 815 | + &pdata->immutable); |
|---|
| 347 | 816 | if (ret) |
|---|
| 348 | 817 | return ret; |
|---|
| 349 | 818 | |
|---|
| .. | .. |
|---|
| 355 | 824 | |
|---|
| 356 | 825 | void ib_get_device_fw_str(struct ib_device *dev, char *str) |
|---|
| 357 | 826 | { |
|---|
| 358 | | - if (dev->get_dev_fw_str) |
|---|
| 359 | | - dev->get_dev_fw_str(dev, str); |
|---|
| 827 | + if (dev->ops.get_dev_fw_str) |
|---|
| 828 | + dev->ops.get_dev_fw_str(dev, str); |
|---|
| 360 | 829 | else |
|---|
| 361 | 830 | str[0] = '\0'; |
|---|
| 362 | 831 | } |
|---|
| 363 | 832 | EXPORT_SYMBOL(ib_get_device_fw_str); |
|---|
| 364 | 833 | |
|---|
| 365 | | -static int setup_port_pkey_list(struct ib_device *device) |
|---|
| 366 | | -{ |
|---|
| 367 | | - int i; |
|---|
| 368 | | - |
|---|
| 369 | | - /** |
|---|
| 370 | | - * device->port_pkey_list is indexed directly by the port number, |
|---|
| 371 | | - * Therefore it is declared as a 1 based array with potential empty |
|---|
| 372 | | - * slots at the beginning. |
|---|
| 373 | | - */ |
|---|
| 374 | | - device->port_pkey_list = kcalloc(rdma_end_port(device) + 1, |
|---|
| 375 | | - sizeof(*device->port_pkey_list), |
|---|
| 376 | | - GFP_KERNEL); |
|---|
| 377 | | - |
|---|
| 378 | | - if (!device->port_pkey_list) |
|---|
| 379 | | - return -ENOMEM; |
|---|
| 380 | | - |
|---|
| 381 | | - for (i = 0; i < (rdma_end_port(device) + 1); i++) { |
|---|
| 382 | | - spin_lock_init(&device->port_pkey_list[i].list_lock); |
|---|
| 383 | | - INIT_LIST_HEAD(&device->port_pkey_list[i].pkey_list); |
|---|
| 384 | | - } |
|---|
| 385 | | - |
|---|
| 386 | | - return 0; |
|---|
| 387 | | -} |
|---|
| 388 | | - |
|---|
| 389 | 834 | static void ib_policy_change_task(struct work_struct *work) |
|---|
| 390 | 835 | { |
|---|
| 391 | 836 | struct ib_device *dev; |
|---|
| 837 | + unsigned long index; |
|---|
| 392 | 838 | |
|---|
| 393 | | - down_read(&lists_rwsem); |
|---|
| 394 | | - list_for_each_entry(dev, &device_list, core_list) { |
|---|
| 395 | | - int i; |
|---|
| 839 | + down_read(&devices_rwsem); |
|---|
| 840 | + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { |
|---|
| 841 | + unsigned int i; |
|---|
| 396 | 842 | |
|---|
| 397 | | - for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) { |
|---|
| 843 | + rdma_for_each_port (dev, i) { |
|---|
| 398 | 844 | u64 sp; |
|---|
| 399 | 845 | int ret = ib_get_cached_subnet_prefix(dev, |
|---|
| 400 | 846 | i, |
|---|
| .. | .. |
|---|
| 407 | 853 | ib_security_cache_change(dev, i, sp); |
|---|
| 408 | 854 | } |
|---|
| 409 | 855 | } |
|---|
| 410 | | - up_read(&lists_rwsem); |
|---|
| 856 | + up_read(&devices_rwsem); |
|---|
| 411 | 857 | } |
|---|
| 412 | 858 | |
|---|
| 413 | 859 | static int ib_security_change(struct notifier_block *nb, unsigned long event, |
|---|
| .. | .. |
|---|
| 417 | 863 | return NOTIFY_DONE; |
|---|
| 418 | 864 | |
|---|
| 419 | 865 | schedule_work(&ib_policy_change_work); |
|---|
| 866 | + ib_mad_agent_security_change(); |
|---|
| 420 | 867 | |
|---|
| 421 | 868 | return NOTIFY_OK; |
|---|
| 422 | 869 | } |
|---|
| 423 | 870 | |
|---|
| 424 | | -/** |
|---|
| 425 | | - * __dev_new_index - allocate an device index |
|---|
| 426 | | - * |
|---|
| 427 | | - * Returns a suitable unique value for a new device interface |
|---|
| 428 | | - * number. It assumes that there are less than 2^32-1 ib devices |
|---|
| 429 | | - * will be present in the system. |
|---|
| 430 | | - */ |
|---|
| 431 | | -static u32 __dev_new_index(void) |
|---|
| 871 | +static void compatdev_release(struct device *dev) |
|---|
| 432 | 872 | { |
|---|
| 873 | + struct ib_core_device *cdev = |
|---|
| 874 | + container_of(dev, struct ib_core_device, dev); |
|---|
| 875 | + |
|---|
| 876 | + kfree(cdev); |
|---|
| 877 | +} |
|---|
| 878 | + |
|---|
| 879 | +static int add_one_compat_dev(struct ib_device *device, |
|---|
| 880 | + struct rdma_dev_net *rnet) |
|---|
| 881 | +{ |
|---|
| 882 | + struct ib_core_device *cdev; |
|---|
| 883 | + int ret; |
|---|
| 884 | + |
|---|
| 885 | + lockdep_assert_held(&rdma_nets_rwsem); |
|---|
| 886 | + if (!ib_devices_shared_netns) |
|---|
| 887 | + return 0; |
|---|
| 888 | + |
|---|
| 433 | 889 | /* |
|---|
| 434 | | - * The device index to allow stable naming. |
|---|
| 435 | | - * Similar to struct net -> ifindex. |
|---|
| 890 | + * Create and add compat device in all namespaces other than where it |
|---|
| 891 | + * is currently bound to. |
|---|
| 436 | 892 | */ |
|---|
| 437 | | - static u32 index; |
|---|
| 893 | + if (net_eq(read_pnet(&rnet->net), |
|---|
| 894 | + read_pnet(&device->coredev.rdma_net))) |
|---|
| 895 | + return 0; |
|---|
| 438 | 896 | |
|---|
| 439 | | - for (;;) { |
|---|
| 440 | | - if (!(++index)) |
|---|
| 441 | | - index = 1; |
|---|
| 442 | | - |
|---|
| 443 | | - if (!__ib_device_get_by_index(index)) |
|---|
| 444 | | - return index; |
|---|
| 897 | + /* |
|---|
| 898 | + * The first of init_net() or ib_register_device() to take the |
|---|
| 899 | + * compat_devs_mutex wins and gets to add the device. Others will wait |
|---|
| 900 | + * for completion here. |
|---|
| 901 | + */ |
|---|
| 902 | + mutex_lock(&device->compat_devs_mutex); |
|---|
| 903 | + cdev = xa_load(&device->compat_devs, rnet->id); |
|---|
| 904 | + if (cdev) { |
|---|
| 905 | + ret = 0; |
|---|
| 906 | + goto done; |
|---|
| 445 | 907 | } |
|---|
| 908 | + ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); |
|---|
| 909 | + if (ret) |
|---|
| 910 | + goto done; |
|---|
| 911 | + |
|---|
| 912 | + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); |
|---|
| 913 | + if (!cdev) { |
|---|
| 914 | + ret = -ENOMEM; |
|---|
| 915 | + goto cdev_err; |
|---|
| 916 | + } |
|---|
| 917 | + |
|---|
| 918 | + cdev->dev.parent = device->dev.parent; |
|---|
| 919 | + rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); |
|---|
| 920 | + cdev->dev.release = compatdev_release; |
|---|
| 921 | + ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); |
|---|
| 922 | + if (ret) |
|---|
| 923 | + goto add_err; |
|---|
| 924 | + |
|---|
| 925 | + ret = device_add(&cdev->dev); |
|---|
| 926 | + if (ret) |
|---|
| 927 | + goto add_err; |
|---|
| 928 | + ret = ib_setup_port_attrs(cdev); |
|---|
| 929 | + if (ret) |
|---|
| 930 | + goto port_err; |
|---|
| 931 | + |
|---|
| 932 | + ret = xa_err(xa_store(&device->compat_devs, rnet->id, |
|---|
| 933 | + cdev, GFP_KERNEL)); |
|---|
| 934 | + if (ret) |
|---|
| 935 | + goto insert_err; |
|---|
| 936 | + |
|---|
| 937 | + mutex_unlock(&device->compat_devs_mutex); |
|---|
| 938 | + return 0; |
|---|
| 939 | + |
|---|
| 940 | +insert_err: |
|---|
| 941 | + ib_free_port_attrs(cdev); |
|---|
| 942 | +port_err: |
|---|
| 943 | + device_del(&cdev->dev); |
|---|
| 944 | +add_err: |
|---|
| 945 | + put_device(&cdev->dev); |
|---|
| 946 | +cdev_err: |
|---|
| 947 | + xa_release(&device->compat_devs, rnet->id); |
|---|
| 948 | +done: |
|---|
| 949 | + mutex_unlock(&device->compat_devs_mutex); |
|---|
| 950 | + return ret; |
|---|
| 951 | +} |
|---|
| 952 | + |
|---|
| 953 | +static void remove_one_compat_dev(struct ib_device *device, u32 id) |
|---|
| 954 | +{ |
|---|
| 955 | + struct ib_core_device *cdev; |
|---|
| 956 | + |
|---|
| 957 | + mutex_lock(&device->compat_devs_mutex); |
|---|
| 958 | + cdev = xa_erase(&device->compat_devs, id); |
|---|
| 959 | + mutex_unlock(&device->compat_devs_mutex); |
|---|
| 960 | + if (cdev) { |
|---|
| 961 | + ib_free_port_attrs(cdev); |
|---|
| 962 | + device_del(&cdev->dev); |
|---|
| 963 | + put_device(&cdev->dev); |
|---|
| 964 | + } |
|---|
| 965 | +} |
|---|
| 966 | + |
|---|
| 967 | +static void remove_compat_devs(struct ib_device *device) |
|---|
| 968 | +{ |
|---|
| 969 | + struct ib_core_device *cdev; |
|---|
| 970 | + unsigned long index; |
|---|
| 971 | + |
|---|
| 972 | + xa_for_each (&device->compat_devs, index, cdev) |
|---|
| 973 | + remove_one_compat_dev(device, index); |
|---|
| 974 | +} |
|---|
| 975 | + |
|---|
| 976 | +static int add_compat_devs(struct ib_device *device) |
|---|
| 977 | +{ |
|---|
| 978 | + struct rdma_dev_net *rnet; |
|---|
| 979 | + unsigned long index; |
|---|
| 980 | + int ret = 0; |
|---|
| 981 | + |
|---|
| 982 | + lockdep_assert_held(&devices_rwsem); |
|---|
| 983 | + |
|---|
| 984 | + down_read(&rdma_nets_rwsem); |
|---|
| 985 | + xa_for_each (&rdma_nets, index, rnet) { |
|---|
| 986 | + ret = add_one_compat_dev(device, rnet); |
|---|
| 987 | + if (ret) |
|---|
| 988 | + break; |
|---|
| 989 | + } |
|---|
| 990 | + up_read(&rdma_nets_rwsem); |
|---|
| 991 | + return ret; |
|---|
| 992 | +} |
|---|
| 993 | + |
|---|
| 994 | +static void remove_all_compat_devs(void) |
|---|
| 995 | +{ |
|---|
| 996 | + struct ib_compat_device *cdev; |
|---|
| 997 | + struct ib_device *dev; |
|---|
| 998 | + unsigned long index; |
|---|
| 999 | + |
|---|
| 1000 | + down_read(&devices_rwsem); |
|---|
| 1001 | + xa_for_each (&devices, index, dev) { |
|---|
| 1002 | + unsigned long c_index = 0; |
|---|
| 1003 | + |
|---|
| 1004 | + /* Hold nets_rwsem so that any other thread modifying this |
|---|
| 1005 | + * system param can sync with this thread. |
|---|
| 1006 | + */ |
|---|
| 1007 | + down_read(&rdma_nets_rwsem); |
|---|
| 1008 | + xa_for_each (&dev->compat_devs, c_index, cdev) |
|---|
| 1009 | + remove_one_compat_dev(dev, c_index); |
|---|
| 1010 | + up_read(&rdma_nets_rwsem); |
|---|
| 1011 | + } |
|---|
| 1012 | + up_read(&devices_rwsem); |
|---|
| 1013 | +} |
|---|
| 1014 | + |
|---|
| 1015 | +static int add_all_compat_devs(void) |
|---|
| 1016 | +{ |
|---|
| 1017 | + struct rdma_dev_net *rnet; |
|---|
| 1018 | + struct ib_device *dev; |
|---|
| 1019 | + unsigned long index; |
|---|
| 1020 | + int ret = 0; |
|---|
| 1021 | + |
|---|
| 1022 | + down_read(&devices_rwsem); |
|---|
| 1023 | + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { |
|---|
| 1024 | + unsigned long net_index = 0; |
|---|
| 1025 | + |
|---|
| 1026 | + /* Hold nets_rwsem so that any other thread modifying this |
|---|
| 1027 | + * system param can sync with this thread. |
|---|
| 1028 | + */ |
|---|
| 1029 | + down_read(&rdma_nets_rwsem); |
|---|
| 1030 | + xa_for_each (&rdma_nets, net_index, rnet) { |
|---|
| 1031 | + ret = add_one_compat_dev(dev, rnet); |
|---|
| 1032 | + if (ret) |
|---|
| 1033 | + break; |
|---|
| 1034 | + } |
|---|
| 1035 | + up_read(&rdma_nets_rwsem); |
|---|
| 1036 | + } |
|---|
| 1037 | + up_read(&devices_rwsem); |
|---|
| 1038 | + if (ret) |
|---|
| 1039 | + remove_all_compat_devs(); |
|---|
| 1040 | + return ret; |
|---|
| 1041 | +} |
|---|
| 1042 | + |
|---|
| 1043 | +int rdma_compatdev_set(u8 enable) |
|---|
| 1044 | +{ |
|---|
| 1045 | + struct rdma_dev_net *rnet; |
|---|
| 1046 | + unsigned long index; |
|---|
| 1047 | + int ret = 0; |
|---|
| 1048 | + |
|---|
| 1049 | + down_write(&rdma_nets_rwsem); |
|---|
| 1050 | + if (ib_devices_shared_netns == enable) { |
|---|
| 1051 | + up_write(&rdma_nets_rwsem); |
|---|
| 1052 | + return 0; |
|---|
| 1053 | + } |
|---|
| 1054 | + |
|---|
| 1055 | + /* enable/disable of compat devices is not supported |
|---|
| 1056 | + * when more than the default init_net exists. |
|---|
| 1057 | + */ |
|---|
| 1058 | + xa_for_each (&rdma_nets, index, rnet) { |
|---|
| 1059 | + ret++; |
|---|
| 1060 | + break; |
|---|
| 1061 | + } |
|---|
| 1062 | + if (!ret) |
|---|
| 1063 | + ib_devices_shared_netns = enable; |
|---|
| 1064 | + up_write(&rdma_nets_rwsem); |
|---|
| 1065 | + if (ret) |
|---|
| 1066 | + return -EBUSY; |
|---|
| 1067 | + |
|---|
| 1068 | + if (enable) |
|---|
| 1069 | + ret = add_all_compat_devs(); |
|---|
| 1070 | + else |
|---|
| 1071 | + remove_all_compat_devs(); |
|---|
| 1072 | + return ret; |
|---|
| 1073 | +} |
|---|
| 1074 | + |
|---|
| 1075 | +static void rdma_dev_exit_net(struct net *net) |
|---|
| 1076 | +{ |
|---|
| 1077 | + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
|---|
| 1078 | + struct ib_device *dev; |
|---|
| 1079 | + unsigned long index; |
|---|
| 1080 | + int ret; |
|---|
| 1081 | + |
|---|
| 1082 | + down_write(&rdma_nets_rwsem); |
|---|
| 1083 | + /* |
|---|
| 1084 | + * Prevent the ID from being re-used and hide the id from xa_for_each. |
|---|
| 1085 | + */ |
|---|
| 1086 | + ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); |
|---|
| 1087 | + WARN_ON(ret); |
|---|
| 1088 | + up_write(&rdma_nets_rwsem); |
|---|
| 1089 | + |
|---|
| 1090 | + down_read(&devices_rwsem); |
|---|
| 1091 | + xa_for_each (&devices, index, dev) { |
|---|
| 1092 | + get_device(&dev->dev); |
|---|
| 1093 | + /* |
|---|
| 1094 | + * Release the devices_rwsem so that the potentially blocking |
|---|
| 1095 | + * device_del() doesn't hold the devices_rwsem for too long. |
|---|
| 1096 | + */ |
|---|
| 1097 | + up_read(&devices_rwsem); |
|---|
| 1098 | + |
|---|
| 1099 | + remove_one_compat_dev(dev, rnet->id); |
|---|
| 1100 | + |
|---|
| 1101 | + /* |
|---|
| 1102 | + * If the real device is in the NS then move it back to init. |
|---|
| 1103 | + */ |
|---|
| 1104 | + rdma_dev_change_netns(dev, net, &init_net); |
|---|
| 1105 | + |
|---|
| 1106 | + put_device(&dev->dev); |
|---|
| 1107 | + down_read(&devices_rwsem); |
|---|
| 1108 | + } |
|---|
| 1109 | + up_read(&devices_rwsem); |
|---|
| 1110 | + |
|---|
| 1111 | + rdma_nl_net_exit(rnet); |
|---|
| 1112 | + xa_erase(&rdma_nets, rnet->id); |
|---|
| 1113 | +} |
|---|
| 1114 | + |
|---|
| 1115 | +static __net_init int rdma_dev_init_net(struct net *net) |
|---|
| 1116 | +{ |
|---|
| 1117 | + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
|---|
| 1118 | + unsigned long index; |
|---|
| 1119 | + struct ib_device *dev; |
|---|
| 1120 | + int ret; |
|---|
| 1121 | + |
|---|
| 1122 | + write_pnet(&rnet->net, net); |
|---|
| 1123 | + |
|---|
| 1124 | + ret = rdma_nl_net_init(rnet); |
|---|
| 1125 | + if (ret) |
|---|
| 1126 | + return ret; |
|---|
| 1127 | + |
|---|
| 1128 | + /* No need to create any compat devices in default init_net. */ |
|---|
| 1129 | + if (net_eq(net, &init_net)) |
|---|
| 1130 | + return 0; |
|---|
| 1131 | + |
|---|
| 1132 | + ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); |
|---|
| 1133 | + if (ret) { |
|---|
| 1134 | + rdma_nl_net_exit(rnet); |
|---|
| 1135 | + return ret; |
|---|
| 1136 | + } |
|---|
| 1137 | + |
|---|
| 1138 | + down_read(&devices_rwsem); |
|---|
| 1139 | + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { |
|---|
| 1140 | + /* Hold nets_rwsem so that netlink command cannot change |
|---|
| 1141 | + * system configuration for device sharing mode. |
|---|
| 1142 | + */ |
|---|
| 1143 | + down_read(&rdma_nets_rwsem); |
|---|
| 1144 | + ret = add_one_compat_dev(dev, rnet); |
|---|
| 1145 | + up_read(&rdma_nets_rwsem); |
|---|
| 1146 | + if (ret) |
|---|
| 1147 | + break; |
|---|
| 1148 | + } |
|---|
| 1149 | + up_read(&devices_rwsem); |
|---|
| 1150 | + |
|---|
| 1151 | + if (ret) |
|---|
| 1152 | + rdma_dev_exit_net(net); |
|---|
| 1153 | + |
|---|
| 1154 | + return ret; |
|---|
| 1155 | +} |
|---|
| 1156 | + |
|---|
| 1157 | +/* |
|---|
| 1158 | + * Assign the unique string device name and the unique device index. This is |
|---|
| 1159 | + * undone by ib_dealloc_device. |
|---|
| 1160 | + */ |
|---|
| 1161 | +static int assign_name(struct ib_device *device, const char *name) |
|---|
| 1162 | +{ |
|---|
| 1163 | + static u32 last_id; |
|---|
| 1164 | + int ret; |
|---|
| 1165 | + |
|---|
| 1166 | + down_write(&devices_rwsem); |
|---|
| 1167 | + /* Assign a unique name to the device */ |
|---|
| 1168 | + if (strchr(name, '%')) |
|---|
| 1169 | + ret = alloc_name(device, name); |
|---|
| 1170 | + else |
|---|
| 1171 | + ret = dev_set_name(&device->dev, name); |
|---|
| 1172 | + if (ret) |
|---|
| 1173 | + goto out; |
|---|
| 1174 | + |
|---|
| 1175 | + if (__ib_device_get_by_name(dev_name(&device->dev))) { |
|---|
| 1176 | + ret = -ENFILE; |
|---|
| 1177 | + goto out; |
|---|
| 1178 | + } |
|---|
| 1179 | + strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); |
|---|
| 1180 | + |
|---|
| 1181 | + ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, |
|---|
| 1182 | + &last_id, GFP_KERNEL); |
|---|
| 1183 | + if (ret > 0) |
|---|
| 1184 | + ret = 0; |
|---|
| 1185 | + |
|---|
| 1186 | +out: |
|---|
| 1187 | + up_write(&devices_rwsem); |
|---|
| 1188 | + return ret; |
|---|
| 1189 | +} |
|---|
| 1190 | + |
|---|
| 1191 | +/* |
|---|
| 1192 | + * setup_device() allocates memory and sets up data that requires calling the |
|---|
| 1193 | + * device ops; this is the only reason these actions are not done during
|---|
| 1194 | + * ib_alloc_device. It is undone by ib_dealloc_device(). |
|---|
| 1195 | + */ |
|---|
| 1196 | +static int setup_device(struct ib_device *device) |
|---|
| 1197 | +{ |
|---|
| 1198 | + struct ib_udata uhw = {.outlen = 0, .inlen = 0}; |
|---|
| 1199 | + int ret; |
|---|
| 1200 | + |
|---|
| 1201 | + ib_device_check_mandatory(device); |
|---|
| 1202 | + |
|---|
| 1203 | + ret = setup_port_data(device); |
|---|
| 1204 | + if (ret) { |
|---|
| 1205 | + dev_warn(&device->dev, "Couldn't create per-port data\n"); |
|---|
| 1206 | + return ret; |
|---|
| 1207 | + } |
|---|
| 1208 | + |
|---|
| 1209 | + memset(&device->attrs, 0, sizeof(device->attrs)); |
|---|
| 1210 | + ret = device->ops.query_device(device, &device->attrs, &uhw); |
|---|
| 1211 | + if (ret) { |
|---|
| 1212 | + dev_warn(&device->dev, |
|---|
| 1213 | + "Couldn't query the device attributes\n"); |
|---|
| 1214 | + return ret; |
|---|
| 1215 | + } |
|---|
| 1216 | + |
|---|
| 1217 | + return 0; |
|---|
| 1218 | +} |
|---|
| 1219 | + |
|---|
| 1220 | +static void disable_device(struct ib_device *device) |
|---|
| 1221 | +{ |
|---|
| 1222 | + u32 cid; |
|---|
| 1223 | + |
|---|
| 1224 | + WARN_ON(!refcount_read(&device->refcount)); |
|---|
| 1225 | + |
|---|
| 1226 | + down_write(&devices_rwsem); |
|---|
| 1227 | + xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); |
|---|
| 1228 | + up_write(&devices_rwsem); |
|---|
| 1229 | + |
|---|
| 1230 | + /* |
|---|
| 1231 | + * Remove clients in LIFO order, see assign_client_id. This could be |
|---|
| 1232 | + * more efficient if xarray learns to reverse iterate. Since no new |
|---|
| 1233 | + * clients can be added to this ib_device past this point we only need |
|---|
| 1234 | + * the maximum possible client_id value here. |
|---|
| 1235 | + */ |
|---|
| 1236 | + down_read(&clients_rwsem); |
|---|
| 1237 | + cid = highest_client_id; |
|---|
| 1238 | + up_read(&clients_rwsem); |
|---|
| 1239 | + while (cid) { |
|---|
| 1240 | + cid--; |
|---|
| 1241 | + remove_client_context(device, cid); |
|---|
| 1242 | + } |
|---|
| 1243 | + |
|---|
| 1244 | + ib_cq_pool_destroy(device); |
|---|
| 1245 | + |
|---|
| 1246 | + /* Pairs with refcount_set in enable_device */ |
|---|
| 1247 | + ib_device_put(device); |
|---|
| 1248 | + wait_for_completion(&device->unreg_completion); |
|---|
| 1249 | + |
|---|
| 1250 | + /* |
|---|
| 1251 | + * Compat devices must be removed after the device refcount drops to zero.
|---|
| 1252 | + * Otherwise rdma_dev_init_net() may add more compatdevs after removing compat
|---|
| 1253 | + * devices and before device is disabled. |
|---|
| 1254 | + */ |
|---|
| 1255 | + remove_compat_devs(device); |
|---|
| 1256 | +} |
|---|
| 1257 | + |
|---|
| 1258 | +/* |
|---|
| 1259 | + * An enabled device is visible to all clients and to all the public facing |
|---|
| 1260 | + * APIs that return a device pointer. This always returns with a new get, even |
|---|
| 1261 | + * if it fails. |
|---|
| 1262 | + */ |
|---|
| 1263 | +static int enable_device_and_get(struct ib_device *device) |
|---|
| 1264 | +{ |
|---|
| 1265 | + struct ib_client *client; |
|---|
| 1266 | + unsigned long index; |
|---|
| 1267 | + int ret = 0; |
|---|
| 1268 | + |
|---|
| 1269 | + /* |
|---|
| 1270 | + * One ref belongs to the xa and the other belongs to this |
|---|
| 1271 | + * thread. This is needed to guard against parallel unregistration. |
|---|
| 1272 | + */ |
|---|
| 1273 | + refcount_set(&device->refcount, 2); |
|---|
| 1274 | + down_write(&devices_rwsem); |
|---|
| 1275 | + xa_set_mark(&devices, device->index, DEVICE_REGISTERED); |
|---|
| 1276 | + |
|---|
| 1277 | + /* |
|---|
| 1278 | + * By using downgrade_write() we ensure that no other thread can clear |
|---|
| 1279 | + * DEVICE_REGISTERED while we are completing the client setup. |
|---|
| 1280 | + */ |
|---|
| 1281 | + downgrade_write(&devices_rwsem); |
|---|
| 1282 | + |
|---|
| 1283 | + if (device->ops.enable_driver) { |
|---|
| 1284 | + ret = device->ops.enable_driver(device); |
|---|
| 1285 | + if (ret) |
|---|
| 1286 | + goto out; |
|---|
| 1287 | + } |
|---|
| 1288 | + |
|---|
| 1289 | + ib_cq_pool_init(device); |
|---|
| 1290 | + |
|---|
| 1291 | + down_read(&clients_rwsem); |
|---|
| 1292 | + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { |
|---|
| 1293 | + ret = add_client_context(device, client); |
|---|
| 1294 | + if (ret) |
|---|
| 1295 | + break; |
|---|
| 1296 | + } |
|---|
| 1297 | + up_read(&clients_rwsem); |
|---|
| 1298 | + if (!ret) |
|---|
| 1299 | + ret = add_compat_devs(device); |
|---|
| 1300 | +out: |
|---|
| 1301 | + up_read(&devices_rwsem); |
|---|
| 1302 | + return ret; |
|---|
| 1303 | +} |
|---|
| 1304 | + |
|---|
| 1305 | +static void prevent_dealloc_device(struct ib_device *ib_dev) |
|---|
| 1306 | +{ |
|---|
| 446 | 1307 | } |
|---|
| 447 | 1308 | |
|---|
| 448 | 1309 | /** |
|---|
| 449 | 1310 | * ib_register_device - Register an IB device with IB core |
|---|
| 450 | | - * @device:Device to register |
|---|
| 1311 | + * @device: Device to register |
|---|
| 1312 | + * @name: unique string device name. This may include a '%' which will |
|---|
| 1313 | + * cause a unique index to be added to the passed device name. |
|---|
| 1314 | + * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB |
|---|
| 1315 | + * device will be used. In this case the caller should fully |
|---|
| 1316 | + * set up the ibdev for DMA. This usually means using dma_virt_ops.
|---|
| 451 | 1317 | * |
|---|
| 452 | 1318 | * Low-level drivers use ib_register_device() to register their |
|---|
| 453 | 1319 | * devices with the IB core. All registered clients will receive a |
|---|
| 454 | 1320 | * callback for each device that is added. @device must be allocated |
|---|
| 455 | 1321 | * with ib_alloc_device(). |
|---|
| 1322 | + * |
|---|
| 1323 | + * If the driver uses ops.dealloc_driver and calls any ib_unregister_device() |
|---|
| 1324 | + * asynchronously then the device pointer may be freed as soon as this
|---|
| 1325 | + * function returns. |
|---|
| 456 | 1326 | */ |
|---|
| 457 | | -int ib_register_device(struct ib_device *device, |
|---|
| 458 | | - int (*port_callback)(struct ib_device *, |
|---|
| 459 | | - u8, struct kobject *)) |
|---|
| 1327 | +int ib_register_device(struct ib_device *device, const char *name, |
|---|
| 1328 | + struct device *dma_device) |
|---|
| 460 | 1329 | { |
|---|
| 461 | 1330 | int ret; |
|---|
| 462 | | - struct ib_client *client; |
|---|
| 463 | | - struct ib_udata uhw = {.outlen = 0, .inlen = 0}; |
|---|
| 464 | | - struct device *parent = device->dev.parent; |
|---|
| 465 | 1331 | |
|---|
| 466 | | - WARN_ON_ONCE(device->dma_device); |
|---|
| 467 | | - if (device->dev.dma_ops) { |
|---|
| 468 | | - /* |
|---|
| 469 | | - * The caller provided custom DMA operations. Copy the |
|---|
| 470 | | - * DMA-related fields that are used by e.g. dma_alloc_coherent() |
|---|
| 471 | | - * into device->dev. |
|---|
| 472 | | - */ |
|---|
| 473 | | - device->dma_device = &device->dev; |
|---|
| 474 | | - if (!device->dev.dma_mask) { |
|---|
| 475 | | - if (parent) |
|---|
| 476 | | - device->dev.dma_mask = parent->dma_mask; |
|---|
| 477 | | - else |
|---|
| 478 | | - WARN_ON_ONCE(true); |
|---|
| 479 | | - } |
|---|
| 480 | | - if (!device->dev.coherent_dma_mask) { |
|---|
| 481 | | - if (parent) |
|---|
| 482 | | - device->dev.coherent_dma_mask = |
|---|
| 483 | | - parent->coherent_dma_mask; |
|---|
| 484 | | - else |
|---|
| 485 | | - WARN_ON_ONCE(true); |
|---|
| 486 | | - } |
|---|
| 487 | | - } else { |
|---|
| 488 | | - /* |
|---|
| 489 | | - * The caller did not provide custom DMA operations. Use the |
|---|
| 490 | | - * DMA mapping operations of the parent device. |
|---|
| 491 | | - */ |
|---|
| 492 | | - WARN_ON_ONCE(!parent); |
|---|
| 493 | | - device->dma_device = parent; |
|---|
| 494 | | - } |
|---|
| 1332 | + ret = assign_name(device, name); |
|---|
| 1333 | + if (ret) |
|---|
| 1334 | + return ret; |
|---|
| 495 | 1335 | |
|---|
| 496 | | - mutex_lock(&device_mutex); |
|---|
| 1336 | + /* |
|---|
| 1337 | + * If the caller does not provide a DMA capable device then the IB core |
|---|
| 1338 | + * will set up ib_sge and scatterlist structures that stash the kernel |
|---|
| 1339 | + * virtual address into the address field. |
|---|
| 1340 | + */ |
|---|
| 1341 | + WARN_ON(dma_device && !dma_device->dma_parms); |
|---|
| 1342 | + device->dma_device = dma_device; |
|---|
| 497 | 1343 | |
|---|
| 498 | | - if (strchr(device->name, '%')) { |
|---|
| 499 | | - ret = alloc_name(device->name); |
|---|
| 500 | | - if (ret) |
|---|
| 501 | | - goto out; |
|---|
| 502 | | - } |
|---|
| 503 | | - |
|---|
| 504 | | - if (ib_device_check_mandatory(device)) { |
|---|
| 505 | | - ret = -EINVAL; |
|---|
| 506 | | - goto out; |
|---|
| 507 | | - } |
|---|
| 508 | | - |
|---|
| 509 | | - ret = read_port_immutable(device); |
|---|
| 510 | | - if (ret) { |
|---|
| 511 | | - pr_warn("Couldn't create per port immutable data %s\n", |
|---|
| 512 | | - device->name); |
|---|
| 513 | | - goto out; |
|---|
| 514 | | - } |
|---|
| 515 | | - |
|---|
| 516 | | - ret = setup_port_pkey_list(device); |
|---|
| 517 | | - if (ret) { |
|---|
| 518 | | - pr_warn("Couldn't create per port_pkey_list\n"); |
|---|
| 519 | | - goto out; |
|---|
| 520 | | - } |
|---|
| 1344 | + ret = setup_device(device); |
|---|
| 1345 | + if (ret) |
|---|
| 1346 | + return ret; |
|---|
| 521 | 1347 | |
|---|
| 522 | 1348 | ret = ib_cache_setup_one(device); |
|---|
| 523 | 1349 | if (ret) { |
|---|
| 524 | | - pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); |
|---|
| 525 | | - goto port_cleanup; |
|---|
| 1350 | + dev_warn(&device->dev, |
|---|
| 1351 | + "Couldn't set up InfiniBand P_Key/GID cache\n"); |
|---|
| 1352 | + return ret; |
|---|
| 526 | 1353 | } |
|---|
| 527 | 1354 | |
|---|
| 528 | | - ret = ib_device_register_rdmacg(device); |
|---|
| 529 | | - if (ret) { |
|---|
| 530 | | - pr_warn("Couldn't register device with rdma cgroup\n"); |
|---|
| 531 | | - goto cache_cleanup; |
|---|
| 532 | | - } |
|---|
| 1355 | + ib_device_register_rdmacg(device); |
|---|
| 533 | 1356 | |
|---|
| 534 | | - memset(&device->attrs, 0, sizeof(device->attrs)); |
|---|
| 535 | | - ret = device->query_device(device, &device->attrs, &uhw); |
|---|
| 536 | | - if (ret) { |
|---|
| 537 | | - pr_warn("Couldn't query the device attributes\n"); |
|---|
| 1357 | + rdma_counter_init(device); |
|---|
| 1358 | + |
|---|
| 1359 | + /* |
|---|
| 1360 | + * Ensure that the ADD uevent is not fired because it
|---|
| 1361 | + * is too early and the device is not yet initialized.
|---|
| 1362 | + */ |
|---|
| 1363 | + dev_set_uevent_suppress(&device->dev, true); |
|---|
| 1364 | + ret = device_add(&device->dev); |
|---|
| 1365 | + if (ret) |
|---|
| 538 | 1366 | goto cg_cleanup; |
|---|
| 539 | | - } |
|---|
| 540 | 1367 | |
|---|
| 541 | | - ret = ib_device_register_sysfs(device, port_callback); |
|---|
| 1368 | + ret = ib_device_register_sysfs(device); |
|---|
| 542 | 1369 | if (ret) { |
|---|
| 543 | | - pr_warn("Couldn't register device %s with driver model\n", |
|---|
| 544 | | - device->name); |
|---|
| 545 | | - goto cg_cleanup; |
|---|
| 1370 | + dev_warn(&device->dev, |
|---|
| 1371 | + "Couldn't register device with driver model\n"); |
|---|
| 1372 | + goto dev_cleanup; |
|---|
| 546 | 1373 | } |
|---|
| 547 | 1374 | |
|---|
| 548 | | - device->reg_state = IB_DEV_REGISTERED; |
|---|
| 1375 | + ret = enable_device_and_get(device); |
|---|
| 1376 | + if (ret) { |
|---|
| 1377 | + void (*dealloc_fn)(struct ib_device *); |
|---|
| 549 | 1378 | |
|---|
| 550 | | - list_for_each_entry(client, &client_list, list) |
|---|
| 551 | | - if (!add_client_context(device, client) && client->add) |
|---|
| 552 | | - client->add(device); |
|---|
| 1379 | + /* |
|---|
| 1380 | + * If we hit this error flow then we don't want to |
|---|
| 1381 | + * automatically dealloc the device since the caller is |
|---|
| 1382 | + * expected to call ib_dealloc_device() after |
|---|
| 1383 | + * ib_register_device() fails. This is tricky due to the |
|---|
| 1384 | + * possibility of a parallel unregistration along with this
|---|
| 1385 | + * error flow. Since we have a refcount here we know any |
|---|
| 1386 | + * parallel flow is stopped in disable_device and will see the |
|---|
| 1387 | + * special dealloc_driver pointer, causing the responsibility for
|---|
| 1388 | + * calling ib_dealloc_device() to revert to this thread.
|---|
| 1389 | + */ |
|---|
| 1390 | + dealloc_fn = device->ops.dealloc_driver; |
|---|
| 1391 | + device->ops.dealloc_driver = prevent_dealloc_device; |
|---|
| 1392 | + ib_device_put(device); |
|---|
| 1393 | + __ib_unregister_device(device); |
|---|
| 1394 | + device->ops.dealloc_driver = dealloc_fn; |
|---|
| 1395 | + dev_set_uevent_suppress(&device->dev, false); |
|---|
| 1396 | + return ret; |
|---|
| 1397 | + } |
|---|
| 1398 | + dev_set_uevent_suppress(&device->dev, false); |
|---|
| 1399 | + /* Mark for userspace that device is ready */ |
|---|
| 1400 | + kobject_uevent(&device->dev.kobj, KOBJ_ADD); |
|---|
| 1401 | + ib_device_put(device); |
|---|
| 553 | 1402 | |
|---|
| 554 | | - device->index = __dev_new_index(); |
|---|
| 555 | | - down_write(&lists_rwsem); |
|---|
| 556 | | - list_add_tail(&device->core_list, &device_list); |
|---|
| 557 | | - up_write(&lists_rwsem); |
|---|
| 558 | | - mutex_unlock(&device_mutex); |
|---|
| 559 | 1403 | return 0; |
|---|
| 560 | 1404 | |
|---|
| 1405 | +dev_cleanup: |
|---|
| 1406 | + device_del(&device->dev); |
|---|
| 561 | 1407 | cg_cleanup: |
|---|
| 1408 | + dev_set_uevent_suppress(&device->dev, false); |
|---|
| 562 | 1409 | ib_device_unregister_rdmacg(device); |
|---|
| 563 | | -cache_cleanup: |
|---|
| 564 | 1410 | ib_cache_cleanup_one(device); |
|---|
| 565 | | - ib_cache_release_one(device); |
|---|
| 566 | | -port_cleanup: |
|---|
| 567 | | - kfree(device->port_immutable); |
|---|
| 568 | | -out: |
|---|
| 569 | | - mutex_unlock(&device_mutex); |
|---|
| 570 | 1411 | return ret; |
|---|
| 571 | 1412 | } |
|---|
| 572 | 1413 | EXPORT_SYMBOL(ib_register_device); |
|---|
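To make the new three-argument registration flow concrete, here is a minimal, hypothetical probe sketch. The `mydrv_dev` structure, its `ibdev` member, and the `"mydrv%d"` name are illustrative only; the `ib_alloc_device()`/`ib_register_device()`/`ib_dealloc_device()` calls follow the contract in the kernel-doc above (the caller deallocates if registration fails).

```c
#include <linux/pci.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver-private structure; only the embedded ib_device matters */
struct mydrv_dev {
	struct ib_device ibdev;
	/* ... driver-specific state ... */
};

static int mydrv_probe(struct pci_dev *pdev)
{
	struct mydrv_dev *dev;
	int ret;

	/* Allocates a zeroed mydrv_dev with an embedded struct ib_device */
	dev = ib_alloc_device(mydrv_dev, ibdev);
	if (!dev)
		return -ENOMEM;

	/* ... fill in dev->ibdev.ops, phys_port_cnt, node_type, etc. ... */

	/* '%d' asks the core to pick a unique suffix for the name */
	ret = ib_register_device(&dev->ibdev, "mydrv%d", &pdev->dev);
	if (ret)
		ib_dealloc_device(&dev->ibdev);	/* caller's job on failure */
	return ret;
}
```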
| 573 | 1414 | |
|---|
| 1415 | +/* Callers must hold a get on the device. */ |
|---|
| 1416 | +static void __ib_unregister_device(struct ib_device *ib_dev) |
|---|
| 1417 | +{ |
|---|
| 1418 | + /* |
|---|
| 1419 | + * We have a registration lock so that all the calls to unregister are |
|---|
| 1420 | + * fully fenced; once any unregister returns, the device is truly
|---|
| 1421 | + * unregistered even if multiple callers are unregistering it at the |
|---|
| 1422 | + * same time. This also interacts with the registration flow and |
|---|
| 1423 | + * provides sane semantics if register and unregister are racing. |
|---|
| 1424 | + */ |
|---|
| 1425 | + mutex_lock(&ib_dev->unregistration_lock); |
|---|
| 1426 | + if (!refcount_read(&ib_dev->refcount)) |
|---|
| 1427 | + goto out; |
|---|
| 1428 | + |
|---|
| 1429 | + disable_device(ib_dev); |
|---|
| 1430 | + |
|---|
| 1431 | + /* Expedite removing unregistered pointers from the hash table */ |
|---|
| 1432 | + free_netdevs(ib_dev); |
|---|
| 1433 | + |
|---|
| 1434 | + ib_device_unregister_sysfs(ib_dev); |
|---|
| 1435 | + device_del(&ib_dev->dev); |
|---|
| 1436 | + ib_device_unregister_rdmacg(ib_dev); |
|---|
| 1437 | + ib_cache_cleanup_one(ib_dev); |
|---|
| 1438 | + |
|---|
| 1439 | + /* |
|---|
| 1440 | + * Drivers using the new flow may not call ib_dealloc_device except |
|---|
| 1441 | + * in error unwind prior to registration success. |
|---|
| 1442 | + */ |
|---|
| 1443 | + if (ib_dev->ops.dealloc_driver && |
|---|
| 1444 | + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { |
|---|
| 1445 | + WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); |
|---|
| 1446 | + ib_dealloc_device(ib_dev); |
|---|
| 1447 | + } |
|---|
| 1448 | +out: |
|---|
| 1449 | + mutex_unlock(&ib_dev->unregistration_lock); |
|---|
| 1450 | +} |
|---|
| 1451 | + |
|---|
| 574 | 1452 | /** |
|---|
| 575 | 1453 | * ib_unregister_device - Unregister an IB device |
|---|
| 576 | | - * @device:Device to unregister |
|---|
| 1454 | + * @ib_dev: The device to unregister |
|---|
| 577 | 1455 | * |
|---|
| 578 | 1456 | * Unregister an IB device. All clients will receive a remove callback. |
|---|
| 1457 | + * |
|---|
| 1458 | + * Callers should call this routine only once, and protect against races with |
|---|
| 1459 | + * registration. Typically it should only be called as part of a remove |
|---|
| 1460 | + * callback in an implementation of driver core's struct device_driver and |
|---|
| 1461 | + * related. |
|---|
| 1462 | + * |
|---|
| 1463 | + * If ops.dealloc_driver is used then ib_dev will be freed upon return from |
|---|
| 1464 | + * this function. |
|---|
| 579 | 1465 | */ |
|---|
| 580 | | -void ib_unregister_device(struct ib_device *device) |
|---|
| 1466 | +void ib_unregister_device(struct ib_device *ib_dev) |
|---|
| 581 | 1467 | { |
|---|
| 582 | | - struct ib_client_data *context, *tmp; |
|---|
| 583 | | - unsigned long flags; |
|---|
| 584 | | - |
|---|
| 585 | | - mutex_lock(&device_mutex); |
|---|
| 586 | | - |
|---|
| 587 | | - down_write(&lists_rwsem); |
|---|
| 588 | | - list_del(&device->core_list); |
|---|
| 589 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 590 | | - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) |
|---|
| 591 | | - context->going_down = true; |
|---|
| 592 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 593 | | - downgrade_write(&lists_rwsem); |
|---|
| 594 | | - |
|---|
| 595 | | - list_for_each_entry_safe(context, tmp, &device->client_data_list, |
|---|
| 596 | | - list) { |
|---|
| 597 | | - if (context->client->remove) |
|---|
| 598 | | - context->client->remove(device, context->data); |
|---|
| 599 | | - } |
|---|
| 600 | | - up_read(&lists_rwsem); |
|---|
| 601 | | - |
|---|
| 602 | | - ib_device_unregister_sysfs(device); |
|---|
| 603 | | - ib_device_unregister_rdmacg(device); |
|---|
| 604 | | - |
|---|
| 605 | | - mutex_unlock(&device_mutex); |
|---|
| 606 | | - |
|---|
| 607 | | - ib_cache_cleanup_one(device); |
|---|
| 608 | | - |
|---|
| 609 | | - ib_security_destroy_port_pkey_list(device); |
|---|
| 610 | | - kfree(device->port_pkey_list); |
|---|
| 611 | | - |
|---|
| 612 | | - down_write(&lists_rwsem); |
|---|
| 613 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 614 | | - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) |
|---|
| 615 | | - kfree(context); |
|---|
| 616 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 617 | | - up_write(&lists_rwsem); |
|---|
| 618 | | - |
|---|
| 619 | | - device->reg_state = IB_DEV_UNREGISTERED; |
|---|
| 1468 | + get_device(&ib_dev->dev); |
|---|
| 1469 | + __ib_unregister_device(ib_dev); |
|---|
| 1470 | + put_device(&ib_dev->dev); |
|---|
| 620 | 1471 | } |
|---|
| 621 | 1472 | EXPORT_SYMBOL(ib_unregister_device); |
|---|
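A matching, equally hypothetical remove path for a driver that does not provide ops.dealloc_driver: it must free the ib_device itself after the synchronous unregister returns (this assumes the probe sketch above also stored the device with pci_set_drvdata()).

```c
/* Hypothetical remove path paired with the probe sketch above. */
static void mydrv_remove(struct pci_dev *pdev)
{
	struct mydrv_dev *dev = pci_get_drvdata(pdev);

	ib_unregister_device(&dev->ibdev);	/* all clients get remove() */
	ib_dealloc_device(&dev->ibdev);		/* no dealloc_driver, free it here */
}
```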
| 1473 | + |
|---|
| 1474 | +/** |
|---|
| 1475 | + * ib_unregister_device_and_put - Unregister a device while holding a 'get' |
|---|
| 1476 | + * @ib_dev: The device to unregister |
|---|
| 1477 | + * |
|---|
| 1478 | + * This is the same as ib_unregister_device(), except it includes an internal |
|---|
| 1479 | + * ib_device_put() that should match a 'get' obtained by the caller. |
|---|
| 1480 | + * |
|---|
| 1481 | + * It is safe to call this routine concurrently from multiple threads while |
|---|
| 1482 | + * holding the 'get'. When the function returns the device is fully |
|---|
| 1483 | + * unregistered. |
|---|
| 1484 | + * |
|---|
| 1485 | + * Drivers using this flow MUST use the dealloc_driver callback to clean up
|---|
| 1486 | + * their resources associated with the device and dealloc it. |
|---|
| 1487 | + */ |
|---|
| 1488 | +void ib_unregister_device_and_put(struct ib_device *ib_dev) |
|---|
| 1489 | +{ |
|---|
| 1490 | + WARN_ON(!ib_dev->ops.dealloc_driver); |
|---|
| 1491 | + get_device(&ib_dev->dev); |
|---|
| 1492 | + ib_device_put(ib_dev); |
|---|
| 1493 | + __ib_unregister_device(ib_dev); |
|---|
| 1494 | + put_device(&ib_dev->dev); |
|---|
| 1495 | +} |
|---|
| 1496 | +EXPORT_SYMBOL(ib_unregister_device_and_put); |
|---|
| 1497 | + |
|---|
| 1498 | +/** |
|---|
| 1499 | + * ib_unregister_driver - Unregister all IB devices for a driver |
|---|
| 1500 | + * @driver_id: The driver to unregister |
|---|
| 1501 | + * |
|---|
| 1502 | + * This implements a fence for device unregistration. It only returns once all |
|---|
| 1503 | + * devices associated with the driver_id have fully completed their |
|---|
| 1504 | + * unregistration and returned from ib_unregister_device*(). |
|---|
| 1505 | + * |
|---|
| 1506 | + * If devices are not yet unregistered, it goes ahead and starts unregistering
|---|
| 1507 | + * them. |
|---|
| 1508 | + * |
|---|
| 1509 | + * This does not block creation of new devices with the given driver_id; that
|---|
| 1510 | + * is the responsibility of the caller. |
|---|
| 1511 | + */ |
|---|
| 1512 | +void ib_unregister_driver(enum rdma_driver_id driver_id) |
|---|
| 1513 | +{ |
|---|
| 1514 | + struct ib_device *ib_dev; |
|---|
| 1515 | + unsigned long index; |
|---|
| 1516 | + |
|---|
| 1517 | + down_read(&devices_rwsem); |
|---|
| 1518 | + xa_for_each (&devices, index, ib_dev) { |
|---|
| 1519 | + if (ib_dev->ops.driver_id != driver_id) |
|---|
| 1520 | + continue; |
|---|
| 1521 | + |
|---|
| 1522 | + get_device(&ib_dev->dev); |
|---|
| 1523 | + up_read(&devices_rwsem); |
|---|
| 1524 | + |
|---|
| 1525 | + WARN_ON(!ib_dev->ops.dealloc_driver); |
|---|
| 1526 | + __ib_unregister_device(ib_dev); |
|---|
| 1527 | + |
|---|
| 1528 | + put_device(&ib_dev->dev); |
|---|
| 1529 | + down_read(&devices_rwsem); |
|---|
| 1530 | + } |
|---|
| 1531 | + up_read(&devices_rwsem); |
|---|
| 1532 | +} |
|---|
| 1533 | +EXPORT_SYMBOL(ib_unregister_driver); |
|---|
| 1534 | + |
|---|
| 1535 | +static void ib_unregister_work(struct work_struct *work) |
|---|
| 1536 | +{ |
|---|
| 1537 | + struct ib_device *ib_dev = |
|---|
| 1538 | + container_of(work, struct ib_device, unregistration_work); |
|---|
| 1539 | + |
|---|
| 1540 | + __ib_unregister_device(ib_dev); |
|---|
| 1541 | + put_device(&ib_dev->dev); |
|---|
| 1542 | +} |
|---|
| 1543 | + |
|---|
| 1544 | +/** |
|---|
| 1545 | + * ib_unregister_device_queued - Unregister a device using a work queue |
|---|
| 1546 | + * @ib_dev: The device to unregister |
|---|
| 1547 | + * |
|---|
| 1548 | + * This schedules an asynchronous unregistration using a WQ for the device. A |
|---|
| 1549 | + * driver should use this to avoid holding locks while doing unregistration, |
|---|
| 1550 | + * such as holding the RTNL lock. |
|---|
| 1551 | + * |
|---|
| 1552 | + * Drivers using this API must use ib_unregister_driver before module unload |
|---|
| 1553 | + * to ensure that all scheduled unregistrations have completed. |
|---|
| 1554 | + */ |
|---|
| 1555 | +void ib_unregister_device_queued(struct ib_device *ib_dev) |
|---|
| 1556 | +{ |
|---|
| 1557 | + WARN_ON(!refcount_read(&ib_dev->refcount)); |
|---|
| 1558 | + WARN_ON(!ib_dev->ops.dealloc_driver); |
|---|
| 1559 | + get_device(&ib_dev->dev); |
|---|
| 1560 | + if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) |
|---|
| 1561 | + put_device(&ib_dev->dev); |
|---|
| 1562 | +} |
|---|
| 1563 | +EXPORT_SYMBOL(ib_unregister_device_queued); |
|---|
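As a sketch of how the asynchronous flow might be used: a hot-unplug or notifier path schedules the unregister without sleeping on heavy locks, and module exit uses ib_unregister_driver() as the fence. RDMA_DRIVER_MYDRV and the surrounding names are hypothetical; a real driver passes its own ops.driver_id.

```c
/* Hypothetical async teardown; requires ops.dealloc_driver to be set. */
static void mydrv_schedule_removal(struct mydrv_dev *dev)
{
	/* Safe to call from contexts holding e.g. the RTNL lock */
	ib_unregister_device_queued(&dev->ibdev);
}

static void __exit mydrv_exit(void)
{
	/* Fence: returns only after every queued unregister has finished */
	ib_unregister_driver(RDMA_DRIVER_MYDRV);	/* hypothetical driver id */
}
```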
| 1564 | + |
|---|
| 1565 | +/* |
|---|
| 1566 | + * The caller must pass in a device that has the kref held and the refcount |
|---|
| 1567 | + * released. If the device is in cur_net and still registered then it is moved |
|---|
| 1568 | + * into net. |
|---|
| 1569 | + */ |
|---|
| 1570 | +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, |
|---|
| 1571 | + struct net *net) |
|---|
| 1572 | +{ |
|---|
| 1573 | + int ret2 = -EINVAL; |
|---|
| 1574 | + int ret; |
|---|
| 1575 | + |
|---|
| 1576 | + mutex_lock(&device->unregistration_lock); |
|---|
| 1577 | + |
|---|
| 1578 | + /* |
|---|
| 1579 | + * If the device is not under ib_device_get() or if the unregistration_lock
|---|
| 1580 | + * is not held, the namespace can be changed or the device can be unregistered.
|---|
| 1581 | + * Check again under the lock. |
|---|
| 1582 | + */ |
|---|
| 1583 | + if (refcount_read(&device->refcount) == 0 || |
|---|
| 1584 | + !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { |
|---|
| 1585 | + ret = -ENODEV; |
|---|
| 1586 | + goto out; |
|---|
| 1587 | + } |
|---|
| 1588 | + |
|---|
| 1589 | + kobject_uevent(&device->dev.kobj, KOBJ_REMOVE); |
|---|
| 1590 | + disable_device(device); |
|---|
| 1591 | + |
|---|
| 1592 | + /* |
|---|
| 1593 | + * At this point no one can be using the device, so it is safe to |
|---|
| 1594 | + * change the namespace. |
|---|
| 1595 | + */ |
|---|
| 1596 | + write_pnet(&device->coredev.rdma_net, net); |
|---|
| 1597 | + |
|---|
| 1598 | + down_read(&devices_rwsem); |
|---|
| 1599 | + /* |
|---|
| 1600 | + * Currently rdma devices are system-wide unique, so the device name
|---|
| 1601 | + * is guaranteed free in the new namespace. Publish the new namespace |
|---|
| 1602 | + * at the sysfs level. |
|---|
| 1603 | + */ |
|---|
| 1604 | + ret = device_rename(&device->dev, dev_name(&device->dev)); |
|---|
| 1605 | + up_read(&devices_rwsem); |
|---|
| 1606 | + if (ret) { |
|---|
| 1607 | + dev_warn(&device->dev, |
|---|
| 1608 | + "%s: Couldn't rename device after namespace change\n", |
|---|
| 1609 | + __func__); |
|---|
| 1610 | + /* Try and put things back and re-enable the device */ |
|---|
| 1611 | + write_pnet(&device->coredev.rdma_net, cur_net); |
|---|
| 1612 | + } |
|---|
| 1613 | + |
|---|
| 1614 | + ret2 = enable_device_and_get(device); |
|---|
| 1615 | + if (ret2) { |
|---|
| 1616 | + /* |
|---|
| 1617 | + * This shouldn't really happen, but if it does, let the user |
|---|
| 1618 | + * retry at a later point. So don't disable the device.
|---|
| 1619 | + */ |
|---|
| 1620 | + dev_warn(&device->dev, |
|---|
| 1621 | + "%s: Couldn't re-enable device after namespace change\n", |
|---|
| 1622 | + __func__); |
|---|
| 1623 | + } |
|---|
| 1624 | + kobject_uevent(&device->dev.kobj, KOBJ_ADD); |
|---|
| 1625 | + |
|---|
| 1626 | + ib_device_put(device); |
|---|
| 1627 | +out: |
|---|
| 1628 | + mutex_unlock(&device->unregistration_lock); |
|---|
| 1629 | + if (ret) |
|---|
| 1630 | + return ret; |
|---|
| 1631 | + return ret2; |
|---|
| 1632 | +} |
|---|
| 1633 | + |
|---|
| 1634 | +int ib_device_set_netns_put(struct sk_buff *skb, |
|---|
| 1635 | + struct ib_device *dev, u32 ns_fd) |
|---|
| 1636 | +{ |
|---|
| 1637 | + struct net *net; |
|---|
| 1638 | + int ret; |
|---|
| 1639 | + |
|---|
| 1640 | + net = get_net_ns_by_fd(ns_fd); |
|---|
| 1641 | + if (IS_ERR(net)) { |
|---|
| 1642 | + ret = PTR_ERR(net); |
|---|
| 1643 | + goto net_err; |
|---|
| 1644 | + } |
|---|
| 1645 | + |
|---|
| 1646 | + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { |
|---|
| 1647 | + ret = -EPERM; |
|---|
| 1648 | + goto ns_err; |
|---|
| 1649 | + } |
|---|
| 1650 | + |
|---|
| 1651 | + /* |
|---|
| 1652 | + * Currently supported only for those providers which support |
|---|
| 1653 | + * disassociation and don't do port specific sysfs init. Once a |
|---|
| 1654 | + * port_cleanup infrastructure is implemented, this limitation will be |
|---|
| 1655 | + * removed. |
|---|
| 1656 | + */ |
|---|
| 1657 | + if (!dev->ops.disassociate_ucontext || dev->ops.init_port || |
|---|
| 1658 | + ib_devices_shared_netns) { |
|---|
| 1659 | + ret = -EOPNOTSUPP; |
|---|
| 1660 | + goto ns_err; |
|---|
| 1661 | + } |
|---|
| 1662 | + |
|---|
| 1663 | + get_device(&dev->dev); |
|---|
| 1664 | + ib_device_put(dev); |
|---|
| 1665 | + ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); |
|---|
| 1666 | + put_device(&dev->dev); |
|---|
| 1667 | + |
|---|
| 1668 | + put_net(net); |
|---|
| 1669 | + return ret; |
|---|
| 1670 | + |
|---|
| 1671 | +ns_err: |
|---|
| 1672 | + put_net(net); |
|---|
| 1673 | +net_err: |
|---|
| 1674 | + ib_device_put(dev); |
|---|
| 1675 | + return ret; |
|---|
| 1676 | +} |
|---|
| 1677 | + |
|---|
| 1678 | +static struct pernet_operations rdma_dev_net_ops = { |
|---|
| 1679 | + .init = rdma_dev_init_net, |
|---|
| 1680 | + .exit = rdma_dev_exit_net, |
|---|
| 1681 | + .id = &rdma_dev_net_id, |
|---|
| 1682 | + .size = sizeof(struct rdma_dev_net), |
|---|
| 1683 | +}; |
|---|
| 1684 | + |
|---|
| 1685 | +static int assign_client_id(struct ib_client *client) |
|---|
| 1686 | +{ |
|---|
| 1687 | + int ret; |
|---|
| 1688 | + |
|---|
| 1689 | + down_write(&clients_rwsem); |
|---|
| 1690 | + /* |
|---|
| 1691 | + * The add/remove callbacks must be called in FIFO/LIFO order. To |
|---|
| 1692 | + * achieve this we assign client_ids so they are sorted in |
|---|
| 1693 | + * registration order. |
|---|
| 1694 | + */ |
|---|
| 1695 | + client->client_id = highest_client_id; |
|---|
| 1696 | + ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); |
|---|
| 1697 | + if (ret) |
|---|
| 1698 | + goto out; |
|---|
| 1699 | + |
|---|
| 1700 | + highest_client_id++; |
|---|
| 1701 | + xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); |
|---|
| 1702 | + |
|---|
| 1703 | +out: |
|---|
| 1704 | + up_write(&clients_rwsem); |
|---|
| 1705 | + return ret; |
|---|
| 1706 | +} |
|---|
| 1707 | + |
|---|
| 1708 | +static void remove_client_id(struct ib_client *client) |
|---|
| 1709 | +{ |
|---|
| 1710 | + down_write(&clients_rwsem); |
|---|
| 1711 | + xa_erase(&clients, client->client_id); |
|---|
| 1712 | + for (; highest_client_id; highest_client_id--) |
|---|
| 1713 | + if (xa_load(&clients, highest_client_id - 1)) |
|---|
| 1714 | + break; |
|---|
| 1715 | + up_write(&clients_rwsem); |
|---|
| 1716 | +} |
|---|
| 622 | 1717 | |
|---|
| 623 | 1718 | /** |
|---|
| 624 | 1719 | * ib_register_client - Register an IB client |
|---|
| .. | .. |
|---|
| 636 | 1731 | int ib_register_client(struct ib_client *client) |
|---|
| 637 | 1732 | { |
|---|
| 638 | 1733 | struct ib_device *device; |
|---|
| 1734 | + unsigned long index; |
|---|
| 1735 | + int ret; |
|---|
| 639 | 1736 | |
|---|
| 640 | | - mutex_lock(&device_mutex); |
|---|
| 1737 | + refcount_set(&client->uses, 1); |
|---|
| 1738 | + init_completion(&client->uses_zero); |
|---|
| 1739 | + ret = assign_client_id(client); |
|---|
| 1740 | + if (ret) |
|---|
| 1741 | + return ret; |
|---|
| 641 | 1742 | |
|---|
| 642 | | - list_for_each_entry(device, &device_list, core_list) |
|---|
| 643 | | - if (!add_client_context(device, client) && client->add) |
|---|
| 644 | | - client->add(device); |
|---|
| 645 | | - |
|---|
| 646 | | - down_write(&lists_rwsem); |
|---|
| 647 | | - list_add_tail(&client->list, &client_list); |
|---|
| 648 | | - up_write(&lists_rwsem); |
|---|
| 649 | | - |
|---|
| 650 | | - mutex_unlock(&device_mutex); |
|---|
| 651 | | - |
|---|
| 1743 | + down_read(&devices_rwsem); |
|---|
| 1744 | + xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { |
|---|
| 1745 | + ret = add_client_context(device, client); |
|---|
| 1746 | + if (ret) { |
|---|
| 1747 | + up_read(&devices_rwsem); |
|---|
| 1748 | + ib_unregister_client(client); |
|---|
| 1749 | + return ret; |
|---|
| 1750 | + } |
|---|
| 1751 | + } |
|---|
| 1752 | + up_read(&devices_rwsem); |
|---|
| 652 | 1753 | return 0; |
|---|
| 653 | 1754 | } |
|---|
| 654 | 1755 | EXPORT_SYMBOL(ib_register_client); |
|---|
| .. | .. |
|---|
| 660 | 1761 | * Upper level users use ib_unregister_client() to remove their client |
|---|
| 661 | 1762 | * registration. When ib_unregister_client() is called, the client |
|---|
| 662 | 1763 | * will receive a remove callback for each IB device still registered. |
|---|
| 1764 | + * |
|---|
| 1765 | + * This is a full fence; once it returns, no client callbacks will be called
|---|
| 1766 | + * or be running in another thread.
|---|
| 663 | 1767 | */ |
|---|
| 664 | 1768 | void ib_unregister_client(struct ib_client *client) |
|---|
| 665 | 1769 | { |
|---|
| 666 | | - struct ib_client_data *context, *tmp; |
|---|
| 667 | 1770 | struct ib_device *device; |
|---|
| 668 | | - unsigned long flags; |
|---|
| 1771 | + unsigned long index; |
|---|
| 669 | 1772 | |
|---|
| 670 | | - mutex_lock(&device_mutex); |
|---|
| 1773 | + down_write(&clients_rwsem); |
|---|
| 1774 | + ib_client_put(client); |
|---|
| 1775 | + xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); |
|---|
| 1776 | + up_write(&clients_rwsem); |
|---|
| 671 | 1777 | |
|---|
| 672 | | - down_write(&lists_rwsem); |
|---|
| 673 | | - list_del(&client->list); |
|---|
| 674 | | - up_write(&lists_rwsem); |
|---|
| 675 | | - |
|---|
| 676 | | - list_for_each_entry(device, &device_list, core_list) { |
|---|
| 677 | | - struct ib_client_data *found_context = NULL; |
|---|
| 678 | | - |
|---|
| 679 | | - down_write(&lists_rwsem); |
|---|
| 680 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 681 | | - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) |
|---|
| 682 | | - if (context->client == client) { |
|---|
| 683 | | - context->going_down = true; |
|---|
| 684 | | - found_context = context; |
|---|
| 685 | | - break; |
|---|
| 686 | | - } |
|---|
| 687 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 688 | | - up_write(&lists_rwsem); |
|---|
| 689 | | - |
|---|
| 690 | | - if (client->remove) |
|---|
| 691 | | - client->remove(device, found_context ? |
|---|
| 692 | | - found_context->data : NULL); |
|---|
| 693 | | - |
|---|
| 694 | | - if (!found_context) { |
|---|
| 695 | | - pr_warn("No client context found for %s/%s\n", |
|---|
| 696 | | - device->name, client->name); |
|---|
| 1778 | + /* We do not want to hold locks while calling client->remove() */
|---|
| 1779 | + rcu_read_lock(); |
|---|
| 1780 | + xa_for_each (&devices, index, device) { |
|---|
| 1781 | + if (!ib_device_try_get(device)) |
|---|
| 697 | 1782 | continue; |
|---|
| 698 | | - } |
|---|
| 1783 | + rcu_read_unlock(); |
|---|
| 699 | 1784 | |
|---|
| 700 | | - down_write(&lists_rwsem); |
|---|
| 701 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 702 | | - list_del(&found_context->list); |
|---|
| 703 | | - kfree(found_context); |
|---|
| 704 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 705 | | - up_write(&lists_rwsem); |
|---|
| 1785 | + remove_client_context(device, client->client_id); |
|---|
| 1786 | + |
|---|
| 1787 | + ib_device_put(device); |
|---|
| 1788 | + rcu_read_lock(); |
|---|
| 706 | 1789 | } |
|---|
| 1790 | + rcu_read_unlock(); |
|---|
| 707 | 1791 | |
|---|
| 708 | | - mutex_unlock(&device_mutex); |
|---|
| 1792 | + /* |
|---|
| 1793 | + * remove_client_context() is not a fence; it can return even though a
|---|
| 1794 | + * removal is ongoing. Wait until all removals are completed. |
|---|
| 1795 | + */ |
|---|
| 1796 | + wait_for_completion(&client->uses_zero); |
|---|
| 1797 | + remove_client_id(client); |
|---|
| 709 | 1798 | } |
|---|
| 710 | 1799 | EXPORT_SYMBOL(ib_unregister_client); |
|---|
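For the client side of the API, a minimal sketch of an ib_client that stores per-device context with ib_set_client_data() and tears it down in remove(). The names are hypothetical, and the int-returning add() callback is assumed to match the registration flow above, which propagates a failure from add_client_context().

```c
static struct ib_client myclient;	/* forward declaration for the callbacks */

struct myclient_ctx {
	struct ib_device *ibdev;
	/* ... per-device client state ... */
};

static int myclient_add(struct ib_device *ibdev)
{
	struct myclient_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return -ENOMEM;
	ctx->ibdev = ibdev;
	ib_set_client_data(ibdev, &myclient, ctx);
	return 0;
}

static void myclient_remove(struct ib_device *ibdev, void *client_data)
{
	kfree(client_data);	/* the context set in myclient_add() */
}

static struct ib_client myclient = {
	.name	= "myclient",
	.add	= myclient_add,
	.remove	= myclient_remove,
};

/* Module init/exit would call ib_register_client(&myclient) and
 * ib_unregister_client(&myclient) respectively. */
```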
| 711 | 1800 | |
|---|
| 712 | | -/** |
|---|
| 713 | | - * ib_get_client_data - Get IB client context |
|---|
| 714 | | - * @device:Device to get context for |
|---|
| 715 | | - * @client:Client to get context for |
|---|
| 716 | | - * |
|---|
| 717 | | - * ib_get_client_data() returns client context set with |
|---|
| 718 | | - * ib_set_client_data(). |
|---|
| 719 | | - */ |
|---|
| 720 | | -void *ib_get_client_data(struct ib_device *device, struct ib_client *client) |
|---|
| 1801 | +static int __ib_get_global_client_nl_info(const char *client_name, |
|---|
| 1802 | + struct ib_client_nl_info *res) |
|---|
| 721 | 1803 | { |
|---|
| 722 | | - struct ib_client_data *context; |
|---|
| 723 | | - void *ret = NULL; |
|---|
| 724 | | - unsigned long flags; |
|---|
| 1804 | + struct ib_client *client; |
|---|
| 1805 | + unsigned long index; |
|---|
| 1806 | + int ret = -ENOENT; |
|---|
| 725 | 1807 | |
|---|
| 726 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 727 | | - list_for_each_entry(context, &device->client_data_list, list) |
|---|
| 728 | | - if (context->client == client) { |
|---|
| 729 | | - ret = context->data; |
|---|
| 1808 | + down_read(&clients_rwsem); |
|---|
| 1809 | + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { |
|---|
| 1810 | + if (strcmp(client->name, client_name) != 0) |
|---|
| 1811 | + continue; |
|---|
| 1812 | + if (!client->get_global_nl_info) { |
|---|
| 1813 | + ret = -EOPNOTSUPP; |
|---|
| 730 | 1814 | break; |
|---|
| 731 | 1815 | } |
|---|
| 732 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 1816 | + ret = client->get_global_nl_info(res); |
|---|
| 1817 | + if (WARN_ON(ret == -ENOENT)) |
|---|
| 1818 | + ret = -EINVAL; |
|---|
| 1819 | + if (!ret && res->cdev) |
|---|
| 1820 | + get_device(res->cdev); |
|---|
| 1821 | + break; |
|---|
| 1822 | + } |
|---|
| 1823 | + up_read(&clients_rwsem); |
|---|
| 1824 | + return ret; |
|---|
| 1825 | +} |
|---|
| 1826 | + |
|---|
| 1827 | +static int __ib_get_client_nl_info(struct ib_device *ibdev, |
|---|
| 1828 | + const char *client_name, |
|---|
| 1829 | + struct ib_client_nl_info *res) |
|---|
| 1830 | +{ |
|---|
| 1831 | + unsigned long index; |
|---|
| 1832 | + void *client_data; |
|---|
| 1833 | + int ret = -ENOENT; |
|---|
| 1834 | + |
|---|
| 1835 | + down_read(&ibdev->client_data_rwsem); |
|---|
| 1836 | + xan_for_each_marked (&ibdev->client_data, index, client_data, |
|---|
| 1837 | + CLIENT_DATA_REGISTERED) { |
|---|
| 1838 | + struct ib_client *client = xa_load(&clients, index); |
|---|
| 1839 | + |
|---|
| 1840 | + if (!client || strcmp(client->name, client_name) != 0) |
|---|
| 1841 | + continue; |
|---|
| 1842 | + if (!client->get_nl_info) { |
|---|
| 1843 | + ret = -EOPNOTSUPP; |
|---|
| 1844 | + break; |
|---|
| 1845 | + } |
|---|
| 1846 | + ret = client->get_nl_info(ibdev, client_data, res); |
|---|
| 1847 | + if (WARN_ON(ret == -ENOENT)) |
|---|
| 1848 | + ret = -EINVAL; |
|---|
| 1849 | + |
|---|
| 1850 | + /* |
|---|
| 1851 | + * The cdev is guaranteed valid as long as we are inside the |
|---|
| 1852 | + * client_data_rwsem as remove_one can't be called. Keep it |
|---|
| 1853 | + * valid for the caller. |
|---|
| 1854 | + */ |
|---|
| 1855 | + if (!ret && res->cdev) |
|---|
| 1856 | + get_device(res->cdev); |
|---|
| 1857 | + break; |
|---|
| 1858 | + } |
|---|
| 1859 | + up_read(&ibdev->client_data_rwsem); |
|---|
| 733 | 1860 | |
|---|
| 734 | 1861 | return ret; |
|---|
| 735 | 1862 | } |
|---|
| 736 | | -EXPORT_SYMBOL(ib_get_client_data); |
|---|
| 1863 | + |
|---|
| 1864 | +/** |
|---|
| 1865 | + * ib_get_client_nl_info - Fetch the nl_info from a client |
|---|
| 1866 | + * @device - IB device |
|---|
| 1867 | + * @client_name - Name of the client |
|---|
| 1868 | + * @res - Result of the query |
|---|
| 1869 | + */ |
|---|
| 1870 | +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, |
|---|
| 1871 | + struct ib_client_nl_info *res) |
|---|
| 1872 | +{ |
|---|
| 1873 | + int ret; |
|---|
| 1874 | + |
|---|
| 1875 | + if (ibdev) |
|---|
| 1876 | + ret = __ib_get_client_nl_info(ibdev, client_name, res); |
|---|
| 1877 | + else |
|---|
| 1878 | + ret = __ib_get_global_client_nl_info(client_name, res); |
|---|
| 1879 | +#ifdef CONFIG_MODULES |
|---|
| 1880 | + if (ret == -ENOENT) { |
|---|
| 1881 | + request_module("rdma-client-%s", client_name); |
|---|
| 1882 | + if (ibdev) |
|---|
| 1883 | + ret = __ib_get_client_nl_info(ibdev, client_name, res); |
|---|
| 1884 | + else |
|---|
| 1885 | + ret = __ib_get_global_client_nl_info(client_name, res); |
|---|
| 1886 | + } |
|---|
| 1887 | +#endif |
|---|
| 1888 | + if (ret) { |
|---|
| 1889 | + if (ret == -ENOENT) |
|---|
| 1890 | + return -EOPNOTSUPP; |
|---|
| 1891 | + return ret; |
|---|
| 1892 | + } |
|---|
| 1893 | + |
|---|
| 1894 | + if (WARN_ON(!res->cdev)) |
|---|
| 1895 | + return -EINVAL; |
|---|
| 1896 | + return 0; |
|---|
| 1897 | +} |
|---|
| 737 | 1898 | |
|---|
| 738 | 1899 | /** |
|---|
| 739 | 1900 | * ib_set_client_data - Set IB client context |
|---|
| .. | .. |
|---|
| 741 | 1902 | * @client:Client to set context for |
|---|
| 742 | 1903 | * @data:Context to set |
|---|
| 743 | 1904 | * |
|---|
| 744 | | - * ib_set_client_data() sets client context that can be retrieved with |
|---|
| 745 | | - * ib_get_client_data(). |
|---|
| 1905 | + * ib_set_client_data() sets client context data that can be retrieved with |
|---|
| 1906 | + * ib_get_client_data(). This can only be called while the client is |
|---|
| 1907 | + * registered to the device; once the ib_client remove() callback returns, this
|---|
| 1908 | + * cannot be called. |
|---|
| 746 | 1909 | */ |
|---|
| 747 | 1910 | void ib_set_client_data(struct ib_device *device, struct ib_client *client, |
|---|
| 748 | 1911 | void *data) |
|---|
| 749 | 1912 | { |
|---|
| 750 | | - struct ib_client_data *context; |
|---|
| 751 | | - unsigned long flags; |
|---|
| 1913 | + void *rc; |
|---|
| 752 | 1914 | |
|---|
| 753 | | - spin_lock_irqsave(&device->client_data_lock, flags); |
|---|
| 754 | | - list_for_each_entry(context, &device->client_data_list, list) |
|---|
| 755 | | - if (context->client == client) { |
|---|
| 756 | | - context->data = data; |
|---|
| 757 | | - goto out; |
|---|
| 758 | | - } |
|---|
| 1915 | + if (WARN_ON(IS_ERR(data))) |
|---|
| 1916 | + data = NULL; |
|---|
| 759 | 1917 | |
|---|
| 760 | | - pr_warn("No client context found for %s/%s\n", |
|---|
| 761 | | - device->name, client->name); |
|---|
| 762 | | - |
|---|
| 763 | | -out: |
|---|
| 764 | | - spin_unlock_irqrestore(&device->client_data_lock, flags); |
|---|
| 1918 | + rc = xa_store(&device->client_data, client->client_id, data, |
|---|
| 1919 | + GFP_KERNEL); |
|---|
| 1920 | + WARN_ON(xa_is_err(rc)); |
|---|
| 765 | 1921 | } |
|---|
| 766 | 1922 | EXPORT_SYMBOL(ib_set_client_data); |
|---|
| 767 | 1923 | |
|---|
| .. | .. |
|---|
| 771 | 1927 | * |
|---|
| 772 | 1928 | * ib_register_event_handler() registers an event handler that will be |
|---|
| 773 | 1929 | * called back when asynchronous IB events occur (as defined in |
|---|
| 774 | | - * chapter 11 of the InfiniBand Architecture Specification). This |
|---|
| 775 | | - * callback may occur in interrupt context. |
|---|
| 1930 | + * chapter 11 of the InfiniBand Architecture Specification). This |
|---|
| 1931 | + * callback occurs in workqueue context. |
|---|
| 776 | 1932 | */ |
|---|
| 777 | 1933 | void ib_register_event_handler(struct ib_event_handler *event_handler) |
|---|
| 778 | 1934 | { |
|---|
| 779 | | - unsigned long flags; |
|---|
| 780 | | - |
|---|
| 781 | | - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); |
|---|
| 1935 | + down_write(&event_handler->device->event_handler_rwsem); |
|---|
| 782 | 1936 | list_add_tail(&event_handler->list, |
|---|
| 783 | 1937 | &event_handler->device->event_handler_list); |
|---|
| 784 | | - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); |
|---|
| 1938 | + up_write(&event_handler->device->event_handler_rwsem); |
|---|
| 785 | 1939 | } |
|---|
| 786 | 1940 | EXPORT_SYMBOL(ib_register_event_handler); |
|---|
| 787 | 1941 | |
|---|
| .. | .. |
|---|
| 794 | 1948 | */ |
|---|
| 795 | 1949 | void ib_unregister_event_handler(struct ib_event_handler *event_handler) |
|---|
| 796 | 1950 | { |
|---|
| 797 | | - unsigned long flags; |
|---|
| 798 | | - |
|---|
| 799 | | - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); |
|---|
| 1951 | + down_write(&event_handler->device->event_handler_rwsem); |
|---|
| 800 | 1952 | list_del(&event_handler->list); |
|---|
| 801 | | - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); |
|---|
| 1953 | + up_write(&event_handler->device->event_handler_rwsem); |
|---|
| 802 | 1954 | } |
|---|
| 803 | 1955 | EXPORT_SYMBOL(ib_unregister_event_handler); |
|---|
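As a usage sketch, a client might watch asynchronous events on a device like this; INIT_IB_EVENT_HANDLER is the initializer from <rdma/ib_verbs.h>, and the handler runs in workqueue context per the comment above. The my_* names are hypothetical.

```c
#include <rdma/ib_verbs.h>

static void my_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	pr_info("%s: async event %d\n",
		dev_name(&event->device->dev), event->event);
}

static struct ib_event_handler my_handler;

static void my_watch_device(struct ib_device *ibdev)
{
	INIT_IB_EVENT_HANDLER(&my_handler, ibdev, my_event_handler);
	ib_register_event_handler(&my_handler);
	/* ... later: ib_unregister_event_handler(&my_handler); */
}
```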
| 804 | 1956 | |
|---|
| 805 | | -/** |
|---|
| 806 | | - * ib_dispatch_event - Dispatch an asynchronous event |
|---|
| 807 | | - * @event:Event to dispatch |
|---|
| 808 | | - * |
|---|
| 809 | | - * Low-level drivers must call ib_dispatch_event() to dispatch the |
|---|
| 810 | | - * event to all registered event handlers when an asynchronous event |
|---|
| 811 | | - * occurs. |
|---|
| 812 | | - */ |
|---|
| 813 | | -void ib_dispatch_event(struct ib_event *event) |
|---|
| 1957 | +void ib_dispatch_event_clients(struct ib_event *event) |
|---|
| 814 | 1958 | { |
|---|
| 815 | | - unsigned long flags; |
|---|
| 816 | 1959 | struct ib_event_handler *handler; |
|---|
| 817 | 1960 | |
|---|
| 818 | | - spin_lock_irqsave(&event->device->event_handler_lock, flags); |
|---|
| 1961 | + down_read(&event->device->event_handler_rwsem); |
|---|
| 819 | 1962 | |
|---|
| 820 | 1963 | list_for_each_entry(handler, &event->device->event_handler_list, list) |
|---|
| 821 | 1964 | handler->handler(handler, event); |
|---|
| 822 | 1965 | |
|---|
| 823 | | - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); |
|---|
| 1966 | + up_read(&event->device->event_handler_rwsem); |
|---|
| 824 | 1967 | } |
|---|
| 825 | | -EXPORT_SYMBOL(ib_dispatch_event); |
|---|
| 1968 | + |
|---|
| 1969 | +static int iw_query_port(struct ib_device *device, |
|---|
| 1970 | + u8 port_num, |
|---|
| 1971 | + struct ib_port_attr *port_attr) |
|---|
| 1972 | +{ |
|---|
| 1973 | + struct in_device *inetdev; |
|---|
| 1974 | + struct net_device *netdev; |
|---|
| 1975 | + |
|---|
| 1976 | + memset(port_attr, 0, sizeof(*port_attr)); |
|---|
| 1977 | + |
|---|
| 1978 | + netdev = ib_device_get_netdev(device, port_num); |
|---|
| 1979 | + if (!netdev) |
|---|
| 1980 | + return -ENODEV; |
|---|
| 1981 | + |
|---|
| 1982 | + port_attr->max_mtu = IB_MTU_4096; |
|---|
| 1983 | + port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); |
|---|
| 1984 | + |
|---|
| 1985 | + if (!netif_carrier_ok(netdev)) { |
|---|
| 1986 | + port_attr->state = IB_PORT_DOWN; |
|---|
| 1987 | + port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; |
|---|
| 1988 | + } else { |
|---|
| 1989 | + rcu_read_lock(); |
|---|
| 1990 | + inetdev = __in_dev_get_rcu(netdev); |
|---|
| 1991 | + |
|---|
| 1992 | + if (inetdev && inetdev->ifa_list) { |
|---|
| 1993 | + port_attr->state = IB_PORT_ACTIVE; |
|---|
| 1994 | + port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; |
|---|
| 1995 | + } else { |
|---|
| 1996 | + port_attr->state = IB_PORT_INIT; |
|---|
| 1997 | + port_attr->phys_state = |
|---|
| 1998 | + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; |
|---|
| 1999 | + } |
|---|
| 2000 | + |
|---|
| 2001 | + rcu_read_unlock(); |
|---|
| 2002 | + } |
|---|
| 2003 | + |
|---|
| 2004 | + dev_put(netdev); |
|---|
| 2005 | + return device->ops.query_port(device, port_num, port_attr); |
|---|
| 2006 | +} |
|---|
| 2007 | + |
|---|
| 2008 | +static int __ib_query_port(struct ib_device *device, |
|---|
| 2009 | + u8 port_num, |
|---|
| 2010 | + struct ib_port_attr *port_attr) |
|---|
| 2011 | +{ |
|---|
| 2012 | + union ib_gid gid = {}; |
|---|
| 2013 | + int err; |
|---|
| 2014 | + |
|---|
| 2015 | + memset(port_attr, 0, sizeof(*port_attr)); |
|---|
| 2016 | + |
|---|
| 2017 | + err = device->ops.query_port(device, port_num, port_attr); |
|---|
| 2018 | + if (err || port_attr->subnet_prefix) |
|---|
| 2019 | + return err; |
|---|
| 2020 | + |
|---|
| 2021 | + if (rdma_port_get_link_layer(device, port_num) != |
|---|
| 2022 | + IB_LINK_LAYER_INFINIBAND) |
|---|
| 2023 | + return 0; |
|---|
| 2024 | + |
|---|
| 2025 | + err = device->ops.query_gid(device, port_num, 0, &gid); |
|---|
| 2026 | + if (err) |
|---|
| 2027 | + return err; |
|---|
| 2028 | + |
|---|
| 2029 | + port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); |
|---|
| 2030 | + return 0; |
|---|
| 2031 | +} |
|---|
| 826 | 2032 | |
|---|
| 827 | 2033 | /** |
|---|
| 828 | 2034 | * ib_query_port - Query IB port attributes |
|---|
| .. | .. |
|---|
| 837 | 2043 | u8 port_num, |
|---|
| 838 | 2044 | struct ib_port_attr *port_attr) |
|---|
| 839 | 2045 | { |
|---|
| 840 | | - union ib_gid gid; |
|---|
| 841 | | - int err; |
|---|
| 842 | | - |
|---|
| 843 | 2046 | if (!rdma_is_port_valid(device, port_num)) |
|---|
| 844 | 2047 | return -EINVAL; |
|---|
| 845 | 2048 | |
|---|
| 846 | | - memset(port_attr, 0, sizeof(*port_attr)); |
|---|
| 847 | | - err = device->query_port(device, port_num, port_attr); |
|---|
| 848 | | - if (err || port_attr->subnet_prefix) |
|---|
| 849 | | - return err; |
|---|
| 850 | | - |
|---|
| 851 | | - if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) |
|---|
| 852 | | - return 0; |
|---|
| 853 | | - |
|---|
| 854 | | - err = device->query_gid(device, port_num, 0, &gid); |
|---|
| 855 | | - if (err) |
|---|
| 856 | | - return err; |
|---|
| 857 | | - |
|---|
| 858 | | - port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); |
|---|
| 859 | | - return 0; |
|---|
| 2049 | + if (rdma_protocol_iwarp(device, port_num)) |
|---|
| 2050 | + return iw_query_port(device, port_num, port_attr); |
|---|
| 2051 | + else |
|---|
| 2052 | + return __ib_query_port(device, port_num, port_attr); |
|---|
| 860 | 2053 | } |
|---|
| 861 | 2054 | EXPORT_SYMBOL(ib_query_port); |
|---|
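A small sketch of a consumer iterating ports and querying their attributes; mydrv_dump_ports is hypothetical, while rdma_for_each_port() and ib_query_port() are the core helpers shown here.

```c
/* Hypothetical helper: log the state of every port on a held device. */
static void mydrv_dump_ports(struct ib_device *ibdev)
{
	struct ib_port_attr attr;
	unsigned int port;

	rdma_for_each_port(ibdev, port) {
		if (ib_query_port(ibdev, port, &attr))
			continue;
		dev_info(&ibdev->dev, "port %u: state %d, active_mtu %d\n",
			 port, attr.state, attr.active_mtu);
	}
}
```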
| 2055 | + |
|---|
| 2056 | +static void add_ndev_hash(struct ib_port_data *pdata) |
|---|
| 2057 | +{ |
|---|
| 2058 | + unsigned long flags; |
|---|
| 2059 | + |
|---|
| 2060 | + might_sleep(); |
|---|
| 2061 | + |
|---|
| 2062 | + spin_lock_irqsave(&ndev_hash_lock, flags); |
|---|
| 2063 | + if (hash_hashed(&pdata->ndev_hash_link)) { |
|---|
| 2064 | + hash_del_rcu(&pdata->ndev_hash_link); |
|---|
| 2065 | + spin_unlock_irqrestore(&ndev_hash_lock, flags); |
|---|
| 2066 | + /* |
|---|
| 2067 | + * We cannot do hash_add_rcu after a hash_del_rcu until the |
|---|
| 2068 | + * grace period |
|---|
| 2069 | + */ |
|---|
| 2070 | + synchronize_rcu(); |
|---|
| 2071 | + spin_lock_irqsave(&ndev_hash_lock, flags); |
|---|
| 2072 | + } |
|---|
| 2073 | + if (pdata->netdev) |
|---|
| 2074 | + hash_add_rcu(ndev_hash, &pdata->ndev_hash_link, |
|---|
| 2075 | + (uintptr_t)pdata->netdev); |
|---|
| 2076 | + spin_unlock_irqrestore(&ndev_hash_lock, flags); |
|---|
| 2077 | +} |
|---|
| 2078 | + |
|---|
| 2079 | +/** |
|---|
| 2080 | + * ib_device_set_netdev - Associate the ib_dev with an underlying net_device |
|---|
| 2081 | + * @ib_dev: Device to modify |
|---|
| 2082 | + * @ndev: net_device to affiliate, may be NULL |
|---|
| 2083 | + * @port: IB port the net_device is connected to |
|---|
| 2084 | + * |
|---|
| 2085 | + * Drivers should use this to link the ib_device to a netdev so the netdev |
|---|
| 2086 | + * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be |
|---|
| 2087 | + * affiliated with any port. |
|---|
| 2088 | + * |
|---|
| 2089 | + * The caller must ensure that the given ndev is not unregistered or |
|---|
| 2090 | + * unregistering, and that either the ib_device is unregistered or |
|---|
| 2091 | + * ib_device_set_netdev() is called with NULL when the ndev sends a |
|---|
| 2092 | + * NETDEV_UNREGISTER event. |
|---|
| 2093 | + */ |
|---|
| 2094 | +int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, |
|---|
| 2095 | + unsigned int port) |
|---|
| 2096 | +{ |
|---|
| 2097 | + struct net_device *old_ndev; |
|---|
| 2098 | + struct ib_port_data *pdata; |
|---|
| 2099 | + unsigned long flags; |
|---|
| 2100 | + int ret; |
|---|
| 2101 | + |
|---|
| 2102 | + /* |
|---|
| 2103 | + * Drivers may wish to call this before ib_register_device(), so we have to
|---|
| 2104 | + * set up the port data early.
|---|
| 2105 | + */ |
|---|
| 2106 | + ret = alloc_port_data(ib_dev); |
|---|
| 2107 | + if (ret) |
|---|
| 2108 | + return ret; |
|---|
| 2109 | + |
|---|
| 2110 | + if (!rdma_is_port_valid(ib_dev, port)) |
|---|
| 2111 | + return -EINVAL; |
|---|
| 2112 | + |
|---|
| 2113 | + pdata = &ib_dev->port_data[port]; |
|---|
| 2114 | + spin_lock_irqsave(&pdata->netdev_lock, flags); |
|---|
| 2115 | + old_ndev = rcu_dereference_protected( |
|---|
| 2116 | + pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); |
|---|
| 2117 | + if (old_ndev == ndev) { |
|---|
| 2118 | + spin_unlock_irqrestore(&pdata->netdev_lock, flags); |
|---|
| 2119 | + return 0; |
|---|
| 2120 | + } |
|---|
| 2121 | + |
|---|
| 2122 | + if (ndev) |
|---|
| 2123 | + dev_hold(ndev); |
|---|
| 2124 | + rcu_assign_pointer(pdata->netdev, ndev); |
|---|
| 2125 | + spin_unlock_irqrestore(&pdata->netdev_lock, flags); |
|---|
| 2126 | + |
|---|
| 2127 | + add_ndev_hash(pdata); |
|---|
| 2128 | + if (old_ndev) |
|---|
| 2129 | + dev_put(old_ndev); |
|---|
| 2130 | + |
|---|
| 2131 | + return 0; |
|---|
| 2132 | +} |
|---|
| 2133 | +EXPORT_SYMBOL(ib_device_set_netdev); |
|---|
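A sketch of how a RoCE driver might keep this association current from its netdev notifier. It assumes a hypothetical mydrv_dev that embeds the notifier_block and remembers its net_device, and uses port 1 to stand in for whichever port the netdev backs; passing NULL on NETDEV_UNREGISTER satisfies the requirement in the kernel-doc above.

```c
/* Hypothetical notifier fragment for a single-port RoCE device. */
static int mydrv_netdev_event(struct notifier_block *nb,
			      unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mydrv_dev *dev = container_of(nb, struct mydrv_dev, nb);

	if (ndev != dev->ndev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
		ib_device_set_netdev(&dev->ibdev, ndev, 1);
		break;
	case NETDEV_UNREGISTER:
		ib_device_set_netdev(&dev->ibdev, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}
```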
| 2134 | + |
|---|
| 2135 | +static void free_netdevs(struct ib_device *ib_dev) |
|---|
| 2136 | +{ |
|---|
| 2137 | + unsigned long flags; |
|---|
| 2138 | + unsigned int port; |
|---|
| 2139 | + |
|---|
| 2140 | + if (!ib_dev->port_data) |
|---|
| 2141 | + return; |
|---|
| 2142 | + |
|---|
| 2143 | + rdma_for_each_port (ib_dev, port) { |
|---|
| 2144 | + struct ib_port_data *pdata = &ib_dev->port_data[port]; |
|---|
| 2145 | + struct net_device *ndev; |
|---|
| 2146 | + |
|---|
| 2147 | + spin_lock_irqsave(&pdata->netdev_lock, flags); |
|---|
| 2148 | + ndev = rcu_dereference_protected( |
|---|
| 2149 | + pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); |
|---|
| 2150 | + if (ndev) { |
|---|
| 2151 | + spin_lock(&ndev_hash_lock); |
|---|
| 2152 | + hash_del_rcu(&pdata->ndev_hash_link); |
|---|
| 2153 | + spin_unlock(&ndev_hash_lock); |
|---|
| 2154 | + |
|---|
| 2155 | + /* |
|---|
| 2156 | + * If this is the last dev_put there is still a |
|---|
| 2157 | + * synchronize_rcu before the netdev is kfreed, so we |
|---|
| 2158 | + * can continue to rely on unlocked pointer |
|---|
| 2159 | + * comparisons after the put |
|---|
| 2160 | + */ |
|---|
| 2161 | + rcu_assign_pointer(pdata->netdev, NULL); |
|---|
| 2162 | + dev_put(ndev); |
|---|
| 2163 | + } |
|---|
| 2164 | + spin_unlock_irqrestore(&pdata->netdev_lock, flags); |
|---|
| 2165 | + } |
|---|
| 2166 | +} |
|---|
| 2167 | + |
|---|
| 2168 | +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, |
|---|
| 2169 | + unsigned int port) |
|---|
| 2170 | +{ |
|---|
| 2171 | + struct ib_port_data *pdata; |
|---|
| 2172 | + struct net_device *res; |
|---|
| 2173 | + |
|---|
| 2174 | + if (!rdma_is_port_valid(ib_dev, port)) |
|---|
| 2175 | + return NULL; |
|---|
| 2176 | + |
|---|
| 2177 | + pdata = &ib_dev->port_data[port]; |
|---|
| 2178 | + |
|---|
| 2179 | + /* |
|---|
| 2180 | + * New drivers should use ib_device_set_netdev() not the legacy |
|---|
| 2181 | + * get_netdev(). |
|---|
| 2182 | + */ |
|---|
| 2183 | + if (ib_dev->ops.get_netdev) |
|---|
| 2184 | + res = ib_dev->ops.get_netdev(ib_dev, port); |
|---|
| 2185 | + else { |
|---|
| 2186 | + spin_lock(&pdata->netdev_lock); |
|---|
| 2187 | + res = rcu_dereference_protected( |
|---|
| 2188 | + pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); |
|---|
| 2189 | + if (res) |
|---|
| 2190 | + dev_hold(res); |
|---|
| 2191 | + spin_unlock(&pdata->netdev_lock); |
|---|
| 2192 | + } |
|---|
| 2193 | + |
|---|
| 2194 | + /* |
|---|
| 2195 | + * If we are starting to unregister, expedite things by preventing
|---|
| 2196 | + * propagation of an unregistering netdev. |
|---|
| 2197 | + */ |
|---|
| 2198 | + if (res && res->reg_state != NETREG_REGISTERED) { |
|---|
| 2199 | + dev_put(res); |
|---|
| 2200 | + return NULL; |
|---|
| 2201 | + } |
|---|
| 2202 | + |
|---|
| 2203 | + return res; |
|---|
| 2204 | +} |
|---|
| 2205 | + |
|---|
| 2206 | +/** |
|---|
| 2207 | + * ib_device_get_by_netdev - Find an IB device associated with a netdev |
|---|
| 2208 | + * @ndev: netdev to locate |
|---|
| 2209 | + * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) |
|---|
| 2210 | + * |
|---|
| 2211 | + * Find and hold an ib_device that is associated with a netdev via |
|---|
| 2212 | + * ib_device_set_netdev(). The caller must call ib_device_put() on the |
|---|
| 2213 | + * returned pointer. |
|---|
| 2214 | + */ |
|---|
| 2215 | +struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, |
|---|
| 2216 | + enum rdma_driver_id driver_id) |
|---|
| 2217 | +{ |
|---|
| 2218 | + struct ib_device *res = NULL; |
|---|
| 2219 | + struct ib_port_data *cur; |
|---|
| 2220 | + |
|---|
| 2221 | + rcu_read_lock(); |
|---|
| 2222 | + hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link, |
|---|
| 2223 | + (uintptr_t)ndev) { |
|---|
| 2224 | + if (rcu_access_pointer(cur->netdev) == ndev && |
|---|
| 2225 | + (driver_id == RDMA_DRIVER_UNKNOWN || |
|---|
| 2226 | + cur->ib_dev->ops.driver_id == driver_id) && |
|---|
| 2227 | + ib_device_try_get(cur->ib_dev)) { |
|---|
| 2228 | + res = cur->ib_dev; |
|---|
| 2229 | + break; |
|---|
| 2230 | + } |
|---|
| 2231 | + } |
|---|
| 2232 | + rcu_read_unlock(); |
|---|
| 2233 | + |
|---|
| 2234 | + return res; |
|---|
| 2235 | +} |
|---|
| 2236 | +EXPORT_SYMBOL(ib_device_get_by_netdev); |
|---|
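
A consumer that only holds a net_device can map it back to the owning ib_device as sketched below; the lookup walks the RCU hash populated by ib_device_set_netdev(), and the returned device carries a reference that must be released with ib_device_put(). The helper name here is hypothetical:

```c
#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper: report which RDMA device, if any, owns this netdev. */
static void example_report_rdma_owner(struct net_device *ndev)
{
	struct ib_device *ibdev;

	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
	if (!ibdev)
		return;

	pr_info("%s is backed by RDMA device %s\n",
		netdev_name(ndev), dev_name(&ibdev->dev));

	/* The lookup took a reference; drop it when the device is no longer needed. */
	ib_device_put(ibdev);
}
```
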
| 862 | 2237 | |
|---|
| 863 | 2238 | /** |
|---|
| 864 | 2239 | * ib_enum_roce_netdev - enumerate all RoCE ports |
|---|
| .. | .. |
|---|
| 878 | 2253 | roce_netdev_callback cb, |
|---|
| 879 | 2254 | void *cookie) |
|---|
| 880 | 2255 | { |
|---|
| 881 | | - u8 port; |
|---|
| 2256 | + unsigned int port; |
|---|
| 882 | 2257 | |
|---|
| 883 | | - for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev); |
|---|
| 884 | | - port++) |
|---|
| 2258 | + rdma_for_each_port (ib_dev, port) |
|---|
| 885 | 2259 | if (rdma_protocol_roce(ib_dev, port)) { |
|---|
| 886 | | - struct net_device *idev = NULL; |
|---|
| 887 | | - |
|---|
| 888 | | - if (ib_dev->get_netdev) |
|---|
| 889 | | - idev = ib_dev->get_netdev(ib_dev, port); |
|---|
| 890 | | - |
|---|
| 891 | | - if (idev && |
|---|
| 892 | | - idev->reg_state >= NETREG_UNREGISTERED) { |
|---|
| 893 | | - dev_put(idev); |
|---|
| 894 | | - idev = NULL; |
|---|
| 895 | | - } |
|---|
| 2260 | + struct net_device *idev = |
|---|
| 2261 | + ib_device_get_netdev(ib_dev, port); |
|---|
| 896 | 2262 | |
|---|
| 897 | 2263 | if (filter(ib_dev, port, idev, filter_cookie)) |
|---|
| 898 | 2264 | cb(ib_dev, port, idev, cookie); |
|---|
| .. | .. |
|---|
| 919 | 2285 | void *cookie) |
|---|
| 920 | 2286 | { |
|---|
| 921 | 2287 | struct ib_device *dev; |
|---|
| 2288 | + unsigned long index; |
|---|
| 922 | 2289 | |
|---|
| 923 | | - down_read(&lists_rwsem); |
|---|
| 924 | | - list_for_each_entry(dev, &device_list, core_list) |
|---|
| 2290 | + down_read(&devices_rwsem); |
|---|
| 2291 | + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) |
|---|
| 925 | 2292 | ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); |
|---|
| 926 | | - up_read(&lists_rwsem); |
|---|
| 2293 | + up_read(&devices_rwsem); |
|---|
| 927 | 2294 | } |
|---|
| 928 | 2295 | |
|---|
| 929 | 2296 | /** |
|---|
| .. | .. |
|---|
| 935 | 2302 | int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, |
|---|
| 936 | 2303 | struct netlink_callback *cb) |
|---|
| 937 | 2304 | { |
|---|
| 2305 | + unsigned long index; |
|---|
| 938 | 2306 | struct ib_device *dev; |
|---|
| 939 | 2307 | unsigned int idx = 0; |
|---|
| 940 | 2308 | int ret = 0; |
|---|
| 941 | 2309 | |
|---|
| 942 | | - down_read(&lists_rwsem); |
|---|
| 943 | | - list_for_each_entry(dev, &device_list, core_list) { |
|---|
| 2310 | + down_read(&devices_rwsem); |
|---|
| 2311 | + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { |
|---|
| 2312 | + if (!rdma_dev_access_netns(dev, sock_net(skb->sk))) |
|---|
| 2313 | + continue; |
|---|
| 2314 | + |
|---|
| 944 | 2315 | ret = nldev_cb(dev, skb, cb, idx); |
|---|
| 945 | 2316 | if (ret) |
|---|
| 946 | 2317 | break; |
|---|
| 947 | 2318 | idx++; |
|---|
| 948 | 2319 | } |
|---|
| 949 | | - |
|---|
| 950 | | - up_read(&lists_rwsem); |
|---|
| 2320 | + up_read(&devices_rwsem); |
|---|
| 951 | 2321 | return ret; |
|---|
| 952 | 2322 | } |
|---|
| 953 | 2323 | |
|---|
| .. | .. |
|---|
| 963 | 2333 | int ib_query_pkey(struct ib_device *device, |
|---|
| 964 | 2334 | u8 port_num, u16 index, u16 *pkey) |
|---|
| 965 | 2335 | { |
|---|
| 966 | | - return device->query_pkey(device, port_num, index, pkey); |
|---|
| 2336 | + if (!rdma_is_port_valid(device, port_num)) |
|---|
| 2337 | + return -EINVAL; |
|---|
| 2338 | + |
|---|
| 2339 | + if (!device->ops.query_pkey) |
|---|
| 2340 | + return -EOPNOTSUPP; |
|---|
| 2341 | + |
|---|
| 2342 | + return device->ops.query_pkey(device, port_num, index, pkey); |
|---|
| 967 | 2343 | } |
|---|
| 968 | 2344 | EXPORT_SYMBOL(ib_query_pkey); |
|---|
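
As a quick illustration of the guarded call above, a caller can walk a port's P_Key table with ib_query_pkey(); the table length comes from the per-port immutable data, the same field used by the P_Key lookup helper later in this file. The debug helper below is hypothetical:

```c
#include <rdma/ib_verbs.h>

/* Hypothetical debug helper: print every readable P_Key of one port. */
static void example_dump_pkeys(struct ib_device *device, u8 port_num)
{
	u16 pkey;
	int i;

	if (!rdma_is_port_valid(device, port_num))
		return;

	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len; i++) {
		if (ib_query_pkey(device, port_num, i, &pkey))
			continue;
		pr_info("port %u pkey[%d] = 0x%04x\n", port_num, i, pkey);
	}
}
```
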
| 969 | 2345 | |
|---|
| .. | .. |
|---|
| 980 | 2356 | int device_modify_mask, |
|---|
| 981 | 2357 | struct ib_device_modify *device_modify) |
|---|
| 982 | 2358 | { |
|---|
| 983 | | - if (!device->modify_device) |
|---|
| 984 | | - return -ENOSYS; |
|---|
| 2359 | + if (!device->ops.modify_device) |
|---|
| 2360 | + return -EOPNOTSUPP; |
|---|
| 985 | 2361 | |
|---|
| 986 | | - return device->modify_device(device, device_modify_mask, |
|---|
| 987 | | - device_modify); |
|---|
| 2362 | + return device->ops.modify_device(device, device_modify_mask, |
|---|
| 2363 | + device_modify); |
|---|
| 988 | 2364 | } |
|---|
| 989 | 2365 | EXPORT_SYMBOL(ib_modify_device); |
|---|
| 990 | 2366 | |
|---|
| .. | .. |
|---|
| 1008 | 2384 | if (!rdma_is_port_valid(device, port_num)) |
|---|
| 1009 | 2385 | return -EINVAL; |
|---|
| 1010 | 2386 | |
|---|
| 1011 | | - if (device->modify_port) |
|---|
| 1012 | | - rc = device->modify_port(device, port_num, port_modify_mask, |
|---|
| 1013 | | - port_modify); |
|---|
| 2387 | + if (device->ops.modify_port) |
|---|
| 2388 | + rc = device->ops.modify_port(device, port_num, |
|---|
| 2389 | + port_modify_mask, |
|---|
| 2390 | + port_modify); |
|---|
| 2391 | + else if (rdma_protocol_roce(device, port_num) && |
|---|
| 2392 | + ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 || |
|---|
| 2393 | + (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0)) |
|---|
| 2394 | + rc = 0; |
|---|
| 1014 | 2395 | else |
|---|
| 1015 | | - rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; |
|---|
| 2396 | + rc = -EOPNOTSUPP; |
|---|
| 1016 | 2397 | return rc; |
|---|
| 1017 | 2398 | } |
|---|
| 1018 | 2399 | EXPORT_SYMBOL(ib_modify_port); |
|---|
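
A typical caller of ib_modify_port() only flips capability bits, for example advertising CM support; per the branch above, a RoCE port whose driver has no modify_port op treats an IB_PORT_CM_SUP-only change as a successful no-op. A hedged sketch, with a hypothetical helper name:

```c
#include <rdma/ib_verbs.h>

/* Hypothetical: advertise CM support on one port. */
static int example_set_cm_sup(struct ib_device *device, u8 port_num)
{
	struct ib_port_modify mod = {
		.set_port_cap_mask = IB_PORT_CM_SUP,
	};

	return ib_modify_port(device, port_num, 0, &mod);
}
```
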
| .. | .. |
|---|
| 1030 | 2411 | u8 *port_num, u16 *index) |
|---|
| 1031 | 2412 | { |
|---|
| 1032 | 2413 | union ib_gid tmp_gid; |
|---|
| 1033 | | - int ret, port, i; |
|---|
| 2414 | + unsigned int port; |
|---|
| 2415 | + int ret, i; |
|---|
| 1034 | 2416 | |
|---|
| 1035 | | - for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { |
|---|
| 2417 | + rdma_for_each_port (device, port) { |
|---|
| 1036 | 2418 | if (!rdma_protocol_ib(device, port)) |
|---|
| 1037 | 2419 | continue; |
|---|
| 1038 | 2420 | |
|---|
| 1039 | | - for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { |
|---|
| 2421 | + for (i = 0; i < device->port_data[port].immutable.gid_tbl_len; |
|---|
| 2422 | + ++i) { |
|---|
| 1040 | 2423 | ret = rdma_query_gid(device, port, i, &tmp_gid); |
|---|
| 1041 | 2424 | if (ret) |
|---|
| 1042 | 2425 | continue; |
|---|
| .. | .. |
|---|
| 1069 | 2452 | u16 tmp_pkey; |
|---|
| 1070 | 2453 | int partial_ix = -1; |
|---|
| 1071 | 2454 | |
|---|
| 1072 | | - for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { |
|---|
| 2455 | + for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len; |
|---|
| 2456 | + ++i) { |
|---|
| 1073 | 2457 | ret = ib_query_pkey(device, port_num, i, &tmp_pkey); |
|---|
| 1074 | 2458 | if (ret) |
|---|
| 1075 | 2459 | return ret; |
|---|
| .. | .. |
|---|
| 1102 | 2486 | * @gid: A GID that the net_dev uses to communicate. |
|---|
| 1103 | 2487 | * @addr: Contains the IP address that the request specified as its |
|---|
| 1104 | 2488 | * destination. |
|---|
| 2489 | + * |
|---|
| 1105 | 2490 | */ |
|---|
| 1106 | 2491 | struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, |
|---|
| 1107 | 2492 | u8 port, |
|---|
| .. | .. |
|---|
| 1110 | 2495 | const struct sockaddr *addr) |
|---|
| 1111 | 2496 | { |
|---|
| 1112 | 2497 | struct net_device *net_dev = NULL; |
|---|
| 1113 | | - struct ib_client_data *context; |
|---|
| 2498 | + unsigned long index; |
|---|
| 2499 | + void *client_data; |
|---|
| 1114 | 2500 | |
|---|
| 1115 | 2501 | if (!rdma_protocol_ib(dev, port)) |
|---|
| 1116 | 2502 | return NULL; |
|---|
| 1117 | 2503 | |
|---|
| 1118 | | - down_read(&lists_rwsem); |
|---|
| 2504 | + /* |
|---|
| 2505 | + * Holding the read side guarantees that the client will not become |
|---|
| 2506 | + * unregistered while we are calling get_net_dev_by_params() |
|---|
| 2507 | + */ |
|---|
| 2508 | + down_read(&dev->client_data_rwsem); |
|---|
| 2509 | + xan_for_each_marked (&dev->client_data, index, client_data, |
|---|
| 2510 | + CLIENT_DATA_REGISTERED) { |
|---|
| 2511 | + struct ib_client *client = xa_load(&clients, index); |
|---|
| 1119 | 2512 | |
|---|
| 1120 | | - list_for_each_entry(context, &dev->client_data_list, list) { |
|---|
| 1121 | | - struct ib_client *client = context->client; |
|---|
| 1122 | | - |
|---|
| 1123 | | - if (context->going_down) |
|---|
| 2513 | + if (!client || !client->get_net_dev_by_params) |
|---|
| 1124 | 2514 | continue; |
|---|
| 1125 | 2515 | |
|---|
| 1126 | | - if (client->get_net_dev_by_params) { |
|---|
| 1127 | | - net_dev = client->get_net_dev_by_params(dev, port, pkey, |
|---|
| 1128 | | - gid, addr, |
|---|
| 1129 | | - context->data); |
|---|
| 1130 | | - if (net_dev) |
|---|
| 1131 | | - break; |
|---|
| 1132 | | - } |
|---|
| 2516 | + net_dev = client->get_net_dev_by_params(dev, port, pkey, gid, |
|---|
| 2517 | + addr, client_data); |
|---|
| 2518 | + if (net_dev) |
|---|
| 2519 | + break; |
|---|
| 1133 | 2520 | } |
|---|
| 1134 | | - |
|---|
| 1135 | | - up_read(&lists_rwsem); |
|---|
| 2521 | + up_read(&dev->client_data_rwsem); |
|---|
| 1136 | 2522 | |
|---|
| 1137 | 2523 | return net_dev; |
|---|
| 1138 | 2524 | } |
|---|
| 1139 | 2525 | EXPORT_SYMBOL(ib_get_net_dev_by_params); |
|---|
| 2526 | + |
|---|
| 2527 | +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) |
|---|
| 2528 | +{ |
|---|
| 2529 | + struct ib_device_ops *dev_ops = &dev->ops; |
|---|
| 2530 | +#define SET_DEVICE_OP(ptr, name) \ |
|---|
| 2531 | + do { \ |
|---|
| 2532 | + if (ops->name) \ |
|---|
| 2533 | + if (!((ptr)->name)) \ |
|---|
| 2534 | + (ptr)->name = ops->name; \ |
|---|
| 2535 | + } while (0) |
|---|
| 2536 | + |
|---|
| 2537 | +#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) |
|---|
| 2538 | + |
|---|
| 2539 | + if (ops->driver_id != RDMA_DRIVER_UNKNOWN) { |
|---|
| 2540 | + WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN && |
|---|
| 2541 | + dev_ops->driver_id != ops->driver_id); |
|---|
| 2542 | + dev_ops->driver_id = ops->driver_id; |
|---|
| 2543 | + } |
|---|
| 2544 | + if (ops->owner) { |
|---|
| 2545 | + WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner); |
|---|
| 2546 | + dev_ops->owner = ops->owner; |
|---|
| 2547 | + } |
|---|
| 2548 | + if (ops->uverbs_abi_ver) |
|---|
| 2549 | + dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver; |
|---|
| 2550 | + |
|---|
| 2551 | + dev_ops->uverbs_no_driver_id_binding |= |
|---|
| 2552 | + ops->uverbs_no_driver_id_binding; |
|---|
| 2553 | + |
|---|
| 2554 | + SET_DEVICE_OP(dev_ops, add_gid); |
|---|
| 2555 | + SET_DEVICE_OP(dev_ops, advise_mr); |
|---|
| 2556 | + SET_DEVICE_OP(dev_ops, alloc_dm); |
|---|
| 2557 | + SET_DEVICE_OP(dev_ops, alloc_hw_stats); |
|---|
| 2558 | + SET_DEVICE_OP(dev_ops, alloc_mr); |
|---|
| 2559 | + SET_DEVICE_OP(dev_ops, alloc_mr_integrity); |
|---|
| 2560 | + SET_DEVICE_OP(dev_ops, alloc_mw); |
|---|
| 2561 | + SET_DEVICE_OP(dev_ops, alloc_pd); |
|---|
| 2562 | + SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); |
|---|
| 2563 | + SET_DEVICE_OP(dev_ops, alloc_ucontext); |
|---|
| 2564 | + SET_DEVICE_OP(dev_ops, alloc_xrcd); |
|---|
| 2565 | + SET_DEVICE_OP(dev_ops, attach_mcast); |
|---|
| 2566 | + SET_DEVICE_OP(dev_ops, check_mr_status); |
|---|
| 2567 | + SET_DEVICE_OP(dev_ops, counter_alloc_stats); |
|---|
| 2568 | + SET_DEVICE_OP(dev_ops, counter_bind_qp); |
|---|
| 2569 | + SET_DEVICE_OP(dev_ops, counter_dealloc); |
|---|
| 2570 | + SET_DEVICE_OP(dev_ops, counter_unbind_qp); |
|---|
| 2571 | + SET_DEVICE_OP(dev_ops, counter_update_stats); |
|---|
| 2572 | + SET_DEVICE_OP(dev_ops, create_ah); |
|---|
| 2573 | + SET_DEVICE_OP(dev_ops, create_counters); |
|---|
| 2574 | + SET_DEVICE_OP(dev_ops, create_cq); |
|---|
| 2575 | + SET_DEVICE_OP(dev_ops, create_flow); |
|---|
| 2576 | + SET_DEVICE_OP(dev_ops, create_flow_action_esp); |
|---|
| 2577 | + SET_DEVICE_OP(dev_ops, create_qp); |
|---|
| 2578 | + SET_DEVICE_OP(dev_ops, create_rwq_ind_table); |
|---|
| 2579 | + SET_DEVICE_OP(dev_ops, create_srq); |
|---|
| 2580 | + SET_DEVICE_OP(dev_ops, create_wq); |
|---|
| 2581 | + SET_DEVICE_OP(dev_ops, dealloc_dm); |
|---|
| 2582 | + SET_DEVICE_OP(dev_ops, dealloc_driver); |
|---|
| 2583 | + SET_DEVICE_OP(dev_ops, dealloc_mw); |
|---|
| 2584 | + SET_DEVICE_OP(dev_ops, dealloc_pd); |
|---|
| 2585 | + SET_DEVICE_OP(dev_ops, dealloc_ucontext); |
|---|
| 2586 | + SET_DEVICE_OP(dev_ops, dealloc_xrcd); |
|---|
| 2587 | + SET_DEVICE_OP(dev_ops, del_gid); |
|---|
| 2588 | + SET_DEVICE_OP(dev_ops, dereg_mr); |
|---|
| 2589 | + SET_DEVICE_OP(dev_ops, destroy_ah); |
|---|
| 2590 | + SET_DEVICE_OP(dev_ops, destroy_counters); |
|---|
| 2591 | + SET_DEVICE_OP(dev_ops, destroy_cq); |
|---|
| 2592 | + SET_DEVICE_OP(dev_ops, destroy_flow); |
|---|
| 2593 | + SET_DEVICE_OP(dev_ops, destroy_flow_action); |
|---|
| 2594 | + SET_DEVICE_OP(dev_ops, destroy_qp); |
|---|
| 2595 | + SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table); |
|---|
| 2596 | + SET_DEVICE_OP(dev_ops, destroy_srq); |
|---|
| 2597 | + SET_DEVICE_OP(dev_ops, destroy_wq); |
|---|
| 2598 | + SET_DEVICE_OP(dev_ops, detach_mcast); |
|---|
| 2599 | + SET_DEVICE_OP(dev_ops, disassociate_ucontext); |
|---|
| 2600 | + SET_DEVICE_OP(dev_ops, drain_rq); |
|---|
| 2601 | + SET_DEVICE_OP(dev_ops, drain_sq); |
|---|
| 2602 | + SET_DEVICE_OP(dev_ops, enable_driver); |
|---|
| 2603 | + SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry); |
|---|
| 2604 | + SET_DEVICE_OP(dev_ops, fill_res_cq_entry); |
|---|
| 2605 | + SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw); |
|---|
| 2606 | + SET_DEVICE_OP(dev_ops, fill_res_mr_entry); |
|---|
| 2607 | + SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw); |
|---|
| 2608 | + SET_DEVICE_OP(dev_ops, fill_res_qp_entry); |
|---|
| 2609 | + SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw); |
|---|
| 2610 | + SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); |
|---|
| 2611 | + SET_DEVICE_OP(dev_ops, get_dev_fw_str); |
|---|
| 2612 | + SET_DEVICE_OP(dev_ops, get_dma_mr); |
|---|
| 2613 | + SET_DEVICE_OP(dev_ops, get_hw_stats); |
|---|
| 2614 | + SET_DEVICE_OP(dev_ops, get_link_layer); |
|---|
| 2615 | + SET_DEVICE_OP(dev_ops, get_netdev); |
|---|
| 2616 | + SET_DEVICE_OP(dev_ops, get_port_immutable); |
|---|
| 2617 | + SET_DEVICE_OP(dev_ops, get_vector_affinity); |
|---|
| 2618 | + SET_DEVICE_OP(dev_ops, get_vf_config); |
|---|
| 2619 | + SET_DEVICE_OP(dev_ops, get_vf_guid); |
|---|
| 2620 | + SET_DEVICE_OP(dev_ops, get_vf_stats); |
|---|
| 2621 | + SET_DEVICE_OP(dev_ops, init_port); |
|---|
| 2622 | + SET_DEVICE_OP(dev_ops, iw_accept); |
|---|
| 2623 | + SET_DEVICE_OP(dev_ops, iw_add_ref); |
|---|
| 2624 | + SET_DEVICE_OP(dev_ops, iw_connect); |
|---|
| 2625 | + SET_DEVICE_OP(dev_ops, iw_create_listen); |
|---|
| 2626 | + SET_DEVICE_OP(dev_ops, iw_destroy_listen); |
|---|
| 2627 | + SET_DEVICE_OP(dev_ops, iw_get_qp); |
|---|
| 2628 | + SET_DEVICE_OP(dev_ops, iw_reject); |
|---|
| 2629 | + SET_DEVICE_OP(dev_ops, iw_rem_ref); |
|---|
| 2630 | + SET_DEVICE_OP(dev_ops, map_mr_sg); |
|---|
| 2631 | + SET_DEVICE_OP(dev_ops, map_mr_sg_pi); |
|---|
| 2632 | + SET_DEVICE_OP(dev_ops, mmap); |
|---|
| 2633 | + SET_DEVICE_OP(dev_ops, mmap_free); |
|---|
| 2634 | + SET_DEVICE_OP(dev_ops, modify_ah); |
|---|
| 2635 | + SET_DEVICE_OP(dev_ops, modify_cq); |
|---|
| 2636 | + SET_DEVICE_OP(dev_ops, modify_device); |
|---|
| 2637 | + SET_DEVICE_OP(dev_ops, modify_flow_action_esp); |
|---|
| 2638 | + SET_DEVICE_OP(dev_ops, modify_port); |
|---|
| 2639 | + SET_DEVICE_OP(dev_ops, modify_qp); |
|---|
| 2640 | + SET_DEVICE_OP(dev_ops, modify_srq); |
|---|
| 2641 | + SET_DEVICE_OP(dev_ops, modify_wq); |
|---|
| 2642 | + SET_DEVICE_OP(dev_ops, peek_cq); |
|---|
| 2643 | + SET_DEVICE_OP(dev_ops, poll_cq); |
|---|
| 2644 | + SET_DEVICE_OP(dev_ops, post_recv); |
|---|
| 2645 | + SET_DEVICE_OP(dev_ops, post_send); |
|---|
| 2646 | + SET_DEVICE_OP(dev_ops, post_srq_recv); |
|---|
| 2647 | + SET_DEVICE_OP(dev_ops, process_mad); |
|---|
| 2648 | + SET_DEVICE_OP(dev_ops, query_ah); |
|---|
| 2649 | + SET_DEVICE_OP(dev_ops, query_device); |
|---|
| 2650 | + SET_DEVICE_OP(dev_ops, query_gid); |
|---|
| 2651 | + SET_DEVICE_OP(dev_ops, query_pkey); |
|---|
| 2652 | + SET_DEVICE_OP(dev_ops, query_port); |
|---|
| 2653 | + SET_DEVICE_OP(dev_ops, query_qp); |
|---|
| 2654 | + SET_DEVICE_OP(dev_ops, query_srq); |
|---|
| 2655 | + SET_DEVICE_OP(dev_ops, query_ucontext); |
|---|
| 2656 | + SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); |
|---|
| 2657 | + SET_DEVICE_OP(dev_ops, read_counters); |
|---|
| 2658 | + SET_DEVICE_OP(dev_ops, reg_dm_mr); |
|---|
| 2659 | + SET_DEVICE_OP(dev_ops, reg_user_mr); |
|---|
| 2660 | + SET_DEVICE_OP(dev_ops, req_ncomp_notif); |
|---|
| 2661 | + SET_DEVICE_OP(dev_ops, req_notify_cq); |
|---|
| 2662 | + SET_DEVICE_OP(dev_ops, rereg_user_mr); |
|---|
| 2663 | + SET_DEVICE_OP(dev_ops, resize_cq); |
|---|
| 2664 | + SET_DEVICE_OP(dev_ops, set_vf_guid); |
|---|
| 2665 | + SET_DEVICE_OP(dev_ops, set_vf_link_state); |
|---|
| 2666 | + |
|---|
| 2667 | + SET_OBJ_SIZE(dev_ops, ib_ah); |
|---|
| 2668 | + SET_OBJ_SIZE(dev_ops, ib_counters); |
|---|
| 2669 | + SET_OBJ_SIZE(dev_ops, ib_cq); |
|---|
| 2670 | + SET_OBJ_SIZE(dev_ops, ib_mw); |
|---|
| 2671 | + SET_OBJ_SIZE(dev_ops, ib_pd); |
|---|
| 2672 | + SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table); |
|---|
| 2673 | + SET_OBJ_SIZE(dev_ops, ib_srq); |
|---|
| 2674 | + SET_OBJ_SIZE(dev_ops, ib_ucontext); |
|---|
| 2675 | + SET_OBJ_SIZE(dev_ops, ib_xrcd); |
|---|
| 2676 | +} |
|---|
| 2677 | +EXPORT_SYMBOL(ib_set_device_ops); |
|---|
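
The write-once merge above (SET_DEVICE_OP only fills callbacks that are still NULL) is what lets a provider declare its entry points in a single static const table and hand them to ib_set_device_ops() before registering the device. A hedged sketch for a hypothetical "xyz" driver; the xyz_* callbacks are assumed to be defined elsewhere in that driver, and a real driver supplies its own enum rdma_driver_id value instead of the placeholder:

```c
#include <rdma/ib_verbs.h>

/* Hypothetical provider callbacks, defined elsewhere in the driver. */
static const struct ib_device_ops xyz_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_UNKNOWN,	/* placeholder; use the driver's ID */
	.uverbs_abi_ver = 1,

	.query_device = xyz_query_device,
	.query_port = xyz_query_port,
	.query_pkey = xyz_query_pkey,
	.get_port_immutable = xyz_get_port_immutable,
};

static void xyz_init_ops(struct ib_device *ibdev)
{
	/* Callbacks left unset stay NULL; several core helpers then return -EOPNOTSUPP. */
	ib_set_device_ops(ibdev, &xyz_dev_ops);
	/* ... ib_register_device() follows later in the probe path ... */
}
```
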
| 2678 | + |
|---|
| 2679 | +#ifdef CONFIG_INFINIBAND_VIRT_DMA |
|---|
| 2680 | +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) |
|---|
| 2681 | +{ |
|---|
| 2682 | + struct scatterlist *s; |
|---|
| 2683 | + int i; |
|---|
| 2684 | + |
|---|
| 2685 | + for_each_sg(sg, s, nents, i) { |
|---|
| 2686 | + sg_dma_address(s) = (uintptr_t)sg_virt(s); |
|---|
| 2687 | + sg_dma_len(s) = s->length; |
|---|
| 2688 | + } |
|---|
| 2689 | + return nents; |
|---|
| 2690 | +} |
|---|
| 2691 | +EXPORT_SYMBOL(ib_dma_virt_map_sg); |
|---|
| 2692 | +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ |
|---|
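
ib_dma_virt_map_sg() serves software providers that have no DMA-capable device: the "mapping" is the identity, so each scatterlist entry's DMA address is simply the kernel virtual address of its data. A small sketch of that invariant, using a hypothetical helper name:

```c
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Hypothetical sanity check after a virtual-DMA mapping. */
static void example_check_virt_map(struct ib_device *dev,
				   struct scatterlist *sgl, int nents)
{
	struct scatterlist *s;
	int i, mapped;

	mapped = ib_dma_virt_map_sg(dev, sgl, nents);
	for_each_sg(sgl, s, mapped, i)
		WARN_ON(sg_dma_address(s) != (dma_addr_t)(uintptr_t)sg_virt(s));
}
```
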
| 1140 | 2693 | |
|---|
| 1141 | 2694 | static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { |
|---|
| 1142 | 2695 | [RDMA_NL_LS_OP_RESOLVE] = { |
|---|
| .. | .. |
|---|
| 1183 | 2736 | goto err_comp_unbound; |
|---|
| 1184 | 2737 | } |
|---|
| 1185 | 2738 | |
|---|
| 1186 | | - ret = rdma_nl_init(); |
|---|
| 1187 | | - if (ret) { |
|---|
| 1188 | | - pr_warn("Couldn't init IB netlink interface: err %d\n", ret); |
|---|
| 1189 | | - goto err_sysfs; |
|---|
| 1190 | | - } |
|---|
| 2739 | + rdma_nl_init(); |
|---|
| 1191 | 2740 | |
|---|
| 1192 | 2741 | ret = addr_init(); |
|---|
| 1193 | 2742 | if (ret) { |
|---|
| 1194 | | - pr_warn("Could't init IB address resolution\n"); |
|---|
| 2743 | + pr_warn("Couldn't init IB address resolution\n"); |
|---|
| 1195 | 2744 | goto err_ibnl; |
|---|
| 1196 | 2745 | } |
|---|
| 1197 | 2746 | |
|---|
| .. | .. |
|---|
| 1207 | 2756 | goto err_mad; |
|---|
| 1208 | 2757 | } |
|---|
| 1209 | 2758 | |
|---|
| 1210 | | - ret = register_lsm_notifier(&ibdev_lsm_nb); |
|---|
| 2759 | + ret = register_blocking_lsm_notifier(&ibdev_lsm_nb); |
|---|
| 1211 | 2760 | if (ret) { |
|---|
| 1212 | 2761 | pr_warn("Couldn't register LSM notifier. ret %d\n", ret); |
|---|
| 1213 | 2762 | goto err_sa; |
|---|
| 1214 | 2763 | } |
|---|
| 1215 | 2764 | |
|---|
| 2765 | + ret = register_pernet_device(&rdma_dev_net_ops); |
|---|
| 2766 | + if (ret) { |
|---|
| 2767 | + pr_warn("Couldn't init compat dev. ret %d\n", ret); |
|---|
| 2768 | + goto err_compat; |
|---|
| 2769 | + } |
|---|
| 2770 | + |
|---|
| 1216 | 2771 | nldev_init(); |
|---|
| 1217 | 2772 | rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); |
|---|
| 1218 | | - roce_gid_mgmt_init(); |
|---|
| 2773 | + ret = roce_gid_mgmt_init(); |
|---|
| 2774 | + if (ret) { |
|---|
| 2775 | + pr_warn("Couldn't init RoCE GID management\n"); |
|---|
| 2776 | + goto err_parent; |
|---|
| 2777 | + } |
|---|
| 1219 | 2778 | |
|---|
| 1220 | 2779 | return 0; |
|---|
| 1221 | 2780 | |
|---|
| 2781 | +err_parent: |
|---|
| 2782 | + rdma_nl_unregister(RDMA_NL_LS); |
|---|
| 2783 | + nldev_exit(); |
|---|
| 2784 | + unregister_pernet_device(&rdma_dev_net_ops); |
|---|
| 2785 | +err_compat: |
|---|
| 2786 | + unregister_blocking_lsm_notifier(&ibdev_lsm_nb); |
|---|
| 1222 | 2787 | err_sa: |
|---|
| 1223 | 2788 | ib_sa_cleanup(); |
|---|
| 1224 | 2789 | err_mad: |
|---|
| .. | .. |
|---|
| 1226 | 2791 | err_addr: |
|---|
| 1227 | 2792 | addr_cleanup(); |
|---|
| 1228 | 2793 | err_ibnl: |
|---|
| 1229 | | - rdma_nl_exit(); |
|---|
| 1230 | | -err_sysfs: |
|---|
| 1231 | 2794 | class_unregister(&ib_class); |
|---|
| 1232 | 2795 | err_comp_unbound: |
|---|
| 1233 | 2796 | destroy_workqueue(ib_comp_unbound_wq); |
|---|
| .. | .. |
|---|
| 1241 | 2804 | static void __exit ib_core_cleanup(void) |
|---|
| 1242 | 2805 | { |
|---|
| 1243 | 2806 | roce_gid_mgmt_cleanup(); |
|---|
| 1244 | | - nldev_exit(); |
|---|
| 1245 | 2807 | rdma_nl_unregister(RDMA_NL_LS); |
|---|
| 1246 | | - unregister_lsm_notifier(&ibdev_lsm_nb); |
|---|
| 2808 | + nldev_exit(); |
|---|
| 2809 | + unregister_pernet_device(&rdma_dev_net_ops); |
|---|
| 2810 | + unregister_blocking_lsm_notifier(&ibdev_lsm_nb); |
|---|
| 1247 | 2811 | ib_sa_cleanup(); |
|---|
| 1248 | 2812 | ib_mad_cleanup(); |
|---|
| 1249 | 2813 | addr_cleanup(); |
|---|
| .. | .. |
|---|
| 1253 | 2817 | destroy_workqueue(ib_comp_wq); |
|---|
| 1254 | 2818 | /* Make sure that any pending umem accounting work is done. */ |
|---|
| 1255 | 2819 | destroy_workqueue(ib_wq); |
|---|
| 2820 | + flush_workqueue(system_unbound_wq); |
|---|
| 2821 | + WARN_ON(!xa_empty(&clients)); |
|---|
| 2822 | + WARN_ON(!xa_empty(&devices)); |
|---|
| 1256 | 2823 | } |
|---|
| 1257 | 2824 | |
|---|
| 1258 | 2825 | MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); |
|---|
| 1259 | 2826 | |
|---|
| 1260 | | -subsys_initcall(ib_core_init); |
|---|
| 2827 | +/* ib core relies on the netdev stack registering the net_ns_type_operations |
|---|
| 2828 | + * ns kobject type before ib_core initialization. |
|---|
| 2829 | + */ |
|---|
| 2830 | +fs_initcall(ib_core_init); |
|---|
| 1261 | 2831 | module_exit(ib_core_cleanup); |
|---|