| 1 | +// SPDX-License-Identifier: GPL-2.0 |
1 | 2 | /* |
2 | 3 |  * NVM Express device driver |
3 | 4 |  * Copyright (c) 2011-2014, Intel Corporation. |
4 | | - * |
5 | | - * This program is free software; you can redistribute it and/or modify it |
6 | | - * under the terms and conditions of the GNU General Public License, |
7 | | - * version 2, as published by the Free Software Foundation. |
8 | | - * |
9 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
12 | | - * more details. |
13 | 5 |  */ |
14 | 6 | |
| 7 | +#include <linux/acpi.h> |
15 | 8 | #include <linux/aer.h> |
16 | 9 | #include <linux/async.h> |
17 | 10 | #include <linux/blkdev.h> |
.. | .. |
26 | 19 | #include <linux/mutex.h> |
27 | 20 | #include <linux/once.h> |
28 | 21 | #include <linux/pci.h> |
| 22 | +#include <linux/suspend.h> |
29 | 23 | #include <linux/t10-pi.h> |
30 | 24 | #include <linux/types.h> |
31 | 25 | #include <linux/io-64-nonatomic-lo-hi.h> |
| 26 | +#include <linux/io-64-nonatomic-hi-lo.h> |
32 | 27 | #include <linux/sed-opal.h> |
| 28 | +#include <linux/pci-p2pdma.h> |
33 | 29 | |
| 30 | +#include "trace.h" |
34 | 31 | #include "nvme.h" |
35 | 32 | |
36 | | -#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) |
37 | | -#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) |
| 33 | +#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) |
| 34 | +#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) |
38 | 35 | |
39 | | -#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) |
| 36 | +#define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc)) |
40 | 37 | |
41 | 38 | /* |
42 | 39 |  * These can be higher, but we need to ensure that any command doesn't |
.. | .. |
66 | 63 | static int io_queue_depth_set(const char *val, const struct kernel_param *kp); |
67 | 64 | static const struct kernel_param_ops io_queue_depth_ops = { |
68 | 65 | 	.set = io_queue_depth_set, |
69 | | -	.get = param_get_int, |
| 66 | +	.get = param_get_uint, |
70 | 67 | }; |
71 | 68 | |
72 | | -static int io_queue_depth = 1024; |
| 69 | +static unsigned int io_queue_depth = 1024; |
73 | 70 | module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); |
74 | 71 | MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); |
| 72 | + |
| 73 | +static int io_queue_count_set(const char *val, const struct kernel_param *kp) |
| 74 | +{ |
| 75 | +	unsigned int n; |
| 76 | +	int ret; |
| 77 | + |
| 78 | +	ret = kstrtouint(val, 10, &n); |
| 79 | +	if (ret != 0 || n > num_possible_cpus()) |
| 80 | +		return -EINVAL; |
| 81 | +	return param_set_uint(val, kp); |
| 82 | +} |
| 83 | + |
| 84 | +static const struct kernel_param_ops io_queue_count_ops = { |
| 85 | +	.set = io_queue_count_set, |
| 86 | +	.get = param_get_uint, |
| 87 | +}; |
| 88 | + |
| 89 | +static unsigned int write_queues; |
| 90 | +module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644); |
| 91 | +MODULE_PARM_DESC(write_queues, |
| 92 | +	"Number of queues to use for writes. If not set, reads and writes " |
| 93 | +	"will share a queue set."); |
| 94 | + |
| 95 | +static unsigned int poll_queues; |
| 96 | +module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644); |
| 97 | +MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); |
| 98 | + |
| 99 | +static bool noacpi; |
| 100 | +module_param(noacpi, bool, 0444); |
| 101 | +MODULE_PARM_DESC(noacpi, "disable acpi bios quirks"); |
75 | 102 | |
76 | 103 | struct nvme_dev; |
77 | 104 | struct nvme_queue; |
78 | 105 | |
79 | 106 | static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); |
| 107 | +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode); |
80 | 108 | |
81 | 109 | /* |
82 | 110 |  * Represents an NVM Express device. Each nvme_dev is a PCI function. |
.. | .. |
91 | 119 | 	struct dma_pool *prp_small_pool; |
92 | 120 | 	unsigned online_queues; |
93 | 121 | 	unsigned max_qid; |
| 122 | +	unsigned io_queues[HCTX_MAX_TYPES]; |
94 | 123 | 	unsigned int num_vecs; |
95 | | -	int q_depth; |
| 124 | +	u32 q_depth; |
| 125 | +	int io_sqes; |
96 | 126 | 	u32 db_stride; |
97 | 127 | 	void __iomem *bar; |
98 | 128 | 	unsigned long bar_mapped_size; |
99 | 129 | 	struct work_struct remove_work; |
100 | 130 | 	struct mutex shutdown_lock; |
101 | 131 | 	bool subsystem; |
102 | | -	void __iomem *cmb; |
103 | | -	pci_bus_addr_t cmb_bus_addr; |
104 | 132 | 	u64 cmb_size; |
| 133 | +	bool cmb_use_sqes; |
105 | 134 | 	u32 cmbsz; |
106 | 135 | 	u32 cmbloc; |
107 | 136 | 	struct nvme_ctrl ctrl; |
108 | | -	struct completion ioq_wait; |
| 137 | +	u32 last_ps; |
109 | 138 | |
110 | 139 | 	mempool_t *iod_mempool; |
111 | 140 | |
112 | 141 | 	/* shadow doorbell buffer support: */ |
113 | | -	u32 *dbbuf_dbs; |
| 142 | +	__le32 *dbbuf_dbs; |
114 | 143 | 	dma_addr_t dbbuf_dbs_dma_addr; |
115 | | -	u32 *dbbuf_eis; |
| 144 | +	__le32 *dbbuf_eis; |
116 | 145 | 	dma_addr_t dbbuf_eis_dma_addr; |
117 | 146 | |
118 | 147 | 	/* host memory buffer support: */ |
.. | .. |
121 | 150 | 	dma_addr_t host_mem_descs_dma; |
122 | 151 | 	struct nvme_host_mem_buf_desc *host_mem_descs; |
123 | 152 | 	void **host_mem_desc_bufs; |
| 153 | +	unsigned int nr_allocated_queues; |
| 154 | +	unsigned int nr_write_queues; |
| 155 | +	unsigned int nr_poll_queues; |
124 | 156 | }; |
125 | 157 | |
126 | 158 | static int io_queue_depth_set(const char *val, const struct kernel_param *kp) |
127 | 159 | { |
128 | | -	int n = 0, ret; |
| 160 | +	int ret; |
| 161 | +	u32 n; |
129 | 162 | |
130 | | -	ret = kstrtoint(val, 10, &n); |
| 163 | +	ret = kstrtou32(val, 10, &n); |
131 | 164 | 	if (ret != 0 || n < 2) |
132 | 165 | 		return -EINVAL; |
133 | 166 | |
134 | | -	return param_set_int(val, kp); |
| 167 | +	return param_set_uint(val, kp); |
135 | 168 | } |
136 | 169 | |
137 | 170 | static inline unsigned int sq_idx(unsigned int qid, u32 stride) |
.. | .. |
---|
154 | 187 | * commands and one for I/O commands). |
---|
155 | 188 | */ |
---|
156 | 189 | struct nvme_queue { |
---|
157 | | - struct device *q_dmadev; |
---|
158 | 190 | struct nvme_dev *dev; |
---|
159 | 191 | spinlock_t sq_lock; |
---|
160 | | - struct nvme_command *sq_cmds; |
---|
161 | | - struct nvme_command __iomem *sq_cmds_io; |
---|
162 | | - spinlock_t cq_lock ____cacheline_aligned_in_smp; |
---|
163 | | - volatile struct nvme_completion *cqes; |
---|
164 | | - struct blk_mq_tags **tags; |
---|
| 192 | + void *sq_cmds; |
---|
| 193 | + /* only used for poll queues: */ |
---|
| 194 | + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; |
---|
| 195 | + struct nvme_completion *cqes; |
---|
165 | 196 | dma_addr_t sq_dma_addr; |
---|
166 | 197 | dma_addr_t cq_dma_addr; |
---|
167 | 198 | u32 __iomem *q_db; |
---|
168 | | - u16 q_depth; |
---|
169 | | - s16 cq_vector; |
---|
| 199 | + u32 q_depth; |
---|
| 200 | + u16 cq_vector; |
---|
170 | 201 | u16 sq_tail; |
---|
| 202 | + u16 last_sq_tail; |
---|
171 | 203 | u16 cq_head; |
---|
172 | | - u16 last_cq_head; |
---|
173 | 204 | u16 qid; |
---|
174 | 205 | u8 cq_phase; |
---|
175 | | - u32 *dbbuf_sq_db; |
---|
176 | | - u32 *dbbuf_cq_db; |
---|
177 | | - u32 *dbbuf_sq_ei; |
---|
178 | | - u32 *dbbuf_cq_ei; |
---|
| 206 | + u8 sqes; |
---|
| 207 | + unsigned long flags; |
---|
| 208 | +#define NVMEQ_ENABLED 0 |
---|
| 209 | +#define NVMEQ_SQ_CMB 1 |
---|
| 210 | +#define NVMEQ_DELETE_ERROR 2 |
---|
| 211 | +#define NVMEQ_POLLED 3 |
---|
| 212 | + __le32 *dbbuf_sq_db; |
---|
| 213 | + __le32 *dbbuf_cq_db; |
---|
| 214 | + __le32 *dbbuf_sq_ei; |
---|
| 215 | + __le32 *dbbuf_cq_ei; |
---|
| 216 | + struct completion delete_done; |
---|
179 | 217 | }; |
---|
180 | 218 | |
---|
181 | 219 | /* |
---|
182 | | - * The nvme_iod describes the data in an I/O, including the list of PRP |
---|
183 | | - * entries. You can't see it in this data structure because C doesn't let |
---|
184 | | - * me express that. Use nvme_init_iod to ensure there's enough space |
---|
185 | | - * allocated to store the PRP list. |
---|
| 220 | + * The nvme_iod describes the data in an I/O. |
---|
| 221 | + * |
---|
| 222 | + * The sg pointer contains the list of PRP/SGL chunk allocations in addition |
---|
| 223 | + * to the actual struct scatterlist. |
---|
186 | 224 | */ |
---|
187 | 225 | struct nvme_iod { |
---|
188 | 226 | struct nvme_request req; |
---|
| 227 | + struct nvme_command cmd; |
---|
189 | 228 | struct nvme_queue *nvmeq; |
---|
190 | 229 | bool use_sgl; |
---|
191 | 230 | int aborted; |
---|
192 | 231 | int npages; /* In the PRP list. 0 means small pool in use */ |
---|
193 | 232 | int nents; /* Used in scatterlist */ |
---|
194 | | - int length; /* Of data, in bytes */ |
---|
195 | 233 | dma_addr_t first_dma; |
---|
196 | | - struct scatterlist meta_sg; /* metadata requires single contiguous buffer */ |
---|
| 234 | + unsigned int dma_len; /* length of single DMA segment mapping */ |
---|
| 235 | + dma_addr_t meta_dma; |
---|
197 | 236 | struct scatterlist *sg; |
---|
198 | | - struct scatterlist inline_sg[0]; |
---|
199 | 237 | }; |
---|
200 | 238 | |
---|
201 | | -/* |
---|
202 | | - * Check we didin't inadvertently grow the command struct |
---|
203 | | - */ |
---|
204 | | -static inline void _nvme_check_size(void) |
---|
| 239 | +static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) |
---|
205 | 240 | { |
---|
206 | | - BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); |
---|
207 | | - BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); |
---|
208 | | - BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); |
---|
209 | | - BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); |
---|
210 | | - BUILD_BUG_ON(sizeof(struct nvme_features) != 64); |
---|
211 | | - BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); |
---|
212 | | - BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); |
---|
213 | | - BUILD_BUG_ON(sizeof(struct nvme_command) != 64); |
---|
214 | | - BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); |
---|
215 | | - BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); |
---|
216 | | - BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); |
---|
217 | | - BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); |
---|
218 | | - BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); |
---|
219 | | -} |
---|
220 | | - |
---|
221 | | -static inline unsigned int nvme_dbbuf_size(u32 stride) |
---|
222 | | -{ |
---|
223 | | - return ((num_possible_cpus() + 1) * 8 * stride); |
---|
| 241 | + return dev->nr_allocated_queues * 8 * dev->db_stride; |
---|
224 | 242 | } |
---|
225 | 243 | |
---|
226 | 244 | static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) |
---|
227 | 245 | { |
---|
228 | | - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); |
---|
| 246 | + unsigned int mem_size = nvme_dbbuf_size(dev); |
---|
229 | 247 | |
---|
230 | 248 | if (dev->dbbuf_dbs) |
---|
231 | 249 | return 0; |
---|
.. | .. |
---|
250 | 268 | |
---|
251 | 269 | static void nvme_dbbuf_dma_free(struct nvme_dev *dev) |
---|
252 | 270 | { |
---|
253 | | - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); |
---|
| 271 | + unsigned int mem_size = nvme_dbbuf_size(dev); |
---|
254 | 272 | |
---|
255 | 273 | if (dev->dbbuf_dbs) { |
---|
256 | 274 | dma_free_coherent(dev->dev, mem_size, |
---|
.. | .. |
---|
316 | 334 | } |
---|
317 | 335 | |
---|
318 | 336 | /* Update dbbuf and return true if an MMIO is required */ |
---|
319 | | -static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, |
---|
320 | | - volatile u32 *dbbuf_ei) |
---|
| 337 | +static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, |
---|
| 338 | + volatile __le32 *dbbuf_ei) |
---|
321 | 339 | { |
---|
322 | 340 | if (dbbuf_db) { |
---|
323 | | - u16 old_value; |
---|
| 341 | + u16 old_value, event_idx; |
---|
324 | 342 | |
---|
325 | 343 | /* |
---|
326 | 344 | * Ensure that the queue is written before updating |
---|
.. | .. |
---|
328 | 346 | */ |
---|
329 | 347 | wmb(); |
---|
330 | 348 | |
---|
331 | | - old_value = *dbbuf_db; |
---|
332 | | - *dbbuf_db = value; |
---|
| 349 | + old_value = le32_to_cpu(*dbbuf_db); |
---|
| 350 | + *dbbuf_db = cpu_to_le32(value); |
---|
333 | 351 | |
---|
334 | 352 | /* |
---|
335 | 353 | * Ensure that the doorbell is updated before reading the event |
---|
.. | .. |
---|
339 | 357 | */ |
---|
340 | 358 | mb(); |
---|
341 | 359 | |
---|
342 | | - if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) |
---|
| 360 | + event_idx = le32_to_cpu(*dbbuf_ei); |
---|
| 361 | + if (!nvme_dbbuf_need_event(event_idx, value, old_value)) |
---|
343 | 362 | return false; |
---|
344 | 363 | } |
---|
345 | 364 | |
---|
.. | .. |
---|
347 | 366 | } |
---|
348 | 367 | |
---|
349 | 368 | /* |
---|
350 | | - * Max size of iod being embedded in the request payload |
---|
351 | | - */ |
---|
352 | | -#define NVME_INT_PAGES 2 |
---|
353 | | -#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->ctrl.page_size) |
---|
354 | | - |
---|
355 | | -/* |
---|
356 | 369 | * Will slightly overestimate the number of pages needed. This is OK |
---|
357 | 370 | * as it only leads to a small amount of wasted memory for the lifetime of |
---|
358 | 371 | * the I/O. |
---|
359 | 372 | */ |
---|
360 | | -static int nvme_npages(unsigned size, struct nvme_dev *dev) |
---|
| 373 | +static int nvme_pci_npages_prp(void) |
---|
361 | 374 | { |
---|
362 | | - unsigned nprps = DIV_ROUND_UP(size + dev->ctrl.page_size, |
---|
363 | | - dev->ctrl.page_size); |
---|
364 | | - return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); |
---|
| 375 | + unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; |
---|
| 376 | + unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); |
---|
| 377 | + return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); |
---|
365 | 378 | } |
---|
366 | 379 | |
---|
367 | 380 | /* |
---|
368 | 381 | * Calculates the number of pages needed for the SGL segments. For example a 4k |
---|
369 | 382 | * page can accommodate 256 SGL descriptors. |
---|
370 | 383 | */ |
---|
371 | | -static int nvme_pci_npages_sgl(unsigned int num_seg) |
---|
| 384 | +static int nvme_pci_npages_sgl(void) |
---|
372 | 385 | { |
---|
373 | | - return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE); |
---|
374 | | -} |
---|
375 | | - |
---|
376 | | -static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev, |
---|
377 | | - unsigned int size, unsigned int nseg, bool use_sgl) |
---|
378 | | -{ |
---|
379 | | - size_t alloc_size; |
---|
380 | | - |
---|
381 | | - if (use_sgl) |
---|
382 | | - alloc_size = sizeof(__le64 *) * nvme_pci_npages_sgl(nseg); |
---|
383 | | - else |
---|
384 | | - alloc_size = sizeof(__le64 *) * nvme_npages(size, dev); |
---|
385 | | - |
---|
386 | | - return alloc_size + sizeof(struct scatterlist) * nseg; |
---|
387 | | -} |
---|
388 | | - |
---|
389 | | -static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl) |
---|
390 | | -{ |
---|
391 | | - unsigned int alloc_size = nvme_pci_iod_alloc_size(dev, |
---|
392 | | - NVME_INT_BYTES(dev), NVME_INT_PAGES, |
---|
393 | | - use_sgl); |
---|
394 | | - |
---|
395 | | - return sizeof(struct nvme_iod) + alloc_size; |
---|
| 386 | + return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc), |
---|
| 387 | + NVME_CTRL_PAGE_SIZE); |
---|
396 | 388 | } |
---|
397 | 389 | |
---|
398 | 390 | static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
---|
.. | .. |
---|
403 | 395 | |
---|
404 | 396 | WARN_ON(hctx_idx != 0); |
---|
405 | 397 | WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); |
---|
406 | | - WARN_ON(nvmeq->tags); |
---|
407 | 398 | |
---|
408 | 399 | hctx->driver_data = nvmeq; |
---|
409 | | - nvmeq->tags = &dev->admin_tagset.tags[0]; |
---|
410 | 400 | return 0; |
---|
411 | | -} |
---|
412 | | - |
---|
413 | | -static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) |
---|
414 | | -{ |
---|
415 | | - struct nvme_queue *nvmeq = hctx->driver_data; |
---|
416 | | - |
---|
417 | | - nvmeq->tags = NULL; |
---|
418 | 401 | } |
---|
419 | 402 | |
---|
420 | 403 | static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
---|
.. | .. |
---|
422 | 405 | { |
---|
423 | 406 | struct nvme_dev *dev = data; |
---|
424 | 407 | struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; |
---|
425 | | - |
---|
426 | | - if (!nvmeq->tags) |
---|
427 | | - nvmeq->tags = &dev->tagset.tags[hctx_idx]; |
---|
428 | 408 | |
---|
429 | 409 | WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); |
---|
430 | 410 | hctx->driver_data = nvmeq; |
---|
.. | .. |
---|
446 | 426 | return 0; |
---|
447 | 427 | } |
---|
448 | 428 | |
---|
| 429 | +static int queue_irq_offset(struct nvme_dev *dev) |
---|
| 430 | +{ |
---|
| 431 | + /* if we have more than 1 vec, admin queue offsets us by 1 */ |
---|
| 432 | + if (dev->num_vecs > 1) |
---|
| 433 | + return 1; |
---|
| 434 | + |
---|
| 435 | + return 0; |
---|
| 436 | +} |
---|
| 437 | + |
---|
449 | 438 | static int nvme_pci_map_queues(struct blk_mq_tag_set *set) |
---|
450 | 439 | { |
---|
451 | 440 | struct nvme_dev *dev = set->driver_data; |
---|
| 441 | + int i, qoff, offset; |
---|
452 | 442 | |
---|
453 | | - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), |
---|
454 | | - dev->num_vecs > 1 ? 1 /* admin queue */ : 0); |
---|
| 443 | + offset = queue_irq_offset(dev); |
---|
| 444 | + for (i = 0, qoff = 0; i < set->nr_maps; i++) { |
---|
| 445 | + struct blk_mq_queue_map *map = &set->map[i]; |
---|
| 446 | + |
---|
| 447 | + map->nr_queues = dev->io_queues[i]; |
---|
| 448 | + if (!map->nr_queues) { |
---|
| 449 | + BUG_ON(i == HCTX_TYPE_DEFAULT); |
---|
| 450 | + continue; |
---|
| 451 | + } |
---|
| 452 | + |
---|
| 453 | + /* |
---|
| 454 | + * The poll queue(s) doesn't have an IRQ (and hence IRQ |
---|
| 455 | + * affinity), so use the regular blk-mq cpu mapping |
---|
| 456 | + */ |
---|
| 457 | + map->queue_offset = qoff; |
---|
| 458 | + if (i != HCTX_TYPE_POLL && offset) |
---|
| 459 | + blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); |
---|
| 460 | + else |
---|
| 461 | + blk_mq_map_queues(map); |
---|
| 462 | + qoff += map->nr_queues; |
---|
| 463 | + offset += map->nr_queues; |
---|
| 464 | + } |
---|
| 465 | + |
---|
| 466 | + return 0; |
---|
| 467 | +} |
---|
| 468 | + |
---|
| 469 | +/* |
---|
| 470 | + * Write sq tail if we are asked to, or if the next command would wrap. |
---|
| 471 | + */ |
---|
| 472 | +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) |
---|
| 473 | +{ |
---|
| 474 | + if (!write_sq) { |
---|
| 475 | + u16 next_tail = nvmeq->sq_tail + 1; |
---|
| 476 | + |
---|
| 477 | + if (next_tail == nvmeq->q_depth) |
---|
| 478 | + next_tail = 0; |
---|
| 479 | + if (next_tail != nvmeq->last_sq_tail) |
---|
| 480 | + return; |
---|
| 481 | + } |
---|
| 482 | + |
---|
| 483 | + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, |
---|
| 484 | + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) |
---|
| 485 | + writel(nvmeq->sq_tail, nvmeq->q_db); |
---|
| 486 | + nvmeq->last_sq_tail = nvmeq->sq_tail; |
---|
455 | 487 | } |
---|
456 | 488 | |
---|
457 | 489 | /** |
---|
458 | 490 | * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell |
---|
459 | 491 | * @nvmeq: The queue to use |
---|
460 | 492 | * @cmd: The command to send |
---|
| 493 | + * @write_sq: whether to write to the SQ doorbell |
---|
461 | 494 | */ |
---|
462 | | -static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) |
---|
| 495 | +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, |
---|
| 496 | + bool write_sq) |
---|
463 | 497 | { |
---|
464 | 498 | spin_lock(&nvmeq->sq_lock); |
---|
465 | | - if (nvmeq->sq_cmds_io) |
---|
466 | | - memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd, |
---|
467 | | - sizeof(*cmd)); |
---|
468 | | - else |
---|
469 | | - memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); |
---|
470 | | - |
---|
| 499 | + memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes), |
---|
| 500 | + cmd, sizeof(*cmd)); |
---|
471 | 501 | if (++nvmeq->sq_tail == nvmeq->q_depth) |
---|
472 | 502 | nvmeq->sq_tail = 0; |
---|
473 | | - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, |
---|
474 | | - nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) |
---|
475 | | - writel(nvmeq->sq_tail, nvmeq->q_db); |
---|
| 503 | + nvme_write_sq_db(nvmeq, write_sq); |
---|
| 504 | + spin_unlock(&nvmeq->sq_lock); |
---|
| 505 | +} |
---|
| 506 | + |
---|
| 507 | +static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) |
---|
| 508 | +{ |
---|
| 509 | + struct nvme_queue *nvmeq = hctx->driver_data; |
---|
| 510 | + |
---|
| 511 | + spin_lock(&nvmeq->sq_lock); |
---|
| 512 | + if (nvmeq->sq_tail != nvmeq->last_sq_tail) |
---|
| 513 | + nvme_write_sq_db(nvmeq, true); |
---|
476 | 514 | spin_unlock(&nvmeq->sq_lock); |
---|
477 | 515 | } |
---|
478 | 516 | |
---|
.. | .. |
---|
488 | 526 | int nseg = blk_rq_nr_phys_segments(req); |
---|
489 | 527 | unsigned int avg_seg_size; |
---|
490 | 528 | |
---|
491 | | - if (nseg == 0) |
---|
492 | | - return false; |
---|
493 | | - |
---|
494 | 529 | avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); |
---|
495 | 530 | |
---|
496 | 531 | if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) |
---|
.. | .. |
---|
502 | 537 | return true; |
---|
503 | 538 | } |
---|
504 | 539 | |
---|
505 | | -static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) |
---|
| 540 | +static void nvme_free_prps(struct nvme_dev *dev, struct request *req) |
---|
506 | 541 | { |
---|
507 | | - struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); |
---|
508 | | - int nseg = blk_rq_nr_phys_segments(rq); |
---|
509 | | - unsigned int size = blk_rq_payload_bytes(rq); |
---|
510 | | - |
---|
511 | | - iod->use_sgl = nvme_pci_use_sgls(dev, rq); |
---|
512 | | - |
---|
513 | | - if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { |
---|
514 | | - iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); |
---|
515 | | - if (!iod->sg) |
---|
516 | | - return BLK_STS_RESOURCE; |
---|
517 | | - } else { |
---|
518 | | - iod->sg = iod->inline_sg; |
---|
519 | | - } |
---|
520 | | - |
---|
521 | | - iod->aborted = 0; |
---|
522 | | - iod->npages = -1; |
---|
523 | | - iod->nents = 0; |
---|
524 | | - iod->length = size; |
---|
525 | | - |
---|
526 | | - return BLK_STS_OK; |
---|
527 | | -} |
---|
528 | | - |
---|
529 | | -static void nvme_free_iod(struct nvme_dev *dev, struct request *req) |
---|
530 | | -{ |
---|
| 542 | + const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; |
---|
531 | 543 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
532 | | - const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; |
---|
533 | | - dma_addr_t dma_addr = iod->first_dma, next_dma_addr; |
---|
534 | | - |
---|
| 544 | + dma_addr_t dma_addr = iod->first_dma; |
---|
535 | 545 | int i; |
---|
536 | 546 | |
---|
537 | | - if (iod->npages == 0) |
---|
538 | | - dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], |
---|
539 | | - dma_addr); |
---|
540 | | - |
---|
541 | 547 | for (i = 0; i < iod->npages; i++) { |
---|
542 | | - void *addr = nvme_pci_iod_list(req)[i]; |
---|
| 548 | + __le64 *prp_list = nvme_pci_iod_list(req)[i]; |
---|
| 549 | + dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); |
---|
543 | 550 | |
---|
544 | | - if (iod->use_sgl) { |
---|
545 | | - struct nvme_sgl_desc *sg_list = addr; |
---|
546 | | - |
---|
547 | | - next_dma_addr = |
---|
548 | | - le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr); |
---|
549 | | - } else { |
---|
550 | | - __le64 *prp_list = addr; |
---|
551 | | - |
---|
552 | | - next_dma_addr = le64_to_cpu(prp_list[last_prp]); |
---|
553 | | - } |
---|
554 | | - |
---|
555 | | - dma_pool_free(dev->prp_page_pool, addr, dma_addr); |
---|
| 551 | + dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); |
---|
556 | 552 | dma_addr = next_dma_addr; |
---|
557 | 553 | } |
---|
558 | 554 | |
---|
559 | | - if (iod->sg != iod->inline_sg) |
---|
560 | | - mempool_free(iod->sg, dev->iod_mempool); |
---|
| 555 | +} |
---|
| 556 | + |
---|
| 557 | +static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) |
---|
| 558 | +{ |
---|
| 559 | + const int last_sg = SGES_PER_PAGE - 1; |
---|
| 560 | + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 561 | + dma_addr_t dma_addr = iod->first_dma; |
---|
| 562 | + int i; |
---|
| 563 | + |
---|
| 564 | + for (i = 0; i < iod->npages; i++) { |
---|
| 565 | + struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; |
---|
| 566 | + dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); |
---|
| 567 | + |
---|
| 568 | + dma_pool_free(dev->prp_page_pool, sg_list, dma_addr); |
---|
| 569 | + dma_addr = next_dma_addr; |
---|
| 570 | + } |
---|
| 571 | + |
---|
| 572 | +} |
---|
| 573 | + |
---|
| 574 | +static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req) |
---|
| 575 | +{ |
---|
| 576 | + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 577 | + |
---|
| 578 | + if (is_pci_p2pdma_page(sg_page(iod->sg))) |
---|
| 579 | + pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, |
---|
| 580 | + rq_dma_dir(req)); |
---|
| 581 | + else |
---|
| 582 | + dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); |
---|
| 583 | +} |
---|
| 584 | + |
---|
| 585 | +static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) |
---|
| 586 | +{ |
---|
| 587 | + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 588 | + |
---|
| 589 | + if (iod->dma_len) { |
---|
| 590 | + dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len, |
---|
| 591 | + rq_dma_dir(req)); |
---|
| 592 | + return; |
---|
| 593 | + } |
---|
| 594 | + |
---|
| 595 | + WARN_ON_ONCE(!iod->nents); |
---|
| 596 | + |
---|
| 597 | + nvme_unmap_sg(dev, req); |
---|
| 598 | + if (iod->npages == 0) |
---|
| 599 | + dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], |
---|
| 600 | + iod->first_dma); |
---|
| 601 | + else if (iod->use_sgl) |
---|
| 602 | + nvme_free_sgls(dev, req); |
---|
| 603 | + else |
---|
| 604 | + nvme_free_prps(dev, req); |
---|
| 605 | + mempool_free(iod->sg, dev->iod_mempool); |
---|
561 | 606 | } |
---|
562 | 607 | |
---|
563 | 608 | static void nvme_print_sgl(struct scatterlist *sgl, int nents) |
---|
.. | .. |
---|
583 | 628 | struct scatterlist *sg = iod->sg; |
---|
584 | 629 | int dma_len = sg_dma_len(sg); |
---|
585 | 630 | u64 dma_addr = sg_dma_address(sg); |
---|
586 | | - u32 page_size = dev->ctrl.page_size; |
---|
587 | | - int offset = dma_addr & (page_size - 1); |
---|
| 631 | + int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); |
---|
588 | 632 | __le64 *prp_list; |
---|
589 | 633 | void **list = nvme_pci_iod_list(req); |
---|
590 | 634 | dma_addr_t prp_dma; |
---|
591 | 635 | int nprps, i; |
---|
592 | 636 | |
---|
593 | | - length -= (page_size - offset); |
---|
| 637 | + length -= (NVME_CTRL_PAGE_SIZE - offset); |
---|
594 | 638 | if (length <= 0) { |
---|
595 | 639 | iod->first_dma = 0; |
---|
596 | 640 | goto done; |
---|
597 | 641 | } |
---|
598 | 642 | |
---|
599 | | - dma_len -= (page_size - offset); |
---|
| 643 | + dma_len -= (NVME_CTRL_PAGE_SIZE - offset); |
---|
600 | 644 | if (dma_len) { |
---|
601 | | - dma_addr += (page_size - offset); |
---|
| 645 | + dma_addr += (NVME_CTRL_PAGE_SIZE - offset); |
---|
602 | 646 | } else { |
---|
603 | 647 | sg = sg_next(sg); |
---|
604 | 648 | dma_addr = sg_dma_address(sg); |
---|
605 | 649 | dma_len = sg_dma_len(sg); |
---|
606 | 650 | } |
---|
607 | 651 | |
---|
608 | | - if (length <= page_size) { |
---|
| 652 | + if (length <= NVME_CTRL_PAGE_SIZE) { |
---|
609 | 653 | iod->first_dma = dma_addr; |
---|
610 | 654 | goto done; |
---|
611 | 655 | } |
---|
612 | 656 | |
---|
613 | | - nprps = DIV_ROUND_UP(length, page_size); |
---|
| 657 | + nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE); |
---|
614 | 658 | if (nprps <= (256 / 8)) { |
---|
615 | 659 | pool = dev->prp_small_pool; |
---|
616 | 660 | iod->npages = 0; |
---|
.. | .. |
---|
629 | 673 | iod->first_dma = prp_dma; |
---|
630 | 674 | i = 0; |
---|
631 | 675 | for (;;) { |
---|
632 | | - if (i == page_size >> 3) { |
---|
| 676 | + if (i == NVME_CTRL_PAGE_SIZE >> 3) { |
---|
633 | 677 | __le64 *old_prp_list = prp_list; |
---|
634 | 678 | prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); |
---|
635 | 679 | if (!prp_list) |
---|
636 | | - return BLK_STS_RESOURCE; |
---|
| 680 | + goto free_prps; |
---|
637 | 681 | list[iod->npages++] = prp_list; |
---|
638 | 682 | prp_list[0] = old_prp_list[i - 1]; |
---|
639 | 683 | old_prp_list[i - 1] = cpu_to_le64(prp_dma); |
---|
640 | 684 | i = 1; |
---|
641 | 685 | } |
---|
642 | 686 | prp_list[i++] = cpu_to_le64(dma_addr); |
---|
643 | | - dma_len -= page_size; |
---|
644 | | - dma_addr += page_size; |
---|
645 | | - length -= page_size; |
---|
| 687 | + dma_len -= NVME_CTRL_PAGE_SIZE; |
---|
| 688 | + dma_addr += NVME_CTRL_PAGE_SIZE; |
---|
| 689 | + length -= NVME_CTRL_PAGE_SIZE; |
---|
646 | 690 | if (length <= 0) |
---|
647 | 691 | break; |
---|
648 | 692 | if (dma_len > 0) |
---|
.. | .. |
---|
653 | 697 | dma_addr = sg_dma_address(sg); |
---|
654 | 698 | dma_len = sg_dma_len(sg); |
---|
655 | 699 | } |
---|
656 | | - |
---|
657 | 700 | done: |
---|
658 | 701 | cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); |
---|
659 | 702 | cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); |
---|
660 | | - |
---|
661 | 703 | return BLK_STS_OK; |
---|
662 | | - |
---|
663 | | - bad_sgl: |
---|
| 704 | +free_prps: |
---|
| 705 | + nvme_free_prps(dev, req); |
---|
| 706 | + return BLK_STS_RESOURCE; |
---|
| 707 | +bad_sgl: |
---|
664 | 708 | WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents), |
---|
665 | 709 | "Invalid SGL for payload:%d nents:%d\n", |
---|
666 | 710 | blk_rq_payload_bytes(req), iod->nents); |
---|
.. | .. |
---|
683 | 727 | sge->length = cpu_to_le32(entries * sizeof(*sge)); |
---|
684 | 728 | sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; |
---|
685 | 729 | } else { |
---|
686 | | - sge->length = cpu_to_le32(PAGE_SIZE); |
---|
| 730 | + sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE); |
---|
687 | 731 | sge->type = NVME_SGL_FMT_SEG_DESC << 4; |
---|
688 | 732 | } |
---|
689 | 733 | } |
---|
.. | .. |
---|
732 | 776 | |
---|
733 | 777 | sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); |
---|
734 | 778 | if (!sg_list) |
---|
735 | | - return BLK_STS_RESOURCE; |
---|
| 779 | + goto free_sgls; |
---|
736 | 780 | |
---|
737 | 781 | i = 0; |
---|
738 | 782 | nvme_pci_iod_list(req)[iod->npages++] = sg_list; |
---|
.. | .. |
---|
745 | 789 | } while (--entries > 0); |
---|
746 | 790 | |
---|
747 | 791 | return BLK_STS_OK; |
---|
| 792 | +free_sgls: |
---|
| 793 | + nvme_free_sgls(dev, req); |
---|
| 794 | + return BLK_STS_RESOURCE; |
---|
| 795 | +} |
---|
| 796 | + |
---|
| 797 | +static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, |
---|
| 798 | + struct request *req, struct nvme_rw_command *cmnd, |
---|
| 799 | + struct bio_vec *bv) |
---|
| 800 | +{ |
---|
| 801 | + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 802 | + unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1); |
---|
| 803 | + unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset; |
---|
| 804 | + |
---|
| 805 | + iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); |
---|
| 806 | + if (dma_mapping_error(dev->dev, iod->first_dma)) |
---|
| 807 | + return BLK_STS_RESOURCE; |
---|
| 808 | + iod->dma_len = bv->bv_len; |
---|
| 809 | + |
---|
| 810 | + cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); |
---|
| 811 | + if (bv->bv_len > first_prp_len) |
---|
| 812 | + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); |
---|
| 813 | + else |
---|
| 814 | + cmnd->dptr.prp2 = 0; |
---|
| 815 | + return BLK_STS_OK; |
---|
| 816 | +} |
---|
| 817 | + |
---|
| 818 | +static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev, |
---|
| 819 | + struct request *req, struct nvme_rw_command *cmnd, |
---|
| 820 | + struct bio_vec *bv) |
---|
| 821 | +{ |
---|
| 822 | + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 823 | + |
---|
| 824 | + iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); |
---|
| 825 | + if (dma_mapping_error(dev->dev, iod->first_dma)) |
---|
| 826 | + return BLK_STS_RESOURCE; |
---|
| 827 | + iod->dma_len = bv->bv_len; |
---|
| 828 | + |
---|
| 829 | + cmnd->flags = NVME_CMD_SGL_METABUF; |
---|
| 830 | + cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma); |
---|
| 831 | + cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len); |
---|
| 832 | + cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4; |
---|
| 833 | + return BLK_STS_OK; |
---|
748 | 834 | } |
---|
749 | 835 | |
---|
750 | 836 | static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, |
---|
751 | 837 | struct nvme_command *cmnd) |
---|
752 | 838 | { |
---|
753 | 839 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
754 | | - struct request_queue *q = req->q; |
---|
755 | | - enum dma_data_direction dma_dir = rq_data_dir(req) ? |
---|
756 | | - DMA_TO_DEVICE : DMA_FROM_DEVICE; |
---|
757 | | - blk_status_t ret = BLK_STS_IOERR; |
---|
| 840 | + blk_status_t ret = BLK_STS_RESOURCE; |
---|
758 | 841 | int nr_mapped; |
---|
759 | 842 | |
---|
| 843 | + if (blk_rq_nr_phys_segments(req) == 1) { |
---|
| 844 | + struct bio_vec bv = req_bvec(req); |
---|
| 845 | + |
---|
| 846 | + if (!is_pci_p2pdma_page(bv.bv_page)) { |
---|
| 847 | + if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2) |
---|
| 848 | + return nvme_setup_prp_simple(dev, req, |
---|
| 849 | + &cmnd->rw, &bv); |
---|
| 850 | + |
---|
| 851 | + if (iod->nvmeq->qid && sgl_threshold && |
---|
| 852 | + dev->ctrl.sgls & ((1 << 0) | (1 << 1))) |
---|
| 853 | + return nvme_setup_sgl_simple(dev, req, |
---|
| 854 | + &cmnd->rw, &bv); |
---|
| 855 | + } |
---|
| 856 | + } |
---|
| 857 | + |
---|
| 858 | + iod->dma_len = 0; |
---|
| 859 | + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); |
---|
| 860 | + if (!iod->sg) |
---|
| 861 | + return BLK_STS_RESOURCE; |
---|
760 | 862 | sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); |
---|
761 | | - iod->nents = blk_rq_map_sg(q, req, iod->sg); |
---|
| 863 | + iod->nents = blk_rq_map_sg(req->q, req, iod->sg); |
---|
762 | 864 | if (!iod->nents) |
---|
763 | | - goto out; |
---|
| 865 | + goto out_free_sg; |
---|
764 | 866 | |
---|
765 | | - ret = BLK_STS_RESOURCE; |
---|
766 | | - nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, |
---|
767 | | - DMA_ATTR_NO_WARN); |
---|
| 867 | + if (is_pci_p2pdma_page(sg_page(iod->sg))) |
---|
| 868 | + nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, |
---|
| 869 | + iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); |
---|
| 870 | + else |
---|
| 871 | + nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, |
---|
| 872 | + rq_dma_dir(req), DMA_ATTR_NO_WARN); |
---|
768 | 873 | if (!nr_mapped) |
---|
769 | | - goto out; |
---|
| 874 | + goto out_free_sg; |
---|
770 | 875 | |
---|
| 876 | + iod->use_sgl = nvme_pci_use_sgls(dev, req); |
---|
771 | 877 | if (iod->use_sgl) |
---|
772 | 878 | ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); |
---|
773 | 879 | else |
---|
774 | 880 | ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); |
---|
775 | | - |
---|
776 | 881 | if (ret != BLK_STS_OK) |
---|
777 | | - goto out_unmap; |
---|
778 | | - |
---|
779 | | - ret = BLK_STS_IOERR; |
---|
780 | | - if (blk_integrity_rq(req)) { |
---|
781 | | - if (blk_rq_count_integrity_sg(q, req->bio) != 1) |
---|
782 | | - goto out_unmap; |
---|
783 | | - |
---|
784 | | - sg_init_table(&iod->meta_sg, 1); |
---|
785 | | - if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) |
---|
786 | | - goto out_unmap; |
---|
787 | | - |
---|
788 | | - if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) |
---|
789 | | - goto out_unmap; |
---|
790 | | - } |
---|
791 | | - |
---|
792 | | - if (blk_integrity_rq(req)) |
---|
793 | | - cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); |
---|
| 882 | + goto out_unmap_sg; |
---|
794 | 883 | return BLK_STS_OK; |
---|
795 | 884 | |
---|
796 | | -out_unmap: |
---|
797 | | - dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); |
---|
798 | | -out: |
---|
| 885 | +out_unmap_sg: |
---|
| 886 | + nvme_unmap_sg(dev, req); |
---|
| 887 | +out_free_sg: |
---|
| 888 | + mempool_free(iod->sg, dev->iod_mempool); |
---|
799 | 889 | return ret; |
---|
800 | 890 | } |
---|
801 | 891 | |
---|
802 | | -static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) |
---|
| 892 | +static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req, |
---|
| 893 | + struct nvme_command *cmnd) |
---|
803 | 894 | { |
---|
804 | 895 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
805 | | - enum dma_data_direction dma_dir = rq_data_dir(req) ? |
---|
806 | | - DMA_TO_DEVICE : DMA_FROM_DEVICE; |
---|
807 | 896 | |
---|
808 | | - if (iod->nents) { |
---|
809 | | - dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); |
---|
810 | | - if (blk_integrity_rq(req)) |
---|
811 | | - dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); |
---|
812 | | - } |
---|
813 | | - |
---|
814 | | - nvme_cleanup_cmd(req); |
---|
815 | | - nvme_free_iod(dev, req); |
---|
| 897 | + iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req), |
---|
| 898 | + rq_dma_dir(req), 0); |
---|
| 899 | + if (dma_mapping_error(dev->dev, iod->meta_dma)) |
---|
| 900 | + return BLK_STS_IOERR; |
---|
| 901 | + cmnd->rw.metadata = cpu_to_le64(iod->meta_dma); |
---|
| 902 | + return BLK_STS_OK; |
---|
816 | 903 | } |
---|
817 | 904 | |
---|
818 | 905 | /* |
---|
.. | .. |
---|
825 | 912 | struct nvme_queue *nvmeq = hctx->driver_data; |
---|
826 | 913 | struct nvme_dev *dev = nvmeq->dev; |
---|
827 | 914 | struct request *req = bd->rq; |
---|
828 | | - struct nvme_command cmnd; |
---|
| 915 | + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 916 | + struct nvme_command *cmnd = &iod->cmd; |
---|
829 | 917 | blk_status_t ret; |
---|
| 918 | + |
---|
| 919 | + iod->aborted = 0; |
---|
| 920 | + iod->npages = -1; |
---|
| 921 | + iod->nents = 0; |
---|
830 | 922 | |
---|
831 | 923 | /* |
---|
832 | 924 | * We should not need to do this, but we're still using this to |
---|
833 | 925 | * ensure we can drain requests on a dying queue. |
---|
834 | 926 | */ |
---|
835 | | - if (unlikely(nvmeq->cq_vector < 0)) |
---|
| 927 | + if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) |
---|
836 | 928 | return BLK_STS_IOERR; |
---|
837 | 929 | |
---|
838 | | - ret = nvme_setup_cmd(ns, req, &cmnd); |
---|
| 930 | + ret = nvme_setup_cmd(ns, req, cmnd); |
---|
839 | 931 | if (ret) |
---|
840 | 932 | return ret; |
---|
841 | 933 | |
---|
842 | | - ret = nvme_init_iod(req, dev); |
---|
843 | | - if (ret) |
---|
844 | | - goto out_free_cmd; |
---|
845 | | - |
---|
846 | 934 | if (blk_rq_nr_phys_segments(req)) { |
---|
847 | | - ret = nvme_map_data(dev, req, &cmnd); |
---|
| 935 | + ret = nvme_map_data(dev, req, cmnd); |
---|
848 | 936 | if (ret) |
---|
849 | | - goto out_cleanup_iod; |
---|
| 937 | + goto out_free_cmd; |
---|
| 938 | + } |
---|
| 939 | + |
---|
| 940 | + if (blk_integrity_rq(req)) { |
---|
| 941 | + ret = nvme_map_metadata(dev, req, cmnd); |
---|
| 942 | + if (ret) |
---|
| 943 | + goto out_unmap_data; |
---|
850 | 944 | } |
---|
851 | 945 | |
---|
852 | 946 | blk_mq_start_request(req); |
---|
853 | | - nvme_submit_cmd(nvmeq, &cmnd); |
---|
| 947 | + nvme_submit_cmd(nvmeq, cmnd, bd->last); |
---|
854 | 948 | return BLK_STS_OK; |
---|
855 | | -out_cleanup_iod: |
---|
856 | | - nvme_free_iod(dev, req); |
---|
| 949 | +out_unmap_data: |
---|
| 950 | + nvme_unmap_data(dev, req); |
---|
857 | 951 | out_free_cmd: |
---|
858 | 952 | nvme_cleanup_cmd(req); |
---|
859 | 953 | return ret; |
---|
.. | .. |
---|
862 | 956 | static void nvme_pci_complete_rq(struct request *req) |
---|
863 | 957 | { |
---|
864 | 958 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
---|
| 959 | + struct nvme_dev *dev = iod->nvmeq->dev; |
---|
865 | 960 | |
---|
866 | | - nvme_unmap_data(iod->nvmeq->dev, req); |
---|
| 961 | + if (blk_integrity_rq(req)) |
---|
| 962 | + dma_unmap_page(dev->dev, iod->meta_dma, |
---|
| 963 | + rq_integrity_vec(req)->bv_len, rq_dma_dir(req)); |
---|
| 964 | + |
---|
| 965 | + if (blk_rq_nr_phys_segments(req)) |
---|
| 966 | + nvme_unmap_data(dev, req); |
---|
867 | 967 | nvme_complete_rq(req); |
---|
868 | 968 | } |
---|
869 | 969 | |
---|
870 | 970 | /* We read the CQE phase first to check if the rest of the entry is valid */ |
---|
871 | 971 | static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) |
---|
872 | 972 | { |
---|
873 | | - return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == |
---|
874 | | - nvmeq->cq_phase; |
---|
| 973 | + struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head]; |
---|
| 974 | + |
---|
| 975 | + return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase; |
---|
875 | 976 | } |
---|
876 | 977 | |
---|
877 | 978 | static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) |
---|
.. | .. |
---|
883 | 984 | writel(head, nvmeq->q_db + nvmeq->dev->db_stride); |
---|
884 | 985 | } |
---|
885 | 986 | |
---|
| 987 | +static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) |
---|
| 988 | +{ |
---|
| 989 | + if (!nvmeq->qid) |
---|
| 990 | + return nvmeq->dev->admin_tagset.tags[0]; |
---|
| 991 | + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; |
---|
| 992 | +} |
---|
| 993 | + |
---|
886 | 994 | static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) |
---|
887 | 995 | { |
---|
888 | | - volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; |
---|
| 996 | + struct nvme_completion *cqe = &nvmeq->cqes[idx]; |
---|
| 997 | + __u16 command_id = READ_ONCE(cqe->command_id); |
---|
889 | 998 | struct request *req; |
---|
890 | | - |
---|
891 | | - if (unlikely(cqe->command_id >= nvmeq->q_depth)) { |
---|
892 | | - dev_warn(nvmeq->dev->ctrl.device, |
---|
893 | | - "invalid id %d completed on queue %d\n", |
---|
894 | | - cqe->command_id, le16_to_cpu(cqe->sq_id)); |
---|
895 | | - return; |
---|
896 | | - } |
---|
897 | 999 | |
---|
898 | 1000 | /* |
---|
899 | 1001 | * AEN requests are special as they don't time out and can |
---|
.. | .. |
---|
901 | 1003 | * aborts. We don't even bother to allocate a struct request |
---|
902 | 1004 | * for them but rather special case them here. |
---|
903 | 1005 | */ |
---|
904 | | - if (unlikely(nvmeq->qid == 0 && |
---|
905 | | - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { |
---|
| 1006 | + if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) { |
---|
906 | 1007 | nvme_complete_async_event(&nvmeq->dev->ctrl, |
---|
907 | 1008 | cqe->status, &cqe->result); |
---|
908 | 1009 | return; |
---|
909 | 1010 | } |
---|
910 | 1011 | |
---|
911 | | - req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); |
---|
912 | | - nvme_end_request(req, cqe->status, cqe->result); |
---|
913 | | -} |
---|
914 | | - |
---|
915 | | -static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) |
---|
916 | | -{ |
---|
917 | | - while (start != end) { |
---|
918 | | - nvme_handle_cqe(nvmeq, start); |
---|
919 | | - if (++start == nvmeq->q_depth) |
---|
920 | | - start = 0; |
---|
| 1012 | + req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id); |
---|
| 1013 | + if (unlikely(!req)) { |
---|
| 1014 | + dev_warn(nvmeq->dev->ctrl.device, |
---|
| 1015 | + "invalid id %d completed on queue %d\n", |
---|
| 1016 | + command_id, le16_to_cpu(cqe->sq_id)); |
---|
| 1017 | + return; |
---|
921 | 1018 | } |
---|
| 1019 | + |
---|
| 1020 | + trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); |
---|
| 1021 | + if (!nvme_try_complete_req(req, cqe->status, cqe->result)) |
---|
| 1022 | + nvme_pci_complete_rq(req); |
---|
922 | 1023 | } |
---|
923 | 1024 | |
---|
924 | 1025 | static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) |
---|
925 | 1026 | { |
---|
926 | | - if (nvmeq->cq_head == nvmeq->q_depth - 1) { |
---|
| 1027 | + u32 tmp = nvmeq->cq_head + 1; |
---|
| 1028 | + |
---|
| 1029 | + if (tmp == nvmeq->q_depth) { |
---|
927 | 1030 | nvmeq->cq_head = 0; |
---|
928 | | - nvmeq->cq_phase = !nvmeq->cq_phase; |
---|
| 1031 | + nvmeq->cq_phase ^= 1; |
---|
929 | 1032 | } else { |
---|
930 | | - nvmeq->cq_head++; |
---|
| 1033 | + nvmeq->cq_head = tmp; |
---|
931 | 1034 | } |
---|
932 | 1035 | } |
---|
933 | 1036 | |
---|
934 | | -static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, |
---|
935 | | - u16 *end, int tag) |
---|
| 1037 | +static inline int nvme_process_cq(struct nvme_queue *nvmeq) |
---|
936 | 1038 | { |
---|
937 | | - bool found = false; |
---|
| 1039 | + int found = 0; |
---|
938 | 1040 | |
---|
939 | | - *start = nvmeq->cq_head; |
---|
940 | | - while (!found && nvme_cqe_pending(nvmeq)) { |
---|
941 | | - if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) |
---|
942 | | - found = true; |
---|
| 1041 | + while (nvme_cqe_pending(nvmeq)) { |
---|
| 1042 | + found++; |
---|
| 1043 | + /* |
---|
| 1044 | + * load-load control dependency between phase and the rest of |
---|
| 1045 | + * the cqe requires a full read memory barrier |
---|
| 1046 | + */ |
---|
| 1047 | + dma_rmb(); |
---|
| 1048 | + nvme_handle_cqe(nvmeq, nvmeq->cq_head); |
---|
943 | 1049 | nvme_update_cq_head(nvmeq); |
---|
944 | 1050 | } |
---|
945 | | - *end = nvmeq->cq_head; |
---|
946 | 1051 | |
---|
947 | | - if (*start != *end) |
---|
| 1052 | + if (found) |
---|
948 | 1053 | nvme_ring_cq_doorbell(nvmeq); |
---|
949 | 1054 | return found; |
---|
950 | 1055 | } |
---|
.. | .. |
---|
953 | 1058 | { |
---|
954 | 1059 | struct nvme_queue *nvmeq = data; |
---|
955 | 1060 | irqreturn_t ret = IRQ_NONE; |
---|
956 | | - u16 start, end; |
---|
957 | 1061 | |
---|
958 | | - spin_lock(&nvmeq->cq_lock); |
---|
959 | | - if (nvmeq->cq_head != nvmeq->last_cq_head) |
---|
| 1062 | + /* |
---|
| 1063 | + * The rmb/wmb pair ensures we see all updates from a previous run of |
---|
| 1064 | + * the irq handler, even if that was on another CPU. |
---|
| 1065 | + */ |
---|
| 1066 | + rmb(); |
---|
| 1067 | + if (nvme_process_cq(nvmeq)) |
---|
960 | 1068 | ret = IRQ_HANDLED; |
---|
961 | | - nvme_process_cq(nvmeq, &start, &end, -1); |
---|
962 | | - nvmeq->last_cq_head = nvmeq->cq_head; |
---|
963 | | - spin_unlock(&nvmeq->cq_lock); |
---|
964 | | - |
---|
965 | | - if (start != end) { |
---|
966 | | - nvme_complete_cqes(nvmeq, start, end); |
---|
967 | | - return IRQ_HANDLED; |
---|
968 | | - } |
---|
| 1069 | + wmb(); |
---|
969 | 1070 | |
---|
970 | 1071 | return ret; |
---|
971 | 1072 | } |
---|
.. | .. |
---|
973 | 1074 | static irqreturn_t nvme_irq_check(int irq, void *data) |
---|
974 | 1075 | { |
---|
975 | 1076 | struct nvme_queue *nvmeq = data; |
---|
| 1077 | + |
---|
976 | 1078 | if (nvme_cqe_pending(nvmeq)) |
---|
977 | 1079 | return IRQ_WAKE_THREAD; |
---|
978 | 1080 | return IRQ_NONE; |
---|
979 | 1081 | } |
---|
980 | 1082 | |
---|
981 | | -static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) |
---|
| 1083 | +/* |
---|
| 1084 | + * Poll for completions for any interrupt driven queue |
---|
| 1085 | + * Can be called from any context. |
---|
| 1086 | + */ |
---|
| 1087 | +static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) |
---|
982 | 1088 | { |
---|
983 | | - u16 start, end; |
---|
| 1089 | + struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); |
---|
| 1090 | + |
---|
| 1091 | + WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); |
---|
| 1092 | + |
---|
| 1093 | + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); |
---|
| 1094 | + nvme_process_cq(nvmeq); |
---|
| 1095 | + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); |
---|
| 1096 | +} |
---|
| 1097 | + |
---|
| 1098 | +static int nvme_poll(struct blk_mq_hw_ctx *hctx) |
---|
| 1099 | +{ |
---|
| 1100 | + struct nvme_queue *nvmeq = hctx->driver_data; |
---|
984 | 1101 | bool found; |
---|
985 | 1102 | |
---|
986 | 1103 | if (!nvme_cqe_pending(nvmeq)) |
---|
987 | 1104 | return 0; |
---|
988 | 1105 | |
---|
989 | | - spin_lock_irq(&nvmeq->cq_lock); |
---|
990 | | - found = nvme_process_cq(nvmeq, &start, &end, tag); |
---|
991 | | - spin_unlock_irq(&nvmeq->cq_lock); |
---|
| 1106 | + spin_lock(&nvmeq->cq_poll_lock); |
---|
| 1107 | + found = nvme_process_cq(nvmeq); |
---|
| 1108 | + spin_unlock(&nvmeq->cq_poll_lock); |
---|
992 | 1109 | |
---|
993 | | - nvme_complete_cqes(nvmeq, start, end); |
---|
994 | 1110 | return found; |
---|
995 | | -} |
---|
996 | | - |
---|
997 | | -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) |
---|
998 | | -{ |
---|
999 | | - struct nvme_queue *nvmeq = hctx->driver_data; |
---|
1000 | | - |
---|
1001 | | - return __nvme_poll(nvmeq, tag); |
---|
1002 | 1111 | } |
---|
1003 | 1112 | |
---|
1004 | 1113 | static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) |
---|
.. | .. |
---|
1010 | 1119 | memset(&c, 0, sizeof(c)); |
---|
1011 | 1120 | c.common.opcode = nvme_admin_async_event; |
---|
1012 | 1121 | c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; |
---|
1013 | | - nvme_submit_cmd(nvmeq, &c); |
---|
| 1122 | + nvme_submit_cmd(nvmeq, &c, true); |
---|
1014 | 1123 | } |
---|
1015 | 1124 | |
---|
1016 | 1125 | static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) |
---|
.. | .. |
---|
1028 | 1137 | struct nvme_queue *nvmeq, s16 vector) |
---|
1029 | 1138 | { |
---|
1030 | 1139 | struct nvme_command c; |
---|
1031 | | - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; |
---|
| 1140 | + int flags = NVME_QUEUE_PHYS_CONTIG; |
---|
| 1141 | + |
---|
| 1142 | + if (!test_bit(NVMEQ_POLLED, &nvmeq->flags)) |
---|
| 1143 | + flags |= NVME_CQ_IRQ_ENABLED; |
---|
1032 | 1144 | |
---|
1033 | 1145 | /* |
---|
1034 | 1146 | * Note: we (ab)use the fact that the prp fields survive if no data |
---|
.. | .. |
---|
1098 | 1210 | |
---|
1099 | 1211 | static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) |
---|
1100 | 1212 | { |
---|
1101 | | - |
---|
1102 | 1213 | /* If true, indicates loss of adapter communication, possibly by a |
---|
1103 | 1214 | * NVMe Subsystem reset. |
---|
1104 | 1215 | */ |
---|
.. | .. |
---|
1147 | 1258 | struct nvme_dev *dev = nvmeq->dev; |
---|
1148 | 1259 | struct request *abort_req; |
---|
1149 | 1260 | struct nvme_command cmd; |
---|
1150 | | - bool shutdown = false; |
---|
1151 | 1261 | u32 csts = readl(dev->bar + NVME_REG_CSTS); |
---|
1152 | 1262 | |
---|
1153 | 1263 | /* If PCI error recovery process is happening, we cannot reset or |
---|
.. | .. |
---|
1170 | 1280 | /* |
---|
1171 | 1281 | * Did we miss an interrupt? |
---|
1172 | 1282 | */ |
---|
1173 | | - if (__nvme_poll(nvmeq, req->tag)) { |
---|
| 1283 | + if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) |
---|
| 1284 | + nvme_poll(req->mq_hctx); |
---|
| 1285 | + else |
---|
| 1286 | + nvme_poll_irqdisable(nvmeq); |
---|
| 1287 | + |
---|
| 1288 | + if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { |
---|
1174 | 1289 | dev_warn(dev->ctrl.device, |
---|
1175 | 1290 | "I/O %d QID %d timeout, completion polled\n", |
---|
1176 | 1291 | req->tag, nvmeq->qid); |
---|
.. | .. |
---|
1184 | 1299 | * shutdown, so we return BLK_EH_DONE. |
---|
1185 | 1300 | */ |
---|
1186 | 1301 | switch (dev->ctrl.state) { |
---|
1187 | | - case NVME_CTRL_DELETING: |
---|
1188 | | - shutdown = true; |
---|
1189 | 1302 | case NVME_CTRL_CONNECTING: |
---|
1190 | | - case NVME_CTRL_RESETTING: |
---|
| 1303 | + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); |
---|
| 1304 | + fallthrough; |
---|
| 1305 | + case NVME_CTRL_DELETING: |
---|
1191 | 1306 | dev_warn_ratelimited(dev->ctrl.device, |
---|
1192 | 1307 | "I/O %d QID %d timeout, disable controller\n", |
---|
1193 | 1308 | req->tag, nvmeq->qid); |
---|
1194 | | - nvme_dev_disable(dev, shutdown); |
---|
1195 | 1309 | nvme_req(req)->flags |= NVME_REQ_CANCELLED; |
---|
| 1310 | + nvme_dev_disable(dev, true); |
---|
1196 | 1311 | return BLK_EH_DONE; |
---|
| 1312 | + case NVME_CTRL_RESETTING: |
---|
| 1313 | + return BLK_EH_RESET_TIMER; |
---|
1197 | 1314 | default: |
---|
1198 | 1315 | break; |
---|
1199 | 1316 | } |
---|
1200 | 1317 | |
---|
1201 | 1318 | /* |
---|
1202 | | - * Shutdown the controller immediately and schedule a reset if the |
---|
1203 | | - * command was already aborted once before and still hasn't been |
---|
1204 | | - * returned to the driver, or if this is the admin queue. |
---|
| 1319 | + * Shutdown the controller immediately and schedule a reset if the |
---|
| 1320 | + * command was already aborted once before and still hasn't been |
---|
| 1321 | + * returned to the driver, or if this is the admin queue. |
---|
1205 | 1322 | */ |
---|
1206 | 1323 | if (!nvmeq->qid || iod->aborted) { |
---|
1207 | 1324 | dev_warn(dev->ctrl.device, |
---|
1208 | 1325 | "I/O %d QID %d timeout, reset controller\n", |
---|
1209 | 1326 | req->tag, nvmeq->qid); |
---|
| 1327 | + nvme_req(req)->flags |= NVME_REQ_CANCELLED; |
---|
1210 | 1328 | nvme_dev_disable(dev, false); |
---|
1211 | 1329 | nvme_reset_ctrl(&dev->ctrl); |
---|
1212 | 1330 | |
---|
1213 | | - nvme_req(req)->flags |= NVME_REQ_CANCELLED; |
---|
1214 | 1331 | return BLK_EH_DONE; |
---|
1215 | 1332 | } |
---|
1216 | 1333 | |
---|
.. | .. |
---|
1222 | 1339 | |
---|
1223 | 1340 | memset(&cmd, 0, sizeof(cmd)); |
---|
1224 | 1341 | cmd.abort.opcode = nvme_admin_abort_cmd; |
---|
1225 | | - cmd.abort.cid = req->tag; |
---|
| 1342 | + cmd.abort.cid = nvme_cid(req); |
---|
1226 | 1343 | cmd.abort.sqid = cpu_to_le16(nvmeq->qid); |
---|
1227 | 1344 | |
---|
1228 | 1345 | dev_warn(nvmeq->dev->ctrl.device, |
---|
.. | .. |
---|
1230 | 1347 | req->tag, nvmeq->qid); |
---|
1231 | 1348 | |
---|
1232 | 1349 | abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, |
---|
1233 | | - BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); |
---|
| 1350 | + BLK_MQ_REQ_NOWAIT); |
---|
1234 | 1351 | if (IS_ERR(abort_req)) { |
---|
1235 | 1352 | atomic_inc(&dev->ctrl.abort_limit); |
---|
1236 | 1353 | return BLK_EH_RESET_TIMER; |
---|
1237 | 1354 | } |
---|
1238 | 1355 | |
---|
1239 | | - abort_req->timeout = ADMIN_TIMEOUT; |
---|
1240 | 1356 | abort_req->end_io_data = NULL; |
---|
1241 | 1357 | blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio); |
---|
1242 | 1358 | |
---|
.. | .. |
---|
1250 | 1366 | |
---|
1251 | 1367 | static void nvme_free_queue(struct nvme_queue *nvmeq) |
---|
1252 | 1368 | { |
---|
1253 | | - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), |
---|
| 1369 | + dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq), |
---|
1254 | 1370 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); |
---|
1255 | | - if (nvmeq->sq_cmds) |
---|
1256 | | - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), |
---|
1257 | | - nvmeq->sq_cmds, nvmeq->sq_dma_addr); |
---|
| 1371 | + if (!nvmeq->sq_cmds) |
---|
| 1372 | + return; |
---|
| 1373 | + |
---|
| 1374 | + if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { |
---|
| 1375 | + pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), |
---|
| 1376 | + nvmeq->sq_cmds, SQ_SIZE(nvmeq)); |
---|
| 1377 | + } else { |
---|
| 1378 | + dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq), |
---|
| 1379 | + nvmeq->sq_cmds, nvmeq->sq_dma_addr); |
---|
| 1380 | + } |
---|
1258 | 1381 | } |
---|
1259 | 1382 | |
---|
1260 | 1383 | static void nvme_free_queues(struct nvme_dev *dev, int lowest) |
---|
.. | .. |
---|
1269 | 1392 | |
---|
1270 | 1393 | /** |
---|
1271 | 1394 | * nvme_suspend_queue - put queue into suspended state |
---|
1272 | | - * @nvmeq - queue to suspend |
---|
| 1395 | + * @nvmeq: queue to suspend |
---|
1273 | 1396 | */ |
---|
1274 | 1397 | static int nvme_suspend_queue(struct nvme_queue *nvmeq) |
---|
1275 | 1398 | { |
---|
1276 | | - int vector; |
---|
1277 | | - |
---|
1278 | | - spin_lock_irq(&nvmeq->cq_lock); |
---|
1279 | | - if (nvmeq->cq_vector == -1) { |
---|
1280 | | - spin_unlock_irq(&nvmeq->cq_lock); |
---|
| 1399 | + if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags)) |
---|
1281 | 1400 | return 1; |
---|
1282 | | - } |
---|
1283 | | - vector = nvmeq->cq_vector; |
---|
1284 | | - nvmeq->dev->online_queues--; |
---|
1285 | | - nvmeq->cq_vector = -1; |
---|
1286 | | - spin_unlock_irq(&nvmeq->cq_lock); |
---|
1287 | 1401 | |
---|
1288 | | - /* |
---|
1289 | | - * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without |
---|
1290 | | - * having to grab the lock. |
---|
1291 | | - */ |
---|
| 1402 | + /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */ |
---|
1292 | 1403 | mb(); |
---|
1293 | 1404 | |
---|
| 1405 | + nvmeq->dev->online_queues--; |
---|
1294 | 1406 | if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) |
---|
1295 | 1407 | blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); |
---|
1296 | | - |
---|
1297 | | - pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); |
---|
1298 | | - |
---|
| 1408 | + if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) |
---|
| 1409 | + pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); |
---|
1299 | 1410 | return 0; |
---|
| 1411 | +} |
---|
| 1412 | + |
---|
| 1413 | +static void nvme_suspend_io_queues(struct nvme_dev *dev) |
---|
| 1414 | +{ |
---|
| 1415 | + int i; |
---|
| 1416 | + |
---|
| 1417 | + for (i = dev->ctrl.queue_count - 1; i > 0; i--) |
---|
| 1418 | + nvme_suspend_queue(&dev->queues[i]); |
---|
1300 | 1419 | } |
---|
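The teardown above now keys off per-queue state bits (NVMEQ_ENABLED, NVMEQ_POLLED, NVMEQ_SQ_CMB, ...) tested with test_and_clear_bit() instead of the old cq_vector == -1 sentinel, so only one caller ever performs the actual suspend. A minimal userspace sketch of that idiom, using the GCC/Clang __atomic builtins in place of the kernel bitops; the flag numbering and helper names are invented for the sketch:

```c
#include <stdio.h>
#include <stdbool.h>

/* illustrative bit positions; the driver has its own NVMEQ_* definitions */
enum { Q_ENABLED = 0, Q_POLLED = 1 };

struct queue {
	unsigned long flags;
};

/* roughly what the kernel's test_and_clear_bit() provides */
static bool test_and_clear(unsigned long *word, int bit)
{
	unsigned long mask = 1UL << bit;

	return __atomic_fetch_and(word, ~mask, __ATOMIC_SEQ_CST) & mask;
}

static int suspend_queue(struct queue *q)
{
	if (!test_and_clear(&q->flags, Q_ENABLED))
		return 1;	/* never enabled, or already suspended */
	/* only the first caller reaches the actual teardown */
	return 0;
}

int main(void)
{
	struct queue q = { .flags = 1UL << Q_ENABLED };

	printf("first suspend:  %d\n", suspend_queue(&q));	/* 0 */
	printf("second suspend: %d\n", suspend_queue(&q));	/* 1 */
	return 0;
}
```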
1301 | 1420 | |
---|
1302 | 1421 | static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) |
---|
1303 | 1422 | { |
---|
1304 | 1423 | struct nvme_queue *nvmeq = &dev->queues[0]; |
---|
1305 | | - u16 start, end; |
---|
1306 | 1424 | |
---|
1307 | 1425 | if (shutdown) |
---|
1308 | 1426 | nvme_shutdown_ctrl(&dev->ctrl); |
---|
1309 | 1427 | else |
---|
1310 | | - nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); |
---|
| 1428 | + nvme_disable_ctrl(&dev->ctrl); |
---|
1311 | 1429 | |
---|
1312 | | - spin_lock_irq(&nvmeq->cq_lock); |
---|
1313 | | - nvme_process_cq(nvmeq, &start, &end, -1); |
---|
1314 | | - spin_unlock_irq(&nvmeq->cq_lock); |
---|
| 1430 | + nvme_poll_irqdisable(nvmeq); |
---|
| 1431 | +} |
---|
1315 | 1432 | |
---|
1316 | | - nvme_complete_cqes(nvmeq, start, end); |
---|
| 1433 | +/* |
---|
| 1434 | + * Called only on a device that has been disabled and after all other threads |
---|
| 1435 | + * that can check this device's completion queues have synced, except |
---|
| 1436 | + * nvme_poll(). This is the last chance for the driver to see a natural |
---|
| 1437 | + * completion before nvme_cancel_request() terminates all incomplete requests. |
---|
| 1438 | + */ |
---|
| 1439 | +static void nvme_reap_pending_cqes(struct nvme_dev *dev) |
---|
| 1440 | +{ |
---|
| 1441 | + int i; |
---|
| 1442 | + |
---|
| 1443 | + for (i = dev->ctrl.queue_count - 1; i > 0; i--) { |
---|
| 1444 | + spin_lock(&dev->queues[i].cq_poll_lock); |
---|
| 1445 | + nvme_process_cq(&dev->queues[i]); |
---|
| 1446 | + spin_unlock(&dev->queues[i].cq_poll_lock); |
---|
| 1447 | + } |
---|
1317 | 1448 | } |
---|
1318 | 1449 | |
---|
1319 | 1450 | static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, |
---|
.. | .. |
---|
1321 | 1452 | { |
---|
1322 | 1453 | int q_depth = dev->q_depth; |
---|
1323 | 1454 | unsigned q_size_aligned = roundup(q_depth * entry_size, |
---|
1324 | | - dev->ctrl.page_size); |
---|
| 1455 | + NVME_CTRL_PAGE_SIZE); |
---|
1325 | 1456 | |
---|
1326 | 1457 | if (q_size_aligned * nr_io_queues > dev->cmb_size) { |
---|
1327 | 1458 | u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues); |
---|
1328 | | - mem_per_q = round_down(mem_per_q, dev->ctrl.page_size); |
---|
| 1459 | + |
---|
| 1460 | + mem_per_q = round_down(mem_per_q, NVME_CTRL_PAGE_SIZE); |
---|
1329 | 1461 | q_depth = div_u64(mem_per_q, entry_size); |
---|
1330 | 1462 | |
---|
1331 | 1463 | /* |
---|
.. | .. |
---|
1341 | 1473 | } |
---|
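The depth calculation above only changes the rounding granularity from the runtime ctrl.page_size to the fixed NVME_CTRL_PAGE_SIZE. A standalone model of the arithmetic; the 4 KiB page, the 64-byte entry size and the minimum-depth floor are assumptions for the example (the driver's actual floor lives in the part of the function elided from this hunk):

```c
#include <stdio.h>
#include <stdint.h>

#define CTRL_PAGE_SIZE	4096u	/* NVME_CTRL_PAGE_SIZE */
#define MIN_CMB_QDEPTH	64	/* illustrative floor, see the elided check */

static uint64_t roundup_u64(uint64_t x, uint64_t a)   { return (x + a - 1) / a * a; }
static uint64_t rounddown_u64(uint64_t x, uint64_t a) { return x / a * a; }

/* same shape as nvme_cmb_qdepth(): shrink q_depth until every submission
 * queue fits inside the controller memory buffer */
static int cmb_qdepth(uint64_t cmb_size, unsigned int nr_io_queues,
		      int q_depth, unsigned int entry_size)
{
	uint64_t q_size_aligned = roundup_u64((uint64_t)q_depth * entry_size,
					      CTRL_PAGE_SIZE);

	if (q_size_aligned * nr_io_queues > cmb_size) {
		uint64_t mem_per_q = cmb_size / nr_io_queues;

		mem_per_q = rounddown_u64(mem_per_q, CTRL_PAGE_SIZE);
		q_depth = (int)(mem_per_q / entry_size);
		if (q_depth < MIN_CMB_QDEPTH)
			return -1;	/* caller keeps the SQs in host memory */
	}
	return q_depth;
}

int main(void)
{
	/* 256 KiB CMB, 8 I/O queues, 1024 requested entries of 64 bytes */
	printf("depth = %d\n", cmb_qdepth(256 << 10, 8, 1024, 64));	/* 512 */
	return 0;
}
```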
1342 | 1474 | |
---|
1343 | 1475 | static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, |
---|
1344 | | - int qid, int depth) |
---|
| 1476 | + int qid) |
---|
1345 | 1477 | { |
---|
1346 | | - /* CMB SQEs will be mapped before creation */ |
---|
1347 | | - if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) |
---|
1348 | | - return 0; |
---|
| 1478 | + struct pci_dev *pdev = to_pci_dev(dev->dev); |
---|
1349 | 1479 | |
---|
1350 | | - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), |
---|
1351 | | - &nvmeq->sq_dma_addr, GFP_KERNEL); |
---|
| 1480 | + if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { |
---|
| 1481 | + nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq)); |
---|
| 1482 | + if (nvmeq->sq_cmds) { |
---|
| 1483 | + nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, |
---|
| 1484 | + nvmeq->sq_cmds); |
---|
| 1485 | + if (nvmeq->sq_dma_addr) { |
---|
| 1486 | + set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); |
---|
| 1487 | + return 0; |
---|
| 1488 | + } |
---|
| 1489 | + |
---|
| 1490 | + pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq)); |
---|
| 1491 | + } |
---|
| 1492 | + } |
---|
| 1493 | + |
---|
| 1494 | + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq), |
---|
| 1495 | + &nvmeq->sq_dma_addr, GFP_KERNEL); |
---|
1352 | 1496 | if (!nvmeq->sq_cmds) |
---|
1353 | 1497 | return -ENOMEM; |
---|
1354 | 1498 | return 0; |
---|
.. | .. |
---|
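nvme_alloc_sq_cmds() above now tries to carve the submission ring out of the controller memory buffer through the p2pdma API and only falls back to dma_alloc_coherent() when that fails. A hedged userspace sketch of the same try-CMB-then-host-memory fallback; malloc() stands in for the DMA allocator, the stub CMB allocator always fails, and all helper names are invented:

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* stub for pci_alloc_p2pmem(): pretend the CMB has no room left */
static void *alloc_cmb(size_t size)
{
	(void)size;
	return NULL;
}

/* same fallback shape as nvme_alloc_sq_cmds(): prefer placing the SQ ring
 * in the controller memory buffer, fall back to ordinary host memory */
static void *alloc_sq_ring(size_t size, bool want_cmb, bool *in_cmb)
{
	if (want_cmb) {
		void *sq = alloc_cmb(size);

		if (sq) {
			*in_cmb = true;	/* driver sets NVMEQ_SQ_CMB here */
			return sq;
		}
	}
	*in_cmb = false;
	return malloc(size);	/* dma_alloc_coherent() in the driver */
}

int main(void)
{
	bool in_cmb;
	void *sq = alloc_sq_ring(1024 * 64, true, &in_cmb);

	printf("SQ ring %s, placed in CMB: %s\n",
	       sq ? "allocated" : "failed", in_cmb ? "yes" : "no");
	if (!in_cmb)
		free(sq);
	return 0;
}
```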
1361 | 1505 | if (dev->ctrl.queue_count > qid) |
---|
1362 | 1506 | return 0; |
---|
1363 | 1507 | |
---|
1364 | | - nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), |
---|
1365 | | - &nvmeq->cq_dma_addr, GFP_KERNEL); |
---|
| 1508 | + nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES; |
---|
| 1509 | + nvmeq->q_depth = depth; |
---|
| 1510 | + nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), |
---|
| 1511 | + &nvmeq->cq_dma_addr, GFP_KERNEL); |
---|
1366 | 1512 | if (!nvmeq->cqes) |
---|
1367 | 1513 | goto free_nvmeq; |
---|
1368 | 1514 | |
---|
1369 | | - if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) |
---|
| 1515 | + if (nvme_alloc_sq_cmds(dev, nvmeq, qid)) |
---|
1370 | 1516 | goto free_cqdma; |
---|
1371 | 1517 | |
---|
1372 | | - nvmeq->q_dmadev = dev->dev; |
---|
1373 | 1518 | nvmeq->dev = dev; |
---|
1374 | 1519 | spin_lock_init(&nvmeq->sq_lock); |
---|
1375 | | - spin_lock_init(&nvmeq->cq_lock); |
---|
| 1520 | + spin_lock_init(&nvmeq->cq_poll_lock); |
---|
1376 | 1521 | nvmeq->cq_head = 0; |
---|
1377 | 1522 | nvmeq->cq_phase = 1; |
---|
1378 | 1523 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; |
---|
1379 | | - nvmeq->q_depth = depth; |
---|
1380 | 1524 | nvmeq->qid = qid; |
---|
1381 | | - nvmeq->cq_vector = -1; |
---|
1382 | 1525 | dev->ctrl.queue_count++; |
---|
1383 | 1526 | |
---|
1384 | 1527 | return 0; |
---|
1385 | 1528 | |
---|
1386 | 1529 | free_cqdma: |
---|
1387 | | - dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, |
---|
1388 | | - nvmeq->cq_dma_addr); |
---|
| 1530 | + dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, |
---|
| 1531 | + nvmeq->cq_dma_addr); |
---|
1389 | 1532 | free_nvmeq: |
---|
1390 | 1533 | return -ENOMEM; |
---|
1391 | 1534 | } |
---|
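Because nvme_alloc_queue() now records the submission entry size as a shift (nvmeq->sqes), the SQ_SIZE() macro from the top of the file becomes q_depth << sqes rather than a multiply by sizeof(struct nvme_command). A quick check of that arithmetic, assuming the standard 64-byte SQE (shift 6) and the 128-byte Apple-quirk SQE (shift 7) configured later in this diff:

```c
#include <stdio.h>

struct q {
	unsigned int q_depth;
	unsigned char sqes;	/* log2 of the submission entry size */
};

/* same shape as the reworked SQ_SIZE()/CQ_SIZE() macros */
#define SQ_SIZE(q)	((q)->q_depth << (q)->sqes)
#define CQE_BYTES	16u	/* sizeof(struct nvme_completion) */
#define CQ_SIZE(q)	((q)->q_depth * CQE_BYTES)

int main(void)
{
	struct q std   = { .q_depth = 1024, .sqes = 6 };	/* 64-byte SQEs  */
	struct q apple = { .q_depth = 1024, .sqes = 7 };	/* 128-byte SQEs */

	printf("standard SQ ring: %u bytes, CQ ring: %u bytes\n",
	       SQ_SIZE(&std), CQ_SIZE(&std));
	printf("quirked  SQ ring: %u bytes\n", SQ_SIZE(&apple));
	return 0;
}
```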
.. | .. |
---|
1408 | 1551 | { |
---|
1409 | 1552 | struct nvme_dev *dev = nvmeq->dev; |
---|
1410 | 1553 | |
---|
1411 | | - spin_lock_irq(&nvmeq->cq_lock); |
---|
1412 | 1554 | nvmeq->sq_tail = 0; |
---|
| 1555 | + nvmeq->last_sq_tail = 0; |
---|
1413 | 1556 | nvmeq->cq_head = 0; |
---|
1414 | 1557 | nvmeq->cq_phase = 1; |
---|
1415 | 1558 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; |
---|
1416 | | - memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); |
---|
| 1559 | + memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq)); |
---|
1417 | 1560 | nvme_dbbuf_init(dev, nvmeq, qid); |
---|
1418 | 1561 | dev->online_queues++; |
---|
1419 | | - spin_unlock_irq(&nvmeq->cq_lock); |
---|
| 1562 | + wmb(); /* ensure the first interrupt sees the initialization */ |
---|
1420 | 1563 | } |
---|
1421 | 1564 | |
---|
1422 | | -static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) |
---|
| 1565 | +static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) |
---|
1423 | 1566 | { |
---|
1424 | 1567 | struct nvme_dev *dev = nvmeq->dev; |
---|
1425 | 1568 | int result; |
---|
1426 | | - s16 vector; |
---|
| 1569 | + u16 vector = 0; |
---|
1427 | 1570 | |
---|
1428 | | - if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { |
---|
1429 | | - unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth), |
---|
1430 | | - dev->ctrl.page_size); |
---|
1431 | | - nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; |
---|
1432 | | - nvmeq->sq_cmds_io = dev->cmb + offset; |
---|
1433 | | - } |
---|
| 1571 | + clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); |
---|
1434 | 1572 | |
---|
1435 | 1573 | /* |
---|
1436 | 1574 | * A queue's vector matches the queue identifier unless the controller |
---|
1437 | 1575 | * has only one vector available. |
---|
1438 | 1576 | */ |
---|
1439 | | - vector = dev->num_vecs == 1 ? 0 : qid; |
---|
| 1577 | + if (!polled) |
---|
| 1578 | + vector = dev->num_vecs == 1 ? 0 : qid; |
---|
| 1579 | + else |
---|
| 1580 | + set_bit(NVMEQ_POLLED, &nvmeq->flags); |
---|
| 1581 | + |
---|
1440 | 1582 | result = adapter_alloc_cq(dev, qid, nvmeq, vector); |
---|
1441 | 1583 | if (result) |
---|
1442 | 1584 | return result; |
---|
.. | .. |
---|
1444 | 1586 | result = adapter_alloc_sq(dev, qid, nvmeq); |
---|
1445 | 1587 | if (result < 0) |
---|
1446 | 1588 | return result; |
---|
1447 | | - else if (result) |
---|
| 1589 | + if (result) |
---|
1448 | 1590 | goto release_cq; |
---|
1449 | 1591 | |
---|
1450 | | - /* |
---|
1451 | | - * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will |
---|
1452 | | - * invoke free_irq for it and cause a 'Trying to free already-free IRQ |
---|
1453 | | - * xxx' warning if the create CQ/SQ command times out. |
---|
1454 | | - */ |
---|
1455 | 1592 | nvmeq->cq_vector = vector; |
---|
1456 | 1593 | nvme_init_queue(nvmeq, qid); |
---|
1457 | | - result = queue_request_irq(nvmeq); |
---|
1458 | | - if (result < 0) |
---|
1459 | | - goto release_sq; |
---|
1460 | 1594 | |
---|
| 1595 | + if (!polled) { |
---|
| 1596 | + result = queue_request_irq(nvmeq); |
---|
| 1597 | + if (result < 0) |
---|
| 1598 | + goto release_sq; |
---|
| 1599 | + } |
---|
| 1600 | + |
---|
| 1601 | + set_bit(NVMEQ_ENABLED, &nvmeq->flags); |
---|
1461 | 1602 | return result; |
---|
1462 | 1603 | |
---|
1463 | 1604 | release_sq: |
---|
1464 | | - nvmeq->cq_vector = -1; |
---|
1465 | 1605 | dev->online_queues--; |
---|
1466 | 1606 | adapter_delete_sq(dev, qid); |
---|
1467 | 1607 | release_cq: |
---|
.. | .. |
---|
1473 | 1613 | .queue_rq = nvme_queue_rq, |
---|
1474 | 1614 | .complete = nvme_pci_complete_rq, |
---|
1475 | 1615 | .init_hctx = nvme_admin_init_hctx, |
---|
1476 | | - .exit_hctx = nvme_admin_exit_hctx, |
---|
1477 | 1616 | .init_request = nvme_init_request, |
---|
1478 | 1617 | .timeout = nvme_timeout, |
---|
1479 | 1618 | }; |
---|
.. | .. |
---|
1481 | 1620 | static const struct blk_mq_ops nvme_mq_ops = { |
---|
1482 | 1621 | .queue_rq = nvme_queue_rq, |
---|
1483 | 1622 | .complete = nvme_pci_complete_rq, |
---|
| 1623 | + .commit_rqs = nvme_commit_rqs, |
---|
1484 | 1624 | .init_hctx = nvme_init_hctx, |
---|
1485 | 1625 | .init_request = nvme_init_request, |
---|
1486 | 1626 | .map_queues = nvme_pci_map_queues, |
---|
.. | .. |
---|
1510 | 1650 | |
---|
1511 | 1651 | dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; |
---|
1512 | 1652 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; |
---|
1513 | | - dev->admin_tagset.numa_node = dev_to_node(dev->dev); |
---|
1514 | | - dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false); |
---|
| 1653 | + dev->admin_tagset.numa_node = dev->ctrl.numa_node; |
---|
| 1654 | + dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); |
---|
1515 | 1655 | dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; |
---|
1516 | 1656 | dev->admin_tagset.driver_data = dev; |
---|
1517 | 1657 | |
---|
.. | .. |
---|
1522 | 1662 | dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); |
---|
1523 | 1663 | if (IS_ERR(dev->ctrl.admin_q)) { |
---|
1524 | 1664 | blk_mq_free_tag_set(&dev->admin_tagset); |
---|
| 1665 | + dev->ctrl.admin_q = NULL; |
---|
1525 | 1666 | return -ENOMEM; |
---|
1526 | 1667 | } |
---|
1527 | 1668 | if (!blk_get_queue(dev->ctrl.admin_q)) { |
---|
.. | .. |
---|
1578 | 1719 | (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) |
---|
1579 | 1720 | writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); |
---|
1580 | 1721 | |
---|
1581 | | - result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); |
---|
| 1722 | + result = nvme_disable_ctrl(&dev->ctrl); |
---|
1582 | 1723 | if (result < 0) |
---|
1583 | 1724 | return result; |
---|
1584 | 1725 | |
---|
1585 | 1726 | result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); |
---|
1586 | 1727 | if (result) |
---|
1587 | 1728 | return result; |
---|
| 1729 | + |
---|
| 1730 | + dev->ctrl.numa_node = dev_to_node(dev->dev); |
---|
1588 | 1731 | |
---|
1589 | 1732 | nvmeq = &dev->queues[0]; |
---|
1590 | 1733 | aqa = nvmeq->q_depth - 1; |
---|
.. | .. |
---|
1594 | 1737 | lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); |
---|
1595 | 1738 | lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); |
---|
1596 | 1739 | |
---|
1597 | | - result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap); |
---|
| 1740 | + result = nvme_enable_ctrl(&dev->ctrl); |
---|
1598 | 1741 | if (result) |
---|
1599 | 1742 | return result; |
---|
1600 | 1743 | |
---|
.. | .. |
---|
1602 | 1745 | nvme_init_queue(nvmeq, 0); |
---|
1603 | 1746 | result = queue_request_irq(nvmeq); |
---|
1604 | 1747 | if (result) { |
---|
1605 | | - nvmeq->cq_vector = -1; |
---|
| 1748 | + dev->online_queues--; |
---|
1606 | 1749 | return result; |
---|
1607 | 1750 | } |
---|
1608 | 1751 | |
---|
| 1752 | + set_bit(NVMEQ_ENABLED, &nvmeq->flags); |
---|
1609 | 1753 | return result; |
---|
1610 | 1754 | } |
---|
1611 | 1755 | |
---|
1612 | 1756 | static int nvme_create_io_queues(struct nvme_dev *dev) |
---|
1613 | 1757 | { |
---|
1614 | | - unsigned i, max; |
---|
| 1758 | + unsigned i, max, rw_queues; |
---|
1615 | 1759 | int ret = 0; |
---|
1616 | 1760 | |
---|
1617 | 1761 | for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { |
---|
.. | .. |
---|
1622 | 1766 | } |
---|
1623 | 1767 | |
---|
1624 | 1768 | max = min(dev->max_qid, dev->ctrl.queue_count - 1); |
---|
| 1769 | + if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) { |
---|
| 1770 | + rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] + |
---|
| 1771 | + dev->io_queues[HCTX_TYPE_READ]; |
---|
| 1772 | + } else { |
---|
| 1773 | + rw_queues = max; |
---|
| 1774 | + } |
---|
| 1775 | + |
---|
1625 | 1776 | for (i = dev->online_queues; i <= max; i++) { |
---|
1626 | | - ret = nvme_create_queue(&dev->queues[i], i); |
---|
| 1777 | + bool polled = i > rw_queues; |
---|
| 1778 | + |
---|
| 1779 | + ret = nvme_create_queue(&dev->queues[i], i, polled); |
---|
1627 | 1780 | if (ret) |
---|
1628 | 1781 | break; |
---|
1629 | 1782 | } |
---|
.. | .. |
---|
1670 | 1823 | if (dev->cmb_size) |
---|
1671 | 1824 | return; |
---|
1672 | 1825 | |
---|
| 1826 | + if (NVME_CAP_CMBS(dev->ctrl.cap)) |
---|
| 1827 | + writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC); |
---|
| 1828 | + |
---|
1673 | 1829 | dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); |
---|
1674 | 1830 | if (!dev->cmbsz) |
---|
1675 | 1831 | return; |
---|
1676 | 1832 | dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); |
---|
1677 | | - |
---|
1678 | | - if (!use_cmb_sqes) |
---|
1679 | | - return; |
---|
1680 | 1833 | |
---|
1681 | 1834 | size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev); |
---|
1682 | 1835 | offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc); |
---|
.. | .. |
---|
1687 | 1840 | return; |
---|
1688 | 1841 | |
---|
1689 | 1842 | /* |
---|
| 1843 | + * Tell the controller about the host side address mapping the CMB, |
---|
| 1844 | + * and enable CMB decoding for the NVMe 1.4+ scheme: |
---|
| 1845 | + */ |
---|
| 1846 | + if (NVME_CAP_CMBS(dev->ctrl.cap)) { |
---|
| 1847 | + hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE | |
---|
| 1848 | + (pci_bus_address(pdev, bar) + offset), |
---|
| 1849 | + dev->bar + NVME_REG_CMBMSC); |
---|
| 1850 | + } |
---|
| 1851 | + |
---|
| 1852 | + /* |
---|
1690 | 1853 | * Controllers may support a CMB size larger than their BAR, |
---|
1691 | 1854 | * for example, due to being behind a bridge. Reduce the CMB to |
---|
1692 | 1855 | * the reported size of the BAR |
---|
.. | .. |
---|
1694 | 1857 | if (size > bar_size - offset) |
---|
1695 | 1858 | size = bar_size - offset; |
---|
1696 | 1859 | |
---|
1697 | | - dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size); |
---|
1698 | | - if (!dev->cmb) |
---|
| 1860 | + if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { |
---|
| 1861 | + dev_warn(dev->ctrl.device, |
---|
| 1862 | + "failed to register the CMB\n"); |
---|
1699 | 1863 | return; |
---|
1700 | | - dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset; |
---|
| 1864 | + } |
---|
| 1865 | + |
---|
1701 | 1866 | dev->cmb_size = size; |
---|
| 1867 | + dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS); |
---|
| 1868 | + |
---|
| 1869 | + if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == |
---|
| 1870 | + (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) |
---|
| 1871 | + pci_p2pmem_publish(pdev, true); |
---|
1702 | 1872 | |
---|
1703 | 1873 | if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, |
---|
1704 | 1874 | &dev_attr_cmb.attr, NULL)) |
---|
.. | .. |
---|
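The CMB mapping above still clamps the advertised CMB to the size of the BAR that backs it before registering it as p2pdma memory. A standalone sketch of that clamp; the CMBSZ.SZU decode used here follows the NVMe specification rather than anything shown in the hunk, so treat it and the example register fields as assumptions:

```c
#include <stdio.h>
#include <stdint.h>

/* CMBSZ.SZU decode: 0 = 4 KiB, 1 = 64 KiB, ... (each step is x16);
 * this mirrors nvme_cmb_size_unit(), which is not part of this hunk */
static uint64_t cmb_size_unit(unsigned int szu)
{
	return 1ULL << (12 + 4 * szu);
}

int main(void)
{
	unsigned int szu = 0, sz = 2048, ofst = 0;	/* example CMBSZ/CMBLOC fields */
	uint64_t bar_size = 4ULL << 20;			/* 4 MiB BAR */

	uint64_t size   = cmb_size_unit(szu) * sz;	/* 8 MiB advertised */
	uint64_t offset = cmb_size_unit(szu) * ofst;

	/* same clamp as in nvme_map_cmb(): never trust a CMB larger than
	 * the BAR region actually backing it */
	if (size > bar_size - offset)
		size = bar_size - offset;

	printf("usable CMB: %llu bytes\n", (unsigned long long)size);
	return 0;
}
```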
1708 | 1878 | |
---|
1709 | 1879 | static inline void nvme_release_cmb(struct nvme_dev *dev) |
---|
1710 | 1880 | { |
---|
1711 | | - if (dev->cmb) { |
---|
1712 | | - iounmap(dev->cmb); |
---|
1713 | | - dev->cmb = NULL; |
---|
| 1881 | + if (dev->cmb_size) { |
---|
1714 | 1882 | sysfs_remove_file_from_group(&dev->ctrl.device->kobj, |
---|
1715 | 1883 | &dev_attr_cmb.attr, NULL); |
---|
1716 | | - dev->cmbsz = 0; |
---|
| 1884 | + dev->cmb_size = 0; |
---|
1717 | 1885 | } |
---|
1718 | 1886 | } |
---|
1719 | 1887 | |
---|
1720 | 1888 | static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) |
---|
1721 | 1889 | { |
---|
| 1890 | + u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT; |
---|
1722 | 1891 | u64 dma_addr = dev->host_mem_descs_dma; |
---|
1723 | 1892 | struct nvme_command c; |
---|
1724 | 1893 | int ret; |
---|
.. | .. |
---|
1727 | 1896 | c.features.opcode = nvme_admin_set_features; |
---|
1728 | 1897 | c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); |
---|
1729 | 1898 | c.features.dword11 = cpu_to_le32(bits); |
---|
1730 | | - c.features.dword12 = cpu_to_le32(dev->host_mem_size >> |
---|
1731 | | - ilog2(dev->ctrl.page_size)); |
---|
| 1899 | + c.features.dword12 = cpu_to_le32(host_mem_size); |
---|
1732 | 1900 | c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr)); |
---|
1733 | 1901 | c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr)); |
---|
1734 | 1902 | c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs); |
---|
.. | .. |
---|
1748 | 1916 | |
---|
1749 | 1917 | for (i = 0; i < dev->nr_host_mem_descs; i++) { |
---|
1750 | 1918 | struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i]; |
---|
1751 | | - size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size; |
---|
| 1919 | + size_t size = le32_to_cpu(desc->size) * NVME_CTRL_PAGE_SIZE; |
---|
1752 | 1920 | |
---|
1753 | 1921 | dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i], |
---|
1754 | 1922 | le64_to_cpu(desc->addr), |
---|
.. | .. |
---|
1781 | 1949 | if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) |
---|
1782 | 1950 | max_entries = dev->ctrl.hmmaxd; |
---|
1783 | 1951 | |
---|
1784 | | - descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs), |
---|
1785 | | - &descs_dma, GFP_KERNEL); |
---|
| 1952 | + descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs), |
---|
| 1953 | + &descs_dma, GFP_KERNEL); |
---|
1786 | 1954 | if (!descs) |
---|
1787 | 1955 | goto out; |
---|
1788 | 1956 | |
---|
.. | .. |
---|
1800 | 1968 | break; |
---|
1801 | 1969 | |
---|
1802 | 1970 | descs[i].addr = cpu_to_le64(dma_addr); |
---|
1803 | | - descs[i].size = cpu_to_le32(len / dev->ctrl.page_size); |
---|
| 1971 | + descs[i].size = cpu_to_le32(len / NVME_CTRL_PAGE_SIZE); |
---|
1804 | 1972 | i++; |
---|
1805 | 1973 | } |
---|
1806 | 1974 | |
---|
.. | .. |
---|
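The host memory buffer hunks above switch every size field from the runtime ctrl.page_size to the fixed 4 KiB NVME_CTRL_PAGE_SIZE, so the Set Features dword12 value and each descriptor's size are simply byte counts expressed in controller pages. A quick standalone check of those conversions, with the 4 KiB page and the example buffer sizes taken as assumptions:

```c
#include <stdio.h>
#include <stdint.h>

#define CTRL_PAGE_SHIFT	12
#define CTRL_PAGE_SIZE	(1u << CTRL_PAGE_SHIFT)	/* 4096 */

int main(void)
{
	uint64_t host_mem_size = 32ull << 20;	/* a 32 MiB HMB, for example */
	uint64_t chunk_len     = 2ull << 20;	/* one 2 MiB descriptor chunk */

	/* value placed in the Set Features (host memory buffer) dword12 */
	uint32_t dw12 = (uint32_t)(host_mem_size >> CTRL_PAGE_SHIFT);

	/* value placed in one nvme_host_mem_buf_desc.size */
	uint32_t desc_size = (uint32_t)(chunk_len / CTRL_PAGE_SIZE);

	printf("dword12   = %u controller pages\n", dw12);	/* 8192 */
	printf("desc.size = %u controller pages\n", desc_size);	/* 512 */
	return 0;
}
```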
1816 | 1984 | |
---|
1817 | 1985 | out_free_bufs: |
---|
1818 | 1986 | while (--i >= 0) { |
---|
1819 | | - size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size; |
---|
| 1987 | + size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE; |
---|
1820 | 1988 | |
---|
1821 | 1989 | dma_free_attrs(dev->dev, size, bufs[i], |
---|
1822 | 1990 | le64_to_cpu(descs[i].addr), |
---|
.. | .. |
---|
1834 | 2002 | |
---|
1835 | 2003 | static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) |
---|
1836 | 2004 | { |
---|
1837 | | - u32 chunk_size; |
---|
| 2005 | + u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); |
---|
| 2006 | + u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); |
---|
| 2007 | + u64 chunk_size; |
---|
1838 | 2008 | |
---|
1839 | 2009 | /* start big and work our way down */ |
---|
1840 | | - for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); |
---|
1841 | | - chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); |
---|
1842 | | - chunk_size /= 2) { |
---|
| 2010 | + for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) { |
---|
1843 | 2011 | if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { |
---|
1844 | 2012 | if (!min || dev->host_mem_size >= min) |
---|
1845 | 2013 | return 0; |
---|
.. | .. |
---|
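nvme_alloc_host_mem() above keeps the same start-big-and-halve strategy, only with the bounds hoisted into min_chunk and hmminds. A userspace model of that loop with a stub allocator; the chunk limits and the 512 KiB success threshold are made up to force a couple of failed attempts:

```c
#include <stdio.h>
#include <stdint.h>

/* illustrative limits; the driver derives these from MAX_ORDER and hmminds */
#define MAX_CHUNK	(4u << 20)	/* largest contiguous chunk tried first */
#define MIN_CHUNK	(8u * 4096)	/* smallest chunk still worth using */

/* stand-in for __nvme_alloc_host_mem(): pretend the system can only hand
 * out contiguous chunks of 512 KiB or less */
static int try_alloc(uint32_t chunk_size, uint64_t *allocated)
{
	if (chunk_size > (512u << 10))
		return -1;
	*allocated = 32ull << 20;	/* pretend the whole HMB was covered */
	return 0;
}

int main(void)
{
	uint64_t got = 0;
	uint32_t chunk;

	/* same start-big-and-halve strategy as nvme_alloc_host_mem() */
	for (chunk = MAX_CHUNK; chunk >= MIN_CHUNK; chunk /= 2) {
		if (!try_alloc(chunk, &got)) {
			printf("settled on %u KiB chunks, %llu MiB buffer\n",
			       chunk >> 10, (unsigned long long)(got >> 20));
			return 0;
		}
	}
	printf("no suitable chunk size\n");
	return 1;
}
```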
1895 | 2063 | return ret; |
---|
1896 | 2064 | } |
---|
1897 | 2065 | |
---|
| 2066 | +/* |
---|
| 2067 | + * nrirqs is the number of interrupts available for write and read
---|
| 2068 | + * queues. The core already reserved an interrupt for the admin queue. |
---|
| 2069 | + */ |
---|
| 2070 | +static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) |
---|
| 2071 | +{ |
---|
| 2072 | + struct nvme_dev *dev = affd->priv; |
---|
| 2073 | + unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues; |
---|
| 2074 | + |
---|
| 2075 | + /* |
---|
| 2076 | + * If there is no interrupt available for queues, ensure that |
---|
| 2077 | + * the default queue is set to 1. The affinity set size is |
---|
| 2078 | + * also set to one, but the irq core ignores it for this case. |
---|
| 2079 | + * |
---|
| 2080 | + * If only one interrupt is available or 'write_queues' == 0, combine
---|
| 2081 | + * write and read queues. |
---|
| 2082 | + * |
---|
| 2083 | + * If 'write_queues' > 0, ensure it leaves room for at least one read |
---|
| 2084 | + * queue. |
---|
| 2085 | + */ |
---|
| 2086 | + if (!nrirqs) { |
---|
| 2087 | + nrirqs = 1; |
---|
| 2088 | + nr_read_queues = 0; |
---|
| 2089 | + } else if (nrirqs == 1 || !nr_write_queues) { |
---|
| 2090 | + nr_read_queues = 0; |
---|
| 2091 | + } else if (nr_write_queues >= nrirqs) { |
---|
| 2092 | + nr_read_queues = 1; |
---|
| 2093 | + } else { |
---|
| 2094 | + nr_read_queues = nrirqs - nr_write_queues; |
---|
| 2095 | + } |
---|
| 2096 | + |
---|
| 2097 | + dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; |
---|
| 2098 | + affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; |
---|
| 2099 | + dev->io_queues[HCTX_TYPE_READ] = nr_read_queues; |
---|
| 2100 | + affd->set_size[HCTX_TYPE_READ] = nr_read_queues; |
---|
| 2101 | + affd->nr_sets = nr_read_queues ? 2 : 1; |
---|
| 2102 | +} |
---|
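A userspace model of the nvme_calc_irq_sets() policy above, handy for sanity-checking how a given interrupt budget and write_queues setting split into the default and read sets; the helper name and the table of cases in main() are illustrative only:

```c
#include <stdio.h>

/* userspace model of nvme_calc_irq_sets(): split nrirqs interrupts into a
 * "default" (write) set and a "read" set, given the write_queues request */
static void calc_sets(unsigned int nrirqs, unsigned int nr_write_queues,
		      unsigned int *nr_default, unsigned int *nr_read)
{
	unsigned int nr_read_queues;

	if (!nrirqs) {
		nrirqs = 1;
		nr_read_queues = 0;
	} else if (nrirqs == 1 || !nr_write_queues) {
		nr_read_queues = 0;
	} else if (nr_write_queues >= nrirqs) {
		nr_read_queues = 1;
	} else {
		nr_read_queues = nrirqs - nr_write_queues;
	}

	*nr_default = nrirqs - nr_read_queues;
	*nr_read = nr_read_queues;
}

int main(void)
{
	static const unsigned int cases[][2] = {
		{ 8, 0 },	/* no write_queues: one combined set of 8 */
		{ 8, 2 },	/* 2 write + 6 read */
		{ 8, 16 },	/* oversubscribed: 7 write + 1 read */
		{ 1, 4 },	/* single vector: everything in one set */
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		unsigned int def, rd;

		calc_sets(cases[i][0], cases[i][1], &def, &rd);
		printf("nrirqs=%2u write_queues=%2u -> default=%u read=%u\n",
		       cases[i][0], cases[i][1], def, rd);
	}
	return 0;
}
```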
| 2103 | + |
---|
| 2104 | +static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) |
---|
| 2105 | +{ |
---|
| 2106 | + struct pci_dev *pdev = to_pci_dev(dev->dev); |
---|
| 2107 | + struct irq_affinity affd = { |
---|
| 2108 | + .pre_vectors = 1, |
---|
| 2109 | + .calc_sets = nvme_calc_irq_sets, |
---|
| 2110 | + .priv = dev, |
---|
| 2111 | + }; |
---|
| 2112 | + unsigned int irq_queues, poll_queues; |
---|
| 2113 | + |
---|
| 2114 | + /* |
---|
| 2115 | + * Poll queues don't need interrupts, but we need at least one I/O queue |
---|
| 2116 | + * left over for non-polled I/O. |
---|
| 2117 | + */ |
---|
| 2118 | + poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1); |
---|
| 2119 | + dev->io_queues[HCTX_TYPE_POLL] = poll_queues; |
---|
| 2120 | + |
---|
| 2121 | + /* |
---|
| 2122 | + * Initialize for the single interrupt case, will be updated in |
---|
| 2123 | + * nvme_calc_irq_sets(). |
---|
| 2124 | + */ |
---|
| 2125 | + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; |
---|
| 2126 | + dev->io_queues[HCTX_TYPE_READ] = 0; |
---|
| 2127 | + |
---|
| 2128 | + /* |
---|
| 2129 | + * We need interrupts for the admin queue and each non-polled I/O queue, |
---|
| 2130 | + * but some Apple controllers require all queues to use the first |
---|
| 2131 | + * vector. |
---|
| 2132 | + */ |
---|
| 2133 | + irq_queues = 1; |
---|
| 2134 | + if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)) |
---|
| 2135 | + irq_queues += (nr_io_queues - poll_queues); |
---|
| 2136 | + return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, |
---|
| 2137 | + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); |
---|
| 2138 | +} |
---|
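And the companion vector budgeting from nvme_setup_irqs(): poll queues take no interrupt, one vector is always reserved for the admin queue, and the Apple single-vector quirk collapses everything onto that one vector. A small sketch of the count handed to pci_alloc_irq_vectors_affinity(); the helper name is invented for the sketch:

```c
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* how many MSI/MSI-X vectors nvme_setup_irqs() asks the PCI core for;
 * callers guarantee nr_io_queues >= 1 */
static unsigned int irq_vectors_wanted(unsigned int nr_io_queues,
				       unsigned int nr_poll_queues,
				       int single_vector_quirk)
{
	/* poll queues take no interrupt, but keep one non-polled I/O queue */
	unsigned int poll_queues = min_u(nr_poll_queues, nr_io_queues - 1);
	unsigned int irq_queues = 1;	/* the admin queue's vector */

	if (!single_vector_quirk)
		irq_queues += nr_io_queues - poll_queues;
	return irq_queues;
}

int main(void)
{
	printf("%u\n", irq_vectors_wanted(8, 0, 0));	/* 9: admin + 8 I/O */
	printf("%u\n", irq_vectors_wanted(8, 3, 0));	/* 6: admin + 5 I/O */
	printf("%u\n", irq_vectors_wanted(8, 0, 1));	/* 1: Apple quirk   */
	return 0;
}
```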
| 2139 | + |
---|
| 2140 | +static void nvme_disable_io_queues(struct nvme_dev *dev) |
---|
| 2141 | +{ |
---|
| 2142 | + if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) |
---|
| 2143 | + __nvme_disable_io_queues(dev, nvme_admin_delete_cq); |
---|
| 2144 | +} |
---|
| 2145 | + |
---|
| 2146 | +static unsigned int nvme_max_io_queues(struct nvme_dev *dev) |
---|
| 2147 | +{ |
---|
| 2148 | + return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; |
---|
| 2149 | +} |
---|
| 2150 | + |
---|
1898 | 2151 | static int nvme_setup_io_queues(struct nvme_dev *dev) |
---|
1899 | 2152 | { |
---|
1900 | 2153 | struct nvme_queue *adminq = &dev->queues[0]; |
---|
1901 | 2154 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
---|
1902 | | - int result, nr_io_queues; |
---|
| 2155 | + unsigned int nr_io_queues; |
---|
1903 | 2156 | unsigned long size; |
---|
| 2157 | + int result; |
---|
1904 | 2158 | |
---|
1905 | | - struct irq_affinity affd = { |
---|
1906 | | - .pre_vectors = 1 |
---|
1907 | | - }; |
---|
| 2159 | + /* |
---|
| 2160 | + * Sample the module parameters once at reset time so that we have |
---|
| 2161 | + * stable values to work with. |
---|
| 2162 | + */ |
---|
| 2163 | + dev->nr_write_queues = write_queues; |
---|
| 2164 | + dev->nr_poll_queues = poll_queues; |
---|
1908 | 2165 | |
---|
1909 | | - nr_io_queues = num_possible_cpus(); |
---|
| 2166 | + /* |
---|
| 2167 | + * If tags are shared with admin queue (Apple bug), then |
---|
| 2168 | + * make sure we only use one IO queue. |
---|
| 2169 | + */ |
---|
| 2170 | + if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) |
---|
| 2171 | + nr_io_queues = 1; |
---|
| 2172 | + else |
---|
| 2173 | + nr_io_queues = min(nvme_max_io_queues(dev), |
---|
| 2174 | + dev->nr_allocated_queues - 1); |
---|
| 2175 | + |
---|
1910 | 2176 | result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); |
---|
1911 | 2177 | if (result < 0) |
---|
1912 | 2178 | return result; |
---|
1913 | 2179 | |
---|
1914 | 2180 | if (nr_io_queues == 0) |
---|
1915 | 2181 | return 0; |
---|
| 2182 | + |
---|
| 2183 | + clear_bit(NVMEQ_ENABLED, &adminq->flags); |
---|
1916 | 2184 | |
---|
1917 | | - if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) { |
---|
| 2185 | + if (dev->cmb_use_sqes) { |
---|
1918 | 2186 | result = nvme_cmb_qdepth(dev, nr_io_queues, |
---|
1919 | 2187 | sizeof(struct nvme_command)); |
---|
1920 | 2188 | if (result > 0) |
---|
1921 | 2189 | dev->q_depth = result; |
---|
1922 | 2190 | else |
---|
1923 | | - nvme_release_cmb(dev); |
---|
| 2191 | + dev->cmb_use_sqes = false; |
---|
1924 | 2192 | } |
---|
1925 | 2193 | |
---|
1926 | 2194 | do { |
---|
.. | .. |
---|
1933 | 2201 | } while (1); |
---|
1934 | 2202 | adminq->q_db = dev->dbs; |
---|
1935 | 2203 | |
---|
| 2204 | + retry: |
---|
1936 | 2205 | /* Deregister the admin queue's interrupt */ |
---|
1937 | 2206 | pci_free_irq(pdev, 0, adminq); |
---|
1938 | 2207 | |
---|
.. | .. |
---|
1941 | 2210 | * setting up the full range we need. |
---|
1942 | 2211 | */ |
---|
1943 | 2212 | pci_free_irq_vectors(pdev); |
---|
1944 | | - result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1, |
---|
1945 | | - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); |
---|
| 2213 | + |
---|
| 2214 | + result = nvme_setup_irqs(dev, nr_io_queues); |
---|
1946 | 2215 | if (result <= 0) |
---|
1947 | 2216 | return -EIO; |
---|
| 2217 | + |
---|
1948 | 2218 | dev->num_vecs = result; |
---|
1949 | | - dev->max_qid = max(result - 1, 1); |
---|
| 2219 | + result = max(result - 1, 1); |
---|
| 2220 | + dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; |
---|
1950 | 2221 | |
---|
1951 | 2222 | /* |
---|
1952 | 2223 | * Should investigate if there's a performance win from allocating |
---|
.. | .. |
---|
1954 | 2225 | * path to scale better, even if the receive path is limited by the |
---|
1955 | 2226 | * number of interrupts. |
---|
1956 | 2227 | */ |
---|
1957 | | - |
---|
1958 | 2228 | result = queue_request_irq(adminq); |
---|
1959 | | - if (result) { |
---|
1960 | | - adminq->cq_vector = -1; |
---|
| 2229 | + if (result) |
---|
1961 | 2230 | return result; |
---|
| 2231 | + set_bit(NVMEQ_ENABLED, &adminq->flags); |
---|
| 2232 | + |
---|
| 2233 | + result = nvme_create_io_queues(dev); |
---|
| 2234 | + if (result || dev->online_queues < 2) |
---|
| 2235 | + return result; |
---|
| 2236 | + |
---|
| 2237 | + if (dev->online_queues - 1 < dev->max_qid) { |
---|
| 2238 | + nr_io_queues = dev->online_queues - 1; |
---|
| 2239 | + nvme_disable_io_queues(dev); |
---|
| 2240 | + nvme_suspend_io_queues(dev); |
---|
| 2241 | + goto retry; |
---|
1962 | 2242 | } |
---|
1963 | | - return nvme_create_io_queues(dev); |
---|
| 2243 | + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", |
---|
| 2244 | + dev->io_queues[HCTX_TYPE_DEFAULT], |
---|
| 2245 | + dev->io_queues[HCTX_TYPE_READ], |
---|
| 2246 | + dev->io_queues[HCTX_TYPE_POLL]); |
---|
| 2247 | + return 0; |
---|
1964 | 2248 | } |
---|
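The retry label added to nvme_setup_io_queues() shrinks the queue count to whatever actually came online and redoes the IRQ setup for that smaller number. A toy model of that shrink-and-retry loop, with a stub standing in for a controller that brings up fewer queues than requested:

```c
#include <stdio.h>

/* stand-in for "how many I/O queues actually came up this pass";
 * pretend the controller can only ever bring up 4 */
static unsigned int bring_up_queues(unsigned int wanted)
{
	return wanted > 4 ? 4 : wanted;
}

int main(void)
{
	unsigned int wanted = 16, online;

	/* same shape as the retry loop in nvme_setup_io_queues(): if fewer
	 * queues came up than the IRQ layout was sized for, tear the I/O
	 * queues down and redo the setup for the smaller count */
	for (;;) {
		online = bring_up_queues(wanted);
		if (online >= wanted)
			break;
		printf("wanted %u, got %u -> retrying\n", wanted, online);
		wanted = online;	/* shrink and re-run the IRQ setup */
	}
	printf("settled on %u I/O queues\n", online);
	return 0;
}
```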
1965 | 2249 | |
---|
1966 | 2250 | static void nvme_del_queue_end(struct request *req, blk_status_t error) |
---|
.. | .. |
---|
1968 | 2252 | struct nvme_queue *nvmeq = req->end_io_data; |
---|
1969 | 2253 | |
---|
1970 | 2254 | blk_mq_free_request(req); |
---|
1971 | | - complete(&nvmeq->dev->ioq_wait); |
---|
| 2255 | + complete(&nvmeq->delete_done); |
---|
1972 | 2256 | } |
---|
1973 | 2257 | |
---|
1974 | 2258 | static void nvme_del_cq_end(struct request *req, blk_status_t error) |
---|
1975 | 2259 | { |
---|
1976 | 2260 | struct nvme_queue *nvmeq = req->end_io_data; |
---|
1977 | | - u16 start, end; |
---|
1978 | 2261 | |
---|
1979 | | - if (!error) { |
---|
1980 | | - unsigned long flags; |
---|
1981 | | - |
---|
1982 | | - spin_lock_irqsave(&nvmeq->cq_lock, flags); |
---|
1983 | | - nvme_process_cq(nvmeq, &start, &end, -1); |
---|
1984 | | - spin_unlock_irqrestore(&nvmeq->cq_lock, flags); |
---|
1985 | | - |
---|
1986 | | - nvme_complete_cqes(nvmeq, start, end); |
---|
1987 | | - } |
---|
| 2262 | + if (error) |
---|
| 2263 | + set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); |
---|
1988 | 2264 | |
---|
1989 | 2265 | nvme_del_queue_end(req, error); |
---|
1990 | 2266 | } |
---|
.. | .. |
---|
1999 | 2275 | cmd.delete_queue.opcode = opcode; |
---|
2000 | 2276 | cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); |
---|
2001 | 2277 | |
---|
2002 | | - req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); |
---|
| 2278 | + req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); |
---|
2003 | 2279 | if (IS_ERR(req)) |
---|
2004 | 2280 | return PTR_ERR(req); |
---|
2005 | 2281 | |
---|
2006 | | - req->timeout = ADMIN_TIMEOUT; |
---|
2007 | 2282 | req->end_io_data = nvmeq; |
---|
2008 | 2283 | |
---|
| 2284 | + init_completion(&nvmeq->delete_done); |
---|
2009 | 2285 | blk_execute_rq_nowait(q, NULL, req, false, |
---|
2010 | 2286 | opcode == nvme_admin_delete_cq ? |
---|
2011 | 2287 | nvme_del_cq_end : nvme_del_queue_end); |
---|
2012 | 2288 | return 0; |
---|
2013 | 2289 | } |
---|
2014 | 2290 | |
---|
2015 | | -static void nvme_disable_io_queues(struct nvme_dev *dev) |
---|
| 2291 | +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) |
---|
2016 | 2292 | { |
---|
2017 | | - int pass, queues = dev->online_queues - 1; |
---|
| 2293 | + int nr_queues = dev->online_queues - 1, sent = 0; |
---|
2018 | 2294 | unsigned long timeout; |
---|
2019 | | - u8 opcode = nvme_admin_delete_sq; |
---|
2020 | 2295 | |
---|
2021 | | - for (pass = 0; pass < 2; pass++) { |
---|
2022 | | - int sent = 0, i = queues; |
---|
2023 | | - |
---|
2024 | | - reinit_completion(&dev->ioq_wait); |
---|
2025 | 2296 | retry: |
---|
2026 | | - timeout = ADMIN_TIMEOUT; |
---|
2027 | | - for (; i > 0; i--, sent++) |
---|
2028 | | - if (nvme_delete_queue(&dev->queues[i], opcode)) |
---|
2029 | | - break; |
---|
2030 | | - |
---|
2031 | | - while (sent--) { |
---|
2032 | | - timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); |
---|
2033 | | - if (timeout == 0) |
---|
2034 | | - return; |
---|
2035 | | - if (i) |
---|
2036 | | - goto retry; |
---|
2037 | | - } |
---|
2038 | | - opcode = nvme_admin_delete_cq; |
---|
| 2297 | + timeout = ADMIN_TIMEOUT; |
---|
| 2298 | + while (nr_queues > 0) { |
---|
| 2299 | + if (nvme_delete_queue(&dev->queues[nr_queues], opcode)) |
---|
| 2300 | + break; |
---|
| 2301 | + nr_queues--; |
---|
| 2302 | + sent++; |
---|
2039 | 2303 | } |
---|
| 2304 | + while (sent) { |
---|
| 2305 | + struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent]; |
---|
| 2306 | + |
---|
| 2307 | + timeout = wait_for_completion_io_timeout(&nvmeq->delete_done, |
---|
| 2308 | + timeout); |
---|
| 2309 | + if (timeout == 0) |
---|
| 2310 | + return false; |
---|
| 2311 | + |
---|
| 2312 | + sent--; |
---|
| 2313 | + if (nr_queues) |
---|
| 2314 | + goto retry; |
---|
| 2315 | + } |
---|
| 2316 | + return true; |
---|
2040 | 2317 | } |
---|
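__nvme_disable_io_queues() above batches delete commands until the admin queue refuses another one, waits for what was sent, then retries the remainder; the wrapper runs it once for SQs and, only if that fully succeeded, again for CQs. A userspace simulation of that bookkeeping with stubbed submit/wait primitives; the in-flight limit is invented purely to force the retry path, and real waits can time out where this stub cannot:

```c
#include <stdio.h>
#include <stdbool.h>

#define NR_QUEUES	8
#define INFLIGHT_LIMIT	3	/* pretend only 3 admin commands fit at once */

static int inflight;

/* stand-in for nvme_delete_queue(): fails when the admin queue is full */
static bool submit_delete(const char *what, int qid)
{
	if (inflight >= INFLIGHT_LIMIT)
		return false;
	inflight++;
	printf("  sent delete %s for qid %d\n", what, qid);
	return true;
}

/* stand-in for waiting on nvmeq->delete_done; always completes here */
static void wait_one(void)
{
	inflight--;
}

/* same bookkeeping shape as __nvme_disable_io_queues() */
static bool disable_queues(const char *what)
{
	int nr_queues = NR_QUEUES, sent = 0;

retry:
	while (nr_queues > 0) {
		if (!submit_delete(what, nr_queues))
			break;
		nr_queues--;
		sent++;
	}
	while (sent) {
		wait_one();
		sent--;
		if (nr_queues)
			goto retry;
	}
	return true;	/* the driver returns false on timeout instead */
}

int main(void)
{
	/* delete all SQs first, and only then the CQs, as in the driver */
	if (disable_queues("SQ"))
		disable_queues("CQ");
	return 0;
}
```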
2041 | 2318 | |
---|
2042 | | -/* |
---|
2043 | | - * return error value only when tagset allocation failed |
---|
2044 | | - */ |
---|
2045 | | -static int nvme_dev_add(struct nvme_dev *dev) |
---|
| 2319 | +static void nvme_dev_add(struct nvme_dev *dev) |
---|
2046 | 2320 | { |
---|
2047 | 2321 | int ret; |
---|
2048 | 2322 | |
---|
2049 | 2323 | if (!dev->ctrl.tagset) { |
---|
2050 | 2324 | dev->tagset.ops = &nvme_mq_ops; |
---|
2051 | 2325 | dev->tagset.nr_hw_queues = dev->online_queues - 1; |
---|
| 2326 | + dev->tagset.nr_maps = 2; /* default + read */ |
---|
| 2327 | + if (dev->io_queues[HCTX_TYPE_POLL]) |
---|
| 2328 | + dev->tagset.nr_maps++; |
---|
2052 | 2329 | dev->tagset.timeout = NVME_IO_TIMEOUT; |
---|
2053 | | - dev->tagset.numa_node = dev_to_node(dev->dev); |
---|
2054 | | - dev->tagset.queue_depth = |
---|
2055 | | - min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; |
---|
2056 | | - dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false); |
---|
2057 | | - if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) { |
---|
2058 | | - dev->tagset.cmd_size = max(dev->tagset.cmd_size, |
---|
2059 | | - nvme_pci_cmd_size(dev, true)); |
---|
2060 | | - } |
---|
| 2330 | + dev->tagset.numa_node = dev->ctrl.numa_node; |
---|
| 2331 | + dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth, |
---|
| 2332 | + BLK_MQ_MAX_DEPTH) - 1; |
---|
| 2333 | + dev->tagset.cmd_size = sizeof(struct nvme_iod); |
---|
2061 | 2334 | dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; |
---|
2062 | 2335 | dev->tagset.driver_data = dev; |
---|
| 2336 | + |
---|
| 2337 | + /* |
---|
| 2338 | + * Some Apple controllers require tags to be unique
---|
| 2339 | + * across admin and IO queue, so reserve the first 32 |
---|
| 2340 | + * tags of the IO queue. |
---|
| 2341 | + */ |
---|
| 2342 | + if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) |
---|
| 2343 | + dev->tagset.reserved_tags = NVME_AQ_DEPTH; |
---|
2063 | 2344 | |
---|
2064 | 2345 | ret = blk_mq_alloc_tag_set(&dev->tagset); |
---|
2065 | 2346 | if (ret) { |
---|
2066 | 2347 | dev_warn(dev->ctrl.device, |
---|
2067 | 2348 | "IO queues tagset allocation failed %d\n", ret); |
---|
2068 | | - return ret; |
---|
| 2349 | + return; |
---|
2069 | 2350 | } |
---|
2070 | 2351 | dev->ctrl.tagset = &dev->tagset; |
---|
2071 | | - |
---|
2072 | | - nvme_dbbuf_set(dev); |
---|
2073 | 2352 | } else { |
---|
2074 | 2353 | blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); |
---|
2075 | 2354 | |
---|
.. | .. |
---|
2077 | 2356 | nvme_free_queues(dev, dev->online_queues); |
---|
2078 | 2357 | } |
---|
2079 | 2358 | |
---|
2080 | | - return 0; |
---|
| 2359 | + nvme_dbbuf_set(dev); |
---|
2081 | 2360 | } |
---|
2082 | 2361 | |
---|
2083 | 2362 | static int nvme_pci_enable(struct nvme_dev *dev) |
---|
.. | .. |
---|
2090 | 2369 | |
---|
2091 | 2370 | pci_set_master(pdev); |
---|
2092 | 2371 | |
---|
2093 | | - if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && |
---|
2094 | | - dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) |
---|
| 2372 | + if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64))) |
---|
2095 | 2373 | goto disable; |
---|
2096 | 2374 | |
---|
2097 | 2375 | if (readl(dev->bar + NVME_REG_CSTS) == -1) { |
---|
.. | .. |
---|
2110 | 2388 | |
---|
2111 | 2389 | dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); |
---|
2112 | 2390 | |
---|
2113 | | - dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1, |
---|
| 2391 | + if (dev->ctrl.quirks & NVME_QUIRK_LIMIT_IOQD32) |
---|
| 2392 | + io_queue_depth = 32; |
---|
| 2393 | + |
---|
| 2394 | + dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1, |
---|
2114 | 2395 | io_queue_depth); |
---|
| 2396 | + dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ |
---|
2115 | 2397 | dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); |
---|
2116 | 2398 | dev->dbs = dev->bar + 4096; |
---|
| 2399 | + |
---|
| 2400 | + /* |
---|
| 2401 | + * Some Apple controllers require a non-standard SQE size. |
---|
| 2402 | + * Interestingly they also seem to ignore the CC:IOSQES register |
---|
| 2403 | + * so we don't bother updating it here. |
---|
| 2404 | + */ |
---|
| 2405 | + if (dev->ctrl.quirks & NVME_QUIRK_128_BYTES_SQES) |
---|
| 2406 | + dev->io_sqes = 7; |
---|
| 2407 | + else |
---|
| 2408 | + dev->io_sqes = NVME_NVM_IOSQES; |
---|
2117 | 2409 | |
---|
2118 | 2410 | /* |
---|
2119 | 2411 | * Temporary fix for the Apple controller found in the MacBook8,1 and |
---|
.. | .. |
---|
2131 | 2423 | dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, " |
---|
2132 | 2424 | "set queue depth=%u\n", dev->q_depth); |
---|
2133 | 2425 | } |
---|
| 2426 | + |
---|
| 2427 | + /* |
---|
| 2428 | + * Controllers with the shared tags quirk need the IO queue to be |
---|
| 2429 | + * big enough so that we get 32 tags for the admin queue |
---|
| 2430 | + */ |
---|
| 2431 | + if ((dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) && |
---|
| 2432 | + (dev->q_depth < (NVME_AQ_DEPTH + 2))) { |
---|
| 2433 | + dev->q_depth = NVME_AQ_DEPTH + 2; |
---|
| 2434 | + dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", |
---|
| 2435 | + dev->q_depth); |
---|
| 2436 | + } |
---|
| 2437 | + |
---|
2134 | 2438 | |
---|
2135 | 2439 | nvme_map_cmb(dev); |
---|
2136 | 2440 | |
---|
.. | .. |
---|
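A small model of the depth clamping done in nvme_pci_enable() above: CAP.MQES is a 0's based field, the io_queue_depth module parameter caps it, and shared-tags (Apple) controllers are bumped so a couple of tags remain once the admin queue takes its 32. The helper name and example values are illustrative:

```c
#include <stdio.h>

#define AQ_DEPTH 32	/* NVME_AQ_DEPTH */

/* userspace model of the queue-depth clamping in nvme_pci_enable() */
static unsigned int effective_q_depth(unsigned int cap_mqes,
				      unsigned int io_queue_depth,
				      int shared_tags_quirk)
{
	unsigned int q_depth = cap_mqes + 1;	/* MQES is 0's based */

	if (q_depth > io_queue_depth)
		q_depth = io_queue_depth;

	/* shared-tags controllers hand the first 32 tags of the I/O queue
	 * to the admin queue, so make sure something is left over */
	if (shared_tags_quirk && q_depth < AQ_DEPTH + 2)
		q_depth = AQ_DEPTH + 2;

	return q_depth;
}

int main(void)
{
	printf("%u\n", effective_q_depth(1023, 1024, 0));	/* 1024 */
	printf("%u\n", effective_q_depth(1023, 1024, 1));	/* 1024 */
	printf("%u\n", effective_q_depth(15, 1024, 1));		/* 34   */
	return 0;
}
```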
2164 | 2468 | |
---|
2165 | 2469 | static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) |
---|
2166 | 2470 | { |
---|
2167 | | - int i; |
---|
2168 | | - bool dead = true; |
---|
| 2471 | + bool dead = true, freeze = false; |
---|
2169 | 2472 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
---|
2170 | 2473 | |
---|
2171 | 2474 | mutex_lock(&dev->shutdown_lock); |
---|
.. | .. |
---|
2173 | 2476 | u32 csts = readl(dev->bar + NVME_REG_CSTS); |
---|
2174 | 2477 | |
---|
2175 | 2478 | if (dev->ctrl.state == NVME_CTRL_LIVE || |
---|
2176 | | - dev->ctrl.state == NVME_CTRL_RESETTING) |
---|
| 2479 | + dev->ctrl.state == NVME_CTRL_RESETTING) { |
---|
| 2480 | + freeze = true; |
---|
2177 | 2481 | nvme_start_freeze(&dev->ctrl); |
---|
| 2482 | + } |
---|
2178 | 2483 | dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || |
---|
2179 | 2484 | pdev->error_state != pci_channel_io_normal); |
---|
2180 | 2485 | } |
---|
.. | .. |
---|
2183 | 2488 | * Give the controller a chance to complete all entered requests if |
---|
2184 | 2489 | * doing a safe shutdown. |
---|
2185 | 2490 | */ |
---|
2186 | | - if (!dead) { |
---|
2187 | | - if (shutdown) |
---|
2188 | | - nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); |
---|
2189 | | - } |
---|
| 2491 | + if (!dead && shutdown && freeze) |
---|
| 2492 | + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); |
---|
2190 | 2493 | |
---|
2191 | 2494 | nvme_stop_queues(&dev->ctrl); |
---|
2192 | 2495 | |
---|
.. | .. |
---|
2194 | 2497 | nvme_disable_io_queues(dev); |
---|
2195 | 2498 | nvme_disable_admin_queue(dev, shutdown); |
---|
2196 | 2499 | } |
---|
2197 | | - for (i = dev->ctrl.queue_count - 1; i >= 0; i--) |
---|
2198 | | - nvme_suspend_queue(&dev->queues[i]); |
---|
2199 | | - |
---|
| 2500 | + nvme_suspend_io_queues(dev); |
---|
| 2501 | + nvme_suspend_queue(&dev->queues[0]); |
---|
2200 | 2502 | nvme_pci_disable(dev); |
---|
| 2503 | + nvme_reap_pending_cqes(dev); |
---|
2201 | 2504 | |
---|
2202 | 2505 | blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); |
---|
2203 | 2506 | blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); |
---|
| 2507 | + blk_mq_tagset_wait_completed_request(&dev->tagset); |
---|
| 2508 | + blk_mq_tagset_wait_completed_request(&dev->admin_tagset); |
---|
2204 | 2509 | |
---|
2205 | 2510 | /* |
---|
2206 | 2511 | * The driver will not be starting up queues again if shutting down so |
---|
.. | .. |
---|
2215 | 2520 | mutex_unlock(&dev->shutdown_lock); |
---|
2216 | 2521 | } |
---|
2217 | 2522 | |
---|
| 2523 | +static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) |
---|
| 2524 | +{ |
---|
| 2525 | + if (!nvme_wait_reset(&dev->ctrl)) |
---|
| 2526 | + return -EBUSY; |
---|
| 2527 | + nvme_dev_disable(dev, shutdown); |
---|
| 2528 | + return 0; |
---|
| 2529 | +} |
---|
| 2530 | + |
---|
2218 | 2531 | static int nvme_setup_prp_pools(struct nvme_dev *dev) |
---|
2219 | 2532 | { |
---|
2220 | 2533 | dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, |
---|
2221 | | - PAGE_SIZE, PAGE_SIZE, 0); |
---|
| 2534 | + NVME_CTRL_PAGE_SIZE, |
---|
| 2535 | + NVME_CTRL_PAGE_SIZE, 0); |
---|
2222 | 2536 | if (!dev->prp_page_pool) |
---|
2223 | 2537 | return -ENOMEM; |
---|
2224 | 2538 | |
---|
.. | .. |
---|
2238 | 2552 | dma_pool_destroy(dev->prp_small_pool); |
---|
2239 | 2553 | } |
---|
2240 | 2554 | |
---|
| 2555 | +static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) |
---|
| 2556 | +{ |
---|
| 2557 | + size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); |
---|
| 2558 | + size_t alloc_size = sizeof(__le64 *) * npages + |
---|
| 2559 | + sizeof(struct scatterlist) * NVME_MAX_SEGS; |
---|
| 2560 | + |
---|
| 2561 | + WARN_ON_ONCE(alloc_size > PAGE_SIZE); |
---|
| 2562 | + dev->iod_mempool = mempool_create_node(1, |
---|
| 2563 | + mempool_kmalloc, mempool_kfree, |
---|
| 2564 | + (void *)alloc_size, GFP_KERNEL, |
---|
| 2565 | + dev_to_node(dev->dev)); |
---|
| 2566 | + if (!dev->iod_mempool) |
---|
| 2567 | + return -ENOMEM; |
---|
| 2568 | + return 0; |
---|
| 2569 | +} |
---|
| 2570 | + |
---|
| 2571 | +static void nvme_free_tagset(struct nvme_dev *dev) |
---|
| 2572 | +{ |
---|
| 2573 | + if (dev->tagset.tags) |
---|
| 2574 | + blk_mq_free_tag_set(&dev->tagset); |
---|
| 2575 | + dev->ctrl.tagset = NULL; |
---|
| 2576 | +} |
---|
| 2577 | + |
---|
| 2578 | +/* pairs with nvme_pci_alloc_dev */ |
---|
2241 | 2579 | static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) |
---|
2242 | 2580 | { |
---|
2243 | 2581 | struct nvme_dev *dev = to_nvme_dev(ctrl); |
---|
2244 | 2582 | |
---|
2245 | 2583 | nvme_dbbuf_dma_free(dev); |
---|
2246 | | - put_device(dev->dev); |
---|
2247 | | - if (dev->tagset.tags) |
---|
2248 | | - blk_mq_free_tag_set(&dev->tagset); |
---|
| 2584 | + nvme_free_tagset(dev); |
---|
2249 | 2585 | if (dev->ctrl.admin_q) |
---|
2250 | 2586 | blk_put_queue(dev->ctrl.admin_q); |
---|
2251 | | - kfree(dev->queues); |
---|
2252 | 2587 | free_opal_dev(dev->ctrl.opal_dev); |
---|
2253 | 2588 | mempool_destroy(dev->iod_mempool); |
---|
| 2589 | + put_device(dev->dev); |
---|
| 2590 | + kfree(dev->queues); |
---|
2254 | 2591 | kfree(dev); |
---|
2255 | 2592 | } |
---|
2256 | 2593 | |
---|
2257 | | -static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) |
---|
| 2594 | +static void nvme_remove_dead_ctrl(struct nvme_dev *dev) |
---|
2258 | 2595 | { |
---|
2259 | | - dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); |
---|
2260 | | - |
---|
| 2596 | + /* |
---|
| 2597 | + * Set state to deleting now to avoid blocking nvme_wait_reset(), which |
---|
| 2598 | + * may be holding this pci_dev's device lock. |
---|
| 2599 | + */ |
---|
| 2600 | + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); |
---|
2261 | 2601 | nvme_get_ctrl(&dev->ctrl); |
---|
2262 | 2602 | nvme_dev_disable(dev, false); |
---|
2263 | 2603 | nvme_kill_queues(&dev->ctrl); |
---|
.. | .. |
---|
2271 | 2611 | container_of(work, struct nvme_dev, ctrl.reset_work); |
---|
2272 | 2612 | bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); |
---|
2273 | 2613 | int result; |
---|
2274 | | - enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; |
---|
2275 | 2614 | |
---|
2276 | 2615 | if (dev->ctrl.state != NVME_CTRL_RESETTING) { |
---|
2277 | 2616 | dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", |
---|
.. | .. |
---|
2286 | 2625 | */ |
---|
2287 | 2626 | if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) |
---|
2288 | 2627 | nvme_dev_disable(dev, false); |
---|
| 2628 | + nvme_sync_queues(&dev->ctrl); |
---|
2289 | 2629 | |
---|
2290 | 2630 | mutex_lock(&dev->shutdown_lock); |
---|
2291 | 2631 | result = nvme_pci_enable(dev); |
---|
.. | .. |
---|
2300 | 2640 | if (result) |
---|
2301 | 2641 | goto out_unlock; |
---|
2302 | 2642 | |
---|
| 2643 | + dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); |
---|
| 2644 | + |
---|
2303 | 2645 | /* |
---|
2304 | 2646 | * Limit the max command size to prevent iod->sg allocations going |
---|
2305 | 2647 | * over a single page. |
---|
2306 | 2648 | */ |
---|
2307 | | - dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; |
---|
| 2649 | + dev->ctrl.max_hw_sectors = min_t(u32, |
---|
| 2650 | + NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9); |
---|
2308 | 2651 | dev->ctrl.max_segments = NVME_MAX_SEGS; |
---|
| 2652 | + |
---|
| 2653 | + /* |
---|
| 2654 | + * Don't limit the IOMMU merged segment size. |
---|
| 2655 | + */ |
---|
| 2656 | + dma_set_max_seg_size(dev->dev, 0xffffffff); |
---|
| 2657 | + |
---|
2309 | 2658 | mutex_unlock(&dev->shutdown_lock); |
---|
2310 | 2659 | |
---|
2311 | 2660 | /* |
---|
.. | .. |
---|
2318 | 2667 | result = -EBUSY; |
---|
2319 | 2668 | goto out; |
---|
2320 | 2669 | } |
---|
| 2670 | + |
---|
| 2671 | + /* |
---|
| 2672 | + * We do not support an SGL for metadata (yet), so we are limited to a |
---|
| 2673 | + * single integrity segment for the separate metadata pointer. |
---|
| 2674 | + */ |
---|
| 2675 | + dev->ctrl.max_integrity_segments = 1; |
---|
2321 | 2676 | |
---|
2322 | 2677 | result = nvme_init_identify(&dev->ctrl); |
---|
2323 | 2678 | if (result) |
---|
.. | .. |
---|
2359 | 2714 | dev_warn(dev->ctrl.device, "IO queues not created\n"); |
---|
2360 | 2715 | nvme_kill_queues(&dev->ctrl); |
---|
2361 | 2716 | nvme_remove_namespaces(&dev->ctrl); |
---|
2362 | | - new_state = NVME_CTRL_ADMIN_ONLY; |
---|
| 2717 | + nvme_free_tagset(dev); |
---|
2363 | 2718 | } else { |
---|
2364 | 2719 | nvme_start_queues(&dev->ctrl); |
---|
2365 | 2720 | nvme_wait_freeze(&dev->ctrl); |
---|
2366 | | - /* hit this only when allocate tagset fails */ |
---|
2367 | | - if (nvme_dev_add(dev)) |
---|
2368 | | - new_state = NVME_CTRL_ADMIN_ONLY; |
---|
| 2721 | + nvme_dev_add(dev); |
---|
2369 | 2722 | nvme_unfreeze(&dev->ctrl); |
---|
2370 | 2723 | } |
---|
2371 | 2724 | |
---|
.. | .. |
---|
2373 | 2726 | * If only admin queue live, keep it to do further investigation or |
---|
2374 | 2727 | * recovery. |
---|
2375 | 2728 | */ |
---|
2376 | | - if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { |
---|
| 2729 | + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { |
---|
2377 | 2730 | dev_warn(dev->ctrl.device, |
---|
2378 | | - "failed to mark controller state %d\n", new_state); |
---|
| 2731 | + "failed to mark controller live state\n"); |
---|
2379 | 2732 | result = -ENODEV; |
---|
2380 | 2733 | goto out; |
---|
2381 | 2734 | } |
---|
.. | .. |
---|
2386 | 2739 | out_unlock: |
---|
2387 | 2740 | mutex_unlock(&dev->shutdown_lock); |
---|
2388 | 2741 | out: |
---|
2389 | | - nvme_remove_dead_ctrl(dev, result); |
---|
| 2742 | + if (result) |
---|
| 2743 | + dev_warn(dev->ctrl.device, |
---|
| 2744 | + "Removing after probe failure status: %d\n", result); |
---|
| 2745 | + nvme_remove_dead_ctrl(dev); |
---|
2390 | 2746 | } |
---|
2391 | 2747 | |
---|
2392 | 2748 | static void nvme_remove_dead_ctrl_work(struct work_struct *work) |
---|
.. | .. |
---|
2421 | 2777 | { |
---|
2422 | 2778 | struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); |
---|
2423 | 2779 | |
---|
2424 | | - return snprintf(buf, size, "%s", dev_name(&pdev->dev)); |
---|
| 2780 | + return snprintf(buf, size, "%s\n", dev_name(&pdev->dev)); |
---|
2425 | 2781 | } |
---|
2426 | 2782 | |
---|
2427 | 2783 | static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { |
---|
2428 | 2784 | .name = "pcie", |
---|
2429 | 2785 | .module = THIS_MODULE, |
---|
2430 | | - .flags = NVME_F_METADATA_SUPPORTED, |
---|
| 2786 | + .flags = NVME_F_METADATA_SUPPORTED | |
---|
| 2787 | + NVME_F_PCI_P2PDMA, |
---|
2431 | 2788 | .reg_read32 = nvme_pci_reg_read32, |
---|
2432 | 2789 | .reg_write32 = nvme_pci_reg_write32, |
---|
2433 | 2790 | .reg_read64 = nvme_pci_reg_read64, |
---|
.. | .. |
---|
2478 | 2835 | (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") || |
---|
2479 | 2836 | dmi_match(DMI_BOARD_NAME, "PRIME Z370-A"))) |
---|
2480 | 2837 | return NVME_QUIRK_NO_APST; |
---|
| 2838 | + } else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 || |
---|
| 2839 | + pdev->device == 0xa808 || pdev->device == 0xa809)) || |
---|
| 2840 | + (pdev->vendor == 0x1e0f && pdev->device == 0x0001)) { |
---|
| 2841 | + /* |
---|
| 2842 | + * Forcing to use host managed nvme power settings for |
---|
| 2843 | + * lowest idle power with quick resume latency on |
---|
| 2844 | + * Samsung and Toshiba SSDs based on suspend behavior |
---|
| 2845 | + * on Coffee Lake board for LENOVO C640 |
---|
| 2846 | + */ |
---|
| 2847 | + if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) && |
---|
| 2848 | + dmi_match(DMI_BOARD_NAME, "LNVNB161216")) |
---|
| 2849 | + return NVME_QUIRK_SIMPLE_SUSPEND; |
---|
2481 | 2850 | } |
---|
2482 | 2851 | |
---|
2483 | 2852 | return 0; |
---|
.. | .. |
---|
2492 | 2861 | nvme_put_ctrl(&dev->ctrl); |
---|
2493 | 2862 | } |
---|
2494 | 2863 | |
---|
2495 | | -static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
---|
| 2864 | +static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, |
---|
| 2865 | + const struct pci_device_id *id) |
---|
2496 | 2866 | { |
---|
2497 | | - int node, result = -ENOMEM; |
---|
2498 | | - struct nvme_dev *dev; |
---|
2499 | 2867 | unsigned long quirks = id->driver_data; |
---|
2500 | | - size_t alloc_size; |
---|
2501 | | - |
---|
2502 | | - node = dev_to_node(&pdev->dev); |
---|
2503 | | - if (node == NUMA_NO_NODE) |
---|
2504 | | - set_dev_node(&pdev->dev, first_memory_node); |
---|
| 2868 | + int node = dev_to_node(&pdev->dev); |
---|
| 2869 | + struct nvme_dev *dev; |
---|
| 2870 | + int ret = -ENOMEM; |
---|
2505 | 2871 | |
---|
2506 | 2872 | dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); |
---|
2507 | 2873 | if (!dev) |
---|
2508 | | - return -ENOMEM; |
---|
2509 | | - |
---|
2510 | | - dev->queues = kcalloc_node(num_possible_cpus() + 1, |
---|
2511 | | - sizeof(struct nvme_queue), GFP_KERNEL, node); |
---|
2512 | | - if (!dev->queues) |
---|
2513 | | - goto free; |
---|
2514 | | - |
---|
2515 | | - dev->dev = get_device(&pdev->dev); |
---|
2516 | | - pci_set_drvdata(pdev, dev); |
---|
2517 | | - |
---|
2518 | | - result = nvme_dev_map(dev); |
---|
2519 | | - if (result) |
---|
2520 | | - goto put_pci; |
---|
2521 | | - |
---|
| 2874 | + return ERR_PTR(-ENOMEM); |
---|
2522 | 2875 | INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); |
---|
2523 | 2876 | INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); |
---|
2524 | 2877 | mutex_init(&dev->shutdown_lock); |
---|
2525 | | - init_completion(&dev->ioq_wait); |
---|
| 2878 | + |
---|
| 2879 | + dev->nr_write_queues = write_queues; |
---|
| 2880 | + dev->nr_poll_queues = poll_queues; |
---|
| 2881 | + dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1; |
---|
| 2882 | + dev->queues = kcalloc_node(dev->nr_allocated_queues, |
---|
| 2883 | + sizeof(struct nvme_queue), GFP_KERNEL, node); |
---|
| 2884 | + if (!dev->queues) |
---|
| 2885 | + goto out_free_dev; |
---|
| 2886 | + |
---|
| 2887 | + dev->dev = get_device(&pdev->dev); |
---|
| 2888 | + |
---|
| 2889 | + quirks |= check_vendor_combination_bug(pdev); |
---|
| 2890 | + if (!noacpi && acpi_storage_d3(&pdev->dev)) { |
---|
| 2891 | + /* |
---|
| 2892 | + * Some systems use a BIOS workaround to ask for D3 on
---|
| 2893 | + * platforms that support kernel managed suspend. |
---|
| 2894 | + */ |
---|
| 2895 | + dev_info(&pdev->dev, |
---|
| 2896 | + "platform quirk: setting simple suspend\n"); |
---|
| 2897 | + quirks |= NVME_QUIRK_SIMPLE_SUSPEND; |
---|
| 2898 | + } |
---|
| 2899 | + ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, |
---|
| 2900 | + quirks); |
---|
| 2901 | + if (ret) |
---|
| 2902 | + goto out_put_device; |
---|
| 2903 | + return dev; |
---|
| 2904 | + |
---|
| 2905 | +out_put_device: |
---|
| 2906 | + put_device(dev->dev); |
---|
| 2907 | + kfree(dev->queues); |
---|
| 2908 | +out_free_dev: |
---|
| 2909 | + kfree(dev); |
---|
| 2910 | + return ERR_PTR(ret); |
---|
| 2911 | +} |
---|
| 2912 | + |
---|
| 2913 | +static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
---|
| 2914 | +{ |
---|
| 2915 | + struct nvme_dev *dev; |
---|
| 2916 | + int result = -ENOMEM; |
---|
| 2917 | + |
---|
| 2918 | + dev = nvme_pci_alloc_dev(pdev, id); |
---|
| 2919 | + if (IS_ERR(dev)) |
---|
| 2920 | + return PTR_ERR(dev); |
---|
| 2921 | + |
---|
| 2922 | + result = nvme_dev_map(dev); |
---|
| 2923 | + if (result) |
---|
| 2924 | + goto out_uninit_ctrl; |
---|
2526 | 2925 | |
---|
2527 | 2926 | result = nvme_setup_prp_pools(dev); |
---|
2528 | 2927 | if (result) |
---|
2529 | | - goto unmap; |
---|
| 2928 | + goto out_dev_unmap; |
---|
2530 | 2929 | |
---|
2531 | | - quirks |= check_vendor_combination_bug(pdev); |
---|
2532 | | - |
---|
2533 | | - /* |
---|
2534 | | - * Double check that our mempool alloc size will cover the biggest |
---|
2535 | | - * command we support. |
---|
2536 | | - */ |
---|
2537 | | - alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ, |
---|
2538 | | - NVME_MAX_SEGS, true); |
---|
2539 | | - WARN_ON_ONCE(alloc_size > PAGE_SIZE); |
---|
2540 | | - |
---|
2541 | | - dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, |
---|
2542 | | - mempool_kfree, |
---|
2543 | | - (void *) alloc_size, |
---|
2544 | | - GFP_KERNEL, node); |
---|
2545 | | - if (!dev->iod_mempool) { |
---|
2546 | | - result = -ENOMEM; |
---|
2547 | | - goto release_pools; |
---|
2548 | | - } |
---|
2549 | | - |
---|
2550 | | - result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, |
---|
2551 | | - quirks); |
---|
| 2930 | + result = nvme_pci_alloc_iod_mempool(dev); |
---|
2552 | 2931 | if (result) |
---|
2553 | | - goto release_mempool; |
---|
| 2932 | + goto out_release_prp_pools; |
---|
2554 | 2933 | |
---|
2555 | 2934 | dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); |
---|
| 2935 | + pci_set_drvdata(pdev, dev); |
---|
2556 | 2936 | |
---|
2557 | 2937 | nvme_reset_ctrl(&dev->ctrl); |
---|
2558 | | - nvme_get_ctrl(&dev->ctrl); |
---|
2559 | 2938 | async_schedule(nvme_async_probe, dev); |
---|
2560 | | - |
---|
2561 | 2939 | return 0; |
---|
2562 | 2940 | |
---|
2563 | | - release_mempool: |
---|
2564 | | - mempool_destroy(dev->iod_mempool); |
---|
2565 | | - release_pools: |
---|
| 2941 | +out_release_prp_pools: |
---|
2566 | 2942 | nvme_release_prp_pools(dev); |
---|
2567 | | - unmap: |
---|
| 2943 | +out_dev_unmap: |
---|
2568 | 2944 | nvme_dev_unmap(dev); |
---|
2569 | | - put_pci: |
---|
2570 | | - put_device(dev->dev); |
---|
2571 | | - free: |
---|
2572 | | - kfree(dev->queues); |
---|
2573 | | - kfree(dev); |
---|
| 2945 | +out_uninit_ctrl: |
---|
| 2946 | + nvme_uninit_ctrl(&dev->ctrl); |
---|
2574 | 2947 | return result; |
---|
2575 | 2948 | } |
---|
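
The reworked probe path above uses the kernel's standard goto-unwind error handling: each out_* label undoes one earlier setup step (PRP pools, then the BAR mapping, then the controller init), so a failure at any point releases only what was actually acquired, in reverse order. A minimal stand-alone sketch of the same pattern, with hypothetical names and user-space stand-ins rather than driver code:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-ins for the mapping/pools/mempool setup steps. */
    static void *acquire(const char *what) { printf("acquire %s\n", what); return malloc(1); }
    static void release(const char *what, void *p) { printf("release %s\n", what); free(p); }

    static int probe_like(void)
    {
        void *bar, *pools, *mempool;

        bar = acquire("bar mapping");
        if (!bar)
            goto out;
        pools = acquire("prp pools");
        if (!pools)
            goto out_unmap;
        mempool = acquire("iod mempool");
        if (!mempool)
            goto out_release_pools;

        /* Success: the real driver keeps these until remove(); the toy frees them. */
        release("iod mempool", mempool);
        release("prp pools", pools);
        release("bar mapping", bar);
        return 0;

    out_release_pools:
        release("prp pools", pools);
    out_unmap:
        release("bar mapping", bar);
    out:
        return -1;
    }

    int main(void)
    {
        return probe_like() ? 1 : 0;
    }
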
2576 | 2949 | |
---|
2577 | 2950 | static void nvme_reset_prepare(struct pci_dev *pdev) |
---|
2578 | 2951 | { |
---|
2579 | 2952 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
---|
2580 | | - nvme_dev_disable(dev, false); |
---|
| 2953 | + |
---|
| 2954 | + /* |
---|
| 2955 | + * We don't need to check the return value from waiting for the reset |
---|
| 2956 | + * state as the pci_dev device lock is held, making it impossible to race
---|
| 2957 | + * with ->remove(). |
---|
| 2958 | + */ |
---|
| 2959 | + nvme_disable_prepare_reset(dev, false); |
---|
| 2960 | + nvme_sync_queues(&dev->ctrl); |
---|
2581 | 2961 | } |
---|
2582 | 2962 | |
---|
2583 | 2963 | static void nvme_reset_done(struct pci_dev *pdev) |
---|
2584 | 2964 | { |
---|
2585 | 2965 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
---|
2586 | | - nvme_reset_ctrl_sync(&dev->ctrl); |
---|
| 2966 | + |
---|
| 2967 | + if (!nvme_try_sched_reset(&dev->ctrl)) |
---|
| 2968 | + flush_work(&dev->ctrl.reset_work); |
---|
2587 | 2969 | } |
---|
2588 | 2970 | |
---|
2589 | 2971 | static void nvme_shutdown(struct pci_dev *pdev) |
---|
2590 | 2972 | { |
---|
2591 | 2973 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
---|
2592 | | - nvme_dev_disable(dev, true); |
---|
| 2974 | + |
---|
| 2975 | + nvme_disable_prepare_reset(dev, true); |
---|
2593 | 2976 | } |
---|
2594 | 2977 | |
---|
2595 | 2978 | /* |
---|
.. | .. |
---|
2617 | 3000 | nvme_free_host_mem(dev); |
---|
2618 | 3001 | nvme_dev_remove_admin(dev); |
---|
2619 | 3002 | nvme_free_queues(dev, 0); |
---|
2620 | | - nvme_uninit_ctrl(&dev->ctrl); |
---|
2621 | 3003 | nvme_release_prp_pools(dev); |
---|
2622 | 3004 | nvme_dev_unmap(dev); |
---|
2623 | | - nvme_put_ctrl(&dev->ctrl); |
---|
| 3005 | + nvme_uninit_ctrl(&dev->ctrl); |
---|
2624 | 3006 | } |
---|
2625 | 3007 | |
---|
2626 | 3008 | #ifdef CONFIG_PM_SLEEP |
---|
2627 | | -static int nvme_suspend(struct device *dev) |
---|
| 3009 | +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps) |
---|
2628 | 3010 | { |
---|
2629 | | - struct pci_dev *pdev = to_pci_dev(dev); |
---|
2630 | | - struct nvme_dev *ndev = pci_get_drvdata(pdev); |
---|
| 3011 | + return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps); |
---|
| 3012 | +} |
---|
2631 | 3013 | |
---|
2632 | | - nvme_dev_disable(ndev, true); |
---|
2633 | | - return 0; |
---|
| 3014 | +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps) |
---|
| 3015 | +{ |
---|
| 3016 | + return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL); |
---|
2634 | 3017 | } |
---|
2635 | 3018 | |
---|
2636 | 3019 | static int nvme_resume(struct device *dev) |
---|
2637 | 3020 | { |
---|
| 3021 | + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); |
---|
| 3022 | + struct nvme_ctrl *ctrl = &ndev->ctrl; |
---|
| 3023 | + |
---|
| 3024 | + if (ndev->last_ps == U32_MAX || |
---|
| 3025 | + nvme_set_power_state(ctrl, ndev->last_ps) != 0) |
---|
| 3026 | + return nvme_try_sched_reset(&ndev->ctrl); |
---|
| 3027 | + return 0; |
---|
| 3028 | +} |
---|
| 3029 | + |
---|
| 3030 | +static int nvme_suspend(struct device *dev) |
---|
| 3031 | +{ |
---|
| 3032 | + struct pci_dev *pdev = to_pci_dev(dev); |
---|
| 3033 | + struct nvme_dev *ndev = pci_get_drvdata(pdev); |
---|
| 3034 | + struct nvme_ctrl *ctrl = &ndev->ctrl; |
---|
| 3035 | + int ret = -EBUSY; |
---|
| 3036 | + |
---|
| 3037 | + ndev->last_ps = U32_MAX; |
---|
| 3038 | + |
---|
| 3039 | + /* |
---|
| 3040 | + * The platform does not remove power for a kernel-managed suspend, so
---|
| 3041 | + * use host-managed NVMe power settings for lowest idle power if
---|
| 3042 | + * possible. This should have quicker resume latency than a full device |
---|
| 3043 | + * shutdown. But if the firmware is involved after the suspend or the |
---|
| 3044 | + * device does not support any non-default power states, shut down the |
---|
| 3045 | + * device fully. |
---|
| 3046 | + * |
---|
| 3047 | + * If ASPM is not enabled for the device, shut down the device and allow |
---|
| 3048 | + * the PCI bus layer to put it into D3 in order to take the PCIe link |
---|
| 3049 | + * down, so as to allow the platform to achieve its minimum low-power |
---|
| 3050 | + * state (which may not be possible if the link is up). |
---|
| 3051 | + * |
---|
| 3052 | + * If a host memory buffer is enabled, shut down the device as the NVMe |
---|
| 3053 | + * specification allows the device to access the host memory buffer in |
---|
| 3054 | + * host DRAM from all power states, but the host will fail accesses to DRAM
---|
| 3055 | + * during S3. |
---|
| 3056 | + */ |
---|
| 3057 | + if (pm_suspend_via_firmware() || !ctrl->npss || |
---|
| 3058 | + !pcie_aspm_enabled(pdev) || |
---|
| 3059 | + ndev->nr_host_mem_descs || |
---|
| 3060 | + (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) |
---|
| 3061 | + return nvme_disable_prepare_reset(ndev, true); |
---|
| 3062 | + |
---|
| 3063 | + nvme_start_freeze(ctrl); |
---|
| 3064 | + nvme_wait_freeze(ctrl); |
---|
| 3065 | + nvme_sync_queues(ctrl); |
---|
| 3066 | + |
---|
| 3067 | + if (ctrl->state != NVME_CTRL_LIVE) |
---|
| 3068 | + goto unfreeze; |
---|
| 3069 | + |
---|
| 3070 | + ret = nvme_get_power_state(ctrl, &ndev->last_ps); |
---|
| 3071 | + if (ret < 0) |
---|
| 3072 | + goto unfreeze; |
---|
| 3073 | + |
---|
| 3074 | + /* |
---|
| 3075 | + * A saved state prevents PCI PM from generically controlling the
---|
| 3076 | + * device's power. If we're using protocol-specific settings, we don't
---|
| 3077 | + * want PCI interfering.
---|
| 3078 | + */ |
---|
| 3079 | + pci_save_state(pdev); |
---|
| 3080 | + |
---|
| 3081 | + ret = nvme_set_power_state(ctrl, ctrl->npss); |
---|
| 3082 | + if (ret < 0) |
---|
| 3083 | + goto unfreeze; |
---|
| 3084 | + |
---|
| 3085 | + if (ret) { |
---|
| 3086 | + /* discard the saved state */ |
---|
| 3087 | + pci_load_saved_state(pdev, NULL); |
---|
| 3088 | + |
---|
| 3089 | + /* |
---|
| 3090 | + * Clearing npss forces a controller reset on resume. The |
---|
| 3091 | + * correct value will be rediscovered then. |
---|
| 3092 | + */ |
---|
| 3093 | + ret = nvme_disable_prepare_reset(ndev, true); |
---|
| 3094 | + ctrl->npss = 0; |
---|
| 3095 | + } |
---|
| 3096 | +unfreeze: |
---|
| 3097 | + nvme_unfreeze(ctrl); |
---|
| 3098 | + return ret; |
---|
| 3099 | +} |
---|
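
The policy above keys off ctrl->npss (how many non-default power states the controller reports) and the Power Management feature (NVME_FEAT_POWER_MGMT, feature ID 02h) wrapped by nvme_get_power_state()/nvme_set_power_state(). A hedged user-space sketch for inspecting both through the admin passthrough ioctl; it assumes a /dev/nvme0 character device and <linux/nvme_ioctl.h>, and is an illustration rather than driver code:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/nvme_ioctl.h>

    int main(void)
    {
        uint8_t id[4096];
        struct nvme_admin_cmd cmd;
        int fd = open("/dev/nvme0", O_RDONLY);   /* assumed device node */

        if (fd < 0)
            return 1;

        /* Identify Controller (opcode 06h, CNS 01h); NPSS is byte 263. */
        memset(&cmd, 0, sizeof(cmd));
        cmd.opcode = 0x06;
        cmd.addr = (uint64_t)(uintptr_t)id;
        cmd.data_len = sizeof(id);
        cmd.cdw10 = 1;
        if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0)
            printf("npss = %u\n", id[263]);

        /* Get Features (opcode 0Ah), Power Management (FID 02h); PS is bits 4:0. */
        memset(&cmd, 0, sizeof(cmd));
        cmd.opcode = 0x0a;
        cmd.cdw10 = 0x02;
        if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0)
            printf("current power state = %u\n", cmd.result & 0x1f);

        close(fd);
        return 0;
    }

If npss reads back as 0, the suspend path above always takes the full nvme_disable_prepare_reset() branch rather than programming a low-power state.
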
| 3100 | + |
---|
| 3101 | +static int nvme_simple_suspend(struct device *dev) |
---|
| 3102 | +{ |
---|
| 3103 | + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); |
---|
| 3104 | + |
---|
| 3105 | + return nvme_disable_prepare_reset(ndev, true); |
---|
| 3106 | +} |
---|
| 3107 | + |
---|
| 3108 | +static int nvme_simple_resume(struct device *dev) |
---|
| 3109 | +{ |
---|
2638 | 3110 | struct pci_dev *pdev = to_pci_dev(dev); |
---|
2639 | 3111 | struct nvme_dev *ndev = pci_get_drvdata(pdev); |
---|
2640 | 3112 | |
---|
2641 | | - nvme_reset_ctrl(&ndev->ctrl); |
---|
2642 | | - return 0; |
---|
| 3113 | + return nvme_try_sched_reset(&ndev->ctrl); |
---|
2643 | 3114 | } |
---|
2644 | | -#endif |
---|
2645 | 3115 | |
---|
2646 | | -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); |
---|
| 3116 | +static const struct dev_pm_ops nvme_dev_pm_ops = { |
---|
| 3117 | + .suspend = nvme_suspend, |
---|
| 3118 | + .resume = nvme_resume, |
---|
| 3119 | + .freeze = nvme_simple_suspend, |
---|
| 3120 | + .thaw = nvme_simple_resume, |
---|
| 3121 | + .poweroff = nvme_simple_suspend, |
---|
| 3122 | + .restore = nvme_simple_resume, |
---|
| 3123 | +}; |
---|
| 3124 | +#endif /* CONFIG_PM_SLEEP */ |
---|
2647 | 3125 | |
---|
2648 | 3126 | static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, |
---|
2649 | 3127 | pci_channel_state_t state) |
---|
.. | .. |
---|
2686 | 3164 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
---|
2687 | 3165 | |
---|
2688 | 3166 | flush_work(&dev->ctrl.reset_work); |
---|
2689 | | - pci_cleanup_aer_uncorrect_error_status(pdev); |
---|
2690 | 3167 | } |
---|
2691 | 3168 | |
---|
2692 | 3169 | static const struct pci_error_handlers nvme_err_handler = { |
---|
.. | .. |
---|
2698 | 3175 | }; |
---|
2699 | 3176 | |
---|
2700 | 3177 | static const struct pci_device_id nvme_id_table[] = { |
---|
2701 | | - { PCI_VDEVICE(INTEL, 0x0953), |
---|
| 3178 | + { PCI_VDEVICE(INTEL, 0x0953), /* Intel 750/P3500/P3600/P3700 */ |
---|
2702 | 3179 | .driver_data = NVME_QUIRK_STRIPE_SIZE | |
---|
2703 | 3180 | NVME_QUIRK_DEALLOCATE_ZEROES, }, |
---|
2704 | | - { PCI_VDEVICE(INTEL, 0x0a53), |
---|
| 3181 | + { PCI_VDEVICE(INTEL, 0x0a53), /* Intel P3520 */ |
---|
2705 | 3182 | .driver_data = NVME_QUIRK_STRIPE_SIZE | |
---|
2706 | 3183 | NVME_QUIRK_DEALLOCATE_ZEROES, }, |
---|
2707 | | - { PCI_VDEVICE(INTEL, 0x0a54), |
---|
| 3184 | + { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ |
---|
2708 | 3185 | .driver_data = NVME_QUIRK_STRIPE_SIZE | |
---|
2709 | | - NVME_QUIRK_DEALLOCATE_ZEROES, }, |
---|
2710 | | - { PCI_VDEVICE(INTEL, 0x0a55), |
---|
| 3186 | + NVME_QUIRK_DEALLOCATE_ZEROES | |
---|
| 3187 | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, |
---|
| 3188 | + { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ |
---|
2711 | 3189 | .driver_data = NVME_QUIRK_STRIPE_SIZE | |
---|
2712 | 3190 | NVME_QUIRK_DEALLOCATE_ZEROES, }, |
---|
2713 | 3191 | { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ |
---|
2714 | 3192 | .driver_data = NVME_QUIRK_NO_DEEPEST_PS | |
---|
2715 | | - NVME_QUIRK_MEDIUM_PRIO_SQ }, |
---|
| 3193 | + NVME_QUIRK_MEDIUM_PRIO_SQ | |
---|
| 3194 | + NVME_QUIRK_NO_TEMP_THRESH_CHANGE | |
---|
| 3195 | + NVME_QUIRK_DISABLE_WRITE_ZEROES, }, |
---|
| 3196 | + { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ |
---|
| 3197 | + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, |
---|
2716 | 3198 | { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ |
---|
2717 | | - .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, |
---|
| 3199 | + .driver_data = NVME_QUIRK_IDENTIFY_CNS | |
---|
| 3200 | + NVME_QUIRK_DISABLE_WRITE_ZEROES | |
---|
| 3201 | + NVME_QUIRK_BOGUS_NID, }, |
---|
| 3202 | + { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ |
---|
| 3203 | + .driver_data = NVME_QUIRK_BOGUS_NID, }, |
---|
| 3204 | + { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ |
---|
| 3205 | + .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, |
---|
2718 | 3206 | { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ |
---|
2719 | | - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, |
---|
| 3207 | + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | |
---|
| 3208 | + NVME_QUIRK_NO_NS_DESC_LIST, }, |
---|
2720 | 3209 | { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ |
---|
2721 | 3210 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, |
---|
2722 | 3211 | { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ |
---|
.. | .. |
---|
2726 | 3215 | { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ |
---|
2727 | 3216 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, |
---|
2728 | 3217 | { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ |
---|
2729 | | - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, |
---|
| 3218 | + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | |
---|
| 3219 | + NVME_QUIRK_DISABLE_WRITE_ZEROES |
---|
| 3220 | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, |
---|
| 3221 | + { PCI_DEVICE(0x1987, 0x5013), /* Phison E13 */ |
---|
| 3222 | + .driver_data = NVME_QUIRK_LIMIT_IOQD32, },
---|
| 3223 | + { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ |
---|
| 3224 | + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | |
---|
| 3225 | + NVME_QUIRK_BOGUS_NID, }, |
---|
| 3226 | + { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ |
---|
| 3227 | + .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | |
---|
| 3228 | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, |
---|
2730 | 3229 | { PCI_DEVICE(0x1d1d, 0x1f1f), /* LightNVM qemu device */
---|
2731 | 3230 | .driver_data = NVME_QUIRK_LIGHTNVM, }, |
---|
2732 | 3231 | { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ |
---|
2733 | 3232 | .driver_data = NVME_QUIRK_LIGHTNVM, }, |
---|
2734 | 3233 | { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ |
---|
2735 | 3234 | .driver_data = NVME_QUIRK_LIGHTNVM, }, |
---|
2736 | | - { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, |
---|
| 3235 | + { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ |
---|
| 3236 | + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | |
---|
| 3237 | + NVME_QUIRK_BOGUS_NID, }, |
---|
| 3238 | + { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ |
---|
| 3239 | + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | |
---|
| 3240 | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, |
---|
| 3241 | + { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ |
---|
| 3242 | + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, |
---|
| 3243 | + { PCI_DEVICE(0x1344, 0x6001), /* Micron Nitro NVMe */ |
---|
| 3244 | + .driver_data = NVME_QUIRK_BOGUS_NID, }, |
---|
| 3245 | + { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ |
---|
| 3246 | + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, |
---|
| 3247 | + { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ |
---|
| 3248 | + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, |
---|
| 3249 | + { PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */ |
---|
| 3250 | + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, |
---|
2737 | 3251 | { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ |
---|
2738 | 3252 | .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, |
---|
2739 | | - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, |
---|
| 3253 | + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), |
---|
| 3254 | + .driver_data = NVME_QUIRK_SINGLE_VECTOR }, |
---|
2740 | 3255 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, |
---|
| 3256 | + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), |
---|
| 3257 | + .driver_data = NVME_QUIRK_SINGLE_VECTOR | |
---|
| 3258 | + NVME_QUIRK_128_BYTES_SQES | |
---|
| 3259 | + NVME_QUIRK_SHARED_TAGS | |
---|
| 3260 | + NVME_QUIRK_SKIP_CID_GEN }, |
---|
| 3261 | + { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, |
---|
2741 | 3262 | { 0, } |
---|
2742 | 3263 | }; |
---|
2743 | 3264 | MODULE_DEVICE_TABLE(pci, nvme_id_table); |
---|
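
Each table entry is matched in order by the PCI core against the vendor/device (or class) IDs, and driver_data carries the NVME_QUIRK_* bits into the probe path; the PCI_CLASS_STORAGE_EXPRESS catch-all therefore has to stay last so that more specific entries, and their quirks, win. A rough sketch of what the helper macro expands to, assuming the usual <linux/pci.h> and <linux/mod_devicetable.h> definitions (kernel-context C, illustration only):

    /* PCI_VDEVICE(INTEL, 0x0953) plus its quirks is roughly equivalent to: */
    static const struct pci_device_id example_entry = {
        .vendor      = PCI_VENDOR_ID_INTEL,
        .device      = 0x0953,
        .subvendor   = PCI_ANY_ID,
        .subdevice   = PCI_ANY_ID,
        .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES,
    };
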
.. | .. |
---|
2748 | 3269 | .probe = nvme_probe, |
---|
2749 | 3270 | .remove = nvme_remove, |
---|
2750 | 3271 | .shutdown = nvme_shutdown, |
---|
| 3272 | +#ifdef CONFIG_PM_SLEEP |
---|
2751 | 3273 | .driver = { |
---|
2752 | 3274 | .pm = &nvme_dev_pm_ops, |
---|
2753 | 3275 | }, |
---|
| 3276 | +#endif |
---|
2754 | 3277 | .sriov_configure = pci_sriov_configure_simple, |
---|
2755 | 3278 | .err_handler = &nvme_err_handler, |
---|
2756 | 3279 | }; |
---|
2757 | 3280 | |
---|
2758 | 3281 | static int __init nvme_init(void) |
---|
2759 | 3282 | { |
---|
| 3283 | + BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); |
---|
| 3284 | + BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); |
---|
| 3285 | + BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); |
---|
| 3286 | + BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); |
---|
| 3287 | + |
---|
2760 | 3288 | return pci_register_driver(&nvme_driver); |
---|
2761 | 3289 | } |
---|
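
The BUILD_BUG_ON() lines added to nvme_init() turn layout assumptions into compile-time failures: the create/delete queue commands must be exactly 64 bytes (one submission queue entry), and the IRQ affinity API must support at least two interrupt sets. The same kind of guard can be written outside the kernel with C11 _Static_assert; a small sketch with a hypothetical struct:

    #include <stdint.h>

    /* Stand-in for a fixed-layout command: 64 bytes, like an NVMe SQE. */
    struct fake_sq_entry {
        uint8_t  opcode;
        uint8_t  flags;
        uint16_t command_id;
        uint8_t  payload[60];
    };

    /* The build fails right here if the layout ever drifts from 64 bytes. */
    _Static_assert(sizeof(struct fake_sq_entry) == 64,
                   "submission queue entries must be 64 bytes");

    int main(void)
    {
        return 0;
    }
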
2762 | 3290 | |
---|
.. | .. |
---|
2764 | 3292 | { |
---|
2765 | 3293 | pci_unregister_driver(&nvme_driver); |
---|
2766 | 3294 | flush_workqueue(nvme_wq); |
---|
2767 | | - _nvme_check_size(); |
---|
2768 | 3295 | } |
---|
2769 | 3296 | |
---|
2770 | 3297 | MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); |
---|