.. | .. |
---|
200 | 200 | #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ |
---|
201 | 201 | #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ |
---|
202 | 202 | #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ |
---|
| 203 | +#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ |
---|
| 204 | +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ |
---|
| 205 | +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ |
---|
203 | 206 | __u32 num_regions; /* Max region index + 1 */ |
---|
204 | 207 | __u32 num_irqs; /* Max IRQ index + 1 */ |
---|
| 208 | + __u32 cap_offset; /* Offset within info struct of first cap */ |
---|
205 | 209 | }; |
---|
206 | 210 | #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) |
---|
207 | 211 | |
---|
.. | .. |
---|
215 | 219 | #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" |
---|
216 | 220 | #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" |
---|
217 | 221 | #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" |
---|
| 222 | +#define VFIO_DEVICE_API_AP_STRING "vfio-ap" |
---|
| 223 | + |
---|
| 224 | +/* |
---|
| 225 | + * The following capabilities are unique to s390 zPCI devices. Their contents |
---|
| 226 | + * are further defined in vfio_zdev.h. |
---|
| 227 | + */ |
---|
| 228 | +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 |
---|
| 229 | +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 |
---|
| 230 | +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 |
---|
| 231 | +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 |
---|
218 | 232 | |
---|
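The new VFIO_DEVICE_FLAGS_CAPS flag and cap_offset field turn vfio_device_info into a variable-sized structure carrying a capability chain, mirroring the existing region-info caps. A minimal userspace sketch of consuming it might look as follows; the helper names and error handling are illustrative, and only the ioctl and structures defined in this header are assumed.

```c
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Walk the capability chain appended to a vfio_device_info buffer. */
static struct vfio_info_cap_header *
device_info_cap(struct vfio_device_info *info, __u16 id)
{
	struct vfio_info_cap_header *hdr;
	__u32 off;

	if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS) || !info->cap_offset)
		return NULL;

	for (off = info->cap_offset; off; off = hdr->next) {
		hdr = (struct vfio_info_cap_header *)((char *)info + off);
		if (hdr->id == id)
			return hdr;
	}
	return NULL;
}

/* Query device info, growing the buffer until the capability chain fits. */
static struct vfio_device_info *device_get_info(int device_fd)
{
	struct vfio_device_info *info, *bigger;
	__u32 argsz = sizeof(*info);

	info = calloc(1, argsz);
	if (!info)
		return NULL;

	for (;;) {
		info->argsz = argsz;
		if (ioctl(device_fd, VFIO_DEVICE_GET_INFO, info)) {
			free(info);
			return NULL;
		}
		if (info->argsz <= argsz)
			return info;		/* everything fit */

		argsz = info->argsz;		/* kernel asked for more room */
		bigger = realloc(info, argsz);
		if (!bigger) {
			free(info);
			return NULL;
		}
		info = bigger;
		memset(info, 0, argsz);
	}
}
```

A zPCI-aware caller could then look up, for example, VFIO_DEVICE_INFO_CAP_ZPCI_BASE with device_info_cap() and interpret the payload as described in vfio_zdev.h.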
219 | 233 | /** |
---|
220 | 234 | * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, |
---|
.. | .. |
---|
293 | 307 | __u32 subtype; /* type specific */ |
---|
294 | 308 | }; |
---|
295 | 309 | |
---|
| 310 | +/* |
---|
| 311 | + * List of region types, global per bus driver. |
---|
| 312 | + * If you introduce a new type, please add it here. |
---|
| 313 | + */ |
---|
| 314 | + |
---|
| 315 | +/* PCI region type containing a PCI vendor part */ |
---|
296 | 316 | #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) |
---|
297 | 317 | #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) |
---|
| 318 | +#define VFIO_REGION_TYPE_GFX (1) |
---|
| 319 | +#define VFIO_REGION_TYPE_CCW (2) |
---|
| 320 | +#define VFIO_REGION_TYPE_MIGRATION (3) |
---|
298 | 321 | |
---|
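The type/subtype pairs below are reported to userspace through the VFIO_REGION_INFO_CAP_TYPE capability (struct vfio_region_info_cap_type, defined earlier in this header). As a rough sketch of how a special region such as the migration region might be located, under the assumption that the function name and error handling are illustrative only:

```c
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/*
 * Scan the device's regions for one whose VFIO_REGION_INFO_CAP_TYPE capability
 * matches the given type/subtype, e.g. VFIO_REGION_TYPE_MIGRATION and
 * VFIO_REGION_SUBTYPE_MIGRATION. num_regions comes from
 * vfio_device_info.num_regions. Returns the region index or -1.
 */
static int find_region_by_type(int device_fd, __u32 num_regions,
			       __u32 type, __u32 subtype)
{
	__u32 i;

	for (i = 0; i < num_regions; i++) {
		struct vfio_region_info *info, *bigger;
		struct vfio_info_cap_header *hdr;
		struct vfio_region_info_cap_type *cap;
		__u32 argsz = sizeof(*info), off;
		int match = 0;

		info = calloc(1, argsz);
		if (!info)
			return -1;
		info->argsz = argsz;
		info->index = i;

		/* First call reports how much space the caps chain needs. */
		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info) ||
		    !(info->flags & VFIO_REGION_INFO_FLAG_CAPS) ||
		    info->argsz <= argsz) {
			free(info);
			continue;
		}

		argsz = info->argsz;
		bigger = realloc(info, argsz);
		if (!bigger) {
			free(info);
			return -1;
		}
		info = bigger;
		memset(info, 0, argsz);
		info->argsz = argsz;
		info->index = i;
		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info)) {
			free(info);
			continue;
		}

		for (off = info->cap_offset; off && !match; off = hdr->next) {
			hdr = (struct vfio_info_cap_header *)((char *)info + off);
			if (hdr->id != VFIO_REGION_INFO_CAP_TYPE)
				continue;
			cap = (struct vfio_region_info_cap_type *)hdr;
			match = (cap->type == type && cap->subtype == subtype);
		}
		free(info);
		if (match)
			return (int)i;
	}
	return -1;
}
```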
299 | | -/* 8086 Vendor sub-types */ |
---|
| 322 | +/* sub-types for VFIO_REGION_TYPE_PCI_* */ |
---|
| 323 | + |
---|
| 324 | +/* 8086 vendor PCI sub-types */ |
---|
300 | 325 | #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) |
---|
301 | 326 | #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) |
---|
302 | 327 | #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) |
---|
| 328 | + |
---|
| 329 | +/* 10de vendor PCI sub-types */ |
---|
| 330 | +/* |
---|
| 331 | + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. |
---|
| 332 | + */ |
---|
| 333 | +#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) |
---|
| 334 | + |
---|
| 335 | +/* 1014 vendor PCI sub-types */ |
---|
| 336 | +/* |
---|
| 337 | + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU |
---|
| 338 | + * to do TLB invalidation on a GPU. |
---|
| 339 | + */ |
---|
| 340 | +#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) |
---|
| 341 | + |
---|
| 342 | +/* sub-types for VFIO_REGION_TYPE_GFX */ |
---|
| 343 | +#define VFIO_REGION_SUBTYPE_GFX_EDID (1) |
---|
| 344 | + |
---|
| 345 | +/** |
---|
| 346 | + * struct vfio_region_gfx_edid - EDID region layout. |
---|
| 347 | + * |
---|
| 348 | + * Set display link state and EDID blob. |
---|
| 349 | + * |
---|
| 350 | + * The EDID blob has monitor information such as brand, name, serial |
---|
| 351 | + * number, physical size, supported video modes and more. |
---|
| 352 | + * |
---|
| 353 | + * This special region allows userspace (typically qemu) to set a virtual |
---|
| 354 | + * EDID for the virtual monitor, which allows a flexible display |
---|
| 355 | + * configuration. |
---|
| 356 | + * |
---|
| 357 | + * For the EDID blob spec, see: |
---|
| 358 | + * https://en.wikipedia.org/wiki/Extended_Display_Identification_Data |
---|
| 359 | + * |
---|
| 360 | + * On Linux systems, you can find the EDID blob in sysfs: |
---|
| 361 | + * /sys/class/drm/${card}/${connector}/edid |
---|
| 362 | + * |
---|
| 363 | + * You can use the edid-decode utility (shipped with xorg-x11-utils) to |
---|
| 364 | + * decode the EDID blob. |
---|
| 365 | + * |
---|
| 366 | + * @edid_offset: location of the edid blob, relative to the |
---|
| 367 | + * start of the region (readonly). |
---|
| 368 | + * @edid_max_size: max size of the edid blob (readonly). |
---|
| 369 | + * @edid_size: actual edid size (read/write). |
---|
| 370 | + * @link_state: display link state (read/write). |
---|
| 371 | + * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on. |
---|
| 372 | + * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off. |
---|
| 373 | + * @max_xres: max display width (0 == no limitation, readonly). |
---|
| 374 | + * @max_yres: max display height (0 == no limitation, readonly). |
---|
| 375 | + * |
---|
| 376 | + * EDID update protocol: |
---|
| 377 | + * (1) set link-state to down. |
---|
| 378 | + * (2) update edid blob and size. |
---|
| 379 | + * (3) set link-state to up. |
---|
| 380 | + */ |
---|
| 381 | +struct vfio_region_gfx_edid { |
---|
| 382 | + __u32 edid_offset; |
---|
| 383 | + __u32 edid_max_size; |
---|
| 384 | + __u32 edid_size; |
---|
| 385 | + __u32 max_xres; |
---|
| 386 | + __u32 max_yres; |
---|
| 387 | + __u32 link_state; |
---|
| 388 | +#define VFIO_DEVICE_GFX_LINK_STATE_UP 1 |
---|
| 389 | +#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 |
---|
| 390 | +}; |
---|
| 391 | + |
---|
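A minimal sketch of the EDID update protocol above, assuming the GFX EDID region has already been located via VFIO_DEVICE_GET_REGION_INFO and that region_off is its file offset on the device fd; the function name is an illustrative choice and most error reporting is trimmed.

```c
#include <stddef.h>
#include <unistd.h>
#include <linux/vfio.h>

/* EDID update protocol, steps (1)-(3) above. */
static int gfx_set_edid(int device_fd, off_t region_off,
			const void *edid, __u32 edid_len)
{
	struct vfio_region_gfx_edid hdr;
	off_t link_off = region_off +
		offsetof(struct vfio_region_gfx_edid, link_state);
	off_t size_off = region_off +
		offsetof(struct vfio_region_gfx_edid, edid_size);
	__u32 state;

	if (pread(device_fd, &hdr, sizeof(hdr), region_off) != sizeof(hdr))
		return -1;
	if (edid_len > hdr.edid_max_size)
		return -1;

	/* (1) Take the link down before touching the blob. */
	state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
	if (pwrite(device_fd, &state, sizeof(state), link_off) != sizeof(state))
		return -1;

	/* (2) Write the blob at edid_offset, then publish its size. */
	if (pwrite(device_fd, edid, edid_len,
		   region_off + hdr.edid_offset) != (ssize_t)edid_len)
		return -1;
	if (pwrite(device_fd, &edid_len, sizeof(edid_len), size_off) !=
	    sizeof(edid_len))
		return -1;

	/* (3) Bring the link back up with the new EDID in place. */
	state = VFIO_DEVICE_GFX_LINK_STATE_UP;
	if (pwrite(device_fd, &state, sizeof(state), link_off) != sizeof(state))
		return -1;

	return 0;
}
```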
| 392 | +/* sub-types for VFIO_REGION_TYPE_CCW */ |
---|
| 393 | +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) |
---|
| 394 | +#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) |
---|
| 395 | +#define VFIO_REGION_SUBTYPE_CCW_CRW (3) |
---|
| 396 | + |
---|
| 397 | +/* sub-types for VFIO_REGION_TYPE_MIGRATION */ |
---|
| 398 | +#define VFIO_REGION_SUBTYPE_MIGRATION (1) |
---|
| 399 | + |
---|
| 400 | +/* |
---|
| 401 | + * The structure vfio_device_migration_info is placed at the 0th offset of |
---|
| 402 | + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related |
---|
| 403 | + * migration information. Field accesses from this structure are only supported |
---|
| 404 | + * at their native width and alignment. Otherwise, the result is undefined and |
---|
| 405 | + * vendor drivers should return an error. |
---|
| 406 | + * |
---|
| 407 | + * device_state: (read/write) |
---|
| 408 | + * - The user application writes to this field to inform the vendor driver |
---|
| 409 | + * about the device state to be transitioned to. |
---|
| 410 | + * - The vendor driver should take the necessary actions to change the |
---|
| 411 | + * device state. After successful transition to a given state, the |
---|
| 412 | + * vendor driver should return success on write(device_state, state) |
---|
| 413 | + * system call. If the device state transition fails, the vendor driver |
---|
| 414 | + * should return an appropriate -errno for the fault condition. |
---|
| 415 | + * - On the user application side, if the device state transition fails, |
---|
| 416 | + * that is, if write(device_state, state) returns an error, read |
---|
| 417 | + * device_state again to determine the current state of the device from |
---|
| 418 | + * the vendor driver. |
---|
| 419 | + * - The vendor driver should return the previous state of the device unless |
---|
| 420 | + * the vendor driver has encountered an internal error, in which case |
---|
| 421 | + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. |
---|
| 422 | + * - The user application must use the device reset ioctl to recover the |
---|
| 423 | + * device from VFIO_DEVICE_STATE_ERROR state. If the device is |
---|
| 424 | + * indicated to be in a valid device state by reading device_state, the |
---|
| 425 | + * user application may attempt to transition the device to any valid |
---|
| 426 | + * state reachable from the current state or terminate itself. |
---|
| 427 | + * |
---|
| 428 | + * device_state consists of 3 bits: |
---|
| 429 | + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, |
---|
| 430 | + * it indicates the _STOP state. When the device state is changed to |
---|
| 431 | + * _STOP, the driver should stop the device before write() returns. |
---|
| 432 | + * - If bit 1 is set, it indicates the _SAVING state, which means that the |
---|
| 433 | + * driver should start gathering device state information that will be |
---|
| 434 | + * provided to the VFIO user application to save the device's state. |
---|
| 435 | + * - If bit 2 is set, it indicates the _RESUMING state, which means that |
---|
| 436 | + * the driver should prepare to resume the device. Data provided through |
---|
| 437 | + * the migration region should be used to resume the device. |
---|
| 438 | + * Bits 3 - 31 are reserved for future use. To preserve them, the user |
---|
| 439 | + * application should perform a read-modify-write operation on this |
---|
| 440 | + * field when modifying the specified bits. |
---|
| 441 | + * |
---|
| 442 | + * +------- _RESUMING |
---|
| 443 | + * |+------ _SAVING |
---|
| 444 | + * ||+----- _RUNNING |
---|
| 445 | + * ||| |
---|
| 446 | + * 000b => Device Stopped, not saving or resuming |
---|
| 447 | + * 001b => Device running, which is the default state |
---|
| 448 | + * 010b => Stop the device & save the device state, stop-and-copy state |
---|
| 449 | + * 011b => Device running and save the device state, pre-copy state |
---|
| 450 | + * 100b => Device stopped and the device state is resuming |
---|
| 451 | + * 101b => Invalid state |
---|
| 452 | + * 110b => Error state |
---|
| 453 | + * 111b => Invalid state |
---|
| 454 | + * |
---|
| 455 | + * State transitions: |
---|
| 456 | + * |
---|
| 457 | + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP |
---|
| 458 | + * (100b) (001b) (011b) (010b) (000b) |
---|
| 459 | + * 0. Running or default state |
---|
| 460 | + * | |
---|
| 461 | + * |
---|
| 462 | + * 1. Normal Shutdown (optional) |
---|
| 463 | + * |------------------------------------->| |
---|
| 464 | + * |
---|
| 465 | + * 2. Save the state or suspend |
---|
| 466 | + * |------------------------->|---------->| |
---|
| 467 | + * |
---|
| 468 | + * 3. Save the state during live migration |
---|
| 469 | + * |----------->|------------>|---------->| |
---|
| 470 | + * |
---|
| 471 | + * 4. Resuming |
---|
| 472 | + * |<---------| |
---|
| 473 | + * |
---|
| 474 | + * 5. Resumed |
---|
| 475 | + * |--------->| |
---|
| 476 | + * |
---|
| 477 | + * 0. Default state of VFIO device is _RUNNING when the user application starts. |
---|
| 478 | + * 1. During normal shutdown of the user application, the user application may |
---|
| 479 | + * optionally change the VFIO device state from _RUNNING to _STOP. The |
---|
| 480 | + * vendor driver must support this optional transition but must not |
---|
| 481 | + * require it. |
---|
| 482 | + * 2. When the user application saves state or suspends the application, the |
---|
| 483 | + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. |
---|
| 484 | + * On state transition from _RUNNING to stop-and-copy, the driver must stop the |
---|
| 485 | + * device, save the device state and send it to the application through the |
---|
| 486 | + * migration region. The sequence to be followed for such transition is given |
---|
| 487 | + * below. |
---|
| 488 | + * 3. In live migration of the user application, the state transitions from _RUNNING |
---|
| 489 | + * to pre-copy, to stop-and-copy, and to _STOP. |
---|
| 490 | + * On state transition from _RUNNING to pre-copy, the driver should start |
---|
| 491 | + * gathering the device state while the application is still running and send |
---|
| 492 | + * the device state data to application through the migration region. |
---|
| 493 | + * On state transition from pre-copy to stop-and-copy, the driver must stop |
---|
| 494 | + * the device, save the device state and send it to the user application |
---|
| 495 | + * through the migration region. |
---|
| 496 | + * Vendor drivers must support the pre-copy state even for implementations |
---|
| 497 | + * where no data is provided to the user before the stop-and-copy state. The |
---|
| 498 | + * user must not be required to consume all migration data before the device |
---|
| 499 | + * transitions to a new state, including the stop-and-copy state. |
---|
| 500 | + * The sequence to be followed for above two transitions is given below. |
---|
| 501 | + * 4. To start the resuming phase, the device state should be transitioned from |
---|
| 502 | + * the _RUNNING to the _RESUMING state. |
---|
| 503 | + * In the _RESUMING state, the driver should use the device state data |
---|
| 504 | + * received through the migration region to resume the device. |
---|
| 505 | + * 5. After providing saved device data to the driver, the application should |
---|
| 506 | + * change the state from _RESUMING to _RUNNING. |
---|
| 507 | + * |
---|
| 508 | + * reserved: |
---|
| 509 | + * Reads on this field return zero and writes are ignored. |
---|
| 510 | + * |
---|
| 511 | + * pending_bytes: (read only) |
---|
| 512 | + * The number of pending bytes still to be migrated from the vendor driver. |
---|
| 513 | + * |
---|
| 514 | + * data_offset: (read only) |
---|
| 515 | + * The user application should read the data_offset field from the migration |
---|
| 516 | + * region. The user application should read the device data from this |
---|
| 517 | + * offset within the migration region during the _SAVING state or write |
---|
| 518 | + * the device data during the _RESUMING state. See below for details of |
---|
| 519 | + * sequence to be followed. |
---|
| 520 | + * |
---|
| 521 | + * data_size: (read/write) |
---|
| 522 | + * The user application should read data_size to get the size in bytes of |
---|
| 523 | + * the data copied in the migration region during the _SAVING state and |
---|
| 524 | + * write the size in bytes of the data copied in the migration region |
---|
| 525 | + * during the _RESUMING state. |
---|
| 526 | + * |
---|
| 527 | + * The format of the migration region is as follows: |
---|
| 528 | + * ------------------------------------------------------------------ |
---|
| 529 | + * |vfio_device_migration_info| data section | |
---|
| 530 | + * | | /////////////////////////////// | |
---|
| 531 | + * ------------------------------------------------------------------ |
---|
| 532 | + * ^ ^ |
---|
| 533 | + * offset 0-trapped part data_offset |
---|
| 534 | + * |
---|
| 535 | + * The structure vfio_device_migration_info is always followed by the data |
---|
| 536 | + * section in the region, so data_offset will always be nonzero. The offset |
---|
| 537 | + * from where the data is copied is decided by the kernel driver. The data |
---|
| 538 | + * section can be trapped, mmapped, or partitioned, depending on how the kernel |
---|
| 539 | + * driver defines the data section. The data section partition can be defined |
---|
| 540 | + * as mapped by the sparse mmap capability. If mmapped, data_offset must be |
---|
| 541 | + * page aligned, whereas the initial section, which contains the |
---|
| 542 | + * vfio_device_migration_info structure, might not end at a page-aligned |
---|
| 543 | + * offset. The user is not required to access the data through mmap |
---|
| 544 | + * regardless of the mmap capabilities of the region. |
---|
| 545 | + * The vendor driver should determine whether and how to partition the data |
---|
| 546 | + * section. The vendor driver should return data_offset accordingly. |
---|
| 547 | + * |
---|
| 548 | + * The sequence to be followed while in pre-copy state and stop-and-copy state |
---|
| 549 | + * is as follows: |
---|
| 550 | + * a. Read pending_bytes, indicating the start of a new iteration to get device |
---|
| 551 | + * data. Repeated read on pending_bytes at this stage should have no side |
---|
| 552 | + * effects. |
---|
| 553 | + * If pending_bytes == 0, the user application should not iterate to get data |
---|
| 554 | + * for that device. |
---|
| 555 | + * If pending_bytes > 0, perform the following steps. |
---|
| 556 | + * b. Read data_offset, indicating that the vendor driver should make data |
---|
| 557 | + * available through the data section. The vendor driver should return from |
---|
| 558 | + * this read operation only after data is available from (region + data_offset) |
---|
| 559 | + * to (region + data_offset + data_size). |
---|
| 560 | + * c. Read data_size, which is the amount of data in bytes available through |
---|
| 561 | + * the migration region. |
---|
| 562 | + * Read on data_offset and data_size should return the offset and size of |
---|
| 563 | + * the current buffer if the user application reads data_offset and |
---|
| 564 | + * data_size more than once here. |
---|
| 565 | + * d. Read data_size bytes of data from (region + data_offset) from the |
---|
| 566 | + * migration region. |
---|
| 567 | + * e. Process the data. |
---|
| 568 | + * f. Read pending_bytes, which indicates that the data from the previous |
---|
| 569 | + * iteration has been read. If pending_bytes > 0, go to step b. |
---|
| 570 | + * |
---|
| 571 | + * The user application can transition from the _SAVING|_RUNNING |
---|
| 572 | + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the |
---|
| 573 | + * number of pending bytes. The user application should iterate in _SAVING |
---|
| 574 | + * (stop-and-copy) until pending_bytes is 0. |
---|
| 575 | + * |
---|
| 576 | + * The sequence to be followed while _RESUMING device state is as follows: |
---|
| 577 | + * While data for this device is available, repeat the following steps: |
---|
| 578 | + * a. Read data_offset from where the user application should write data. |
---|
| 579 | + * b. Write migration data starting at the migration region + data_offset for |
---|
| 580 | + * the length determined by data_size from the migration source. |
---|
| 581 | + * c. Write data_size, which indicates to the vendor driver that data is |
---|
| 582 | + * written in the migration region. The vendor driver must return from this |
---|
| 583 | + * write operation only after consuming the data. The vendor driver should apply the |
---|
| 584 | + * user-provided migration region data to the device resume state. |
---|
| 585 | + * |
---|
| 586 | + * If an error occurs during the above sequences, the vendor driver can return |
---|
| 587 | + * an error code for next read() or write() operation, which will terminate the |
---|
| 588 | + * loop. The user application should then take the next necessary action, for |
---|
| 589 | + * example, failing migration or terminating the user application. |
---|
| 590 | + * |
---|
| 591 | + * For the user application, data is opaque. The user application should write |
---|
| 592 | + * data in the same order as the data is received and the data should be of |
---|
| 593 | + * the same transaction size as at the source. |
---|
| 594 | + */ |
---|
| 595 | + |
---|
| 596 | +struct vfio_device_migration_info { |
---|
| 597 | + __u32 device_state; /* VFIO device state */ |
---|
| 598 | +#define VFIO_DEVICE_STATE_STOP (0) |
---|
| 599 | +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) |
---|
| 600 | +#define VFIO_DEVICE_STATE_SAVING (1 << 1) |
---|
| 601 | +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) |
---|
| 602 | +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ |
---|
| 603 | + VFIO_DEVICE_STATE_SAVING | \ |
---|
| 604 | + VFIO_DEVICE_STATE_RESUMING) |
---|
| 605 | + |
---|
| 606 | +#define VFIO_DEVICE_STATE_VALID(state) \ |
---|
| 607 | + (state & VFIO_DEVICE_STATE_RESUMING ? \ |
---|
| 608 | + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) |
---|
| 609 | + |
---|
| 610 | +#define VFIO_DEVICE_STATE_IS_ERROR(state) \ |
---|
| 611 | + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ |
---|
| 612 | + VFIO_DEVICE_STATE_RESUMING)) |
---|
| 613 | + |
---|
| 614 | +#define VFIO_DEVICE_STATE_SET_ERROR(state) \ |
---|
| 615 | + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \ |
---|
| 616 | + VFIO_DEVICE_STATE_RESUMING) |
---|
| 617 | + |
---|
| 618 | + __u32 reserved; |
---|
| 619 | + __u64 pending_bytes; |
---|
| 620 | + __u64 data_offset; |
---|
| 621 | + __u64 data_size; |
---|
| 622 | +}; |
---|
303 | 623 | |
---|
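The following sketch illustrates the device_state read-modify-write and the _SAVING iteration (steps a-f) described above. It assumes the migration region has already been located and that region_off is its file offset on the device fd; the helper names, the consume() callback, and the chunked reads from the data section are illustrative choices, not part of the UAPI.

```c
#include <stddef.h>
#include <unistd.h>
#include <linux/vfio.h>

/* struct vfio_device_migration_info sits at offset 0 of the region. */
#define MIG_OFF(field) offsetof(struct vfio_device_migration_info, field)

/* Read-modify-write device_state, preserving reserved bits 3-31. */
static int migration_set_state(int device_fd, off_t region_off, __u32 state)
{
	__u32 cur;

	if (pread(device_fd, &cur, sizeof(cur),
		  region_off + MIG_OFF(device_state)) != sizeof(cur))
		return -1;
	cur = (cur & ~VFIO_DEVICE_STATE_MASK) | state;
	if (pwrite(device_fd, &cur, sizeof(cur),
		   region_off + MIG_OFF(device_state)) != sizeof(cur))
		return -1;	/* on failure, re-read device_state to recover */
	return 0;
}

/*
 * One _SAVING iteration pass (steps a-f above): drain the data section until
 * pending_bytes reads zero. consume() stands in for whatever the application
 * does with the opaque migration data (e.g. stream it to the destination).
 */
static int migration_save_iterate(int device_fd, off_t region_off,
				  int (*consume)(const void *buf, __u64 len))
{
	__u64 pending, data_offset, data_size;
	char buf[4096];

	for (;;) {
		/* a. pending_bytes starts a new iteration */
		if (pread(device_fd, &pending, sizeof(pending),
			  region_off + MIG_OFF(pending_bytes)) != sizeof(pending))
			return -1;
		if (!pending)
			return 0;

		/* b. reading data_offset asks the driver to stage the next chunk */
		pread(device_fd, &data_offset, sizeof(data_offset),
		      region_off + MIG_OFF(data_offset));
		/* c. data_size is the amount staged at data_offset */
		pread(device_fd, &data_size, sizeof(data_size),
		      region_off + MIG_OFF(data_size));

		/* d./e. read and process the staged data */
		while (data_size) {
			__u64 chunk = data_size < sizeof(buf) ?
				      data_size : sizeof(buf);

			if (pread(device_fd, buf, chunk,
				  region_off + data_offset) != (ssize_t)chunk)
				return -1;
			if (consume(buf, chunk))
				return -1;
			data_offset += chunk;
			data_size -= chunk;
		}
		/* f. loop back to pending_bytes */
	}
}
```

For pre-copy, a caller might first call migration_set_state(fd, off, VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING) and iterate, then clear _RUNNING for stop-and-copy and iterate again until pending_bytes reads zero.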
304 | 624 | /* |
---|
305 | 625 | * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped |
---|
.. | .. |
---|
310 | 630 | * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration. |
---|
311 | 631 | */ |
---|
312 | 632 | #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 |
---|
| 633 | + |
---|
| 634 | +/* |
---|
| 635 | + * Capability with compressed real address (aka SSA - small system address) |
---|
| 636 | + * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing |
---|
| 637 | + * and by userspace to associate an NVLink bridge with a GPU. |
---|
| 638 | + */ |
---|
| 639 | +#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 |
---|
| 640 | + |
---|
| 641 | +struct vfio_region_info_cap_nvlink2_ssatgt { |
---|
| 642 | + struct vfio_info_cap_header header; |
---|
| 643 | + __u64 tgt; |
---|
| 644 | +}; |
---|
| 645 | + |
---|
| 646 | +/* |
---|
| 647 | + * Capability with an NVLink link speed. The value is read by |
---|
| 648 | + * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed" |
---|
| 649 | + * property in the device tree. The value is fixed in the hardware |
---|
| 650 | + * and failing to provide the correct value results in the link |
---|
| 651 | + * not working with no indication from the driver why. |
---|
| 652 | + */ |
---|
| 653 | +#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 |
---|
| 654 | + |
---|
| 655 | +struct vfio_region_info_cap_nvlink2_lnkspd { |
---|
| 656 | + struct vfio_info_cap_header header; |
---|
| 657 | + __u32 link_speed; |
---|
| 658 | + __u32 __pad; |
---|
| 659 | +}; |
---|
313 | 660 | |
---|
314 | 661 | /** |
---|
315 | 662 | * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, |
---|
.. | .. |
---|
472 | 819 | |
---|
473 | 820 | enum { |
---|
474 | 821 | VFIO_CCW_IO_IRQ_INDEX, |
---|
| 822 | + VFIO_CCW_CRW_IRQ_INDEX, |
---|
475 | 823 | VFIO_CCW_NUM_IRQS |
---|
476 | 824 | }; |
---|
477 | 825 | |
---|
.. | .. |
---|
602 | 950 | |
---|
603 | 951 | #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) |
---|
604 | 952 | |
---|
| 953 | +/** |
---|
| 954 | + * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17, |
---|
| 955 | + * struct vfio_device_feature) |
---|
| 956 | + * |
---|
| 957 | + * Get, set, or probe feature data of the device. The feature is selected |
---|
| 958 | + * using the FEATURE_MASK portion of the flags field. Support for a feature |
---|
| 959 | + * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe |
---|
| 960 | + * may optionally include the GET and/or SET bits to determine read vs write |
---|
| 961 | + * access of the feature respectively. Probing a feature will return success |
---|
| 962 | + * if the feature is supported and all of the optionally indicated GET/SET |
---|
| 963 | + * methods are supported. The format of the data portion of the structure is |
---|
| 964 | + * specific to the given feature. The data portion is not required for |
---|
| 965 | + * probing. GET and SET are mutually exclusive, except for use with PROBE. |
---|
| 966 | + * |
---|
| 967 | + * Return 0 on success, -errno on failure. |
---|
| 968 | + */ |
---|
| 969 | +struct vfio_device_feature { |
---|
| 970 | + __u32 argsz; |
---|
| 971 | + __u32 flags; |
---|
| 972 | +#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ |
---|
| 973 | +#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ |
---|
| 974 | +#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ |
---|
| 975 | +#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ |
---|
| 976 | + __u8 data[]; |
---|
| 977 | +}; |
---|
| 978 | + |
---|
| 979 | +#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) |
---|
| 980 | + |
---|
| 981 | +/* |
---|
| 982 | + * Provide support for setting a PCI VF Token, which is used as a shared |
---|
| 983 | + * secret between PF and VF drivers. This feature may only be set on a |
---|
| 984 | + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing |
---|
| 985 | + * open VFs. Data provided when setting this feature is a 16-byte array |
---|
| 986 | + * (__u8 b[16]), representing a UUID. |
---|
| 987 | + */ |
---|
| 988 | +#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) |
---|
| 989 | + |
---|
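As an illustration of the FEATURE ioctl flow described above, the sketch below probes for and then sets the PCI VF token on a PF device fd; the helper name is illustrative, and how the 16-byte UUID is generated is outside the scope of this header.

```c
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int set_vf_token(int device_fd, const unsigned char uuid[16])
{
	struct vfio_device_feature *feature;
	size_t len = sizeof(*feature) + 16;
	int ret;

	feature = calloc(1, len);
	if (!feature)
		return -1;
	feature->argsz = len;

	/* Probe: is the VF token feature supported and settable? */
	feature->flags = VFIO_DEVICE_FEATURE_PCI_VF_TOKEN |
			 VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_SET;
	ret = ioctl(device_fd, VFIO_DEVICE_FEATURE, feature);
	if (ret) {
		free(feature);
		return ret;
	}

	/* Set: data[] carries the 16-byte UUID. */
	feature->flags = VFIO_DEVICE_FEATURE_PCI_VF_TOKEN |
			 VFIO_DEVICE_FEATURE_SET;
	memcpy(feature->data, uuid, 16);
	ret = ioctl(device_fd, VFIO_DEVICE_FEATURE, feature);
	free(feature);
	return ret;
}
```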
605 | 990 | /* -------- API for Type1 VFIO IOMMU -------- */ |
---|
606 | 991 | |
---|
607 | 992 | /** |
---|
.. | .. |
---|
616 | 1001 | __u32 argsz; |
---|
617 | 1002 | __u32 flags; |
---|
618 | 1003 | #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ |
---|
619 | | - __u64 iova_pgsizes; /* Bitmap of supported page sizes */ |
---|
| 1004 | +#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ |
---|
| 1005 | + __u64 iova_pgsizes; /* Bitmap of supported page sizes */ |
---|
| 1006 | + __u32 cap_offset; /* Offset within info struct of first cap */ |
---|
| 1007 | +}; |
---|
| 1008 | + |
---|
| 1009 | +/* |
---|
| 1010 | + * The IOVA capability allows reporting the valid IOVA range(s) |
---|
| 1011 | + * excluding any non-relaxable reserved regions exposed by |
---|
| 1012 | + * devices attached to the container. Any DMA map attempt |
---|
| 1013 | + * outside the valid iova range will return an error. |
---|
| 1014 | + * |
---|
| 1015 | + * The structures below define version 1 of this capability. |
---|
| 1016 | + */ |
---|
| 1017 | +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 |
---|
| 1018 | + |
---|
| 1019 | +struct vfio_iova_range { |
---|
| 1020 | + __u64 start; |
---|
| 1021 | + __u64 end; |
---|
| 1022 | +}; |
---|
| 1023 | + |
---|
| 1024 | +struct vfio_iommu_type1_info_cap_iova_range { |
---|
| 1025 | + struct vfio_info_cap_header header; |
---|
| 1026 | + __u32 nr_iovas; |
---|
| 1027 | + __u32 reserved; |
---|
| 1028 | + struct vfio_iova_range iova_ranges[]; |
---|
| 1029 | +}; |
---|
| 1030 | + |
---|
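A sketch of how userspace might retrieve the IOVA range capability from VFIO_IOMMU_GET_INFO; the two-call argsz dance and capability walk follow the same pattern as the device and region info caps. The function name and error handling are illustrative.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Print the valid IOVA ranges reported for a container fd, if any. */
static int print_iova_ranges(int container_fd)
{
	struct vfio_iommu_type1_info *info, *bigger;
	struct vfio_info_cap_header *hdr;
	__u32 argsz = sizeof(*info), off, i;

	info = calloc(1, argsz);
	if (!info)
		return -1;
	info->argsz = argsz;

	/* First call reports how much space the capability chain needs. */
	if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, info))
		goto err;
	if (info->argsz > argsz) {
		argsz = info->argsz;
		bigger = realloc(info, argsz);
		if (!bigger)
			goto err;
		info = bigger;
		memset(info, 0, argsz);
		info->argsz = argsz;
		if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, info))
			goto err;
	}

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset)
		goto out;	/* old kernel or no capabilities reported */

	for (off = info->cap_offset; off; off = hdr->next) {
		struct vfio_iommu_type1_info_cap_iova_range *cap;

		hdr = (struct vfio_info_cap_header *)((char *)info + off);
		if (hdr->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE)
			continue;
		cap = (struct vfio_iommu_type1_info_cap_iova_range *)hdr;
		for (i = 0; i < cap->nr_iovas; i++)
			printf("iova range %u: 0x%llx-0x%llx\n", i,
			       (unsigned long long)cap->iova_ranges[i].start,
			       (unsigned long long)cap->iova_ranges[i].end);
	}
out:
	free(info);
	return 0;
err:
	free(info);
	return -1;
}
```

The same walk can check for VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION or VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL by comparing against those IDs instead.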
| 1031 | +/* |
---|
| 1032 | + * The migration capability allows reporting the supported migration features. |
---|
| 1033 | + * |
---|
| 1034 | + * The structures below define version 1 of this capability. |
---|
| 1035 | + * |
---|
| 1036 | + * The existence of this capability indicates that the IOMMU kernel driver supports |
---|
| 1037 | + * dirty page logging. |
---|
| 1038 | + * |
---|
| 1039 | + * pgsize_bitmap: Kernel driver returns a bitmap of supported page sizes for dirty |
---|
| 1040 | + * page logging. |
---|
| 1041 | + * max_dirty_bitmap_size: Kernel driver returns the maximum supported dirty bitmap |
---|
| 1042 | + * size in bytes that can be used by user applications when getting the dirty |
---|
| 1043 | + * bitmap. |
---|
| 1044 | + */ |
---|
| 1045 | +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 |
---|
| 1046 | + |
---|
| 1047 | +struct vfio_iommu_type1_info_cap_migration { |
---|
| 1048 | + struct vfio_info_cap_header header; |
---|
| 1049 | + __u32 flags; |
---|
| 1050 | + __u64 pgsize_bitmap; |
---|
| 1051 | + __u64 max_dirty_bitmap_size; /* in bytes */ |
---|
| 1052 | +}; |
---|
| 1053 | + |
---|
| 1054 | +/* |
---|
| 1055 | + * The DMA available capability allows reporting the current number of |
---|
| 1056 | + * simultaneously outstanding DMA mappings that are allowed. |
---|
| 1057 | + * |
---|
| 1058 | + * The structure below defines version 1 of this capability. |
---|
| 1059 | + * |
---|
| 1060 | + * avail: specifies the current number of outstanding DMA mappings allowed. |
---|
| 1061 | + */ |
---|
| 1062 | +#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 |
---|
| 1063 | + |
---|
| 1064 | +struct vfio_iommu_type1_info_dma_avail { |
---|
| 1065 | + struct vfio_info_cap_header header; |
---|
| 1066 | + __u32 avail; |
---|
620 | 1067 | }; |
---|
621 | 1068 | |
---|
622 | 1069 | #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) |
---|
.. | .. |
---|
639 | 1086 | |
---|
640 | 1087 | #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) |
---|
641 | 1088 | |
---|
| 1089 | +struct vfio_bitmap { |
---|
| 1090 | + __u64 pgsize; /* page size for bitmap in bytes */ |
---|
| 1091 | + __u64 size; /* in bytes */ |
---|
| 1092 | + __u64 __user *data; /* one bit per page */ |
---|
| 1093 | +}; |
---|
| 1094 | + |
---|
642 | 1095 | /** |
---|
643 | 1096 | * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, |
---|
644 | 1097 | * struct vfio_dma_unmap) |
---|
.. | .. |
---|
648 | 1101 | * field. No guarantee is made to the user that arbitrary unmaps of iova |
---|
649 | 1102 | * or size different from those used in the original mapping call will |
---|
650 | 1103 | * succeed. |
---|
| 1104 | + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap |
---|
| 1105 | + * before unmapping IO virtual addresses. When this flag is set, the user must |
---|
| 1106 | + * provide a struct vfio_bitmap in data[]. The user must provide zeroed |
---|
| 1107 | + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. |
---|
| 1108 | + * A bit in the bitmap represents one page of the user-provided page size in |
---|
| 1109 | + * the vfio_bitmap.pgsize field, consecutively starting from the iova offset. |
---|
| 1110 | + * A set bit indicates that the page at that offset from iova is dirty. A |
---|
| 1111 | + * bitmap of the pages in the range of the unmapped size is returned in the |
---|
| 1112 | + * user-provided vfio_bitmap.data. |
---|
651 | 1113 | */ |
---|
652 | 1114 | struct vfio_iommu_type1_dma_unmap { |
---|
653 | 1115 | __u32 argsz; |
---|
654 | 1116 | __u32 flags; |
---|
| 1117 | +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) |
---|
655 | 1118 | __u64 iova; /* IO virtual address */ |
---|
656 | 1119 | __u64 size; /* Size of mapping (bytes) */ |
---|
| 1120 | + __u8 data[]; |
---|
657 | 1121 | }; |
---|
658 | 1122 | |
---|
659 | 1123 | #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) |
---|
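A sketch of an unmap that also collects the dirty bitmap, combining struct vfio_iommu_type1_dma_unmap with the embedded struct vfio_bitmap described above. The helper name and bitmap sizing are illustrative, and pgsize is assumed to be one of the sizes advertised by the migration capability.

```c
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/*
 * Unmap [iova, iova + size) and retrieve the dirty bitmap for the range in
 * one call. The caller owns the returned bitmap (one bit per pgsize page).
 */
static __u64 *unmap_and_get_dirty(int container_fd, __u64 iova, __u64 size,
				  __u64 pgsize)
{
	struct vfio_iommu_type1_dma_unmap *unmap;
	struct vfio_bitmap *bitmap;
	__u64 pages = (size + pgsize - 1) / pgsize;
	__u64 bitmap_bytes = ((pages + 63) / 64) * 8;
	__u64 *data;

	unmap = calloc(1, sizeof(*unmap) + sizeof(*bitmap));
	data = calloc(1, bitmap_bytes);		/* bitmap memory must be zeroed */
	if (!unmap || !data)
		goto err;

	unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
	unmap->flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
	unmap->iova = iova;
	unmap->size = size;

	bitmap = (struct vfio_bitmap *)unmap->data;
	bitmap->pgsize = pgsize;
	bitmap->size = bitmap_bytes;
	bitmap->data = data;

	if (ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, unmap))
		goto err;

	free(unmap);
	return data;	/* bit N set => page at iova + N * pgsize is dirty */
err:
	free(unmap);
	free(data);
	return NULL;
}
```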
.. | .. |
---|
665 | 1129 | #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) |
---|
666 | 1130 | #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) |
---|
667 | 1131 | |
---|
| 1132 | +/** |
---|
| 1133 | + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, |
---|
| 1134 | + * struct vfio_iommu_type1_dirty_bitmap) |
---|
| 1135 | + * This ioctl is used for dirty page logging. |
---|
| 1136 | + * The caller should set the flag corresponding to the operation to perform, |
---|
| 1137 | + * as detailed below: |
---|
| 1138 | + * |
---|
| 1139 | + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs |
---|
| 1140 | + * the IOMMU driver to log pages that are dirtied or potentially dirtied by |
---|
| 1141 | + * the device; designed to be used when a migration is in progress. Dirty pages |
---|
| 1142 | + * are logged until logging is disabled by user application by calling the IOCTL |
---|
| 1143 | + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. |
---|
| 1144 | + * |
---|
| 1145 | + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs |
---|
| 1146 | + * the IOMMU driver to stop logging dirtied pages. |
---|
| 1147 | + * |
---|
| 1148 | + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set |
---|
| 1149 | + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. |
---|
| 1150 | + * The user must specify the IOVA range and the pgsize through the structure |
---|
| 1151 | + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface |
---|
| 1152 | + * supports getting a bitmap of the smallest supported pgsize only and can be |
---|
| 1153 | + * modified in future to get a bitmap of any specified supported pgsize. The |
---|
| 1154 | + * user must provide a zeroed memory area for the bitmap memory and specify its |
---|
| 1155 | + * size in bitmap.size. One bit is used to represent one page consecutively |
---|
| 1156 | + * starting from iova offset. The user should provide page size in bitmap.pgsize |
---|
| 1157 | + * field. A bit set in the bitmap indicates that the page at that offset from |
---|
| 1158 | + * iova is dirty. The caller must set argsz to a value including the size of |
---|
| 1159 | + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the |
---|
| 1160 | + * actual bitmap. If dirty pages logging is not enabled, an error will be |
---|
| 1161 | + * returned. |
---|
| 1162 | + * |
---|
| 1163 | + * Only one of the flags _START, _STOP and _GET may be specified at a time. |
---|
| 1164 | + * |
---|
| 1165 | + */ |
---|
| 1166 | +struct vfio_iommu_type1_dirty_bitmap { |
---|
| 1167 | + __u32 argsz; |
---|
| 1168 | + __u32 flags; |
---|
| 1169 | +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) |
---|
| 1170 | +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) |
---|
| 1171 | +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) |
---|
| 1172 | + __u8 data[]; |
---|
| 1173 | +}; |
---|
| 1174 | + |
---|
| 1175 | +struct vfio_iommu_type1_dirty_bitmap_get { |
---|
| 1176 | + __u64 iova; /* IO virtual address */ |
---|
| 1177 | + __u64 size; /* Size of iova range */ |
---|
| 1178 | + struct vfio_bitmap bitmap; |
---|
| 1179 | +}; |
---|
| 1180 | + |
---|
| 1181 | +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) |
---|
| 1182 | + |
---|
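A sketch of the dirty page logging flow: start logging, fetch the bitmap for an IOVA range with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, and stop logging. Helper names are illustrative, and the caller is assumed to provide zeroed bitmap storage sized for the range at the chosen pgsize.

```c
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Start (or stop) dirty page logging on the container. */
static int dirty_log_control(int container_fd, int start)
{
	struct vfio_iommu_type1_dirty_bitmap dirty = {
		.argsz = sizeof(dirty),
		.flags = start ? VFIO_IOMMU_DIRTY_PAGES_FLAG_START :
				 VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP,
	};

	return ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
}

/* Fetch the dirty bitmap for one IOVA range into caller-provided storage. */
static int dirty_log_get(int container_fd, __u64 iova, __u64 size,
			 __u64 pgsize, __u64 *data, __u64 data_bytes)
{
	struct vfio_iommu_type1_dirty_bitmap *dirty;
	struct vfio_iommu_type1_dirty_bitmap_get *range;
	size_t argsz = sizeof(*dirty) + sizeof(*range);
	int ret;

	dirty = calloc(1, argsz);
	if (!dirty)
		return -1;
	/* argsz covers the get structure but not the bitmap itself. */
	dirty->argsz = argsz;
	dirty->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;

	range = (struct vfio_iommu_type1_dirty_bitmap_get *)dirty->data;
	range->iova = iova;
	range->size = size;
	range->bitmap.pgsize = pgsize;
	range->bitmap.size = data_bytes;
	range->bitmap.data = data;	/* zeroed memory provided by the caller */

	ret = ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, dirty);
	free(dirty);
	return ret;
}
```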
668 | 1183 | /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ |
---|
669 | 1184 | |
---|
670 | 1185 | /* |
---|