...
 #include "kfd_priv.h"
 #include "kfd_topology.h"
 #include "kfd_iommu.h"
+#include "amdgpu_amdkfd.h"
 
 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  * GPU processor ID are expressed with Bit[31]=1.
...
 #define fiji_cache_info carrizo_cache_info
 #define polaris10_cache_info carrizo_cache_info
 #define polaris11_cache_info carrizo_cache_info
+#define polaris12_cache_info carrizo_cache_info
+#define vegam_cache_info carrizo_cache_info
 /* TODO - check & update Vega10 cache details */
 #define vega10_cache_info carrizo_cache_info
 #define raven_cache_info carrizo_cache_info
+#define renoir_cache_info carrizo_cache_info
+/* TODO - check & update Navi10 cache details */
+#define navi10_cache_info carrizo_cache_info
 
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
                 struct crat_subtype_computeunit *cu)
...
                 struct list_head *device_list)
 {
         struct kfd_iolink_properties *props = NULL, *props2;
-        struct kfd_topology_device *dev, *cpu_dev;
+        struct kfd_topology_device *dev, *to_dev;
         uint32_t id_from;
         uint32_t id_to;
 
         id_from = iolink->proximity_domain_from;
         id_to = iolink->proximity_domain_to;
 
-        pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
-                        id_from);
+        pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
+                        id_from, id_to);
         list_for_each_entry(dev, device_list, list) {
                 if (id_from == dev->proximity_domain) {
                         props = kfd_alloc_struct(props);
...
 
                         if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
                                 props->weight = 20;
+                        else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
+                                props->weight = 15 * iolink->num_hops_xgmi;
                         else
                                 props->weight = node_distance(id_from, id_to);
 
...
         /* CPU topology is created before GPUs are detected, so CPU->GPU
          * links are not built at that time. If a PCIe type is discovered, it
          * means a GPU is detected and we are adding GPU->CPU to the topology.
-         * At this time, also add the corresponded CPU->GPU link.
+         * At this time, also add the corresponded CPU->GPU link if GPU
+         * is large bar.
+         * For xGMI, we only added the link with one direction in the crat
+         * table, add corresponded reversed direction link now.
          */
-        if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
-                cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
-                if (!cpu_dev)
+        if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
+                to_dev = kfd_topology_device_by_proximity_domain(id_to);
+                if (!to_dev)
                         return -ENODEV;
                 /* same everything but the other direction */
                 props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
                 props2->node_from = id_to;
                 props2->node_to = id_from;
                 props2->kobj = NULL;
-                cpu_dev->io_link_count++;
-                cpu_dev->node_props.io_links_count++;
-                list_add_tail(&props2->list, &cpu_dev->io_link_props);
+                to_dev->io_link_count++;
+                to_dev->node_props.io_links_count++;
+                list_add_tail(&props2->list, &to_dev->io_link_props);
         }
 
         return 0;
...
         num_nodes = crat_table->num_domains;
         image_len = crat_table->length;
 
-        pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+        pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
 
         for (node_id = 0; node_id < num_nodes; node_id++) {
                 top_dev = kfd_create_topology_device(device_list);
...
                 pcache_info = polaris11_cache_info;
                 num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
                 break;
+        case CHIP_POLARIS12:
+                pcache_info = polaris12_cache_info;
+                num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
+                break;
+        case CHIP_VEGAM:
+                pcache_info = vegam_cache_info;
+                num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
+                break;
         case CHIP_VEGA10:
+        case CHIP_VEGA12:
+        case CHIP_VEGA20:
+        case CHIP_ARCTURUS:
                 pcache_info = vega10_cache_info;
                 num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
                 break;
         case CHIP_RAVEN:
                 pcache_info = raven_cache_info;
                 num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+                break;
+        case CHIP_RENOIR:
+                pcache_info = renoir_cache_info;
+                num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+                break;
+        case CHIP_NAVI10:
+        case CHIP_NAVI12:
+        case CHIP_NAVI14:
+        case CHIP_SIENNA_CICHLID:
+        case CHIP_NAVY_FLOUNDER:
+                pcache_info = navi10_cache_info;
+                num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
                 break;
         default:
                 return -EINVAL;
...
                                         pcache_info,
                                         cu_info,
                                         mem_available,
-                                        cu_info->cu_bitmap[i][j],
+                                        cu_info->cu_bitmap[i % 4][j + i / 4],
                                         ct,
                                         cu_processor_id,
                                         k);
...
         return 0;
 }
 
+static bool kfd_ignore_crat(void)
+{
+        bool ret;
+
+        if (ignore_crat)
+                return true;
+
+#ifndef KFD_SUPPORT_IOMMU_V2
+        ret = true;
+#else
+        ret = false;
+#endif
+
+        return ret;
+}
+
 /*
  * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
  *        copies CRAT from ACPI (if available).
...
         struct acpi_table_header *crat_table;
         acpi_status status;
         void *pcrat_image;
+        int rc = 0;
 
         if (!crat_image)
                 return -EINVAL;
...
         /* Fetch the CRAT table from ACPI */
         status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
         if (status == AE_NOT_FOUND) {
-                pr_warn("CRAT table not found\n");
+                pr_info("CRAT table not found\n");
                 return -ENODATA;
         } else if (ACPI_FAILURE(status)) {
                 const char *err = acpi_format_exception(status);
...
                 return -EINVAL;
         }
 
-        if (ignore_crat) {
+        if (kfd_ignore_crat()) {
                 pr_info("CRAT table disabled by module option\n");
                 return -ENODATA;
         }
 
-        pcrat_image = kmalloc(crat_table->length, GFP_KERNEL);
-        if (!pcrat_image)
-                return -ENOMEM;
+        pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
+        if (!pcrat_image) {
+                rc = -ENOMEM;
+                goto out;
+        }
 
         memcpy(pcrat_image, crat_table, crat_table->length);
-
         *crat_image = pcrat_image;
         *size = crat_table->length;
-
-        return 0;
+out:
+        acpi_put_table(crat_table);
+        return rc;
 }
 
 /* Memory required to create Virtual CRAT.
  * Since there is no easy way to predict the amount of memory required, the
- * following amount are allocated for CPU and GPU Virtual CRAT. This is
+ * following amount is allocated for GPU Virtual CRAT. This is
  * expected to cover all known conditions. But to be safe additional check
  * is put in the code to ensure we don't overwrite.
  */
-#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE)
-#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE)
+#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE)
 
 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
  *
...
          */
         pgdat = NODE_DATA(numa_node_id);
         for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-                mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+                mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
         mem_in_bytes <<= PAGE_SHIFT;
 
         sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
...
         return 0;
 }
 
+#ifdef CONFIG_X86_64
 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
                 uint32_t *num_entries,
                 struct crat_subtype_iolink *sub_type_hdr)
...
 
         return 0;
 }
+#endif
 
 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
  *
...
         struct crat_subtype_generic *sub_type_hdr;
         int avail_size = *size;
         int numa_node_id;
+#ifdef CONFIG_X86_64
         uint32_t entries = 0;
+#endif
         int ret = 0;
 
-        if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
+        if (!pcrat_image)
                 return -EINVAL;
 
         /* Fill in CRAT Header.
...
                         CRAT_OEMID_LENGTH);
                 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
                         CRAT_OEMTABLEID_LENGTH);
+                acpi_put_table(acpi_table);
         }
         crat_table->total_entries = 0;
         crat_table->num_domains = 0;
...
                                 sub_type_hdr->length);
 
                 /* Fill in Subtype: IO Link */
+#ifdef CONFIG_X86_64
                 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
                                 &entries,
                                 (struct crat_subtype_iolink *)sub_type_hdr);
                 if (ret < 0)
                         return ret;
-                crat_table->length += (sub_type_hdr->length * entries);
-                crat_table->total_entries += entries;
 
-                sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-                                sub_type_hdr->length * entries);
+                if (entries) {
+                        crat_table->length += (sub_type_hdr->length * entries);
+                        crat_table->total_entries += entries;
+
+                        sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+                                        sub_type_hdr->length * entries);
+                }
+#else
+                pr_info("IO link not available for non x86 platforms\n");
+#endif
 
                 crat_table->num_domains++;
         }
...
  *
  *        Return 0 if successful else return -ve value
  */
-static int kfd_fill_gpu_direct_io_link(int *avail_size,
+static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
                 struct kfd_dev *kdev,
                 struct crat_subtype_iolink *sub_type_hdr,
                 uint32_t proximity_domain)
...
         sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
         sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
         sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+        if (kfd_dev_is_large_bar(kdev))
+                sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
 
         /* Fill in IOLINK subtype.
          * TODO: Fill-in other fields of iolink subtype
...
         return 0;
 }
 
+static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
+                struct kfd_dev *kdev,
+                struct kfd_dev *peer_kdev,
+                struct crat_subtype_iolink *sub_type_hdr,
+                uint32_t proximity_domain_from,
+                uint32_t proximity_domain_to)
+{
+        *avail_size -= sizeof(struct crat_subtype_iolink);
+        if (*avail_size < 0)
+                return -ENOMEM;
+
+        memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+        sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+        sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+        sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
+                        CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+
+        sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+        sub_type_hdr->proximity_domain_from = proximity_domain_from;
+        sub_type_hdr->proximity_domain_to = proximity_domain_to;
+        sub_type_hdr->num_hops_xgmi =
+                amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
+        return 0;
+}
+
 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
  *
  * @pcrat_image: Fill in VCRAT for GPU
...
 {
         struct crat_header *crat_table = (struct crat_header *)pcrat_image;
         struct crat_subtype_generic *sub_type_hdr;
+        struct kfd_local_mem_info local_mem_info;
+        struct kfd_topology_device *peer_dev;
         struct crat_subtype_computeunit *cu;
         struct kfd_cu_info cu_info;
         int avail_size = *size;
         uint32_t total_num_of_cu;
         int num_of_cache_entries = 0;
         int cache_mem_filled = 0;
+        uint32_t nid = 0;
         int ret = 0;
-        struct kfd_local_mem_info local_mem_info;
 
         if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
                 return -EINVAL;
...
         cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
         cu->proximity_domain = proximity_domain;
 
-        kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info);
+        amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
         cu->num_simd_per_cu = cu_info.simd_per_cu;
         cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
         cu->max_waves_simd = cu_info.max_waves_per_simd;
...
          * report the total FB size (public+private) as a single
          * private heap.
          */
-        kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info);
+        amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                         sub_type_hdr->length);
 
...
          */
         sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
                         cache_mem_filled);
-        ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+        ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
                 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
 
         if (ret < 0)
...
         crat_table->length += sub_type_hdr->length;
         crat_table->total_entries++;
+
+        /* Fill in Subtype: IO_LINKS
+         * Direct links from GPU to other GPUs through xGMI.
+         * We will loop GPUs that already be processed (with lower value
+         * of proximity_domain), add the link for the GPUs with same
+         * hive id (from this GPU to other GPU) . The reversed iolink
+         * (from other GPU to this GPU) will be added
+         * in kfd_parse_subtype_iolink.
+         */
+        if (kdev->hive_id) {
+                for (nid = 0; nid < proximity_domain; ++nid) {
+                        peer_dev = kfd_topology_device_by_proximity_domain(nid);
+                        if (!peer_dev->gpu)
+                                continue;
+                        if (peer_dev->gpu->hive_id != kdev->hive_id)
+                                continue;
+                        sub_type_hdr = (typeof(sub_type_hdr))(
+                                (char *)sub_type_hdr +
+                                sizeof(struct crat_subtype_iolink));
+                        ret = kfd_fill_gpu_xgmi_link_to_gpu(
+                                &avail_size, kdev, peer_dev->gpu,
+                                (struct crat_subtype_iolink *)sub_type_hdr,
+                                proximity_domain, nid);
+                        if (ret < 0)
+                                return ret;
+                        crat_table->length += sub_type_hdr->length;
+                        crat_table->total_entries++;
+                }
+        }
         *size = crat_table->length;
         pr_info("Virtual CRAT table created for GPU\n");
 
...
                 uint32_t proximity_domain)
 {
         void *pcrat_image = NULL;
-        int ret = 0;
+        int ret = 0, num_nodes;
+        size_t dyn_size;
 
         if (!crat_image)
                 return -EINVAL;
 
         *crat_image = NULL;
 
-        /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
-         * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
-         * all the current conditions. A check is put not to overwrite beyond
-         * allocated size
+        /* Allocate the CPU Virtual CRAT size based on the number of online
+         * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
+         * This should cover all the current conditions. A check is put not
+         * to overwrite beyond allocated size for GPUs
          */
         switch (flags) {
         case COMPUTE_UNIT_CPU:
-                pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
+                num_nodes = num_online_nodes();
+                dyn_size = sizeof(struct crat_header) +
+                        num_nodes * (sizeof(struct crat_subtype_computeunit) +
+                        sizeof(struct crat_subtype_memory) +
+                        (num_nodes - 1) * sizeof(struct crat_subtype_iolink));
+                pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
                 if (!pcrat_image)
                         return -ENOMEM;
-                *size = VCRAT_SIZE_FOR_CPU;
+                *size = dyn_size;
+                pr_debug("CRAT size is %ld", dyn_size);
                 ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
                 break;
         case COMPUTE_UNIT_GPU:
                 if (!kdev)
                         return -EINVAL;
-                pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
+                pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
                 if (!pcrat_image)
                         return -ENOMEM;
                 *size = VCRAT_SIZE_FOR_GPU;
...
         if (!ret)
                 *crat_image = pcrat_image;
         else
-                kfree(pcrat_image);
+                kvfree(pcrat_image);
 
         return ret;
 }
...
  */
 void kfd_destroy_crat_image(void *crat_image)
 {
-        kfree(crat_image);
+        kvfree(crat_image);
 }