.. | .. |
---|
26 | 26 | #include "kfd_priv.h" |
---|
27 | 27 | #include "kfd_topology.h" |
---|
28 | 28 | #include "kfd_iommu.h" |
---|
| 29 | +#include "amdgpu_amdkfd.h" |
---|
29 | 30 | |
---|
30 | 31 | /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. |
---|
31 | 32 | * GPU processor ID are expressed with Bit[31]=1. |
---|
.. | .. |
---|
132 | 133 | #define fiji_cache_info carrizo_cache_info |
---|
133 | 134 | #define polaris10_cache_info carrizo_cache_info |
---|
134 | 135 | #define polaris11_cache_info carrizo_cache_info |
---|
| 136 | +#define polaris12_cache_info carrizo_cache_info |
---|
| 137 | +#define vegam_cache_info carrizo_cache_info |
---|
135 | 138 | /* TODO - check & update Vega10 cache details */ |
---|
136 | 139 | #define vega10_cache_info carrizo_cache_info |
---|
137 | 140 | #define raven_cache_info carrizo_cache_info |
---|
| 141 | +#define renoir_cache_info carrizo_cache_info |
---|
| 142 | +/* TODO - check & update Navi10 cache details */ |
---|
| 143 | +#define navi10_cache_info carrizo_cache_info |
---|
138 | 144 | |
---|
139 | 145 | static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, |
---|
140 | 146 | struct crat_subtype_computeunit *cu) |
---|
.. | .. |
---|
346 | 352 | struct list_head *device_list) |
---|
347 | 353 | { |
---|
348 | 354 | struct kfd_iolink_properties *props = NULL, *props2; |
---|
349 | | - struct kfd_topology_device *dev, *cpu_dev; |
---|
| 355 | + struct kfd_topology_device *dev, *to_dev; |
---|
350 | 356 | uint32_t id_from; |
---|
351 | 357 | uint32_t id_to; |
---|
352 | 358 | |
---|
353 | 359 | id_from = iolink->proximity_domain_from; |
---|
354 | 360 | id_to = iolink->proximity_domain_to; |
---|
355 | 361 | |
---|
356 | | - pr_debug("Found IO link entry in CRAT table with id_from=%d\n", |
---|
357 | | - id_from); |
---|
| 362 | + pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n", |
---|
| 363 | + id_from, id_to); |
---|
358 | 364 | list_for_each_entry(dev, device_list, list) { |
---|
359 | 365 | if (id_from == dev->proximity_domain) { |
---|
360 | 366 | props = kfd_alloc_struct(props); |
---|
.. | .. |
---|
369 | 375 | |
---|
370 | 376 | if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) |
---|
371 | 377 | props->weight = 20; |
---|
| 378 | + else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) |
---|
| 379 | + props->weight = 15 * iolink->num_hops_xgmi; |
---|
372 | 380 | else |
---|
373 | 381 | props->weight = node_distance(id_from, id_to); |
---|
374 | 382 | |
---|
.. | .. |
---|
389 | 397 | /* CPU topology is created before GPUs are detected, so CPU->GPU |
---|
390 | 398 | * links are not built at that time. If a PCIe type is discovered, it |
---|
391 | 399 | * means a GPU is detected and we are adding GPU->CPU to the topology. |
---|
392 | | - * At this time, also add the corresponded CPU->GPU link. |
---|
| 400 | +	 * At this time, also add the corresponding CPU->GPU link if the |
---|
| 401 | +	 * GPU is large-BAR. |
---|
| 402 | +	 * For xGMI, the CRAT table only contains the link in one direction; |
---|
| 403 | +	 * add the corresponding reversed-direction link now. |
---|
393 | 404 | */ |
---|
394 | | - if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) { |
---|
395 | | - cpu_dev = kfd_topology_device_by_proximity_domain(id_to); |
---|
396 | | - if (!cpu_dev) |
---|
| 405 | + if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { |
---|
| 406 | + to_dev = kfd_topology_device_by_proximity_domain(id_to); |
---|
| 407 | + if (!to_dev) |
---|
397 | 408 | return -ENODEV; |
---|
398 | 409 | /* same everything but the other direction */ |
---|
399 | 410 | props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); |
---|
400 | 411 | props2->node_from = id_to; |
---|
401 | 412 | props2->node_to = id_from; |
---|
402 | 413 | props2->kobj = NULL; |
---|
403 | | - cpu_dev->io_link_count++; |
---|
404 | | - cpu_dev->node_props.io_links_count++; |
---|
405 | | - list_add_tail(&props2->list, &cpu_dev->io_link_props); |
---|
| 414 | + to_dev->io_link_count++; |
---|
| 415 | + to_dev->node_props.io_links_count++; |
---|
| 416 | + list_add_tail(&props2->list, &to_dev->io_link_props); |
---|
406 | 417 | } |
---|
407 | 418 | |
---|
408 | 419 | return 0; |
---|
.. | .. |
---|
491 | 502 | num_nodes = crat_table->num_domains; |
---|
492 | 503 | image_len = crat_table->length; |
---|
493 | 504 | |
---|
494 | | - pr_info("Parsing CRAT table with %d nodes\n", num_nodes); |
---|
| 505 | + pr_debug("Parsing CRAT table with %d nodes\n", num_nodes); |
---|
495 | 506 | |
---|
496 | 507 | for (node_id = 0; node_id < num_nodes; node_id++) { |
---|
497 | 508 | top_dev = kfd_create_topology_device(device_list); |
---|
.. | .. |
---|
641 | 652 | pcache_info = polaris11_cache_info; |
---|
642 | 653 | num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); |
---|
643 | 654 | break; |
---|
| 655 | + case CHIP_POLARIS12: |
---|
| 656 | + pcache_info = polaris12_cache_info; |
---|
| 657 | + num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); |
---|
| 658 | + break; |
---|
| 659 | + case CHIP_VEGAM: |
---|
| 660 | + pcache_info = vegam_cache_info; |
---|
| 661 | + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); |
---|
| 662 | + break; |
---|
644 | 663 | case CHIP_VEGA10: |
---|
| 664 | + case CHIP_VEGA12: |
---|
| 665 | + case CHIP_VEGA20: |
---|
| 666 | + case CHIP_ARCTURUS: |
---|
645 | 667 | pcache_info = vega10_cache_info; |
---|
646 | 668 | num_of_cache_types = ARRAY_SIZE(vega10_cache_info); |
---|
647 | 669 | break; |
---|
648 | 670 | case CHIP_RAVEN: |
---|
649 | 671 | pcache_info = raven_cache_info; |
---|
650 | 672 | num_of_cache_types = ARRAY_SIZE(raven_cache_info); |
---|
| 673 | + break; |
---|
| 674 | + case CHIP_RENOIR: |
---|
| 675 | + pcache_info = renoir_cache_info; |
---|
| 676 | + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); |
---|
| 677 | + break; |
---|
| 678 | + case CHIP_NAVI10: |
---|
| 679 | + case CHIP_NAVI12: |
---|
| 680 | + case CHIP_NAVI14: |
---|
| 681 | + case CHIP_SIENNA_CICHLID: |
---|
| 682 | + case CHIP_NAVY_FLOUNDER: |
---|
| 683 | + pcache_info = navi10_cache_info; |
---|
| 684 | + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); |
---|
651 | 685 | break; |
---|
652 | 686 | default: |
---|
653 | 687 | return -EINVAL; |
---|
.. | .. |
---|
678 | 712 | pcache_info, |
---|
679 | 713 | cu_info, |
---|
680 | 714 | mem_available, |
---|
681 | | - cu_info->cu_bitmap[i][j], |
---|
| 715 | + cu_info->cu_bitmap[i % 4][j + i / 4], |
---|
682 | 716 | ct, |
---|
683 | 717 | cu_processor_id, |
---|
684 | 718 | k); |
---|
.. | .. |
---|
708 | 742 | return 0; |
---|
709 | 743 | } |
---|
710 | 744 | |
---|
| 745 | +static bool kfd_ignore_crat(void) |
---|
| 746 | +{ |
---|
| 747 | + bool ret; |
---|
| 748 | + |
---|
| 749 | + if (ignore_crat) |
---|
| 750 | + return true; |
---|
| 751 | + |
---|
| 752 | +#ifndef KFD_SUPPORT_IOMMU_V2 |
---|
| 753 | + ret = true; |
---|
| 754 | +#else |
---|
| 755 | + ret = false; |
---|
| 756 | +#endif |
---|
| 757 | + |
---|
| 758 | + return ret; |
---|
| 759 | +} |
---|
| 760 | + |
---|
711 | 761 | /* |
---|
712 | 762 | * kfd_create_crat_image_acpi - Allocates memory for CRAT image and |
---|
713 | 763 | * copies CRAT from ACPI (if available). |
---|
.. | .. |
---|
724 | 774 | struct acpi_table_header *crat_table; |
---|
725 | 775 | acpi_status status; |
---|
726 | 776 | void *pcrat_image; |
---|
| 777 | + int rc = 0; |
---|
727 | 778 | |
---|
728 | 779 | if (!crat_image) |
---|
729 | 780 | return -EINVAL; |
---|
.. | .. |
---|
733 | 784 | /* Fetch the CRAT table from ACPI */ |
---|
734 | 785 | status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); |
---|
735 | 786 | if (status == AE_NOT_FOUND) { |
---|
736 | | - pr_warn("CRAT table not found\n"); |
---|
| 787 | + pr_info("CRAT table not found\n"); |
---|
737 | 788 | return -ENODATA; |
---|
738 | 789 | } else if (ACPI_FAILURE(status)) { |
---|
739 | 790 | const char *err = acpi_format_exception(status); |
---|
.. | .. |
---|
742 | 793 | return -EINVAL; |
---|
743 | 794 | } |
---|
744 | 795 | |
---|
745 | | - if (ignore_crat) { |
---|
| 796 | + if (kfd_ignore_crat()) { |
---|
746 | 797 | pr_info("CRAT table disabled by module option\n"); |
---|
747 | 798 | return -ENODATA; |
---|
748 | 799 | } |
---|
749 | 800 | |
---|
750 | | - pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); |
---|
751 | | - if (!pcrat_image) |
---|
752 | | - return -ENOMEM; |
---|
| 801 | + pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL); |
---|
| 802 | + if (!pcrat_image) { |
---|
| 803 | + rc = -ENOMEM; |
---|
| 804 | + goto out; |
---|
| 805 | + } |
---|
753 | 806 | |
---|
754 | 807 | memcpy(pcrat_image, crat_table, crat_table->length); |
---|
755 | | - |
---|
756 | 808 | *crat_image = pcrat_image; |
---|
757 | 809 | *size = crat_table->length; |
---|
758 | | - |
---|
759 | | - return 0; |
---|
| 810 | +out: |
---|
| 811 | + acpi_put_table(crat_table); |
---|
| 812 | + return rc; |
---|
760 | 813 | } |
---|
761 | 814 | |
---|
762 | 815 | /* Memory required to create Virtual CRAT. |
---|
763 | 816 | * Since there is no easy way to predict the amount of memory required, the |
---|
764 | | - * following amount are allocated for CPU and GPU Virtual CRAT. This is |
---|
| 817 | + * following amount is allocated for GPU Virtual CRAT. This is |
---|
765 | 818 | * expected to cover all known conditions. But to be safe additional check |
---|
766 | 819 | * is put in the code to ensure we don't overwrite. |
---|
767 | 820 | */ |
---|
768 | | -#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) |
---|
769 | | -#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE) |
---|
| 821 | +#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE) |
---|
770 | 822 | |
---|
771 | 823 | /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node |
---|
772 | 824 | * |
---|
.. | .. |
---|
842 | 894 | */ |
---|
843 | 895 | pgdat = NODE_DATA(numa_node_id); |
---|
844 | 896 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) |
---|
845 | | - mem_in_bytes += pgdat->node_zones[zone_type].managed_pages; |
---|
| 897 | + mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); |
---|
846 | 898 | mem_in_bytes <<= PAGE_SHIFT; |
---|
847 | 899 | |
---|
848 | 900 | sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); |
---|
.. | .. |
---|
852 | 904 | return 0; |
---|
853 | 905 | } |
---|
854 | 906 | |
---|
| 907 | +#ifdef CONFIG_X86_64 |
---|
855 | 908 | static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, |
---|
856 | 909 | uint32_t *num_entries, |
---|
857 | 910 | struct crat_subtype_iolink *sub_type_hdr) |
---|
.. | .. |
---|
894 | 947 | |
---|
895 | 948 | return 0; |
---|
896 | 949 | } |
---|
| 950 | +#endif |
---|
897 | 951 | |
---|
898 | 952 | /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU |
---|
899 | 953 | * |
---|
.. | .. |
---|
909 | 963 | struct crat_subtype_generic *sub_type_hdr; |
---|
910 | 964 | int avail_size = *size; |
---|
911 | 965 | int numa_node_id; |
---|
| 966 | +#ifdef CONFIG_X86_64 |
---|
912 | 967 | uint32_t entries = 0; |
---|
| 968 | +#endif |
---|
913 | 969 | int ret = 0; |
---|
914 | 970 | |
---|
915 | | - if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) |
---|
| 971 | + if (!pcrat_image) |
---|
916 | 972 | return -EINVAL; |
---|
917 | 973 | |
---|
918 | 974 | /* Fill in CRAT Header. |
---|
.. | .. |
---|
936 | 992 | CRAT_OEMID_LENGTH); |
---|
937 | 993 | memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, |
---|
938 | 994 | CRAT_OEMTABLEID_LENGTH); |
---|
| 995 | + acpi_put_table(acpi_table); |
---|
939 | 996 | } |
---|
940 | 997 | crat_table->total_entries = 0; |
---|
941 | 998 | crat_table->num_domains = 0; |
---|
.. | .. |
---|
971 | 1028 | sub_type_hdr->length); |
---|
972 | 1029 | |
---|
973 | 1030 | /* Fill in Subtype: IO Link */ |
---|
| 1031 | +#ifdef CONFIG_X86_64 |
---|
974 | 1032 | ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, |
---|
975 | 1033 | &entries, |
---|
976 | 1034 | (struct crat_subtype_iolink *)sub_type_hdr); |
---|
977 | 1035 | if (ret < 0) |
---|
978 | 1036 | return ret; |
---|
979 | | - crat_table->length += (sub_type_hdr->length * entries); |
---|
980 | | - crat_table->total_entries += entries; |
---|
981 | 1037 | |
---|
982 | | - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
---|
983 | | - sub_type_hdr->length * entries); |
---|
| 1038 | + if (entries) { |
---|
| 1039 | + crat_table->length += (sub_type_hdr->length * entries); |
---|
| 1040 | + crat_table->total_entries += entries; |
---|
| 1041 | + |
---|
| 1042 | + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
---|
| 1043 | + sub_type_hdr->length * entries); |
---|
| 1044 | + } |
---|
| 1045 | +#else |
---|
| 1046 | + pr_info("IO link not available for non x86 platforms\n"); |
---|
| 1047 | +#endif |
---|
984 | 1048 | |
---|
985 | 1049 | crat_table->num_domains++; |
---|
986 | 1050 | } |
---|
.. | .. |
---|
1037 | 1101 | * |
---|
1038 | 1102 | * Return 0 if successful else return -ve value |
---|
1039 | 1103 | */ |
---|
1040 | | -static int kfd_fill_gpu_direct_io_link(int *avail_size, |
---|
| 1104 | +static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, |
---|
1041 | 1105 | struct kfd_dev *kdev, |
---|
1042 | 1106 | struct crat_subtype_iolink *sub_type_hdr, |
---|
1043 | 1107 | uint32_t proximity_domain) |
---|
.. | .. |
---|
1052 | 1116 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; |
---|
1053 | 1117 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); |
---|
1054 | 1118 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; |
---|
| 1119 | + if (kfd_dev_is_large_bar(kdev)) |
---|
| 1120 | + sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; |
---|
1055 | 1121 | |
---|
1056 | 1122 | /* Fill in IOLINK subtype. |
---|
1057 | 1123 | * TODO: Fill-in other fields of iolink subtype |
---|
.. | .. |
---|
1069 | 1135 | return 0; |
---|
1070 | 1136 | } |
---|
1071 | 1137 | |
---|
| 1138 | +static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, |
---|
| 1139 | + struct kfd_dev *kdev, |
---|
| 1140 | + struct kfd_dev *peer_kdev, |
---|
| 1141 | + struct crat_subtype_iolink *sub_type_hdr, |
---|
| 1142 | + uint32_t proximity_domain_from, |
---|
| 1143 | + uint32_t proximity_domain_to) |
---|
| 1144 | +{ |
---|
| 1145 | + *avail_size -= sizeof(struct crat_subtype_iolink); |
---|
| 1146 | + if (*avail_size < 0) |
---|
| 1147 | + return -ENOMEM; |
---|
| 1148 | + |
---|
| 1149 | + memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); |
---|
| 1150 | + |
---|
| 1151 | + sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; |
---|
| 1152 | + sub_type_hdr->length = sizeof(struct crat_subtype_iolink); |
---|
| 1153 | + sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | |
---|
| 1154 | + CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; |
---|
| 1155 | + |
---|
| 1156 | + sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; |
---|
| 1157 | + sub_type_hdr->proximity_domain_from = proximity_domain_from; |
---|
| 1158 | + sub_type_hdr->proximity_domain_to = proximity_domain_to; |
---|
| 1159 | + sub_type_hdr->num_hops_xgmi = |
---|
| 1160 | + amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); |
---|
| 1161 | + return 0; |
---|
| 1162 | +} |
---|
| 1163 | + |
---|
1072 | 1164 | /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU |
---|
1073 | 1165 | * |
---|
1074 | 1166 | * @pcrat_image: Fill in VCRAT for GPU |
---|
.. | .. |
---|
1081 | 1173 | { |
---|
1082 | 1174 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; |
---|
1083 | 1175 | struct crat_subtype_generic *sub_type_hdr; |
---|
| 1176 | + struct kfd_local_mem_info local_mem_info; |
---|
| 1177 | + struct kfd_topology_device *peer_dev; |
---|
1084 | 1178 | struct crat_subtype_computeunit *cu; |
---|
1085 | 1179 | struct kfd_cu_info cu_info; |
---|
1086 | 1180 | int avail_size = *size; |
---|
1087 | 1181 | uint32_t total_num_of_cu; |
---|
1088 | 1182 | int num_of_cache_entries = 0; |
---|
1089 | 1183 | int cache_mem_filled = 0; |
---|
| 1184 | + uint32_t nid = 0; |
---|
1090 | 1185 | int ret = 0; |
---|
1091 | | - struct kfd_local_mem_info local_mem_info; |
---|
1092 | 1186 | |
---|
1093 | 1187 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) |
---|
1094 | 1188 | return -EINVAL; |
---|
.. | .. |
---|
1128 | 1222 | cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; |
---|
1129 | 1223 | cu->proximity_domain = proximity_domain; |
---|
1130 | 1224 | |
---|
1131 | | - kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info); |
---|
| 1225 | + amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info); |
---|
1132 | 1226 | cu->num_simd_per_cu = cu_info.simd_per_cu; |
---|
1133 | 1227 | cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; |
---|
1134 | 1228 | cu->max_waves_simd = cu_info.max_waves_per_simd; |
---|
.. | .. |
---|
1159 | 1253 | * report the total FB size (public+private) as a single |
---|
1160 | 1254 | * private heap. |
---|
1161 | 1255 | */ |
---|
1162 | | - kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); |
---|
| 1256 | + amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info); |
---|
1163 | 1257 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
---|
1164 | 1258 | sub_type_hdr->length); |
---|
1165 | 1259 | |
---|
.. | .. |
---|
1212 | 1306 | */ |
---|
1213 | 1307 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
---|
1214 | 1308 | cache_mem_filled); |
---|
1215 | | - ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev, |
---|
| 1309 | + ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, |
---|
1216 | 1310 | (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); |
---|
1217 | 1311 | |
---|
1218 | 1312 | if (ret < 0) |
---|
.. | .. |
---|
1221 | 1315 | crat_table->length += sub_type_hdr->length; |
---|
1222 | 1316 | crat_table->total_entries++; |
---|
1223 | 1317 | |
---|
| 1318 | + |
---|
| 1319 | + /* Fill in Subtype: IO_LINKS |
---|
| 1320 | + * Direct links from GPU to other GPUs through xGMI. |
---|
| 1321 | + * We will loop GPUs that already be processed (with lower value |
---|
| 1322 | + * of proximity_domain), add the link for the GPUs with same |
---|
| 1323 | + * hive id (from this GPU to other GPU) . The reversed iolink |
---|
| 1324 | + * (from other GPU to this GPU) will be added |
---|
| 1325 | + * in kfd_parse_subtype_iolink. |
---|
| 1326 | + */ |
---|
| 1327 | + if (kdev->hive_id) { |
---|
| 1328 | + for (nid = 0; nid < proximity_domain; ++nid) { |
---|
| 1329 | + peer_dev = kfd_topology_device_by_proximity_domain(nid); |
---|
| 1330 | + if (!peer_dev->gpu) |
---|
| 1331 | + continue; |
---|
| 1332 | + if (peer_dev->gpu->hive_id != kdev->hive_id) |
---|
| 1333 | + continue; |
---|
| 1334 | + sub_type_hdr = (typeof(sub_type_hdr))( |
---|
| 1335 | + (char *)sub_type_hdr + |
---|
| 1336 | + sizeof(struct crat_subtype_iolink)); |
---|
| 1337 | + ret = kfd_fill_gpu_xgmi_link_to_gpu( |
---|
| 1338 | + &avail_size, kdev, peer_dev->gpu, |
---|
| 1339 | + (struct crat_subtype_iolink *)sub_type_hdr, |
---|
| 1340 | + proximity_domain, nid); |
---|
| 1341 | + if (ret < 0) |
---|
| 1342 | + return ret; |
---|
| 1343 | + crat_table->length += sub_type_hdr->length; |
---|
| 1344 | + crat_table->total_entries++; |
---|
| 1345 | + } |
---|
| 1346 | + } |
---|
1224 | 1347 | *size = crat_table->length; |
---|
1225 | 1348 | pr_info("Virtual CRAT table created for GPU\n"); |
---|
1226 | 1349 | |
---|
.. | .. |
---|
1249 | 1372 | uint32_t proximity_domain) |
---|
1250 | 1373 | { |
---|
1251 | 1374 | void *pcrat_image = NULL; |
---|
1252 | | - int ret = 0; |
---|
| 1375 | + int ret = 0, num_nodes; |
---|
| 1376 | + size_t dyn_size; |
---|
1253 | 1377 | |
---|
1254 | 1378 | if (!crat_image) |
---|
1255 | 1379 | return -EINVAL; |
---|
1256 | 1380 | |
---|
1257 | 1381 | *crat_image = NULL; |
---|
1258 | 1382 | |
---|
1259 | | - /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and |
---|
1260 | | - * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover |
---|
1261 | | - * all the current conditions. A check is put not to overwrite beyond |
---|
1262 | | - * allocated size |
---|
| 1383 | + /* Allocate the CPU Virtual CRAT size based on the number of online |
---|
| 1384 | + * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. |
---|
| 1385 | + * This should cover all the current conditions. A check is put not |
---|
| 1386 | + * to overwrite beyond allocated size for GPUs |
---|
1263 | 1387 | */ |
---|
1264 | 1388 | switch (flags) { |
---|
1265 | 1389 | case COMPUTE_UNIT_CPU: |
---|
1266 | | - pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL); |
---|
| 1390 | + num_nodes = num_online_nodes(); |
---|
| 1391 | + dyn_size = sizeof(struct crat_header) + |
---|
| 1392 | + num_nodes * (sizeof(struct crat_subtype_computeunit) + |
---|
| 1393 | + sizeof(struct crat_subtype_memory) + |
---|
| 1394 | + (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); |
---|
| 1395 | + pcrat_image = kvmalloc(dyn_size, GFP_KERNEL); |
---|
1267 | 1396 | if (!pcrat_image) |
---|
1268 | 1397 | return -ENOMEM; |
---|
1269 | | - *size = VCRAT_SIZE_FOR_CPU; |
---|
| 1398 | + *size = dyn_size; |
---|
| 1399 | + pr_debug("CRAT size is %ld", dyn_size); |
---|
1270 | 1400 | ret = kfd_create_vcrat_image_cpu(pcrat_image, size); |
---|
1271 | 1401 | break; |
---|
1272 | 1402 | case COMPUTE_UNIT_GPU: |
---|
1273 | 1403 | if (!kdev) |
---|
1274 | 1404 | return -EINVAL; |
---|
1275 | | - pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); |
---|
| 1405 | + pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); |
---|
1276 | 1406 | if (!pcrat_image) |
---|
1277 | 1407 | return -ENOMEM; |
---|
1278 | 1408 | *size = VCRAT_SIZE_FOR_GPU; |
---|
.. | .. |
---|
1291 | 1421 | if (!ret) |
---|
1292 | 1422 | *crat_image = pcrat_image; |
---|
1293 | 1423 | else |
---|
1294 | | - kfree(pcrat_image); |
---|
| 1424 | + kvfree(pcrat_image); |
---|
1295 | 1425 | |
---|
1296 | 1426 | return ret; |
---|
1297 | 1427 | } |
---|
.. | .. |
---|
1304 | 1434 | */ |
---|
void kfd_destroy_crat_image(void *crat_image)
{
	/* CRAT images (both ACPI copies and virtual ones) are allocated
	 * with kvmalloc, so kvfree is required here; kvfree(NULL) is a
	 * no-op, so a NULL image is safe.
	 */
	kvfree(crat_image);
}
---|