2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/rknpu/rknpu_gem.c
@@ -25,6 +25,7 @@
 #include "rknpu_drv.h"
 #include "rknpu_ioctl.h"
 #include "rknpu_gem.h"
+#include "rknpu_iommu.h"
 
 #define RKNPU_GEM_ALLOC_FROM_PAGES 1
 
@@ -115,15 +116,14 @@
 		rknpu_obj->kv_addr = NULL;
 	}
 
-	dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents,
-		     DMA_BIDIRECTIONAL);
-
-	drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, true);
-
 	if (rknpu_obj->sgt != NULL) {
+		dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl,
+			     rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL);
 		sg_free_table(rknpu_obj->sgt);
 		kfree(rknpu_obj->sgt);
 	}
+
+	drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, true);
 }
 #endif
 
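This hunk also fixes the teardown order in the free path above: previously dma_unmap_sg() dereferenced rknpu_obj->sgt before the NULL check, so a partially constructed object could oops on free, and drm_gem_put_pages() ran while the scatter table was still mapped. The rewrite only touches the table when it exists and releases the pages last. A minimal userspace-style sketch of the same guard-then-release pattern (names are hypothetical stand-ins, not the driver's API):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the driver's scatter table and page array. */
struct sg_table_stub { int mapped; };

struct buf {
	struct sg_table_stub *sgt; /* may be NULL after a failed setup */
	void *pages;
};

static void buf_teardown(struct buf *b)
{
	/* Only touch the scatter table when it actually exists... */
	if (b->sgt != NULL) {
		b->sgt->mapped = 0; /* ~ dma_unmap_sg() */
		free(b->sgt);       /* ~ sg_free_table() + kfree() */
		b->sgt = NULL;
	}
	/* ...and release the backing pages last, as the hunk now does. */
	free(b->pages);
	b->pages = NULL;
}

int main(void)
{
	struct buf b = { .sgt = NULL, .pages = malloc(16) };

	buf_teardown(&b); /* safe even though b.sgt is NULL */
	printf("teardown ok\n");
	return 0;
}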
@@ -380,7 +380,8 @@
 	kfree(rknpu_obj);
 }
 
-static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj)
+static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
+					  enum rknpu_cache_type cache_type)
 {
 	struct drm_device *drm = rknpu_obj->base.dev;
 	struct rknpu_device *rknpu_dev = drm->dev_private;
@@ -393,8 +394,28 @@
 	unsigned long offset = 0;
 	int i = 0;
 	int ret = -EINVAL;
+	phys_addr_t cache_start = 0;
+	unsigned long cache_offset = 0;
+	unsigned long cache_size = 0;
 
-	/* iova map to sram */
+	switch (cache_type) {
+	case RKNPU_CACHE_SRAM:
+		cache_start = rknpu_dev->sram_start;
+		cache_offset = rknpu_obj->sram_obj->range_start *
+			       rknpu_dev->sram_mm->chunk_size;
+		cache_size = rknpu_obj->sram_size;
+		break;
+	case RKNPU_CACHE_NBUF:
+		cache_start = rknpu_dev->nbuf_start;
+		cache_offset = 0;
+		cache_size = rknpu_obj->nbuf_size;
+		break;
+	default:
+		LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type);
+		return -EINVAL;
+	}
+
+	/* iova map to cache */
 	domain = iommu_get_domain_for_dev(rknpu_dev->dev);
 	if (!domain) {
 		LOG_ERROR("failed to get iommu domain!");
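This switch is the pattern the whole commit repeats: each cache backend (the SoC SRAM or the new NBUF window) is reduced to a (cache_start, cache_offset, cache_size) triple, and everything downstream stays backend-agnostic. A hedged sketch of the same dispatch as a self-contained lookup (all constants are made-up placeholders, not real device addresses):

#include <stdint.h>
#include <stdio.h>

enum cache_type { CACHE_SRAM, CACHE_NBUF };

struct cache_region {
	uint64_t start;  /* physical base, ~ rknpu_dev->{sram,nbuf}_start */
	uint64_t offset; /* per-object offset inside the region */
	uint64_t size;   /* bytes reserved for this object */
};

/* Resolve one triple per backend; unknown types fail, as the switch does. */
static int cache_region_get(enum cache_type t, struct cache_region *out)
{
	switch (t) {
	case CACHE_SRAM: /* offset comes from the SRAM chunk allocator */
		*out = (struct cache_region){ 0xff000000, 0x1000, 0x10000 };
		return 0;
	case CACHE_NBUF: /* NBUF is a single window, so offset is 0 */
		*out = (struct cache_region){ 0xff400000, 0, 0x20000 };
		return 0;
	default:
		return -1;
	}
}

int main(void)
{
	struct cache_region r;

	if (cache_region_get(CACHE_NBUF, &r) == 0)
		printf("map phys [%#llx, %#llx)\n",
		       (unsigned long long)(r.start + r.offset),
		       (unsigned long long)(r.start + r.offset + r.size));
	return 0;
}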
@@ -403,8 +424,7 @@
 
 	cookie = domain->iova_cookie;
 	iovad = &cookie->iovad;
-	rknpu_obj->iova_size =
-		iova_align(iovad, rknpu_obj->sram_size + rknpu_obj->size);
+	rknpu_obj->iova_size = iova_align(iovad, cache_size + rknpu_obj->size);
 	rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova(
 		domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev);
 	if (!rknpu_obj->iova_start) {
@@ -416,20 +436,20 @@
 		 &rknpu_obj->iova_start, rknpu_obj->iova_size);
 
 	/*
-	 * Overview SRAM + DDR map to IOVA
+	 * Overview cache + DDR map to IOVA
 	 * --------
-	 * sram_size: rknpu_obj->sram_size
-	 *   - allocate from SRAM, this size value has been page-aligned
+	 * cache_size:
+	 *   - allocate from CACHE, this size value has been page-aligned
 	 * size: rknpu_obj->size
 	 *   - allocate from DDR pages, this size value has been page-aligned
 	 * iova_size: rknpu_obj->iova_size
-	 *   - from iova_align(sram_size + size)
-	 *   - it may be larger than the (sram_size + size), and the larger part is not mapped
+	 *   - from iova_align(cache_size + size)
+	 *   - it may be larger than the (cache_size + size), and the larger part is not mapped
 	 * --------
 	 *
-	 * |<- sram_size ->|   |<- - - -  size  - - - ->|
+	 * |<- cache_size ->|  |<- - - -  size  - - - ->|
 	 * +---------------+   +----------------------+
-	 * |     SRAM      |   |         DDR          |
+	 * |     CACHE     |   |         DDR          |
 	 * +---------------+   +----------------------+
 	 *         |                      |
 	 * |       V        |             V            |
@@ -439,20 +459,18 @@
 	 * |<- - - - - - -  iova_size  - - - - - - ->|
 	 *
 	 */
-	offset = rknpu_obj->sram_obj->range_start *
-		 rknpu_dev->sram_mm->chunk_size;
 	ret = iommu_map(domain, rknpu_obj->iova_start,
-			rknpu_dev->sram_start + offset, rknpu_obj->sram_size,
+			cache_start + cache_offset, cache_size,
 			IOMMU_READ | IOMMU_WRITE);
 	if (ret) {
-		LOG_ERROR("sram iommu_map error: %d\n", ret);
+		LOG_ERROR("cache iommu_map error: %d\n", ret);
 		goto free_iova;
 	}
 
 	rknpu_obj->dma_addr = rknpu_obj->iova_start;
 
 	if (rknpu_obj->size == 0) {
-		LOG_INFO("allocate sram size: %lu\n", rknpu_obj->sram_size);
+		LOG_INFO("allocate cache size: %lu\n", cache_size);
 		return 0;
 	}
 
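The diagram above is easy to check numerically: the cache window occupies [iova_start, iova_start + cache_size), the DDR pages are mapped from iova_start + cache_size, and iova_align() may round the total up to the IOVA granule, leaving the tail unmapped. A standalone sketch of that arithmetic (a 64 KiB granule is assumed purely to make the unmapped tail visible; the driver reads the real granule from iovad):

#include <stdio.h>

/* Assumed IOVA granule; 64 KiB here only so the rounded tail shows up. */
#define GRANULE 0x10000UL

static unsigned long iova_align_sketch(unsigned long size)
{
	return (size + GRANULE - 1) & ~(GRANULE - 1);
}

int main(void)
{
	unsigned long cache_size = 0x10000;    /* page-aligned cache part */
	unsigned long ddr_size = 0x26000;      /* page-aligned DDR part */
	unsigned long iova_start = 0x10000000; /* from the IOVA allocator */
	unsigned long iova_size = iova_align_sketch(cache_size + ddr_size);

	printf("cache: [%#lx, %#lx)\n", iova_start, iova_start + cache_size);
	printf("ddr:   [%#lx, %#lx)\n", iova_start + cache_size,
	       iova_start + cache_size + ddr_size);
	printf("unmapped tail: %#lx bytes\n",
	       iova_size - (cache_size + ddr_size));
	return 0;
}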
@@ -460,7 +478,7 @@
 	if (IS_ERR(rknpu_obj->pages)) {
 		ret = PTR_ERR(rknpu_obj->pages);
 		LOG_ERROR("failed to get pages: %d\n", ret);
-		goto sram_unmap;
+		goto cache_unmap;
 	}
 
 	rknpu_obj->num_pages = rknpu_obj->size >> PAGE_SHIFT;
@@ -479,7 +497,7 @@
 	}
 
 	length = rknpu_obj->size;
-	offset = rknpu_obj->iova_start + rknpu_obj->sram_size;
+	offset = rknpu_obj->iova_start + cache_size;
 
 	for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) {
 		size = (length < s->length) ? length : s->length;
@@ -498,13 +516,13 @@
 			break;
 	}
 
-	LOG_INFO("allocate size: %lu with sram size: %lu\n", rknpu_obj->size,
-		 rknpu_obj->sram_size);
+	LOG_INFO("allocate size: %lu with cache size: %lu\n", rknpu_obj->size,
+		 cache_size);
 
 	return 0;
 
 sgl_unmap:
-	iommu_unmap(domain, rknpu_obj->iova_start + rknpu_obj->sram_size,
+	iommu_unmap(domain, rknpu_obj->iova_start + cache_size,
 		    rknpu_obj->size - length);
 	sg_free_table(rknpu_obj->sgt);
 	kfree(rknpu_obj->sgt);
@@ -512,8 +530,8 @@
 put_pages:
 	drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, false, false);
 
-sram_unmap:
-	iommu_unmap(domain, rknpu_obj->iova_start, rknpu_obj->sram_size);
+cache_unmap:
+	iommu_unmap(domain, rknpu_obj->iova_start, cache_size);
 
 free_iova:
 	rknpu_iommu_dma_free_iova(domain->iova_cookie, rknpu_obj->iova_start,
@@ -522,20 +540,31 @@
 	return ret;
 }
 
-static void rknpu_gem_free_buf_with_sram(struct rknpu_gem_object *rknpu_obj)
+static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
+					  enum rknpu_cache_type cache_type)
 {
 	struct drm_device *drm = rknpu_obj->base.dev;
 	struct rknpu_device *rknpu_dev = drm->dev_private;
 	struct iommu_domain *domain = NULL;
+	unsigned long cache_size = 0;
+
+	switch (cache_type) {
+	case RKNPU_CACHE_SRAM:
+		cache_size = rknpu_obj->sram_size;
+		break;
+	case RKNPU_CACHE_NBUF:
+		cache_size = rknpu_obj->nbuf_size;
+		break;
+	default:
+		LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type);
+		return;
+	}
 
 	domain = iommu_get_domain_for_dev(rknpu_dev->dev);
 	if (domain) {
-		iommu_unmap(domain, rknpu_obj->iova_start,
-			    rknpu_obj->sram_size);
+		iommu_unmap(domain, rknpu_obj->iova_start, cache_size);
 		if (rknpu_obj->size > 0)
-			iommu_unmap(domain,
-				    rknpu_obj->iova_start +
-					    rknpu_obj->sram_size,
+			iommu_unmap(domain, rknpu_obj->iova_start + cache_size,
 				    rknpu_obj->size);
 		rknpu_iommu_dma_free_iova(domain->iova_cookie,
 					  rknpu_obj->iova_start,
@@ -618,9 +647,35 @@
 		if (real_sram_size > 0) {
 			rknpu_obj->sram_size = real_sram_size;
 
-			ret = rknpu_gem_alloc_buf_with_sram(rknpu_obj);
+			ret = rknpu_gem_alloc_buf_with_cache(rknpu_obj,
+							     RKNPU_CACHE_SRAM);
 			if (ret < 0)
 				goto mm_free;
+			remain_ddr_size = 0;
+		}
+	} else if (IS_ENABLED(CONFIG_NO_GKI) &&
+		   (flags & RKNPU_MEM_TRY_ALLOC_NBUF) &&
+		   rknpu_dev->nbuf_size > 0) {
+		size_t nbuf_size = 0;
+
+		rknpu_obj = rknpu_gem_init(drm, remain_ddr_size);
+		if (IS_ERR(rknpu_obj))
+			return rknpu_obj;
+
+		nbuf_size = remain_ddr_size <= rknpu_dev->nbuf_size ?
+				    remain_ddr_size :
+				    rknpu_dev->nbuf_size;
+
+		/* set memory type and cache attribute from user side. */
+		rknpu_obj->flags = flags;
+
+		if (nbuf_size > 0) {
+			rknpu_obj->nbuf_size = nbuf_size;
+
+			ret = rknpu_gem_alloc_buf_with_cache(rknpu_obj,
+							     RKNPU_CACHE_NBUF);
+			if (ret < 0)
+				goto gem_release;
 			remain_ddr_size = 0;
 		}
 	}
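In the new NBUF branch the request is clamped to the window size: only min(remain_ddr_size, rknpu_dev->nbuf_size) is carved out of NBUF, and rknpu_gem_alloc_buf_with_cache() backs the remainder with DDR pages. A trivial standalone sketch of the clamp (sizes are examples):

#include <stdio.h>

/* Clamp the DDR remainder to what the NBUF window can hold. */
static unsigned long nbuf_clamp(unsigned long remain_ddr_size,
				unsigned long nbuf_total)
{
	return remain_ddr_size <= nbuf_total ? remain_ddr_size : nbuf_total;
}

int main(void)
{
	printf("%#lx\n", nbuf_clamp(0x10000, 0x40000));  /* fits: 0x10000 */
	printf("%#lx\n", nbuf_clamp(0x100000, 0x40000)); /* clamped: 0x40000 */
	return 0;
}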
@@ -640,9 +695,11 @@
 
 	if (rknpu_obj)
 		LOG_DEBUG(
-			"created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, attrs: %#lx, flags: %#x\n",
-			&rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size,
-			rknpu_obj->sram_size, rknpu_obj->dma_attrs, rknpu_obj->flags);
+			"created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, nbuf size: %lu, attrs: %#lx, flags: %#x\n",
+			&rknpu_obj->dma_addr, rknpu_obj->cookie,
+			rknpu_obj->size, rknpu_obj->sram_size,
+			rknpu_obj->nbuf_size, rknpu_obj->dma_attrs,
+			rknpu_obj->flags);
 
 	return rknpu_obj;
 
@@ -683,7 +740,12 @@
 			if (rknpu_obj->sram_obj != NULL)
 				rknpu_mm_free(rknpu_dev->sram_mm,
 					      rknpu_obj->sram_obj);
-			rknpu_gem_free_buf_with_sram(rknpu_obj);
+			rknpu_gem_free_buf_with_cache(rknpu_obj,
+						      RKNPU_CACHE_SRAM);
+		} else if (IS_ENABLED(CONFIG_NO_GKI) &&
+			   rknpu_obj->nbuf_size > 0) {
+			rknpu_gem_free_buf_with_cache(rknpu_obj,
+						      RKNPU_CACHE_NBUF);
 		} else {
 			rknpu_gem_free_buf(rknpu_obj);
 		}
@@ -808,6 +870,75 @@
 }
 #endif
 
+static int rknpu_gem_mmap_cache(struct rknpu_gem_object *rknpu_obj,
+				struct vm_area_struct *vma,
+				enum rknpu_cache_type cache_type)
+{
+	struct drm_device *drm = rknpu_obj->base.dev;
+#if RKNPU_GEM_ALLOC_FROM_PAGES
+	struct rknpu_device *rknpu_dev = drm->dev_private;
+#endif
+	unsigned long vm_size = 0;
+	int ret = -EINVAL;
+	unsigned long offset = 0;
+	unsigned long num_pages = 0;
+	int i = 0;
+	phys_addr_t cache_start = 0;
+	unsigned long cache_offset = 0;
+	unsigned long cache_size = 0;
+
+	switch (cache_type) {
+	case RKNPU_CACHE_SRAM:
+		cache_start = rknpu_dev->sram_start;
+		cache_offset = rknpu_obj->sram_obj->range_start *
+			       rknpu_dev->sram_mm->chunk_size;
+		cache_size = rknpu_obj->sram_size;
+		break;
+	case RKNPU_CACHE_NBUF:
+		cache_start = rknpu_dev->nbuf_start;
+		cache_offset = 0;
+		cache_size = rknpu_obj->nbuf_size;
+		break;
+	default:
+		LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type);
+		return -EINVAL;
+	}
+
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	vm_size = vma->vm_end - vma->vm_start;
+
+	/*
+	 * Convert a physical address in a cache area to a page frame number (PFN),
+	 * and store the resulting PFN in the vm_pgoff field of the given VMA.
+	 *
+	 * NOTE: This conversion carries a risk because the resulting PFN is not a true
+	 * page frame number and may not be valid or usable in all contexts.
+	 */
+	vma->vm_pgoff = __phys_to_pfn(cache_start + cache_offset);
+
+	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, cache_size,
+			      vma->vm_page_prot);
+	if (ret)
+		return -EAGAIN;
+
+	if (rknpu_obj->size == 0)
+		return 0;
+
+	offset = cache_size;
+
+	num_pages = (vm_size - cache_size) / PAGE_SIZE;
+	for (i = 0; i < num_pages; ++i) {
+		ret = vm_insert_page(vma, vma->vm_start + offset,
+				     rknpu_obj->pages[i]);
+		if (ret < 0)
+			return ret;
+		offset += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
 static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj,
 				 struct vm_area_struct *vma)
 {
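rknpu_gem_mmap_cache() builds a split VMA, which is why it sets VM_MIXEDMAP: the cache window is inserted by raw PFN via remap_pfn_range(), while the DDR tail is populated one struct page at a time via vm_insert_page(). One caveat worth noting: rknpu_dev is declared under #if RKNPU_GEM_ALLOC_FROM_PAGES yet used unconditionally in the switch; that compiles because the macro is defined to 1 at the top of this file, but it would break if the macro were ever set to 0. A standalone sketch of the address bookkeeping for the split (addresses and sizes are examples):

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long vm_start = 0x7f0000000000UL; /* example mmap() base */
	unsigned long vm_size = 20 * PAGE_SIZE;    /* whole VMA */
	unsigned long cache_size = 16 * PAGE_SIZE; /* PFN-remapped window */
	unsigned long num_pages = (vm_size - cache_size) / PAGE_SIZE;
	unsigned long offset = cache_size;
	unsigned long i;

	printf("cache window [%#lx, %#lx) via remap_pfn_range\n",
	       vm_start, vm_start + cache_size);
	for (i = 0; i < num_pages; ++i) {
		/* each DDR page would be installed here via vm_insert_page */
		printf("ddr page %lu -> %#lx\n", i, vm_start + offset);
		offset += PAGE_SIZE;
	}
	return 0;
}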
@@ -832,38 +963,10 @@
 	if (vm_size > rknpu_obj->size)
 		return -EINVAL;
 
-	if (rknpu_obj->sram_size > 0) {
-		unsigned long offset = 0;
-		unsigned long num_pages = 0;
-		int i = 0;
-
-		vma->vm_flags |= VM_MIXEDMAP;
-
-		offset = rknpu_obj->sram_obj->range_start *
-			 rknpu_dev->sram_mm->chunk_size;
-		vma->vm_pgoff = __phys_to_pfn(rknpu_dev->sram_start + offset);
-
-		ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-				      rknpu_obj->sram_size, vma->vm_page_prot);
-		if (ret)
-			return -EAGAIN;
-
-		if (rknpu_obj->size == 0)
-			return 0;
-
-		offset = rknpu_obj->sram_size;
-
-		num_pages = (vm_size - rknpu_obj->sram_size) / PAGE_SIZE;
-		for (i = 0; i < num_pages; ++i) {
-			ret = vm_insert_page(vma, vma->vm_start + offset,
-					     rknpu_obj->pages[i]);
-			if (ret < 0)
-				return ret;
-			offset += PAGE_SIZE;
-		}
-
-		return 0;
-	}
+	if (rknpu_obj->sram_size > 0)
+		return rknpu_gem_mmap_cache(rknpu_obj, vma, RKNPU_CACHE_SRAM);
+	else if (rknpu_obj->nbuf_size > 0)
+		return rknpu_gem_mmap_cache(rknpu_obj, vma, RKNPU_CACHE_NBUF);
 
 #if RKNPU_GEM_ALLOC_FROM_PAGES
 	if ((rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) &&
@@ -1199,6 +1302,55 @@
 	return rknpu_gem_mmap_obj(obj, vma);
 }
 
+static int rknpu_cache_sync(struct rknpu_gem_object *rknpu_obj,
+			    unsigned long *length, unsigned long *offset,
+			    enum rknpu_cache_type cache_type)
+{
+	struct drm_gem_object *obj = &rknpu_obj->base;
+	struct rknpu_device *rknpu_dev = obj->dev->dev_private;
+	void __iomem *cache_base_io = NULL;
+	unsigned long cache_offset = 0;
+	unsigned long cache_size = 0;
+
+	switch (cache_type) {
+	case RKNPU_CACHE_SRAM:
+		cache_base_io = rknpu_dev->sram_base_io;
+		cache_offset = rknpu_obj->sram_obj->range_start *
+			       rknpu_dev->sram_mm->chunk_size;
+		cache_size = rknpu_obj->sram_size;
+		break;
+	case RKNPU_CACHE_NBUF:
+		cache_base_io = rknpu_dev->nbuf_base_io;
+		cache_offset = 0;
+		cache_size = rknpu_obj->nbuf_size;
+		break;
+	default:
+		LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type);
+		return -EINVAL;
+	}
+
+	if ((*offset + *length) <= cache_size) {
+		__dma_map_area(cache_base_io + *offset + cache_offset, *length,
+			       DMA_TO_DEVICE);
+		__dma_unmap_area(cache_base_io + *offset + cache_offset,
+				 *length, DMA_FROM_DEVICE);
+		*length = 0;
+		*offset = 0;
+	} else if (*offset >= cache_size) {
+		*offset -= cache_size;
+	} else {
+		unsigned long cache_length = cache_size - *offset;
+
+		__dma_map_area(cache_base_io + *offset + cache_offset,
+			       cache_length, DMA_TO_DEVICE);
+		__dma_unmap_area(cache_base_io + *offset + cache_offset,
+				 cache_length, DMA_FROM_DEVICE);
+		*length -= cache_length;
+		*offset = 0;
+	}
+	return 0;
+}
+
 int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv)
 {
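rknpu_cache_sync() consumes the part of a sync request that falls inside the cache window and rewrites *offset/*length so the caller's scatterlist loop sees only the DDR remainder: a request entirely inside the window zeroes both, one entirely past it just shifts the offset into DDR coordinates, and one straddling the boundary syncs the head and leaves the tail. A standalone sketch of just that interval arithmetic (no DMA calls, values are examples):

#include <stdio.h>

/* Mirror the three cases of rknpu_cache_sync()'s offset/length rewrite. */
static void cache_window_consume(unsigned long cache_size,
				 unsigned long *offset, unsigned long *length)
{
	if (*offset + *length <= cache_size) {
		/* whole request inside the cache window: fully consumed */
		*offset = 0;
		*length = 0;
	} else if (*offset >= cache_size) {
		/* request entirely in DDR: shift into DDR coordinates */
		*offset -= cache_size;
	} else {
		/* straddles the boundary: head consumed, tail remains */
		*length -= cache_size - *offset;
		*offset = 0;
	}
}

int main(void)
{
	unsigned long cases[][3] = {
		/* cache_size, offset, length */
		{ 0x10000, 0x1000, 0x2000 },  /* inside window  */
		{ 0x10000, 0x14000, 0x2000 }, /* past window    */
		{ 0x10000, 0xc000, 0x8000 },  /* straddles edge */
	};
	for (int i = 0; i < 3; ++i) {
		unsigned long off = cases[i][1], len = cases[i][2];

		cache_window_consume(cases[i][0], &off, &len);
		printf("case %d -> ddr offset %#lx, ddr length %#lx\n",
		       i, off, len);
	}
	return 0;
}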
@@ -1233,35 +1385,15 @@
 	length = args->size;
 	offset = args->offset;
 
-	if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_obj->sram_size > 0) {
-		struct drm_gem_object *obj = &rknpu_obj->base;
-		struct rknpu_device *rknpu_dev = obj->dev->dev_private;
-		unsigned long sram_offset =
-			rknpu_obj->sram_obj->range_start *
-			rknpu_dev->sram_mm->chunk_size;
-		if ((offset + length) <= rknpu_obj->sram_size) {
-			__dma_map_area(rknpu_dev->sram_base_io +
-				       offset + sram_offset,
-				       length, DMA_TO_DEVICE);
-			__dma_unmap_area(rknpu_dev->sram_base_io +
-					 offset + sram_offset,
-					 length, DMA_FROM_DEVICE);
-			length = 0;
-			offset = 0;
-		} else if (offset >= rknpu_obj->sram_size) {
-			offset -= rknpu_obj->sram_size;
-		} else {
-			unsigned long sram_length =
-				rknpu_obj->sram_size - offset;
-			__dma_map_area(rknpu_dev->sram_base_io +
-				       offset + sram_offset,
-				       sram_length, DMA_TO_DEVICE);
-			__dma_unmap_area(rknpu_dev->sram_base_io +
-					 offset + sram_offset,
-					 sram_length, DMA_FROM_DEVICE);
-			length -= sram_length;
-			offset = 0;
-		}
+	if (IS_ENABLED(CONFIG_NO_GKI) &&
+	    IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) &&
+	    rknpu_obj->sram_size > 0) {
+		rknpu_cache_sync(rknpu_obj, &length, &offset,
+				 RKNPU_CACHE_SRAM);
+	} else if (IS_ENABLED(CONFIG_NO_GKI) &&
+		   rknpu_obj->nbuf_size > 0) {
+		rknpu_cache_sync(rknpu_obj, &length, &offset,
+				 RKNPU_CACHE_NBUF);
 	}
 
 	for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents,