// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) Rockchip Electronics Co.Ltd
 * Author: Felix Zeng
 */

#include "rknpu_debugger.h"
#include "rknpu_mm.h"

int rknpu_mm_create(unsigned int mem_size, unsigned int chunk_size,
		    struct rknpu_mm **mm)
{
	unsigned int num_of_longs;
	int ret = -EINVAL;

	if (WARN_ON(mem_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(mem_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;

	*mm = kzalloc(sizeof(struct rknpu_mm), GFP_KERNEL);
	if (!(*mm))
		return -ENOMEM;

	(*mm)->chunk_size = chunk_size;
	(*mm)->total_chunks = mem_size / chunk_size;
	(*mm)->free_chunks = (*mm)->total_chunks;

	num_of_longs =
		((*mm)->total_chunks + BITS_PER_LONG - 1) / BITS_PER_LONG;

	(*mm)->bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
	if (!(*mm)->bitmap) {
		ret = -ENOMEM;
		goto free_mm;
	}

	mutex_init(&(*mm)->lock);

	LOG_DEBUG("total_chunks: %d, bitmap: %p\n", (*mm)->total_chunks,
		  (*mm)->bitmap);

	return 0;

free_mm:
	kfree(*mm);
	*mm = NULL;
	return ret;
}

void rknpu_mm_destroy(struct rknpu_mm *mm)
{
	if (mm != NULL) {
		mutex_destroy(&mm->lock);
		kfree(mm->bitmap);
		kfree(mm);
	}
}

int rknpu_mm_alloc(struct rknpu_mm *mm, unsigned int size,
		   struct rknpu_mm_obj **mm_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > mm->total_chunks * mm->chunk_size)
		return -ENOMEM;

	*mm_obj = kzalloc(sizeof(struct rknpu_mm_obj), GFP_KERNEL);
	if (!(*mm_obj))
		return -ENOMEM;

	start_search = 0;

	mutex_lock(&mm->lock);

mm_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(mm->bitmap, mm->total_chunks, start_search);

	/* If there wasn't any free chunk, bail out */
	if (found == mm->total_chunks)
		goto mm_no_free_chunk;

	/* Update fields of mm_obj */
	(*mm_obj)->range_start = found;
	(*mm_obj)->range_end = found;

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= mm->chunk_size) {
		set_bit(found, mm->bitmap);
		goto mm_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - mm->chunk_size;
	do {
		(*mm_obj)->range_end = find_next_zero_bit(
			mm->bitmap, mm->total_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous then we need to
		 * restart our search from the last free chunk we found (which
		 * wasn't contiguous to the previous ones).
		 */
		if ((*mm_obj)->range_end != found) {
			start_search = found;
			goto mm_restart_search;
		}

		/* If we reached the end of the buffer, bail out with error */
		if (found == mm->total_chunks)
			goto mm_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= mm->chunk_size)
			cur_size = 0;
		else
			cur_size -= mm->chunk_size;

	} while (cur_size > 0);

	/* Mark the chunks as allocated */
	for (found = (*mm_obj)->range_start; found <= (*mm_obj)->range_end;
	     found++)
		set_bit(found, mm->bitmap);

mm_out:
	mm->free_chunks -=
		((*mm_obj)->range_end - (*mm_obj)->range_start + 1);
	mutex_unlock(&mm->lock);

	LOG_DEBUG("mm allocate, mm_obj: %p, range_start: %d, range_end: %d\n",
		  *mm_obj, (*mm_obj)->range_start, (*mm_obj)->range_end);

	return 0;

mm_no_free_chunk:
	mutex_unlock(&mm->lock);
	kfree(*mm_obj);
	return -ENOMEM;
}

int rknpu_mm_free(struct rknpu_mm *mm, struct rknpu_mm_obj *mm_obj)
{
	unsigned int bit;

	/* Act like kfree when trying to free a NULL object */
	if (!mm_obj)
		return 0;

	LOG_DEBUG("mm free, mm_obj: %p, range_start: %d, range_end: %d\n",
		  mm_obj, mm_obj->range_start, mm_obj->range_end);

	mutex_lock(&mm->lock);

	/* Mark the chunks as free */
	for (bit = mm_obj->range_start; bit <= mm_obj->range_end; bit++)
		clear_bit(bit, mm->bitmap);

	mm->free_chunks += (mm_obj->range_end - mm_obj->range_start + 1);

	mutex_unlock(&mm->lock);

	kfree(mm_obj);

	return 0;
}
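/*
 * Example usage of the chunk allocator above (an illustrative sketch, not
 * part of the driver; the 1 MiB region size and 4 KiB chunk size are
 * assumptions for demonstration only):
 *
 *	struct rknpu_mm *mm;
 *	struct rknpu_mm_obj *obj;
 *	int ret;
 *
 *	ret = rknpu_mm_create(SZ_1M, SZ_4K, &mm);   // 256 chunks of 4 KiB
 *	if (ret)
 *		return ret;
 *
 *	ret = rknpu_mm_alloc(mm, SZ_16K, &obj);     // reserves 4 chunks
 *	if (ret)
 *		goto destroy;
 *
 *	// obj->range_start..obj->range_end index the reserved chunks; the
 *	// byte offset of the allocation is obj->range_start * mm->chunk_size.
 *
 *	rknpu_mm_free(mm, obj);
 * destroy:
 *	rknpu_mm_destroy(mm);
 */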
int rknpu_mm_dump(struct seq_file *m, void *data)
{
	struct rknpu_debugger_node *node = m->private;
	struct rknpu_debugger *debugger = node->debugger;
	struct rknpu_device *rknpu_dev =
		container_of(debugger, struct rknpu_device, debugger);
	struct rknpu_mm *mm = NULL;
	int cur = 0, rbot = 0, rtop = 0;
	size_t ret = 0;
	char buf[64];
	size_t size = sizeof(buf);
	int seg_chunks = 32, seg_id = 0;
	int free_size = 0;
	int i = 0;

	mm = rknpu_dev->sram_mm;
	if (mm == NULL)
		return 0;

	seq_printf(m, "SRAM bitmap: \"*\" - used, \".\" - free (1bit = %dKB)\n",
		   mm->chunk_size / 1024);

	rbot = cur = find_first_bit(mm->bitmap, mm->total_chunks);
	for (i = 0; i < cur; ++i) {
		ret += scnprintf(buf + ret, size - ret, ".");
		if (ret >= seg_chunks) {
			seq_printf(m, "[%03d] [%s]\n", seg_id++, buf);
			ret = 0;
		}
	}
	while (cur < mm->total_chunks) {
		rtop = cur;
		cur = find_next_bit(mm->bitmap, mm->total_chunks, cur + 1);
		if (cur < mm->total_chunks && cur <= rtop + 1)
			continue;
		for (i = rbot; i <= rtop; ++i) {
			ret += scnprintf(buf + ret, size - ret, "*");
			if (ret >= seg_chunks) {
				seq_printf(m, "[%03d] [%s]\n", seg_id++, buf);
				ret = 0;
			}
		}
		for (i = rtop + 1; i < cur; ++i) {
			ret += scnprintf(buf + ret, size - ret, ".");
			if (ret >= seg_chunks) {
				seq_printf(m, "[%03d] [%s]\n", seg_id++, buf);
				ret = 0;
			}
		}
		rbot = cur;
	}
	if (ret > 0)
		seq_printf(m, "[%03d] [%s]\n", seg_id++, buf);

	free_size = mm->free_chunks * mm->chunk_size;
	seq_printf(m, "SRAM total size: %d, used: %d, free: %d\n",
		   rknpu_dev->sram_size, rknpu_dev->sram_size - free_size,
		   free_size);

	return 0;
}

dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size,
				      u64 dma_limit, struct device *dev)
{
	struct rknpu_iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	unsigned long shift, iova_len, iova = 0;
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
	dma_addr_t limit;
#endif

	shift = iova_shift(iovad);
	iova_len = size >> shift;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		iova_len = roundup_pow_of_two(iova_len);

#if (KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE)
	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
#else
	if (dev->bus_dma_mask)
		dma_limit &= dev->bus_dma_mask;
#endif

	if (domain->geometry.force_aperture)
		dma_limit = min_t(u64, dma_limit, domain->geometry.aperture_end);

#if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE)
	iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
#else
	limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn);

	iova = alloc_iova_fast(iovad, iova_len, limit, true);
#endif

	return (dma_addr_t)iova << shift;
}

void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie,
			       dma_addr_t iova, size_t size)
{
	struct iova_domain *iovad = &cookie->iovad;

	free_iova_fast(iovad, iova_pfn(iovad, iova),
		       size >> iova_shift(iovad));
}
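/*
 * Example usage of the IOVA helpers above (an illustrative sketch, not part
 * of the driver; the domain, dev and 32-bit DMA limit are assumptions for
 * demonstration only):
 *
 *	struct rknpu_iommu_dma_cookie *cookie = domain->iova_cookie;
 *	size_t size = 17 * SZ_4K;	// 17 pages with a 4 KiB IOVA granule
 *	dma_addr_t iova;
 *
 *	iova = rknpu_iommu_dma_alloc_iova(domain, size, DMA_BIT_MASK(32), dev);
 *	if (!iova)
 *		return -ENOMEM;
 *
 *	// 17 pages is below the IOVA range-cache threshold, so the length is
 *	// rounded up to the next power of two (32 pages). Freeing with the
 *	// unadjusted size still works because 17 and 32 pages share the same
 *	// power-of-two order, so the range returns to the same cache bucket.
 *
 *	rknpu_iommu_dma_free_iova(cookie, iova, size);
 */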