From 04dd17822334871b23ea2862f7798fb0e0007777 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 11 May 2024 08:53:19 +0000
Subject: [PATCH] powerpc/perf: use spinlocks in imc-pmu and add trace-imc
 support

---
 kernel/arch/powerpc/perf/imc-pmu.c | 633 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 543 insertions(+), 90 deletions(-)

diff --git a/kernel/arch/powerpc/perf/imc-pmu.c b/kernel/arch/powerpc/perf/imc-pmu.c
index 5553226..b773c41 100644
--- a/kernel/arch/powerpc/perf/imc-pmu.c
+++ b/kernel/arch/powerpc/perf/imc-pmu.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * In-Memory Collection (IMC) Performance Monitor counter support.
  *
  * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
  *           (C) 2017 Anju T Sudhakar, IBM Corporation.
  *           (C) 2017 Hemant K Shaw, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or later version.
  */
 #include <linux/perf_event.h>
 #include <linux/slab.h>
@@ -17,6 +13,7 @@
 #include <asm/cputhreads.h>
 #include <asm/smp.h>
 #include <linux/string.h>
+#include <linux/spinlock.h>
 
 /* Nest IMC data structures and variables */
 
@@ -28,13 +25,13 @@
 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
 static struct imc_pmu **per_nest_pmu_arr;
 static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
 
 /* Core IMC data structures and variables */
 
 static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
 static struct imc_pmu *core_imc_pmu;
 
 /* Thread IMC data structures and variables */
@@ -43,12 +40,27 @@
 static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
+/*
+ * Global data structure used to avoid races between thread,
+ * core and trace-imc
+ */
+static struct imc_pmu_ref imc_global_refc = {
+	.lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock),
+	.id = 0,
+	.refc = 0,
+};
+
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
 	return container_of(event->pmu, struct imc_pmu, pmu);
 }
 
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
 PMU_FORMAT_ATTR(offset, "config:0-31");
 PMU_FORMAT_ATTR(rvalue, "config:32");
 PMU_FORMAT_ATTR(mode, "config:33-40");
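Note on the imc_global_refc structure added above: it lets exactly one IMC mode (core, thread, or trace) own the counters at a time, with a refcount of events in that mode. To see the arbitration scheme in isolation, here is a minimal, hypothetical userspace sketch; it uses a pthread mutex in place of the kernel spinlock, and names such as domain_acquire()/domain_release() are illustrative, not kernel API. Compile with -pthread.

#include <pthread.h>
#include <stdio.h>
#include <errno.h>

/* Hypothetical domain ids mirroring IMC_DOMAIN_CORE/THREAD/TRACE */
enum { DOMAIN_NONE = 0, DOMAIN_CORE = 1, DOMAIN_THREAD = 2, DOMAIN_TRACE = 3 };

/* Userspace stand-in for struct imc_pmu_ref */
struct domain_ref {
	pthread_mutex_t lock;
	int id;    /* currently active domain, 0 if none */
	int refc;  /* number of events running in that domain */
};

static struct domain_ref global_ref = { PTHREAD_MUTEX_INITIALIZER, DOMAIN_NONE, 0 };

/* Succeeds only if no other domain currently owns the counters. */
static int domain_acquire(int id)
{
	int rc = 0;

	pthread_mutex_lock(&global_ref.lock);
	if (global_ref.id == DOMAIN_NONE || global_ref.id == id) {
		global_ref.id = id;
		global_ref.refc++;
	} else {
		rc = -EBUSY;	/* another accumulation/trace mode is active */
	}
	pthread_mutex_unlock(&global_ref.lock);
	return rc;
}

/* Mirrors reset_global_refc(): the last release clears the owner. */
static void domain_release(void)
{
	pthread_mutex_lock(&global_ref.lock);
	if (--global_ref.refc <= 0) {
		global_ref.refc = 0;
		global_ref.id = DOMAIN_NONE;
	}
	pthread_mutex_unlock(&global_ref.lock);
}

int main(void)
{
	printf("core:  %d\n", domain_acquire(DOMAIN_CORE));	/* 0 */
	printf("trace: %d\n", domain_acquire(DOMAIN_TRACE));	/* -EBUSY */
	domain_release();
	printf("trace: %d\n", domain_acquire(DOMAIN_TRACE));	/* 0 */
	return 0;
}

The same acquire/release shape appears in core_imc_event_init(), thread_imc_event_init() and trace_imc_event_init() below, with reset_global_refc() wired up as the event's destroy callback.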
@@ -63,6 +75,25 @@
 static struct attribute_group imc_format_group = {
 	.name = "format",
 	.attrs = imc_format_attrs,
+};
+
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_cpmc_reserved.attr,
+	&format_attr_cpmc_event.attr,
+	&format_attr_cpmc_samplesel.attr,
+	&format_attr_cpmc_load.attr,
+	NULL,
+};
+
+static struct attribute_group trace_imc_format_group = {
+	.name = "format",
+	.attrs = trace_imc_format_attrs,
 };
 
 /* Get the cpumask printed to a buffer "buf" */
@@ -342,7 +373,14 @@
 	 */
 	nid = cpu_to_node(cpu);
 	l_cpumask = cpumask_of_node(nid);
-	target = cpumask_any_but(l_cpumask, cpu);
+	target = cpumask_last(l_cpumask);
+
+	/*
+	 * If this(target) is the last cpu in the cpumask for this chip,
+	 * check for any possible online cpu in the chip.
+	 */
+	if (unlikely(target == cpu))
+		target = cpumask_any_but(l_cpumask, cpu);
 
 	/*
 	 * Update the cpumask with the target cpu and
@@ -356,7 +394,7 @@
 			get_hard_smp_processor_id(cpu));
 	/*
 	 * If this is the last cpu in this chip then, skip the reference
-	 * count mutex lock and make the reference count on this chip zero.
+	 * count lock and make the reference count on this chip zero.
 	 */
 	ref = get_nest_pmu_ref(cpu);
 	if (!ref)
@@ -418,15 +456,15 @@
 	/*
 	 * See if we need to disable the nest PMU.
 	 * If no events are currently in use, then we have to take a
-	 * mutex to ensure that we don't race with another task doing
+	 * lock to ensure that we don't race with another task doing
 	 * enable or disable the nest counters.
 	 */
 	ref = get_nest_pmu_ref(event->cpu);
 	if (!ref)
 		return;
 
-	/* Take the mutex lock for this node and then decrement the reference count */
-	mutex_lock(&ref->lock);
+	/* Take the lock for this node and then decrement the reference count */
+	spin_lock(&ref->lock);
 	if (ref->refc == 0) {
 		/*
 		 * The scenario where this is true is, when perf session is
@@ -438,7 +476,7 @@
 		 * an OPAL call to disable the engine in that node.
 		 *
 		 */
-		mutex_unlock(&ref->lock);
+		spin_unlock(&ref->lock);
 		return;
 	}
 	ref->refc--;
@@ -446,7 +484,7 @@
 	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
 				get_hard_smp_processor_id(event->cpu));
 	if (rc) {
-		mutex_unlock(&ref->lock);
+		spin_unlock(&ref->lock);
 		pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
 		return;
 	}
@@ -454,7 +492,7 @@
 		WARN(1, "nest-imc: Invalid event reference count\n");
 		ref->refc = 0;
 	}
-	mutex_unlock(&ref->lock);
+	spin_unlock(&ref->lock);
 }
 
 static int nest_imc_event_init(struct perf_event *event)
@@ -471,15 +509,6 @@
 
 	/* Sampling not supported */
 	if (event->hw.sample_period)
-		return -EINVAL;
-
-	/* unsupported modes and filters */
-	if (event->attr.exclude_user ||
-	    event->attr.exclude_kernel ||
-	    event->attr.exclude_hv ||
-	    event->attr.exclude_idle ||
-	    event->attr.exclude_host ||
-	    event->attr.exclude_guest)
 		return -EINVAL;
 
 	if (event->cpu < 0)
@@ -522,26 +551,25 @@
 
 	/*
 	 * Get the imc_pmu_ref struct for this node.
-	 * Take the mutex lock and then increment the count of nest pmu events
-	 * inited.
+	 * Take the lock and then increment the count of nest pmu events inited.
*/ ref = get_nest_pmu_ref(event->cpu); if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to start the counters for node %d\n", node_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); event->destroy = nest_imc_counters_release; return 0; @@ -559,6 +587,7 @@ { int nid, rc = 0, core_id = (cpu / threads_per_core); struct imc_mem_info *mem_info; + struct page *page; /* * alloc_pages_node() will allocate memory for core in the @@ -569,15 +598,15 @@ mem_info->id = core_id; /* We need only vbase for core counters */ - mem_info->vbase = page_address(alloc_pages_node(nid, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | - __GFP_NOWARN, get_order(size))); - if (!mem_info->vbase) + page = alloc_pages_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size)); + if (!page) return -ENOMEM; + mem_info->vbase = page_address(page); - /* Init the mutex */ core_imc_refc[core_id].id = core_id; - mutex_init(&core_imc_refc[core_id].lock); + spin_lock_init(&core_imc_refc[core_id].lock); rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, __pa((void *)mem_info->vbase), @@ -656,16 +685,18 @@ return 0; /* Find any online cpu in that core except the current "cpu" */ - ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); + ncpu = cpumask_last(cpu_sibling_mask(cpu)); + + if (unlikely(ncpu == cpu)) + ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); if (ncpu >= 0 && ncpu < nr_cpu_ids) { cpumask_set_cpu(ncpu, &core_imc_cpumask); perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); } else { /* - * If this is the last cpu in this core then, skip taking refernce - * count mutex lock for this core and directly zero "refc" for - * this core. + * If this is the last cpu in this core then skip taking reference + * count lock for this core and directly zero "refc" for this core. */ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); @@ -675,6 +706,16 @@ return -EINVAL; ref->refc = 0; + /* + * Reduce the global reference count, if this is the + * last cpu in this core and core-imc event running + * in this cpu. + */ + spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_CORE) + imc_global_refc.refc--; + + spin_unlock(&imc_global_refc.lock); } return 0; } @@ -687,6 +728,23 @@ ppc_core_imc_cpu_offline); } +static void reset_global_refc(struct perf_event *event) +{ + spin_lock(&imc_global_refc.lock); + imc_global_refc.refc--; + + /* + * If no other thread is running any + * event for this domain(thread/core/trace), + * set the global id to zero. + */ + if (imc_global_refc.refc <= 0) { + imc_global_refc.refc = 0; + imc_global_refc.id = 0; + } + spin_unlock(&imc_global_refc.lock); +} + static void core_imc_counters_release(struct perf_event *event) { int rc, core_id; @@ -697,17 +755,17 @@ /* * See if we need to disable the IMC PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the core counters. 
 */
 	core_id = event->cpu / threads_per_core;
 
-	/* Take the mutex lock and decrement the refernce count for this core */
+	/* Take the lock and decrement the reference count for this core */
 	ref = &core_imc_refc[core_id];
 	if (!ref)
 		return;
 
-	mutex_lock(&ref->lock);
+	spin_lock(&ref->lock);
 	if (ref->refc == 0) {
 		/*
 		 * The scenario where this is true is, when perf session is
@@ -719,7 +777,7 @@
 		 * an OPAL call to disable the engine in that core.
 		 *
 		 */
-		mutex_unlock(&ref->lock);
+		spin_unlock(&ref->lock);
 		return;
 	}
 	ref->refc--;
@@ -727,7 +785,7 @@
 	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
 				get_hard_smp_processor_id(event->cpu));
 	if (rc) {
-		mutex_unlock(&ref->lock);
+		spin_unlock(&ref->lock);
 		pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
 		return;
 	}
@@ -735,7 +793,9 @@
 		WARN(1, "core-imc: Invalid event reference count\n");
 		ref->refc = 0;
 	}
-	mutex_unlock(&ref->lock);
+	spin_unlock(&ref->lock);
+
+	reset_global_refc(event);
 }
 
 static int core_imc_event_init(struct perf_event *event)
@@ -751,15 +811,6 @@
 
 	/* Sampling not supported */
 	if (event->hw.sample_period)
-		return -EINVAL;
-
-	/* unsupported modes and filters */
-	if (event->attr.exclude_user ||
-	    event->attr.exclude_kernel ||
-	    event->attr.exclude_hv ||
-	    event->attr.exclude_idle ||
-	    event->attr.exclude_host ||
-	    event->attr.exclude_guest)
 		return -EINVAL;
 
 	if (event->cpu < 0)
@@ -780,7 +831,6 @@
 	if ((!pcmi->vbase))
 		return -ENODEV;
 
-	/* Get the core_imc mutex for this core */
 	ref = &core_imc_refc[core_id];
 	if (!ref)
 		return -EINVAL;
@@ -788,22 +838,45 @@
 	/*
 	 * Core pmu units are enabled only when it is used.
 	 * See if this is triggered for the first time.
-	 * If yes, take the mutex lock and enable the core counters.
+	 * If yes, take the lock and enable the core counters.
 	 * If not, just increment the count in core_imc_refc struct.
 	 */
-	mutex_lock(&ref->lock);
+	spin_lock(&ref->lock);
 	if (ref->refc == 0) {
 		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
 					get_hard_smp_processor_id(event->cpu));
 		if (rc) {
-			mutex_unlock(&ref->lock);
+			spin_unlock(&ref->lock);
 			pr_err("core-imc: Unable to start the counters for core %d\n",
 									core_id);
 			return rc;
 		}
 	}
 	++ref->refc;
-	mutex_unlock(&ref->lock);
+	spin_unlock(&ref->lock);
+
+	/*
+	 * Since the system can run either in accumulation or trace-mode
+	 * of IMC at a time, core-imc events are allowed only if no other
+	 * trace/thread imc events are enabled/monitored.
+	 *
+	 * Take the global lock, and check the refc.id
+	 * to know whether any other trace/thread imc
+	 * events are running.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
+		/*
+		 * No other trace/thread imc events are running in
+		 * the system, so set the refc.id to core-imc.
+		 */
+		imc_global_refc.id = IMC_DOMAIN_CORE;
+		imc_global_refc.refc++;
+	} else {
+		spin_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	spin_unlock(&imc_global_refc.lock);
 
 	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
 	event->destroy = core_imc_counters_release;
@@ -811,8 +884,11 @@
 }
 
 /*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
* * LDBAR Register Layout: * @@ -830,26 +906,26 @@ */ static int thread_imc_mem_alloc(int cpu_id, int size) { - u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id); + u64 *local_mem = per_cpu(thread_imc_mem, cpu_id); int nid = cpu_to_node(cpu_id); if (!local_mem) { + struct page *page; /* * This case could happen only once at start, since we dont * free the memory in cpu offline path. */ - local_mem = page_address(alloc_pages_node(nid, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | - __GFP_NOWARN, get_order(size))); - if (!local_mem) + __GFP_NOWARN, get_order(size)); + if (!page) return -ENOMEM; + local_mem = page_address(page); per_cpu(thread_imc_mem, cpu_id) = local_mem; } - ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; - - mtspr(SPRN_LDBAR, ldbar_value); + mtspr(SPRN_LDBAR, 0); return 0; } @@ -860,7 +936,23 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * Set the bit 0 of LDBAR to zero. + * + * If bit 0 of LDBAR is unset, it will stop posting + * the counter data to memory. + * For thread-imc, bit 0 of LDBAR will be set to 1 in the + * event_add function. So reset this bit here, to stop the updates + * to memory in the cpu_offline path. + */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + + /* Reduce the refc if thread-imc event running on this cpu */ + spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_THREAD) + imc_global_refc.refc--; + spin_unlock(&imc_global_refc.lock); + return 0; } @@ -881,6 +973,9 @@ if (event->attr.type != event->pmu->type) return -ENOENT; + if (!perfmon_capable()) + return -EACCES; + /* Sampling not supported */ if (event->hw.sample_period) return -EINVAL; @@ -896,7 +991,22 @@ if (!target) return -EINVAL; + spin_lock(&imc_global_refc.lock); + /* + * Check if any other trace/core imc events are running in the + * system, if not set the global id to thread-imc. + */ + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) { + imc_global_refc.id = IMC_DOMAIN_THREAD; + imc_global_refc.refc++; + } else { + spin_unlock(&imc_global_refc.lock); + return -EBUSY; + } + spin_unlock(&imc_global_refc.lock); + event->pmu->task_ctx_nr = perf_sw_context; + event->destroy = reset_global_refc; return 0; } @@ -1000,6 +1110,7 @@ { int core_id; struct imc_pmu_ref *ref; + u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id()); if (flags & PERF_EF_START) imc_event_start(event, flags); @@ -1008,28 +1119,31 @@ return -EINVAL; core_id = smp_processor_id() / threads_per_core; + ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; + mtspr(SPRN_LDBAR, ldbar_value); + /* * imc pmus are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the counters. + * If yes, take the lock and enable the counters. * If not, just increment the count in ref count struct. 
*/ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to start the counter\ for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1039,21 +1153,19 @@ int core_id; struct imc_pmu_ref *ref; - /* - * Take a snapshot and calculate the delta and update - * the event counter values. - */ - imc_event_update(event); - core_id = smp_processor_id() / threads_per_core; ref = &core_imc_refc[core_id]; + if (!ref) { + pr_debug("imc: Failed to get event reference count\n"); + return; + } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to stop the counters\ for core %d\n", core_id); return; @@ -1061,7 +1173,293 @@ } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); + + /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + + /* + * Take a snapshot and calculate the delta and update + * the event counter values. + */ + imc_event_update(event); +} + +/* + * Allocate a page of memory for each cpu, and load LDBAR with 0. + */ +static int trace_imc_mem_alloc(int cpu_id, int size) +{ + u64 *local_mem = per_cpu(trace_imc_mem, cpu_id); + int phys_id = cpu_to_node(cpu_id), rc = 0; + int core_id = (cpu_id / threads_per_core); + + if (!local_mem) { + struct page *page; + + page = alloc_pages_node(phys_id, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size)); + if (!page) + return -ENOMEM; + local_mem = page_address(page); + per_cpu(trace_imc_mem, cpu_id) = local_mem; + + /* Initialise the counters for trace mode */ + rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem), + get_hard_smp_processor_id(cpu_id)); + if (rc) { + pr_info("IMC:opal init failed for trace imc\n"); + return rc; + } + } + + trace_imc_refc[core_id].id = core_id; + spin_lock_init(&trace_imc_refc[core_id].lock); + + mtspr(SPRN_LDBAR, 0); + return 0; +} + +static int ppc_trace_imc_cpu_online(unsigned int cpu) +{ + return trace_imc_mem_alloc(cpu, trace_imc_mem_size); +} + +static int ppc_trace_imc_cpu_offline(unsigned int cpu) +{ + /* + * No need to set bit 0 of LDBAR to zero, as + * it is set to zero for imc trace-mode + * + * Reduce the refc if any trace-imc event running + * on this cpu. + */ + spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_TRACE) + imc_global_refc.refc--; + spin_unlock(&imc_global_refc.lock); + + return 0; +} + +static int trace_imc_cpu_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, + "perf/powerpc/imc_trace:online", + ppc_trace_imc_cpu_online, + ppc_trace_imc_cpu_offline); +} + +static u64 get_trace_imc_event_base_addr(void) +{ + return (u64)per_cpu(trace_imc_mem, smp_processor_id()); +} + +/* + * Function to parse trace-imc data obtained + * and to prepare the perf sample. 
+ */ +static int trace_imc_prepare_sample(struct trace_imc_data *mem, + struct perf_sample_data *data, + u64 *prev_tb, + struct perf_event_header *header, + struct perf_event *event) +{ + /* Sanity checks for a valid record */ + if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb) + *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1)); + else + return -EINVAL; + + if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) != + be64_to_cpu(READ_ONCE(mem->tb2))) + return -EINVAL; + + /* Prepare perf sample */ + data->ip = be64_to_cpu(READ_ONCE(mem->ip)); + data->period = event->hw.last_period; + + header->type = PERF_RECORD_SAMPLE; + header->size = sizeof(*header) + event->header_size; + header->misc = 0; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) { + case 0:/* when MSR HV and PR not set in the trace-record */ + header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; + break; + case 1: /* MSR HV is 0 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_GUEST_USER; + break; + case 2: /* MSR HV is 1 and PR is 0 */ + header->misc |= PERF_RECORD_MISC_KERNEL; + break; + case 3: /* MSR HV is 1 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_USER; + break; + default: + pr_info("IMC: Unable to set the flag based on MSR bits\n"); + break; + } + } else { + if (is_kernel_addr(data->ip)) + header->misc |= PERF_RECORD_MISC_KERNEL; + else + header->misc |= PERF_RECORD_MISC_USER; + } + perf_event_header__init_id(header, data, event); + + return 0; +} + +static void dump_trace_imc_data(struct perf_event *event) +{ + struct trace_imc_data *mem; + int i, ret; + u64 prev_tb = 0; + + mem = (struct trace_imc_data *)get_trace_imc_event_base_addr(); + for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data)); + i++, mem++) { + struct perf_sample_data data; + struct perf_event_header header; + + ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event); + if (ret) /* Exit, if not a valid record */ + break; + else { + /* If this is a valid record, create the sample */ + struct perf_output_handle handle; + + if (perf_output_begin(&handle, &data, event, header.size)) + return; + + perf_output_sample(&handle, &header, &data, event); + perf_output_end(&handle); + } + } +} + +static int trace_imc_event_add(struct perf_event *event, int flags) +{ + int core_id = smp_processor_id() / threads_per_core; + struct imc_pmu_ref *ref = NULL; + u64 local_mem, ldbar_value; + + /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */ + local_mem = get_trace_imc_event_base_addr(); + ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE; + + /* trace-imc reference count */ + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; + if (!ref) { + pr_debug("imc: Failed to get the event reference count\n"); + return -EINVAL; + } + + mtspr(SPRN_LDBAR, ldbar_value); + spin_lock(&ref->lock); + if (ref->refc == 0) { + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, + get_hard_smp_processor_id(smp_processor_id()))) { + spin_unlock(&ref->lock); + pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); + return -EINVAL; + } + } + ++ref->refc; + spin_unlock(&ref->lock); + return 0; +} + +static void trace_imc_event_read(struct perf_event *event) +{ + return; +} + +static void trace_imc_event_stop(struct perf_event *event, int flags) +{ + u64 local_mem = get_trace_imc_event_base_addr(); + dump_trace_imc_data(event); + memset((void *)local_mem, 0, sizeof(u64)); +} + +static void trace_imc_event_start(struct 
perf_event *event, int flags)
+{
+	return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+	int core_id = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *ref = NULL;
+
+	if (trace_imc_refc)
+		ref = &trace_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
+
+	spin_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+				get_hard_smp_processor_id(smp_processor_id()))) {
+			spin_unlock(&ref->lock);
+			pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+
+	trace_imc_event_stop(event, flags);
+}
+
+static int trace_imc_event_init(struct perf_event *event)
+{
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Return if this is a counting event */
+	if (event->attr.sample_period == 0)
+		return -ENOENT;
+
+	/*
+	 * Take the global lock, and make sure
+	 * no other thread is running any core/thread imc
+	 * events
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
+		/*
+		 * No core/thread imc events are running in the
+		 * system, so set the refc.id to trace-imc.
+		 */
+		imc_global_refc.id = IMC_DOMAIN_TRACE;
+		imc_global_refc.refc++;
+	} else {
+		spin_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	spin_unlock(&imc_global_refc.lock);
+
+	event->hw.idx = -1;
+
+	/*
+	 * There can only be a single PMU for perf_hw_context events which is assigned to
+	 * core PMU. Hence use "perf_sw_context" for trace_imc.
+	 */
+	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
+	return 0;
 }
 
 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
@@ -1074,6 +1472,7 @@
 	pmu->pmu.stop = imc_event_stop;
 	pmu->pmu.read = imc_event_update;
 	pmu->pmu.attr_groups = pmu->attr_groups;
+	pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
 	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
 
 	switch (pmu->domain) {
@@ -1093,6 +1492,15 @@
 		pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
 		pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
 		break;
+	case IMC_DOMAIN_TRACE:
+		pmu->pmu.event_init = trace_imc_event_init;
+		pmu->pmu.add = trace_imc_event_add;
+		pmu->pmu.del = trace_imc_event_del;
+		pmu->pmu.start = trace_imc_event_start;
+		pmu->pmu.stop = trace_imc_event_stop;
+		pmu->pmu.read = trace_imc_event_read;
+		pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
+		break;
 	default:
 		break;
 	}
@@ -1114,10 +1521,10 @@
 	i = 0;
 	for_each_node(nid) {
 		/*
-		 * Mutex lock to avoid races while tracking the number of
+		 * Take the lock to avoid races while tracking the number of
 		 * sessions using the chip's nest pmu units.
 		 */
-		mutex_init(&nest_imc_refc[i].lock);
+		spin_lock_init(&nest_imc_refc[i].lock);
 
 		/*
 		 * Loop to init the "id" with the node_id. Variable "i" initialized to
@@ -1163,10 +1570,10 @@
 static void thread_imc_ldbar_disable(void *dummy)
 {
 	/*
-	 * By Zeroing LDBAR, we disable thread-imc
-	 * updates.
+	 * By setting 0th bit of LDBAR to zero, we disable thread-imc
+	 * updates to memory.
 	 */
-	mtspr(SPRN_LDBAR, 0);
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
 }
 
 void thread_imc_disable(void)
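Note on the LDBAR handling in the hunk above: the patch clears only the enable bit instead of zeroing the whole register, so the base address programmed into LDBAR is preserved. Power ISA numbers bits from the most significant end, so "bit 0" in the comments corresponds to 1UL << 63 in C. A small, self-contained sketch of that masking; the value used for the address bits is illustrative, not the real LDBAR layout:

#include <stdio.h>
#include <stdint.h>

/*
 * Power ISA "bit 0" of a 64-bit SPR is the 1ULL << 63 position.
 * Clearing it models what the patch does to LDBAR's enable bit
 * while leaving the base-address bits untouched.
 */
static uint64_t ldbar_disable(uint64_t ldbar)
{
	return ldbar & ~(1ULL << 63);
}

int main(void)
{
	/* enable bit set plus an illustrative base address */
	uint64_t ldbar = (1ULL << 63) | 0x0000123456780000ULL;

	printf("before: %016llx\n", (unsigned long long)ldbar);
	printf("after : %016llx\n", (unsigned long long)ldbar_disable(ldbar));
	return 0;
}

This is why the cpu-offline and event-del paths can stop the hardware from posting counter data without losing the per-thread buffer address that event_add programmed.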
@@ -1183,6 +1590,18 @@
 		free_pages((u64)per_cpu(thread_imc_mem, i), order);
 	}
 
+}
+
+static void cleanup_all_trace_imc_memory(void)
+{
+	int i, order = get_order(trace_imc_mem_size);
+
+	for_each_online_cpu(i) {
+		if (per_cpu(trace_imc_mem, i))
+			free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+	}
+	kfree(trace_imc_refc);
 }
 
 /* Function to free the attr_groups which are dynamically allocated */
@@ -1225,6 +1644,11 @@
 	if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
 		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
 		cleanup_all_thread_imc_memory();
+	}
+
+	if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+		cleanup_all_trace_imc_memory();
 	}
 }
 
@@ -1308,6 +1732,27 @@
 		thread_imc_pmu = pmu_ptr;
 		break;
 
+	case IMC_DOMAIN_TRACE:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+		trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+								GFP_KERNEL);
+		if (!trace_imc_refc)
+			return -ENOMEM;
+
+		trace_imc_mem_size = pmu_ptr->counter_mem_size;
+		for_each_online_cpu(cpu) {
+			res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+			if (res) {
+				cleanup_all_trace_imc_memory();
+				goto err;
+			}
+		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1381,6 +1826,14 @@
 		}
 		break;
+	case IMC_DOMAIN_TRACE:
+		ret = trace_imc_cpu_init();
+		if (ret) {
+			cleanup_all_trace_imc_memory();
+			goto err_free_mem;
+		}
+
+		break;
 	default:
 		return -EINVAL;	/* Unknown domain */
 	}
@@ -1397,7 +1850,7 @@
 	if (ret)
 		goto err_free_cpuhp_mem;
 
-	pr_info("%s performance monitor hardware support registered\n",
+	pr_debug("%s performance monitor hardware support registered\n",
 		pmu_ptr->pmu.name);
 
 	return 0;
-- 
Gitblit v1.6.2
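Every event add/del path in this patch follows the same per-core protocol: the first event on a core starts the hardware through an OPAL call, later events only bump a spinlock-protected refcount, and the last one leaving stops the hardware. A minimal userspace model of that protocol follows; the hw_counters_start()/hw_counters_stop() stubs stand in for opal_imc_counters_start()/stop(), and a pthread mutex stands in for the kernel spinlock (compile with -pthread):

#include <pthread.h>
#include <stdio.h>

/* Userspace model of the per-core struct imc_pmu_ref */
struct core_ref {
	pthread_mutex_t lock;
	int refc;
};

static struct core_ref ref = { PTHREAD_MUTEX_INITIALIZER, 0 };

static int hw_counters_start(void) { puts("start HW counters"); return 0; }
static void hw_counters_stop(void) { puts("stop HW counters"); }

/* First event on the core starts the hardware; later ones only count. */
static int event_add(void)
{
	int rc = 0;

	pthread_mutex_lock(&ref.lock);
	if (ref.refc == 0)
		rc = hw_counters_start();
	if (rc == 0)
		ref.refc++;
	pthread_mutex_unlock(&ref.lock);
	return rc;
}

/* Last event leaving the core stops the hardware. */
static void event_del(void)
{
	pthread_mutex_lock(&ref.lock);
	ref.refc--;
	if (ref.refc == 0)
		hw_counters_stop();
	else if (ref.refc < 0)
		ref.refc = 0;	/* defensive clamp, as in the patch */
	pthread_mutex_unlock(&ref.lock);
}

int main(void)
{
	event_add();	/* starts the hardware */
	event_add();	/* refcount only */
	event_del();
	event_del();	/* stops the hardware */
	return 0;
}

Switching these critical sections from mutexes to spinlocks is the point of much of the diff: the add/del callbacks run with interrupts disabled, where sleeping locks are not allowed.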
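The trace-mode buffer scan in dump_trace_imc_data() relies on the sanity checks in trace_imc_prepare_sample(): a record is accepted only if its timebase moved forward relative to the previous valid record and the masked tb1 field matches tb2; the first failure ends the scan. A simplified, self-contained sketch of that validation; the TRACE_RECORD_TB1_MASK value and struct layout here are illustrative, not the kernel constants:

#include <stdio.h>
#include <stdint.h>

#define TRACE_RECORD_TB1_MASK 0x3fffffffffffULL	/* illustrative mask */

/* Minimal stand-in for the fields checked in trace_imc_prepare_sample() */
struct trace_record {
	uint64_t tb1;
	uint64_t tb2;
	uint64_t ip;
};

/*
 * Returns 1 for a valid record. Mirrors the two -EINVAL paths in the
 * patch: a stale timebase, or tb1/tb2 disagreement, rejects the record.
 */
static int record_valid(const struct trace_record *rec, uint64_t *prev_tb)
{
	if (rec->tb1 > *prev_tb)
		*prev_tb = rec->tb1;
	else
		return 0;

	if ((rec->tb1 & TRACE_RECORD_TB1_MASK) != rec->tb2)
		return 0;

	return 1;
}

int main(void)
{
	struct trace_record recs[] = {
		{ 100, 100, 0xc000000000001000ULL },
		{  90,  90, 0xc000000000002000ULL },	/* stale timebase: rejected */
	};
	uint64_t prev_tb = 0;

	for (unsigned int i = 0; i < 2; i++)
		printf("record %u: %s\n", i,
		       record_valid(&recs[i], &prev_tb) ? "ok" : "rejected");
	return 0;
}

In the kernel, only records that pass these checks are turned into PERF_RECORD_SAMPLE entries; everything after the first invalid record in the per-thread page is treated as unwritten.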