From 10ebd8556b7990499c896a550e3d416b444211e6 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 10 May 2024 02:23:07 +0000 Subject: [PATCH] add led --- kernel/arch/s390/kernel/perf_cpum_sf.c | 534 ++++++++++++++++++++++++++++++++++++++--------------------- 1 files changed, 344 insertions(+), 190 deletions(-) diff --git a/kernel/arch/s390/kernel/perf_cpum_sf.c b/kernel/arch/s390/kernel/perf_cpum_sf.c index c8e1e32..bcd31e0 100644 --- a/kernel/arch/s390/kernel/perf_cpum_sf.c +++ b/kernel/arch/s390/kernel/perf_cpum_sf.c @@ -156,21 +156,22 @@ } } - debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__, + (unsigned long)sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) { - unsigned long sdb, *trailer; + struct hws_trailer_entry *te; + unsigned long sdb; /* Allocate and initialize sample-data-block */ sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - trailer = trailer_entry_ptr(sdb); - *trailer = SDB_TE_ALERT_REQ_MASK; + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te->header.a = 1; /* Link SDB into the sample-data-block-table */ *sdbt = sdb; @@ -212,10 +213,11 @@ * the sampling buffer origin. */ if (sfb->sdbt != get_next_sdbt(tail)) { - debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " - "sampling buffer is not linked: origin=%p" - "tail=%p\n", - (void *) sfb->sdbt, (void *) tail); + debug_sprintf_event(sfdbg, 3, "%s: " + "sampling buffer is not linked: origin %#lx" + " tail %#lx\n", __func__, + (unsigned long)sfb->sdbt, + (unsigned long)tail); return -EINVAL; } @@ -264,8 +266,8 @@ *tail = (unsigned long) sfb->sdbt + 1; sfb->tail = tail; - debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" - " settings: sdbt=%lu sdb=%lu\n", + debug_sprintf_event(sfdbg, 4, "%s: new buffer" + " settings: sdbt %lu sdb %lu\n", __func__, sfb->num_sdbt, sfb->num_sdb); return rc; } @@ -305,12 +307,13 @@ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); if (rc) { free_sampling_buffer(sfb); - debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " - "realloc_sampling_buffer failed with rc=%i\n", rc); + debug_sprintf_event(sfdbg, 4, "%s: " + "realloc_sampling_buffer failed with rc %i\n", + __func__, rc); } else debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear=%p dear=%p\n", - sfb->sdbt, (void *) *sfb->sdbt); + "%s: tear %#lx dear %#lx\n", __func__, + (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt); return rc; } @@ -370,28 +373,33 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { - unsigned long n_sdb, freq, factor; + unsigned long n_sdb, freq; size_t sample_size; /* Calculate sampling buffers using 4K pages * - * 1. Determine the sample data size which depends on the used - * sampling functions, for example, basic-sampling or - * basic-sampling with diagnostic-sampling. + * 1. The sampling size is 32 bytes for basic sampling. This size + * is the same for all machine types. Diagnostic + * sampling uses auxlilary data buffer setup which provides the + * memory for SDBs using linux common code auxiliary trace + * setup. * - * 2. Use the sampling frequency as input. The sampling buffer is - * designed for almost one second. This can be adjusted through - * the "factor" variable. - * In any case, alloc_sampling_buffer() sets the Alert Request + * 2. Function alloc_sampling_buffer() sets the Alert Request * Control indicator to trigger a measurement-alert to harvest - * sample-data-blocks (sdb). + * sample-data-blocks (SDB). This is done per SDB. This + * measurement alert interrupt fires quick enough to handle + * one SDB, on very high frequency and work loads there might + * be 2 to 3 SBDs available for sample processing. + * Currently there is no need for setup alert request on every + * n-th page. This is counterproductive as one IRQ triggers + * a very high number of samples to be processed at one IRQ. * - * 3. Compute the number of sample-data-blocks and ensure a minimum - * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not - * exceed a "calculated" maximum. The symbolic maximum is - * designed for basic-sampling only and needs to be increased if - * diagnostic-sampling is active. - * See also the remarks for these symbolic constants. + * 3. Use the sampling frequency as input. + * Compute the number of SDBs and ensure a minimum + * of CPUM_SF_MIN_SDB. Depending on frequency add some more + * SDBs to handle a higher sampling rate. + * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples + * (one SDB) for every 10000 HZ frequency increment. * * 4. Compute the number of sample-data-block-tables (SDBT) and * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up @@ -399,10 +407,7 @@ */ sample_size = sizeof(struct hws_basic_entry); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); - factor = 1; - n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); - if (n_sdb < CPUM_SF_MIN_SDB) - n_sdb = CPUM_SF_MIN_SDB; + n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000); /* If there is already a sampling buffer allocated, it is very likely * that the sampling facility is enabled too. If the event to be @@ -417,8 +422,8 @@ return 0; debug_sprintf_event(sfdbg, 3, - "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" - " sample_size=%lu cpuhw=%p\n", + "%s: rate %lu f %lu sdb %lu/%lu" + " sample_size %lu cpuhw %p\n", __func__, SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), sample_size, cpuhw); @@ -478,8 +483,8 @@ if (num) sfb_account_allocs(num, hwc); - debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" - " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n", + __func__, OVERFLOW_REG(hwc), ratio, num); OVERFLOW_REG(hwc) = 0; } @@ -517,16 +522,15 @@ */ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); if (rc) - debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " - "failed with rc=%i\n", rc); + debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n", + __func__, rc); if (sfb_has_pending_allocs(sfb, hwc)) - debug_sprintf_event(sfdbg, 5, "sfb: extend: " - "req=%lu alloc=%lu remaining=%lu\n", - num, sfb->num_sdb - num_old, + debug_sprintf_event(sfdbg, 5, "%s: " + "req %lu alloc %lu remaining %lu\n", + __func__, num, sfb->num_sdb - num_old, sfb_pending_allocs(sfb, hwc)); } - /* Number of perf events counting hardware events */ static atomic_t num_events; @@ -552,20 +556,22 @@ err = sf_disable(); if (err) pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + "%s: initialized: cpuhw %p\n", __func__, + cpusf); break; case PMC_RELEASE: cpusf->flags &= ~PMU_F_RESERVED; err = sf_disable(); if (err) { pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); } else deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + "%s: released: cpuhw %p\n", __func__, + cpusf); break; } if (err) @@ -610,13 +616,6 @@ hwc->sample_period = period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); -} - -static void hw_reset_registers(struct hw_perf_event *hwc, - unsigned long *sdbt_origin) -{ - /* (Re)set to first sample-data-block-table */ - TEAR_REG(hwc) = (unsigned long) sdbt_origin; } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, @@ -674,7 +673,7 @@ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, data, event, header.size)) goto out; /* Update the process ID (see also kernel/events/core.c) */ @@ -687,13 +686,88 @@ rcu_read_unlock(); } +static unsigned long getrate(bool freq, unsigned long sample, + struct hws_qsi_info_block *si) +{ + unsigned long rate; + + if (freq) { + rate = freq_to_sample_rate(si, sample); + rate = hw_limit_rate(si, rate); + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(si, sample); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(si, rate) > + sysctl_perf_event_sample_rate) { + debug_sprintf_event(sfdbg, 1, "%s: " + "Sampling rate exceeds maximum " + "perf sample rate\n", __func__); + rate = 0; + } + } + return rate; +} + +/* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + * + * Since the CPU Measurement sampling facility can not handle frequency + * calculate the sampling interval when frequency is specified using + * this formula: + * interval := cpu_speed * 1000000 / sample_freq + * + * Returns errno on bad input and zero on success with parameter interval + * set to the correct sampling rate. + * + * Note: This function turns off freq bit to avoid calling function + * perf_adjust_period(). This causes frequency adjustment in the common + * code part which causes tremendous variations in the counter values. + */ +static int __hw_perf_event_init_rate(struct perf_event *event, + struct hws_qsi_info_block *si) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + + if (attr->freq) { + if (!attr->sample_freq) + return -EINVAL; + rate = getrate(attr->freq, attr->sample_freq, si); + attr->freq = 0; /* Don't call perf_adjust_period() */ + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE; + } else { + rate = getrate(attr->freq, attr->sample_period, si); + if (!rate) + return -EINVAL; + } + attr->sample_period = rate; + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n", + __func__, event->cpu, event->attr.sample_period, + event->attr.freq, SAMPLE_FREQ_MODE(hwc)); + return 0; +} + static int __hw_perf_event_init(struct perf_event *event) { struct cpu_hw_sf *cpuhw; struct hws_qsi_info_block si; struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; - unsigned long rate; int cpu, err; /* Reserve CPU-measurement sampling facility */ @@ -741,6 +815,12 @@ goto out; } + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + /* Always enable basic sampling */ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; @@ -759,43 +839,9 @@ if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; - /* The sampling information (si) contains information about the - * min/max sampling intervals and the CPU speed. So calculate the - * correct sampling interval and avoid the whole period adjust - * feedback loop. - */ - rate = 0; - if (attr->freq) { - if (!attr->sample_freq) { - err = -EINVAL; - goto out; - } - rate = freq_to_sample_rate(&si, attr->sample_freq); - rate = hw_limit_rate(&si, rate); - attr->freq = 0; - attr->sample_period = rate; - } else { - /* The min/max sampling rates specifies the valid range - * of sample periods. If the specified sample period is - * out of range, limit the period to the range boundary. - */ - rate = hw_limit_rate(&si, hwc->sample_period); - - /* The perf core maintains a maximum sample rate that is - * configurable through the sysctl interface. Ensure the - * sampling rate does not exceed this value. This also helps - * to avoid throttling when pushing samples with - * perf_event_overflow(). - */ - if (sample_rate_to_freq(&si, rate) > - sysctl_perf_event_sample_rate) { - err = -EINVAL; - debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); - goto out; - } - } - SAMPL_RATE(hwc) = rate; - hw_init_period(hwc, SAMPL_RATE(hwc)); + err = __hw_perf_event_init_rate(event, &si); + if (err) + goto out; /* Initialize sample data overflow accounting */ hwc->extra_reg.reg = REG_OVERFLOW; @@ -836,12 +882,21 @@ return err; } +static bool is_callchain_event(struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER); +} + static int cpumsf_pmu_event_init(struct perf_event *event) { int err; /* No support for taken branch sampling */ - if (has_branch_stack(event)) + /* No support for callchain, stacks and registers */ + if (has_branch_stack(event) || is_callchain_event(event)) return -EOPNOTSUPP; switch (event->attr.type) { @@ -867,7 +922,7 @@ /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= 0 && !cpu_online(event->cpu)) - return -ENODEV; + return -ENODEV; /* Force reset of idle/hv excludes regardless of what the * user requested. @@ -915,9 +970,10 @@ * buffer extents */ sfb_account_overflows(cpuhw, hwc); - if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) - extend_sampling_buffer(&cpuhw->sfb, hwc); + extend_sampling_buffer(&cpuhw->sfb, hwc); } + /* Rate may be adjusted with ioctl() */ + cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw); } /* (Re)enable the PMU and sampling facility */ @@ -927,7 +983,7 @@ err = lsctl(&cpuhw->lsctl); if (err) { cpuhw->flags &= ~PMU_F_ENABLED; - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 1, err); return; } @@ -935,10 +991,11 @@ /* Load current program parameter */ lpp(&S390_lowcore.lpp); - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " - "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, - cpuhw->lsctl.ed, cpuhw->lsctl.cd, - (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); + debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i " + "interval %#lx tear %#lx dear %#lx\n", __func__, + cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, + cpuhw->lsctl.cd, cpuhw->lsctl.interval, + cpuhw->lsctl.tear, cpuhw->lsctl.dear); } static void cpumsf_pmu_disable(struct pmu *pmu) @@ -961,13 +1018,14 @@ err = lsctl(&inactive); if (err) { - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 2, err); return; } /* Save state of TEAR and DEAR register contents */ - if (!qsi(&si)) { + err = qsi(&si); + if (!err) { /* TEAR/DEAR values are valid only if the sampling facility is * enabled. Note that cpumsf_pmu_disable() might be called even * for a disabled sampling facility because cpumsf_pmu_enable() @@ -978,8 +1036,8 @@ cpuhw->lsctl.dear = si.dear; } } else - debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " - "qsi() failed with err=%i\n", err); + debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n", + __func__, err); cpuhw->flags &= ~PMU_F_ENABLED; } @@ -1092,14 +1150,6 @@ local64_add(count, &event->count); } -static void debug_sample_entry(struct hws_basic_entry *sample, - struct hws_trailer_entry *te) -{ - debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " - "sampling data entry: te->f=%i basic.def=%04x (%p)\n", - te->f, sample->def, sample); -} - /* hw_collect_samples() - Walk through a sample-data-block and collect samples * @event: The perf event * @sdbt: Sample-data-block table @@ -1153,7 +1203,11 @@ /* Count discarded samples */ *overflow += 1; } else { - debug_sample_entry(sample, te); + debug_sprintf_event(sfdbg, 4, + "%s: Found unknown" + " sampling data entry: te->f %i" + " basic.def %#4x (%p)\n", __func__, + te->header.f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1164,7 +1218,7 @@ * that are not full. Stop processing if the first * invalid format was detected. */ - if (!te->f) + if (!te->header.f) break; } @@ -1172,6 +1226,16 @@ sample->def = 0; sample++; } +} + +static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) +{ + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "memory", "cc"); + return old; } /* hw_perf_event_update() - Process sampling buffer @@ -1190,10 +1254,11 @@ */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; /* @@ -1213,25 +1278,25 @@ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); /* Leave loop if no more work to do (block full indicator) */ - if (!te->f) { + if (!te->header.f) { done = 1; if (!flush_all) break; } /* Check the sample overflow count */ - if (te->overflow) + if (te->header.overflow) /* Account sample overflows and, if a particular limit * is reached, extend the sampling buffer. * For details, see sfb_account_overflows(). */ - sampl_overflow += te->overflow; + sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " - "overflow=%llu timestamp=0x%llx\n", - sdbt, te->overflow, - (te->f) ? trailer_timestamp(te) : 0ULL); + debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " + "overflow %llu timestamp %#llx\n", + __func__, (unsigned long)sdbt, te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU @@ -1241,12 +1306,16 @@ num_sdb++; /* Reset trailer (using compare-double-and-swap) */ + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te_flags |= SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - te->flags, te->overflow, - te_flags, 0ULL)); + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); /* Advance to next sample-data-block */ sdbt++; @@ -1284,9 +1353,11 @@ } if (sampl_overflow || event_overflow) - debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " - "overflow stats: sample=%llu event=%llu\n", - sampl_overflow, event_overflow); + debug_sprintf_event(sfdbg, 4, "%s: " + "overflows: sample %llu event %llu" + " total %llu num_sdb %llu\n", + __func__, sampl_overflow, event_overflow, + OVERFLOW_REG(hwc), num_sdb); } #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) @@ -1329,7 +1400,7 @@ range_scan = AUX_SDB_NUM_ALERT(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + if (!te->header.f) break; } /* i is num of SDBs which are full */ @@ -1337,9 +1408,10 @@ /* Remove alert indicators in the buffer */ te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags &= ~SDB_TE_ALERT_REQ_MASK; + te->header.a = 0; - debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); + debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", + __func__, i, range_scan, aux->head); } /* @@ -1372,14 +1444,18 @@ * SDBs between aux->head and aux->empty_mark are already ready * for new data. range_scan is num of SDBs not within them. */ + debug_sprintf_event(sfdbg, 6, + "%s: range %ld head %ld alert %ld empty %ld\n", + __func__, range, aux->head, aux->alert_mark, + aux->empty_mark); if (range > AUX_SDB_NUM_EMPTY(aux)) { range_scan = range - AUX_SDB_NUM_EMPTY(aux); idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | - SDB_TE_ALERT_REQ_MASK); - te->overflow = 0; + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; } /* Save the position of empty SDBs */ aux->empty_mark = aux->head + range - 1; @@ -1388,7 +1464,7 @@ /* Set alert indicator */ aux->alert_mark = aux->head + range/2 - 1; te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + te->header.a = 1; /* Reset hardware buffer head */ head = AUX_SDB_INDEX(aux, aux->head); @@ -1397,15 +1473,11 @@ cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); cpuhw->lsctl.dear = aux->sdb_index[head]; - debug_sprintf_event(sfdbg, 6, "aux_output_begin: " - "head->alert_mark->empty_mark (num_alert, range)" - "[%lx -> %lx -> %lx] (%lx, %lx) " - "tear index %lx, tear %lx dear %lx\n", + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld " + "index %ld tear %#lx dear %#lx\n", __func__, aux->head, aux->alert_mark, aux->empty_mark, - AUX_SDB_NUM_ALERT(aux), range, head / CPUM_SF_SDB_PER_TABLE, - cpuhw->lsctl.tear, - cpuhw->lsctl.dear); + cpuhw->lsctl.tear, cpuhw->lsctl.dear); return 0; } @@ -1419,14 +1491,17 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; + union hws_trailer_header old, prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - *overflow = orig_overflow = te->overflow; - if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { /* * SDB is already set by hardware. * Abort and try to set somewhere @@ -1434,10 +1509,10 @@ */ return false; } - new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); return true; } @@ -1466,10 +1541,14 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; - unsigned long i, range_scan, idx; + unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; struct hws_trailer_entry *te; + debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " + "empty %ld\n", __func__, range, aux->head, + aux->alert_mark, aux->empty_mark); if (range <= AUX_SDB_NUM_EMPTY(aux)) /* * No need to scan. All SDBs in range are marked as empty. @@ -1492,26 +1571,32 @@ * indicator fall into this range, set it. */ range_scan = range - AUX_SDB_NUM_EMPTY(aux); - idx = aux->empty_mark + 1; + idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - orig_overflow = te->overflow; - new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; if (idx == aux->alert_mark) - new_flags |= SDB_TE_ALERT_REQ_MASK; + new.a = 1; else - new_flags &= ~SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); *overflow += orig_overflow; } /* Update empty_mark to new position */ aux->empty_mark = aux->head + range - 1; + debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld " + "empty %ld\n", __func__, range_scan, idx_old, + idx - 1, aux->empty_mark); return true; } @@ -1533,8 +1618,9 @@ /* Inform user space new data arrived */ size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__, + size >> PAGE_SHIFT); perf_aux_output_end(handle, size); - num_sdb = aux->sfb.num_sdb; num_sdb = aux->sfb.num_sdb; while (!done) { @@ -1544,7 +1630,9 @@ pr_err("The AUX buffer with %lu pages for the " "diagnostic-sampling mode is full\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n"); + debug_sprintf_event(sfdbg, 1, + "%s: AUX buffer used up\n", + __func__); break; } if (WARN_ON_ONCE(!aux)) @@ -1566,24 +1654,24 @@ size = range << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); pr_err("Sample data caused the AUX buffer with %lu " - "pages to overflow\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "head %lx range %lx " - "overflow %llx\n", + "pages to overflow\n", aux->sfb.num_sdb); + debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld " + "overflow %lld\n", __func__, aux->head, range, overflow); } else { size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); - debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " "already full, try another\n", + __func__, aux->head, aux->alert_mark); } } if (done) - debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " - "[%lx -> %lx -> %lx] (%lx, %lx)\n", - aux->head, aux->alert_mark, aux->empty_mark, - AUX_SDB_NUM_ALERT(aux), range); + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " + "empty %ld\n", __func__, aux->head, + aux->alert_mark, aux->empty_mark); } /* @@ -1606,8 +1694,7 @@ kfree(aux->sdb_index); kfree(aux); - debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free " - "%lu SDBTs\n", num_sdbt); + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt); } static void aux_sdb_init(unsigned long sdb) @@ -1665,7 +1752,7 @@ sfb = &aux->sfb; /* Allocate sdbt_index for fast reference */ - n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE; + n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE); aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); if (!aux->sdbt_index) goto no_sdbt_index; @@ -1715,8 +1802,7 @@ */ aux->empty_mark = sfb->num_sdb - 1; - debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs" - " and %lu SDBs\n", + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__, sfb->num_sdbt, sfb->num_sdb); return aux; @@ -1737,6 +1823,44 @@ static void cpumsf_pmu_read(struct perf_event *event) { /* Nothing to do ... updates are interrupt-driven */ +} + +/* Check if the new sampling period/freqeuncy is appropriate. + * + * Return non-zero on error and zero on passed checks. + */ +static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) +{ + struct hws_qsi_info_block si; + unsigned long rate; + bool do_freq; + + memset(&si, 0, sizeof(si)); + if (event->cpu == -1) { + if (qsi(&si)) + return -ENODEV; + } else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + + si = cpuhw->qsi; + } + + do_freq = !!SAMPLE_FREQ_MODE(&event->hw); + rate = getrate(do_freq, value, &si); + if (!rate) + return -EINVAL; + + event->attr.sample_period = rate; + SAMPL_RATE(&event->hw) = rate; + hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); + debug_sprintf_event(sfdbg, 4, "%s:" + " cpu %d value %#llx period %#llx freq %d\n", + __func__, event->cpu, value, + event->attr.sample_period, do_freq); + return 0; } /* Activate sampling control. @@ -1810,7 +1934,7 @@ if (!SAMPL_DIAG_MODE(&event->hw)) { cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; - hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt; } /* Ensure sampling functions are in the disabled state. If disabled, @@ -1865,10 +1989,30 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); -static struct attribute *cpumsf_pmu_events_attr[] = { - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - NULL, - NULL, +/* Attribute list for CPU_SF. + * + * The availablitiy depends on the CPU_MF sampling facility authorization + * for basic + diagnositic samples. This is determined at initialization + * time by the sampling facility device driver. + * If the authorization for basic samples is turned off, it should be + * also turned off for diagnostic sampling. + * + * During initialization of the device driver, check the authorization + * level for diagnostic sampling and installs the attribute + * file for diagnostic sampling if necessary. + * + * For now install a placeholder to reference all possible attributes: + * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG. + * Add another entry for the final NULL pointer. + */ +enum { + SF_CYCLES_BASIC_ATTR_IDX = 0, + SF_CYCLES_BASIC_DIAG_ATTR_IDX, + SF_CYCLES_ATTR_MAX +}; + +static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = { + [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC) }; PMU_FORMAT_ATTR(event, "config:0-63"); @@ -1882,10 +2026,12 @@ .name = "events", .attrs = cpumsf_pmu_events_attr, }; + static struct attribute_group cpumsf_pmu_format_group = { .name = "format", .attrs = cpumsf_pmu_format_attr, }; + static const struct attribute_group *cpumsf_pmu_attr_groups[] = { &cpumsf_pmu_events_group, &cpumsf_pmu_format_group, @@ -1908,6 +2054,8 @@ .setup_aux = aux_buffer_setup, .free_aux = aux_buffer_free, + + .check_period = cpumsf_pmu_check_period, }; static void cpumf_measurement_alert(struct ext_code ext_code, @@ -1941,7 +2089,8 @@ /* Report measurement alerts only for non-PRA codes */ if (alert != CPU_MF_INT_SF_PRA) - debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__, + alert); /* Sampling authorization change request */ if (alert & CPU_MF_INT_SF_SACA) @@ -1962,6 +2111,7 @@ sf_disable(); } } + static int cpusf_pmu_setup(unsigned int cpu, int flags) { /* Ignore the notification if no events are scheduled on the PMU. @@ -2018,7 +2168,7 @@ sfb_set_limits(min, max); pr_info("The sampling buffer limits have changed to: " - "min=%lu max=%lu (diag=x%lu)\n", + "min %lu max %lu (diag %lu)\n", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); return 0; } @@ -2036,7 +2186,7 @@ static void __init pr_cpumsf_err(unsigned int reason) { pr_err("Sampling facility support for perf is not available: " - "reason=%04x\n", reason); + "reason %#x\n", reason); } static int __init init_cpum_sampling_pmu(void) @@ -2063,7 +2213,10 @@ if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); - cpumsf_pmu_events_attr[1] = + /* Sampling of diagnostic data authorized, + * install event into attribute list of PMU device. + */ + cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); } @@ -2096,5 +2249,6 @@ out: return err; } + arch_initcall(init_cpum_sampling_pmu); core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); -- Gitblit v1.6.2