hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/arch/s390/kernel/perf_cpum_sf.c
....@@ -156,21 +156,22 @@
156156 }
157157 }
158158
159
- debug_sprintf_event(sfdbg, 5,
160
- "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
159
+ debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
160
+ (unsigned long)sfb->sdbt);
161161 memset(sfb, 0, sizeof(*sfb));
162162 }
163163
164164 static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
165165 {
166
- unsigned long sdb, *trailer;
166
+ struct hws_trailer_entry *te;
167
+ unsigned long sdb;
167168
168169 /* Allocate and initialize sample-data-block */
169170 sdb = get_zeroed_page(gfp_flags);
170171 if (!sdb)
171172 return -ENOMEM;
172
- trailer = trailer_entry_ptr(sdb);
173
- *trailer = SDB_TE_ALERT_REQ_MASK;
173
+ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
174
+ te->header.a = 1;
174175
175176 /* Link SDB into the sample-data-block-table */
176177 *sdbt = sdb;
....@@ -212,10 +213,11 @@
212213 * the sampling buffer origin.
213214 */
214215 if (sfb->sdbt != get_next_sdbt(tail)) {
215
- debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
216
- "sampling buffer is not linked: origin=%p"
217
- "tail=%p\n",
218
- (void *) sfb->sdbt, (void *) tail);
216
+ debug_sprintf_event(sfdbg, 3, "%s: "
217
+ "sampling buffer is not linked: origin %#lx"
218
+ " tail %#lx\n", __func__,
219
+ (unsigned long)sfb->sdbt,
220
+ (unsigned long)tail);
219221 return -EINVAL;
220222 }
221223
....@@ -264,8 +266,8 @@
264266 *tail = (unsigned long) sfb->sdbt + 1;
265267 sfb->tail = tail;
266268
267
- debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
268
- " settings: sdbt=%lu sdb=%lu\n",
269
+ debug_sprintf_event(sfdbg, 4, "%s: new buffer"
270
+ " settings: sdbt %lu sdb %lu\n", __func__,
269271 sfb->num_sdbt, sfb->num_sdb);
270272 return rc;
271273 }
....@@ -305,12 +307,13 @@
305307 rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
306308 if (rc) {
307309 free_sampling_buffer(sfb);
308
- debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
309
- "realloc_sampling_buffer failed with rc=%i\n", rc);
310
+ debug_sprintf_event(sfdbg, 4, "%s: "
311
+ "realloc_sampling_buffer failed with rc %i\n",
312
+ __func__, rc);
310313 } else
311314 debug_sprintf_event(sfdbg, 4,
312
- "alloc_sampling_buffer: tear=%p dear=%p\n",
313
- sfb->sdbt, (void *) *sfb->sdbt);
315
+ "%s: tear %#lx dear %#lx\n", __func__,
316
+ (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
314317 return rc;
315318 }
316319
....@@ -370,28 +373,33 @@
370373
371374 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
372375 {
373
- unsigned long n_sdb, freq, factor;
376
+ unsigned long n_sdb, freq;
374377 size_t sample_size;
375378
376379 /* Calculate sampling buffers using 4K pages
377380 *
378
- * 1. Determine the sample data size which depends on the used
379
- * sampling functions, for example, basic-sampling or
380
- * basic-sampling with diagnostic-sampling.
381
+ * 1. The sampling size is 32 bytes for basic sampling. This size
382
+ * is the same for all machine types. Diagnostic
383
+ * sampling uses auxiliary data buffer setup which provides the
384
+ * memory for SDBs using linux common code auxiliary trace
385
+ * setup.
381386 *
382
- * 2. Use the sampling frequency as input. The sampling buffer is
383
- * designed for almost one second. This can be adjusted through
384
- * the "factor" variable.
385
- * In any case, alloc_sampling_buffer() sets the Alert Request
387
+ * 2. Function alloc_sampling_buffer() sets the Alert Request
386388 * Control indicator to trigger a measurement-alert to harvest
387
- * sample-data-blocks (sdb).
389
+ * sample-data-blocks (SDB). This is done per SDB. This
390
+ * measurement alert interrupt fires quick enough to handle
391
+ * one SDB, on very high frequency and work loads there might
392
+ * be 2 to 3 SDBs available for sample processing.
393
+ * Currently there is no need for setup alert request on every
394
+ * n-th page. This is counterproductive as one IRQ triggers
395
+ * a very high number of samples to be processed at one IRQ.
388396 *
389
- * 3. Compute the number of sample-data-blocks and ensure a minimum
390
- * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
391
- * exceed a "calculated" maximum. The symbolic maximum is
392
- * designed for basic-sampling only and needs to be increased if
393
- * diagnostic-sampling is active.
394
- * See also the remarks for these symbolic constants.
397
+ * 3. Use the sampling frequency as input.
398
+ * Compute the number of SDBs and ensure a minimum
399
+ * of CPUM_SF_MIN_SDB. Depending on frequency add some more
400
+ * SDBs to handle a higher sampling rate.
401
+ * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
402
+ * (one SDB) for every 10000 HZ frequency increment.
395403 *
396404 * 4. Compute the number of sample-data-block-tables (SDBT) and
397405 * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
....@@ -399,10 +407,7 @@
399407 */
400408 sample_size = sizeof(struct hws_basic_entry);
401409 freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
402
- factor = 1;
403
- n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
404
- if (n_sdb < CPUM_SF_MIN_SDB)
405
- n_sdb = CPUM_SF_MIN_SDB;
410
+ n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
406411
407412 /* If there is already a sampling buffer allocated, it is very likely
408413 * that the sampling facility is enabled too. If the event to be
....@@ -417,8 +422,8 @@
417422 return 0;
418423
419424 debug_sprintf_event(sfdbg, 3,
420
- "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
421
- " sample_size=%lu cpuhw=%p\n",
425
+ "%s: rate %lu f %lu sdb %lu/%lu"
426
+ " sample_size %lu cpuhw %p\n", __func__,
422427 SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
423428 sample_size, cpuhw);
424429
....@@ -478,8 +483,8 @@
478483 if (num)
479484 sfb_account_allocs(num, hwc);
480485
481
- debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
482
- " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
486
+ debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
487
+ __func__, OVERFLOW_REG(hwc), ratio, num);
483488 OVERFLOW_REG(hwc) = 0;
484489 }
485490
....@@ -517,16 +522,15 @@
517522 */
518523 rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
519524 if (rc)
520
- debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
521
- "failed with rc=%i\n", rc);
525
+ debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
526
+ __func__, rc);
522527
523528 if (sfb_has_pending_allocs(sfb, hwc))
524
- debug_sprintf_event(sfdbg, 5, "sfb: extend: "
525
- "req=%lu alloc=%lu remaining=%lu\n",
526
- num, sfb->num_sdb - num_old,
529
+ debug_sprintf_event(sfdbg, 5, "%s: "
530
+ "req %lu alloc %lu remaining %lu\n",
531
+ __func__, num, sfb->num_sdb - num_old,
527532 sfb_pending_allocs(sfb, hwc));
528533 }
529
-
530534
531535 /* Number of perf events counting hardware events */
532536 static atomic_t num_events;
....@@ -552,20 +556,22 @@
552556 err = sf_disable();
553557 if (err)
554558 pr_err("Switching off the sampling facility failed "
555
- "with rc=%i\n", err);
559
+ "with rc %i\n", err);
556560 debug_sprintf_event(sfdbg, 5,
557
- "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
561
+ "%s: initialized: cpuhw %p\n", __func__,
562
+ cpusf);
558563 break;
559564 case PMC_RELEASE:
560565 cpusf->flags &= ~PMU_F_RESERVED;
561566 err = sf_disable();
562567 if (err) {
563568 pr_err("Switching off the sampling facility failed "
564
- "with rc=%i\n", err);
569
+ "with rc %i\n", err);
565570 } else
566571 deallocate_buffers(cpusf);
567572 debug_sprintf_event(sfdbg, 5,
568
- "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
573
+ "%s: released: cpuhw %p\n", __func__,
574
+ cpusf);
569575 break;
570576 }
571577 if (err)
....@@ -610,13 +616,6 @@
610616 hwc->sample_period = period;
611617 hwc->last_period = hwc->sample_period;
612618 local64_set(&hwc->period_left, hwc->sample_period);
613
-}
614
-
615
-static void hw_reset_registers(struct hw_perf_event *hwc,
616
- unsigned long *sdbt_origin)
617
-{
618
- /* (Re)set to first sample-data-block-table */
619
- TEAR_REG(hwc) = (unsigned long) sdbt_origin;
620619 }
621620
622621 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
....@@ -674,7 +673,7 @@
674673 rcu_read_lock();
675674
676675 perf_prepare_sample(&header, data, event, regs);
677
- if (perf_output_begin(&handle, event, header.size))
676
+ if (perf_output_begin(&handle, data, event, header.size))
678677 goto out;
679678
680679 /* Update the process ID (see also kernel/events/core.c) */
....@@ -687,13 +686,88 @@
687686 rcu_read_unlock();
688687 }
689688
689
+static unsigned long getrate(bool freq, unsigned long sample,
690
+ struct hws_qsi_info_block *si)
691
+{
692
+ unsigned long rate;
693
+
694
+ if (freq) {
695
+ rate = freq_to_sample_rate(si, sample);
696
+ rate = hw_limit_rate(si, rate);
697
+ } else {
698
+ /* The min/max sampling rates specify the valid range
699
+ * of sample periods. If the specified sample period is
700
+ * out of range, limit the period to the range boundary.
701
+ */
702
+ rate = hw_limit_rate(si, sample);
703
+
704
+ /* The perf core maintains a maximum sample rate that is
705
+ * configurable through the sysctl interface. Ensure the
706
+ * sampling rate does not exceed this value. This also helps
707
+ * to avoid throttling when pushing samples with
708
+ * perf_event_overflow().
709
+ */
710
+ if (sample_rate_to_freq(si, rate) >
711
+ sysctl_perf_event_sample_rate) {
712
+ debug_sprintf_event(sfdbg, 1, "%s: "
713
+ "Sampling rate exceeds maximum "
714
+ "perf sample rate\n", __func__);
715
+ rate = 0;
716
+ }
717
+ }
718
+ return rate;
719
+}
720
+
721
+/* The sampling information (si) contains information about the
722
+ * min/max sampling intervals and the CPU speed. So calculate the
723
+ * correct sampling interval and avoid the whole period adjust
724
+ * feedback loop.
725
+ *
726
+ * Since the CPU Measurement sampling facility cannot handle frequency,
727
+ * calculate the sampling interval when frequency is specified using
728
+ * this formula:
729
+ * interval := cpu_speed * 1000000 / sample_freq
730
+ *
731
+ * Returns errno on bad input and zero on success with parameter interval
732
+ * set to the correct sampling rate.
733
+ *
734
+ * Note: This function turns off freq bit to avoid calling function
735
+ * perf_adjust_period(). This causes frequency adjustment in the common
736
+ * code part which causes tremendous variations in the counter values.
737
+ */
738
+static int __hw_perf_event_init_rate(struct perf_event *event,
739
+ struct hws_qsi_info_block *si)
740
+{
741
+ struct perf_event_attr *attr = &event->attr;
742
+ struct hw_perf_event *hwc = &event->hw;
743
+ unsigned long rate;
744
+
745
+ if (attr->freq) {
746
+ if (!attr->sample_freq)
747
+ return -EINVAL;
748
+ rate = getrate(attr->freq, attr->sample_freq, si);
749
+ attr->freq = 0; /* Don't call perf_adjust_period() */
750
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
751
+ } else {
752
+ rate = getrate(attr->freq, attr->sample_period, si);
753
+ if (!rate)
754
+ return -EINVAL;
755
+ }
756
+ attr->sample_period = rate;
757
+ SAMPL_RATE(hwc) = rate;
758
+ hw_init_period(hwc, SAMPL_RATE(hwc));
759
+ debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
760
+ __func__, event->cpu, event->attr.sample_period,
761
+ event->attr.freq, SAMPLE_FREQ_MODE(hwc));
762
+ return 0;
763
+}
764
+
690765 static int __hw_perf_event_init(struct perf_event *event)
691766 {
692767 struct cpu_hw_sf *cpuhw;
693768 struct hws_qsi_info_block si;
694769 struct perf_event_attr *attr = &event->attr;
695770 struct hw_perf_event *hwc = &event->hw;
696
- unsigned long rate;
697771 int cpu, err;
698772
699773 /* Reserve CPU-measurement sampling facility */
....@@ -741,6 +815,12 @@
741815 goto out;
742816 }
743817
818
+ if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
819
+ pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
820
+ err = -EBUSY;
821
+ goto out;
822
+ }
823
+
744824 /* Always enable basic sampling */
745825 SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
746826
....@@ -759,43 +839,9 @@
759839 if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
760840 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
761841
762
- /* The sampling information (si) contains information about the
763
- * min/max sampling intervals and the CPU speed. So calculate the
764
- * correct sampling interval and avoid the whole period adjust
765
- * feedback loop.
766
- */
767
- rate = 0;
768
- if (attr->freq) {
769
- if (!attr->sample_freq) {
770
- err = -EINVAL;
771
- goto out;
772
- }
773
- rate = freq_to_sample_rate(&si, attr->sample_freq);
774
- rate = hw_limit_rate(&si, rate);
775
- attr->freq = 0;
776
- attr->sample_period = rate;
777
- } else {
778
- /* The min/max sampling rates specifies the valid range
779
- * of sample periods. If the specified sample period is
780
- * out of range, limit the period to the range boundary.
781
- */
782
- rate = hw_limit_rate(&si, hwc->sample_period);
783
-
784
- /* The perf core maintains a maximum sample rate that is
785
- * configurable through the sysctl interface. Ensure the
786
- * sampling rate does not exceed this value. This also helps
787
- * to avoid throttling when pushing samples with
788
- * perf_event_overflow().
789
- */
790
- if (sample_rate_to_freq(&si, rate) >
791
- sysctl_perf_event_sample_rate) {
792
- err = -EINVAL;
793
- debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
794
- goto out;
795
- }
796
- }
797
- SAMPL_RATE(hwc) = rate;
798
- hw_init_period(hwc, SAMPL_RATE(hwc));
842
+ err = __hw_perf_event_init_rate(event, &si);
843
+ if (err)
844
+ goto out;
799845
800846 /* Initialize sample data overflow accounting */
801847 hwc->extra_reg.reg = REG_OVERFLOW;
....@@ -836,12 +882,21 @@
836882 return err;
837883 }
838884
885
+static bool is_callchain_event(struct perf_event *event)
886
+{
887
+ u64 sample_type = event->attr.sample_type;
888
+
889
+ return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
890
+ PERF_SAMPLE_STACK_USER);
891
+}
892
+
839893 static int cpumsf_pmu_event_init(struct perf_event *event)
840894 {
841895 int err;
842896
843897 /* No support for taken branch sampling */
844
- if (has_branch_stack(event))
898
+ /* No support for callchain, stacks and registers */
899
+ if (has_branch_stack(event) || is_callchain_event(event))
845900 return -EOPNOTSUPP;
846901
847902 switch (event->attr.type) {
....@@ -867,7 +922,7 @@
867922
868923 /* Check online status of the CPU to which the event is pinned */
869924 if (event->cpu >= 0 && !cpu_online(event->cpu))
870
- return -ENODEV;
925
+ return -ENODEV;
871926
872927 /* Force reset of idle/hv excludes regardless of what the
873928 * user requested.
....@@ -915,9 +970,10 @@
915970 * buffer extents
916971 */
917972 sfb_account_overflows(cpuhw, hwc);
918
- if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
919
- extend_sampling_buffer(&cpuhw->sfb, hwc);
973
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
920974 }
975
+ /* Rate may be adjusted with ioctl() */
976
+ cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
921977 }
922978
923979 /* (Re)enable the PMU and sampling facility */
....@@ -927,7 +983,7 @@
927983 err = lsctl(&cpuhw->lsctl);
928984 if (err) {
929985 cpuhw->flags &= ~PMU_F_ENABLED;
930
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
986
+ pr_err("Loading sampling controls failed: op %i err %i\n",
931987 1, err);
932988 return;
933989 }
....@@ -935,10 +991,11 @@
935991 /* Load current program parameter */
936992 lpp(&S390_lowcore.lpp);
937993
938
- debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
939
- "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
940
- cpuhw->lsctl.ed, cpuhw->lsctl.cd,
941
- (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
994
+ debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
995
+ "interval %#lx tear %#lx dear %#lx\n", __func__,
996
+ cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
997
+ cpuhw->lsctl.cd, cpuhw->lsctl.interval,
998
+ cpuhw->lsctl.tear, cpuhw->lsctl.dear);
942999 }
9431000
9441001 static void cpumsf_pmu_disable(struct pmu *pmu)
....@@ -961,13 +1018,14 @@
9611018
9621019 err = lsctl(&inactive);
9631020 if (err) {
964
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
1021
+ pr_err("Loading sampling controls failed: op %i err %i\n",
9651022 2, err);
9661023 return;
9671024 }
9681025
9691026 /* Save state of TEAR and DEAR register contents */
970
- if (!qsi(&si)) {
1027
+ err = qsi(&si);
1028
+ if (!err) {
9711029 /* TEAR/DEAR values are valid only if the sampling facility is
9721030 * enabled. Note that cpumsf_pmu_disable() might be called even
9731031 * for a disabled sampling facility because cpumsf_pmu_enable()
....@@ -978,8 +1036,8 @@
9781036 cpuhw->lsctl.dear = si.dear;
9791037 }
9801038 } else
981
- debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
982
- "qsi() failed with err=%i\n", err);
1039
+ debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
1040
+ __func__, err);
9831041
9841042 cpuhw->flags &= ~PMU_F_ENABLED;
9851043 }
....@@ -1092,14 +1150,6 @@
10921150 local64_add(count, &event->count);
10931151 }
10941152
1095
-static void debug_sample_entry(struct hws_basic_entry *sample,
1096
- struct hws_trailer_entry *te)
1097
-{
1098
- debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
1099
- "sampling data entry: te->f=%i basic.def=%04x (%p)\n",
1100
- te->f, sample->def, sample);
1101
-}
1102
-
11031153 /* hw_collect_samples() - Walk through a sample-data-block and collect samples
11041154 * @event: The perf event
11051155 * @sdbt: Sample-data-block table
....@@ -1153,7 +1203,11 @@
11531203 /* Count discarded samples */
11541204 *overflow += 1;
11551205 } else {
1156
- debug_sample_entry(sample, te);
1206
+ debug_sprintf_event(sfdbg, 4,
1207
+ "%s: Found unknown"
1208
+ " sampling data entry: te->f %i"
1209
+ " basic.def %#4x (%p)\n", __func__,
1210
+ te->header.f, sample->def, sample);
11571211 /* Sample slot is not yet written or other record.
11581212 *
11591213 * This condition can occur if the buffer was reused
....@@ -1164,7 +1218,7 @@
11641218 * that are not full. Stop processing if the first
11651219 * invalid format was detected.
11661220 */
1167
- if (!te->f)
1221
+ if (!te->header.f)
11681222 break;
11691223 }
11701224
....@@ -1172,6 +1226,16 @@
11721226 sample->def = 0;
11731227 sample++;
11741228 }
1229
+}
1230
+
1231
+static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
1232
+{
1233
+ asm volatile(
1234
+ " cdsg %[old],%[new],%[ptr]\n"
1235
+ : [old] "+d" (old), [ptr] "+QS" (*ptr)
1236
+ : [new] "d" (new)
1237
+ : "memory", "cc");
1238
+ return old;
11751239 }
11761240
11771241 /* hw_perf_event_update() - Process sampling buffer
....@@ -1190,10 +1254,11 @@
11901254 */
11911255 static void hw_perf_event_update(struct perf_event *event, int flush_all)
11921256 {
1257
+ unsigned long long event_overflow, sampl_overflow, num_sdb;
1258
+ union hws_trailer_header old, prev, new;
11931259 struct hw_perf_event *hwc = &event->hw;
11941260 struct hws_trailer_entry *te;
11951261 unsigned long *sdbt;
1196
- unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
11971262 int done;
11981263
11991264 /*
....@@ -1213,25 +1278,25 @@
12131278 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
12141279
12151280 /* Leave loop if no more work to do (block full indicator) */
1216
- if (!te->f) {
1281
+ if (!te->header.f) {
12171282 done = 1;
12181283 if (!flush_all)
12191284 break;
12201285 }
12211286
12221287 /* Check the sample overflow count */
1223
- if (te->overflow)
1288
+ if (te->header.overflow)
12241289 /* Account sample overflows and, if a particular limit
12251290 * is reached, extend the sampling buffer.
12261291 * For details, see sfb_account_overflows().
12271292 */
1228
- sampl_overflow += te->overflow;
1293
+ sampl_overflow += te->header.overflow;
12291294
12301295 /* Timestamps are valid for full sample-data-blocks only */
1231
- debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
1232
- "overflow=%llu timestamp=0x%llx\n",
1233
- sdbt, te->overflow,
1234
- (te->f) ? trailer_timestamp(te) : 0ULL);
1296
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
1297
+ "overflow %llu timestamp %#llx\n",
1298
+ __func__, (unsigned long)sdbt, te->header.overflow,
1299
+ (te->header.f) ? trailer_timestamp(te) : 0ULL);
12351300
12361301 /* Collect all samples from a single sample-data-block and
12371302 * flag if an (perf) event overflow happened. If so, the PMU
....@@ -1241,12 +1306,16 @@
12411306 num_sdb++;
12421307
12431308 /* Reset trailer (using compare-double-and-swap) */
1309
+ /* READ_ONCE() 16 byte header */
1310
+ prev.val = __cdsg(&te->header.val, 0, 0);
12441311 do {
1245
- te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
1246
- te_flags |= SDB_TE_ALERT_REQ_MASK;
1247
- } while (!cmpxchg_double(&te->flags, &te->overflow,
1248
- te->flags, te->overflow,
1249
- te_flags, 0ULL));
1312
+ old.val = prev.val;
1313
+ new.val = prev.val;
1314
+ new.f = 0;
1315
+ new.a = 1;
1316
+ new.overflow = 0;
1317
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
1318
+ } while (prev.val != old.val);
12501319
12511320 /* Advance to next sample-data-block */
12521321 sdbt++;
....@@ -1284,9 +1353,11 @@
12841353 }
12851354
12861355 if (sampl_overflow || event_overflow)
1287
- debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
1288
- "overflow stats: sample=%llu event=%llu\n",
1289
- sampl_overflow, event_overflow);
1356
+ debug_sprintf_event(sfdbg, 4, "%s: "
1357
+ "overflows: sample %llu event %llu"
1358
+ " total %llu num_sdb %llu\n",
1359
+ __func__, sampl_overflow, event_overflow,
1360
+ OVERFLOW_REG(hwc), num_sdb);
12901361 }
12911362
12921363 #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
....@@ -1329,7 +1400,7 @@
13291400 range_scan = AUX_SDB_NUM_ALERT(aux);
13301401 for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
13311402 te = aux_sdb_trailer(aux, idx);
1332
- if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
1403
+ if (!te->header.f)
13331404 break;
13341405 }
13351406 /* i is num of SDBs which are full */
....@@ -1337,9 +1408,10 @@
13371408
13381409 /* Remove alert indicators in the buffer */
13391410 te = aux_sdb_trailer(aux, aux->alert_mark);
1340
- te->flags &= ~SDB_TE_ALERT_REQ_MASK;
1411
+ te->header.a = 0;
13411412
1342
- debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
1413
+ debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
1414
+ __func__, i, range_scan, aux->head);
13431415 }
13441416
13451417 /*
....@@ -1372,14 +1444,18 @@
13721444 * SDBs between aux->head and aux->empty_mark are already ready
13731445 * for new data. range_scan is num of SDBs not within them.
13741446 */
1447
+ debug_sprintf_event(sfdbg, 6,
1448
+ "%s: range %ld head %ld alert %ld empty %ld\n",
1449
+ __func__, range, aux->head, aux->alert_mark,
1450
+ aux->empty_mark);
13751451 if (range > AUX_SDB_NUM_EMPTY(aux)) {
13761452 range_scan = range - AUX_SDB_NUM_EMPTY(aux);
13771453 idx = aux->empty_mark + 1;
13781454 for (i = 0; i < range_scan; i++, idx++) {
13791455 te = aux_sdb_trailer(aux, idx);
1380
- te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
1381
- SDB_TE_ALERT_REQ_MASK);
1382
- te->overflow = 0;
1456
+ te->header.f = 0;
1457
+ te->header.a = 0;
1458
+ te->header.overflow = 0;
13831459 }
13841460 /* Save the position of empty SDBs */
13851461 aux->empty_mark = aux->head + range - 1;
....@@ -1388,7 +1464,7 @@
13881464 /* Set alert indicator */
13891465 aux->alert_mark = aux->head + range/2 - 1;
13901466 te = aux_sdb_trailer(aux, aux->alert_mark);
1391
- te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
1467
+ te->header.a = 1;
13921468
13931469 /* Reset hardware buffer head */
13941470 head = AUX_SDB_INDEX(aux, aux->head);
....@@ -1397,15 +1473,11 @@
13971473 cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
13981474 cpuhw->lsctl.dear = aux->sdb_index[head];
13991475
1400
- debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
1401
- "head->alert_mark->empty_mark (num_alert, range)"
1402
- "[%lx -> %lx -> %lx] (%lx, %lx) "
1403
- "tear index %lx, tear %lx dear %lx\n",
1476
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
1477
+ "index %ld tear %#lx dear %#lx\n", __func__,
14041478 aux->head, aux->alert_mark, aux->empty_mark,
1405
- AUX_SDB_NUM_ALERT(aux), range,
14061479 head / CPUM_SF_SDB_PER_TABLE,
1407
- cpuhw->lsctl.tear,
1408
- cpuhw->lsctl.dear);
1480
+ cpuhw->lsctl.tear, cpuhw->lsctl.dear);
14091481
14101482 return 0;
14111483 }
....@@ -1419,14 +1491,17 @@
14191491 static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
14201492 unsigned long long *overflow)
14211493 {
1422
- unsigned long long orig_overflow, orig_flags, new_flags;
1494
+ union hws_trailer_header old, prev, new;
14231495 struct hws_trailer_entry *te;
14241496
14251497 te = aux_sdb_trailer(aux, alert_index);
1498
+ /* READ_ONCE() 16 byte header */
1499
+ prev.val = __cdsg(&te->header.val, 0, 0);
14261500 do {
1427
- orig_flags = te->flags;
1428
- *overflow = orig_overflow = te->overflow;
1429
- if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
1501
+ old.val = prev.val;
1502
+ new.val = prev.val;
1503
+ *overflow = old.overflow;
1504
+ if (old.f) {
14301505 /*
14311506 * SDB is already set by hardware.
14321507 * Abort and try to set somewhere
....@@ -1434,10 +1509,10 @@
14341509 */
14351510 return false;
14361511 }
1437
- new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
1438
- } while (!cmpxchg_double(&te->flags, &te->overflow,
1439
- orig_flags, orig_overflow,
1440
- new_flags, 0ULL));
1512
+ new.a = 1;
1513
+ new.overflow = 0;
1514
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
1515
+ } while (prev.val != old.val);
14411516 return true;
14421517 }
14431518
....@@ -1466,10 +1541,14 @@
14661541 static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
14671542 unsigned long long *overflow)
14681543 {
1469
- unsigned long long orig_overflow, orig_flags, new_flags;
1470
- unsigned long i, range_scan, idx;
1544
+ unsigned long i, range_scan, idx, idx_old;
1545
+ union hws_trailer_header old, prev, new;
1546
+ unsigned long long orig_overflow;
14711547 struct hws_trailer_entry *te;
14721548
1549
+ debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
1550
+ "empty %ld\n", __func__, range, aux->head,
1551
+ aux->alert_mark, aux->empty_mark);
14731552 if (range <= AUX_SDB_NUM_EMPTY(aux))
14741553 /*
14751554 * No need to scan. All SDBs in range are marked as empty.
....@@ -1492,26 +1571,32 @@
14921571 * indicator fall into this range, set it.
14931572 */
14941573 range_scan = range - AUX_SDB_NUM_EMPTY(aux);
1495
- idx = aux->empty_mark + 1;
1574
+ idx_old = idx = aux->empty_mark + 1;
14961575 for (i = 0; i < range_scan; i++, idx++) {
14971576 te = aux_sdb_trailer(aux, idx);
1577
+ /* READ_ONCE() 16 byte header */
1578
+ prev.val = __cdsg(&te->header.val, 0, 0);
14981579 do {
1499
- orig_flags = te->flags;
1500
- orig_overflow = te->overflow;
1501
- new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
1580
+ old.val = prev.val;
1581
+ new.val = prev.val;
1582
+ orig_overflow = old.overflow;
1583
+ new.f = 0;
1584
+ new.overflow = 0;
15021585 if (idx == aux->alert_mark)
1503
- new_flags |= SDB_TE_ALERT_REQ_MASK;
1586
+ new.a = 1;
15041587 else
1505
- new_flags &= ~SDB_TE_ALERT_REQ_MASK;
1506
- } while (!cmpxchg_double(&te->flags, &te->overflow,
1507
- orig_flags, orig_overflow,
1508
- new_flags, 0ULL));
1588
+ new.a = 0;
1589
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
1590
+ } while (prev.val != old.val);
15091591 *overflow += orig_overflow;
15101592 }
15111593
15121594 /* Update empty_mark to new position */
15131595 aux->empty_mark = aux->head + range - 1;
15141596
1597
+ debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
1598
+ "empty %ld\n", __func__, range_scan, idx_old,
1599
+ idx - 1, aux->empty_mark);
15151600 return true;
15161601 }
15171602
....@@ -1533,8 +1618,9 @@
15331618
15341619 /* Inform user space new data arrived */
15351620 size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
1621
+ debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
1622
+ size >> PAGE_SHIFT);
15361623 perf_aux_output_end(handle, size);
1537
- num_sdb = aux->sfb.num_sdb;
15381624
15391625 num_sdb = aux->sfb.num_sdb;
15401626 while (!done) {
....@@ -1544,7 +1630,9 @@
15441630 pr_err("The AUX buffer with %lu pages for the "
15451631 "diagnostic-sampling mode is full\n",
15461632 num_sdb);
1547
- debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
1633
+ debug_sprintf_event(sfdbg, 1,
1634
+ "%s: AUX buffer used up\n",
1635
+ __func__);
15481636 break;
15491637 }
15501638 if (WARN_ON_ONCE(!aux))
....@@ -1566,24 +1654,24 @@
15661654 size = range << PAGE_SHIFT;
15671655 perf_aux_output_end(&cpuhw->handle, size);
15681656 pr_err("Sample data caused the AUX buffer with %lu "
1569
- "pages to overflow\n", num_sdb);
1570
- debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
1571
- "overflow %llx\n",
1657
+ "pages to overflow\n", aux->sfb.num_sdb);
1658
+ debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
1659
+ "overflow %lld\n", __func__,
15721660 aux->head, range, overflow);
15731661 } else {
15741662 size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
15751663 perf_aux_output_end(&cpuhw->handle, size);
1576
- debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
1664
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
15771665 "already full, try another\n",
1666
+ __func__,
15781667 aux->head, aux->alert_mark);
15791668 }
15801669 }
15811670
15821671 if (done)
1583
- debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
1584
- "[%lx -> %lx -> %lx] (%lx, %lx)\n",
1585
- aux->head, aux->alert_mark, aux->empty_mark,
1586
- AUX_SDB_NUM_ALERT(aux), range);
1672
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
1673
+ "empty %ld\n", __func__, aux->head,
1674
+ aux->alert_mark, aux->empty_mark);
15871675 }
15881676
15891677 /*
....@@ -1606,8 +1694,7 @@
16061694 kfree(aux->sdb_index);
16071695 kfree(aux);
16081696
1609
- debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
1610
- "%lu SDBTs\n", num_sdbt);
1697
+ debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
16111698 }
16121699
16131700 static void aux_sdb_init(unsigned long sdb)
....@@ -1665,7 +1752,7 @@
16651752 sfb = &aux->sfb;
16661753
16671754 /* Allocate sdbt_index for fast reference */
1668
- n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
1755
+ n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
16691756 aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
16701757 if (!aux->sdbt_index)
16711758 goto no_sdbt_index;
....@@ -1715,8 +1802,7 @@
17151802 */
17161803 aux->empty_mark = sfb->num_sdb - 1;
17171804
1718
- debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
1719
- " and %lu SDBs\n",
1805
+ debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
17201806 sfb->num_sdbt, sfb->num_sdb);
17211807
17221808 return aux;
....@@ -1737,6 +1823,44 @@
17371823 static void cpumsf_pmu_read(struct perf_event *event)
17381824 {
17391825 /* Nothing to do ... updates are interrupt-driven */
1826
+}
1827
+
1828
+/* Check if the new sampling period/frequency is appropriate.
1829
+ *
1830
+ * Return non-zero on error and zero on passed checks.
1831
+ */
1832
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
1833
+{
1834
+ struct hws_qsi_info_block si;
1835
+ unsigned long rate;
1836
+ bool do_freq;
1837
+
1838
+ memset(&si, 0, sizeof(si));
1839
+ if (event->cpu == -1) {
1840
+ if (qsi(&si))
1841
+ return -ENODEV;
1842
+ } else {
1843
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
1844
+ * sampling structure for accessing the CPU-specific QSI.
1845
+ */
1846
+ struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
1847
+
1848
+ si = cpuhw->qsi;
1849
+ }
1850
+
1851
+ do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
1852
+ rate = getrate(do_freq, value, &si);
1853
+ if (!rate)
1854
+ return -EINVAL;
1855
+
1856
+ event->attr.sample_period = rate;
1857
+ SAMPL_RATE(&event->hw) = rate;
1858
+ hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
1859
+ debug_sprintf_event(sfdbg, 4, "%s:"
1860
+ " cpu %d value %#llx period %#llx freq %d\n",
1861
+ __func__, event->cpu, value,
1862
+ event->attr.sample_period, do_freq);
1863
+ return 0;
17401864 }
17411865
17421866 /* Activate sampling control.
....@@ -1810,7 +1934,7 @@
18101934 if (!SAMPL_DIAG_MODE(&event->hw)) {
18111935 cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
18121936 cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
1813
- hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
1937
+ TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
18141938 }
18151939
18161940 /* Ensure sampling functions are in the disabled state. If disabled,
....@@ -1865,10 +1989,30 @@
18651989 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
18661990 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
18671991
1868
-static struct attribute *cpumsf_pmu_events_attr[] = {
1869
- CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
1870
- NULL,
1871
- NULL,
1992
+/* Attribute list for CPU_SF.
1993
+ *
1994
+ * The availability depends on the CPU_MF sampling facility authorization
1995
+ * for basic + diagnostic samples. This is determined at initialization
1996
+ * time by the sampling facility device driver.
1997
+ * If the authorization for basic samples is turned off, it should be
1998
+ * also turned off for diagnostic sampling.
1999
+ *
2000
+ * During initialization of the device driver, check the authorization
2001
+ * level for diagnostic sampling and install the attribute
2002
+ * file for diagnostic sampling if necessary.
2003
+ *
2004
+ * For now install a placeholder to reference all possible attributes:
2005
+ * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG.
2006
+ * Add another entry for the final NULL pointer.
2007
+ */
2008
+enum {
2009
+ SF_CYCLES_BASIC_ATTR_IDX = 0,
2010
+ SF_CYCLES_BASIC_DIAG_ATTR_IDX,
2011
+ SF_CYCLES_ATTR_MAX
2012
+};
2013
+
2014
+static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
2015
+ [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
18722016 };
18732017
18742018 PMU_FORMAT_ATTR(event, "config:0-63");
....@@ -1882,10 +2026,12 @@
18822026 .name = "events",
18832027 .attrs = cpumsf_pmu_events_attr,
18842028 };
2029
+
18852030 static struct attribute_group cpumsf_pmu_format_group = {
18862031 .name = "format",
18872032 .attrs = cpumsf_pmu_format_attr,
18882033 };
2034
+
18892035 static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
18902036 &cpumsf_pmu_events_group,
18912037 &cpumsf_pmu_format_group,
....@@ -1908,6 +2054,8 @@
19082054
19092055 .setup_aux = aux_buffer_setup,
19102056 .free_aux = aux_buffer_free,
2057
+
2058
+ .check_period = cpumsf_pmu_check_period,
19112059 };
19122060
19132061 static void cpumf_measurement_alert(struct ext_code ext_code,
....@@ -1941,7 +2089,8 @@
19412089
19422090 /* Report measurement alerts only for non-PRA codes */
19432091 if (alert != CPU_MF_INT_SF_PRA)
1944
- debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
2092
+ debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
2093
+ alert);
19452094
19462095 /* Sampling authorization change request */
19472096 if (alert & CPU_MF_INT_SF_SACA)
....@@ -1962,6 +2111,7 @@
19622111 sf_disable();
19632112 }
19642113 }
2114
+
19652115 static int cpusf_pmu_setup(unsigned int cpu, int flags)
19662116 {
19672117 /* Ignore the notification if no events are scheduled on the PMU.
....@@ -2018,7 +2168,7 @@
20182168
20192169 sfb_set_limits(min, max);
20202170 pr_info("The sampling buffer limits have changed to: "
2021
- "min=%lu max=%lu (diag=x%lu)\n",
2171
+ "min %lu max %lu (diag %lu)\n",
20222172 CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
20232173 return 0;
20242174 }
....@@ -2036,7 +2186,7 @@
20362186 static void __init pr_cpumsf_err(unsigned int reason)
20372187 {
20382188 pr_err("Sampling facility support for perf is not available: "
2039
- "reason=%04x\n", reason);
2189
+ "reason %#x\n", reason);
20402190 }
20412191
20422192 static int __init init_cpum_sampling_pmu(void)
....@@ -2063,7 +2213,10 @@
20632213
20642214 if (si.ad) {
20652215 sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
2066
- cpumsf_pmu_events_attr[1] =
2216
+ /* Sampling of diagnostic data authorized,
2217
+ * install event into attribute list of PMU device.
2218
+ */
2219
+ cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
20672220 CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
20682221 }
20692222
....@@ -2096,5 +2249,6 @@
20962249 out:
20972250 return err;
20982251 }
2252
+
20992253 arch_initcall(init_cpum_sampling_pmu);
21002254 core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);