| .. | .. |
|---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ |
|---|
| 1 | 2 | /* |
|---|
| 2 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 4 | | - * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | | - * (at your option) any later version. |
|---|
| 6 | 3 | * |
|---|
| 7 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 8 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | | - * GNU General Public License for more details. |
|---|
| 11 | | - * |
|---|
| 12 | | - * Authors: Waiman Long <waiman.long@hpe.com> |
|---|
| 4 | + * Authors: Waiman Long <longman@redhat.com> |
|---|
| 13 | 5 | */ |
|---|
| 14 | 6 | |
|---|
| 15 | | -/* |
|---|
| 16 | | - * When queued spinlock statistical counters are enabled, the following |
|---|
| 17 | | - * debugfs files will be created for reporting the counter values: |
|---|
| 18 | | - * |
|---|
| 19 | | - * <debugfs>/qlockstat/ |
|---|
| 20 | | - * pv_hash_hops - average # of hops per hashing operation |
|---|
| 21 | | - * pv_kick_unlock - # of vCPU kicks issued at unlock time |
|---|
| 22 | | - * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake |
|---|
| 23 | | - * pv_latency_kick - average latency (ns) of vCPU kick operation |
|---|
| 24 | | - * pv_latency_wake - average latency (ns) from vCPU kick to wakeup |
|---|
| 25 | | - * pv_lock_stealing - # of lock stealing operations |
|---|
| 26 | | - * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs |
|---|
| 27 | | - * pv_wait_again - # of wait's after a queue head vCPU kick |
|---|
| 28 | | - * pv_wait_early - # of early vCPU wait's |
|---|
| 29 | | - * pv_wait_head - # of vCPU wait's at the queue head |
|---|
| 30 | | - * pv_wait_node - # of vCPU wait's at a non-head queue node |
|---|
| 31 | | - * lock_pending - # of locking operations via pending code |
|---|
| 32 | | - * lock_slowpath - # of locking operations via MCS lock queue |
|---|
| 33 | | - * |
|---|
| 34 | | - * Writing to the "reset_counters" file will reset all the above counter |
|---|
| 35 | | - * values. |
|---|
| 36 | | - * |
|---|
| 37 | | - * These statistical counters are implemented as per-cpu variables which are |
|---|
| 38 | | - * summed and computed whenever the corresponding debugfs files are read. This |
|---|
| 39 | | - * minimizes added overhead making the counters usable even in a production |
|---|
| 40 | | - * environment. |
|---|
| 41 | | - * |
|---|
| 42 | | - * There may be slight difference between pv_kick_wake and pv_kick_unlock. |
|---|
| 43 | | - */ |
|---|
| 44 | | -enum qlock_stats { |
|---|
| 45 | | - qstat_pv_hash_hops, |
|---|
| 46 | | - qstat_pv_kick_unlock, |
|---|
| 47 | | - qstat_pv_kick_wake, |
|---|
| 48 | | - qstat_pv_latency_kick, |
|---|
| 49 | | - qstat_pv_latency_wake, |
|---|
| 50 | | - qstat_pv_lock_stealing, |
|---|
| 51 | | - qstat_pv_spurious_wakeup, |
|---|
| 52 | | - qstat_pv_wait_again, |
|---|
| 53 | | - qstat_pv_wait_early, |
|---|
| 54 | | - qstat_pv_wait_head, |
|---|
| 55 | | - qstat_pv_wait_node, |
|---|
| 56 | | - qstat_lock_pending, |
|---|
| 57 | | - qstat_lock_slowpath, |
|---|
| 58 | | - qstat_num, /* Total number of statistical counters */ |
|---|
| 59 | | - qstat_reset_cnts = qstat_num, |
|---|
| 60 | | -}; |
|---|
| 7 | +#include "lock_events.h" |
|---|
| 61 | 8 | |
|---|
| 62 | | -#ifdef CONFIG_QUEUED_LOCK_STAT |
|---|
| 9 | +#ifdef CONFIG_LOCK_EVENT_COUNTS |
|---|
| 10 | +#ifdef CONFIG_PARAVIRT_SPINLOCKS |
|---|
| 63 | 11 | /* |
|---|
| 64 | | - * Collect pvqspinlock statistics |
|---|
| 12 | + * Collect pvqspinlock locking event counts |
|---|
| 65 | 13 | */ |
|---|
| 66 | | -#include <linux/debugfs.h> |
|---|
| 67 | 14 | #include <linux/sched.h> |
|---|
| 68 | 15 | #include <linux/sched/clock.h> |
|---|
| 69 | 16 | #include <linux/fs.h> |
|---|
| 70 | 17 | |
|---|
| 71 | | -static const char * const qstat_names[qstat_num + 1] = { |
|---|
| 72 | | - [qstat_pv_hash_hops] = "pv_hash_hops", |
|---|
| 73 | | - [qstat_pv_kick_unlock] = "pv_kick_unlock", |
|---|
| 74 | | - [qstat_pv_kick_wake] = "pv_kick_wake", |
|---|
| 75 | | - [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", |
|---|
| 76 | | - [qstat_pv_latency_kick] = "pv_latency_kick", |
|---|
| 77 | | - [qstat_pv_latency_wake] = "pv_latency_wake", |
|---|
| 78 | | - [qstat_pv_lock_stealing] = "pv_lock_stealing", |
|---|
| 79 | | - [qstat_pv_wait_again] = "pv_wait_again", |
|---|
| 80 | | - [qstat_pv_wait_early] = "pv_wait_early", |
|---|
| 81 | | - [qstat_pv_wait_head] = "pv_wait_head", |
|---|
| 82 | | - [qstat_pv_wait_node] = "pv_wait_node", |
|---|
| 83 | | - [qstat_lock_pending] = "lock_pending", |
|---|
| 84 | | - [qstat_lock_slowpath] = "lock_slowpath", |
|---|
| 85 | | - [qstat_reset_cnts] = "reset_counters", |
|---|
| 86 | | -}; |
|---|
| 18 | +#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev] |
|---|
| 87 | 19 | |
|---|
| 88 | 20 | /* |
|---|
| 89 | | - * Per-cpu counters |
|---|
| 21 | + * PV specific per-cpu counter |
|---|
| 90 | 22 | */ |
|---|
| 91 | | -static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]); |
|---|
| 92 | 23 | static DEFINE_PER_CPU(u64, pv_kick_time); |
|---|
| 93 | 24 | |
|---|
| 94 | 25 | /* |
|---|
| 95 | | - * Function to read and return the qlock statistical counter values |
|---|
| 26 | + * Function to read and return the PV qspinlock counts. |
|---|
| 96 | 27 | * |
|---|
| 97 | 28 | * The following counters are handled specially: |
|---|
| 98 | | - * 1. qstat_pv_latency_kick |
|---|
| 29 | + * 1. pv_latency_kick |
|---|
| 99 | 30 | * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock |
|---|
| 100 | | - * 2. qstat_pv_latency_wake |
|---|
| 31 | + * 2. pv_latency_wake |
|---|
| 101 | 32 | * Average wake latency (ns) = pv_latency_wake/pv_kick_wake |
|---|
| 102 | | - * 3. qstat_pv_hash_hops |
|---|
| 33 | + * 3. pv_hash_hops |
|---|
| 103 | 34 | * Average hops/hash = pv_hash_hops/pv_kick_unlock |
|---|
| 104 | 35 | */ |
|---|
| 105 | | -static ssize_t qstat_read(struct file *file, char __user *user_buf, |
|---|
| 106 | | - size_t count, loff_t *ppos) |
|---|
| 36 | +ssize_t lockevent_read(struct file *file, char __user *user_buf, |
|---|
| 37 | + size_t count, loff_t *ppos) |
|---|
| 107 | 38 | { |
|---|
| 108 | 39 | char buf[64]; |
|---|
| 109 | | - int cpu, counter, len; |
|---|
| 110 | | - u64 stat = 0, kicks = 0; |
|---|
| 40 | + int cpu, id, len; |
|---|
| 41 | + u64 sum = 0, kicks = 0; |
|---|
| 111 | 42 | |
|---|
| 112 | 43 | /* |
|---|
| 113 | 44 | * Get the counter ID stored in file->f_inode->i_private |
|---|
| 114 | 45 | */ |
|---|
| 115 | | - counter = (long)file_inode(file)->i_private; |
|---|
| 46 | + id = (long)file_inode(file)->i_private; |
|---|
| 116 | 47 | |
|---|
| 117 | | - if (counter >= qstat_num) |
|---|
| 48 | + if (id >= lockevent_num) |
|---|
| 118 | 49 | return -EBADF; |
|---|
| 119 | 50 | |
|---|
| 120 | 51 | for_each_possible_cpu(cpu) { |
|---|
| 121 | | - stat += per_cpu(qstats[counter], cpu); |
|---|
| 52 | + sum += per_cpu(lockevents[id], cpu); |
|---|
| 122 | 53 | /* |
|---|
| 123 | | - * Need to sum additional counter for some of them |
|---|
| 54 | + * Need to sum additional counters for some of them |
|---|
| 124 | 55 | */ |
|---|
| 125 | | - switch (counter) { |
|---|
| 56 | + switch (id) { |
|---|
| 126 | 57 | |
|---|
| 127 | | - case qstat_pv_latency_kick: |
|---|
| 128 | | - case qstat_pv_hash_hops: |
|---|
| 129 | | - kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu); |
|---|
| 58 | + case LOCKEVENT_pv_latency_kick: |
|---|
| 59 | + case LOCKEVENT_pv_hash_hops: |
|---|
| 60 | + kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu); |
|---|
| 130 | 61 | break; |
|---|
| 131 | 62 | |
|---|
| 132 | | - case qstat_pv_latency_wake: |
|---|
| 133 | | - kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu); |
|---|
| 63 | + case LOCKEVENT_pv_latency_wake: |
|---|
| 64 | + kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu); |
|---|
| 134 | 65 | break; |
|---|
| 135 | 66 | } |
|---|
| 136 | 67 | } |
|---|
| 137 | 68 | |
|---|
| 138 | | - if (counter == qstat_pv_hash_hops) { |
|---|
| 69 | + if (id == LOCKEVENT_pv_hash_hops) { |
|---|
| 139 | 70 | u64 frac = 0; |
|---|
| 140 | 71 | |
|---|
| 141 | 72 | if (kicks) { |
|---|
| 142 | | - frac = 100ULL * do_div(stat, kicks); |
|---|
| 73 | + frac = 100ULL * do_div(sum, kicks); |
|---|
| 143 | 74 | frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); |
|---|
| 144 | 75 | } |
|---|
| 145 | 76 | |
|---|
| 146 | 77 | /* |
|---|
| 147 | 78 | * Return a X.XX decimal number |
|---|
| 148 | 79 | */ |
|---|
| 149 | | - len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac); |
|---|
| 80 | + len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", |
|---|
| 81 | + sum, frac); |
|---|
| 150 | 82 | } else { |
|---|
| 151 | 83 | /* |
|---|
| 152 | 84 | * Round to the nearest ns |
|---|
| 153 | 85 | */ |
|---|
| 154 | | - if ((counter == qstat_pv_latency_kick) || |
|---|
| 155 | | - (counter == qstat_pv_latency_wake)) { |
|---|
| 86 | + if ((id == LOCKEVENT_pv_latency_kick) || |
|---|
| 87 | + (id == LOCKEVENT_pv_latency_wake)) { |
|---|
| 156 | 88 | if (kicks) |
|---|
| 157 | | - stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); |
|---|
| 89 | + sum = DIV_ROUND_CLOSEST_ULL(sum, kicks); |
|---|
| 158 | 90 | } |
|---|
| 159 | | - len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat); |
|---|
| 91 | + len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); |
|---|
| 160 | 92 | } |
|---|
| 161 | 93 | |
|---|
| 162 | 94 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); |
|---|
| 163 | 95 | } |
|---|
| 164 | 96 | |
|---|
| 165 | 97 | /* |
|---|
| 166 | | - * Function to handle write request |
|---|
| 167 | | - * |
|---|
| 168 | | - * When counter = reset_cnts, reset all the counter values. |
|---|
| 169 | | - * Since the counter updates aren't atomic, the resetting is done twice |
|---|
| 170 | | - * to make sure that the counters are very likely to be all cleared. |
|---|
| 171 | | - */ |
|---|
| 172 | | -static ssize_t qstat_write(struct file *file, const char __user *user_buf, |
|---|
| 173 | | - size_t count, loff_t *ppos) |
|---|
| 174 | | -{ |
|---|
| 175 | | - int cpu; |
|---|
| 176 | | - |
|---|
| 177 | | - /* |
|---|
| 178 | | - * Get the counter ID stored in file->f_inode->i_private |
|---|
| 179 | | - */ |
|---|
| 180 | | - if ((long)file_inode(file)->i_private != qstat_reset_cnts) |
|---|
| 181 | | - return count; |
|---|
| 182 | | - |
|---|
| 183 | | - for_each_possible_cpu(cpu) { |
|---|
| 184 | | - int i; |
|---|
| 185 | | - unsigned long *ptr = per_cpu_ptr(qstats, cpu); |
|---|
| 186 | | - |
|---|
| 187 | | - for (i = 0 ; i < qstat_num; i++) |
|---|
| 188 | | - WRITE_ONCE(ptr[i], 0); |
|---|
| 189 | | - } |
|---|
| 190 | | - return count; |
|---|
| 191 | | -} |
|---|
| 192 | | - |
|---|
| 193 | | -/* |
|---|
| 194 | | - * Debugfs data structures |
|---|
| 195 | | - */ |
|---|
| 196 | | -static const struct file_operations fops_qstat = { |
|---|
| 197 | | - .read = qstat_read, |
|---|
| 198 | | - .write = qstat_write, |
|---|
| 199 | | - .llseek = default_llseek, |
|---|
| 200 | | -}; |
|---|
| 201 | | - |
|---|
| 202 | | -/* |
|---|
| 203 | | - * Initialize debugfs for the qspinlock statistical counters |
|---|
| 204 | | - */ |
|---|
| 205 | | -static int __init init_qspinlock_stat(void) |
|---|
| 206 | | -{ |
|---|
| 207 | | - struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL); |
|---|
| 208 | | - int i; |
|---|
| 209 | | - |
|---|
| 210 | | - if (!d_qstat) |
|---|
| 211 | | - goto out; |
|---|
| 212 | | - |
|---|
| 213 | | - /* |
|---|
| 214 | | - * Create the debugfs files |
|---|
| 215 | | - * |
|---|
| 216 | | - * As reading from and writing to the stat files can be slow, only |
|---|
| 217 | | - * root is allowed to do the read/write to limit impact to system |
|---|
| 218 | | - * performance. |
|---|
| 219 | | - */ |
|---|
| 220 | | - for (i = 0; i < qstat_num; i++) |
|---|
| 221 | | - if (!debugfs_create_file(qstat_names[i], 0400, d_qstat, |
|---|
| 222 | | - (void *)(long)i, &fops_qstat)) |
|---|
| 223 | | - goto fail_undo; |
|---|
| 224 | | - |
|---|
| 225 | | - if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat, |
|---|
| 226 | | - (void *)(long)qstat_reset_cnts, &fops_qstat)) |
|---|
| 227 | | - goto fail_undo; |
|---|
| 228 | | - |
|---|
| 229 | | - return 0; |
|---|
| 230 | | -fail_undo: |
|---|
| 231 | | - debugfs_remove_recursive(d_qstat); |
|---|
| 232 | | -out: |
|---|
| 233 | | - pr_warn("Could not create 'qlockstat' debugfs entries\n"); |
|---|
| 234 | | - return -ENOMEM; |
|---|
| 235 | | -} |
|---|
| 236 | | -fs_initcall(init_qspinlock_stat); |
|---|
| 237 | | - |
|---|
| 238 | | -/* |
|---|
| 239 | | - * Increment the PV qspinlock statistical counters |
|---|
| 240 | | - */ |
|---|
| 241 | | -static inline void qstat_inc(enum qlock_stats stat, bool cond) |
|---|
| 242 | | -{ |
|---|
| 243 | | - if (cond) |
|---|
| 244 | | - this_cpu_inc(qstats[stat]); |
|---|
| 245 | | -} |
|---|
| 246 | | - |
|---|
| 247 | | -/* |
|---|
| 248 | 98 | * PV hash hop count |
|---|
| 249 | 99 | */ |
|---|
| 250 | | -static inline void qstat_hop(int hopcnt) |
|---|
| 100 | +static inline void lockevent_pv_hop(int hopcnt) |
|---|
| 251 | 101 | { |
|---|
| 252 | | - this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt); |
|---|
| 102 | + this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt); |
|---|
| 253 | 103 | } |
|---|
| 254 | 104 | |
|---|
| 255 | 105 | /* |
|---|
| .. | .. |
|---|
| 261 | 111 | |
|---|
| 262 | 112 | per_cpu(pv_kick_time, cpu) = start; |
|---|
| 263 | 113 | pv_kick(cpu); |
|---|
| 264 | | - this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start); |
|---|
| 114 | + this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start); |
|---|
| 265 | 115 | } |
|---|
| 266 | 116 | |
|---|
| 267 | 117 | /* |
|---|
| .. | .. |
|---|
| 274 | 124 | *pkick_time = 0; |
|---|
| 275 | 125 | pv_wait(ptr, val); |
|---|
| 276 | 126 | if (*pkick_time) { |
|---|
| 277 | | - this_cpu_add(qstats[qstat_pv_latency_wake], |
|---|
| 127 | + this_cpu_add(EVENT_COUNT(pv_latency_wake), |
|---|
| 278 | 128 | sched_clock() - *pkick_time); |
|---|
| 279 | | - qstat_inc(qstat_pv_kick_wake, true); |
|---|
| 129 | + lockevent_inc(pv_kick_wake); |
|---|
| 280 | 130 | } |
|---|
| 281 | 131 | } |
|---|
| 282 | 132 | |
|---|
| 283 | 133 | #define pv_kick(c) __pv_kick(c) |
|---|
| 284 | 134 | #define pv_wait(p, v) __pv_wait(p, v) |
|---|
| 285 | 135 | |
|---|
| 286 | | -#else /* CONFIG_QUEUED_LOCK_STAT */ |
|---|
| 136 | +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ |
|---|
| 287 | 137 | |
|---|
| 288 | | -static inline void qstat_inc(enum qlock_stats stat, bool cond) { } |
|---|
| 289 | | -static inline void qstat_hop(int hopcnt) { } |
|---|
| 138 | +#else /* CONFIG_LOCK_EVENT_COUNTS */ |
|---|
| 290 | 139 | |
|---|
| 291 | | -#endif /* CONFIG_QUEUED_LOCK_STAT */ |
|---|
| 140 | +static inline void lockevent_pv_hop(int hopcnt) { } |
|---|
| 141 | + |
|---|
| 142 | +#endif /* CONFIG_LOCK_EVENT_COUNTS */ |
|---|