| .. | .. | 
|---|
 | 1 | +/* SPDX-License-Identifier: GPL-2.0-or-later */  | 
|---|
| 1 | 2 |  /* | 
|---|
| 2 |  | - * This program is free software; you can redistribute it and/or modify  | 
|---|
| 3 |  | - * it under the terms of the GNU General Public License as published by  | 
|---|
| 4 |  | - * the Free Software Foundation; either version 2 of the License, or  | 
|---|
| 5 |  | - * (at your option) any later version.  | 
|---|
| 6 | 3 |   * | 
|---|
| 7 |  | - * This program is distributed in the hope that it will be useful,  | 
|---|
| 8 |  | - * but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
|---|
| 9 |  | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
|---|
| 10 |  | - * GNU General Public License for more details.  | 
|---|
| 11 |  | - *  | 
|---|
| 12 |  | - * Authors: Waiman Long <waiman.long@hpe.com>  | 
|---|
 | 4 | + * Authors: Waiman Long <longman@redhat.com>  | 
|---|
| 13 | 5 |   */ | 
|---|
| 14 | 6 |   | 
|---|
| 15 |  | -/*  | 
|---|
| 16 |  | - * When queued spinlock statistical counters are enabled, the following  | 
|---|
| 17 |  | - * debugfs files will be created for reporting the counter values:  | 
|---|
| 18 |  | - *  | 
|---|
| 19 |  | - * <debugfs>/qlockstat/  | 
|---|
| 20 |  | - *   pv_hash_hops	- average # of hops per hashing operation  | 
|---|
| 21 |  | - *   pv_kick_unlock	- # of vCPU kicks issued at unlock time  | 
|---|
| 22 |  | - *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake  | 
|---|
| 23 |  | - *   pv_latency_kick	- average latency (ns) of vCPU kick operation  | 
|---|
| 24 |  | - *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup  | 
|---|
| 25 |  | - *   pv_lock_stealing	- # of lock stealing operations  | 
|---|
| 26 |  | - *   pv_spurious_wakeup	- # of spurious wakeups in non-head vCPUs  | 
|---|
| 27 |  | - *   pv_wait_again	- # of wait's after a queue head vCPU kick  | 
|---|
| 28 |  | - *   pv_wait_early	- # of early vCPU wait's  | 
|---|
| 29 |  | - *   pv_wait_head	- # of vCPU wait's at the queue head  | 
|---|
| 30 |  | - *   pv_wait_node	- # of vCPU wait's at a non-head queue node  | 
|---|
| 31 |  | - *   lock_pending	- # of locking operations via pending code  | 
|---|
| 32 |  | - *   lock_slowpath	- # of locking operations via MCS lock queue  | 
|---|
| 33 |  | - *  | 
|---|
| 34 |  | - * Writing to the "reset_counters" file will reset all the above counter  | 
|---|
| 35 |  | - * values.  | 
|---|
| 36 |  | - *  | 
|---|
| 37 |  | - * These statistical counters are implemented as per-cpu variables which are  | 
|---|
| 38 |  | - * summed and computed whenever the corresponding debugfs files are read. This  | 
|---|
| 39 |  | - * minimizes added overhead making the counters usable even in a production  | 
|---|
| 40 |  | - * environment.  | 
|---|
| 41 |  | - *  | 
|---|
| 42 |  | - * There may be slight difference between pv_kick_wake and pv_kick_unlock.  | 
|---|
| 43 |  | - */  | 
|---|
| 44 |  | -enum qlock_stats {  | 
|---|
| 45 |  | -	qstat_pv_hash_hops,  | 
|---|
| 46 |  | -	qstat_pv_kick_unlock,  | 
|---|
| 47 |  | -	qstat_pv_kick_wake,  | 
|---|
| 48 |  | -	qstat_pv_latency_kick,  | 
|---|
| 49 |  | -	qstat_pv_latency_wake,  | 
|---|
| 50 |  | -	qstat_pv_lock_stealing,  | 
|---|
| 51 |  | -	qstat_pv_spurious_wakeup,  | 
|---|
| 52 |  | -	qstat_pv_wait_again,  | 
|---|
| 53 |  | -	qstat_pv_wait_early,  | 
|---|
| 54 |  | -	qstat_pv_wait_head,  | 
|---|
| 55 |  | -	qstat_pv_wait_node,  | 
|---|
| 56 |  | -	qstat_lock_pending,  | 
|---|
| 57 |  | -	qstat_lock_slowpath,  | 
|---|
| 58 |  | -	qstat_num,	/* Total number of statistical counters */  | 
|---|
| 59 |  | -	qstat_reset_cnts = qstat_num,  | 
|---|
| 60 |  | -};  | 
|---|
 | 7 | +#include "lock_events.h"  | 
|---|
| 61 | 8 |   | 
|---|
| 62 |  | -#ifdef CONFIG_QUEUED_LOCK_STAT  | 
|---|
 | 9 | +#ifdef CONFIG_LOCK_EVENT_COUNTS  | 
|---|
 | 10 | +#ifdef CONFIG_PARAVIRT_SPINLOCKS  | 
|---|
| 63 | 11 |  /* | 
|---|
| 64 |  | - * Collect pvqspinlock statistics  | 
|---|
 | 12 | + * Collect pvqspinlock locking event counts  | 
|---|
| 65 | 13 |   */ | 
|---|
| 66 |  | -#include <linux/debugfs.h>  | 
|---|
| 67 | 14 |  #include <linux/sched.h> | 
|---|
| 68 | 15 |  #include <linux/sched/clock.h> | 
|---|
| 69 | 16 |  #include <linux/fs.h> | 
|---|
| 70 | 17 |   | 
|---|
| 71 |  | -static const char * const qstat_names[qstat_num + 1] = {  | 
|---|
| 72 |  | -	[qstat_pv_hash_hops]	   = "pv_hash_hops",  | 
|---|
| 73 |  | -	[qstat_pv_kick_unlock]     = "pv_kick_unlock",  | 
|---|
| 74 |  | -	[qstat_pv_kick_wake]       = "pv_kick_wake",  | 
|---|
| 75 |  | -	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",  | 
|---|
| 76 |  | -	[qstat_pv_latency_kick]	   = "pv_latency_kick",  | 
|---|
| 77 |  | -	[qstat_pv_latency_wake]    = "pv_latency_wake",  | 
|---|
| 78 |  | -	[qstat_pv_lock_stealing]   = "pv_lock_stealing",  | 
|---|
| 79 |  | -	[qstat_pv_wait_again]      = "pv_wait_again",  | 
|---|
| 80 |  | -	[qstat_pv_wait_early]      = "pv_wait_early",  | 
|---|
| 81 |  | -	[qstat_pv_wait_head]       = "pv_wait_head",  | 
|---|
| 82 |  | -	[qstat_pv_wait_node]       = "pv_wait_node",  | 
|---|
| 83 |  | -	[qstat_lock_pending]       = "lock_pending",  | 
|---|
| 84 |  | -	[qstat_lock_slowpath]      = "lock_slowpath",  | 
|---|
| 85 |  | -	[qstat_reset_cnts]         = "reset_counters",  | 
|---|
| 86 |  | -};  | 
|---|
 | 18 | +#define EVENT_COUNT(ev)	lockevents[LOCKEVENT_ ## ev]  | 
|---|
| 87 | 19 |   | 
|---|
| 88 | 20 |  /* | 
|---|
| 89 |  | - * Per-cpu counters  | 
|---|
 | 21 | + * PV specific per-cpu counter  | 
|---|
| 90 | 22 |   */ | 
|---|
| 91 |  | -static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);  | 
|---|
| 92 | 23 |  static DEFINE_PER_CPU(u64, pv_kick_time); | 
|---|
| 93 | 24 |   | 
|---|
| 94 | 25 |  /* | 
|---|
| 95 |  | - * Function to read and return the qlock statistical counter values  | 
|---|
 | 26 | + * Function to read and return the PV qspinlock counts.  | 
|---|
| 96 | 27 |   * | 
|---|
| 97 | 28 |   * The following counters are handled specially: | 
|---|
| 98 |  | - * 1. qstat_pv_latency_kick  | 
|---|
 | 29 | + * 1. pv_latency_kick  | 
|---|
| 99 | 30 |   *    Average kick latency (ns) = pv_latency_kick/pv_kick_unlock | 
|---|
| 100 |  | - * 2. qstat_pv_latency_wake  | 
|---|
 | 31 | + * 2. pv_latency_wake  | 
|---|
| 101 | 32 |   *    Average wake latency (ns) = pv_latency_wake/pv_kick_wake | 
|---|
| 102 |  | - * 3. qstat_pv_hash_hops  | 
|---|
 | 33 | + * 3. pv_hash_hops  | 
|---|
| 103 | 34 |   *    Average hops/hash = pv_hash_hops/pv_kick_unlock | 
|---|
| 104 | 35 |   */ | 
|---|
| 105 |  | -static ssize_t qstat_read(struct file *file, char __user *user_buf,  | 
|---|
| 106 |  | -			  size_t count, loff_t *ppos)  | 
|---|
 | 36 | +ssize_t lockevent_read(struct file *file, char __user *user_buf,  | 
|---|
 | 37 | +		       size_t count, loff_t *ppos)  | 
|---|
| 107 | 38 |  { | 
|---|
| 108 | 39 |  	char buf[64]; | 
|---|
| 109 |  | -	int cpu, counter, len;  | 
|---|
| 110 |  | -	u64 stat = 0, kicks = 0;  | 
|---|
 | 40 | +	int cpu, id, len;  | 
|---|
 | 41 | +	u64 sum = 0, kicks = 0;  | 
|---|
| 111 | 42 |   | 
|---|
| 112 | 43 |  	/* | 
|---|
| 113 | 44 |  	 * Get the counter ID stored in file->f_inode->i_private | 
|---|
| 114 | 45 |  	 */ | 
|---|
| 115 |  | -	counter = (long)file_inode(file)->i_private;  | 
|---|
 | 46 | +	id = (long)file_inode(file)->i_private;  | 
|---|
| 116 | 47 |   | 
|---|
| 117 |  | -	if (counter >= qstat_num)  | 
|---|
 | 48 | +	if (id >= lockevent_num)  | 
|---|
| 118 | 49 |  		return -EBADF; | 
|---|
| 119 | 50 |   | 
|---|
| 120 | 51 |  	for_each_possible_cpu(cpu) { | 
|---|
| 121 |  | -		stat += per_cpu(qstats[counter], cpu);  | 
|---|
 | 52 | +		sum += per_cpu(lockevents[id], cpu);  | 
|---|
| 122 | 53 |  		/* | 
|---|
| 123 |  | -		 * Need to sum additional counter for some of them  | 
|---|
 | 54 | +		 * Need to sum additional counters for some of them  | 
|---|
| 124 | 55 |  		 */ | 
|---|
| 125 |  | -		switch (counter) {  | 
|---|
 | 56 | +		switch (id) {  | 
|---|
| 126 | 57 |   | 
|---|
| 127 |  | -		case qstat_pv_latency_kick:  | 
|---|
| 128 |  | -		case qstat_pv_hash_hops:  | 
|---|
| 129 |  | -			kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);  | 
|---|
 | 58 | +		case LOCKEVENT_pv_latency_kick:  | 
|---|
 | 59 | +		case LOCKEVENT_pv_hash_hops:  | 
|---|
 | 60 | +			kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);  | 
|---|
| 130 | 61 |  			break; | 
|---|
| 131 | 62 |   | 
|---|
| 132 |  | -		case qstat_pv_latency_wake:  | 
|---|
| 133 |  | -			kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);  | 
|---|
 | 63 | +		case LOCKEVENT_pv_latency_wake:  | 
|---|
 | 64 | +			kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);  | 
|---|
| 134 | 65 |  			break; | 
|---|
| 135 | 66 |  		} | 
|---|
| 136 | 67 |  	} | 
|---|
| 137 | 68 |   | 
|---|
| 138 |  | -	if (counter == qstat_pv_hash_hops) {  | 
|---|
 | 69 | +	if (id == LOCKEVENT_pv_hash_hops) {  | 
|---|
| 139 | 70 |  		u64 frac = 0; | 
|---|
| 140 | 71 |   | 
|---|
| 141 | 72 |  		if (kicks) { | 
|---|
| 142 |  | -			frac = 100ULL * do_div(stat, kicks);  | 
|---|
 | 73 | +			frac = 100ULL * do_div(sum, kicks);  | 
|---|
| 143 | 74 |  			frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); | 
|---|
| 144 | 75 |  		} | 
|---|
| 145 | 76 |   | 
|---|
| 146 | 77 |  		/* | 
|---|
| 147 | 78 |  		 * Return a X.XX decimal number | 
|---|
| 148 | 79 |  		 */ | 
|---|
| 149 |  | -		len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);  | 
|---|
 | 80 | +		len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",  | 
|---|
 | 81 | +			       sum, frac);  | 
|---|
| 150 | 82 |  	} else { | 
|---|
| 151 | 83 |  		/* | 
|---|
| 152 | 84 |  		 * Round to the nearest ns | 
|---|
| 153 | 85 |  		 */ | 
|---|
| 154 |  | -		if ((counter == qstat_pv_latency_kick) ||  | 
|---|
| 155 |  | -		    (counter == qstat_pv_latency_wake)) {  | 
|---|
 | 86 | +		if ((id == LOCKEVENT_pv_latency_kick) ||  | 
|---|
 | 87 | +		    (id == LOCKEVENT_pv_latency_wake)) {  | 
|---|
| 156 | 88 |  			if (kicks) | 
|---|
| 157 |  | -				stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);  | 
|---|
 | 89 | +				sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);  | 
|---|
| 158 | 90 |  		} | 
|---|
| 159 |  | -		len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);  | 
|---|
 | 91 | +		len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);  | 
|---|
| 160 | 92 |  	} | 
|---|
| 161 | 93 |   | 
|---|
| 162 | 94 |  	return simple_read_from_buffer(user_buf, count, ppos, buf, len); | 
|---|
| 163 | 95 |  } | 
|---|
| 164 | 96 |   | 
|---|
| 165 | 97 |  /* | 
|---|
| 166 |  | - * Function to handle write request  | 
|---|
| 167 |  | - *  | 
|---|
| 168 |  | - * When counter = reset_cnts, reset all the counter values.  | 
|---|
| 169 |  | - * Since the counter updates aren't atomic, the resetting is done twice  | 
|---|
| 170 |  | - * to make sure that the counters are very likely to be all cleared.  | 
|---|
| 171 |  | - */  | 
|---|
| 172 |  | -static ssize_t qstat_write(struct file *file, const char __user *user_buf,  | 
|---|
| 173 |  | -			   size_t count, loff_t *ppos)  | 
|---|
| 174 |  | -{  | 
|---|
| 175 |  | -	int cpu;  | 
|---|
| 176 |  | -  | 
|---|
| 177 |  | -	/*  | 
|---|
| 178 |  | -	 * Get the counter ID stored in file->f_inode->i_private  | 
|---|
| 179 |  | -	 */  | 
|---|
| 180 |  | -	if ((long)file_inode(file)->i_private != qstat_reset_cnts)  | 
|---|
| 181 |  | -		return count;  | 
|---|
| 182 |  | -  | 
|---|
| 183 |  | -	for_each_possible_cpu(cpu) {  | 
|---|
| 184 |  | -		int i;  | 
|---|
| 185 |  | -		unsigned long *ptr = per_cpu_ptr(qstats, cpu);  | 
|---|
| 186 |  | -  | 
|---|
| 187 |  | -		for (i = 0 ; i < qstat_num; i++)  | 
|---|
| 188 |  | -			WRITE_ONCE(ptr[i], 0);  | 
|---|
| 189 |  | -	}  | 
|---|
| 190 |  | -	return count;  | 
|---|
| 191 |  | -}  | 
|---|
| 192 |  | -  | 
|---|
| 193 |  | -/*  | 
|---|
| 194 |  | - * Debugfs data structures  | 
|---|
| 195 |  | - */  | 
|---|
| 196 |  | -static const struct file_operations fops_qstat = {  | 
|---|
| 197 |  | -	.read = qstat_read,  | 
|---|
| 198 |  | -	.write = qstat_write,  | 
|---|
| 199 |  | -	.llseek = default_llseek,  | 
|---|
| 200 |  | -};  | 
|---|
| 201 |  | -  | 
|---|
| 202 |  | -/*  | 
|---|
| 203 |  | - * Initialize debugfs for the qspinlock statistical counters  | 
|---|
| 204 |  | - */  | 
|---|
| 205 |  | -static int __init init_qspinlock_stat(void)  | 
|---|
| 206 |  | -{  | 
|---|
| 207 |  | -	struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);  | 
|---|
| 208 |  | -	int i;  | 
|---|
| 209 |  | -  | 
|---|
| 210 |  | -	if (!d_qstat)  | 
|---|
| 211 |  | -		goto out;  | 
|---|
| 212 |  | -  | 
|---|
| 213 |  | -	/*  | 
|---|
| 214 |  | -	 * Create the debugfs files  | 
|---|
| 215 |  | -	 *  | 
|---|
| 216 |  | -	 * As reading from and writing to the stat files can be slow, only  | 
|---|
| 217 |  | -	 * root is allowed to do the read/write to limit impact to system  | 
|---|
| 218 |  | -	 * performance.  | 
|---|
| 219 |  | -	 */  | 
|---|
| 220 |  | -	for (i = 0; i < qstat_num; i++)  | 
|---|
| 221 |  | -		if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,  | 
|---|
| 222 |  | -					 (void *)(long)i, &fops_qstat))  | 
|---|
| 223 |  | -			goto fail_undo;  | 
|---|
| 224 |  | -  | 
|---|
| 225 |  | -	if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,  | 
|---|
| 226 |  | -				 (void *)(long)qstat_reset_cnts, &fops_qstat))  | 
|---|
| 227 |  | -		goto fail_undo;  | 
|---|
| 228 |  | -  | 
|---|
| 229 |  | -	return 0;  | 
|---|
| 230 |  | -fail_undo:  | 
|---|
| 231 |  | -	debugfs_remove_recursive(d_qstat);  | 
|---|
| 232 |  | -out:  | 
|---|
| 233 |  | -	pr_warn("Could not create 'qlockstat' debugfs entries\n");  | 
|---|
| 234 |  | -	return -ENOMEM;  | 
|---|
| 235 |  | -}  | 
|---|
| 236 |  | -fs_initcall(init_qspinlock_stat);  | 
|---|
| 237 |  | -  | 
|---|
| 238 |  | -/*  | 
|---|
| 239 |  | - * Increment the PV qspinlock statistical counters  | 
|---|
| 240 |  | - */  | 
|---|
| 241 |  | -static inline void qstat_inc(enum qlock_stats stat, bool cond)  | 
|---|
| 242 |  | -{  | 
|---|
| 243 |  | -	if (cond)  | 
|---|
| 244 |  | -		this_cpu_inc(qstats[stat]);  | 
|---|
| 245 |  | -}  | 
|---|
| 246 |  | -  | 
|---|
| 247 |  | -/*  | 
|---|
| 248 | 98 |   * PV hash hop count | 
|---|
| 249 | 99 |   */ | 
|---|
| 250 |  | -static inline void qstat_hop(int hopcnt)  | 
|---|
 | 100 | +static inline void lockevent_pv_hop(int hopcnt)  | 
|---|
| 251 | 101 |  { | 
|---|
| 252 |  | -	this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);  | 
|---|
 | 102 | +	this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);  | 
|---|
| 253 | 103 |  } | 
|---|
| 254 | 104 |   | 
|---|
| 255 | 105 |  /* | 
|---|
| .. | .. | 
|---|
| 261 | 111 |   | 
|---|
| 262 | 112 |  	per_cpu(pv_kick_time, cpu) = start; | 
|---|
| 263 | 113 |  	pv_kick(cpu); | 
|---|
| 264 |  | -	this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);  | 
|---|
 | 114 | +	this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);  | 
|---|
| 265 | 115 |  } | 
|---|
| 266 | 116 |   | 
|---|
| 267 | 117 |  /* | 
|---|
| .. | .. | 
|---|
| 274 | 124 |  	*pkick_time = 0; | 
|---|
| 275 | 125 |  	pv_wait(ptr, val); | 
|---|
| 276 | 126 |  	if (*pkick_time) { | 
|---|
| 277 |  | -		this_cpu_add(qstats[qstat_pv_latency_wake],  | 
|---|
 | 127 | +		this_cpu_add(EVENT_COUNT(pv_latency_wake),  | 
|---|
| 278 | 128 |  			     sched_clock() - *pkick_time); | 
|---|
| 279 |  | -		qstat_inc(qstat_pv_kick_wake, true);  | 
|---|
 | 129 | +		lockevent_inc(pv_kick_wake);  | 
|---|
| 280 | 130 |  	} | 
|---|
| 281 | 131 |  } | 
|---|
| 282 | 132 |   | 
|---|
| 283 | 133 |  #define pv_kick(c)	__pv_kick(c) | 
|---|
| 284 | 134 |  #define pv_wait(p, v)	__pv_wait(p, v) | 
|---|
| 285 | 135 |   | 
|---|
| 286 |  | -#else /* CONFIG_QUEUED_LOCK_STAT */  | 
|---|
 | 136 | +#endif /* CONFIG_PARAVIRT_SPINLOCKS */  | 
|---|
| 287 | 137 |   | 
|---|
| 288 |  | -static inline void qstat_inc(enum qlock_stats stat, bool cond)	{ }  | 
|---|
| 289 |  | -static inline void qstat_hop(int hopcnt)			{ }  | 
|---|
 | 138 | +#else /* CONFIG_LOCK_EVENT_COUNTS */  | 
|---|
| 290 | 139 |   | 
|---|
| 291 |  | -#endif /* CONFIG_QUEUED_LOCK_STAT */  | 
|---|
 | 140 | +static inline void lockevent_pv_hop(int hopcnt)	{ }  | 
|---|
 | 141 | +  | 
|---|
 | 142 | +#endif /* CONFIG_LOCK_EVENT_COUNTS */  | 
|---|