~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,22 +1,9 @@
	1	+// SPDX-License-Identifier: GPL-2.0-or-later
1	2	/*
2	3	* pSeries_lpar.c
3	4	* Copyright (C) 2001 Todd Inglett, IBM Corporation
4	5	*
5	6	* pSeries LPAR support.
6		- *
7		- * This program is free software; you can redistribute it and/or modify
8		- * it under the terms of the GNU General Public License as published by
9		- * the Free Software Foundation; either version 2 of the License, or
10		- * (at your option) any later version.
11		- *
12		- * This program is distributed in the hope that it will be useful,
13		- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14		- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		- * GNU General Public License for more details.
16		- *
17		- * You should have received a copy of the GNU General Public License
18		- * along with this program; if not, write to the Free Software
19		- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20	7	*/
21	8
22	9	/* Enables debugging of low-level hash table routines - careful! */
..	..	@@ -30,10 +17,14 @@
30	17	#include <linux/jump_label.h>
31	18	#include <linux/delay.h>
32	19	#include <linux/stop_machine.h>
	20	+#include <linux/spinlock.h>
	21	+#include <linux/cpuhotplug.h>
	22	+#include <linux/workqueue.h>
	23	+#include <linux/proc_fs.h>
	24	+#include <linux/pgtable.h>
33	25	#include <asm/processor.h>
34	26	#include <asm/mmu.h>
35	27	#include <asm/page.h>
36		-#include <asm/pgtable.h>
37	28	#include <asm/machdep.h>
38	29	#include <asm/mmu_context.h>
39	30	#include <asm/iommu.h>
..	..	@@ -49,6 +40,7 @@
49	40	#include <asm/fadump.h>
50	41	#include <asm/asm-prototypes.h>
51	42	#include <asm/debugfs.h>
	43	+#include <asm/dtl.h>
52	44
53	45	#include "pseries.h"
54	46
..	..	@@ -65,13 +57,607 @@
65	57	EXPORT_SYMBOL(plpar_hcall9);
66	58	EXPORT_SYMBOL(plpar_hcall_norets);
67	59
	60	+/*
	61	+ * H_BLOCK_REMOVE supported block size for this page size in segment whose base
	62	+ * page size is that page size.
	63	+ *
	64	+ * The first index is the segment base page size, the second one is the actual
	65	+ * page size.
	66	+ */
	67	+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
	68	+
	69	+/*
	70	+ * Due to the involved complexity, and that the current hypervisor is only
	71	+ * returning this value or 0, we limit the supported H_BLOCK_REMOVE block
	72	+ * size to 8.
	73	+ */
	74	+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
	75	+
	76	+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	77	+static u8 dtl_mask = DTL_LOG_PREEMPT;
	78	+#else
	79	+static u8 dtl_mask;
	80	+#endif
	81	+
	82	+void alloc_dtl_buffers(unsigned long *time_limit)
	83	+{
	84	+ int cpu;
	85	+ struct paca_struct *pp;
	86	+ struct dtl_entry *dtl;
	87	+
	88	+ for_each_possible_cpu(cpu) {
	89	+ pp = paca_ptrs[cpu];
	90	+ if (pp->dispatch_log)
	91	+ continue;
	92	+ dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
	93	+ if (!dtl) {
	94	+ pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
	95	+ cpu);
	96	+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	97	+ pr_warn("Stolen time statistics will be unreliable\n");
	98	+#endif
	99	+ break;
	100	+ }
	101	+
	102	+ pp->dtl_ridx = 0;
	103	+ pp->dispatch_log = dtl;
	104	+ pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
	105	+ pp->dtl_curr = dtl;
	106	+
	107	+ if (time_limit && time_after(jiffies, *time_limit)) {
	108	+ cond_resched();
	109	+ *time_limit = jiffies + HZ;
	110	+ }
	111	+ }
	112	+}
	113	+
	114	+void register_dtl_buffer(int cpu)
	115	+{
	116	+ long ret;
	117	+ struct paca_struct *pp;
	118	+ struct dtl_entry *dtl;
	119	+ int hwcpu = get_hard_smp_processor_id(cpu);
	120	+
	121	+ pp = paca_ptrs[cpu];
	122	+ dtl = pp->dispatch_log;
	123	+ if (dtl && dtl_mask) {
	124	+ pp->dtl_ridx = 0;
	125	+ pp->dtl_curr = dtl;
	126	+ lppaca_of(cpu).dtl_idx = 0;
	127	+
	128	+ /* hypervisor reads buffer length from this field */
	129	+ dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
	130	+ ret = register_dtl(hwcpu, __pa(dtl));
	131	+ if (ret)
	132	+ pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n",
	133	+ cpu, hwcpu, ret);
	134	+
	135	+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
	136	+ }
	137	+}
	138	+
	139	+#ifdef CONFIG_PPC_SPLPAR
	140	+struct dtl_worker {
	141	+ struct delayed_work work;
	142	+ int cpu;
	143	+};
	144	+
	145	+struct vcpu_dispatch_data {
	146	+ int last_disp_cpu;
	147	+
	148	+ int total_disp;
	149	+
	150	+ int same_cpu_disp;
	151	+ int same_chip_disp;
	152	+ int diff_chip_disp;
	153	+ int far_chip_disp;
	154	+
	155	+ int numa_home_disp;
	156	+ int numa_remote_disp;
	157	+ int numa_far_disp;
	158	+};
	159	+
	160	+/*
	161	+ * This represents the number of cpus in the hypervisor. Since there is no
	162	+ * architected way to discover the number of processors in the host, we
	163	+ * provision for dealing with NR_CPUS. This is currently 2048 by default, and
	164	+ * is sufficient for our purposes. This will need to be tweaked if
	165	+ * CONFIG_NR_CPUS is changed.
	166	+ */
	167	+#define NR_CPUS_H NR_CPUS
	168	+
	169	+DEFINE_RWLOCK(dtl_access_lock);
	170	+static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data);
	171	+static DEFINE_PER_CPU(u64, dtl_entry_ridx);
	172	+static DEFINE_PER_CPU(struct dtl_worker, dtl_workers);
	173	+static enum cpuhp_state dtl_worker_state;
	174	+static DEFINE_MUTEX(dtl_enable_mutex);
	175	+static int vcpudispatch_stats_on __read_mostly;
	176	+static int vcpudispatch_stats_freq = 50;
	177	+static __be32 *vcpu_associativity, *pcpu_associativity;
	178	+
	179	+
	180	+static void free_dtl_buffers(unsigned long *time_limit)
	181	+{
	182	+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	183	+ int cpu;
	184	+ struct paca_struct *pp;
	185	+
	186	+ for_each_possible_cpu(cpu) {
	187	+ pp = paca_ptrs[cpu];
	188	+ if (!pp->dispatch_log)
	189	+ continue;
	190	+ kmem_cache_free(dtl_cache, pp->dispatch_log);
	191	+ pp->dtl_ridx = 0;
	192	+ pp->dispatch_log = 0;
	193	+ pp->dispatch_log_end = 0;
	194	+ pp->dtl_curr = 0;
	195	+
	196	+ if (time_limit && time_after(jiffies, *time_limit)) {
	197	+ cond_resched();
	198	+ *time_limit = jiffies + HZ;
	199	+ }
	200	+ }
	201	+#endif
	202	+}
	203	+
	204	+static int init_cpu_associativity(void)
	205	+{
	206	+ vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core,
	207	+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
	208	+ pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core,
	209	+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
	210	+
	211	+ if (!vcpu_associativity || !pcpu_associativity) {
	212	+ pr_err("error allocating memory for associativity information\n");
	213	+ return -ENOMEM;
	214	+ }
	215	+
	216	+ return 0;
	217	+}
	218	+
	219	+static void destroy_cpu_associativity(void)
	220	+{
	221	+ kfree(vcpu_associativity);
	222	+ kfree(pcpu_associativity);
	223	+ vcpu_associativity = pcpu_associativity = 0;
	224	+}
	225	+
	226	+static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag)
	227	+{
	228	+ __be32 *assoc;
	229	+ int rc = 0;
	230	+
	231	+ assoc = &cpu_assoc[(int)(cpu / threads_per_core) * VPHN_ASSOC_BUFSIZE];
	232	+ if (!assoc[0]) {
	233	+ rc = hcall_vphn(cpu, flag, &assoc[0]);
	234	+ if (rc)
	235	+ return NULL;
	236	+ }
	237	+
	238	+ return assoc;
	239	+}
	240	+
	241	+static __be32 *get_pcpu_associativity(int cpu)
	242	+{
	243	+ return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU);
	244	+}
	245	+
	246	+static __be32 *get_vcpu_associativity(int cpu)
	247	+{
	248	+ return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU);
	249	+}
	250	+
	251	+static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
	252	+{
	253	+ __be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc;
	254	+
	255	+ if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H)
	256	+ return -EINVAL;
	257	+
	258	+ last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu);
	259	+ cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu);
	260	+
	261	+ if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
	262	+ return -EIO;
	263	+
	264	+ return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
	265	+}
	266	+
	267	+static int cpu_home_node_dispatch_distance(int disp_cpu)
	268	+{
	269	+ __be32 *disp_cpu_assoc, *vcpu_assoc;
	270	+ int vcpu_id = smp_processor_id();
	271	+
	272	+ if (disp_cpu >= NR_CPUS_H) {
	273	+ pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n",
	274	+ disp_cpu, NR_CPUS_H);
	275	+ return -EINVAL;
	276	+ }
	277	+
	278	+ disp_cpu_assoc = get_pcpu_associativity(disp_cpu);
	279	+ vcpu_assoc = get_vcpu_associativity(vcpu_id);
	280	+
	281	+ if (!disp_cpu_assoc || !vcpu_assoc)
	282	+ return -EIO;
	283	+
	284	+ return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
	285	+}
	286	+
	287	+static void update_vcpu_disp_stat(int disp_cpu)
	288	+{
	289	+ struct vcpu_dispatch_data *disp;
	290	+ int distance;
	291	+
	292	+ disp = this_cpu_ptr(&vcpu_disp_data);
	293	+ if (disp->last_disp_cpu == -1) {
	294	+ disp->last_disp_cpu = disp_cpu;
	295	+ return;
	296	+ }
	297	+
	298	+ disp->total_disp++;
	299	+
	300	+ if (disp->last_disp_cpu == disp_cpu ||
	301	+ (cpu_first_thread_sibling(disp->last_disp_cpu) ==
	302	+ cpu_first_thread_sibling(disp_cpu)))
	303	+ disp->same_cpu_disp++;
	304	+ else {
	305	+ distance = cpu_relative_dispatch_distance(disp->last_disp_cpu,
	306	+ disp_cpu);
	307	+ if (distance < 0)
	308	+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
	309	+ smp_processor_id());
	310	+ else {
	311	+ switch (distance) {
	312	+ case 0:
	313	+ disp->same_chip_disp++;
	314	+ break;
	315	+ case 1:
	316	+ disp->diff_chip_disp++;
	317	+ break;
	318	+ case 2:
	319	+ disp->far_chip_disp++;
	320	+ break;
	321	+ default:
	322	+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n",
	323	+ smp_processor_id(),
	324	+ disp->last_disp_cpu,
	325	+ disp_cpu,
	326	+ distance);
	327	+ }
	328	+ }
	329	+ }
	330	+
	331	+ distance = cpu_home_node_dispatch_distance(disp_cpu);
	332	+ if (distance < 0)
	333	+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
	334	+ smp_processor_id());
	335	+ else {
	336	+ switch (distance) {
	337	+ case 0:
	338	+ disp->numa_home_disp++;
	339	+ break;
	340	+ case 1:
	341	+ disp->numa_remote_disp++;
	342	+ break;
	343	+ case 2:
	344	+ disp->numa_far_disp++;
	345	+ break;
	346	+ default:
	347	+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n",
	348	+ smp_processor_id(),
	349	+ disp_cpu,
	350	+ distance);
	351	+ }
	352	+ }
	353	+
	354	+ disp->last_disp_cpu = disp_cpu;
	355	+}
	356	+
	357	+static void process_dtl_buffer(struct work_struct *work)
	358	+{
	359	+ struct dtl_entry dtle;
	360	+ u64 i = __this_cpu_read(dtl_entry_ridx);
	361	+ struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
	362	+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
	363	+ struct lppaca *vpa = local_paca->lppaca_ptr;
	364	+ struct dtl_worker *d = container_of(work, struct dtl_worker, work.work);
	365	+
	366	+ if (!local_paca->dispatch_log)
	367	+ return;
	368	+
	369	+ /* if we have been migrated away, we cancel ourself */
	370	+ if (d->cpu != smp_processor_id()) {
	371	+ pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n",
	372	+ smp_processor_id());
	373	+ return;
	374	+ }
	375	+
	376	+ if (i == be64_to_cpu(vpa->dtl_idx))
	377	+ goto out;
	378	+
	379	+ while (i < be64_to_cpu(vpa->dtl_idx)) {
	380	+ dtle = *dtl;
	381	+ barrier();
	382	+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
	383	+ /* buffer has overflowed */
	384	+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n",
	385	+ d->cpu,
	386	+ be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i);
	387	+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
	388	+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
	389	+ continue;
	390	+ }
	391	+ update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id));
	392	+ ++i;
	393	+ ++dtl;
	394	+ if (dtl == dtl_end)
	395	+ dtl = local_paca->dispatch_log;
	396	+ }
	397	+
	398	+ __this_cpu_write(dtl_entry_ridx, i);
	399	+
	400	+out:
	401	+ schedule_delayed_work_on(d->cpu, to_delayed_work(work),
	402	+ HZ / vcpudispatch_stats_freq);
	403	+}
	404	+
	405	+static int dtl_worker_online(unsigned int cpu)
	406	+{
	407	+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
	408	+
	409	+ memset(d, 0, sizeof(*d));
	410	+ INIT_DELAYED_WORK(&d->work, process_dtl_buffer);
	411	+ d->cpu = cpu;
	412	+
	413	+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	414	+ per_cpu(dtl_entry_ridx, cpu) = 0;
	415	+ register_dtl_buffer(cpu);
	416	+#else
	417	+ per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx);
	418	+#endif
	419	+
	420	+ schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq);
	421	+ return 0;
	422	+}
	423	+
	424	+static int dtl_worker_offline(unsigned int cpu)
	425	+{
	426	+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
	427	+
	428	+ cancel_delayed_work_sync(&d->work);
	429	+
	430	+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	431	+ unregister_dtl(get_hard_smp_processor_id(cpu));
	432	+#endif
	433	+
	434	+ return 0;
	435	+}
	436	+
	437	+static void set_global_dtl_mask(u8 mask)
	438	+{
	439	+ int cpu;
	440	+
	441	+ dtl_mask = mask;
	442	+ for_each_present_cpu(cpu)
	443	+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
	444	+}
	445	+
	446	+static void reset_global_dtl_mask(void)
	447	+{
	448	+ int cpu;
	449	+
	450	+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	451	+ dtl_mask = DTL_LOG_PREEMPT;
	452	+#else
	453	+ dtl_mask = 0;
	454	+#endif
	455	+ for_each_present_cpu(cpu)
	456	+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
	457	+}
	458	+
	459	+static int dtl_worker_enable(unsigned long *time_limit)
	460	+{
	461	+ int rc = 0, state;
	462	+
	463	+ if (!write_trylock(&dtl_access_lock)) {
	464	+ rc = -EBUSY;
	465	+ goto out;
	466	+ }
	467	+
	468	+ set_global_dtl_mask(DTL_LOG_ALL);
	469	+
	470	+ /* Setup dtl buffers and register those */
	471	+ alloc_dtl_buffers(time_limit);
	472	+
	473	+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
	474	+ dtl_worker_online, dtl_worker_offline);
	475	+ if (state < 0) {
	476	+ pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
	477	+ free_dtl_buffers(time_limit);
	478	+ reset_global_dtl_mask();
	479	+ write_unlock(&dtl_access_lock);
	480	+ rc = -EINVAL;
	481	+ goto out;
	482	+ }
	483	+ dtl_worker_state = state;
	484	+
	485	+out:
	486	+ return rc;
	487	+}
	488	+
	489	+static void dtl_worker_disable(unsigned long *time_limit)
	490	+{
	491	+ cpuhp_remove_state(dtl_worker_state);
	492	+ free_dtl_buffers(time_limit);
	493	+ reset_global_dtl_mask();
	494	+ write_unlock(&dtl_access_lock);
	495	+}
	496	+
	497	+static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
	498	+ size_t count, loff_t *ppos)
	499	+{
	500	+ unsigned long time_limit = jiffies + HZ;
	501	+ struct vcpu_dispatch_data *disp;
	502	+ int rc, cmd, cpu;
	503	+ char buf[16];
	504	+
	505	+ if (count > 15)
	506	+ return -EINVAL;
	507	+
	508	+ if (copy_from_user(buf, p, count))
	509	+ return -EFAULT;
	510	+
	511	+ buf[count] = 0;
	512	+ rc = kstrtoint(buf, 0, &cmd);
	513	+ if (rc || cmd < 0 || cmd > 1) {
	514	+ pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n");
	515	+ return rc ? rc : -EINVAL;
	516	+ }
	517	+
	518	+ mutex_lock(&dtl_enable_mutex);
	519	+
	520	+ if ((cmd == 0 && !vcpudispatch_stats_on) ||
	521	+ (cmd == 1 && vcpudispatch_stats_on))
	522	+ goto out;
	523	+
	524	+ if (cmd) {
	525	+ rc = init_cpu_associativity();
	526	+ if (rc)
	527	+ goto out;
	528	+
	529	+ for_each_possible_cpu(cpu) {
	530	+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
	531	+ memset(disp, 0, sizeof(*disp));
	532	+ disp->last_disp_cpu = -1;
	533	+ }
	534	+
	535	+ rc = dtl_worker_enable(&time_limit);
	536	+ if (rc) {
	537	+ destroy_cpu_associativity();
	538	+ goto out;
	539	+ }
	540	+ } else {
	541	+ dtl_worker_disable(&time_limit);
	542	+ destroy_cpu_associativity();
	543	+ }
	544	+
	545	+ vcpudispatch_stats_on = cmd;
	546	+
	547	+out:
	548	+ mutex_unlock(&dtl_enable_mutex);
	549	+ if (rc)
	550	+ return rc;
	551	+ return count;
	552	+}
	553	+
	554	+static int vcpudispatch_stats_display(struct seq_file *p, void *v)
	555	+{
	556	+ int cpu;
	557	+ struct vcpu_dispatch_data *disp;
	558	+
	559	+ if (!vcpudispatch_stats_on) {
	560	+ seq_puts(p, "off\n");
	561	+ return 0;
	562	+ }
	563	+
	564	+ for_each_online_cpu(cpu) {
	565	+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
	566	+ seq_printf(p, "cpu%d", cpu);
	567	+ seq_put_decimal_ull(p, " ", disp->total_disp);
	568	+ seq_put_decimal_ull(p, " ", disp->same_cpu_disp);
	569	+ seq_put_decimal_ull(p, " ", disp->same_chip_disp);
	570	+ seq_put_decimal_ull(p, " ", disp->diff_chip_disp);
	571	+ seq_put_decimal_ull(p, " ", disp->far_chip_disp);
	572	+ seq_put_decimal_ull(p, " ", disp->numa_home_disp);
	573	+ seq_put_decimal_ull(p, " ", disp->numa_remote_disp);
	574	+ seq_put_decimal_ull(p, " ", disp->numa_far_disp);
	575	+ seq_puts(p, "\n");
	576	+ }
	577	+
	578	+ return 0;
	579	+}
	580	+
	581	+static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
	582	+{
	583	+ return single_open(file, vcpudispatch_stats_display, NULL);
	584	+}
	585	+
	586	+static const struct proc_ops vcpudispatch_stats_proc_ops = {
	587	+ .proc_open = vcpudispatch_stats_open,
	588	+ .proc_read = seq_read,
	589	+ .proc_write = vcpudispatch_stats_write,
	590	+ .proc_lseek = seq_lseek,
	591	+ .proc_release = single_release,
	592	+};
	593	+
	594	+static ssize_t vcpudispatch_stats_freq_write(struct file *file,
	595	+ const char __user *p, size_t count, loff_t *ppos)
	596	+{
	597	+ int rc, freq;
	598	+ char buf[16];
	599	+
	600	+ if (count > 15)
	601	+ return -EINVAL;
	602	+
	603	+ if (copy_from_user(buf, p, count))
	604	+ return -EFAULT;
	605	+
	606	+ buf[count] = 0;
	607	+ rc = kstrtoint(buf, 0, &freq);
	608	+ if (rc || freq < 1 || freq > HZ) {
	609	+ pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n",
	610	+ HZ);
	611	+ return rc ? rc : -EINVAL;
	612	+ }
	613	+
	614	+ vcpudispatch_stats_freq = freq;
	615	+
	616	+ return count;
	617	+}
	618	+
	619	+static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v)
	620	+{
	621	+ seq_printf(p, "%d\n", vcpudispatch_stats_freq);
	622	+ return 0;
	623	+}
	624	+
	625	+static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
	626	+{
	627	+ return single_open(file, vcpudispatch_stats_freq_display, NULL);
	628	+}
	629	+
	630	+static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
	631	+ .proc_open = vcpudispatch_stats_freq_open,
	632	+ .proc_read = seq_read,
	633	+ .proc_write = vcpudispatch_stats_freq_write,
	634	+ .proc_lseek = seq_lseek,
	635	+ .proc_release = single_release,
	636	+};
	637	+
	638	+static int __init vcpudispatch_stats_procfs_init(void)
	639	+{
	640	+ if (!lppaca_shared_proc())
	641	+ return 0;
	642	+
	643	+ if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
	644	+ &vcpudispatch_stats_proc_ops))
	645	+ pr_err("vcpudispatch_stats: error creating procfs file\n");
	646	+ else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL,
	647	+ &vcpudispatch_stats_freq_proc_ops))
	648	+ pr_err("vcpudispatch_stats_freq: error creating procfs file\n");
	649	+
	650	+ return 0;
	651	+}
	652	+
	653	+machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
	654	+#endif /* CONFIG_PPC_SPLPAR */
	655	+
68	656	void vpa_init(int cpu)
69	657	{
70	658	int hwcpu = get_hard_smp_processor_id(cpu);
71	659	unsigned long addr;
72	660	long ret;
73		- struct paca_struct *pp;
74		- struct dtl_entry *dtl;
75	661
76	662	/*
77	663	* The spec says it "may be problematic" if CPU x registers the VPA of
..	..	@@ -112,22 +698,7 @@
112	698	/*
113	699	* Register dispatch trace log, if one has been allocated.
114	700	*/
115		- pp = paca_ptrs[cpu];
116		- dtl = pp->dispatch_log;
117		- if (dtl) {
118		- pp->dtl_ridx = 0;
119		- pp->dtl_curr = dtl;
120		- lppaca_of(cpu).dtl_idx = 0;
121		-
122		- /* hypervisor reads buffer length from this field */
123		- dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
124		- ret = register_dtl(hwcpu, __pa(dtl));
125		- if (ret)
126		- pr_err("WARNING: DTL registration of cpu %d (hw %d) "
127		- "failed with %ld\n", smp_processor_id(),
128		- hwcpu, ret);
129		- lppaca_of(cpu).dtl_enable_mask = 2;
130		- }
	701	+ register_dtl_buffer(cpu);
131	702	}
132	703
133	704	#ifdef CONFIG_PPC_BOOK3S_64
..	..	@@ -204,7 +775,7 @@
204	775
205	776	/* don't remove a bolted entry */
206	777	lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
207		- (0x1UL << 4), &dummy1, &dummy2);
	778	+ HPTE_V_BOLTED, &dummy1, &dummy2);
208	779	if (lpar_rc == H_SUCCESS)
209	780	return i;
210	781
..	..	@@ -368,11 +939,19 @@
368	939	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
369	940	want_v = hpte_encode_avpn(vpn, psize, ssize);
370	941
371		- /* Bolted entries are always in the primary group */
	942	+ /*
	943	+ * We try to keep bolted entries always in the primary hash group,
	944	+ * but in some cases we can find them in the secondary one too.
	945	+ */
372	946	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
373	947	slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
374		- if (slot < 0)
375		- return -1;
	948	+ if (slot < 0) {
	949	+ /* Try in secondary */
	950	+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
	951	+ slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
	952	+ if (slot < 0)
	953	+ return -1;
	954	+ }
376	955	return hpte_group + slot;
377	956	}
378	957
..	..	@@ -418,6 +997,90 @@
418	997	BUG_ON(lpar_rc != H_SUCCESS);
419	998	}
420	999
	1000	+
	1001	+/*
	1002	+ * As defined in the PAPR's section 14.5.4.1.8
	1003	+ * The control mask doesn't include the returned reference and change bit from
	1004	+ * the processed PTE.
	1005	+ */
	1006	+#define HBLKR_AVPN 0x0100000000000000UL
	1007	+#define HBLKR_CTRL_MASK 0xf800000000000000UL
	1008	+#define HBLKR_CTRL_SUCCESS 0x8000000000000000UL
	1009	+#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
	1010	+#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
	1011	+
	1012	+/*
	1013	+ * Returns true if we support this block size for the specified segment
	1014	+ * base page size and actual page size.
	1015	+ *
	1016	+ * Currently, we only support a block size of 8.
	1017	+ */
	1018	+static inline bool is_supported_hlbkrm(int bpsize, int psize)
	1019	+{
	1020	+ return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
	1021	+}
	1022	+
	1023	+/**
	1024	+ * H_BLOCK_REMOVE caller.
	1025	+ * @idx should point to the latest @param entry set with a PTEX.
	1026	+ * If PTEs cannot be processed because another CPU has already locked that
	1027	+ * group, those entries are put back in @param starting at index 1.
	1028	+ * If entries have to be retried and @retry_busy is set to true, these entries
	1029	+ * are retried until success. If @retry_busy is set to false, the returned
	1030	+ * value is the number of entries yet to process.
	1031	+ */
	1032	+static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
	1033	+ bool retry_busy)
	1034	+{
	1035	+ unsigned long i, rc, new_idx;
	1036	+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
	1037	+
	1038	+ if (idx < 2) {
	1039	+ pr_warn("Unexpected empty call to H_BLOCK_REMOVE");
	1040	+ return 0;
	1041	+ }
	1042	+again:
	1043	+ new_idx = 0;
	1044	+ if (idx > PLPAR_HCALL9_BUFSIZE) {
	1045	+ pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx);
	1046	+ idx = PLPAR_HCALL9_BUFSIZE;
	1047	+ } else if (idx < PLPAR_HCALL9_BUFSIZE)
	1048	+ param[idx] = HBR_END;
	1049	+
	1050	+ rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
	1051	+ param[0], /* AVA */
	1052	+ param[1], param[2], param[3], param[4], /* TS0-7 */
	1053	+ param[5], param[6], param[7], param[8]);
	1054	+ if (rc == H_SUCCESS)
	1055	+ return 0;
	1056	+
	1057	+ BUG_ON(rc != H_PARTIAL);
	1058	+
	1059	+ /* Check that the unprocessed entries were 'not found' or 'busy' */
	1060	+ for (i = 0; i < idx-1; i++) {
	1061	+ unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
	1062	+
	1063	+ if (ctrl == HBLKR_CTRL_ERRBUSY) {
	1064	+ param[++new_idx] = param[i+1];
	1065	+ continue;
	1066	+ }
	1067	+
	1068	+ BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
	1069	+ && ctrl != HBLKR_CTRL_ERRNOTFOUND);
	1070	+ }
	1071	+
	1072	+ /*
	1073	+ * If there were entries found busy, retry these entries if requested,
	1074	+ * or if all the entries have to be retried.
	1075	+ */
	1076	+ if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
	1077	+ idx = new_idx + 1;
	1078	+ goto again;
	1079	+ }
	1080	+
	1081	+ return new_idx;
	1082	+}
	1083	+
421	1084	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
422	1085	/*
423	1086	* Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
..	..	@@ -425,17 +1088,57 @@
425	1088	*/
426	1089	#define PPC64_HUGE_HPTE_BATCH 12
427	1090
428		-static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
429		- unsigned long *vpn, int count,
430		- int psize, int ssize)
	1091	+static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
	1092	+ int count, int psize, int ssize)
	1093	+{
	1094	+ unsigned long param[PLPAR_HCALL9_BUFSIZE];
	1095	+ unsigned long shift, current_vpgb, vpgb;
	1096	+ int i, pix = 0;
	1097	+
	1098	+ shift = mmu_psize_defs[psize].shift;
	1099	+
	1100	+ for (i = 0; i < count; i++) {
	1101	+ /*
	1102	+ * Shift 3 more bits to the right to get an
	1103	+ * 8-page aligned virtual address.
	1104	+ */
	1105	+ vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
	1106	+ if (!pix || vpgb != current_vpgb) {
	1107	+ /*
	1108	+ * Need to start a new 8 pages block, flush
	1109	+ * the current one if needed.
	1110	+ */
	1111	+ if (pix)
	1112	+ (void)call_block_remove(pix, param, true);
	1113	+ current_vpgb = vpgb;
	1114	+ param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
	1115	+ pix = 1;
	1116	+ }
	1117	+
	1118	+ param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
	1119	+ if (pix == PLPAR_HCALL9_BUFSIZE) {
	1120	+ pix = call_block_remove(pix, param, false);
	1121	+ /*
	1122	+ * pix = 0 means that all the entries were
	1123	+ * removed, we can start a new block.
	1124	+ * Otherwise, this means that there are entries
	1125	+ * to retry, and pix points to latest one, so
	1126	+ * we should increment it and try to continue
	1127	+ * the same block.
	1128	+ */
	1129	+ if (pix)
	1130	+ pix++;
	1131	+ }
	1132	+ }
	1133	+ if (pix)
	1134	+ (void)call_block_remove(pix, param, true);
	1135	+}
	1136	+
	1137	+static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
	1138	+ int count, int psize, int ssize)
431	1139	{
432	1140	unsigned long param[PLPAR_HCALL9_BUFSIZE];
433	1141	int i = 0, pix = 0, rc;
434		- unsigned long flags = 0;
435		- int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
436		-
437		- if (lock_tlbie)
438		- spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
439	1142
440	1143	for (i = 0; i < count; i++) {
441	1144
..	..	@@ -463,6 +1166,24 @@
463	1166	param[6], param[7]);
464	1167	BUG_ON(rc != H_SUCCESS);
465	1168	}
	1169	+}
	1170	+
	1171	+static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
	1172	+ unsigned long *vpn,
	1173	+ int count, int psize,
	1174	+ int ssize)
	1175	+{
	1176	+ unsigned long flags = 0;
	1177	+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
	1178	+
	1179	+ if (lock_tlbie)
	1180	+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
	1181	+
	1182	+ /* Assuming THP size is 16M */
	1183	+ if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
	1184	+ hugepage_block_invalidate(slot, vpn, count, psize, ssize);
	1185	+ else
	1186	+ hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
466	1187
467	1188	if (lock_tlbie)
468	1189	spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
..	..	@@ -547,6 +1268,220 @@
547	1268	return 0;
548	1269	}
549	1270
	1271	+
	1272	+static inline unsigned long compute_slot(real_pte_t pte,
	1273	+ unsigned long vpn,
	1274	+ unsigned long index,
	1275	+ unsigned long shift,
	1276	+ int ssize)
	1277	+{
	1278	+ unsigned long slot, hash, hidx;
	1279	+
	1280	+ hash = hpt_hash(vpn, shift, ssize);
	1281	+ hidx = __rpte_to_hidx(pte, index);
	1282	+ if (hidx & _PTEIDX_SECONDARY)
	1283	+ hash = ~hash;
	1284	+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	1285	+ slot += hidx & _PTEIDX_GROUP_IX;
	1286	+ return slot;
	1287	+}
	1288	+
	1289	+/**
	1290	+ * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
	1291	+ * "all within the same naturally aligned 8 page virtual address block".
	1292	+ */
	1293	+static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
	1294	+ unsigned long *param)
	1295	+{
	1296	+ unsigned long vpn;
	1297	+ unsigned long i, pix = 0;
	1298	+ unsigned long index, shift, slot, current_vpgb, vpgb;
	1299	+ real_pte_t pte;
	1300	+ int psize, ssize;
	1301	+
	1302	+ psize = batch->psize;
	1303	+ ssize = batch->ssize;
	1304	+
	1305	+ for (i = 0; i < number; i++) {
	1306	+ vpn = batch->vpn[i];
	1307	+ pte = batch->pte[i];
	1308	+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
	1309	+ /*
	1310	+ * Shift 3 more bits to the right to get an
	1311	+ * 8-page aligned virtual address.
	1312	+ */
	1313	+ vpgb = (vpn >> (shift - VPN_SHIFT + 3));
	1314	+ if (!pix || vpgb != current_vpgb) {
	1315	+ /*
	1316	+ * Need to start a new 8 pages block, flush
	1317	+ * the current one if needed.
	1318	+ */
	1319	+ if (pix)
	1320	+ (void)call_block_remove(pix, param,
	1321	+ true);
	1322	+ current_vpgb = vpgb;
	1323	+ param[0] = hpte_encode_avpn(vpn, psize,
	1324	+ ssize);
	1325	+ pix = 1;
	1326	+ }
	1327	+
	1328	+ slot = compute_slot(pte, vpn, index, shift, ssize);
	1329	+ param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
	1330	+
	1331	+ if (pix == PLPAR_HCALL9_BUFSIZE) {
	1332	+ pix = call_block_remove(pix, param, false);
	1333	+ /*
	1334	+ * pix = 0 means that all the entries were
	1335	+ * removed, we can start a new block.
	1336	+ * Otherwise, this means that there are entries
	1337	+ * to retry, and pix points to latest one, so
	1338	+ * we should increment it and try to continue
	1339	+ * the same block.
	1340	+ */
	1341	+ if (pix)
	1342	+ pix++;
	1343	+ }
	1344	+ } pte_iterate_hashed_end();
	1345	+ }
	1346	+
	1347	+ if (pix)
	1348	+ (void)call_block_remove(pix, param, true);
	1349	+}
	1350	+
	1351	+/*
	1352	+ * TLB Block Invalidate Characteristics
	1353	+ *
	1354	+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
	1355	+ * is able to process for each (segment base page size, actual page size) pair.
	1356	+ *
	1357	+ * The ibm,get-system-parameter call returns a buffer with the
	1358	+ * following layout:
	1359	+ *
	1360	+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
	1361	+ * -----------------
	1362	+ * TLB Block Invalidate Specifiers:
	1363	+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
	1364	+ * [ 1 byte Number of page sizes (N) that are supported for the specified
	1365	+ * TLB invalidate block size ]
	1366	+ * [ 1 byte Encoded segment base page size and actual page size
	1367	+ * MSB=0 means 4k segment base page size and actual page size
	1368	+ * MSB=1 the penc value in mmu_psize_def ]
	1369	+ * ...
	1370	+ * -----------------
	1371	+ * Next TLB Block Invalidate Specifiers...
	1372	+ * -----------------
	1373	+ * [ 0 ]
	1374	+ */
	1375	+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
	1376	+ unsigned int block_size)
	1377	+{
	1378	+ if (block_size > hblkrm_size[bpsize][psize])
	1379	+ hblkrm_size[bpsize][psize] = block_size;
	1380	+}
	1381	+
	1382	+/*
	1383	+ * Decode the Encoded segment base page size and actual page size.
	1384	+ * PAPR specifies:
	1385	+ * - bit 7 is the L bit
	1386	+ * - bits 0-5 are the penc value
	1387	+ * If the L bit is 0, this means 4K segment base page size and actual page size
	1388	+ * otherwise the penc value should be read.
	1389	+ */
	1390	+#define HBLKRM_L_MASK 0x80
	1391	+#define HBLKRM_PENC_MASK 0x3f
	1392	+static inline void __init check_lp_set_hblkrm(unsigned int lp,
	1393	+ unsigned int block_size)
	1394	+{
	1395	+ unsigned int bpsize, psize;
	1396	+
	1397	+ /* First, check the L bit, if not set, this means 4K */
	1398	+ if ((lp & HBLKRM_L_MASK) == 0) {
	1399	+ set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
	1400	+ return;
	1401	+ }
	1402	+
	1403	+ lp &= HBLKRM_PENC_MASK;
	1404	+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
	1405	+ struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
	1406	+
	1407	+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
	1408	+ if (def->penc[psize] == lp) {
	1409	+ set_hblkrm_bloc_size(bpsize, psize, block_size);
	1410	+ return;
	1411	+ }
	1412	+ }
	1413	+ }
	1414	+}
	1415	+
	1416	+#define SPLPAR_TLB_BIC_TOKEN 50
	1417	+
	1418	+/*
	1419	+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
	1420	+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
	1421	+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
	1422	+ * (128 bytes) for the buffer to get plenty of space.
	1423	+ */
	1424	+#define SPLPAR_TLB_BIC_MAXLENGTH 128
	1425	+
	1426	+void __init pseries_lpar_read_hblkrm_characteristics(void)
	1427	+{
	1428	+ const s32 token = rtas_token("ibm,get-system-parameter");
	1429	+ unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
	1430	+ int call_status, len, idx, bpsize;
	1431	+
	1432	+ if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
	1433	+ return;
	1434	+
	1435	+ do {
	1436	+ spin_lock(&rtas_data_buf_lock);
	1437	+ memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
	1438	+ call_status = rtas_call(token, 3, 1, NULL, SPLPAR_TLB_BIC_TOKEN,
	1439	+ __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE);
	1440	+ memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
	1441	+ local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
	1442	+ spin_unlock(&rtas_data_buf_lock);
	1443	+ } while (rtas_busy_delay(call_status));
	1444	+
	1445	+ if (call_status != 0) {
	1446	+ pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
	1447	+ __FILE__, __func__, call_status);
	1448	+ return;
	1449	+ }
	1450	+
	1451	+ /*
	1452	+ * The first two (2) bytes of the data in the buffer are the length of
	1453	+ * the returned data, not counting these first two (2) bytes.
	1454	+ */
	1455	+ len = be16_to_cpu(((u16 )local_buffer)) + 2;
	1456	+ if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
	1457	+ pr_warn("%s too large returned buffer %d", __func__, len);
	1458	+ return;
	1459	+ }
	1460	+
	1461	+ idx = 2;
	1462	+ while (idx < len) {
	1463	+ u8 block_shift = local_buffer[idx++];
	1464	+ u32 block_size;
	1465	+ unsigned int npsize;
	1466	+
	1467	+ if (!block_shift)
	1468	+ break;
	1469	+
	1470	+ block_size = 1 << block_shift;
	1471	+
	1472	+ for (npsize = local_buffer[idx++];
	1473	+ npsize > 0 && idx < len; npsize--)
	1474	+ check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
	1475	+ block_size);
	1476	+ }
	1477	+
	1478	+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
	1479	+ for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
	1480	+ if (hblkrm_size[bpsize][idx])
	1481	+ pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
	1482	+ bpsize, idx, hblkrm_size[bpsize][idx]);
	1483	+}
	1484	+
550	1485	/*
551	1486	* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
552	1487	* lock.
..	..	@@ -559,12 +1494,17 @@
559	1494	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
560	1495	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
561	1496	unsigned long param[PLPAR_HCALL9_BUFSIZE];
562		- unsigned long hash, index, shift, hidx, slot;
	1497	+ unsigned long index, shift, slot;
563	1498	real_pte_t pte;
564	1499	int psize, ssize;
565	1500
566	1501	if (lock_tlbie)
567	1502	spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
	1503	+
	1504	+ if (is_supported_hlbkrm(batch->psize, batch->psize)) {
	1505	+ do_block_remove(number, batch, param);
	1506	+ goto out;
	1507	+ }
568	1508
569	1509	psize = batch->psize;
570	1510	ssize = batch->ssize;
..	..	@@ -573,12 +1513,7 @@
573	1513	vpn = batch->vpn[i];
574	1514	pte = batch->pte[i];
575	1515	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
576		- hash = hpt_hash(vpn, shift, ssize);
577		- hidx = __rpte_to_hidx(pte, index);
578		- if (hidx & _PTEIDX_SECONDARY)
579		- hash = ~hash;
580		- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
581		- slot += hidx & _PTEIDX_GROUP_IX;
	1516	+ slot = compute_slot(pte, vpn, index, shift, ssize);
582	1517	if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
583	1518	/*
584	1519	* lpar doesn't use the passed actual page size
..	..	@@ -609,6 +1544,7 @@
609	1544	BUG_ON(rc != H_SUCCESS);
610	1545	}
611	1546
	1547	+out:
612	1548	if (lock_tlbie)
613	1549	spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
614	1550	}
..	..	@@ -693,8 +1629,10 @@
693	1629	break;
694	1630
695	1631	case H_PARAMETER:
	1632	+ pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
696	1633	return -EINVAL;
697	1634	case H_RESOURCE:
	1635	+ pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
698	1636	return -EPERM;
699	1637	default:
700	1638	pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
..	..	@@ -711,7 +1649,6 @@
711	1649	if (rc != 0) {
712	1650	switch (state.commit_rc) {
713	1651	case H_PTEG_FULL:
714		- pr_warn("Hash collision while resizing HPT\n");
715	1652	return -ENOSPC;
716	1653
717	1654	default:
..	..	@@ -736,9 +1673,11 @@
736	1673
737	1674	if (table_size)
738	1675	flags \|= PROC_TABLE_NEW;
739		- if (radix_enabled())
740		- flags \|= PROC_TABLE_RADIX \| PROC_TABLE_GTSE;
741		- else
	1676	+ if (radix_enabled()) {
	1677	+ flags \|= PROC_TABLE_RADIX;
	1678	+ if (mmu_has_feature(MMU_FTR_GTSE))
	1679	+ flags \|= PROC_TABLE_GTSE;
	1680	+ } else
742	1681	flags \|= PROC_TABLE_HPT_SLB;
743	1682	for (;;) {
744	1683	rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
..	..	@@ -765,17 +1704,27 @@
765	1704	mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
766	1705	mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
767	1706	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
768		- register_process_table = pseries_lpar_register_process_table;
769	1707
770	1708	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
771	1709	mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
	1710	+
	1711	+ /*
	1712	+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
	1713	+ * to inform the hypervisor that we wish to use the HPT.
	1714	+ */
	1715	+ if (cpu_has_feature(CPU_FTR_ARCH_300))
	1716	+ pseries_lpar_register_process_table(0, 0, 0);
772	1717	}
773	1718
	1719	+#ifdef CONFIG_PPC_RADIX_MMU
774	1720	void radix_init_pseries(void)
775	1721	{
776	1722	pr_info("Using radix MMU under hypervisor\n");
777		- register_process_table = pseries_lpar_register_process_table;
	1723	+
	1724	+ pseries_lpar_register_process_table(__pa(process_tb),
	1725	+ 0, PRTB_SIZE_SHIFT - 12);
778	1726	}
	1727	+#endif
779	1728
780	1729	#ifdef CONFIG_PPC_SMLPAR
781	1730	#define CMO_FREE_HINT_DEFAULT 1
..	..	@@ -870,8 +1819,7 @@
870	1819
871	1820	/*
872	1821	* Since the tracing code might execute hcalls we need to guard against
873		- * recursion. One example of this are spinlocks calling H_YIELD on
874		- * shared processor partitions.
	1822	+ * recursion.
875	1823	*/
876	1824	static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
877	1825
..	..	@@ -1062,24 +2010,11 @@
1062	2010	return 0;
1063	2011
1064	2012	vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
1065		- if (!vpa_dir) {
1066		- pr_warn("%s: can't create vpa root dir\n", __func__);
1067		- return -ENOMEM;
1068		- }
1069	2013
1070	2014	/* set up the per-cpu vpa file*/
1071	2015	for_each_possible_cpu(i) {
1072		- struct dentry *d;
1073		-
1074	2016	sprintf(name, "cpu-%ld", i);
1075		-
1076		- d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
1077		- &vpa_fops);
1078		- if (!d) {
1079		- pr_warn("%s: can't create per-cpu vpa file\n",
1080		- __func__);
1081		- return -ENOMEM;
1082		- }
	2017	+ debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
1083	2018	}
1084	2019
1085	2020	return 0;