/*	$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $	*/

/*-
 * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran and Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * x86 pmap(9) module: TLB shootdowns.
 *
 * TLB shootdowns are hard interrupts that operate outside the SPL framework.
 * They do not need to be blocked, provided that the pmap module gets the
 * order of events correct.  The calls are made by poking the LAPIC directly.
 * The interrupt handler is short and does one of the following: invalidate
 * a set of pages, all user TLB entries or the entire TLB.
 */

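/*
 * In outline: pmap_tlb_shootdown() batches requests into the calling CPU's
 * private packet; pmap_tlb_shootnow() publishes that packet in the global
 * mailbox, notifies the target CPUs (IPI natively, multicast hypercall on
 * Xen) and performs any local flush; remote CPUs run pmap_tlb_intr() to
 * flush and then decrement pmap_tlb_pendcount to acknowledge.
 */
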
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <uvm/uvm.h>

#include <machine/cpuvar.h>
#ifdef XEN
#include <xen/xenpmap.h>
#endif /* XEN */
#include <x86/i82489reg.h>
#include <x86/i82489var.h>

/*
 * TLB shootdown structures.
 */

typedef struct {
#ifdef _LP64
	uintptr_t		tp_va[14];	/* whole struct: 128 bytes */
#else
	uintptr_t		tp_va[13];	/* whole struct: 64 bytes */
#endif
	uint16_t		tp_count;	/* # of VAs, or -1 = full flush */
	uint16_t		tp_pte;		/* ORed PTE bits; PG_G => global */
	int			tp_userpmap;	/* non-zero if a user pmap */
	kcpuset_t *		tp_cpumask;	/* CPUs to process this packet */
} pmap_tlb_packet_t;

/*
 * No more than N separate invlpg.
 *
 * Statistically, a value of six is big enough to cover the requested number
 * of pages in ~ 95% of the TLB shootdowns we are getting. We therefore rarely
 * reach the limit, and increasing it can actually reduce the performance due
 * to the high cost of invlpg.
 */
#define	TP_MAXVA		6

/*
 * TLB shootdown state: the global packet is a mailbox filled by the CPU
 * initiating a shootdown, pmap_tlb_pendcount counts the remote CPUs that
 * have yet to acknowledge it, and pmap_tlb_gen numbers the generations
 * of updates.
 */
static pmap_tlb_packet_t	pmap_tlb_packet		__cacheline_aligned;
static volatile u_int		pmap_tlb_pendcount	__cacheline_aligned;
static volatile u_int		pmap_tlb_gen		__cacheline_aligned;
static struct evcnt		pmap_tlb_evcnt		__cacheline_aligned;

/*
 * TLB shootdown statistics.
 */
#ifdef TLBSTATS
static struct evcnt		tlbstat_local[TLBSHOOT__MAX];
static struct evcnt		tlbstat_remote[TLBSHOOT__MAX];
static struct evcnt		tlbstat_kernel[TLBSHOOT__MAX];
static struct evcnt		tlbstat_single_req;
static struct evcnt		tlbstat_single_issue;
static const char *		tlbstat_name[ ] = {
	"APTE",
	"KENTER",
	"KREMOVE",
	"FREE_PTP1",
	"FREE_PTP2",
	"REMOVE_PTE",
	"REMOVE_PTES",
	"SYNC_PV1",
	"SYNC_PV2",
	"WRITE_PROTECT",
	"ENTER",
	"UPDATE",
	"BUS_DMA",
	"BUS_SPACE"
};
#endif

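/*
 * pmap_tlb_init: initialize the TLB shootdown state and attach the
 * shootdown event counter (plus the TLBSTATS counters, if enabled).
 */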
void
pmap_tlb_init(void)
{

	memset(&pmap_tlb_packet, 0, sizeof(pmap_tlb_packet_t));
	pmap_tlb_pendcount = 0;
	pmap_tlb_gen = 0;

	evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
	    NULL, "TLB", "shootdown");

#ifdef TLBSTATS
	int i;

	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_local[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot local", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_remote[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot remote", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_kernel[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot kernel", tlbstat_name[i]);
	}
	evcnt_attach_dynamic(&tlbstat_single_req, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "requests");
	evcnt_attach_dynamic(&tlbstat_single_issue, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "issues");
#endif
}

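/*
 * pmap_tlb_cpu_init: initialize the per-CPU shootdown packet for 'ci'.
 */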
void
pmap_tlb_cpu_init(struct cpu_info *ci)
{
	pmap_tlb_packet_t *tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	memset(tp, 0, sizeof(pmap_tlb_packet_t));
	kcpuset_create(&tp->tp_cpumask, true);
}

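/*
 * pmap_tlbstat_count: record whether a shootdown was local, remote or
 * both, for statistics.  Compiles to nothing unless TLBSTATS is defined.
 */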
static inline void
pmap_tlbstat_count(struct pmap *pm, vaddr_t va, tlbwhy_t why)
{
#ifdef TLBSTATS
	const cpuid_t cid = cpu_index(curcpu());
	bool local = false, remote = false;

	if (va != (vaddr_t)-1LL) {
		atomic_inc_64(&tlbstat_single_req.ev_count);
	}
	if (pm == pmap_kernel()) {
		atomic_inc_64(&tlbstat_kernel[why].ev_count);
		return;
	}

	if (va >= VM_MAXUSER_ADDRESS) {
		remote = kcpuset_isotherset(pm->pm_kernel_cpus, cid);
		local = kcpuset_isset(pm->pm_kernel_cpus, cid);
	}
	remote |= kcpuset_isotherset(pm->pm_cpus, cid);
	local |= kcpuset_isset(pm->pm_cpus, cid);

	if (local) {
		atomic_inc_64(&tlbstat_local[why].ev_count);
	}
	if (remote) {
		atomic_inc_64(&tlbstat_remote[why].ev_count);
	}
#endif
}

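/*
 * pmap_tlb_invalidate: process a shootdown packet on the local CPU, by
 * flushing either the listed pages or the whole TLB (including global
 * entries if PG_G is set in the packet).
 */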
static inline void
pmap_tlb_invalidate(const pmap_tlb_packet_t *tp)
{
	int i;

	/* Find out what we need to invalidate. */
	if (tp->tp_count == (uint16_t)-1) {
		u_int egen = uvm_emap_gen_return();
		if (tp->tp_pte & PG_G) {
			/* Invalidating user and kernel TLB entries. */
			tlbflushg();
		} else {
			/* Invalidating user TLB entries only. */
			tlbflush();
		}
		uvm_emap_update(egen);
	} else {
		/* Invalidating a single page or a range of pages. */
		for (i = tp->tp_count - 1; i >= 0; i--) {
			pmap_update_pg(tp->tp_va[i]);
		}
	}
}

/*
 * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'.
 */
void
pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
{
	pmap_tlb_packet_t *tp;
	int s;

#ifndef XEN
	KASSERT((pte & PG_G) == 0 || pm == pmap_kernel());
#endif

	/*
	 * If tearing down the pmap, do nothing.  We will flush later
	 * when we are ready to recycle/destroy it.
	 */
	if (__predict_false(curlwp->l_md.md_gc_pmap == pm)) {
		return;
	}

	if ((pte & PG_PS) != 0) {
		va &= PG_LGFRAME;
	}

	/*
	 * Add the shootdown operation to our pending set.
	 */
	s = splvm();
	tp = (pmap_tlb_packet_t *)curcpu()->ci_pmap_data;

	/* Whole address flush will be needed if PG_G is set. */
	CTASSERT(PG_G == (uint16_t)PG_G);
	tp->tp_pte |= (uint16_t)pte;

	if (tp->tp_count == (uint16_t)-1) {
		/*
		 * Already flushing everything.
		 */
	} else if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
		/* Flush a single page. */
		tp->tp_va[tp->tp_count++] = va;
		KASSERT(tp->tp_count > 0);
	} else {
		/* Flush everything. */
		tp->tp_count = (uint16_t)-1;
	}

	if (pm != pmap_kernel()) {
		kcpuset_merge(tp->tp_cpumask, pm->pm_cpus);
		if (va >= VM_MAXUSER_ADDRESS) {
			kcpuset_merge(tp->tp_cpumask, pm->pm_kernel_cpus);
		}
		tp->tp_userpmap = 1;
	} else {
		kcpuset_copy(tp->tp_cpumask, kcpuset_running);
	}
	pmap_tlbstat_count(pm, va, why);
	splx(s);
}

#ifdef MULTIPROCESSOR
#ifdef XEN

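/*
 * pmap_tlb_processpacket: Xen variant.  Ask the hypervisor to flush the
 * target CPUs; the multicast calls are synchronous, so no acknowledgement
 * from the remote CPUs is needed.
 */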
static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{

	if (tp->tp_count != (uint16_t)-1) {
		/* Invalidating a single page or a range of pages. */
		for (int i = tp->tp_count - 1; i >= 0; i--) {
			xen_mcast_invlpg(tp->tp_va[i], target);
		}
	} else {
		xen_mcast_tlbflush(target);
	}

	/* Remote CPUs have been synchronously flushed. */
	pmap_tlb_pendcount = 0;
}

#else

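/*
 * pmap_tlb_processpacket: native variant.  Send a TLB shootdown IPI to
 * every target CPU, either individually or as a broadcast to all CPUs
 * but the current one.
 */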
static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{
	int err = 0;

	if (!kcpuset_match(target, kcpuset_attached)) {
		const struct cpu_info * const self = curcpu();
		CPU_INFO_ITERATOR cii;
		struct cpu_info *lci;

		for (CPU_INFO_FOREACH(cii, lci)) {
			const cpuid_t lcid = cpu_index(lci);

			if (__predict_false(lci == self) ||
			    !kcpuset_isset(target, lcid)) {
				continue;
			}
			err |= x86_ipi(LAPIC_TLB_VECTOR,
			    lci->ci_cpuid, LAPIC_DLMODE_FIXED);
		}
	} else {
		err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
		    LAPIC_DLMODE_FIXED);
	}
	KASSERT(err == 0);
}

#endif /* XEN */
#endif /* MULTIPROCESSOR */

/*
 * pmap_tlb_shootnow: process pending TLB shootdowns queued on current CPU.
 *
 * => Must be called with preemption disabled.
 */
void
pmap_tlb_shootnow(void)
{
	pmap_tlb_packet_t *tp;
	struct cpu_info *ci;
	kcpuset_t *target;
	u_int local, gen, rcpucount;
	cpuid_t cid;
	int s;

	KASSERT(kpreempt_disabled());

	ci = curcpu();
	tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	/* Pre-check first. */
	if (tp->tp_count == 0) {
		return;
	}

	s = splvm();
	if (tp->tp_count == 0) {
		splx(s);
		return;
	}
	cid = cpu_index(ci);

	target = tp->tp_cpumask;
	local = kcpuset_isset(target, cid) ? 1 : 0;
	rcpucount = kcpuset_countset(target) - local;
	gen = 0;

#ifdef MULTIPROCESSOR
	if (rcpucount) {
		int count;

		/*
		 * Gain ownership of the shootdown mailbox.  We must stay
		 * at IPL_VM once we own it or could deadlock against an
		 * interrupt on this CPU trying to do the same.
		 */
		KASSERT(rcpucount < ncpu);

		while (atomic_cas_uint(&pmap_tlb_pendcount, 0, rcpucount)) {
			splx(s);
			count = SPINLOCK_BACKOFF_MIN;
			while (pmap_tlb_pendcount) {
				KASSERT(pmap_tlb_pendcount < ncpu);
				SPINLOCK_BACKOFF(count);
			}
			s = splvm();
			/* An interrupt might have done it for us. */
			if (tp->tp_count == 0) {
				splx(s);
				return;
			}
		}

		/*
		 * Start a new generation of updates.  Copy our shootdown
		 * requests into the global buffer.  Note that tp_cpumask
		 * will not be used by remote CPUs (it would be unsafe).
		 */
		gen = ++pmap_tlb_gen;
		memcpy(&pmap_tlb_packet, tp, sizeof(*tp));
		pmap_tlb_evcnt.ev_count++;

		/*
		 * Initiate shootdowns on remote CPUs.
		 */
		pmap_tlb_processpacket(tp, target);
	}
#endif

	/*
	 * Shootdowns on remote CPUs are now in flight.  In the meantime,
	 * perform local shootdown if needed.
	 */
	if (local) {
		pmap_tlb_invalidate(tp);
	}

	/*
	 * Clear out our local buffer.
	 */
#ifdef TLBSTATS
	if (tp->tp_count != (uint16_t)-1) {
		atomic_add_64(&tlbstat_single_issue.ev_count, tp->tp_count);
	}
#endif
	kcpuset_zero(tp->tp_cpumask);
	tp->tp_userpmap = 0;
	tp->tp_count = 0;
	tp->tp_pte = 0;
	splx(s);

	/*
	 * Now wait for the current generation of updates to be
	 * processed by remote CPUs.
	 */
	if (rcpucount && pmap_tlb_pendcount) {
		int count = SPINLOCK_BACKOFF_MIN;

		while (pmap_tlb_pendcount && pmap_tlb_gen == gen) {
			KASSERT(pmap_tlb_pendcount < ncpu);
			SPINLOCK_BACKOFF(count);
		}
	}
}

/*
 * pmap_tlb_intr: pmap shootdown interrupt handler to invalidate TLB entries.
 *
 * => Called from IPI only.
 */
void
pmap_tlb_intr(void)
{
	const pmap_tlb_packet_t *tp = &pmap_tlb_packet;
	struct cpu_info *ci = curcpu();

	KASSERT(pmap_tlb_pendcount > 0);

	/* First, TLB flush. */
	pmap_tlb_invalidate(tp);

	/*
	 * Check the current TLB state.  If we do not want further
	 * invalidations for this pmap, then take the CPU out of
	 * the pmap's bitmask.
	 */
	if (ci->ci_tlbstate == TLBSTATE_LAZY && tp->tp_userpmap) {
		struct pmap *pm = ci->ci_pmap;
		cpuid_t cid = cpu_index(ci);

		kcpuset_atomic_clear(pm->pm_cpus, cid);
		ci->ci_tlbstate = TLBSTATE_STALE;
	}

	/* Finally, ack the request. */
	atomic_dec_uint(&pmap_tlb_pendcount);
}