/*	$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $	*/

/*-
 * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran and Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * x86 pmap(9) module: TLB shootdowns.
 *
 * TLB shootdowns are hard interrupts that operate outside the SPL framework.
 * They do not need to be blocked, provided that the pmap module gets the
 * order of events correct.  The calls are made by poking the LAPIC directly.
 * The interrupt handler is short and does one of the following: invalidate
 * a set of pages, all user TLB entries, or the entire TLB.
 */
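
/*
 * Overview of the flow, all of which is visible in this file:
 *
 *	pmap_tlb_shootdown()	queues a request in the per-CPU packet;
 *	pmap_tlb_shootnow()	copies the packet to the global mailbox,
 *				sends the IPIs and waits for acknowledgement;
 *	pmap_tlb_intr()		runs on the remote CPUs, invalidates, and
 *				decrements the pending count to acknowledge.
 */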

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <uvm/uvm.h>

#include <machine/cpuvar.h>
#ifdef XEN
#include <xen/xenpmap.h>
#endif /* XEN */
#include <x86/i82489reg.h>
#include <x86/i82489var.h>

/*
 * TLB shootdown structures.
 */

typedef struct {
#ifdef _LP64
	uintptr_t		tp_va[14];	/* whole struct: 128 bytes */
#else
	uintptr_t		tp_va[13];	/* whole struct: 64 bytes */
#endif
	uint16_t		tp_count;
	uint16_t		tp_pte;
	int			tp_userpmap;
	kcpuset_t *		tp_cpumask;
} pmap_tlb_packet_t;
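
/*
 * The sizes annotated above work out as follows.  On _LP64:
 * 14 * 8 (tp_va) + 2 + 2 (the uint16_t counts) + 4 (tp_userpmap) +
 * 8 (tp_cpumask) = 128 bytes.  On 32-bit: 13 * 4 + 2 + 2 + 4 + 4 = 64.
 * Either way the packet fills whole cache lines, which matters because
 * the global copy is pulled across CPUs during a shootdown.
 */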

/*
 * No more than N separate invlpg.
 *
 * Statistically, a value of six is big enough to cover the requested number
 * of pages in ~95% of the TLB shootdowns we are getting.  We therefore rarely
 * reach the limit, and increasing it can actually reduce performance due
 * to the high cost of invlpg.
 */
#define	TP_MAXVA	6

/*
 * TLB shootdown state.
 */
static pmap_tlb_packet_t pmap_tlb_packet	__cacheline_aligned;
static volatile u_int	pmap_tlb_pendcount	__cacheline_aligned;
static volatile u_int	pmap_tlb_gen		__cacheline_aligned;
static struct evcnt	pmap_tlb_evcnt		__cacheline_aligned;

/*
 * TLB shootdown statistics.
 */
#ifdef TLBSTATS
static struct evcnt	tlbstat_local[TLBSHOOT__MAX];
static struct evcnt	tlbstat_remote[TLBSHOOT__MAX];
static struct evcnt	tlbstat_kernel[TLBSHOOT__MAX];
static struct evcnt	tlbstat_single_req;
static struct evcnt	tlbstat_single_issue;
static const char *	tlbstat_name[] = {
	"APTE",
	"KENTER",
	"KREMOVE",
	"FREE_PTP1",
	"FREE_PTP2",
	"REMOVE_PTE",
	"REMOVE_PTES",
	"SYNC_PV1",
	"SYNC_PV2",
	"WRITE_PROTECT",
	"ENTER",
	"UPDATE",
	"BUS_DMA",
	"BUS_SPACE"
};
#endif

void
pmap_tlb_init(void)
{

	memset(&pmap_tlb_packet, 0, sizeof(pmap_tlb_packet_t));
	pmap_tlb_pendcount = 0;
	pmap_tlb_gen = 0;

	evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
	    NULL, "TLB", "shootdown");

#ifdef TLBSTATS
	int i;

	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_local[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot local", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_remote[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot remote", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_kernel[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot kernel", tlbstat_name[i]);
	}
	evcnt_attach_dynamic(&tlbstat_single_req, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "requests");
	evcnt_attach_dynamic(&tlbstat_single_issue, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "issues");
#endif
}

void
pmap_tlb_cpu_init(struct cpu_info *ci)
{
	pmap_tlb_packet_t *tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	memset(tp, 0, sizeof(pmap_tlb_packet_t));
	kcpuset_create(&tp->tp_cpumask, true);
}

static inline void
pmap_tlbstat_count(struct pmap *pm, vaddr_t va, tlbwhy_t why)
{
#ifdef TLBSTATS
	const cpuid_t cid = cpu_index(curcpu());
	bool local = false, remote = false;

	if (va != (vaddr_t)-1LL) {
		atomic_inc_64(&tlbstat_single_req.ev_count);
	}
	if (pm == pmap_kernel()) {
		atomic_inc_64(&tlbstat_kernel[why].ev_count);
		return;
	}

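	/*
	 * Note that a full-flush request (va == (vaddr_t)-1) also takes
	 * the branch below: the all-ones value compares above
	 * VM_MAXUSER_ADDRESS, so CPUs that only have the kernel part of
	 * the pmap loaded get counted as well.
	 */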
	if (va >= VM_MAXUSER_ADDRESS) {
		remote = kcpuset_isotherset(pm->pm_kernel_cpus, cid);
		local = kcpuset_isset(pm->pm_kernel_cpus, cid);
	}
	remote |= kcpuset_isotherset(pm->pm_cpus, cid);
	local |= kcpuset_isset(pm->pm_cpus, cid);

	if (local) {
		atomic_inc_64(&tlbstat_local[why].ev_count);
	}
	if (remote) {
		atomic_inc_64(&tlbstat_remote[why].ev_count);
	}
#endif
}

static inline void
pmap_tlb_invalidate(const pmap_tlb_packet_t *tp)
{
	int i;

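	/*
	 * tp_count == (uint16_t)-1 is the "flush everything" sentinel.
	 * tlbflush() reloads %cr3 and so leaves global (PG_G) entries in
	 * place; tlbflushg() invalidates those as well.
	 */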
	/* Find out what we need to invalidate. */
	if (tp->tp_count == (uint16_t)-1) {
		u_int egen = uvm_emap_gen_return();
		if (tp->tp_pte & PG_G) {
			/* Invalidating user and kernel TLB entries. */
			tlbflushg();
		} else {
			/* Invalidating user TLB entries only. */
			tlbflush();
		}
		uvm_emap_update(egen);
	} else {
		/* Invalidating a single page or a range of pages. */
		for (i = tp->tp_count - 1; i >= 0; i--) {
			pmap_update_pg(tp->tp_va[i]);
		}
	}
}

/*
 * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'.
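 *
 * => A 'va' of (vaddr_t)-1 requests a flush of the whole address space
 *    rather than of a single page.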
 */
void
pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
{
	pmap_tlb_packet_t *tp;
	int s;

#ifndef XEN
	KASSERT((pte & PG_G) == 0 || pm == pmap_kernel());
#endif

	/*
	 * If tearing down the pmap, do nothing.  We will flush later
	 * when we are ready to recycle/destroy it.
	 */
	if (__predict_false(curlwp->l_md.md_gc_pmap == pm)) {
		return;
	}

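	/*
	 * For a large (PG_PS) mapping, truncate the address to the
	 * large-page frame; invalidating the base address covers the
	 * mapping's single TLB entry.
	 */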
	if ((pte & PG_PS) != 0) {
		va &= PG_LGFRAME;
	}

	/*
	 * Add the shootdown operation to our pending set.
	 */
	s = splvm();
	tp = (pmap_tlb_packet_t *)curcpu()->ci_pmap_data;

	/* Whole address flush will be needed if PG_G is set. */
	CTASSERT(PG_G == (uint16_t)PG_G);
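	/* The CTASSERT above guarantees PG_G survives this truncation. */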
	tp->tp_pte |= (uint16_t)pte;

	if (tp->tp_count == (uint16_t)-1) {
		/*
		 * Already flushing everything.
		 */
	} else if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
		/* Flush a single page. */
		tp->tp_va[tp->tp_count++] = va;
		KASSERT(tp->tp_count > 0);
	} else {
		/* Flush everything. */
		tp->tp_count = (uint16_t)-1;
	}

	if (pm != pmap_kernel()) {
		kcpuset_merge(tp->tp_cpumask, pm->pm_cpus);
		if (va >= VM_MAXUSER_ADDRESS) {
			kcpuset_merge(tp->tp_cpumask, pm->pm_kernel_cpus);
		}
		tp->tp_userpmap = 1;
	} else {
		kcpuset_copy(tp->tp_cpumask, kcpuset_running);
	}
	pmap_tlbstat_count(pm, va, why);
	splx(s);
}

#ifdef MULTIPROCESSOR
#ifdef XEN

static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{

	if (tp->tp_count != (uint16_t)-1) {
		/* Invalidating a single page or a range of pages. */
		for (int i = tp->tp_count - 1; i >= 0; i--) {
			xen_mcast_invlpg(tp->tp_va[i], target);
		}
	} else {
		xen_mcast_tlbflush(target);
	}

	/* Remote CPUs have been synchronously flushed. */
	pmap_tlb_pendcount = 0;
}

#else

static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{
	int err = 0;

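	/*
	 * Two delivery strategies: if every attached CPU is a target,
	 * one broadcast IPI (ALLEXCL) reaches them all; otherwise walk
	 * the CPU list and send a fixed-vector IPI to each target.
	 */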
	if (!kcpuset_match(target, kcpuset_attached)) {
		const struct cpu_info * const self = curcpu();
		CPU_INFO_ITERATOR cii;
		struct cpu_info *lci;

		for (CPU_INFO_FOREACH(cii, lci)) {
			const cpuid_t lcid = cpu_index(lci);

			if (__predict_false(lci == self) ||
			    !kcpuset_isset(target, lcid)) {
				continue;
			}
			err |= x86_ipi(LAPIC_TLB_VECTOR,
			    lci->ci_cpuid, LAPIC_DLMODE_FIXED);
		}
	} else {
		err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
		    LAPIC_DLMODE_FIXED);
	}
	KASSERT(err == 0);
}

#endif /* XEN */
#endif /* MULTIPROCESSOR */

/*
 * pmap_tlb_shootnow: process pending TLB shootdowns queued on current CPU.
 *
 * => Must be called with preemption disabled.
 */
void
pmap_tlb_shootnow(void)
{
	pmap_tlb_packet_t *tp;
	struct cpu_info *ci;
	kcpuset_t *target;
	u_int local, gen, rcpucount;
	cpuid_t cid;
	int s;

	KASSERT(kpreempt_disabled());

	ci = curcpu();
	tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	/* Pre-check first. */
	if (tp->tp_count == 0) {
		return;
	}

	s = splvm();
	if (tp->tp_count == 0) {
		splx(s);
		return;
	}
	cid = cpu_index(ci);

	target = tp->tp_cpumask;
	local = kcpuset_isset(target, cid) ? 1 : 0;
	rcpucount = kcpuset_countset(target) - local;
	gen = 0;

#ifdef MULTIPROCESSOR
	if (rcpucount) {
		int count;

		/*
		 * Gain ownership of the shootdown mailbox.  We must stay
		 * at IPL_VM once we own it or could deadlock against an
		 * interrupt on this CPU trying to do the same.
		 */
		KASSERT(rcpucount < ncpu);

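		/*
		 * atomic_cas_uint() returns the previous value of
		 * pmap_tlb_pendcount: non-zero means another CPU still
		 * owns the mailbox, so drop to the old IPL, spin until
		 * it drains, and retry the claim.
		 */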
		while (atomic_cas_uint(&pmap_tlb_pendcount, 0, rcpucount)) {
			splx(s);
			count = SPINLOCK_BACKOFF_MIN;
			while (pmap_tlb_pendcount) {
				KASSERT(pmap_tlb_pendcount < ncpu);
				SPINLOCK_BACKOFF(count);
			}
			s = splvm();
			/* An interrupt might have done it for us. */
			if (tp->tp_count == 0) {
				splx(s);
				return;
			}
		}

		/*
		 * Start a new generation of updates.  Copy our shootdown
		 * requests into the global buffer.  Note that tp_cpumask
		 * will not be used by remote CPUs (it would be unsafe).
		 */
		gen = ++pmap_tlb_gen;
		memcpy(&pmap_tlb_packet, tp, sizeof(*tp));
		pmap_tlb_evcnt.ev_count++;

		/*
		 * Initiate shootdowns on remote CPUs.
		 */
		pmap_tlb_processpacket(tp, target);
	}
#endif

	/*
	 * Shootdowns on remote CPUs are now in flight.  In the meantime,
	 * perform local shootdown if needed.
	 */
	if (local) {
		pmap_tlb_invalidate(tp);
	}

	/*
	 * Clear out our local buffer.
	 */
#ifdef TLBSTATS
	if (tp->tp_count != (uint16_t)-1) {
		atomic_add_64(&tlbstat_single_issue.ev_count, tp->tp_count);
	}
#endif
	kcpuset_zero(tp->tp_cpumask);
	tp->tp_userpmap = 0;
	tp->tp_count = 0;
	tp->tp_pte = 0;
	splx(s);

	/*
	 * Now wait for the current generation of updates to be
	 * processed by remote CPUs.
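	 *
	 * The pmap_tlb_gen check lets us stop early: the generation can
	 * only advance once the mailbox has been re-claimed, and
	 * re-claiming requires our pending count to have drained to zero,
	 * so a generation change also means our updates were processed.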
	 */
	if (rcpucount && pmap_tlb_pendcount) {
		int count = SPINLOCK_BACKOFF_MIN;

		while (pmap_tlb_pendcount && pmap_tlb_gen == gen) {
			KASSERT(pmap_tlb_pendcount < ncpu);
			SPINLOCK_BACKOFF(count);
		}
	}
}

/*
 * pmap_tlb_intr: pmap shootdown interrupt handler to invalidate TLB entries.
 *
 * => Called from IPI only.
 */
void
pmap_tlb_intr(void)
{
	const pmap_tlb_packet_t *tp = &pmap_tlb_packet;
	struct cpu_info *ci = curcpu();

	KASSERT(pmap_tlb_pendcount > 0);
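	/*
	 * Reading the global packet without a lock is safe here: the
	 * mailbox cannot be re-claimed and overwritten until every
	 * targeted CPU has decremented pmap_tlb_pendcount below.
	 */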

	/* First, TLB flush. */
	pmap_tlb_invalidate(tp);

	/*
	 * Check the current TLB state.  If we do not want further
	 * invalidations for this pmap, then take the CPU out of
	 * the pmap's bitmask.
	 */
	if (ci->ci_tlbstate == TLBSTATE_LAZY && tp->tp_userpmap) {
		struct pmap *pm = ci->ci_pmap;
		cpuid_t cid = cpu_index(ci);

		kcpuset_atomic_clear(pm->pm_cpus, cid);
		ci->ci_tlbstate = TLBSTATE_STALE;
	}

	/* Finally, ack the request. */
	atomic_dec_uint(&pmap_tlb_pendcount);
}