/*	$NetBSD: vm_machdep.c,v 1.26 2016/11/08 03:05:36 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*-
 * Copyright (c) 1995 Charles M. Hannum. All rights reserved.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*
 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.26 2016/11/08 03:05:36 christos Exp $");

#include "opt_mtrr.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/core.h>
#include <sys/exec.h>
#include <sys/ptrace.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>
#include <machine/gdt.h>
#include <machine/reg.h>
#include <machine/specialreg.h>

#ifdef MTRR
#include <machine/mtrr.h>
#endif

#include <x86/fpu.h>

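/*
 * cpu_proc_fork: finish machine-dependent initialization of a new
 * process (p2), forked from p1.  Currently this only copies the MD
 * process flags (e.g. MDP_USEDMTRR, tested in cpu_lwp_free() below)
 * from the parent to the child.
 */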
void
cpu_proc_fork(struct proc *p1, struct proc *p2)
{

	p2->p_md.md_flags = p1->p_md.md_flags;
}

/*
 * cpu_lwp_fork: finish a new LWP (l2) operation.
 *
 * The first LWP (l1) is the LWP from which the new LWP is forked.  If it
 * is &lwp0, then we are creating a kthread, whose return path and argument
 * are specified by `func' and `arg'.
 *
 * If an alternate user-level stack is requested (with non-zero values
 * in both the stack and stacksize arguments), then set up the user stack
 * pointer accordingly.
 */
void
cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
    void (*func)(void *), void *arg)
{
	struct pcb *pcb1, *pcb2;
	struct trapframe *tf;
	struct switchframe *sf;
	vaddr_t uv;

	pcb1 = lwp_getpcb(l1);
	pcb2 = lwp_getpcb(l2);

	/*
	 * If the parent LWP was using the FPU, we have to save the FPU h/w
	 * state to its PCB so that we can copy it.
	 */
	fpusave_lwp(l1, true);

	/*
	 * Sync the PCB before we copy it.
	 */
	if (l1 == curlwp) {
		KASSERT(pcb1 == curpcb);
		savectx(pcb1);
	} else {
		KASSERT(l1 == &lwp0);
	}

	/* Copy the PCB from the parent. */
	memcpy(pcb2, pcb1, sizeof(struct pcb));
	/* Copy any additional FPU state. */
	fpu_save_area_fork(pcb2, pcb1);

#if defined(XEN)
	pcb2->pcb_iopl = SEL_KPL;
#endif

	/*
	 * Set the kernel stack address (derived from the uarea address) and
	 * the trapframe address for the child.
	 *
	 * Rig the kernel stack so that it starts out in lwp_trampoline()
	 * and calls child_return() with l2 as an argument.  This causes the
	 * newly-created child process to go directly to user level with a
	 * parent return value of 0 from fork(), while the parent process
	 * returns normally.
	 */
	uv = uvm_lwp_getuarea(l2);

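	/*
	 * The kernel stack top (pcb_rsp0 on amd64, pcb_esp0 on i386) is
	 * placed just below the end of the uarea; on amd64 it is kept
	 * 16-byte aligned.  The child's trapframe is carved out immediately
	 * below it.
	 */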
#ifdef __x86_64__
	pcb2->pcb_rsp0 = (uv + USPACE - 16) & ~0xf;
	tf = (struct trapframe *)pcb2->pcb_rsp0 - 1;
#else
	pcb2->pcb_esp0 = (uv + USPACE - 16);
	tf = (struct trapframe *)pcb2->pcb_esp0 - 1;

	pcb2->pcb_iomap = NULL;
#endif
	l2->l_md.md_regs = tf;

	/*
	 * Copy the trapframe from the parent, so that the return to
	 * userspace will be to the right address, with the correct
	 * registers.
	 */
	memcpy(tf, l1->l_md.md_regs, sizeof(struct trapframe));

	/* Child LWP might get aston() before returning to userspace. */
	tf->tf_trapno = T_ASTFLT;

#if 0 /* DIAGNOSTIC */
	/* Set a red zone in the kernel stack after the uarea. */
	pmap_kremove(uv, PAGE_SIZE);
	pmap_update(pmap_kernel());
#endif

	/* If specified, set a different user stack for the child. */
	if (stack != NULL) {
#ifdef __x86_64__
		tf->tf_rsp = (uint64_t)stack + stacksize;
#else
		tf->tf_esp = (uint32_t)stack + stacksize;
#endif
	}

	l2->l_md.md_flags = l1->l_md.md_flags;
	l2->l_md.md_astpending = 0;

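	/*
	 * Build a switchframe just below the trapframe.  When the child is
	 * first switched to, cpu_switchto() returns through sf_rip/sf_eip
	 * into lwp_trampoline(), which calls `func' with `arg' taken from
	 * the callee-saved registers initialized below.
	 */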
	sf = (struct switchframe *)tf - 1;

#ifdef __x86_64__
	sf->sf_r12 = (uint64_t)func;
	sf->sf_r13 = (uint64_t)arg;
	sf->sf_rip = (uint64_t)lwp_trampoline;
	pcb2->pcb_rsp = (uint64_t)sf;
	pcb2->pcb_rbp = (uint64_t)l2;
#else
	/*
	 * XXX Is there a reason sf->sf_edi isn't initialized here?
	 * Could this leak potentially sensitive information to new
	 * userspace processes?
	 */
	sf->sf_esi = (int)func;
	sf->sf_ebx = (int)arg;
	sf->sf_eip = (int)lwp_trampoline;
	pcb2->pcb_esp = (int)sf;
	pcb2->pcb_ebp = (int)l2;
#endif
}

/*
 * cpu_lwp_free is called from exit() to let machine-dependent
 * code free machine-dependent resources.  Note that this routine
 * must not block.
 */
void
cpu_lwp_free(struct lwp *l, int proc)
{

	/* If we were using the FPU, forget about it. */
	fpusave_lwp(l, false);

#ifdef MTRR
	if (proc && l->l_proc->p_md.md_flags & MDP_USEDMTRR)
		mtrr_clean(l->l_proc);
#endif
	/*
	 * Free deferred mappings if any.
	 */
	struct vm_page *empty_ptps = l->l_md.md_gc_ptp;
	l->l_md.md_gc_ptp = NULL;
	pmap_free_ptps(empty_ptps);
}

/*
 * cpu_lwp_free2 is called when an LWP is being reaped.
 * This routine may block.
 */
void
cpu_lwp_free2(struct lwp *l)
{

	KASSERT(l->l_md.md_gc_ptp == NULL);
	KASSERT(l->l_md.md_gc_pmap == NULL);
}

/*
 * Convert a kernel VA to a physical address.
 */
paddr_t
kvtop(void *addr)
{
	paddr_t pa;
	bool ret __diagused;

	ret = pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa);
	KASSERT(ret == true);
	return pa;
}

/*
 * Map a user I/O request into kernel virtual address space.
 * Note: the pages are already locked by uvm_vslock(), so we
 * do not need to pass an access_type to pmap_enter().
 */
int
vmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t faddr, taddr, off;
	paddr_t fpa;

	KASSERT((bp->b_flags & B_PHYS) != 0);

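	/*
	 * Remember the original user-space address in b_saveaddr so that
	 * vunmapbuf() can restore b_data when the transfer completes.
	 */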
	bp->b_saveaddr = bp->b_data;
	faddr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - faddr;
	len = round_page(off + len);
	taddr = uvm_km_alloc(phys_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	bp->b_data = (void *)(taddr + off);
	/*
	 * The region is locked, so we expect that pmap_extract() will
	 * succeed.
	 * XXX: unwise to expect this in a multithreaded environment.
	 * Anything can happen to a pmap between the time we lock a
	 * region, release the pmap lock, and then relock it for
	 * the pmap_extract().
	 *
	 * No need to flush the TLB since we expect nothing to be mapped
	 * where we just allocated (the TLB will be flushed when our
	 * mapping is removed).
	 */
	while (len) {
		(void) pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
		    faddr, &fpa);
		pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE, 0);
		faddr += PAGE_SIZE;
		taddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	return 0;
}

/*
 * Unmap a previously-mapped user I/O request.
 */
void
vunmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t addr, off;

	KASSERT((bp->b_flags & B_PHYS) != 0);

	addr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - addr;
	len = round_page(off + len);
	pmap_kremove(addr, len);
	pmap_update(pmap_kernel());
	uvm_km_free(phys_map, addr, len, UVM_KMF_VAONLY);
	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}

#ifdef __HAVE_CPU_UAREA_ROUTINES
void *
cpu_uarea_alloc(bool system)
{
	struct pglist pglist;
	int error;

	/*
	 * Allocate a new physically contiguous uarea which can be
	 * direct-mapped.
	 */
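	/*
	 * uvm_pglistalloc() is asked for a single segment (nsegs = 1), which
	 * guarantees the USPACE bytes are physically contiguous; the final
	 * argument lets it wait for memory to become available.
	 */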
	error = uvm_pglistalloc(USPACE, 0, ptoa(physmem), 0, 0, &pglist, 1, 1);
	if (error) {
		return NULL;
	}

	/*
	 * Get the physical address from the first page.
	 */
	const struct vm_page * const pg = TAILQ_FIRST(&pglist);
	KASSERT(pg != NULL);
	const paddr_t pa = VM_PAGE_TO_PHYS(pg);

	/*
	 * We need to return a direct-mapped VA for the pa.
	 */

	return (void *)PMAP_MAP_POOLPAGE(pa);
}

/*
 * Return true if we freed it, false if we didn't.
 */
bool
cpu_uarea_free(void *vva)
{
	vaddr_t va = (vaddr_t) vva;

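	/*
	 * A VA inside the regular kernel virtual address range presumably
	 * did not come from cpu_uarea_alloc() above (direct-mapped addresses
	 * lie outside that range), so decline to free it here and let the
	 * generic uarea code handle it.
	 */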
	if (va >= VM_MIN_KERNEL_ADDRESS && va < VM_MAX_KERNEL_ADDRESS) {
		return false;
	}

	/*
	 * Since the pages are physically contiguous, the vm_page structures
	 * will be as well.
	 */
	struct vm_page *pg = PHYS_TO_VM_PAGE(PMAP_UNMAP_POOLPAGE(va));
	KASSERT(pg != NULL);
	for (size_t i = 0; i < UPAGES; i++, pg++) {
		uvm_pagefree(pg);
	}
	return true;
}
#endif /* __HAVE_CPU_UAREA_ROUTINES */
