/*	$NetBSD: exec_subr.c,v 1.76 2016/05/22 14:26:09 christos Exp $	*/

/*
 * Copyright (c) 1993, 1994, 1996 Christopher G. Demetriou
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christopher G. Demetriou.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.76 2016/05/22 14:26:09 christos Exp $");

#include "opt_pax.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/filedesc.h>
#include <sys/exec.h>
#include <sys/mman.h>
#include <sys/resourcevar.h>
#include <sys/device.h>
#include <sys/pax.h>

#include <uvm/uvm_extern.h>

#define	VMCMD_EVCNT_DECL(name)					\
static struct evcnt vmcmd_ev_##name =				\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "vmcmd", #name);	\
EVCNT_ATTACH_STATIC(vmcmd_ev_##name)

#define	VMCMD_EVCNT_INCR(name)					\
    vmcmd_ev_##name.ev_count++

VMCMD_EVCNT_DECL(calls);
VMCMD_EVCNT_DECL(extends);
VMCMD_EVCNT_DECL(kills);

#ifdef DEBUG_STACK
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/*
 * new_vmcmd():
 *	create a new vmcmd structure and fill in its fields based
 *	on function call arguments.  make sure objects ref'd by
 *	the vmcmd are 'held'.
 */

void
new_vmcmd(struct exec_vmcmd_set *evsp,
    int (*proc)(struct lwp * l, struct exec_vmcmd *),
    vsize_t len, vaddr_t addr, struct vnode *vp, u_long offset,
    u_int prot, int flags)
{
	struct exec_vmcmd *vcp;

	VMCMD_EVCNT_INCR(calls);
	KASSERT(proc != vmcmd_map_pagedvn || (vp->v_iflag & VI_TEXT));
	KASSERT(vp == NULL || vp->v_usecount > 0);

	if (evsp->evs_used >= evsp->evs_cnt)
		vmcmdset_extend(evsp);
	vcp = &evsp->evs_cmds[evsp->evs_used++];
	vcp->ev_proc = proc;
	vcp->ev_len = len;
	vcp->ev_addr = addr;
	if ((vcp->ev_vp = vp) != NULL)
		vref(vp);
	vcp->ev_offset = offset;
	vcp->ev_prot = prot;
	vcp->ev_flags = flags;
}
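
/*
 * Example (illustrative only): exec format back-ends typically queue
 * commands through the NEW_VMCMD()/NEW_VMCMD2() wrapper macros rather
 * than calling new_vmcmd() directly, e.g.
 *
 *	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, bss_size, bss_addr,
 *	    NULL, 0, VM_PROT_READ | VM_PROT_WRITE, 0);
 *
 * where "bss_size" and "bss_addr" stand in for whatever segment the
 * format is describing; see exec_setup_stack() at the end of this file
 * for an in-tree caller.
 */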
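
/*
 * vmcmdset_extend():
 *	enlarge the array backing a vmcmd set, doubling its size when it
 *	already holds entries and allocating the default-sized array
 *	otherwise.
 */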
void
vmcmdset_extend(struct exec_vmcmd_set *evsp)
{
	struct exec_vmcmd *nvcp;
	u_int ocnt;

#ifdef DIAGNOSTIC
	if (evsp->evs_used < evsp->evs_cnt)
		panic("vmcmdset_extend: not necessary");
#endif

	/* figure out number of entries in new set */
	if ((ocnt = evsp->evs_cnt) != 0) {
		evsp->evs_cnt += ocnt;
		VMCMD_EVCNT_INCR(extends);
	} else
		evsp->evs_cnt = EXEC_DEFAULT_VMCMD_SETSIZE;

	/* allocate it */
	nvcp = kmem_alloc(evsp->evs_cnt * sizeof(struct exec_vmcmd), KM_SLEEP);

	/* free the old struct, if there was one, and record the new one */
	if (ocnt) {
		memcpy(nvcp, evsp->evs_cmds,
		    (ocnt * sizeof(struct exec_vmcmd)));
		kmem_free(evsp->evs_cmds, ocnt * sizeof(struct exec_vmcmd));
	}
	evsp->evs_cmds = nvcp;
}
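
/*
 * kill_vmcmds():
 *	release the vnode references held by the commands in a set and
 *	free the set's backing storage.
 */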
void
kill_vmcmds(struct exec_vmcmd_set *evsp)
{
	struct exec_vmcmd *vcp;
	u_int i;

	VMCMD_EVCNT_INCR(kills);

	if (evsp->evs_cnt == 0)
		return;

	for (i = 0; i < evsp->evs_used; i++) {
		vcp = &evsp->evs_cmds[i];
		if (vcp->ev_vp != NULL)
			vrele(vcp->ev_vp);
	}
	kmem_free(evsp->evs_cmds, evsp->evs_cnt * sizeof(struct exec_vmcmd));
	evsp->evs_used = evsp->evs_cnt = 0;
}

/*
 * vmcmd_map_pagedvn():
 *	handle vmcmd which specifies that a vnode should be mmap'd.
 *	appropriate for handling demand-paged text and data segments.
 */

int
vmcmd_map_pagedvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct uvm_object *uobj;
	struct vnode *vp = cmd->ev_vp;
	struct proc *p = l->l_proc;
	int error;
	vm_prot_t prot, maxprot;

	KASSERT(vp->v_iflag & VI_TEXT);

	/*
	 * map the vnode in using uvm_map.
	 */

	if (cmd->ev_len == 0)
		return 0;
	if (cmd->ev_offset & PAGE_MASK)
		return EINVAL;
	if (cmd->ev_addr & PAGE_MASK)
		return EINVAL;
	if (cmd->ev_len & PAGE_MASK)
		return EINVAL;

	prot = cmd->ev_prot;
	maxprot = UVM_PROT_ALL;
	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

	/*
	 * check the file system's opinion about mmapping the file
	 */

	error = VOP_MMAP(vp, prot, l->l_cred);
	if (error)
		return error;

	if ((vp->v_vflag & VV_MAPPED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vp->v_vflag |= VV_MAPPED;
		VOP_UNLOCK(vp);
	}

	/*
	 * do the map, reference the object for this map entry
	 */
	uobj = &vp->v_uobj;
	vref(vp);

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len,
	    uobj, cmd->ev_offset, 0,
	    UVM_MAPFLAG(prot, maxprot, UVM_INH_COPY,
	    UVM_ADV_NORMAL, UVM_FLAG_COPYONW|UVM_FLAG_FIXED));
	if (error) {
		uobj->pgops->pgo_detach(uobj);
	}
	return error;
}
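
/*
 * Example (illustrative only): a format's makecmds routine would queue a
 * demand-paged text mapping roughly as
 *
 *	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, text_size, text_addr,
 *	    epp->ep_vp, text_offset, VM_PROT_READ | VM_PROT_EXECUTE);
 *
 * with the "text_*" names standing in for page-aligned values taken from
 * the image header; commands that are not page-aligned are rejected with
 * EINVAL above.
 */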

/*
 * vmcmd_map_readvn():
 *	handle vmcmd which specifies that a vnode should be read from.
 *	appropriate for non-demand-paged text/data segments, i.e. impure
 *	objects (a la OMAGIC and NMAGIC).
 */
int
vmcmd_map_readvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	long diff;

	if (cmd->ev_len == 0)
		return 0;

	diff = cmd->ev_addr - trunc_page(cmd->ev_addr);
	cmd->ev_addr -= diff;			/* required by uvm_map */
	cmd->ev_offset -= diff;
	cmd->ev_len += diff;

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr,
	    round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW));

	if (error)
		return error;

	return vmcmd_readvn(l, cmd);
}
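
/*
 * vmcmd_readvn():
 *	read the vnode's contents into an already-mapped region (see
 *	vmcmd_map_readvn() above), then tighten the mapping's protection
 *	to what the command actually asked for.
 */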
int
vmcmd_readvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	vm_prot_t prot, maxprot;

	error = vn_rdwr(UIO_READ, cmd->ev_vp, (void *)cmd->ev_addr,
	    cmd->ev_len, cmd->ev_offset, UIO_USERSPACE, IO_UNIT,
	    l->l_cred, NULL, l);
	if (error)
		return error;

	prot = cmd->ev_prot;
	maxprot = VM_PROT_ALL;
	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

#ifdef PMAP_NEED_PROCWR
	/*
	 * we had to write the process, make sure the pages are synched
	 * with the instruction cache.
	 */
	if (prot & VM_PROT_EXECUTE)
		pmap_procwr(p, cmd->ev_addr, cmd->ev_len);
#endif

	/*
	 * we had to map in the area at PROT_ALL so that vn_rdwr()
	 * could write to it.  however, the caller seems to want
	 * it mapped read-only, so now we are going to have to call
	 * uvm_map_protect() to fix up the protection.  ICK.
	 */
	if (maxprot != VM_PROT_ALL) {
		error = uvm_map_protect(&p->p_vmspace->vm_map,
		    trunc_page(cmd->ev_addr),
		    round_page(cmd->ev_addr + cmd->ev_len),
		    maxprot, true);
		if (error)
			return error;
	}

	if (prot != maxprot) {
		error = uvm_map_protect(&p->p_vmspace->vm_map,
		    trunc_page(cmd->ev_addr),
		    round_page(cmd->ev_addr + cmd->ev_len),
		    prot, false);
		if (error)
			return error;
	}

	return 0;
}

/*
 * vmcmd_map_zero():
 *	handle vmcmd which specifies a zero-filled address space region.  The
 *	address range must first be allocated, then protected appropriately.
 */

int
vmcmd_map_zero(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	long diff;
	vm_prot_t prot, maxprot;

	diff = cmd->ev_addr - trunc_page(cmd->ev_addr);
	cmd->ev_addr -= diff;			/* required by uvm_map */
	cmd->ev_len += diff;

	prot = cmd->ev_prot;
	maxprot = UVM_PROT_ALL;
	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr,
	    round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(prot, maxprot, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_COPYONW));
	if (cmd->ev_flags & VMCMD_STACK)
		curproc->p_vmspace->vm_issize += atop(round_page(cmd->ev_len));
	return error;
}

/*
 * exec_read_from():
 *
 *	Read from vnode into buffer at offset.
 */
int
exec_read_from(struct lwp *l, struct vnode *vp, u_long off, void *bf,
    size_t size)
{
	int error;
	size_t resid;

	if ((error = vn_rdwr(UIO_READ, vp, bf, size, off, UIO_SYSSPACE,
	    0, l->l_cred, &resid, NULL)) != 0)
		return error;
	/*
	 * See if we got all of it
	 */
	if (resid != 0)
		return ENOEXEC;
	return 0;
}
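
/*
 * Example (illustrative only): image activators use this to pull an
 * executable's header into a kernel buffer, e.g.
 *
 *	error = exec_read_from(l, epp->ep_vp, 0, &ehdr, sizeof(ehdr));
 *	if (error != 0)
 *		return error;
 *
 * where "ehdr" is whatever header structure the format expects at the
 * given offset.
 */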

/*
 * exec_setup_stack(): Set up the stack segment for an elf
 * executable.
 *
 * Note that the ep_ssize parameter must be set to be the current stack
 * limit; this is adjusted in the body of execve() to yield the
 * appropriate stack segment usage once the argument length is
 * calculated.
 *
 * This function returns an int for uniformity with other (future) formats'
 * stack setup functions.  They might have errors to return.
 */

int
exec_setup_stack(struct lwp *l, struct exec_package *epp)
{
	vsize_t max_stack_size;
	vaddr_t access_linear_min;
	vsize_t access_size;
	vaddr_t noaccess_linear_min;
	vsize_t noaccess_size;

#ifndef USRSTACK32
#define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
#endif
#ifndef MAXSSIZ32
#define MAXSSIZ32	(MAXSSIZ >> 2)
#endif

	if (epp->ep_flags & EXEC_32) {
		epp->ep_minsaddr = USRSTACK32;
		max_stack_size = MAXSSIZ32;
	} else {
		epp->ep_minsaddr = USRSTACK;
		max_stack_size = MAXSSIZ;
	}

	DPRINTF(("ep_minsaddr=%#jx max_stack_size=%#jx\n",
	    (uintmax_t)epp->ep_minsaddr, (uintmax_t)max_stack_size));

	pax_aslr_stack(epp, &max_stack_size);

	DPRINTF(("[RLIMIT_STACK].lim_cur=%#jx max_stack_size=%#jx\n",
	    (uintmax_t)l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur,
	    (uintmax_t)max_stack_size));
	epp->ep_ssize = MIN(l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur,
	    max_stack_size);

	l->l_proc->p_stackbase = epp->ep_minsaddr;

	epp->ep_maxsaddr = (vaddr_t)STACK_GROW(epp->ep_minsaddr,
	    max_stack_size);

	DPRINTF(("ep_ssize=%#jx ep_minsaddr=%#jx ep_maxsaddr=%#jx\n",
	    (uintmax_t)epp->ep_ssize, (uintmax_t)epp->ep_minsaddr,
	    (uintmax_t)epp->ep_maxsaddr));

	/*
	 * set up commands for stack.  note that this takes *two*, one to
	 * map the part of the stack which we can access, and one to map
	 * the part which we can't.
	 *
	 * arguably, it could be made into one, but that would require the
	 * addition of another mapping proc, which is unnecessary
	 */
	access_size = epp->ep_ssize;
	access_linear_min = (vaddr_t)STACK_ALLOC(epp->ep_minsaddr, access_size);
	noaccess_size = max_stack_size - access_size;
	noaccess_linear_min = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
	    access_size), noaccess_size);

	DPRINTF(("access_size=%#jx, access_linear_min=%#jx, "
	    "noaccess_size=%#jx, noaccess_linear_min=%#jx\n",
	    (uintmax_t)access_size, (uintmax_t)access_linear_min,
	    (uintmax_t)noaccess_size, (uintmax_t)noaccess_linear_min));

	if (noaccess_size > 0 && noaccess_size <= MAXSSIZ) {
		NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
		    noaccess_linear_min, NULL, 0, VM_PROT_NONE, VMCMD_STACK);
	}
	KASSERT(access_size > 0 && access_size <= MAXSSIZ);
	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
	    access_linear_min, NULL, 0, VM_PROT_READ | VM_PROT_WRITE,
	    VMCMD_STACK);

	return 0;
}
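
/*
 * Illustrative note: an executable format normally installs this routine
 * (or a format-specific variant) as the stack setup hook of its struct
 * execsw entry (the es_setup_stack member declared in <sys/exec.h>), so
 * that execve() invokes it while building the vmcmd set for the new image.
 */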