1/* $NetBSD: syscall.c,v 1.14 2016/07/07 06:55:40 msaitoh Exp $ */
2
3/*-
4 * Copyright (c) 1998, 2000, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Charles M. Hannum.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.14 2016/07/07 06:55:40 msaitoh Exp $");
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/proc.h>
38#include <sys/signal.h>
39#include <sys/ktrace.h>
40#include <sys/syscall.h>
41#include <sys/syscallvar.h>
42#include <sys/syscall_stats.h>
43
44#include <uvm/uvm_extern.h>
45
46#include <machine/cpu.h>
47#include <machine/psl.h>
48#include <machine/userret.h>
49
50#include "opt_dtrace.h"
51
/*
 * Forward declarations.  The first group is needed only when not building
 * for x86_64.
 */
#ifndef __x86_64__
#include "opt_vm86.h"
#ifdef VM86
/* Defined at the end of this file (VM86 kernels only). */
void	syscall_vm86(struct trapframe *frame);
#endif
/* Used by syscall() to fetch the argument words; implemented elsewhere. */
int	x86_copyargs(void *src, void *dst, size_t len);
#endif

/* Stores syscall() in the process's md_syscall hook. */
void	syscall_intern(struct proc *p);

/*
 * NOTE(review): the definition further down omits `static' when
 * KDTRACE_HOOKS is defined, but because this file-scope declaration is
 * static the function keeps internal linkage either way -- confirm that
 * this is what is wanted.
 */
static void	syscall(struct trapframe *frame);
62
63void
64child_return(void *arg)
65{
66 struct lwp *l = arg;
67 struct trapframe *tf = l->l_md.md_regs;
68 struct proc *p = l->l_proc;
69
70 if (p->p_slflag & PSL_TRACED) {
71 ksiginfo_t ksi;
72
73 mutex_enter(proc_lock);
74 KSI_INIT_EMPTY(&ksi);
75 ksi.ksi_signo = SIGTRAP;
76 ksi.ksi_lid = l->l_lid;
77 kpsignal(p, &ksi, NULL);
78 mutex_exit(proc_lock);
79 }
80
81 X86_TF_RAX(tf) = 0;
82 X86_TF_RFLAGS(tf) &= ~PSL_C;
83
84 userret(l);
85 ktrsysret(SYS_fork, 0, 0);
86}
87
/*
 * cpu_spawn_return(l):
 *	Tail end of posix_spawn() as seen by the child.  The only work done
 *	here is the userret() pass for the given lwp.
 */
void
cpu_spawn_return(struct lwp *l)
{
	userret(l);
}
97
98/*
99 * syscall(frame):
100 * System call request from POSIX system call gate interface to kernel.
101 * Like trap(), argument is call by reference.
102 */
103#ifdef KDTRACE_HOOKS
104void syscall(struct trapframe *);
105#else
106static
107#endif
108void
109syscall(struct trapframe *frame)
110{
111 const struct sysent *callp;
112 struct proc *p;
113 struct lwp *l;
114 int error;
115 register_t code, rval[2];
116#ifdef __x86_64__
117 /* Verify that the syscall args will fit in the trapframe space */
118 CTASSERT(offsetof(struct trapframe, tf_arg9) >=
119 sizeof(register_t) * (2 + SYS_MAXSYSARGS - 1));
120#define args (&frame->tf_rdi)
121#else
122 register_t args[2 + SYS_MAXSYSARGS];
123#endif
124
125 l = curlwp;
126 p = l->l_proc;
127 LWP_CACHE_CREDS(l, p);
128
129 code = X86_TF_RAX(frame) & (SYS_NSYSENT - 1);
130 callp = p->p_emul->e_sysent + code;
131
132 SYSCALL_COUNT(syscall_counts, code);
133 SYSCALL_TIME_SYS_ENTRY(l, syscall_times, code);
134
135#ifdef __x86_64__
136 /*
137 * The first 6 syscall args are passed in rdi, rsi, rdx, r10, r8 and r9
138 * (rcx gets copied to r10 in the libc stub because the syscall
139 * instruction overwrites %cx) and are together in the trap frame
140 * with space following for 4 more entries.
141 */
142 if (__predict_false(callp->sy_argsize > 6 * 8)) {
143 error = copyin((register_t *)frame->tf_rsp + 1,
144 &frame->tf_arg6, callp->sy_argsize - 6 * 8);
145 if (error != 0)
146 goto bad;
147 }
148#else
149 if (callp->sy_argsize) {
150 error = x86_copyargs((char *)frame->tf_esp + sizeof(int), args,
151 callp->sy_argsize);
152 if (__predict_false(error != 0))
153 goto bad;
154 }
155#endif
156 error = sy_invoke(callp, l, args, rval, code);
157
158 if (__predict_true(error == 0)) {
159 X86_TF_RAX(frame) = rval[0];
160 X86_TF_RDX(frame) = rval[1];
161 X86_TF_RFLAGS(frame) &= ~PSL_C; /* carry bit */
162 } else {
163 switch (error) {
164 case ERESTART:
165 /*
166 * The offset to adjust the PC by depends on whether we
167 * entered the kernel through the trap or call gate.
168 * We saved the instruction size in tf_err on entry.
169 */
170 X86_TF_RIP(frame) -= frame->tf_err;
171 break;
172 case EJUSTRETURN:
173 /* nothing to do */
174 break;
175 default:
176 bad:
177 X86_TF_RAX(frame) = error;
178 X86_TF_RFLAGS(frame) |= PSL_C; /* carry bit */
179 break;
180 }
181 }
182
183 SYSCALL_TIME_SYS_EXIT(l);
184 userret(l);
185}
186
187void
188syscall_intern(struct proc *p)
189{
190
191 p->p_md.md_syscall = syscall;
192}
193
#ifdef VM86

/*
 * syscall_vm86(frame):
 *	Handler compiled only into VM86 kernels.  No system call is
 *	dispatched: a trap ksiginfo describing SIGBUS/BUS_OBJERR (trap type
 *	T_PROTFLT, fault address taken from the frame's saved eip) is handed
 *	to the emulation's trapsignal hook for the current lwp, followed by
 *	the usual userret() pass.
 */
void
syscall_vm86(struct trapframe *frame)
{
	struct lwp *lp = curlwp;
	ksiginfo_t info;

	KSI_INIT_TRAP(&info);
	info.ksi_signo = SIGBUS;
	info.ksi_code = BUS_OBJERR;
	info.ksi_trap = T_PROTFLT;
	info.ksi_addr = (void *)frame->tf_eip;

	lp->l_proc->p_emul->e_trapsignal(lp, &info);
	userret(lp);
}

#endif /* VM86 */
217