1 | /* $NetBSD: sys_aio.c,v 1.41 2016/07/07 06:55:43 msaitoh Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org> |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
20 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
26 | * SUCH DAMAGE. |
27 | */ |
28 | |
29 | /* |
30 | * Implementation of POSIX asynchronous I/O. |
31 | * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. |
32 | */ |
33 | |
34 | #include <sys/cdefs.h> |
35 | __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.41 2016/07/07 06:55:43 msaitoh Exp $" ); |
36 | |
37 | #ifdef _KERNEL_OPT |
38 | #include "opt_ddb.h" |
39 | #endif |
40 | |
41 | #include <sys/param.h> |
42 | #include <sys/condvar.h> |
43 | #include <sys/file.h> |
44 | #include <sys/filedesc.h> |
45 | #include <sys/kernel.h> |
46 | #include <sys/kmem.h> |
47 | #include <sys/lwp.h> |
48 | #include <sys/mutex.h> |
49 | #include <sys/pool.h> |
50 | #include <sys/proc.h> |
51 | #include <sys/queue.h> |
52 | #include <sys/signal.h> |
53 | #include <sys/signalvar.h> |
54 | #include <sys/syscall.h> |
55 | #include <sys/syscallargs.h> |
56 | #include <sys/syscallvar.h> |
57 | #include <sys/sysctl.h> |
58 | #include <sys/systm.h> |
59 | #include <sys/types.h> |
60 | #include <sys/vnode.h> |
61 | #include <sys/atomic.h> |
62 | #include <sys/module.h> |
63 | #include <sys/buf.h> |
64 | |
65 | #include <uvm/uvm_extern.h> |
66 | |
67 | MODULE(MODULE_CLASS_MISC, aio, NULL); |
68 | |
69 | /* |
70 | * System-wide limits and counter of AIO operations. |
71 | */ |
72 | u_int aio_listio_max = AIO_LISTIO_MAX; |
73 | static u_int aio_max = AIO_MAX; |
74 | static u_int aio_jobs_count; |
75 | |
76 | static struct sysctllog *aio_sysctl; |
77 | static struct pool aio_job_pool; |
78 | static struct pool aio_lio_pool; |
79 | static void * aio_ehook; |
80 | |
81 | static void aio_worker(void *); |
82 | static void aio_process(struct aio_job *); |
83 | static void aio_sendsig(struct proc *, struct sigevent *); |
84 | static int aio_enqueue_job(int, void *, struct lio_req *); |
85 | static void aio_exit(proc_t *, void *); |
86 | |
87 | static int sysctl_aio_listio_max(SYSCTLFN_PROTO); |
88 | static int sysctl_aio_max(SYSCTLFN_PROTO); |
89 | static int sysctl_aio_init(void); |
90 | |
91 | static const struct syscall_package aio_syscalls[] = { |
92 | { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel }, |
93 | { SYS_aio_error, 0, (sy_call_t *)sys_aio_error }, |
94 | { SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync }, |
95 | { SYS_aio_read, 0, (sy_call_t *)sys_aio_read }, |
96 | { SYS_aio_return, 0, (sy_call_t *)sys_aio_return }, |
97 | { SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 }, |
98 | { SYS_aio_write, 0, (sy_call_t *)sys_aio_write }, |
99 | { SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio }, |
100 | { 0, 0, NULL }, |
101 | }; |
102 | |
103 | /* |
104 | * Tear down all AIO state. |
105 | */ |
106 | static int |
107 | aio_fini(bool interface) |
108 | { |
109 | int error; |
110 | proc_t *p; |
111 | |
112 | if (interface) { |
113 | /* Stop syscall activity. */ |
114 | error = syscall_disestablish(NULL, aio_syscalls); |
115 | if (error != 0) |
116 | return error; |
117 | /* Abort if any processes are using AIO. */ |
118 | mutex_enter(proc_lock); |
119 | PROCLIST_FOREACH(p, &allproc) { |
120 | if (p->p_aio != NULL) |
121 | break; |
122 | } |
123 | mutex_exit(proc_lock); |
124 | if (p != NULL) { |
125 | error = syscall_establish(NULL, aio_syscalls); |
126 | KASSERT(error == 0); |
127 | return EBUSY; |
128 | } |
129 | } |
130 | if (aio_sysctl != NULL) |
131 | sysctl_teardown(&aio_sysctl); |
132 | |
133 | KASSERT(aio_jobs_count == 0); |
134 | exithook_disestablish(aio_ehook); |
135 | pool_destroy(&aio_job_pool); |
136 | pool_destroy(&aio_lio_pool); |
137 | return 0; |
138 | } |
139 | |
140 | /* |
141 | * Initialize global AIO state. |
142 | */ |
143 | static int |
144 | aio_init(void) |
145 | { |
146 | int error; |
147 | |
148 | pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0, |
149 | "aio_jobs_pool" , &pool_allocator_nointr, IPL_NONE); |
150 | pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0, |
151 | "aio_lio_pool" , &pool_allocator_nointr, IPL_NONE); |
152 | aio_ehook = exithook_establish(aio_exit, NULL); |
153 | |
154 | error = sysctl_aio_init(); |
155 | if (error != 0) { |
156 | (void)aio_fini(false); |
157 | return error; |
158 | } |
159 | error = syscall_establish(NULL, aio_syscalls); |
160 | if (error != 0) |
161 | (void)aio_fini(false); |
162 | return error; |
163 | } |
164 | |
165 | /* |
166 | * Module interface. |
167 | */ |
168 | static int |
169 | aio_modcmd(modcmd_t cmd, void *arg) |
170 | { |
171 | |
172 | switch (cmd) { |
173 | case MODULE_CMD_INIT: |
174 | return aio_init(); |
175 | case MODULE_CMD_FINI: |
176 | return aio_fini(true); |
177 | default: |
178 | return ENOTTY; |
179 | } |
180 | } |
181 | |
182 | /* |
183 | * Initialize Asynchronous I/O data structures for the process. |
184 | */ |
185 | static int |
186 | aio_procinit(struct proc *p) |
187 | { |
188 | struct aioproc *aio; |
189 | struct lwp *l; |
190 | int error; |
191 | vaddr_t uaddr; |
192 | |
193 | /* Allocate and initialize AIO structure */ |
194 | aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP); |
195 | if (aio == NULL) |
196 | return EAGAIN; |
197 | |
198 | /* Initialize queue and their synchronization structures */ |
199 | mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); |
200 | cv_init(&aio->aio_worker_cv, "aiowork" ); |
201 | cv_init(&aio->done_cv, "aiodone" ); |
202 | TAILQ_INIT(&aio->jobs_queue); |
203 | |
204 | /* |
205 | * Create an AIO worker thread. |
206 | * XXX: Currently, AIO thread is not protected against user's actions. |
207 | */ |
208 | uaddr = uvm_uarea_alloc(); |
209 | if (uaddr == 0) { |
210 | aio_exit(p, aio); |
211 | return EAGAIN; |
212 | } |
213 | error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker, |
214 | NULL, &l, curlwp->l_class); |
215 | if (error != 0) { |
216 | uvm_uarea_free(uaddr); |
217 | aio_exit(p, aio); |
218 | return error; |
219 | } |
220 | |
221 | /* Recheck if we are really first */ |
222 | mutex_enter(p->p_lock); |
223 | if (p->p_aio) { |
224 | mutex_exit(p->p_lock); |
225 | aio_exit(p, aio); |
226 | lwp_exit(l); |
227 | return 0; |
228 | } |
229 | p->p_aio = aio; |
230 | |
231 | /* Complete the initialization of thread, and run it */ |
232 | aio->aio_worker = l; |
233 | lwp_lock(l); |
234 | l->l_stat = LSRUN; |
235 | l->l_priority = MAXPRI_USER; |
236 | sched_enqueue(l, false); |
237 | lwp_unlock(l); |
238 | mutex_exit(p->p_lock); |
239 | |
240 | return 0; |
241 | } |
242 | |
243 | /* |
244 | * Exit of Asynchronous I/O subsystem of process. |
245 | */ |
246 | static void |
247 | aio_exit(struct proc *p, void *cookie) |
248 | { |
249 | struct aio_job *a_job; |
250 | struct aioproc *aio; |
251 | |
252 | if (cookie != NULL) |
253 | aio = cookie; |
254 | else if ((aio = p->p_aio) == NULL) |
255 | return; |
256 | |
257 | /* Free AIO queue */ |
258 | while (!TAILQ_EMPTY(&aio->jobs_queue)) { |
259 | a_job = TAILQ_FIRST(&aio->jobs_queue); |
260 | TAILQ_REMOVE(&aio->jobs_queue, a_job, list); |
261 | pool_put(&aio_job_pool, a_job); |
262 | atomic_dec_uint(&aio_jobs_count); |
263 | } |
264 | |
265 | /* Destroy and free the entire AIO data structure */ |
266 | cv_destroy(&aio->aio_worker_cv); |
267 | cv_destroy(&aio->done_cv); |
268 | mutex_destroy(&aio->aio_mtx); |
269 | kmem_free(aio, sizeof(struct aioproc)); |
270 | } |
271 | |
272 | /* |
273 | * AIO worker thread and processor. |
274 | */ |
275 | static void |
276 | aio_worker(void *arg) |
277 | { |
278 | struct proc *p = curlwp->l_proc; |
279 | struct aioproc *aio = p->p_aio; |
280 | struct aio_job *a_job; |
281 | struct lio_req *lio; |
282 | sigset_t oss, nss; |
283 | int error __diagused, refcnt; |
284 | |
285 | /* |
286 | * Make an empty signal mask, so it |
287 | * handles only SIGKILL and SIGSTOP. |
288 | */ |
289 | sigfillset(&nss); |
290 | mutex_enter(p->p_lock); |
291 | error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss); |
292 | mutex_exit(p->p_lock); |
293 | KASSERT(error == 0); |
294 | |
295 | for (;;) { |
296 | /* |
297 | * Loop for each job in the queue. If there |
298 | * are no jobs then sleep. |
299 | */ |
300 | mutex_enter(&aio->aio_mtx); |
301 | while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) { |
302 | if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) { |
303 | /* |
304 | * Thread was interrupted - check for |
305 | * pending exit or suspend. |
306 | */ |
307 | mutex_exit(&aio->aio_mtx); |
308 | lwp_userret(curlwp); |
309 | mutex_enter(&aio->aio_mtx); |
310 | } |
311 | } |
312 | |
313 | /* Take the job from the queue */ |
314 | aio->curjob = a_job; |
315 | TAILQ_REMOVE(&aio->jobs_queue, a_job, list); |
316 | |
317 | atomic_dec_uint(&aio_jobs_count); |
318 | aio->jobs_count--; |
319 | |
320 | mutex_exit(&aio->aio_mtx); |
321 | |
322 | /* Process an AIO operation */ |
323 | aio_process(a_job); |
324 | |
325 | /* Copy data structure back to the user-space */ |
326 | (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr, |
327 | sizeof(struct aiocb)); |
328 | |
329 | mutex_enter(&aio->aio_mtx); |
330 | KASSERT(aio->curjob == a_job); |
331 | aio->curjob = NULL; |
332 | |
333 | /* Decrease a reference counter, if there is a LIO structure */ |
334 | lio = a_job->lio; |
335 | refcnt = (lio != NULL ? --lio->refcnt : -1); |
336 | |
337 | /* Notify all suspenders */ |
338 | cv_broadcast(&aio->done_cv); |
339 | mutex_exit(&aio->aio_mtx); |
340 | |
341 | /* Send a signal, if any */ |
342 | aio_sendsig(p, &a_job->aiocbp.aio_sigevent); |
343 | |
344 | /* Destroy the LIO structure */ |
345 | if (refcnt == 0) { |
346 | aio_sendsig(p, &lio->sig); |
347 | pool_put(&aio_lio_pool, lio); |
348 | } |
349 | |
350 | /* Destroy the job */ |
351 | pool_put(&aio_job_pool, a_job); |
352 | } |
353 | |
354 | /* NOTREACHED */ |
355 | } |
356 | |
357 | static void |
358 | aio_process(struct aio_job *a_job) |
359 | { |
360 | struct proc *p = curlwp->l_proc; |
361 | struct aiocb *aiocbp = &a_job->aiocbp; |
362 | struct file *fp; |
363 | int fd = aiocbp->aio_fildes; |
364 | int error = 0; |
365 | |
366 | KASSERT(a_job->aio_op != 0); |
367 | |
368 | if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) { |
369 | struct iovec aiov; |
370 | struct uio auio; |
371 | |
372 | if (aiocbp->aio_nbytes > SSIZE_MAX) { |
373 | error = EINVAL; |
374 | goto done; |
375 | } |
376 | |
377 | fp = fd_getfile(fd); |
378 | if (fp == NULL) { |
379 | error = EBADF; |
380 | goto done; |
381 | } |
382 | |
383 | aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf; |
384 | aiov.iov_len = aiocbp->aio_nbytes; |
385 | auio.uio_iov = &aiov; |
386 | auio.uio_iovcnt = 1; |
387 | auio.uio_resid = aiocbp->aio_nbytes; |
388 | auio.uio_vmspace = p->p_vmspace; |
389 | |
390 | if (a_job->aio_op & AIO_READ) { |
391 | /* |
392 | * Perform a Read operation |
393 | */ |
394 | KASSERT((a_job->aio_op & AIO_WRITE) == 0); |
395 | |
396 | if ((fp->f_flag & FREAD) == 0) { |
397 | fd_putfile(fd); |
398 | error = EBADF; |
399 | goto done; |
400 | } |
401 | auio.uio_rw = UIO_READ; |
402 | error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, |
403 | &auio, fp->f_cred, FOF_UPDATE_OFFSET); |
404 | } else { |
405 | /* |
406 | * Perform a Write operation |
407 | */ |
408 | KASSERT(a_job->aio_op & AIO_WRITE); |
409 | |
410 | if ((fp->f_flag & FWRITE) == 0) { |
411 | fd_putfile(fd); |
412 | error = EBADF; |
413 | goto done; |
414 | } |
415 | auio.uio_rw = UIO_WRITE; |
416 | error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, |
417 | &auio, fp->f_cred, FOF_UPDATE_OFFSET); |
418 | } |
419 | fd_putfile(fd); |
420 | |
421 | /* Store the result value */ |
422 | a_job->aiocbp.aio_nbytes -= auio.uio_resid; |
423 | a_job->aiocbp._retval = (error == 0) ? |
424 | a_job->aiocbp.aio_nbytes : -1; |
425 | |
426 | } else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) { |
427 | /* |
428 | * Perform a file Sync operation |
429 | */ |
430 | struct vnode *vp; |
431 | |
432 | if ((error = fd_getvnode(fd, &fp)) != 0) |
433 | goto done; |
434 | |
435 | if ((fp->f_flag & FWRITE) == 0) { |
436 | fd_putfile(fd); |
437 | error = EBADF; |
438 | goto done; |
439 | } |
440 | |
441 | vp = fp->f_vnode; |
442 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
443 | if (a_job->aio_op & AIO_DSYNC) { |
444 | error = VOP_FSYNC(vp, fp->f_cred, |
445 | FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); |
446 | } else if (a_job->aio_op & AIO_SYNC) { |
447 | error = VOP_FSYNC(vp, fp->f_cred, |
448 | FSYNC_WAIT, 0, 0); |
449 | } |
450 | VOP_UNLOCK(vp); |
451 | fd_putfile(fd); |
452 | |
453 | /* Store the result value */ |
454 | a_job->aiocbp._retval = (error == 0) ? 0 : -1; |
455 | |
456 | } else |
457 | panic("aio_process: invalid operation code\n" ); |
458 | |
459 | done: |
460 | /* Job is done, set the error, if any */ |
461 | a_job->aiocbp._errno = error; |
462 | a_job->aiocbp._state = JOB_DONE; |
463 | } |
464 | |
465 | /* |
466 | * Send AIO signal. |
467 | */ |
468 | static void |
469 | aio_sendsig(struct proc *p, struct sigevent *sig) |
470 | { |
471 | ksiginfo_t ksi; |
472 | |
473 | if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE) |
474 | return; |
475 | |
476 | KSI_INIT(&ksi); |
477 | ksi.ksi_signo = sig->sigev_signo; |
478 | ksi.ksi_code = SI_ASYNCIO; |
479 | ksi.ksi_value = sig->sigev_value; |
480 | mutex_enter(proc_lock); |
481 | kpsignal(p, &ksi, NULL); |
482 | mutex_exit(proc_lock); |
483 | } |
484 | |
485 | /* |
486 | * Enqueue the job. |
487 | */ |
488 | static int |
489 | aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) |
490 | { |
491 | struct proc *p = curlwp->l_proc; |
492 | struct aioproc *aio; |
493 | struct aio_job *a_job; |
494 | struct aiocb aiocbp; |
495 | struct sigevent *sig; |
496 | int error; |
497 | |
498 | /* Non-accurate check for the limit */ |
499 | if (aio_jobs_count + 1 > aio_max) |
500 | return EAGAIN; |
501 | |
502 | /* Get the data structure from user-space */ |
503 | error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb)); |
504 | if (error) |
505 | return error; |
506 | |
507 | /* Check if signal is set, and validate it */ |
508 | sig = &aiocbp.aio_sigevent; |
509 | if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || |
510 | sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) |
511 | return EINVAL; |
512 | |
513 | /* Buffer and byte count */ |
514 | if (((AIO_SYNC | AIO_DSYNC) & op) == 0) |
515 | if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX) |
516 | return EINVAL; |
517 | |
518 | /* Check the opcode, if LIO_NOP - simply ignore */ |
519 | if (op == AIO_LIO) { |
520 | KASSERT(lio != NULL); |
521 | if (aiocbp.aio_lio_opcode == LIO_WRITE) |
522 | op = AIO_WRITE; |
523 | else if (aiocbp.aio_lio_opcode == LIO_READ) |
524 | op = AIO_READ; |
525 | else |
526 | return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL; |
527 | } else { |
528 | KASSERT(lio == NULL); |
529 | } |
530 | |
531 | /* |
532 | * Look for already existing job. If found - the job is in-progress. |
533 | * According to POSIX this is invalid, so return the error. |
534 | */ |
535 | aio = p->p_aio; |
536 | if (aio) { |
537 | mutex_enter(&aio->aio_mtx); |
538 | TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { |
539 | if (a_job->aiocb_uptr != aiocb_uptr) |
540 | continue; |
541 | mutex_exit(&aio->aio_mtx); |
542 | return EINVAL; |
543 | } |
544 | mutex_exit(&aio->aio_mtx); |
545 | } |
546 | |
547 | /* |
548 | * Check if AIO structure is initialized, if not - initialize it. |
549 | * In LIO case, we did that already. We will recheck this with |
550 | * the lock in aio_procinit(). |
551 | */ |
552 | if (lio == NULL && p->p_aio == NULL) |
553 | if (aio_procinit(p)) |
554 | return EAGAIN; |
555 | aio = p->p_aio; |
556 | |
557 | /* |
558 | * Set the state with errno, and copy data |
559 | * structure back to the user-space. |
560 | */ |
561 | aiocbp._state = JOB_WIP; |
562 | aiocbp._errno = EINPROGRESS; |
563 | aiocbp._retval = -1; |
564 | error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb)); |
565 | if (error) |
566 | return error; |
567 | |
568 | /* Allocate and initialize a new AIO job */ |
569 | a_job = pool_get(&aio_job_pool, PR_WAITOK); |
570 | memset(a_job, 0, sizeof(struct aio_job)); |
571 | |
572 | /* |
573 | * Set the data. |
574 | * Store the user-space pointer for searching. Since we |
575 | * are storing only per proc pointers - it is safe. |
576 | */ |
577 | memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb)); |
578 | a_job->aiocb_uptr = aiocb_uptr; |
579 | a_job->aio_op |= op; |
580 | a_job->lio = lio; |
581 | |
582 | /* |
583 | * Add the job to the queue, update the counters, and |
584 | * notify the AIO worker thread to handle the job. |
585 | */ |
586 | mutex_enter(&aio->aio_mtx); |
587 | |
588 | /* Fail, if the limit was reached */ |
589 | if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || |
590 | aio->jobs_count >= aio_listio_max) { |
591 | atomic_dec_uint(&aio_jobs_count); |
592 | mutex_exit(&aio->aio_mtx); |
593 | pool_put(&aio_job_pool, a_job); |
594 | return EAGAIN; |
595 | } |
596 | |
597 | TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list); |
598 | aio->jobs_count++; |
599 | if (lio) |
600 | lio->refcnt++; |
601 | cv_signal(&aio->aio_worker_cv); |
602 | |
603 | mutex_exit(&aio->aio_mtx); |
604 | |
605 | /* |
606 | * One would handle the errors only with aio_error() function. |
607 | * This way is appropriate according to POSIX. |
608 | */ |
609 | return 0; |
610 | } |
611 | |
612 | /* |
613 | * Syscall functions. |
614 | */ |
615 | |
616 | int |
617 | sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, |
618 | register_t *retval) |
619 | { |
620 | /* { |
621 | syscallarg(int) fildes; |
622 | syscallarg(struct aiocb *) aiocbp; |
623 | } */ |
624 | struct proc *p = l->l_proc; |
625 | struct aioproc *aio; |
626 | struct aio_job *a_job; |
627 | struct aiocb *aiocbp_ptr; |
628 | struct lio_req *lio; |
629 | struct filedesc *fdp = p->p_fd; |
630 | unsigned int cn, errcnt, fildes; |
631 | fdtab_t *dt; |
632 | |
633 | TAILQ_HEAD(, aio_job) tmp_jobs_list; |
634 | |
635 | /* Check for invalid file descriptor */ |
636 | fildes = (unsigned int)SCARG(uap, fildes); |
637 | dt = fdp->fd_dt; |
638 | if (fildes >= dt->dt_nfiles) |
639 | return EBADF; |
640 | if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) |
641 | return EBADF; |
642 | |
643 | /* Check if AIO structure is initialized */ |
644 | if (p->p_aio == NULL) { |
645 | *retval = AIO_NOTCANCELED; |
646 | return 0; |
647 | } |
648 | |
649 | aio = p->p_aio; |
650 | aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp); |
651 | |
652 | mutex_enter(&aio->aio_mtx); |
653 | |
654 | /* Cancel the jobs, and remove them from the queue */ |
655 | cn = 0; |
656 | TAILQ_INIT(&tmp_jobs_list); |
657 | TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { |
658 | if (aiocbp_ptr) { |
659 | if (aiocbp_ptr != a_job->aiocb_uptr) |
660 | continue; |
661 | if (fildes != a_job->aiocbp.aio_fildes) { |
662 | mutex_exit(&aio->aio_mtx); |
663 | return EBADF; |
664 | } |
665 | } else if (a_job->aiocbp.aio_fildes != fildes) |
666 | continue; |
667 | |
668 | TAILQ_REMOVE(&aio->jobs_queue, a_job, list); |
669 | TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list); |
670 | |
671 | /* Decrease the counters */ |
672 | atomic_dec_uint(&aio_jobs_count); |
673 | aio->jobs_count--; |
674 | lio = a_job->lio; |
675 | if (lio != NULL && --lio->refcnt != 0) |
676 | a_job->lio = NULL; |
677 | |
678 | cn++; |
679 | if (aiocbp_ptr) |
680 | break; |
681 | } |
682 | |
683 | /* There are canceled jobs */ |
684 | if (cn) |
685 | *retval = AIO_CANCELED; |
686 | |
687 | /* We cannot cancel current job */ |
688 | a_job = aio->curjob; |
689 | if (a_job && ((a_job->aiocbp.aio_fildes == fildes) || |
690 | (a_job->aiocb_uptr == aiocbp_ptr))) |
691 | *retval = AIO_NOTCANCELED; |
692 | |
693 | mutex_exit(&aio->aio_mtx); |
694 | |
695 | /* Free the jobs after the lock */ |
696 | errcnt = 0; |
697 | while (!TAILQ_EMPTY(&tmp_jobs_list)) { |
698 | a_job = TAILQ_FIRST(&tmp_jobs_list); |
699 | TAILQ_REMOVE(&tmp_jobs_list, a_job, list); |
700 | /* Set the errno and copy structures back to the user-space */ |
701 | a_job->aiocbp._errno = ECANCELED; |
702 | a_job->aiocbp._state = JOB_DONE; |
703 | if (copyout(&a_job->aiocbp, a_job->aiocb_uptr, |
704 | sizeof(struct aiocb))) |
705 | errcnt++; |
706 | /* Send a signal if any */ |
707 | aio_sendsig(p, &a_job->aiocbp.aio_sigevent); |
708 | if (a_job->lio) { |
709 | lio = a_job->lio; |
710 | aio_sendsig(p, &lio->sig); |
711 | pool_put(&aio_lio_pool, lio); |
712 | } |
713 | pool_put(&aio_job_pool, a_job); |
714 | } |
715 | |
716 | if (errcnt) |
717 | return EFAULT; |
718 | |
719 | /* Set a correct return value */ |
720 | if (*retval == 0) |
721 | *retval = AIO_ALLDONE; |
722 | |
723 | return 0; |
724 | } |
725 | |
726 | int |
727 | sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, |
728 | register_t *retval) |
729 | { |
730 | /* { |
731 | syscallarg(const struct aiocb *) aiocbp; |
732 | } */ |
733 | struct proc *p = l->l_proc; |
734 | struct aioproc *aio = p->p_aio; |
735 | struct aiocb aiocbp; |
736 | int error; |
737 | |
738 | if (aio == NULL) |
739 | return EINVAL; |
740 | |
741 | error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); |
742 | if (error) |
743 | return error; |
744 | |
745 | if (aiocbp._state == JOB_NONE) |
746 | return EINVAL; |
747 | |
748 | *retval = aiocbp._errno; |
749 | |
750 | return 0; |
751 | } |
752 | |
753 | int |
754 | sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap, |
755 | register_t *retval) |
756 | { |
757 | /* { |
758 | syscallarg(int) op; |
759 | syscallarg(struct aiocb *) aiocbp; |
760 | } */ |
761 | int op = SCARG(uap, op); |
762 | |
763 | if ((op != O_DSYNC) && (op != O_SYNC)) |
764 | return EINVAL; |
765 | |
766 | op = O_DSYNC ? AIO_DSYNC : AIO_SYNC; |
767 | |
768 | return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL); |
769 | } |
770 | |
771 | int |
772 | sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, |
773 | register_t *retval) |
774 | { |
775 | /* { |
776 | syscallarg(struct aiocb *) aiocbp; |
777 | } */ |
778 | |
779 | return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); |
780 | } |
781 | |
782 | int |
783 | sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, |
784 | register_t *retval) |
785 | { |
786 | /* { |
787 | syscallarg(struct aiocb *) aiocbp; |
788 | } */ |
789 | struct proc *p = l->l_proc; |
790 | struct aioproc *aio = p->p_aio; |
791 | struct aiocb aiocbp; |
792 | int error; |
793 | |
794 | if (aio == NULL) |
795 | return EINVAL; |
796 | |
797 | error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); |
798 | if (error) |
799 | return error; |
800 | |
801 | if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) |
802 | return EINVAL; |
803 | |
804 | *retval = aiocbp._retval; |
805 | |
806 | /* Reset the internal variables */ |
807 | aiocbp._errno = 0; |
808 | aiocbp._retval = -1; |
809 | aiocbp._state = JOB_NONE; |
810 | error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb)); |
811 | |
812 | return error; |
813 | } |
814 | |
815 | int |
816 | sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, |
817 | register_t *retval) |
818 | { |
819 | /* { |
820 | syscallarg(const struct aiocb *const[]) list; |
821 | syscallarg(int) nent; |
822 | syscallarg(const struct timespec *) timeout; |
823 | } */ |
824 | struct aiocb **list; |
825 | struct timespec ts; |
826 | int error, nent; |
827 | |
828 | nent = SCARG(uap, nent); |
829 | if (nent <= 0 || nent > aio_listio_max) |
830 | return EAGAIN; |
831 | |
832 | if (SCARG(uap, timeout)) { |
833 | /* Convert timespec to ticks */ |
834 | error = copyin(SCARG(uap, timeout), &ts, |
835 | sizeof(struct timespec)); |
836 | if (error) |
837 | return error; |
838 | } |
839 | |
840 | list = kmem_alloc(nent * sizeof(*list), KM_SLEEP); |
841 | error = copyin(SCARG(uap, list), list, nent * sizeof(*list)); |
842 | if (error) |
843 | goto out; |
844 | error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); |
845 | out: |
846 | kmem_free(list, nent * sizeof(*list)); |
847 | return error; |
848 | } |
849 | |
850 | int |
851 | aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent, |
852 | struct timespec *ts) |
853 | { |
854 | struct proc *p = l->l_proc; |
855 | struct aioproc *aio; |
856 | struct aio_job *a_job; |
857 | int i, error, timo; |
858 | |
859 | if (p->p_aio == NULL) |
860 | return EAGAIN; |
861 | aio = p->p_aio; |
862 | |
863 | if (ts) { |
864 | timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); |
865 | if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) |
866 | timo = 1; |
867 | if (timo <= 0) |
868 | return EAGAIN; |
869 | } else |
870 | timo = 0; |
871 | |
872 | mutex_enter(&aio->aio_mtx); |
873 | for (;;) { |
874 | for (i = 0; i < nent; i++) { |
875 | |
876 | /* Skip NULL entries */ |
877 | if (aiocbp_list[i] == NULL) |
878 | continue; |
879 | |
880 | /* Skip current job */ |
881 | if (aio->curjob) { |
882 | a_job = aio->curjob; |
883 | if (a_job->aiocb_uptr == aiocbp_list[i]) |
884 | continue; |
885 | } |
886 | |
887 | /* Look for a job in the queue */ |
888 | TAILQ_FOREACH(a_job, &aio->jobs_queue, list) |
889 | if (a_job->aiocb_uptr == aiocbp_list[i]) |
890 | break; |
891 | |
892 | if (a_job == NULL) { |
893 | struct aiocb aiocbp; |
894 | |
895 | mutex_exit(&aio->aio_mtx); |
896 | |
897 | /* Check if the job is done. */ |
898 | error = copyin(aiocbp_list[i], &aiocbp, |
899 | sizeof(struct aiocb)); |
900 | if (error == 0 && aiocbp._state != JOB_DONE) { |
901 | mutex_enter(&aio->aio_mtx); |
902 | continue; |
903 | } |
904 | return error; |
905 | } |
906 | } |
907 | |
908 | /* Wait for a signal or when timeout occurs */ |
909 | error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo); |
910 | if (error) { |
911 | if (error == EWOULDBLOCK) |
912 | error = EAGAIN; |
913 | break; |
914 | } |
915 | } |
916 | mutex_exit(&aio->aio_mtx); |
917 | return error; |
918 | } |
919 | |
920 | int |
921 | sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, |
922 | register_t *retval) |
923 | { |
924 | /* { |
925 | syscallarg(struct aiocb *) aiocbp; |
926 | } */ |
927 | |
928 | return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); |
929 | } |
930 | |
931 | int |
932 | sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, |
933 | register_t *retval) |
934 | { |
935 | /* { |
936 | syscallarg(int) mode; |
937 | syscallarg(struct aiocb *const[]) list; |
938 | syscallarg(int) nent; |
939 | syscallarg(struct sigevent *) sig; |
940 | } */ |
941 | struct proc *p = l->l_proc; |
942 | struct aioproc *aio; |
943 | struct aiocb **aiocbp_list; |
944 | struct lio_req *lio; |
945 | int i, error, errcnt, mode, nent; |
946 | |
947 | mode = SCARG(uap, mode); |
948 | nent = SCARG(uap, nent); |
949 | |
950 | /* Non-accurate checks for the limit and invalid values */ |
951 | if (nent < 1 || nent > aio_listio_max) |
952 | return EINVAL; |
953 | if (aio_jobs_count + nent > aio_max) |
954 | return EAGAIN; |
955 | |
956 | /* Check if AIO structure is initialized, if not - initialize it */ |
957 | if (p->p_aio == NULL) |
958 | if (aio_procinit(p)) |
959 | return EAGAIN; |
960 | aio = p->p_aio; |
961 | |
962 | /* Create a LIO structure */ |
963 | lio = pool_get(&aio_lio_pool, PR_WAITOK); |
964 | lio->refcnt = 1; |
965 | error = 0; |
966 | |
967 | switch (mode) { |
968 | case LIO_WAIT: |
969 | memset(&lio->sig, 0, sizeof(struct sigevent)); |
970 | break; |
971 | case LIO_NOWAIT: |
972 | /* Check for signal, validate it */ |
973 | if (SCARG(uap, sig)) { |
974 | struct sigevent *sig = &lio->sig; |
975 | |
976 | error = copyin(SCARG(uap, sig), &lio->sig, |
977 | sizeof(struct sigevent)); |
978 | if (error == 0 && |
979 | (sig->sigev_signo < 0 || |
980 | sig->sigev_signo >= NSIG || |
981 | sig->sigev_notify < SIGEV_NONE || |
982 | sig->sigev_notify > SIGEV_SA)) |
983 | error = EINVAL; |
984 | } else |
985 | memset(&lio->sig, 0, sizeof(struct sigevent)); |
986 | break; |
987 | default: |
988 | error = EINVAL; |
989 | break; |
990 | } |
991 | |
992 | if (error != 0) { |
993 | pool_put(&aio_lio_pool, lio); |
994 | return error; |
995 | } |
996 | |
997 | /* Get the list from user-space */ |
998 | aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP); |
999 | error = copyin(SCARG(uap, list), aiocbp_list, |
1000 | nent * sizeof(*aiocbp_list)); |
1001 | if (error) { |
1002 | mutex_enter(&aio->aio_mtx); |
1003 | goto err; |
1004 | } |
1005 | |
1006 | /* Enqueue all jobs */ |
1007 | errcnt = 0; |
1008 | for (i = 0; i < nent; i++) { |
1009 | error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio); |
1010 | /* |
1011 | * According to POSIX, in such error case it may |
1012 | * fail with other I/O operations initiated. |
1013 | */ |
1014 | if (error) |
1015 | errcnt++; |
1016 | } |
1017 | |
1018 | mutex_enter(&aio->aio_mtx); |
1019 | |
1020 | /* Return an error, if any */ |
1021 | if (errcnt) { |
1022 | error = EIO; |
1023 | goto err; |
1024 | } |
1025 | |
1026 | if (mode == LIO_WAIT) { |
1027 | /* |
1028 | * Wait for AIO completion. In such case, |
1029 | * the LIO structure will be freed here. |
1030 | */ |
1031 | while (lio->refcnt > 1 && error == 0) |
1032 | error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx); |
1033 | if (error) |
1034 | error = EINTR; |
1035 | } |
1036 | |
1037 | err: |
1038 | if (--lio->refcnt != 0) |
1039 | lio = NULL; |
1040 | mutex_exit(&aio->aio_mtx); |
1041 | if (lio != NULL) { |
1042 | aio_sendsig(p, &lio->sig); |
1043 | pool_put(&aio_lio_pool, lio); |
1044 | } |
1045 | kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list)); |
1046 | return error; |
1047 | } |
1048 | |
1049 | /* |
1050 | * SysCtl |
1051 | */ |
1052 | |
1053 | static int |
1054 | sysctl_aio_listio_max(SYSCTLFN_ARGS) |
1055 | { |
1056 | struct sysctlnode node; |
1057 | int error, newsize; |
1058 | |
1059 | node = *rnode; |
1060 | node.sysctl_data = &newsize; |
1061 | |
1062 | newsize = aio_listio_max; |
1063 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
1064 | if (error || newp == NULL) |
1065 | return error; |
1066 | |
1067 | if (newsize < 1 || newsize > aio_max) |
1068 | return EINVAL; |
1069 | aio_listio_max = newsize; |
1070 | |
1071 | return 0; |
1072 | } |
1073 | |
1074 | static int |
1075 | sysctl_aio_max(SYSCTLFN_ARGS) |
1076 | { |
1077 | struct sysctlnode node; |
1078 | int error, newsize; |
1079 | |
1080 | node = *rnode; |
1081 | node.sysctl_data = &newsize; |
1082 | |
1083 | newsize = aio_max; |
1084 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
1085 | if (error || newp == NULL) |
1086 | return error; |
1087 | |
1088 | if (newsize < 1 || newsize < aio_listio_max) |
1089 | return EINVAL; |
1090 | aio_max = newsize; |
1091 | |
1092 | return 0; |
1093 | } |
1094 | |
1095 | static int |
1096 | sysctl_aio_init(void) |
1097 | { |
1098 | int rv; |
1099 | |
1100 | aio_sysctl = NULL; |
1101 | |
1102 | rv = sysctl_createv(&aio_sysctl, 0, NULL, NULL, |
1103 | CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, |
1104 | CTLTYPE_INT, "posix_aio" , |
1105 | SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " |
1106 | "Asynchronous I/O option to which the " |
1107 | "system attempts to conform" ), |
1108 | NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0, |
1109 | CTL_KERN, CTL_CREATE, CTL_EOL); |
1110 | |
1111 | if (rv != 0) |
1112 | return rv; |
1113 | |
1114 | rv = sysctl_createv(&aio_sysctl, 0, NULL, NULL, |
1115 | CTLFLAG_PERMANENT | CTLFLAG_READWRITE, |
1116 | CTLTYPE_INT, "aio_listio_max" , |
1117 | SYSCTL_DESCR("Maximum number of asynchronous I/O " |
1118 | "operations in a single list I/O call" ), |
1119 | sysctl_aio_listio_max, 0, &aio_listio_max, 0, |
1120 | CTL_KERN, CTL_CREATE, CTL_EOL); |
1121 | |
1122 | if (rv != 0) |
1123 | return rv; |
1124 | |
1125 | rv = sysctl_createv(&aio_sysctl, 0, NULL, NULL, |
1126 | CTLFLAG_PERMANENT | CTLFLAG_READWRITE, |
1127 | CTLTYPE_INT, "aio_max" , |
1128 | SYSCTL_DESCR("Maximum number of asynchronous I/O " |
1129 | "operations" ), |
1130 | sysctl_aio_max, 0, &aio_max, 0, |
1131 | CTL_KERN, CTL_CREATE, CTL_EOL); |
1132 | |
1133 | return rv; |
1134 | } |
1135 | |
1136 | /* |
1137 | * Debugging |
1138 | */ |
1139 | #if defined(DDB) |
1140 | void |
1141 | aio_print_jobs(void (*pr)(const char *, ...)) |
1142 | { |
1143 | struct proc *p = curlwp->l_proc; |
1144 | struct aioproc *aio; |
1145 | struct aio_job *a_job; |
1146 | struct aiocb *aiocbp; |
1147 | |
1148 | if (p == NULL) { |
1149 | (*pr)("AIO: We are not in the processes right now.\n" ); |
1150 | return; |
1151 | } |
1152 | |
1153 | aio = p->p_aio; |
1154 | if (aio == NULL) { |
1155 | (*pr)("AIO data is not initialized (PID = %d).\n" , p->p_pid); |
1156 | return; |
1157 | } |
1158 | |
1159 | (*pr)("AIO: PID = %d\n" , p->p_pid); |
1160 | (*pr)("AIO: Global count of the jobs = %u\n" , aio_jobs_count); |
1161 | (*pr)("AIO: Count of the jobs = %u\n" , aio->jobs_count); |
1162 | |
1163 | if (aio->curjob) { |
1164 | a_job = aio->curjob; |
1165 | (*pr)("\nAIO current job:\n" ); |
1166 | (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n" , |
1167 | a_job->aio_op, a_job->aiocbp._errno, |
1168 | a_job->aiocbp._state, a_job->aiocb_uptr); |
1169 | aiocbp = &a_job->aiocbp; |
1170 | (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n" , |
1171 | aiocbp->aio_fildes, aiocbp->aio_offset, |
1172 | aiocbp->aio_buf, aiocbp->aio_nbytes); |
1173 | } |
1174 | |
1175 | (*pr)("\nAIO queue:\n" ); |
1176 | TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { |
1177 | (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n" , |
1178 | a_job->aio_op, a_job->aiocbp._errno, |
1179 | a_job->aiocbp._state, a_job->aiocb_uptr); |
1180 | aiocbp = &a_job->aiocbp; |
1181 | (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n" , |
1182 | aiocbp->aio_fildes, aiocbp->aio_offset, |
1183 | aiocbp->aio_buf, aiocbp->aio_nbytes); |
1184 | } |
1185 | } |
1186 | #endif /* defined(DDB) */ |
1187 | |