1 | /* $NetBSD: kern_resource.c,v 1.175 2016/07/13 09:52:00 njoly Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1982, 1986, 1991, 1993 |
5 | * The Regents of the University of California. All rights reserved. |
6 | * (c) UNIX System Laboratories, Inc. |
7 | * All or some portions of this file are derived from material licensed |
8 | * to the University of California by American Telephone and Telegraph |
9 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
10 | * the permission of UNIX System Laboratories, Inc. |
11 | * |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions |
14 | * are met: |
15 | * 1. Redistributions of source code must retain the above copyright |
16 | * notice, this list of conditions and the following disclaimer. |
17 | * 2. Redistributions in binary form must reproduce the above copyright |
18 | * notice, this list of conditions and the following disclaimer in the |
19 | * documentation and/or other materials provided with the distribution. |
20 | * 3. Neither the name of the University nor the names of its contributors |
21 | * may be used to endorse or promote products derived from this software |
22 | * without specific prior written permission. |
23 | * |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
34 | * SUCH DAMAGE. |
35 | * |
36 | * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95 |
37 | */ |
38 | |
39 | #include <sys/cdefs.h> |
40 | __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.175 2016/07/13 09:52:00 njoly Exp $" ); |
41 | |
42 | #include <sys/param.h> |
43 | #include <sys/systm.h> |
44 | #include <sys/kernel.h> |
45 | #include <sys/file.h> |
46 | #include <sys/resourcevar.h> |
47 | #include <sys/kmem.h> |
48 | #include <sys/namei.h> |
49 | #include <sys/pool.h> |
50 | #include <sys/proc.h> |
51 | #include <sys/sysctl.h> |
52 | #include <sys/timevar.h> |
53 | #include <sys/kauth.h> |
54 | #include <sys/atomic.h> |
55 | #include <sys/mount.h> |
56 | #include <sys/syscallargs.h> |
57 | #include <sys/atomic.h> |
58 | |
59 | #include <uvm/uvm_extern.h> |
60 | |
/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 */
rlim_t maxdmap = MAXDSIZ;	/* ceiling applied to RLIMIT_DATA by dosetrlimit() */
rlim_t maxsmap = MAXSSIZ;	/* ceiling applied to RLIMIT_STACK by dosetrlimit() */

/* Pool caches backing struct plimit / struct pstats; created in resource_init(). */
static pool_cache_t plimit_cache __read_mostly;
static pool_cache_t pstats_cache __read_mostly;

/* Handle returned by kauth_listen_scope() in resource_init(). */
static kauth_listener_t resource_listener;
/* sysctl log handed to every sysctl_createv() call in sysctl_proc_setup(). */
static struct sysctllog *proc_sysctllog;

/* Forward declarations of file-local helpers defined further down. */
static int donice(struct lwp *, struct proc *, int);
static void sysctl_proc_setup(void);
76 | |
77 | static int |
78 | resource_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, |
79 | void *arg0, void *arg1, void *arg2, void *arg3) |
80 | { |
81 | struct proc *p; |
82 | int result; |
83 | |
84 | result = KAUTH_RESULT_DEFER; |
85 | p = arg0; |
86 | |
87 | switch (action) { |
88 | case KAUTH_PROCESS_NICE: |
89 | if (kauth_cred_geteuid(cred) != kauth_cred_geteuid(p->p_cred) && |
90 | kauth_cred_getuid(cred) != kauth_cred_geteuid(p->p_cred)) { |
91 | break; |
92 | } |
93 | |
94 | if ((u_long)arg1 >= p->p_nice) |
95 | result = KAUTH_RESULT_ALLOW; |
96 | |
97 | break; |
98 | |
99 | case KAUTH_PROCESS_RLIMIT: { |
100 | enum kauth_process_req req; |
101 | |
102 | req = (enum kauth_process_req)(unsigned long)arg1; |
103 | |
104 | switch (req) { |
105 | case KAUTH_REQ_PROCESS_RLIMIT_GET: |
106 | result = KAUTH_RESULT_ALLOW; |
107 | break; |
108 | |
109 | case KAUTH_REQ_PROCESS_RLIMIT_SET: { |
110 | struct rlimit *new_rlimit; |
111 | u_long which; |
112 | |
113 | if ((p != curlwp->l_proc) && |
114 | (proc_uidmatch(cred, p->p_cred) != 0)) |
115 | break; |
116 | |
117 | new_rlimit = arg2; |
118 | which = (u_long)arg3; |
119 | |
120 | if (new_rlimit->rlim_max <= p->p_rlimit[which].rlim_max) |
121 | result = KAUTH_RESULT_ALLOW; |
122 | |
123 | break; |
124 | } |
125 | |
126 | default: |
127 | break; |
128 | } |
129 | |
130 | break; |
131 | } |
132 | |
133 | default: |
134 | break; |
135 | } |
136 | |
137 | return result; |
138 | } |
139 | |
140 | void |
141 | resource_init(void) |
142 | { |
143 | |
144 | plimit_cache = pool_cache_init(sizeof(struct plimit), 0, 0, 0, |
145 | "plimitpl" , NULL, IPL_NONE, NULL, NULL, NULL); |
146 | pstats_cache = pool_cache_init(sizeof(struct pstats), 0, 0, 0, |
147 | "pstatspl" , NULL, IPL_NONE, NULL, NULL, NULL); |
148 | |
149 | resource_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, |
150 | resource_listener_cb, NULL); |
151 | |
152 | sysctl_proc_setup(); |
153 | } |
154 | |
155 | /* |
156 | * Resource controls and accounting. |
157 | */ |
158 | |
159 | int |
160 | sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap, |
161 | register_t *retval) |
162 | { |
163 | /* { |
164 | syscallarg(int) which; |
165 | syscallarg(id_t) who; |
166 | } */ |
167 | struct proc *curp = l->l_proc, *p; |
168 | id_t who = SCARG(uap, who); |
169 | int low = NZERO + PRIO_MAX + 1; |
170 | |
171 | mutex_enter(proc_lock); |
172 | switch (SCARG(uap, which)) { |
173 | case PRIO_PROCESS: |
174 | p = who ? proc_find(who) : curp; |
175 | if (p != NULL) |
176 | low = p->p_nice; |
177 | break; |
178 | |
179 | case PRIO_PGRP: { |
180 | struct pgrp *pg; |
181 | |
182 | if (who == 0) |
183 | pg = curp->p_pgrp; |
184 | else if ((pg = pgrp_find(who)) == NULL) |
185 | break; |
186 | LIST_FOREACH(p, &pg->pg_members, p_pglist) { |
187 | if (p->p_nice < low) |
188 | low = p->p_nice; |
189 | } |
190 | break; |
191 | } |
192 | |
193 | case PRIO_USER: |
194 | if (who == 0) |
195 | who = (int)kauth_cred_geteuid(l->l_cred); |
196 | PROCLIST_FOREACH(p, &allproc) { |
197 | mutex_enter(p->p_lock); |
198 | if (kauth_cred_geteuid(p->p_cred) == |
199 | (uid_t)who && p->p_nice < low) |
200 | low = p->p_nice; |
201 | mutex_exit(p->p_lock); |
202 | } |
203 | break; |
204 | |
205 | default: |
206 | mutex_exit(proc_lock); |
207 | return EINVAL; |
208 | } |
209 | mutex_exit(proc_lock); |
210 | |
211 | if (low == NZERO + PRIO_MAX + 1) { |
212 | return ESRCH; |
213 | } |
214 | *retval = low - NZERO; |
215 | return 0; |
216 | } |
217 | |
218 | int |
219 | sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap, |
220 | register_t *retval) |
221 | { |
222 | /* { |
223 | syscallarg(int) which; |
224 | syscallarg(id_t) who; |
225 | syscallarg(int) prio; |
226 | } */ |
227 | struct proc *curp = l->l_proc, *p; |
228 | id_t who = SCARG(uap, who); |
229 | int found = 0, error = 0; |
230 | |
231 | mutex_enter(proc_lock); |
232 | switch (SCARG(uap, which)) { |
233 | case PRIO_PROCESS: |
234 | p = who ? proc_find(who) : curp; |
235 | if (p != NULL) { |
236 | mutex_enter(p->p_lock); |
237 | found++; |
238 | error = donice(l, p, SCARG(uap, prio)); |
239 | mutex_exit(p->p_lock); |
240 | } |
241 | break; |
242 | |
243 | case PRIO_PGRP: { |
244 | struct pgrp *pg; |
245 | |
246 | if (who == 0) |
247 | pg = curp->p_pgrp; |
248 | else if ((pg = pgrp_find(who)) == NULL) |
249 | break; |
250 | LIST_FOREACH(p, &pg->pg_members, p_pglist) { |
251 | mutex_enter(p->p_lock); |
252 | found++; |
253 | error = donice(l, p, SCARG(uap, prio)); |
254 | mutex_exit(p->p_lock); |
255 | if (error) |
256 | break; |
257 | } |
258 | break; |
259 | } |
260 | |
261 | case PRIO_USER: |
262 | if (who == 0) |
263 | who = (int)kauth_cred_geteuid(l->l_cred); |
264 | PROCLIST_FOREACH(p, &allproc) { |
265 | mutex_enter(p->p_lock); |
266 | if (kauth_cred_geteuid(p->p_cred) == |
267 | (uid_t)SCARG(uap, who)) { |
268 | found++; |
269 | error = donice(l, p, SCARG(uap, prio)); |
270 | } |
271 | mutex_exit(p->p_lock); |
272 | if (error) |
273 | break; |
274 | } |
275 | break; |
276 | |
277 | default: |
278 | mutex_exit(proc_lock); |
279 | return EINVAL; |
280 | } |
281 | mutex_exit(proc_lock); |
282 | |
283 | return (found == 0) ? ESRCH : error; |
284 | } |
285 | |
286 | /* |
287 | * Renice a process. |
288 | * |
289 | * Call with the target process' credentials locked. |
290 | */ |
291 | static int |
292 | donice(struct lwp *l, struct proc *chgp, int n) |
293 | { |
294 | kauth_cred_t cred = l->l_cred; |
295 | |
296 | KASSERT(mutex_owned(chgp->p_lock)); |
297 | |
298 | if (kauth_cred_geteuid(cred) && kauth_cred_getuid(cred) && |
299 | kauth_cred_geteuid(cred) != kauth_cred_geteuid(chgp->p_cred) && |
300 | kauth_cred_getuid(cred) != kauth_cred_geteuid(chgp->p_cred)) |
301 | return EPERM; |
302 | |
303 | if (n > PRIO_MAX) { |
304 | n = PRIO_MAX; |
305 | } |
306 | if (n < PRIO_MIN) { |
307 | n = PRIO_MIN; |
308 | } |
309 | n += NZERO; |
310 | |
311 | if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp, |
312 | KAUTH_ARG(n), NULL, NULL)) { |
313 | return EACCES; |
314 | } |
315 | |
316 | sched_nice(chgp, n); |
317 | return 0; |
318 | } |
319 | |
320 | int |
321 | sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap, |
322 | register_t *retval) |
323 | { |
324 | /* { |
325 | syscallarg(int) which; |
326 | syscallarg(const struct rlimit *) rlp; |
327 | } */ |
328 | int error, which = SCARG(uap, which); |
329 | struct rlimit alim; |
330 | |
331 | error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit)); |
332 | if (error) { |
333 | return error; |
334 | } |
335 | return dosetrlimit(l, l->l_proc, which, &alim); |
336 | } |
337 | |
338 | int |
339 | dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp) |
340 | { |
341 | struct rlimit *alimp; |
342 | int error; |
343 | |
344 | if ((u_int)which >= RLIM_NLIMITS) |
345 | return EINVAL; |
346 | |
347 | if (limp->rlim_cur > limp->rlim_max) { |
348 | /* |
349 | * This is programming error. According to SUSv2, we should |
350 | * return error in this case. |
351 | */ |
352 | return EINVAL; |
353 | } |
354 | |
355 | alimp = &p->p_rlimit[which]; |
356 | /* if we don't change the value, no need to limcopy() */ |
357 | if (limp->rlim_cur == alimp->rlim_cur && |
358 | limp->rlim_max == alimp->rlim_max) |
359 | return 0; |
360 | |
361 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT, |
362 | p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which)); |
363 | if (error) |
364 | return error; |
365 | |
366 | lim_privatise(p); |
367 | /* p->p_limit is now unchangeable */ |
368 | alimp = &p->p_rlimit[which]; |
369 | |
370 | switch (which) { |
371 | |
372 | case RLIMIT_DATA: |
373 | if (limp->rlim_cur > maxdmap) |
374 | limp->rlim_cur = maxdmap; |
375 | if (limp->rlim_max > maxdmap) |
376 | limp->rlim_max = maxdmap; |
377 | break; |
378 | |
379 | case RLIMIT_STACK: |
380 | if (limp->rlim_cur > maxsmap) |
381 | limp->rlim_cur = maxsmap; |
382 | if (limp->rlim_max > maxsmap) |
383 | limp->rlim_max = maxsmap; |
384 | |
385 | /* |
386 | * Return EINVAL if the new stack size limit is lower than |
387 | * current usage. Otherwise, the process would get SIGSEGV the |
388 | * moment it would try to access anything on its current stack. |
389 | * This conforms to SUSv2. |
390 | */ |
391 | if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE || |
392 | limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) { |
393 | return EINVAL; |
394 | } |
395 | |
396 | /* |
397 | * Stack is allocated to the max at exec time with |
398 | * only "rlim_cur" bytes accessible (In other words, |
399 | * allocates stack dividing two contiguous regions at |
400 | * "rlim_cur" bytes boundary). |
401 | * |
402 | * Since allocation is done in terms of page, roundup |
403 | * "rlim_cur" (otherwise, contiguous regions |
404 | * overlap). If stack limit is going up make more |
405 | * accessible, if going down make inaccessible. |
406 | */ |
407 | limp->rlim_max = round_page(limp->rlim_max); |
408 | limp->rlim_cur = round_page(limp->rlim_cur); |
409 | if (limp->rlim_cur != alimp->rlim_cur) { |
410 | vaddr_t addr; |
411 | vsize_t size; |
412 | vm_prot_t prot; |
413 | char *base, *tmp; |
414 | |
415 | base = p->p_vmspace->vm_minsaddr; |
416 | if (limp->rlim_cur > alimp->rlim_cur) { |
417 | prot = VM_PROT_READ | VM_PROT_WRITE; |
418 | size = limp->rlim_cur - alimp->rlim_cur; |
419 | tmp = STACK_GROW(base, alimp->rlim_cur); |
420 | } else { |
421 | prot = VM_PROT_NONE; |
422 | size = alimp->rlim_cur - limp->rlim_cur; |
423 | tmp = STACK_GROW(base, limp->rlim_cur); |
424 | } |
425 | addr = (vaddr_t)STACK_ALLOC(tmp, size); |
426 | (void) uvm_map_protect(&p->p_vmspace->vm_map, |
427 | addr, addr + size, prot, false); |
428 | } |
429 | break; |
430 | |
431 | case RLIMIT_NOFILE: |
432 | if (limp->rlim_cur > maxfiles) |
433 | limp->rlim_cur = maxfiles; |
434 | if (limp->rlim_max > maxfiles) |
435 | limp->rlim_max = maxfiles; |
436 | break; |
437 | |
438 | case RLIMIT_NPROC: |
439 | if (limp->rlim_cur > maxproc) |
440 | limp->rlim_cur = maxproc; |
441 | if (limp->rlim_max > maxproc) |
442 | limp->rlim_max = maxproc; |
443 | break; |
444 | |
445 | case RLIMIT_NTHR: |
446 | if (limp->rlim_cur > maxlwp) |
447 | limp->rlim_cur = maxlwp; |
448 | if (limp->rlim_max > maxlwp) |
449 | limp->rlim_max = maxlwp; |
450 | break; |
451 | } |
452 | |
453 | mutex_enter(&p->p_limit->pl_lock); |
454 | *alimp = *limp; |
455 | mutex_exit(&p->p_limit->pl_lock); |
456 | return 0; |
457 | } |
458 | |
459 | int |
460 | sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap, |
461 | register_t *retval) |
462 | { |
463 | /* { |
464 | syscallarg(int) which; |
465 | syscallarg(struct rlimit *) rlp; |
466 | } */ |
467 | struct proc *p = l->l_proc; |
468 | int which = SCARG(uap, which); |
469 | struct rlimit rl; |
470 | |
471 | if ((u_int)which >= RLIM_NLIMITS) |
472 | return EINVAL; |
473 | |
474 | mutex_enter(p->p_lock); |
475 | memcpy(&rl, &p->p_rlimit[which], sizeof(rl)); |
476 | mutex_exit(p->p_lock); |
477 | |
478 | return copyout(&rl, SCARG(uap, rlp), sizeof(rl)); |
479 | } |
480 | |
481 | /* |
482 | * Transform the running time and tick information in proc p into user, |
483 | * system, and interrupt time usage. |
484 | * |
485 | * Should be called with p->p_lock held unless called from exit1(). |
486 | */ |
487 | void |
488 | calcru(struct proc *p, struct timeval *up, struct timeval *sp, |
489 | struct timeval *ip, struct timeval *rp) |
490 | { |
491 | uint64_t u, st, ut, it, tot; |
492 | struct lwp *l; |
493 | struct bintime tm; |
494 | struct timeval tv; |
495 | |
496 | KASSERT(p->p_stat == SDEAD || mutex_owned(p->p_lock)); |
497 | |
498 | mutex_spin_enter(&p->p_stmutex); |
499 | st = p->p_sticks; |
500 | ut = p->p_uticks; |
501 | it = p->p_iticks; |
502 | mutex_spin_exit(&p->p_stmutex); |
503 | |
504 | tm = p->p_rtime; |
505 | |
506 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { |
507 | lwp_lock(l); |
508 | bintime_add(&tm, &l->l_rtime); |
509 | if ((l->l_pflag & LP_RUNNING) != 0) { |
510 | struct bintime diff; |
511 | /* |
512 | * Adjust for the current time slice. This is |
513 | * actually fairly important since the error |
514 | * here is on the order of a time quantum, |
515 | * which is much greater than the sampling |
516 | * error. |
517 | */ |
518 | binuptime(&diff); |
519 | bintime_sub(&diff, &l->l_stime); |
520 | bintime_add(&tm, &diff); |
521 | } |
522 | lwp_unlock(l); |
523 | } |
524 | |
525 | tot = st + ut + it; |
526 | bintime2timeval(&tm, &tv); |
527 | u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec; |
528 | |
529 | if (tot == 0) { |
530 | /* No ticks, so can't use to share time out, split 50-50 */ |
531 | st = ut = u / 2; |
532 | } else { |
533 | st = (u * st) / tot; |
534 | ut = (u * ut) / tot; |
535 | } |
536 | if (sp != NULL) { |
537 | sp->tv_sec = st / 1000000; |
538 | sp->tv_usec = st % 1000000; |
539 | } |
540 | if (up != NULL) { |
541 | up->tv_sec = ut / 1000000; |
542 | up->tv_usec = ut % 1000000; |
543 | } |
544 | if (ip != NULL) { |
545 | if (it != 0) |
546 | it = (u * it) / tot; |
547 | ip->tv_sec = it / 1000000; |
548 | ip->tv_usec = it % 1000000; |
549 | } |
550 | if (rp != NULL) { |
551 | *rp = tv; |
552 | } |
553 | } |
554 | |
555 | int |
556 | sys___getrusage50(struct lwp *l, const struct sys___getrusage50_args *uap, |
557 | register_t *retval) |
558 | { |
559 | /* { |
560 | syscallarg(int) who; |
561 | syscallarg(struct rusage *) rusage; |
562 | } */ |
563 | int error; |
564 | struct rusage ru; |
565 | struct proc *p = l->l_proc; |
566 | |
567 | error = getrusage1(p, SCARG(uap, who), &ru); |
568 | if (error != 0) |
569 | return error; |
570 | |
571 | return copyout(&ru, SCARG(uap, rusage), sizeof(ru)); |
572 | } |
573 | |
574 | int |
575 | getrusage1(struct proc *p, int who, struct rusage *ru) { |
576 | |
577 | switch (who) { |
578 | case RUSAGE_SELF: |
579 | mutex_enter(p->p_lock); |
580 | memcpy(ru, &p->p_stats->p_ru, sizeof(*ru)); |
581 | calcru(p, &ru->ru_utime, &ru->ru_stime, NULL, NULL); |
582 | rulwps(p, ru); |
583 | mutex_exit(p->p_lock); |
584 | break; |
585 | case RUSAGE_CHILDREN: |
586 | mutex_enter(p->p_lock); |
587 | memcpy(ru, &p->p_stats->p_cru, sizeof(*ru)); |
588 | mutex_exit(p->p_lock); |
589 | break; |
590 | default: |
591 | return EINVAL; |
592 | } |
593 | |
594 | return 0; |
595 | } |
596 | |
597 | void |
598 | ruadd(struct rusage *ru, struct rusage *ru2) |
599 | { |
600 | long *ip, *ip2; |
601 | int i; |
602 | |
603 | timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); |
604 | timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); |
605 | if (ru->ru_maxrss < ru2->ru_maxrss) |
606 | ru->ru_maxrss = ru2->ru_maxrss; |
607 | ip = &ru->ru_first; ip2 = &ru2->ru_first; |
608 | for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) |
609 | *ip++ += *ip2++; |
610 | } |
611 | |
612 | void |
613 | rulwps(proc_t *p, struct rusage *ru) |
614 | { |
615 | lwp_t *l; |
616 | |
617 | KASSERT(mutex_owned(p->p_lock)); |
618 | |
619 | LIST_FOREACH(l, &p->p_lwps, l_sibling) { |
620 | ruadd(ru, &l->l_ru); |
621 | ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); |
622 | ru->ru_nivcsw += l->l_nivcsw; |
623 | } |
624 | } |
625 | |
626 | /* |
627 | * lim_copy: make a copy of the plimit structure. |
628 | * |
629 | * We use copy-on-write after fork, and copy when a limit is changed. |
630 | */ |
631 | struct plimit * |
632 | lim_copy(struct plimit *lim) |
633 | { |
634 | struct plimit *newlim; |
635 | char *corename; |
636 | size_t alen, len; |
637 | |
638 | newlim = pool_cache_get(plimit_cache, PR_WAITOK); |
639 | mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE); |
640 | newlim->pl_writeable = false; |
641 | newlim->pl_refcnt = 1; |
642 | newlim->pl_sv_limit = NULL; |
643 | |
644 | mutex_enter(&lim->pl_lock); |
645 | memcpy(newlim->pl_rlimit, lim->pl_rlimit, |
646 | sizeof(struct rlimit) * RLIM_NLIMITS); |
647 | |
648 | /* |
649 | * Note: the common case is a use of default core name. |
650 | */ |
651 | alen = 0; |
652 | corename = NULL; |
653 | for (;;) { |
654 | if (lim->pl_corename == defcorename) { |
655 | newlim->pl_corename = defcorename; |
656 | newlim->pl_cnlen = 0; |
657 | break; |
658 | } |
659 | len = lim->pl_cnlen; |
660 | if (len == alen) { |
661 | newlim->pl_corename = corename; |
662 | newlim->pl_cnlen = len; |
663 | memcpy(corename, lim->pl_corename, len); |
664 | corename = NULL; |
665 | break; |
666 | } |
667 | mutex_exit(&lim->pl_lock); |
668 | if (corename) { |
669 | kmem_free(corename, alen); |
670 | } |
671 | alen = len; |
672 | corename = kmem_alloc(alen, KM_SLEEP); |
673 | mutex_enter(&lim->pl_lock); |
674 | } |
675 | mutex_exit(&lim->pl_lock); |
676 | |
677 | if (corename) { |
678 | kmem_free(corename, alen); |
679 | } |
680 | return newlim; |
681 | } |
682 | |
683 | void |
684 | lim_addref(struct plimit *lim) |
685 | { |
686 | atomic_inc_uint(&lim->pl_refcnt); |
687 | } |
688 | |
689 | /* |
690 | * lim_privatise: give a process its own private plimit structure. |
691 | */ |
692 | void |
693 | lim_privatise(proc_t *p) |
694 | { |
695 | struct plimit *lim = p->p_limit, *newlim; |
696 | |
697 | if (lim->pl_writeable) { |
698 | return; |
699 | } |
700 | |
701 | newlim = lim_copy(lim); |
702 | |
703 | mutex_enter(p->p_lock); |
704 | if (p->p_limit->pl_writeable) { |
705 | /* Other thread won the race. */ |
706 | mutex_exit(p->p_lock); |
707 | lim_free(newlim); |
708 | return; |
709 | } |
710 | |
711 | /* |
712 | * Since p->p_limit can be accessed without locked held, |
713 | * old limit structure must not be deleted yet. |
714 | */ |
715 | newlim->pl_sv_limit = p->p_limit; |
716 | newlim->pl_writeable = true; |
717 | p->p_limit = newlim; |
718 | mutex_exit(p->p_lock); |
719 | } |
720 | |
721 | void |
722 | lim_setcorename(proc_t *p, char *name, size_t len) |
723 | { |
724 | struct plimit *lim; |
725 | char *oname; |
726 | size_t olen; |
727 | |
728 | lim_privatise(p); |
729 | lim = p->p_limit; |
730 | |
731 | mutex_enter(&lim->pl_lock); |
732 | oname = lim->pl_corename; |
733 | olen = lim->pl_cnlen; |
734 | lim->pl_corename = name; |
735 | lim->pl_cnlen = len; |
736 | mutex_exit(&lim->pl_lock); |
737 | |
738 | if (oname != defcorename) { |
739 | kmem_free(oname, olen); |
740 | } |
741 | } |
742 | |
743 | void |
744 | lim_free(struct plimit *lim) |
745 | { |
746 | struct plimit *sv_lim; |
747 | |
748 | do { |
749 | if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0) { |
750 | return; |
751 | } |
752 | if (lim->pl_corename != defcorename) { |
753 | kmem_free(lim->pl_corename, lim->pl_cnlen); |
754 | } |
755 | sv_lim = lim->pl_sv_limit; |
756 | mutex_destroy(&lim->pl_lock); |
757 | pool_cache_put(plimit_cache, lim); |
758 | } while ((lim = sv_lim) != NULL); |
759 | } |
760 | |
761 | struct pstats * |
762 | pstatscopy(struct pstats *ps) |
763 | { |
764 | struct pstats *nps; |
765 | size_t len; |
766 | |
767 | nps = pool_cache_get(pstats_cache, PR_WAITOK); |
768 | |
769 | len = (char *)&nps->pstat_endzero - (char *)&nps->pstat_startzero; |
770 | memset(&nps->pstat_startzero, 0, len); |
771 | |
772 | len = (char *)&nps->pstat_endcopy - (char *)&nps->pstat_startcopy; |
773 | memcpy(&nps->pstat_startcopy, &ps->pstat_startcopy, len); |
774 | |
775 | return nps; |
776 | } |
777 | |
778 | void |
779 | pstatsfree(struct pstats *ps) |
780 | { |
781 | |
782 | pool_cache_put(pstats_cache, ps); |
783 | } |
784 | |
785 | /* |
786 | * sysctl_proc_findproc: a routine for sysctl proc subtree helpers that |
787 | * need to pick a valid process by PID. |
788 | * |
789 | * => Hold a reference on the process, on success. |
790 | */ |
791 | static int |
792 | sysctl_proc_findproc(lwp_t *l, pid_t pid, proc_t **p2) |
793 | { |
794 | proc_t *p; |
795 | int error; |
796 | |
797 | if (pid == PROC_CURPROC) { |
798 | p = l->l_proc; |
799 | } else { |
800 | mutex_enter(proc_lock); |
801 | p = proc_find(pid); |
802 | if (p == NULL) { |
803 | mutex_exit(proc_lock); |
804 | return ESRCH; |
805 | } |
806 | } |
807 | error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY; |
808 | if (pid != PROC_CURPROC) { |
809 | mutex_exit(proc_lock); |
810 | } |
811 | *p2 = p; |
812 | return error; |
813 | } |
814 | |
815 | /* |
816 | * sysctl_proc_corename: helper routine to get or set the core file name |
817 | * for a process specified by PID. |
818 | */ |
819 | static int |
820 | sysctl_proc_corename(SYSCTLFN_ARGS) |
821 | { |
822 | struct proc *p; |
823 | struct plimit *lim; |
824 | char *cnbuf, *cname; |
825 | struct sysctlnode node; |
826 | size_t len; |
827 | int error; |
828 | |
829 | /* First, validate the request. */ |
830 | if (namelen != 0 || name[-1] != PROC_PID_CORENAME) |
831 | return EINVAL; |
832 | |
833 | /* Find the process. Hold a reference (p_reflock), if found. */ |
834 | error = sysctl_proc_findproc(l, (pid_t)name[-2], &p); |
835 | if (error) |
836 | return error; |
837 | |
838 | /* XXX-elad */ |
839 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p, |
840 | KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); |
841 | if (error) { |
842 | rw_exit(&p->p_reflock); |
843 | return error; |
844 | } |
845 | |
846 | cnbuf = PNBUF_GET(); |
847 | |
848 | if (oldp) { |
849 | /* Get case: copy the core name into the buffer. */ |
850 | error = kauth_authorize_process(l->l_cred, |
851 | KAUTH_PROCESS_CORENAME, p, |
852 | KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL); |
853 | if (error) { |
854 | goto done; |
855 | } |
856 | lim = p->p_limit; |
857 | mutex_enter(&lim->pl_lock); |
858 | strlcpy(cnbuf, lim->pl_corename, MAXPATHLEN); |
859 | mutex_exit(&lim->pl_lock); |
860 | } |
861 | |
862 | node = *rnode; |
863 | node.sysctl_data = cnbuf; |
864 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
865 | |
866 | /* Return if error, or if caller is only getting the core name. */ |
867 | if (error || newp == NULL) { |
868 | goto done; |
869 | } |
870 | |
871 | /* |
872 | * Set case. Check permission and then validate new core name. |
873 | * It must be either "core", "/core", or end in ".core". |
874 | */ |
875 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME, |
876 | p, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cnbuf, NULL); |
877 | if (error) { |
878 | goto done; |
879 | } |
880 | len = strlen(cnbuf); |
881 | if ((len < 4 || strcmp(cnbuf + len - 4, "core" ) != 0) || |
882 | (len > 4 && cnbuf[len - 5] != '/' && cnbuf[len - 5] != '.')) { |
883 | error = EINVAL; |
884 | goto done; |
885 | } |
886 | |
887 | /* Allocate, copy and set the new core name for plimit structure. */ |
888 | cname = kmem_alloc(++len, KM_NOSLEEP); |
889 | if (cname == NULL) { |
890 | error = ENOMEM; |
891 | goto done; |
892 | } |
893 | memcpy(cname, cnbuf, len); |
894 | lim_setcorename(p, cname, len); |
895 | done: |
896 | rw_exit(&p->p_reflock); |
897 | PNBUF_PUT(cnbuf); |
898 | return error; |
899 | } |
900 | |
901 | /* |
902 | * sysctl_proc_stop: helper routine for checking/setting the stop flags. |
903 | */ |
904 | static int |
905 | sysctl_proc_stop(SYSCTLFN_ARGS) |
906 | { |
907 | struct proc *p; |
908 | int isset, flag, error = 0; |
909 | struct sysctlnode node; |
910 | |
911 | if (namelen != 0) |
912 | return EINVAL; |
913 | |
914 | /* Find the process. Hold a reference (p_reflock), if found. */ |
915 | error = sysctl_proc_findproc(l, (pid_t)name[-2], &p); |
916 | if (error) |
917 | return error; |
918 | |
919 | /* XXX-elad */ |
920 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p, |
921 | KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); |
922 | if (error) { |
923 | goto out; |
924 | } |
925 | |
926 | /* Determine the flag. */ |
927 | switch (rnode->sysctl_num) { |
928 | case PROC_PID_STOPFORK: |
929 | flag = PS_STOPFORK; |
930 | break; |
931 | case PROC_PID_STOPEXEC: |
932 | flag = PS_STOPEXEC; |
933 | break; |
934 | case PROC_PID_STOPEXIT: |
935 | flag = PS_STOPEXIT; |
936 | break; |
937 | default: |
938 | error = EINVAL; |
939 | goto out; |
940 | } |
941 | isset = (p->p_flag & flag) ? 1 : 0; |
942 | node = *rnode; |
943 | node.sysctl_data = &isset; |
944 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
945 | |
946 | /* Return if error, or if callers is only getting the flag. */ |
947 | if (error || newp == NULL) { |
948 | goto out; |
949 | } |
950 | |
951 | /* Check if caller can set the flags. */ |
952 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG, |
953 | p, KAUTH_ARG(flag), NULL, NULL); |
954 | if (error) { |
955 | goto out; |
956 | } |
957 | mutex_enter(p->p_lock); |
958 | if (isset) { |
959 | p->p_sflag |= flag; |
960 | } else { |
961 | p->p_sflag &= ~flag; |
962 | } |
963 | mutex_exit(p->p_lock); |
964 | out: |
965 | rw_exit(&p->p_reflock); |
966 | return error; |
967 | } |
968 | |
969 | /* |
970 | * sysctl_proc_plimit: helper routine to get/set rlimits of a process. |
971 | */ |
972 | static int |
973 | sysctl_proc_plimit(SYSCTLFN_ARGS) |
974 | { |
975 | struct proc *p; |
976 | u_int limitno; |
977 | int which, error = 0; |
978 | struct rlimit alim; |
979 | struct sysctlnode node; |
980 | |
981 | if (namelen != 0) |
982 | return EINVAL; |
983 | |
984 | which = name[-1]; |
985 | if (which != PROC_PID_LIMIT_TYPE_SOFT && |
986 | which != PROC_PID_LIMIT_TYPE_HARD) |
987 | return EINVAL; |
988 | |
989 | limitno = name[-2] - 1; |
990 | if (limitno >= RLIM_NLIMITS) |
991 | return EINVAL; |
992 | |
993 | if (name[-3] != PROC_PID_LIMIT) |
994 | return EINVAL; |
995 | |
996 | /* Find the process. Hold a reference (p_reflock), if found. */ |
997 | error = sysctl_proc_findproc(l, (pid_t)name[-4], &p); |
998 | if (error) |
999 | return error; |
1000 | |
1001 | /* XXX-elad */ |
1002 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p, |
1003 | KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); |
1004 | if (error) |
1005 | goto out; |
1006 | |
1007 | /* Check if caller can retrieve the limits. */ |
1008 | if (newp == NULL) { |
1009 | error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT, |
1010 | p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim, |
1011 | KAUTH_ARG(which)); |
1012 | if (error) |
1013 | goto out; |
1014 | } |
1015 | |
1016 | /* Retrieve the limits. */ |
1017 | node = *rnode; |
1018 | memcpy(&alim, &p->p_rlimit[limitno], sizeof(alim)); |
1019 | if (which == PROC_PID_LIMIT_TYPE_HARD) { |
1020 | node.sysctl_data = &alim.rlim_max; |
1021 | } else { |
1022 | node.sysctl_data = &alim.rlim_cur; |
1023 | } |
1024 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
1025 | |
1026 | /* Return if error, or if we are only retrieving the limits. */ |
1027 | if (error || newp == NULL) { |
1028 | goto out; |
1029 | } |
1030 | error = dosetrlimit(l, p, limitno, &alim); |
1031 | out: |
1032 | rw_exit(&p->p_reflock); |
1033 | return error; |
1034 | } |
1035 | |
1036 | /* |
1037 | * Setup sysctl nodes. |
1038 | */ |
1039 | static void |
1040 | sysctl_proc_setup(void) |
1041 | { |
1042 | |
1043 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, |
1044 | CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER, |
1045 | CTLTYPE_NODE, "curproc" , |
1046 | SYSCTL_DESCR("Per-process settings" ), |
1047 | NULL, 0, NULL, 0, |
1048 | CTL_PROC, PROC_CURPROC, CTL_EOL); |
1049 | |
1050 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, |
1051 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, |
1052 | CTLTYPE_STRING, "corename" , |
1053 | SYSCTL_DESCR("Core file name" ), |
1054 | sysctl_proc_corename, 0, NULL, MAXPATHLEN, |
1055 | CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL); |
1056 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, |
1057 | CTLFLAG_PERMANENT, |
1058 | CTLTYPE_NODE, "rlimit" , |
1059 | SYSCTL_DESCR("Process limits" ), |
1060 | NULL, 0, NULL, 0, |
1061 | CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL); |
1062 | |
1063 | #define create_proc_plimit(s, n) do { \ |
1064 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, \ |
1065 | CTLFLAG_PERMANENT, \ |
1066 | CTLTYPE_NODE, s, \ |
1067 | SYSCTL_DESCR("Process " s " limits"), \ |
1068 | NULL, 0, NULL, 0, \ |
1069 | CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \ |
1070 | CTL_EOL); \ |
1071 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, \ |
1072 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \ |
1073 | CTLTYPE_QUAD, "soft", \ |
1074 | SYSCTL_DESCR("Process soft " s " limit"), \ |
1075 | sysctl_proc_plimit, 0, NULL, 0, \ |
1076 | CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \ |
1077 | PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL); \ |
1078 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, \ |
1079 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \ |
1080 | CTLTYPE_QUAD, "hard", \ |
1081 | SYSCTL_DESCR("Process hard " s " limit"), \ |
1082 | sysctl_proc_plimit, 0, NULL, 0, \ |
1083 | CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \ |
1084 | PROC_PID_LIMIT_TYPE_HARD, CTL_EOL); \ |
1085 | } while (0/*CONSTCOND*/) |
1086 | |
1087 | create_proc_plimit("cputime" , PROC_PID_LIMIT_CPU); |
1088 | create_proc_plimit("filesize" , PROC_PID_LIMIT_FSIZE); |
1089 | create_proc_plimit("datasize" , PROC_PID_LIMIT_DATA); |
1090 | create_proc_plimit("stacksize" , PROC_PID_LIMIT_STACK); |
1091 | create_proc_plimit("coredumpsize" , PROC_PID_LIMIT_CORE); |
1092 | create_proc_plimit("memoryuse" , PROC_PID_LIMIT_RSS); |
1093 | create_proc_plimit("memorylocked" , PROC_PID_LIMIT_MEMLOCK); |
1094 | create_proc_plimit("maxproc" , PROC_PID_LIMIT_NPROC); |
1095 | create_proc_plimit("descriptors" , PROC_PID_LIMIT_NOFILE); |
1096 | create_proc_plimit("sbsize" , PROC_PID_LIMIT_SBSIZE); |
1097 | create_proc_plimit("vmemoryuse" , PROC_PID_LIMIT_AS); |
1098 | create_proc_plimit("maxlwp" , PROC_PID_LIMIT_NTHR); |
1099 | |
1100 | #undef create_proc_plimit |
1101 | |
1102 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, |
1103 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, |
1104 | CTLTYPE_INT, "stopfork" , |
1105 | SYSCTL_DESCR("Stop process at fork(2)" ), |
1106 | sysctl_proc_stop, 0, NULL, 0, |
1107 | CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL); |
1108 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, |
1109 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, |
1110 | CTLTYPE_INT, "stopexec" , |
1111 | SYSCTL_DESCR("Stop process at execve(2)" ), |
1112 | sysctl_proc_stop, 0, NULL, 0, |
1113 | CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL); |
1114 | sysctl_createv(&proc_sysctllog, 0, NULL, NULL, |
1115 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, |
1116 | CTLTYPE_INT, "stopexit" , |
1117 | SYSCTL_DESCR("Stop process before completing exit" ), |
1118 | sysctl_proc_stop, 0, NULL, 0, |
1119 | CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL); |
1120 | } |
1121 | |