kern_proc.c source code [src/src/sys/kern/kern_proc.c]

/*	$NetBSD: kern_proc.c,v 1.199 2016/11/14 08:55:51 kre Exp $	*/
2
/*-
4	* Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5	* All rights reserved.
6	*
7	* This code is derived from software contributed to The NetBSD Foundation
8	* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9	* NASA Ames Research Center, and by Andrew Doran.
10	*
11	* Redistribution and use in source and binary forms, with or without
12	* modification, are permitted provided that the following conditions
13	* are met:
14	* 1. Redistributions of source code must retain the above copyright
15	* notice, this list of conditions and the following disclaimer.
16	* 2. Redistributions in binary form must reproduce the above copyright
17	* notice, this list of conditions and the following disclaimer in the
18	* documentation and/or other materials provided with the distribution.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22	* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30	* POSSIBILITY OF SUCH DAMAGE.
31	*/
32
33	/*
34	* Copyright (c) 1982, 1986, 1989, 1991, 1993
35	* The Regents of the University of California. All rights reserved.
36	*
37	* Redistribution and use in source and binary forms, with or without
38	* modification, are permitted provided that the following conditions
39	* are met:
40	* 1. Redistributions of source code must retain the above copyright
41	* notice, this list of conditions and the following disclaimer.
42	* 2. Redistributions in binary form must reproduce the above copyright
43	* notice, this list of conditions and the following disclaimer in the
44	* documentation and/or other materials provided with the distribution.
45	* 3. Neither the name of the University nor the names of its contributors
46	* may be used to endorse or promote products derived from this software
47	* without specific prior written permission.
48	*
49	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59	* SUCH DAMAGE.
60	*
61	* @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
62	*/
63
64	#include <sys/cdefs.h>
65	__KERNEL_RCSID(`0`, "$NetBSD: kern_proc.c,v 1.199 2016/11/14 08:55:51 kre Exp $");
66
67	#ifdef _KERNEL_OPT
68	#include "opt_kstack.h"
69	#include "opt_maxuprc.h"
70	#include "opt_dtrace.h"
71	#include "opt_compat_netbsd32.h"
72	#endif
73
74	#include <sys/param.h>
75	#include <sys/systm.h>
76	#include <sys/kernel.h>
77	#include <sys/proc.h>
78	#include <sys/resourcevar.h>
79	#include <sys/buf.h>
80	#include <sys/acct.h>
81	#include <sys/wait.h>
82	#include <sys/file.h>
83	#include <ufs/ufs/quota.h>
84	#include <sys/uio.h>
85	#include <sys/pool.h>
86	#include <sys/pset.h>
87	#include <sys/mbuf.h>
88	#include <sys/ioctl.h>
89	#include <sys/tty.h>
90	#include <sys/signalvar.h>
91	#include <sys/ras.h>
92	#include <sys/filedesc.h>
93	#include <sys/syscall_stats.h>
94	#include <sys/kauth.h>
95	#include <sys/sleepq.h>
96	#include <sys/atomic.h>
97	#include <sys/kmem.h>
98	#include <sys/namei.h>
99	#include <sys/dtrace_bsd.h>
100	#include <sys/sysctl.h>
101	#include <sys/exec.h>
102	#include <sys/cpu.h>
103
104	#include <uvm/uvm_extern.h>
105	#include <uvm/uvm.h>
106
107	#ifdef COMPAT_NETBSD32
108	#include <compat/netbsd32/netbsd32.h>
109	#endif
110
111	/*
112	* Process lists.
113	*/
114
115	struct proclist allproc __cacheline_aligned;
116	struct proclist zombproc __cacheline_aligned;
117
118	kmutex_t * proc_lock __cacheline_aligned;
119
120	/*
121	* pid to proc lookup is done by indexing the pid_table array.
122	* Since pid numbers are only allocated when an empty slot
123	* has been found, there is no need to search any lists ever.
124	* (an orphaned pgrp will lock the slot, a session will lock
125	* the pgrp with the same number.)
126	* If the table is too small it is reallocated with twice the
127	* previous size and the entries 'unzipped' into the two halves.
128	* A linked list of free entries is passed through the pt_proc
129	* field of 'free' items - set odd to be an invalid ptr.
130	*/
131
/*
 * One slot of the pid table.
 *
 * pt_proc is either a real process pointer or, for a free slot, an odd
 * (hence invalid) pointer value built by P_FREE() that carries the
 * free-list link.  pt_pgrp and pt_pid are the process group and the pid
 * recorded for the slot.
 */
struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
	pid_t		pt_pid;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
/* True when the low bit is clear, i.e. the value was not built by P_FREE(). */
#define P_VALID(p)	(!(p2u(p) & 1))
/* Recover the integer stored by P_FREE() (drops the low marker bit). */
#define P_NEXT(p)	(p2u(p) >> 1)
/* Encode an integer as an odd pseudo-pointer: shift left one and set bit 0. */
#define P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))
145
146	/*
147	* Table of process IDs (PIDs).
148	*/
149	static struct pid_table *pid_table __read_mostly;
150
151	#define INITIAL_PID_TABLE_SIZE (1 << 5)
152
153	/ Table mask, threshold for growing and number of allocated PIDs. /
154	static u_int pid_tbl_mask __read_mostly;
155	static u_int pid_alloc_lim __read_mostly;
156	static u_int pid_alloc_cnt __cacheline_aligned;
157
158	/ Next free, last free and maximum PIDs. /
159	static u_int next_free_pt __cacheline_aligned;
160	static u_int last_free_pt __cacheline_aligned;
161	static pid_t pid_max __read_mostly;
162
163	/ Components of the first process -- never freed. /
164
165	extern struct emul emul_netbsd; / defined in kern_exec.c /
166
167	struct session session0 = {
168	.s_count = `1`,
169	.s_sid = `0`,
170	};
171	struct pgrp pgrp0 = {
172	.pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
173	.pg_session = &session0,
174	};
175	filedesc_t filedesc0;
176	struct cwdinfo cwdi0 = {
177	.cwdi_cmask = CMASK,
178	.cwdi_refcnt = `1`,
179	};
180	struct plimit limit0;
181	struct pstats pstat0;
182	struct vmspace vmspace0;
183	struct sigacts sigacts0;
184	struct proc proc0 = {
185	.p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
186	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
187	.p_nlwps = `1`,
188	.p_nrlwps = `1`,
189	.p_nlwpid = `1`, / must match lwp0.l_lid /
190	.p_pgrp = &pgrp0,
191	.p_comm = "system",
192	/*
193	* Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
194	* when they exit. init(8) can easily wait them out for us.
195	*/
196	.p_flag = PK_SYSTEM \| PK_NOCLDWAIT,
197	.p_stat = SACTIVE,
198	.p_nice = NZERO,
199	.p_emul = &emul_netbsd,
200	.p_cwdi = &cwdi0,
201	.p_limit = &limit0,
202	.p_fd = &filedesc0,
203	.p_vmspace = &vmspace0,
204	.p_stats = &pstat0,
205	.p_sigacts = &sigacts0,
206	#ifdef PROC0_MD_INITIALIZERS
207	PROC0_MD_INITIALIZERS
208	#endif
209	};
210	kauth_cred_t cred0;
211
212	static const int nofile = NOFILE;
213	static const int maxuprc = MAXUPRC;
214
215	static int sysctl_doeproc(SYSCTLFN_PROTO);
216	static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
217
218	/*
219	* The process list descriptors, used during pid allocation and
220	* by sysctl. No locking on this data structure is needed since
221	* it is completely static.
222	*/
223	const struct proclist_desc proclists[] = {
224	{ &allproc },
225	{ &zombproc },
226	{ NULL },
227	};
228
229	static struct pgrp * pg_remove(pid_t);
230	static void pg_delete(pid_t);
231	static void orphanpg(struct pgrp *);
232
233	static specificdata_domain_t proc_specificdata_domain;
234
235	static pool_cache_t proc_cache;
236
237	static kauth_listener_t proc_listener;
238
239	static int fill_pathname(struct lwp , pid_t, void* , size_t );
240
241	static int
242	proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
243	void arg0, void* arg1, void* arg2, void* *arg3)
244	{
245	struct proc *p;
246	int result;
247
248	result = KAUTH_RESULT_DEFER;
249	p = arg0;
250
251	switch (action) {
252	case KAUTH_PROCESS_CANSEE: {
253	enum kauth_process_req req;
254
255	req = (enum kauth_process_req)arg1;
256
257	switch (req) {
258	case KAUTH_REQ_PROCESS_CANSEE_ARGS:
259	case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
260	case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
261	result = KAUTH_RESULT_ALLOW;
262
263	break;
264
265	case KAUTH_REQ_PROCESS_CANSEE_ENV:
266	if (kauth_cred_getuid(cred) !=
267	kauth_cred_getuid(p->p_cred) \|\|
268	kauth_cred_getuid(cred) !=
269	kauth_cred_getsvuid(p->p_cred))
270	break;
271
272	result = KAUTH_RESULT_ALLOW;
273
274	break;
275
276	default:
277	break;
278	}
279
280	break;
281	}
282
283	case KAUTH_PROCESS_FORK: {
284	int lnprocs = (int)(unsigned long)arg2;
285
286	/*
287	* Don't allow a nonprivileged user to use the last few
288	* processes. The variable lnprocs is the current number of
289	* processes, maxproc is the limit.
290	*/
291	if (__predict_false((lnprocs >= maxproc - `5`)))
292	break;
293
294	result = KAUTH_RESULT_ALLOW;
295
296	break;
297	}
298
299	case KAUTH_PROCESS_CORENAME:
300	case KAUTH_PROCESS_STOPFLAG:
301	if (proc_uidmatch(cred, p->p_cred) == `0`)
302	result = KAUTH_RESULT_ALLOW;
303
304	break;
305
306	default:
307	break;
308	}
309
310	return result;
311	}
312
313	/*
314	* Initialize global process hashing structures.
315	*/
316	void
317	procinit(void)
318	{
319	const struct proclist_desc *pd;
320	u_int i;
321	#define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
322
323	for (pd = proclists; pd->pd_list != NULL; pd++)
324	LIST_INIT(pd->pd_list);
325
326	proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
327	pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
328	* sizeof(struct pid_table), KM_SLEEP);
329	pid_tbl_mask = INITIAL_PID_TABLE_SIZE - `1`;
330	pid_max = PID_MAX;
331
332	/ Set free list running through table...*
333	Preset 'use count' above PID_MAX so we allocate pid 1 next. /*
334	for (i = `0`; i <= pid_tbl_mask; i++) {
335	pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + `1`);
336	pid_table[i].pt_pgrp = `0`;
337	pid_table[i].pt_pid = `0`;
338	}
339	/ slot 0 is just grabbed /
340	next_free_pt = `1`;
341	/ Need to fix last entry. /
342	last_free_pt = pid_tbl_mask;
343	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
344	/ point at which we grow table - to avoid reusing pids too often /
345	pid_alloc_lim = pid_tbl_mask - `1`;
346	#undef LINK_EMPTY
347
348	proc_specificdata_domain = specificdata_domain_create();
349	KASSERT(proc_specificdata_domain != NULL);
350
351	proc_cache = pool_cache_init(sizeof(struct proc), `0`, `0`, `0`,
352	"procpl", NULL, IPL_NONE, NULL, NULL, NULL);
353
354	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
355	proc_listener_cb, NULL);
356	}
357
358	void
359	procinit_sysctl(void)
360	{
361	static struct sysctllog *clog;
362
363	sysctl_createv(&clog, `0`, NULL, NULL,
364	CTLFLAG_PERMANENT,
365	CTLTYPE_NODE, "proc",
366	SYSCTL_DESCR("System-wide process information"),
367	sysctl_doeproc, `0`, NULL, `0`,
368	CTL_KERN, KERN_PROC, CTL_EOL);
369	sysctl_createv(&clog, `0`, NULL, NULL,
370	CTLFLAG_PERMANENT,
371	CTLTYPE_NODE, "proc2",
372	SYSCTL_DESCR("Machine-independent process information"),
373	sysctl_doeproc, `0`, NULL, `0`,
374	CTL_KERN, KERN_PROC2, CTL_EOL);
375	sysctl_createv(&clog, `0`, NULL, NULL,
376	CTLFLAG_PERMANENT,
377	CTLTYPE_NODE, "proc_args",
378	SYSCTL_DESCR("Process argument information"),
379	sysctl_kern_proc_args, `0`, NULL, `0`,
380	CTL_KERN, KERN_PROC_ARGS, CTL_EOL);
381
382	/*
383	"nodes" under these:
384
385	KERN_PROC_ALL
386	KERN_PROC_PID pid
387	KERN_PROC_PGRP pgrp
388	KERN_PROC_SESSION sess
389	KERN_PROC_TTY tty
390	KERN_PROC_UID uid
391	KERN_PROC_RUID uid
392	KERN_PROC_GID gid
393	KERN_PROC_RGID gid
394
395	all in all, probably not worth the effort...
396	*/
397	}
398
399	/*
400	* Initialize process 0.
401	*/
402	void
403	proc0_init(void)
404	{
405	struct proc *p;
406	struct pgrp *pg;
407	struct rlimit *rlim;
408	rlim_t lim;
409	int i;
410
411	p = &proc0;
412	pg = &pgrp0;
413
414	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
415	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
416	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
417
418	rw_init(&p->p_reflock);
419	cv_init(&p->p_waitcv, "wait");
420	cv_init(&p->p_lwpcv, "lwpwait");
421
422	LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
423
424	pid_table[`0`].pt_proc = p;
425	LIST_INSERT_HEAD(&allproc, p, p_list);
426
427	pid_table[`0`].pt_pgrp = pg;
428	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
429
430	#ifdef __HAVE_SYSCALL_INTERN
431	(*p->p_emul->e_syscall_intern)(p);
432	#endif
433
434	/ Create credentials. /
435	cred0 = kauth_cred_alloc();
436	p->p_cred = cred0;
437
438	/ Create the CWD info. /
439	rw_init(&cwdi0.cwdi_lock);
440
441	/ Create the limits structures. /
442	mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
443
444	rlim = limit0.pl_rlimit;
445	for (i = `0`; i < __arraycount(limit0.pl_rlimit); i++) {
446	rlim[i].rlim_cur = RLIM_INFINITY;
447	rlim[i].rlim_max = RLIM_INFINITY;
448	}
449
450	rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
451	rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;
452
453	rlim[RLIMIT_NPROC].rlim_max = maxproc;
454	rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;
455
456	lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvmexp.free));
457	rlim[RLIMIT_RSS].rlim_max = lim;
458	rlim[RLIMIT_MEMLOCK].rlim_max = lim;
459	rlim[RLIMIT_MEMLOCK].rlim_cur = lim / `3`;
460
461	rlim[RLIMIT_NTHR].rlim_max = maxlwp;
462	rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc;
463
464	/ Note that default core name has zero length. /
465	limit0.pl_corename = defcorename;
466	limit0.pl_cnlen = `0`;
467	limit0.pl_refcnt = `1`;
468	limit0.pl_writeable = false;
469	limit0.pl_sv_limit = NULL;
470
471	/ Configure virtual memory system, set vm rlimits. /
472	uvm_init_limits(p);
473
474	/ Initialize file descriptor table for proc0. /
475	fd_init(&filedesc0);
476
477	/*
478	* Initialize proc0's vmspace, which uses the kernel pmap.
479	* All kernel processes (which never have user space mappings)
480	* share proc0's vmspace, and thus, the kernel pmap.
481	*/
482	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
483	trunc_page(VM_MAXUSER_ADDRESS),
484	#ifdef __USE_TOPDOWN_VM
485	true
486	#else
487	false
488	#endif
489	);
490
491	/ Initialize signal state for proc0. XXX IPL_SCHED /
492	mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
493	siginit(p);
494
495	proc_initspecific(p);
496	kdtrace_proc_ctor(NULL, p);
497	}
498
499	/*
500	* Session reference counting.
501	*/
502
503	void
504	proc_sesshold(struct session *ss)
505	{
506
507	KASSERT(mutex_owned(proc_lock));
508	ss->s_count++;
509	}
510
511	void
512	proc_sessrele(struct session *ss)
513	{
514
515	KASSERT(mutex_owned(proc_lock));
516	/*
517	* We keep the pgrp with the same id as the session in order to
518	* stop a process being given the same pid. Since the pgrp holds
519	* a reference to the session, it must be a 'zombie' pgrp by now.
520	*/
521	if (--ss->s_count == `0`) {
522	struct pgrp *pg;
523
524	pg = pg_remove(ss->s_sid);
525	mutex_exit(proc_lock);
526
527	kmem_free(pg, sizeof(struct pgrp));
528	kmem_free(ss, sizeof(struct session));
529	} else {
530	mutex_exit(proc_lock);
531	}
532	}
533
534	/*
535	* Check that the specified process group is in the session of the
536	* specified process.
537	* Treats -ve ids as process ids.
538	* Used to validate TIOCSPGRP requests.
539	*/
540	int
541	pgid_in_session(struct proc *p, pid_t pg_id)
542	{
543	struct pgrp *pgrp;
544	struct session *session;
545	int error;
546
547	mutex_enter(proc_lock);
548	if (pg_id < `0`) {
549	struct proc *p1 = proc_find(-pg_id);
550	if (p1 == NULL) {
551	error = EINVAL;
552	goto fail;
553	}
554	pgrp = p1->p_pgrp;
555	} else {
556	pgrp = pgrp_find(pg_id);
557	if (pgrp == NULL) {
558	error = EINVAL;
559	goto fail;
560	}
561	}
562	session = pgrp->pg_session;
563	error = (session != p->p_pgrp->pg_session) ? EPERM : `0`;
564	fail:
565	mutex_exit(proc_lock);
566	return error;
567	}
568
569	/*
570	* p_inferior: is p an inferior of q?
571	*/
572	static inline bool
573	p_inferior(struct proc p, struct* proc *q)
574	{
575
576	KASSERT(mutex_owned(proc_lock));
577
578	for (; p != q; p = p->p_pptr)
579	if (p->p_pid == `0`)
580	return false;
581	return true;
582	}
583
584	/*
585	* proc_find: locate a process by the ID.
586	*
587	* => Must be called with proc_lock held.
588	*/
589	proc_t *
590	proc_find_raw(pid_t pid)
591	{
592	struct pid_table *pt;
593	proc_t *p;
594
595	KASSERT(mutex_owned(proc_lock));
596	pt = &pid_table[pid & pid_tbl_mask];
597	p = pt->pt_proc;
598	if (__predict_false(!P_VALID(p) \|\| pt->pt_pid != pid)) {
599	return NULL;
600	}
601	return p;
602	}
603
604	proc_t *
605	proc_find(pid_t pid)
606	{
607	proc_t *p;
608
609	p = proc_find_raw(pid);
610	if (__predict_false(p == NULL)) {
611	return NULL;
612	}
613
614	/*
615	* Only allow live processes to be found by PID.
616	* XXX: p_stat might change, since unlocked.
617	*/
618	if (__predict_true(p->p_stat == SACTIVE \|\| p->p_stat == SSTOP)) {
619	return p;
620	}
621	return NULL;
622	}
623
624	/*
625	* pgrp_find: locate a process group by the ID.
626	*
627	* => Must be called with proc_lock held.
628	*/
629	struct pgrp *
630	pgrp_find(pid_t pgid)
631	{
632	struct pgrp *pg;
633
634	KASSERT(mutex_owned(proc_lock));
635
636	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
637
638	/*
639	* Cannot look up a process group that only exists because the
640	* session has not died yet (traditional).
641	*/
642	if (pg == NULL \|\| pg->pg_id != pgid \|\| LIST_EMPTY(&pg->pg_members)) {
643	return NULL;
644	}
645	return pg;
646	}
647
648	static void
649	expand_pid_table(void)
650	{
651	size_t pt_size, tsz;
652	struct pid_table n_pt, new_pt;
653	struct proc *proc;
654	struct pgrp *pgrp;
655	pid_t pid, rpid;
656	u_int i;
657	uint new_pt_mask;
658
659	pt_size = pid_tbl_mask + `1`;
660	tsz = pt_size * `2` * sizeof(struct pid_table);
661	new_pt = kmem_alloc(tsz, KM_SLEEP);
662	new_pt_mask = pt_size * `2` - `1`;
663
664	mutex_enter(proc_lock);
665	if (pt_size != pid_tbl_mask + `1`) {
666	/ Another process beat us to it... /
667	mutex_exit(proc_lock);
668	kmem_free(new_pt, tsz);
669	return;
670	}
671
672	/*
673	* Copy entries from old table into new one.
674	* If 'pid' is 'odd' we need to place in the upper half,
675	* even pid's to the lower half.
676	* Free items stay in the low half so we don't have to
677	* fixup the reference to them.
678	* We stuff free items on the front of the freelist
679	* because we can't write to unmodified entries.
680	* Processing the table backwards maintains a semblance
681	* of issuing pid numbers that increase with time.
682	*/
683	i = pt_size - `1`;
684	n_pt = new_pt + i;
685	for (; ; i--, n_pt--) {
686	proc = pid_table[i].pt_proc;
687	pgrp = pid_table[i].pt_pgrp;
688	if (!P_VALID(proc)) {
689	/ Up 'use count' so that link is valid /
690	pid = (P_NEXT(proc) + pt_size) & ~pt_size;
691	rpid = `0`;
692	proc = P_FREE(pid);
693	if (pgrp)
694	pid = pgrp->pg_id;
695	} else {
696	pid = pid_table[i].pt_pid;
697	rpid = pid;
698	}
699
700	/ Save entry in appropriate half of table /
701	n_pt[pid & pt_size].pt_proc = proc;
702	n_pt[pid & pt_size].pt_pgrp = pgrp;
703	n_pt[pid & pt_size].pt_pid = rpid;
704
705	/ Put other piece on start of free list /
706	pid = (pid ^ pt_size) & ~pid_tbl_mask;
707	n_pt[pid & pt_size].pt_proc =
708	P_FREE((pid & ~pt_size) \| next_free_pt);
709	n_pt[pid & pt_size].pt_pgrp = `0`;
710	n_pt[pid & pt_size].pt_pid = `0`;
711
712	next_free_pt = i \| (pid & pt_size);
713	if (i == `0`)
714	break;
715	}
716
717	/ Save old table size and switch tables /
718	tsz = pt_size * sizeof(struct pid_table);
719	n_pt = pid_table;
720	pid_table = new_pt;
721	pid_tbl_mask = new_pt_mask;
722
723	/*
724	* pid_max starts as PID_MAX (= 30000), once we have 16384
725	* allocated pids we need it to be larger!
726	*/
727	if (pid_tbl_mask > PID_MAX) {
728	pid_max = pid_tbl_mask * `2` + `1`;
729	pid_alloc_lim \|= pid_alloc_lim << `1`;
730	} else
731	pid_alloc_lim <<= `1`; / doubles number of free slots... /
732
733	mutex_exit(proc_lock);
734	kmem_free(n_pt, tsz);
735	}
736
737	struct proc *
738	proc_alloc(void)
739	{
740	struct proc *p;
741
742	p = pool_cache_get(proc_cache, PR_WAITOK);
743	p->p_stat = SIDL; / protect against others /
744	proc_initspecific(p);
745	kdtrace_proc_ctor(NULL, p);
746	p->p_pid = -`1`;
747	proc_alloc_pid(p);
748	return p;
749	}
750
751	/*
752	* proc_alloc_pid: allocate PID and record the given proc 'p' so that
753	* proc_find_raw() can find it by the PID.
754	*/
755
756	pid_t
757	proc_alloc_pid(struct proc *p)
758	{
759	struct pid_table *pt;
760	pid_t pid;
761	int nxt;
762
763	for (;;expand_pid_table()) {
764	if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
765	/ ensure pids cycle through 2000+ values /
766	continue;
767	mutex_enter(proc_lock);
768	pt = &pid_table[next_free_pt];
769	#ifdef DIAGNOSTIC
770	if (__predict_false(P_VALID(pt->pt_proc) \|\| pt->pt_pgrp))
771	panic("proc_alloc: slot busy");
772	#endif
773	nxt = P_NEXT(pt->pt_proc);
774	if (nxt & pid_tbl_mask)
775	break;
776	/ Table full - expand (NB last entry not used....) /
777	mutex_exit(proc_lock);
778	}
779
780	/ pid is 'saved use count' + 'size' + entry /
781	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + `1` + next_free_pt;
782	if ((uint)pid > (uint)pid_max)
783	pid &= pid_tbl_mask;
784	next_free_pt = nxt & pid_tbl_mask;
785
786	/ Grab table slot /
787	pt->pt_proc = p;
788
789	KASSERT(pt->pt_pid == `0`);
790	pt->pt_pid = pid;
791	if (p->p_pid == -`1`) {
792	p->p_pid = pid;
793	}
794	pid_alloc_cnt++;
795	mutex_exit(proc_lock);
796
797	return pid;
798	}
799
800	/*
801	* Free a process id - called from proc_free (in kern_exit.c)
802	*
803	* Called with the proc_lock held.
804	*/
805	void
806	proc_free_pid(pid_t pid)
807	{
808	struct pid_table *pt;
809
810	KASSERT(mutex_owned(proc_lock));
811
812	pt = &pid_table[pid & pid_tbl_mask];
813
814	/ save pid use count in slot /
815	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
816	KASSERT(pt->pt_pid == pid);
817	pt->pt_pid = `0`;
818
819	if (pt->pt_pgrp == NULL) {
820	/ link last freed entry onto ours /
821	pid &= pid_tbl_mask;
822	pt = &pid_table[last_free_pt];
823	pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) \| pid);
824	pt->pt_pid = `0`;
825	last_free_pt = pid;
826	pid_alloc_cnt--;
827	}
828
829	atomic_dec_uint(&nprocs);
830	}
831
832	void
833	proc_free_mem(struct proc *p)
834	{
835
836	kdtrace_proc_dtor(NULL, p);
837	pool_cache_put(proc_cache, p);
838	}
839
840	/*
841	* proc_enterpgrp: move p to a new or existing process group (and session).
842	*
843	* If we are creating a new pgrp, the pgid should equal
844	* the calling process' pid.
845	* If is only valid to enter a process group that is in the session
846	* of the process.
847	* Also mksess should only be set if we are creating a process group
848	*
849	* Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
850	*/
851	int
852	proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
853	{
854	struct pgrp new_pgrp, pgrp;
855	struct session *sess;
856	struct proc *p;
857	int rval;
858	pid_t pg_id = NO_PGID;
859
860	sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
861
862	/ Allocate data areas we might need before doing any validity checks /
863	mutex_enter(proc_lock); / Because pid_table might change /
864	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == `0`) {
865	mutex_exit(proc_lock);
866	new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
867	mutex_enter(proc_lock);
868	} else
869	new_pgrp = NULL;
870	rval = EPERM; / most common error (to save typing) /
871
872	/ Check pgrp exists or can be created /
873	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
874	if (pgrp != NULL && pgrp->pg_id != pgid)
875	goto done;
876
877	/ Can only set another process under restricted circumstances. /
878	if (pid != curp->p_pid) {
879	/ Must exist and be one of our children... /
880	p = proc_find(pid);
881	if (p == NULL \|\| !p_inferior(p, curp)) {
882	rval = ESRCH;
883	goto done;
884	}
885	/ ... in the same session... /
886	if (sess != NULL \|\| p->p_session != curp->p_session)
887	goto done;
888	/ ... existing pgid must be in same session ... /
889	if (pgrp != NULL && pgrp->pg_session != p->p_session)
890	goto done;
891	/ ... and not done an exec. /
892	if (p->p_flag & PK_EXEC) {
893	rval = EACCES;
894	goto done;
895	}
896	} else {
897	/ ... setsid() cannot re-enter a pgrp /
898	if (mksess && (curp->p_pgid == curp->p_pid \|\|
899	pgrp_find(curp->p_pid)))
900	goto done;
901	p = curp;
902	}
903
904	/ Changing the process group/session of a session*
905	leader is definitely off limits. /*
906	if (SESS_LEADER(p)) {
907	if (sess == NULL && p->p_pgrp == pgrp)
908	/ unless it's a definite noop /
909	rval = `0`;
910	goto done;
911	}
912
913	/ Can only create a process group with id of process /
914	if (pgrp == NULL && pgid != pid)
915	goto done;
916
917	/ Can only create a session if creating pgrp /
918	if (sess != NULL && pgrp != NULL)
919	goto done;
920
921	/ Check we allocated memory for a pgrp... /
922	if (pgrp == NULL && new_pgrp == NULL)
923	goto done;
924
925	/ Don't attach to 'zombie' pgrp /
926	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
927	goto done;
928
929	/ Expect to succeed now /
930	rval = `0`;
931
932	if (pgrp == p->p_pgrp)
933	/ nothing to do /
934	goto done;
935
936	/ Ok all setup, link up required structures /
937
938	if (pgrp == NULL) {
939	pgrp = new_pgrp;
940	new_pgrp = NULL;
941	if (sess != NULL) {
942	sess->s_sid = p->p_pid;
943	sess->s_leader = p;
944	sess->s_count = `1`;
945	sess->s_ttyvp = NULL;
946	sess->s_ttyp = NULL;
947	sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
948	memcpy(sess->s_login, p->p_session->s_login,
949	sizeof(sess->s_login));
950	p->p_lflag &= ~PL_CONTROLT;
951	} else {
952	sess = p->p_pgrp->pg_session;
953	proc_sesshold(sess);
954	}
955	pgrp->pg_session = sess;
956	sess = NULL;
957
958	pgrp->pg_id = pgid;
959	LIST_INIT(&pgrp->pg_members);
960	#ifdef DIAGNOSTIC
961	if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
962	panic("enterpgrp: pgrp table slot in use");
963	if (__predict_false(mksess && p != curp))
964	panic("enterpgrp: mksession and p != curproc");
965	#endif
966	pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
967	pgrp->pg_jobc = `0`;
968	}
969
970	/*
971	* Adjust eligibility of affected pgrps to participate in job control.
972	* Increment eligibility counts before decrementing, otherwise we
973	* could reach 0 spuriously during the first call.
974	*/
975	fixjobc(p, pgrp, `1`);
976	fixjobc(p, p->p_pgrp, `0`);
977
978	/ Interlock with ttread(). /
979	mutex_spin_enter(&tty_lock);
980
981	/ Move process to requested group. /
982	LIST_REMOVE(p, p_pglist);
983	if (LIST_EMPTY(&p->p_pgrp->pg_members))
984	/ defer delete until we've dumped the lock /
985	pg_id = p->p_pgrp->pg_id;
986	p->p_pgrp = pgrp;
987	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
988
989	/ Done with the swap; we can release the tty mutex. /
990	mutex_spin_exit(&tty_lock);
991
992	done:
993	if (pg_id != NO_PGID) {
994	/ Releases proc_lock. /
995	pg_delete(pg_id);
996	} else {
997	mutex_exit(proc_lock);
998	}
999	if (sess != NULL)
1000	kmem_free(sess, sizeof(*sess));
1001	if (new_pgrp != NULL)
1002	kmem_free(new_pgrp, sizeof(*new_pgrp));
1003	#ifdef DEBUG_PGRP
1004	if (__predict_false(rval))
1005	printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
1006	pid, pgid, mksess, curp->p_pid, rval);
1007	#endif
1008	return rval;
1009	}
1010
1011	/*
1012	* proc_leavepgrp: remove a process from its process group.
1013	* => must be called with the proc_lock held, which will be released;
1014	*/
1015	void
1016	proc_leavepgrp(struct proc *p)
1017	{
1018	struct pgrp *pgrp;
1019
1020	KASSERT(mutex_owned(proc_lock));
1021
1022	/ Interlock with ttread() /
1023	mutex_spin_enter(&tty_lock);
1024	pgrp = p->p_pgrp;
1025	LIST_REMOVE(p, p_pglist);
1026	p->p_pgrp = NULL;
1027	mutex_spin_exit(&tty_lock);
1028
1029	if (LIST_EMPTY(&pgrp->pg_members)) {
1030	/ Releases proc_lock. /
1031	pg_delete(pgrp->pg_id);
1032	} else {
1033	mutex_exit(proc_lock);
1034	}
1035	}
1036
1037	/*
1038	* pg_remove: remove a process group from the table.
1039	* => must be called with the proc_lock held;
1040	* => returns process group to free;
1041	*/
1042	static struct pgrp *
1043	pg_remove(pid_t pg_id)
1044	{
1045	struct pgrp *pgrp;
1046	struct pid_table *pt;
1047
1048	KASSERT(mutex_owned(proc_lock));
1049
1050	pt = &pid_table[pg_id & pid_tbl_mask];
1051	pgrp = pt->pt_pgrp;
1052
1053	KASSERT(pgrp != NULL);
1054	KASSERT(pgrp->pg_id == pg_id);
1055	KASSERT(LIST_EMPTY(&pgrp->pg_members));
1056
1057	pt->pt_pgrp = NULL;
1058
1059	if (!P_VALID(pt->pt_proc)) {
1060	/ Orphaned pgrp, put slot onto free list. /
1061	KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == `0`);
1062	pg_id &= pid_tbl_mask;
1063	pt = &pid_table[last_free_pt];
1064	pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) \| pg_id);
1065	KASSERT(pt->pt_pid == `0`);
1066	last_free_pt = pg_id;
1067	pid_alloc_cnt--;
1068	}
1069	return pgrp;
1070	}
1071
1072	/*
1073	* pg_delete: delete and free a process group.
1074	* => must be called with the proc_lock held, which will be released.
1075	*/
1076	static void
1077	pg_delete(pid_t pg_id)
1078	{
1079	struct pgrp *pg;
1080	struct tty *ttyp;
1081	struct session *ss;
1082
1083	KASSERT(mutex_owned(proc_lock));
1084
1085	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
1086	if (pg == NULL \|\| pg->pg_id != pg_id \|\| !LIST_EMPTY(&pg->pg_members)) {
1087	mutex_exit(proc_lock);
1088	return;
1089	}
1090
1091	ss = pg->pg_session;
1092
1093	/ Remove reference (if any) from tty to this process group /
1094	mutex_spin_enter(&tty_lock);
1095	ttyp = ss->s_ttyp;
1096	if (ttyp != NULL && ttyp->t_pgrp == pg) {
1097	ttyp->t_pgrp = NULL;
1098	KASSERT(ttyp->t_session == ss);
1099	}
1100	mutex_spin_exit(&tty_lock);
1101
1102	/*
1103	* The leading process group in a session is freed by proc_sessrele(),
1104	* if last reference. Note: proc_sessrele() releases proc_lock.
1105	*/
1106	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
1107	proc_sessrele(ss);
1108
1109	if (pg != NULL) {
1110	/ Free it, if was not done by proc_sessrele(). /
1111	kmem_free(pg, sizeof(struct pgrp));
1112	}
1113	}
1114
1115	/*
1116	* Adjust pgrp jobc counters when specified process changes process group.
1117	* We count the number of processes in each process group that "qualify"
1118	* the group for terminal job control (those with a parent in a different
1119	* process group of the same session). If that count reaches zero, the
1120	* process group becomes orphaned. Check both the specified process'
1121	* process group and that of its children.
1122	* entering == 0 => p is leaving specified group.
1123	* entering == 1 => p is entering specified group.
1124	*
1125	* Call with proc_lock held.
1126	*/
1127	void
1128	fixjobc(struct proc p, struct* pgrp pgrp, int* entering)
1129	{
1130	struct pgrp *hispgrp;
1131	struct session *mysession = pgrp->pg_session;
1132	struct proc *child;
1133
1134	KASSERT(mutex_owned(proc_lock));
1135
1136	/*
1137	* Check p's parent to see whether p qualifies its own process
1138	* group; if so, adjust count for p's process group.
1139	*/
1140	hispgrp = p->p_pptr->p_pgrp;
1141	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
1142	if (entering) {
1143	pgrp->pg_jobc++;
1144	p->p_lflag &= ~PL_ORPHANPG;
1145	} else if (--pgrp->pg_jobc == `0`)
1146	orphanpg(pgrp);
1147	}
1148
1149	/*
1150	* Check this process' children to see whether they qualify
1151	* their process groups; if so, adjust counts for children's
1152	* process groups.
1153	*/
1154	LIST_FOREACH(child, &p->p_children, p_sibling) {
1155	hispgrp = child->p_pgrp;
1156	if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
1157	!P_ZOMBIE(child)) {
1158	if (entering) {
1159	child->p_lflag &= ~PL_ORPHANPG;
1160	hispgrp->pg_jobc++;
1161	} else if (--hispgrp->pg_jobc == `0`)
1162	orphanpg(hispgrp);
1163	}
1164	}
1165	}
1166
1167	/*
1168	* A process group has become orphaned;
1169	* if there are any stopped processes in the group,
1170	* hang-up all process in that group.
1171	*
1172	* Call with proc_lock held.
1173	*/
1174	static void
1175	orphanpg(struct pgrp *pg)
1176	{
1177	struct proc *p;
1178
1179	KASSERT(mutex_owned(proc_lock));
1180
1181	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1182	if (p->p_stat == SSTOP) {
1183	p->p_lflag \|= PL_ORPHANPG;
1184	psignal(p, SIGHUP);
1185	psignal(p, SIGCONT);
1186	}
1187	}
1188	}
1189
1190	#ifdef DDB
1191	#include <ddb/db_output.h>
1192	void pidtbl_dump(void);
1193	void
1194	pidtbl_dump(void)
1195	{
1196	struct pid_table *pt;
1197	struct proc *p;
1198	struct pgrp *pgrp;
1199	int id;
1200
1201	db_printf("pid table %p size %x, next %x, last %x\n",
1202	pid_table, pid_tbl_mask+`1`,
1203	next_free_pt, last_free_pt);
1204	for (pt = pid_table, id = `0`; id <= pid_tbl_mask; id++, pt++) {
1205	p = pt->pt_proc;
1206	if (!P_VALID(p) && !pt->pt_pgrp)
1207	continue;
1208	db_printf(" id %x: ", id);
1209	if (P_VALID(p))
1210	db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
1211	pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
1212	else
1213	db_printf("next %x use %x\n",
1214	P_NEXT(p) & pid_tbl_mask,
1215	P_NEXT(p) & ~pid_tbl_mask);
1216	if ((pgrp = pt->pt_pgrp)) {
1217	db_printf("\tsession %p, sid %d, count %d, login %s\n",
1218	pgrp->pg_session, pgrp->pg_session->s_sid,
1219	pgrp->pg_session->s_count,
1220	pgrp->pg_session->s_login);
1221	db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1222	pgrp, pgrp->pg_id, pgrp->pg_jobc,
1223	LIST_FIRST(&pgrp->pg_members));
1224	LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
1225	db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1226	p->p_pid, p, p->p_pgrp, p->p_comm);
1227	}
1228	}
1229	}
1230	}
1231	#endif /* DDB */
1232
1233	#ifdef KSTACK_CHECK_MAGIC
1234
1235	#define KSTACK_MAGIC 0xdeadbeaf
1236
1237	/ XXX should be per process basis? /
1238	static int kstackleftmin = KSTACK_SIZE;
1239	static int kstackleftthres = KSTACK_SIZE / `8`;
1240
1241	void
1242	kstack_setup_magic(const struct lwp *l)
1243	{
1244	uint32_t *ip;
1245	uint32_t const *end;
1246
1247	KASSERT(l != NULL);
1248	KASSERT(l != &lwp0);
1249
1250	/*
1251	* fill all the stack with magic number
1252	* so that later modification on it can be detected.
1253	*/
1254	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1255	end = (uint32_t )((char* *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1256	for (; ip < end; ip++) {
1257	*ip = KSTACK_MAGIC;
1258	}
1259	}
1260
1261	void
1262	kstack_check_magic(const struct lwp *l)
1263	{
1264	uint32_t const ip, end;
1265	int stackleft;
1266
1267	KASSERT(l != NULL);
1268
1269	/ don't check proc0 / /XXX/
1270	if (l == &lwp0)
1271	return;
1272
1273	#ifdef __MACHINE_STACK_GROWS_UP
1274	/ stack grows upwards (eg. hppa) /
1275	ip = (uint32_t )((void* *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1276	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1277	for (ip--; ip >= end; ip--)
1278	if (*ip != KSTACK_MAGIC)
1279	break;
1280
1281	stackleft = (void )KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void* *)ip;
1282	#else /* __MACHINE_STACK_GROWS_UP */
1283	/ stack grows downwards (eg. i386) /
1284	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1285	end = (uint32_t )((char* *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1286	for (; ip < end; ip++)
1287	if (*ip != KSTACK_MAGIC)
1288	break;
1289
1290	stackleft = ((const char )ip) - (const* char *)KSTACK_LOWEST_ADDR(l);
1291	#endif /* __MACHINE_STACK_GROWS_UP */
1292
1293	if (kstackleftmin > stackleft) {
1294	kstackleftmin = stackleft;
1295	if (stackleft < kstackleftthres)
1296	printf("warning: kernel stack left %d bytes"
1297	"(pid %u:lid %u)\n", stackleft,
1298	(u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1299	}
1300
1301	if (stackleft <= `0`) {
1302	panic("magic on the top of kernel stack changed for "
1303	"pid %u, lid %u: maybe kernel stack overflow",
1304	(u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1305	}
1306	}
1307	#endif /* KSTACK_CHECK_MAGIC */
1308
1309	int
1310	proclist_foreach_call(struct proclist *list,
1311	int (callback)(struct* proc , void* arg), void* *arg)
1312	{
1313	struct proc marker;
1314	struct proc *p;
1315	int ret = `0`;
1316
1317	marker.p_flag = PK_MARKER;
1318	mutex_enter(proc_lock);
1319	for (p = LIST_FIRST(list); ret == `0` && p != NULL;) {
1320	if (p->p_flag & PK_MARKER) {
1321	p = LIST_NEXT(p, p_list);
1322	continue;
1323	}
1324	LIST_INSERT_AFTER(p, &marker, p_list);
1325	ret = (*callback)(p, arg);
1326	KASSERT(mutex_owned(proc_lock));
1327	p = LIST_NEXT(&marker, p_list);
1328	LIST_REMOVE(&marker, p_list);
1329	}
1330	mutex_exit(proc_lock);
1331
1332	return ret;
1333	}
1334
1335	int
1336	proc_vmspace_getref(struct proc p, struct* vmspace **vm)
1337	{
1338
1339	/ XXXCDC: how should locking work here? /
1340
1341	/ curproc exception is for coredump. /
1342
1343	if ((p != curproc && (p->p_sflag & PS_WEXIT) != `0`) \|\|
1344	(p->p_vmspace->vm_refcnt < `1`)) { / XXX /
1345	return EFAULT;
1346	}
1347
1348	uvmspace_addref(p->p_vmspace);
1349	*vm = p->p_vmspace;
1350
1351	return `0`;
1352	}
1353
1354	/*
1355	* Acquire a write lock on the process credential.
1356	*/
1357	void
1358	proc_crmod_enter(void)
1359	{
1360	struct lwp *l = curlwp;
1361	struct proc *p = l->l_proc;
1362	kauth_cred_t oc;
1363
1364	/ Reset what needs to be reset in plimit. /
1365	if (p->p_limit->pl_corename != defcorename) {
1366	lim_setcorename(p, defcorename, `0`);
1367	}
1368
1369	mutex_enter(p->p_lock);
1370
1371	/ Ensure the LWP cached credentials are up to date. /
1372	if ((oc = l->l_cred) != p->p_cred) {
1373	kauth_cred_hold(p->p_cred);
1374	l->l_cred = p->p_cred;
1375	kauth_cred_free(oc);
1376	}
1377	}
1378
1379	/*
1380	* Set in a new process credential, and drop the write lock. The credential
1381	* must have a reference already. Optionally, free a no-longer required
1382	* credential. The scheduler also needs to inspect p_cred, so we also
1383	* briefly acquire the sched state mutex.
1384	*/
1385	void
1386	proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
1387	{
1388	struct lwp l = curlwp, l2;
1389	struct proc *p = l->l_proc;
1390	kauth_cred_t oc;
1391
1392	KASSERT(mutex_owned(p->p_lock));
1393
1394	/ Is there a new credential to set in? /
1395	if (scred != NULL) {
1396	p->p_cred = scred;
1397	LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1398	if (l2 != l)
1399	l2->l_prflag \|= LPR_CRMOD;
1400	}
1401
1402	/ Ensure the LWP cached credentials are up to date. /
1403	if ((oc = l->l_cred) != scred) {
1404	kauth_cred_hold(scred);
1405	l->l_cred = scred;
1406	}
1407	} else
1408	oc = NULL; / XXXgcc /
1409
1410	if (sugid) {
1411	/*
1412	* Mark process as having changed credentials, stops
1413	* tracing etc.
1414	*/
1415	p->p_flag \|= PK_SUGID;
1416	}
1417
1418	mutex_exit(p->p_lock);
1419
1420	/ If there is a credential to be released, free it now. /
1421	if (fcred != NULL) {
1422	KASSERT(scred != NULL);
1423	kauth_cred_free(fcred);
1424	if (oc != scred)
1425	kauth_cred_free(oc);
1426	}
1427	}
1428
1429	/*
1430	* proc_specific_key_create --
1431	* Create a key for subsystem proc-specific data.
1432	*/
1433	int
1434	proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1435	{
1436
1437	return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
1438	}
1439
1440	/*
1441	* proc_specific_key_delete --
1442	* Delete a key for subsystem proc-specific data.
1443	*/
1444	void
1445	proc_specific_key_delete(specificdata_key_t key)
1446	{
1447
1448	specificdata_key_delete(proc_specificdata_domain, key);
1449	}
1450
1451	/*
1452	* proc_initspecific --
1453	* Initialize a proc's specificdata container.
1454	*/
1455	void
1456	proc_initspecific(struct proc *p)
1457	{
1458	int error __diagused;
1459
1460	error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
1461	KASSERT(error == `0`);
1462	}
1463
1464	/*
1465	* proc_finispecific --
1466	* Finalize a proc's specificdata container.
1467	*/
1468	void
1469	proc_finispecific(struct proc *p)
1470	{
1471
1472	specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
1473	}
1474
1475	/*
1476	* proc_getspecific --
1477	* Return proc-specific data corresponding to the specified key.
1478	*/
1479	void *
1480	proc_getspecific(struct proc *p, specificdata_key_t key)
1481	{
1482
1483	return (specificdata_getspecific(proc_specificdata_domain,
1484	&p->p_specdataref, key));
1485	}
1486
1487	/*
1488	* proc_setspecific --
1489	* Set proc-specific data corresponding to the specified key.
1490	*/
1491	void
1492	proc_setspecific(struct proc p, specificdata_key_t key, void* *data)
1493	{
1494
1495	specificdata_setspecific(proc_specificdata_domain,
1496	&p->p_specdataref, key, data);
1497	}
1498
1499	int
1500	proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
1501	{
1502	int r = `0`;
1503
1504	if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) \|\|
1505	kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
1506	/*
1507	* suid proc of ours or proc not ours
1508	*/
1509	r = EPERM;
1510	} else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
1511	/*
1512	* sgid proc has sgid back to us temporarily
1513	*/
1514	r = EPERM;
1515	} else {
1516	/*
1517	* our rgid must be in target's group list (ie,
1518	* sub-processes started by a sgid process)
1519	*/
1520	int ismember = `0`;
1521
1522	if (kauth_cred_ismember_gid(cred,
1523	kauth_cred_getgid(target), &ismember) != `0` \|\|
1524	!ismember)
1525	r = EPERM;
1526	}
1527
1528	return (r);
1529	}
1530
1531	/*
1532	* sysctl stuff
1533	*/
1534
1535	#define KERN_PROCSLOP (5 * sizeof(struct kinfo_proc))
1536
1537	static const u_int sysctl_flagmap[] = {
1538	PK_ADVLOCK, P_ADVLOCK,
1539	PK_EXEC, P_EXEC,
1540	PK_NOCLDWAIT, P_NOCLDWAIT,
1541	PK_32, P_32,
1542	PK_CLDSIGIGN, P_CLDSIGIGN,
1543	PK_SUGID, P_SUGID,
1544	`0`
1545	};
1546
1547	static const u_int sysctl_sflagmap[] = {
1548	PS_NOCLDSTOP, P_NOCLDSTOP,
1549	PS_WEXIT, P_WEXIT,
1550	PS_STOPFORK, P_STOPFORK,
1551	PS_STOPEXEC, P_STOPEXEC,
1552	PS_STOPEXIT, P_STOPEXIT,
1553	`0`
1554	};
1555
1556	static const u_int sysctl_slflagmap[] = {
1557	PSL_TRACED, P_TRACED,
1558	PSL_FSTRACE, P_FSTRACE,
1559	PSL_CHTRACED, P_CHTRACED,
1560	PSL_SYSCALL, P_SYSCALL,
1561	`0`
1562	};
1563
1564	static const u_int sysctl_lflagmap[] = {
1565	PL_CONTROLT, P_CONTROLT,
1566	PL_PPWAIT, P_PPWAIT,
1567	`0`
1568	};
1569
1570	static const u_int sysctl_stflagmap[] = {
1571	PST_PROFIL, P_PROFIL,
1572	`0`
1573
1574	};
1575
1576	/ used by kern_lwp also /
1577	const u_int sysctl_lwpflagmap[] = {
1578	LW_SINTR, L_SINTR,
1579	LW_SYSTEM, L_SYSTEM,
1580	`0`
1581	};
1582
1583	/*
1584	* Find the most ``active'' lwp of a process and return it for ps display
1585	* purposes
1586	*/
1587	static struct lwp *
1588	proc_active_lwp(struct proc *p)
1589	{
1590	static const int ostat[] = {
1591	`0`,
1592	`2`, / LSIDL /
1593	`6`, / LSRUN /
1594	`5`, / LSSLEEP /
1595	`4`, / LSSTOP /
1596	`0`, / LSZOMB /
1597	`1`, / LSDEAD /
1598	`7`, / LSONPROC /
1599	`3` / LSSUSPENDED /
1600	};
1601
1602	struct lwp l, lp = NULL;
1603	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1604	KASSERT(l->l_stat >= `0` && l->l_stat < __arraycount(ostat));
1605	if (lp == NULL \|\|
1606	ostat[l->l_stat] > ostat[lp->l_stat] \|\|
1607	(ostat[l->l_stat] == ostat[lp->l_stat] &&
1608	l->l_cpticks > lp->l_cpticks)) {
1609	lp = l;
1610	continue;
1611	}
1612	}
1613	return lp;
1614	}
1615
1616	static int
1617	sysctl_doeproc(SYSCTLFN_ARGS)
1618	{
1619	union {
1620	struct kinfo_proc kproc;
1621	struct kinfo_proc2 kproc2;
1622	} *kbuf;
1623	struct proc p, next, *marker;
1624	char where, dp;
1625	int type, op, arg, error;
1626	u_int elem_size, kelem_size, elem_count;
1627	size_t buflen, needed;
1628	bool match, zombie, mmmbrains;
1629
1630	if (namelen == `1` && name[`0`] == CTL_QUERY)
1631	return (sysctl_query(SYSCTLFN_CALL(rnode)));
1632
1633	dp = where = oldp;
1634	buflen = where != NULL ? *oldlenp : `0`;
1635	error = `0`;
1636	needed = `0`;
1637	type = rnode->sysctl_num;
1638
1639	if (type == KERN_PROC) {
1640	if (namelen == `0`)
1641	return EINVAL;
1642	switch (op = name[`0`]) {
1643	case KERN_PROC_ALL:
1644	if (namelen != `1`)
1645	return EINVAL;
1646	arg = `0`;
1647	break;
1648	default:
1649	if (namelen != `2`)
1650	return EINVAL;
1651	arg = name[`1`];
1652	break;
1653	}
1654	elem_count = `0`; / Ditto /
1655	kelem_size = elem_size = sizeof(kbuf->kproc);
1656	} else {
1657	if (namelen != `4`)
1658	return EINVAL;
1659	op = name[`0`];
1660	arg = name[`1`];
1661	elem_size = name[`2`];
1662	elem_count = name[`3`];
1663	kelem_size = sizeof(kbuf->kproc2);
1664	}
1665
1666	sysctl_unlock();
1667
1668	kbuf = kmem_alloc(sizeof(*kbuf), KM_SLEEP);
1669	marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
1670	marker->p_flag = PK_MARKER;
1671
1672	mutex_enter(proc_lock);
1673	mmmbrains = false;
1674	for (p = LIST_FIRST(&allproc);; p = next) {
1675	if (p == NULL) {
1676	if (!mmmbrains) {
1677	p = LIST_FIRST(&zombproc);
1678	mmmbrains = true;
1679	}
1680	if (p == NULL)
1681	break;
1682	}
1683	next = LIST_NEXT(p, p_list);
1684	if ((p->p_flag & PK_MARKER) != `0`)
1685	continue;
1686
1687	/*
1688	* Skip embryonic processes.
1689	*/
1690	if (p->p_stat == SIDL)
1691	continue;
1692
1693	mutex_enter(p->p_lock);
1694	error = kauth_authorize_process(l->l_cred,
1695	KAUTH_PROCESS_CANSEE, p,
1696	KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
1697	if (error != `0`) {
1698	mutex_exit(p->p_lock);
1699	continue;
1700	}
1701
1702	/*
1703	* TODO - make more efficient (see notes below).
1704	* do by session.
1705	*/
1706	switch (op) {
1707	case KERN_PROC_PID:
1708	/ could do this with just a lookup /
1709	match = (p->p_pid == (pid_t)arg);
1710	break;
1711
1712	case KERN_PROC_PGRP:
1713	/ could do this by traversing pgrp /
1714	match = (p->p_pgrp->pg_id == (pid_t)arg);
1715	break;
1716
1717	case KERN_PROC_SESSION:
1718	match = (p->p_session->s_sid == (pid_t)arg);
1719	break;
1720
1721	case KERN_PROC_TTY:
1722	match = true;
1723	if (arg == (int) KERN_PROC_TTY_REVOKE) {
1724	if ((p->p_lflag & PL_CONTROLT) == `0` \|\|
1725	p->p_session->s_ttyp == NULL \|\|
1726	p->p_session->s_ttyvp != NULL) {
1727	match = false;
1728	}
1729	} else if ((p->p_lflag & PL_CONTROLT) == `0` \|\|
1730	p->p_session->s_ttyp == NULL) {
1731	if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
1732	match = false;
1733	}
1734	} else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
1735	match = false;
1736	}
1737	break;
1738
1739	case KERN_PROC_UID:
1740	match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
1741	break;
1742
1743	case KERN_PROC_RUID:
1744	match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
1745	break;
1746
1747	case KERN_PROC_GID:
1748	match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
1749	break;
1750
1751	case KERN_PROC_RGID:
1752	match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
1753	break;
1754
1755	case KERN_PROC_ALL:
1756	match = true;
1757	/ allow everything /
1758	break;
1759
1760	default:
1761	error = EINVAL;
1762	mutex_exit(p->p_lock);
1763	goto cleanup;
1764	}
1765	if (!match) {
1766	mutex_exit(p->p_lock);
1767	continue;
1768	}
1769
1770	/*
1771	* Grab a hold on the process.
1772	*/
1773	if (mmmbrains) {
1774	zombie = true;
1775	} else {
1776	zombie = !rw_tryenter(&p->p_reflock, RW_READER);
1777	}
1778	if (zombie) {
1779	LIST_INSERT_AFTER(p, marker, p_list);
1780	}
1781
1782	if (buflen >= elem_size &&
1783	(type == KERN_PROC \|\| elem_count > `0`)) {
1784	if (type == KERN_PROC) {
1785	kbuf->kproc.kp_proc = *p;
1786	fill_eproc(p, &kbuf->kproc.kp_eproc, zombie);
1787	} else {
1788	fill_kproc2(p, &kbuf->kproc2, zombie);
1789	elem_count--;
1790	}
1791	mutex_exit(p->p_lock);
1792	mutex_exit(proc_lock);
1793	/*
1794	* Copy out elem_size, but not larger than kelem_size
1795	*/
1796	error = sysctl_copyout(l, kbuf, dp,
1797	min(kelem_size, elem_size));
1798	mutex_enter(proc_lock);
1799	if (error) {
1800	goto bah;
1801	}
1802	dp += elem_size;
1803	buflen -= elem_size;
1804	} else {
1805	mutex_exit(p->p_lock);
1806	}
1807	needed += elem_size;
1808
1809	/*
1810	* Release reference to process.
1811	*/
1812	if (zombie) {
1813	next = LIST_NEXT(marker, p_list);
1814	LIST_REMOVE(marker, p_list);
1815	} else {
1816	rw_exit(&p->p_reflock);
1817	next = LIST_NEXT(p, p_list);
1818	}
1819	}
1820	mutex_exit(proc_lock);
1821
1822	if (where != NULL) {
1823	*oldlenp = dp - where;
1824	if (needed > *oldlenp) {
1825	error = ENOMEM;
1826	goto out;
1827	}
1828	} else {
1829	needed += KERN_PROCSLOP;
1830	*oldlenp = needed;
1831	}
1832	if (kbuf)
1833	kmem_free(kbuf, sizeof(*kbuf));
1834	if (marker)
1835	kmem_free(marker, sizeof(*marker));
1836	sysctl_relock();
1837	return `0`;
1838	bah:
1839	if (zombie)
1840	LIST_REMOVE(marker, p_list);
1841	else
1842	rw_exit(&p->p_reflock);
1843	cleanup:
1844	mutex_exit(proc_lock);
1845	out:
1846	if (kbuf)
1847	kmem_free(kbuf, sizeof(*kbuf));
1848	if (marker)
1849	kmem_free(marker, sizeof(*marker));
1850	sysctl_relock();
1851	return error;
1852	}
1853
1854	int
1855	copyin_psstrings(struct proc p, struct* ps_strings *arginfo)
1856	{
1857
1858	#ifdef COMPAT_NETBSD32
1859	if (p->p_flag & PK_32) {
1860	struct ps_strings32 arginfo32;
1861
1862	int error = copyin_proc(p, (void *)p->p_psstrp, &arginfo32,
1863	sizeof(arginfo32));
1864	if (error)
1865	return error;
1866	arginfo->ps_argvstr = (void *)(uintptr_t)arginfo32.ps_argvstr;
1867	arginfo->ps_nargvstr = arginfo32.ps_nargvstr;
1868	arginfo->ps_envstr = (void *)(uintptr_t)arginfo32.ps_envstr;
1869	arginfo->ps_nenvstr = arginfo32.ps_nenvstr;
1870	return `0`;
1871	}
1872	#endif
1873	return copyin_proc(p, (void )p->p_psstrp, arginfo, sizeof(arginfo));
1874	}
1875
1876	static int
1877	copy_procargs_sysctl_cb(void cookie_, const* void *src, size_t off, size_t len)
1878	{
1879	void **cookie = cookie_;
1880	struct lwp *l = cookie[`0`];
1881	char *dst = cookie[`1`];
1882
1883	return sysctl_copyout(l, src, dst + off, len);
1884	}
1885
1886	/*
1887	* sysctl helper routine for kern.proc_args pseudo-subtree.
1888	*/
1889	static int
1890	sysctl_kern_proc_args(SYSCTLFN_ARGS)
1891	{
1892	struct ps_strings pss;
1893	struct proc *p;
1894	pid_t pid;
1895	int type, error;
1896	void *cookie[`2`];
1897
1898	if (namelen == `1` && name[`0`] == CTL_QUERY)
1899	return (sysctl_query(SYSCTLFN_CALL(rnode)));
1900
1901	if (newp != NULL \|\| namelen != `2`)
1902	return (EINVAL);
1903	pid = name[`0`];
1904	type = name[`1`];
1905
1906	switch (type) {
1907	case KERN_PROC_PATHNAME:
1908	sysctl_unlock();
1909	error = fill_pathname(l, pid, oldp, oldlenp);
1910	sysctl_relock();
1911	return error;
1912
1913	case KERN_PROC_ARGV:
1914	case KERN_PROC_NARGV:
1915	case KERN_PROC_ENV:
1916	case KERN_PROC_NENV:
1917	/ ok /
1918	break;
1919	default:
1920	return (EINVAL);
1921	}
1922
1923	sysctl_unlock();
1924
1925	/ check pid /
1926	mutex_enter(proc_lock);
1927	if ((p = proc_find(pid)) == NULL) {
1928	error = EINVAL;
1929	goto out_locked;
1930	}
1931	mutex_enter(p->p_lock);
1932
1933	/ Check permission. /
1934	if (type == KERN_PROC_ARGV \|\| type == KERN_PROC_NARGV)
1935	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
1936	p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
1937	else if (type == KERN_PROC_ENV \|\| type == KERN_PROC_NENV)
1938	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
1939	p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
1940	else
1941	error = EINVAL; / XXXGCC /
1942	if (error) {
1943	mutex_exit(p->p_lock);
1944	goto out_locked;
1945	}
1946
1947	if (oldp == NULL) {
1948	if (type == KERN_PROC_NARGV \|\| type == KERN_PROC_NENV)
1949	oldlenp = sizeof* (int);
1950	else
1951	oldlenp = ARG_MAX; /* XXX XXX XXX /
1952	error = `0`;
1953	mutex_exit(p->p_lock);
1954	goto out_locked;
1955	}
1956
1957	/*
1958	* Zombies don't have a stack, so we can't read their psstrings.
1959	* System processes also don't have a user stack.
1960	*/
1961	if (P_ZOMBIE(p) \|\| (p->p_flag & PK_SYSTEM) != `0`) {
1962	error = EINVAL;
1963	mutex_exit(p->p_lock);
1964	goto out_locked;
1965	}
1966
1967	error = rw_tryenter(&p->p_reflock, RW_READER) ? `0` : EBUSY;
1968	mutex_exit(p->p_lock);
1969	if (error) {
1970	goto out_locked;
1971	}
1972	mutex_exit(proc_lock);
1973
1974	if (type == KERN_PROC_NARGV \|\| type == KERN_PROC_NENV) {
1975	int value;
1976	if ((error = copyin_psstrings(p, &pss)) == `0`) {
1977	if (type == KERN_PROC_NARGV)
1978	value = pss.ps_nargvstr;
1979	else
1980	value = pss.ps_nenvstr;
1981	error = sysctl_copyout(l, &value, oldp, sizeof(value));
1982	oldlenp = sizeof*(value);
1983	}
1984	} else {
1985	cookie[`0`] = l;
1986	cookie[`1`] = oldp;
1987	error = copy_procargs(p, type, oldlenp,
1988	copy_procargs_sysctl_cb, cookie);
1989	}
1990	rw_exit(&p->p_reflock);
1991	sysctl_relock();
1992	return error;
1993
1994	out_locked:
1995	mutex_exit(proc_lock);
1996	sysctl_relock();
1997	return error;
1998	}
1999
2000	int
2001	copy_procargs(struct proc p, int* oid, size_t *limit,
2002	int (cb)(void* , const* void , size_t, size_t), void* *cookie)
2003	{
2004	struct ps_strings pss;
2005	size_t len, i, loaded, entry_len;
2006	struct uio auio;
2007	struct iovec aiov;
2008	int error, argvlen;
2009	char *arg;
2010	char **argv;
2011	vaddr_t user_argv;
2012	struct vmspace *vmspace;
2013
2014	/*
2015	* Allocate a temporary buffer to hold the argument vector and
2016	* the arguments themselve.
2017	*/
2018	arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2019	argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2020
2021	/*
2022	* Lock the process down in memory.
2023	*/
2024	vmspace = p->p_vmspace;
2025	uvmspace_addref(vmspace);
2026
2027	/*
2028	* Read in the ps_strings structure.
2029	*/
2030	if ((error = copyin_psstrings(p, &pss)) != `0`)
2031	goto done;
2032
2033	/*
2034	* Now read the address of the argument vector.
2035	*/
2036	switch (oid) {
2037	case KERN_PROC_ARGV:
2038	user_argv = (uintptr_t)pss.ps_argvstr;
2039	argvlen = pss.ps_nargvstr;
2040	break;
2041	case KERN_PROC_ENV:
2042	user_argv = (uintptr_t)pss.ps_envstr;
2043	argvlen = pss.ps_nenvstr;
2044	break;
2045	default:
2046	error = EINVAL;
2047	goto done;
2048	}
2049
2050	if (argvlen < `0`) {
2051	error = EIO;
2052	goto done;
2053	}
2054
2055
2056	/*
2057	* Now copy each string.
2058	*/
2059	len = `0`; / bytes written to user buffer /
2060	loaded = `0`; / bytes from argv already processed /
2061	i = `0`; / To make compiler happy /
2062	entry_len = PROC_PTRSZ(p);
2063
2064	for (; argvlen; --argvlen) {
2065	int finished = `0`;
2066	vaddr_t base;
2067	size_t xlen;
2068	int j;
2069
2070	if (loaded == `0`) {
2071	size_t rem = entry_len * argvlen;
2072	loaded = MIN(rem, PAGE_SIZE);
2073	error = copyin_vmspace(vmspace,
2074	(const void *)user_argv, argv, loaded);
2075	if (error)
2076	break;
2077	user_argv += loaded;
2078	i = `0`;
2079	}
2080
2081	#ifdef COMPAT_NETBSD32
2082	if (p->p_flag & PK_32) {
2083	netbsd32_charp *argv32;
2084
2085	argv32 = (netbsd32_charp *)argv;
2086	base = (vaddr_t)NETBSD32PTR64(argv32[i++]);
2087	} else
2088	#endif
2089	base = (vaddr_t)argv[i++];
2090	loaded -= entry_len;
2091
2092	/*
2093	* The program has messed around with its arguments,
2094	* possibly deleting some, and replacing them with
2095	* NULL's. Treat this as the last argument and not
2096	* a failure.
2097	*/
2098	if (base == `0`)
2099	break;
2100
2101	while (!finished) {
2102	xlen = PAGE_SIZE - (base & PAGE_MASK);
2103
2104	aiov.iov_base = arg;
2105	aiov.iov_len = PAGE_SIZE;
2106	auio.uio_iov = &aiov;
2107	auio.uio_iovcnt = `1`;
2108	auio.uio_offset = base;
2109	auio.uio_resid = xlen;
2110	auio.uio_rw = UIO_READ;
2111	UIO_SETUP_SYSSPACE(&auio);
2112	error = uvm_io(&vmspace->vm_map, &auio, `0`);
2113	if (error)
2114	goto done;
2115
2116	/ Look for the end of the string /
2117	for (j = `0`; j < xlen; j++) {
2118	if (arg[j] == `'\0'`) {
2119	xlen = j + `1`;
2120	finished = `1`;
2121	break;
2122	}
2123	}
2124
2125	/ Check for user buffer overflow /
2126	if (len + xlen > *limit) {
2127	finished = `1`;
2128	if (len > *limit)
2129	xlen = `0`;
2130	else
2131	xlen = *limit - len;
2132	}
2133
2134	/ Copyout the page /
2135	error = (*cb)(cookie, arg, len, xlen);
2136	if (error)
2137	goto done;
2138
2139	len += xlen;
2140	base += xlen;
2141	}
2142	}
2143	*limit = len;
2144
2145	done:
2146	kmem_free(argv, PAGE_SIZE);
2147	kmem_free(arg, PAGE_SIZE);
2148	uvmspace_free(vmspace);
2149	return error;
2150	}
2151
2152	/*
2153	* Fill in an eproc structure for the specified process.
2154	*/
2155	void
2156	fill_eproc(struct proc p, struct* eproc *ep, bool zombie)
2157	{
2158	struct tty *tp;
2159	struct lwp *l;
2160
2161	KASSERT(mutex_owned(proc_lock));
2162	KASSERT(mutex_owned(p->p_lock));
2163
2164	memset(ep, `0`, sizeof(*ep));
2165
2166	ep->e_paddr = p;
2167	ep->e_sess = p->p_session;
2168	if (p->p_cred) {
2169	kauth_cred_topcred(p->p_cred, &ep->e_pcred);
2170	kauth_cred_toucred(p->p_cred, &ep->e_ucred);
2171	}
2172	if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2173	struct vmspace *vm = p->p_vmspace;
2174
2175	ep->e_vm.vm_rssize = vm_resident_count(vm);
2176	ep->e_vm.vm_tsize = vm->vm_tsize;
2177	ep->e_vm.vm_dsize = vm->vm_dsize;
2178	ep->e_vm.vm_ssize = vm->vm_ssize;
2179	ep->e_vm.vm_map.size = vm->vm_map.size;
2180
2181	/ Pick the primary (first) LWP /
2182	l = proc_active_lwp(p);
2183	KASSERT(l != NULL);
2184	lwp_lock(l);
2185	if (l->l_wchan)
2186	strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
2187	lwp_unlock(l);
2188	}
2189	ep->e_ppid = p->p_ppid;
2190	if (p->p_pgrp && p->p_session) {
2191	ep->e_pgid = p->p_pgrp->pg_id;
2192	ep->e_jobc = p->p_pgrp->pg_jobc;
2193	ep->e_sid = p->p_session->s_sid;
2194	if ((p->p_lflag & PL_CONTROLT) &&
2195	(tp = ep->e_sess->s_ttyp)) {
2196	ep->e_tdev = tp->t_dev;
2197	ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2198	ep->e_tsess = tp->t_session;
2199	} else
2200	ep->e_tdev = (uint32_t)NODEV;
2201	ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : `0`;
2202	if (SESS_LEADER(p))
2203	ep->e_flag \|= EPROC_SLEADER;
2204	strncpy(ep->e_login, ep->e_sess->s_login, MAXLOGNAME);
2205	}
2206	ep->e_xsize = ep->e_xrssize = `0`;
2207	ep->e_xccount = ep->e_xswrss = `0`;
2208	}
2209
2210	/*
2211	* Fill in a kinfo_proc2 structure for the specified process.
2212	*/
2213	void
2214	fill_kproc2(struct proc p, struct* kinfo_proc2 *ki, bool zombie)
2215	{
2216	struct tty *tp;
2217	struct lwp l, l2;
2218	struct timeval ut, st, rt;
2219	sigset_t ss1, ss2;
2220	struct rusage ru;
2221	struct vmspace *vm;
2222
2223	KASSERT(mutex_owned(proc_lock));
2224	KASSERT(mutex_owned(p->p_lock));
2225
2226	sigemptyset(&ss1);
2227	sigemptyset(&ss2);
2228	memset(ki, `0`, sizeof(*ki));
2229
2230	ki->p_paddr = PTRTOUINT64(p);
2231	ki->p_fd = PTRTOUINT64(p->p_fd);
2232	ki->p_cwdi = PTRTOUINT64(p->p_cwdi);
2233	ki->p_stats = PTRTOUINT64(p->p_stats);
2234	ki->p_limit = PTRTOUINT64(p->p_limit);
2235	ki->p_vmspace = PTRTOUINT64(p->p_vmspace);
2236	ki->p_sigacts = PTRTOUINT64(p->p_sigacts);
2237	ki->p_sess = PTRTOUINT64(p->p_session);
2238	ki->p_tsess = `0`; / may be changed if controlling tty below /
2239	ki->p_ru = PTRTOUINT64(&p->p_stats->p_ru);
2240	ki->p_eflag = `0`;
2241	ki->p_exitsig = p->p_exitsig;
2242	ki->p_flag = L_INMEM; / Process never swapped out /
2243	ki->p_flag \|= sysctl_map_flags(sysctl_flagmap, p->p_flag);
2244	ki->p_flag \|= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
2245	ki->p_flag \|= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
2246	ki->p_flag \|= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
2247	ki->p_flag \|= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
2248	ki->p_pid = p->p_pid;
2249	ki->p_ppid = p->p_ppid;
2250	ki->p_uid = kauth_cred_geteuid(p->p_cred);
2251	ki->p_ruid = kauth_cred_getuid(p->p_cred);
2252	ki->p_gid = kauth_cred_getegid(p->p_cred);
2253	ki->p_rgid = kauth_cred_getgid(p->p_cred);
2254	ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
2255	ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
2256	ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
2257	kauth_cred_getgroups(p->p_cred, ki->p_groups,
2258	min(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[`0`])),
2259	UIO_SYSSPACE);
2260
2261	ki->p_uticks = p->p_uticks;
2262	ki->p_sticks = p->p_sticks;
2263	ki->p_iticks = p->p_iticks;
2264	ki->p_tpgid = NO_PGID; / may be changed if controlling tty below /
2265	ki->p_tracep = PTRTOUINT64(p->p_tracep);
2266	ki->p_traceflag = p->p_traceflag;
2267
2268	memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore,sizeof(ki_sigset_t));
2269	memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
2270
2271	ki->p_cpticks = `0`;
2272	ki->p_pctcpu = p->p_pctcpu;
2273	ki->p_estcpu = `0`;
2274	ki->p_stat = p->p_stat; / Will likely be overridden by LWP status /
2275	ki->p_realstat = p->p_stat;
2276	ki->p_nice = p->p_nice;
2277	ki->p_xstat = P_WAITSTATUS(p);
2278	ki->p_acflag = p->p_acflag;
2279
2280	strncpy(ki->p_comm, p->p_comm,
2281	min(sizeof(ki->p_comm), sizeof(p->p_comm)));
2282	strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
2283
2284	ki->p_nlwps = p->p_nlwps;
2285	ki->p_realflag = ki->p_flag;
2286
2287	if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2288	vm = p->p_vmspace;
2289	ki->p_vm_rssize = vm_resident_count(vm);
2290	ki->p_vm_tsize = vm->vm_tsize;
2291	ki->p_vm_dsize = vm->vm_dsize;
2292	ki->p_vm_ssize = vm->vm_ssize;
2293	ki->p_vm_vsize = atop(vm->vm_map.size);
2294	/*
2295	* Since the stack is initially mapped mostly with
2296	* PROT_NONE and grown as needed, adjust the "mapped size"
2297	* to skip the unused stack portion.
2298	*/
2299	ki->p_vm_msize =
2300	atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
2301
2302	/ Pick the primary (first) LWP /
2303	l = proc_active_lwp(p);
2304	KASSERT(l != NULL);
2305	lwp_lock(l);
2306	ki->p_nrlwps = p->p_nrlwps;
2307	ki->p_forw = `0`;
2308	ki->p_back = `0`;
2309	ki->p_addr = PTRTOUINT64(l->l_addr);
2310	ki->p_stat = l->l_stat;
2311	ki->p_flag \|= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
2312	ki->p_swtime = l->l_swtime;
2313	ki->p_slptime = l->l_slptime;
2314	if (l->l_stat == LSONPROC)
2315	ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
2316	else
2317	ki->p_schedflags = `0`;
2318	ki->p_priority = lwp_eprio(l);
2319	ki->p_usrpri = l->l_priority;
2320	if (l->l_wchan)
2321	strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
2322	ki->p_wchan = PTRTOUINT64(l->l_wchan);
2323	ki->p_cpuid = cpu_index(l->l_cpu);
2324	lwp_unlock(l);
2325	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2326	/ This is hardly correct, but... /
2327	sigplusset(&l->l_sigpend.sp_set, &ss1);
2328	sigplusset(&l->l_sigmask, &ss2);
2329	ki->p_cpticks += l->l_cpticks;
2330	ki->p_pctcpu += l->l_pctcpu;
2331	ki->p_estcpu += l->l_estcpu;
2332	}
2333	}
2334	sigplusset(&p->p_sigpend.sp_set, &ss2);
2335	memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
2336	memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
2337
2338	if (p->p_session != NULL) {
2339	ki->p_sid = p->p_session->s_sid;
2340	ki->p__pgid = p->p_pgrp->pg_id;
2341	if (p->p_session->s_ttyvp)
2342	ki->p_eflag \|= EPROC_CTTY;
2343	if (SESS_LEADER(p))
2344	ki->p_eflag \|= EPROC_SLEADER;
2345	strncpy(ki->p_login, p->p_session->s_login,
2346	min(sizeof ki->p_login - `1`, sizeof p->p_session->s_login));
2347	ki->p_jobc = p->p_pgrp->pg_jobc;
2348	if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
2349	ki->p_tdev = tp->t_dev;
2350	ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2351	ki->p_tsess = PTRTOUINT64(tp->t_session);
2352	} else {
2353	ki->p_tdev = (int32_t)NODEV;
2354	}
2355	}
2356
2357	if (!P_ZOMBIE(p) && !zombie) {
2358	ki->p_uvalid = `1`;
2359	ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
2360	ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
2361
2362	calcru(p, &ut, &st, NULL, &rt);
2363	ki->p_rtime_sec = rt.tv_sec;
2364	ki->p_rtime_usec = rt.tv_usec;
2365	ki->p_uutime_sec = ut.tv_sec;
2366	ki->p_uutime_usec = ut.tv_usec;
2367	ki->p_ustime_sec = st.tv_sec;
2368	ki->p_ustime_usec = st.tv_usec;
2369
2370	memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
2371	ki->p_uru_nvcsw = `0`;
2372	ki->p_uru_nivcsw = `0`;
2373	LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
2374	ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
2375	ki->p_uru_nivcsw += l2->l_nivcsw;
2376	ruadd(&ru, &l2->l_ru);
2377	}
2378	ki->p_uru_maxrss = ru.ru_maxrss;
2379	ki->p_uru_ixrss = ru.ru_ixrss;
2380	ki->p_uru_idrss = ru.ru_idrss;
2381	ki->p_uru_isrss = ru.ru_isrss;
2382	ki->p_uru_minflt = ru.ru_minflt;
2383	ki->p_uru_majflt = ru.ru_majflt;
2384	ki->p_uru_nswap = ru.ru_nswap;
2385	ki->p_uru_inblock = ru.ru_inblock;
2386	ki->p_uru_oublock = ru.ru_oublock;
2387	ki->p_uru_msgsnd = ru.ru_msgsnd;
2388	ki->p_uru_msgrcv = ru.ru_msgrcv;
2389	ki->p_uru_nsignals = ru.ru_nsignals;
2390
2391	timeradd(&p->p_stats->p_cru.ru_utime,
2392	&p->p_stats->p_cru.ru_stime, &ut);
2393	ki->p_uctime_sec = ut.tv_sec;
2394	ki->p_uctime_usec = ut.tv_usec;
2395	}
2396	}
2397
2398
2399	int
2400	proc_find_locked(struct lwp l, struct* proc **p, pid_t pid)
2401	{
2402	int error;
2403
2404	mutex_enter(proc_lock);
2405	if (pid == -`1`)
2406	*p = l->l_proc;
2407	else
2408	*p = proc_find(pid);
2409
2410	if (*p == NULL) {
2411	if (pid != -`1`)
2412	mutex_exit(proc_lock);
2413	return ESRCH;
2414	}
2415	if (pid != -`1`)
2416	mutex_enter((*p)->p_lock);
2417	mutex_exit(proc_lock);
2418
2419	error = kauth_authorize_process(l->l_cred,
2420	KAUTH_PROCESS_CANSEE, *p,
2421	KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
2422	if (error) {
2423	if (pid != -`1`)
2424	mutex_exit((*p)->p_lock);
2425	}
2426	return error;
2427	}
2428
2429	static int
2430	fill_pathname(struct lwp l, pid_t pid, void* oldp, size_t oldlenp)
2431	{
2432	#ifndef _RUMPKERNEL
2433	int error;
2434	struct proc *p;
2435	char *path;
2436	size_t len;
2437
2438	if ((error = proc_find_locked(l, &p, pid)) != `0`)
2439	return error;
2440
2441	if (p->p_textvp == NULL) {
2442	if (pid != -`1`)
2443	mutex_exit(p->p_lock);
2444	return ENOENT;
2445	}
2446
2447	path = PNBUF_GET();
2448	error = vnode_to_path(path, MAXPATHLEN / `2`, p->p_textvp, l, p);
2449	if (error)
2450	goto out;
2451
2452	len = strlen(path) + `1`;
2453	if (oldp != NULL) {
2454	error = sysctl_copyout(l, path, oldp, *oldlenp);
2455	if (error == `0` && *oldlenp < len)
2456	error = ENOSPC;
2457	}
2458	*oldlenp = len;
2459	out:
2460	PNBUF_PUT(path);
2461	if (pid != -`1`)
2462	mutex_exit(p->p_lock);
2463	return error;
2464	#else
2465	return `0`;
2466	#endif
2467	}
2468

/* Browse the source code of src/src/sys/kern/kern_proc.c */