/*	$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 * static char rcsid[] =
 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $");

#if defined(_KERNEL_OPT)
#include "opt_bpf.h"
#include "sl.h"
#include "strip.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/module.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/uio.h>

#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>

#include <net/if.h>
#include <net/slip.h>

#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <net/bpfjit.h>

#include <net/if_arc.h>
#include <net/if_ether.h>

#include <netinet/in.h>
#include <netinet/if_inarp.h>


#include <compat/sys/sockio.h>

#ifndef BPF_BUFSIZE
/*
 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
 */
# define BPF_BUFSIZE 32768
#endif

#define PRINET	26		/* interruptible */

/*
 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
 * XXX the default values should be computed dynamically based
 * on available memory size and available mbuf clusters.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_DFLTBUFSIZE;	/* XXX set dynamically, see above */
bool bpf_jit = false;
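/*
 * The bpfjit module, when loaded, fills in these code-generator hooks.
 * Until then both pointers are NULL and filters are interpreted; see
 * bpf_jit_generate() and sysctl_net_bpf_jit() for the memory barriers
 * that order reads and writes of these fields.
 */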
struct bpfjit_ops bpfjit_module_ops = {
	.bj_generate_code = NULL,
	.bj_free_code = NULL
};

/*
 * Global BPF statistics returned by net.bpf.stats sysctl.
 */
struct bpf_stat	bpf_gstats;

/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
 */
static kmutex_t bpf_mtx;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_list is the list of descriptors, one per open instance of the device.
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_list;
134
135static int bpf_allocbufs(struct bpf_d *);
136static void bpf_deliver(struct bpf_if *,
137 void *(*cpfn)(void *, const void *, size_t),
138 void *, u_int, u_int, const bool);
139static void bpf_freed(struct bpf_d *);
140static void bpf_ifname(struct ifnet *, struct ifreq *);
141static void *bpf_mcpy(void *, const void *, size_t);
142static int bpf_movein(struct uio *, int, uint64_t,
143 struct mbuf **, struct sockaddr *);
144static void bpf_attachd(struct bpf_d *, struct bpf_if *);
145static void bpf_detachd(struct bpf_d *);
146static int bpf_setif(struct bpf_d *, struct ifreq *);
147static void bpf_timed_out(void *);
148static inline void
149 bpf_wakeup(struct bpf_d *);
150static int bpf_hdrlen(struct bpf_d *);
151static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
152 void *(*)(void *, const void *, size_t), struct timespec *);
153static void reset_d(struct bpf_d *);
154static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
155static int bpf_setdlt(struct bpf_d *, u_int);
156
157static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
158 int);
159static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
160 int);
161static int bpf_ioctl(struct file *, u_long, void *);
162static int bpf_poll(struct file *, int);
163static int bpf_stat(struct file *, struct stat *);
164static int bpf_close(struct file *);
165static int bpf_kqfilter(struct file *, struct knote *);
166static void bpf_softintr(void *);
167
168static const struct fileops bpf_fileops = {
169 .fo_read = bpf_read,
170 .fo_write = bpf_write,
171 .fo_ioctl = bpf_ioctl,
172 .fo_fcntl = fnullop_fcntl,
173 .fo_poll = bpf_poll,
174 .fo_stat = bpf_stat,
175 .fo_close = bpf_close,
176 .fo_kqfilter = bpf_kqfilter,
177 .fo_restart = fnullop_restart,
178};
179
180dev_type_open(bpfopen);
181
182const struct cdevsw bpf_cdevsw = {
183 .d_open = bpfopen,
184 .d_close = noclose,
185 .d_read = noread,
186 .d_write = nowrite,
187 .d_ioctl = noioctl,
188 .d_stop = nostop,
189 .d_tty = notty,
190 .d_poll = nopoll,
191 .d_mmap = nommap,
192 .d_kqfilter = nokqfilter,
193 .d_discard = nodiscard,
194 .d_flag = D_OTHER
195};
196
197bpfjit_func_t
198bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
199{
200
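	/*
	 * Pairs with the producer-side barrier executed when the bpfjit
	 * module publishes bj_generate_code (cf. the membar_sync() in
	 * sysctl_net_bpf_jit()), so a non-NULL pointer observed below
	 * refers to fully initialized module code.
	 */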
	membar_consumer();
	if (bpfjit_module_ops.bj_generate_code != NULL) {
		return bpfjit_module_ops.bj_generate_code(bc, code, size);
	}
	return NULL;
}

void
bpf_jit_freecode(bpfjit_func_t jcode)
{
	KASSERT(bpfjit_module_ops.bj_free_code != NULL);
	bpfjit_module_ops.bj_free_code(jcode);
}

static int
bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct mbuf *m;
	int error;
	size_t len;
	size_t hlen;
	size_t align;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		align = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		/* 6(dst)+6(src)+2(type) */
		hlen = sizeof(struct ether_header);
		align = 2;
		break;

	case DLT_ARCNET:
		sockp->sa_family = AF_UNSPEC;
		hlen = ARC_HDRLEN;
		align = 5;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_LINK;
		/* XXX 4(FORMAC)+6(dst)+6(src) */
		hlen = 16;
		align = 0;
		break;

	case DLT_ECONET:
		sockp->sa_family = AF_UNSPEC;
		hlen = 6;
		align = 2;
		break;

	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header, or the
	 * packet length exceeds the interface mtu, return an error.
	 * (len and hlen are unsigned: a packet shorter than its link
	 * header wraps len - hlen around to a huge value, so it is
	 * rejected by the mtu test as well.)
	 */
	if (len - hlen > mtu)
		return (EMSGSIZE);

	/*
	 * XXX Avoid complicated buffer chaining ---
	 * bail if it won't fit in a single mbuf.
	 * (Take into account possible alignment bytes)
	 */
	if (len + align > MCLBYTES)
		return (EIO);

	m = m_gethdr(M_WAIT, MT_DATA);
	m_reset_rcvif(m);
	m->m_pkthdr.len = (int)(len - hlen);
	if (len + align > MHLEN) {
		m_clget(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	/* Ensure the data is properly aligned */
	if (align > 0) {
		m->m_data += align;
		m->m_len -= (int)align;
	}

	error = uiomove(mtod(m, void *), len, uio);
	if (error)
		goto bad;
	if (hlen != 0) {
		memcpy(sockp->sa_data, mtod(m, void *), hlen);
		m->m_data += hlen; /* XXX */
		len -= hlen;
	}
	m->m_len = (int)len;
	*mp = m;
	return (0);

bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	KASSERT(mutex_owned(&bpf_mtx));
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error __diagused;

		d->bd_promisc = 0;
		/*
		 * Take device out of promiscuous mode.  Since we were
		 * able to enter promiscuous mode, we should be able
		 * to turn it off, but we can get an error if the
		 * interface was configured down, so just log any
		 * unexpected failure and carry on.
		 */
		error = ifpromisc(bp->bif_ifp, 0);
#ifdef DIAGNOSTIC
		if (error)
			printf("%s: ifpromisc failed: %d\n", __func__, error);
#endif
	}
	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == NULL)
			panic("%s: descriptor not in list", __func__);
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == NULL)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = NULL;
	d->bd_bif = NULL;
}

static void
bpf_init(void)
{

	mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);

	LIST_INIT(&bpf_list);

	bpf_gstats.bs_recv = 0;
	bpf_gstats.bs_drop = 0;
	bpf_gstats.bs_capt = 0;

	return;
}

/*
 * bpfilterattach() is called at boot time.  We don't need to do anything
 * here, since any initialization will happen as part of module init code.
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{

}

/*
 * Open the bpf device.  The device clones: each open(2) yields a
 * distinct descriptor.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* falloc() will fill in the descriptor for us. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;

	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
	d->bd_bufsize = bpf_bufsize;
	d->bd_seesent = 1;
	d->bd_feedback = 0;
	d->bd_pid = l->l_proc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
#endif
	getnanotime(&d->bd_btime);
	d->bd_atime = d->bd_mtime = d->bd_btime;
	callout_init(&d->bd_callout, 0);
	selinit(&d->bd_sel);
	d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);
	d->bd_jitcode = NULL;

	mutex_enter(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
	mutex_exit(&bpf_mtx);

	return fd_clone(fp, fd, flag, &bpf_fileops, d);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
	struct bpf_d *d;
	int s;

	KERNEL_LOCK(1, NULL);
	mutex_enter(&bpf_mtx);

	if ((d = fp->f_bpf) == NULL) {
		mutex_exit(&bpf_mtx);
		KERNEL_UNLOCK_ONE(NULL);
		return 0;
	}

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	d->bd_pid = curproc->p_pid;

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif)
		bpf_detachd(d);
	splx(s);
	bpf_freed(d);
	LIST_REMOVE(d, bd_list);
	fp->f_bpf = NULL;

	mutex_exit(&bpf_mtx);
	KERNEL_UNLOCK_ONE(NULL);

	callout_destroy(&d->bd_callout);
	seldestroy(&d->bd_sel);
	softint_disestablish(d->bd_sih);
	free(d, M_DEVBUF);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	do { \
		(d)->bd_hbuf = (d)->bd_sbuf; \
		(d)->bd_hlen = (d)->bd_slen; \
		(d)->bd_sbuf = (d)->bd_fbuf; \
		(d)->bd_slen = 0; \
		(d)->bd_fbuf = NULL; \
	} while (/* CONSTCOND */ 0)
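
/*
 * The three buffers cycle free -> store -> hold: packets accumulate in
 * the store buffer, a rotation parks the full buffer in the hold slot
 * for read(2) to drain, and the drained buffer returns to the free
 * slot.  bd_fbuf is NULL only while a rotated buffer has not yet been
 * consumed by a reader.
 */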
/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpf_read(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_bpf;
	int timed_out;
	int error;
	int s;

	getnanotime(&d->bd_atime);
	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (fp->f_flag & FNONBLOCK) {
			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (EWOULDBLOCK);
			}
			ROTATE_BUFFERS(d);
			break;
		}

		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read, or while we were asleep.  Rotate the
			 * buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		error = tsleep(d, PRINET|PCATCH, "bpf",
		    d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			splx(s);
			KERNEL_UNLOCK_ONE(NULL);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (error != 0)
			goto done;
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
done:
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static inline void
bpf_wakeup(struct bpf_d *d)
{
	wakeup(d);
	if (d->bd_async)
		softint_schedule(d->bd_sih);
	selnotify(&d->bd_sel, 0, 0);
}

static void
bpf_softintr(void *cookie)
{
	struct bpf_d *d;

	d = cookie;
	if (d->bd_async)
		fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = arg;
	int s;

	s = splnet();
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	splx(s);
}


static int
bpf_write(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_bpf;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	int error, s;
	static struct sockaddr_storage dst;

	m = NULL;	/* XXX gcc */

	KERNEL_LOCK(1, NULL);

	if (d->bd_bif == NULL) {
		KERNEL_UNLOCK_ONE(NULL);
		return (ENXIO);
	}
	getnanotime(&d->bd_mtime);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (0);
	}

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
	    (struct sockaddr *) &dst);
	if (error) {
		KERNEL_UNLOCK_ONE(NULL);
		return (error);
	}

	if (m->m_pkthdr.len > ifp->if_mtu) {
		KERNEL_UNLOCK_ONE(NULL);
		m_freem(m);
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
		if (mc != NULL)
			m_set_rcvif(mc, ifp);
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (1 /*d->bd_direction == BPF_D_INOUT*/)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	s = splsoftnet();
	error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL);

	if (mc != NULL) {
		if (error == 0)
			ifp->_if_input(ifp, mc);
		else
			m_freem(mc);
	}
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_ccount = 0;
}

/*
 * FIONREAD		Check for read packet available.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLT		Get link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag.
 * BIOCSHDRCMPLT	Set "header already complete" flag.
 * BIOCSFEEDBACK	Set packet feedback mode.
 * BIOCGFEEDBACK	Get packet feedback mode.
 * BIOCGSEESENT		Get "see sent packets" mode.
 * BIOCSSEESENT		Set "see sent packets" mode.
 */
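
/*
 * Illustrative sketch (not part of the kernel): roughly how a userland
 * consumer drives these ioctls.  The interface name "wm0" and the
 * omitted error handling are assumptions of the example.
 *
 *	int fd = open("/dev/bpf", O_RDONLY);	// cloning device node
 *	struct ifreq ifr;
 *	u_int blen, imm = 1;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "wm0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	// attach to the interface
 *	ioctl(fd, BIOCIMMEDIATE, &imm);	// deliver packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &blen);	// read() size must equal this
 *
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);
 *	for (char *p = buf; p < buf + n;) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		// captured bytes start at p + bh->bh_hdrlen,
 *		// bh->bh_caplen of them
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */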
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
	struct bpf_d *d = fp->f_bpf;
	int s, error = 0;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
	else
		d->bd_compat32 = 0;
#endif

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		mutex_enter(&bpf_mtx);
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		mutex_exit(&bpf_mtx);
		break;

	/*
	 * Get interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		mutex_enter(&bpf_mtx);
		error = bpf_setif(d, addr);
		mutex_exit(&bpf_mtx);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout.
	 */
	case BIOCGORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout.
	 */
	case BIOCSORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
#endif

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			bs->bs_capt = d->bd_ccount;
			break;
		}

	case BIOCGSTATSOLD:
		{
			struct bpf_stat_old *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent" packets flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	/*
	 * Set "feed packets from bpf back to input" mode
	 */
	case BIOCSFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	/*
	 * Get "feed packets from bpf back to input" mode
	 */
	case BIOCGFEEDBACK:
		*(u_int *)addr = d->bd_feedback;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/*
		 * No need to do anything special here; bpf_read()
		 * checks FNONBLOCK to decide whether or not to
		 * block the read.
		 */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case TIOCSPGRP:		/* Process or group to send signals to */
	case FIOSETOWN:
		error = fsetown(&d->bd_pgid, cmd, addr);
		break;

	case TIOCGPGRP:
	case FIOGETOWN:
		error = fgetown(d->bd_pgid, cmd, addr);
		break;
	}
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	bpfjit_func_t jcode, oldj;
	size_t flen, size;
	int s;

	jcode = NULL;
	flen = fp->bf_len;

	if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	if (flen) {
		/*
		 * Allocate the buffer, copy the byte-code from
		 * userspace and validate it.
		 */
		size = flen * sizeof(*fp->bf_insns);
		fcode = malloc(size, M_DEVBUF, M_WAITOK);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, (int)flen)) {
			free(fcode, M_DEVBUF);
			return EINVAL;
		}
		membar_consumer();
		if (bpf_jit)
			jcode = bpf_jit_generate(NULL, fcode, flen);
	} else {
		fcode = NULL;
	}

	s = splnet();
	old = d->bd_filter;
	d->bd_filter = fcode;
	oldj = d->bd_jitcode;
	d->bd_jitcode = jcode;
	reset_d(d);
	splx(s);

	if (old) {
		free(old, M_DEVBUF);
	}
	if (oldj) {
		bpf_jit_freecode(oldj);
	}

	return 0;
}
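
/*
 * Illustrative sketch (not part of the kernel): the smallest useful
 * program a caller might hand to BIOCSETF/bpf_setf() - a single
 * BPF_RET that accepts every packet with an unlimited snapshot; "fd"
 * is assumed to be an open bpf descriptor.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET|BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = __arraycount(insns),
 *		.bf_insns = insns,
 *	};
 *	ioctl(fd, BIOCSETF, &prog);
 */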

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	char *cp;
	int unit_seen, i, s, error;

	KASSERT(mutex_owned(&bpf_mtx));
	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 */
	unit_seen = 0;
	cp = ifr->ifr_name;
	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
			if ((ifr->ifr_name[i] >= 'a' &&
			     ifr->ifr_name[i] <= 'z') ||
			    (ifr->ifr_name[i] >= 'A' &&
			     ifr->ifr_name[i] <= 'Z'))
				continue;
			ifr->ifr_name[i] = '0';
		}
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		s = splnet();
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
static void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
}

static int
bpf_stat(struct file *fp, struct stat *st)
{
	struct bpf_d *d = fp->f_bpf;

	(void)memset(st, 0, sizeof(*st));
	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
	st->st_atimespec = d->bd_atime;
	st->st_mtimespec = d->bd_mtime;
	st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	st->st_mode = S_IFCHR;
	KERNEL_UNLOCK_ONE(NULL);
	return 0;
}

/*
 * Support for poll() system call
 *
 * Return true iff the specific operation will not block indefinitely - with
 * the assumption that it is safe to positively acknowledge a request for the
 * ability to write to the BPF device.
 * Otherwise, return false but make a note that a selnotify() must be done.
 */
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_bpf;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_bpf;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * pkt		pointer to the packet, either a data buffer or an mbuf chain
 * buflen	buffer length, if pkt is a data buffer
 * cpfn		a function that can copy pkt into the listener's buffer
 * pktlen	length of the packet
 * rcv		true if packet came in
 */
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
    void *pkt, u_int pktlen, u_int buflen, const bool rcv)
{
	uint32_t mem[BPF_MEMWORDS];
	bpf_args_t args = {
		.pkt = (const uint8_t *)pkt,
		.wirelen = pktlen,
		.buflen = buflen,
		.mem = mem,
		.arg = NULL
	};
	bool gottime = false;
	struct timespec ts;

	/*
	 * Note that the IPL does not have to be raised at this point.
	 * The only problem that could arise here is that if two different
	 * interfaces shared any data.  This is not the case.
	 */
	for (struct bpf_d *d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		u_int slen;

		if (!d->bd_seesent && !rcv) {
			continue;
		}
		d->bd_rcount++;
		bpf_gstats.bs_recv++;

		if (d->bd_jitcode)
			slen = d->bd_jitcode(NULL, &args);
		else
			slen = bpf_filter_ext(NULL, d->bd_filter, &args);

		if (!slen) {
			continue;
		}
		if (!gottime) {
			gottime = true;
			nanotime(&ts);
		}
		catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
	}
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{

	bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
}

/*
 * Incoming linkage from device drivers, when the head of the packet is in
 * a buffer, and the tail is in an mbuf chain.
 */
static void
_bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	u_int pktlen;
	struct mbuf mb;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m) + dlen;

	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	(void)memset(&mb, 0, sizeof(mb));
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;

	bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif_index != 0);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
static void
_bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	void *(*cpfn)(void *, const void *, size_t);
	u_int pktlen, buflen;
	void *marg;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m);

	if (pktlen == m->m_len) {
		cpfn = (void *)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif_index != 0);
}

/*
 * We need to prepend the address family as
 * a four byte field.  Cons up a dummy header
 * to pacify bpf.  This is safe because bpf
 * will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
 */
static void
_bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 4;
	m0.m_data = (char *)&af;

	_bpf_mtap(bp, &m0);
}

/*
 * Put the SLIP pseudo-"link header" in place.
 * Note this M_PREPEND() should never fail,
 * since we know we always have enough space
 * in the input buffer.
 */
static void
_bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
	int s;
	u_char *hp;

	M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
	if (*m == NULL)
		return;

	hp = mtod(*m, u_char *);
	hp[SLX_DIR] = SLIPDIR_IN;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, *m);
	splx(s);

	m_adj(*m, SLIP_HDRLEN);
}

/*
 * Put the SLIP pseudo-"link header" in
 * place.  The compressed header is now
 * at the beginning of the mbuf.
 */
static void
_bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
	struct mbuf m0;
	u_char *hp;
	int s;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_data = m0.m_dat;
	m0.m_len = SLIP_HDRLEN;

	hp = mtod(&m0, u_char *);

	hp[SLX_DIR] = SLIPDIR_OUT;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, &m0);
	splx(s);
	m_freem(m);
}

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
#ifdef _LP64
	if (d->bd_compat32)
		return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
	else
#endif
		return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
}
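
/*
 * Worked example (numbers assumed for illustration): for DLT_EN10MB,
 * bif_hdrlen is 14; on a platform where SIZEOF_BPF_HDR is 18 and
 * BPF_WORDALIGN() rounds to a multiple of 4, BPF_WORDALIGN(14 + 18)
 * is 32 and bpf_hdrlen() returns 32 - 14 = 18.  The packet is then
 * copied at offset 18 of the record, so the 14-byte Ethernet header
 * ends, and the IP header begins, at offset 32 - a longword boundary.
 */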

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Call the wakeup functions if it's time to wakeup
 * a listener (buffer full), "cpfn" is the routine called to do the
 * actual data transfer.  memcpy is passed in to copy contiguous chunks,
 * while bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
	char *h;
	int totlen, curlen, caplen;
	int hdrlen = bpf_hdrlen(d);
	int do_wakeup = 0;

	++d->bd_ccount;
	++bpf_gstats.bs_capt;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;
	/*
	 * If we adjusted totlen to fit the bufsize, it could be that
	 * totlen is smaller than hdrlen because of the link layer header.
	 */
	caplen = totlen - hdrlen;
	if (caplen < 0)
		caplen = 0;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
#ifdef _LP64
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			++bpf_gstats.bs_drop;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	h = (char *)d->bd_sbuf + curlen;
#ifdef _LP64
	if (d->bd_compat32) {
		struct bpf_hdr32 *hp32;

		hp32 = (struct bpf_hdr32 *)h;
		hp32->bh_tstamp.tv_sec = ts->tv_sec;
		hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp32->bh_datalen = pktlen;
		hp32->bh_hdrlen = hdrlen;
		hp32->bh_caplen = caplen;
	} else
#endif
	{
		struct bpf_hdr *hp;

		hp = (struct bpf_hdr *)h;
		hp->bh_tstamp.tv_sec = ts->tv_sec;
		hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = caplen;
	}

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(h + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
	 * will cause filt_bpfread() to be called with it adjusted.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_fbuf)
		return (ENOBUFS);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_sbuf) {
		free(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and has not yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		free(d->bd_filter, M_DEVBUF);

	if (d->bd_jitcode != NULL) {
		bpf_jit_freecode(d->bd_jitcode);
	}
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header for the specified dlt
 * (variable length headers not yet supported).
 */
static void
_bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;
	bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
	if (bp == NULL)
		panic("bpfattach");

	mutex_enter(&bpf_mtx);
	bp->bif_dlist = NULL;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	bp->bif_hdrlen = hdrlen;
	mutex_exit(&bpf_mtx);
#if 0
	printf("bpf: %s attached\n", ifp->if_xname);
#endif
}

/*
 * Remove an interface from bpf.
 */
static void
_bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, **pbp;
	struct bpf_d *d;
	int s;

	mutex_enter(&bpf_mtx);
	/* Nuke the vnodes for any open instances */
	LIST_FOREACH(d, &bpf_list, bd_list) {
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be freed later by the close routine.
			 */
			s = splnet();
			d->bd_promisc = 0;	/* we can't touch device. */
			bpf_detachd(d);
			splx(s);
		}
	}

  again:
	for (bp = bpf_iflist, pbp = &bpf_iflist;
	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
		if (bp->bif_ifp == ifp) {
			*pbp = bp->bif_next;
			free(bp, M_DEVBUF);
			goto again;
		}
	}
	mutex_exit(&bpf_mtx);
}

/*
 * Change the data link type of an interface.
 */
static void
_bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_driverp == &ifp->if_bpf)
			break;
	}
	if (bp == NULL)
		panic("bpf_change_type");

	bp->bif_dlt = dlt;

	bp->bif_hdrlen = hdrlen;
}

/*
 * Get a list of the available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return ENOMEM;
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return error;
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s, error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));

	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	s = splnet();
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		error = ifpromisc(bp->bif_ifp, 1);
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	splx(s);
	return 0;
}

static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}

#if defined(MODULAR) || defined(BPFJIT)
static int
sysctl_net_bpf_jit(SYSCTLFN_ARGS)
{
	bool newval;
	int error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	newval = bpf_jit;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	bpf_jit = newval;

	/*
	 * Do a full sync to publish new bpf_jit value and
	 * update bpfjit_module_ops.bj_generate_code variable.
	 */
	membar_sync();

	if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
		printf("JIT compilation is postponed "
		    "until after bpfjit module is loaded\n");
	}

	return 0;
}
#endif

static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int error, elem_count;
	struct bpf_d *dp;
	struct bpf_d_ext dpe;
	size_t len, needed, elem_size, out_size;
	char *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	LIST_FOREACH(dp, &bpf_list, bd_list) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field)	dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			if (dp->bd_bif)
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

static struct sysctllog *bpf_sysctllog;
static void
sysctl_net_bpf_setup(void)
{
	const struct sysctlnode *node;

	node = NULL;
	sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "bpf",
	    SYSCTL_DESCR("BPF options"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
#if defined(MODULAR) || defined(BPFJIT)
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_BOOL, "jit",
		    SYSCTL_DESCR("Toggle Just-In-Time compilation"),
		    sysctl_net_bpf_jit, 0, &bpf_jit, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
#endif
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "maxbufsize",
		    SYSCTL_DESCR("Maximum size for data capture buffer"),
		    sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "stats",
		    SYSCTL_DESCR("BPF stats"),
		    NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "peers",
		    SYSCTL_DESCR("BPF peers"),
		    sysctl_net_bpf_peers, 0, NULL, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}

}

struct bpf_ops bpf_ops_kernel = {
	.bpf_attach =		_bpfattach,
	.bpf_detach =		_bpfdetach,
	.bpf_change_type =	_bpf_change_type,

	.bpf_tap =		_bpf_tap,
	.bpf_mtap =		_bpf_mtap,
	.bpf_mtap2 =		_bpf_mtap2,
	.bpf_mtap_af =		_bpf_mtap_af,
	.bpf_mtap_sl_in =	_bpf_mtap_sl_in,
	.bpf_mtap_sl_out =	_bpf_mtap_sl_out,
};

MODULE(MODULE_CLASS_DRIVER, bpf, "bpf_filter");

static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
#ifdef _MODULE
	devmajor_t bmajor, cmajor;
#endif
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpf_init();
#ifdef _MODULE
		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error)
			break;
#endif

		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		sysctl_net_bpf_setup();
		break;

	case MODULE_CMD_FINI:
		/*
		 * While there is no reference counting for bpf callers,
		 * unload could at least in theory be done similarly to
		 * system call disestablishment.  This should even be
		 * a little simpler:
		 *
		 * 1) replace op vector with stubs
		 * 2) post update to all cpus with xc
		 * 3) check that nobody is in bpf anymore
		 *    (it's doubtful we'd want something like l_sysent,
		 *    but we could do something like *signed* percpu
		 *    counters.  if the sum is 0, we're good).
		 * 4) if fail, unroll changes
		 *
		 * NOTE: change won't be atomic to the outside.  some
		 * packets may not be captured even if unload is
		 * not successful.  I think packet capture not working
		 * is a perfectly logical consequence of trying to
		 * disable packet capture.
		 */
		error = EOPNOTSUPP;
		/* insert sysctl teardown */
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}