1 | /* $NetBSD: if_tap.c,v 1.93 2016/10/02 14:17:07 christos Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 2003, 2004, 2008, 2009 The NetBSD Foundation. |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /* |
30 | * tap(4) is a virtual Ethernet interface. It appears as a real Ethernet |
31 | * device to the system, but can also be accessed by userland through a |
32 | * character device interface, which allows reading and injecting frames. |
33 | */ |
34 | |
35 | #include <sys/cdefs.h> |
36 | __KERNEL_RCSID(0, "$NetBSD: if_tap.c,v 1.93 2016/10/02 14:17:07 christos Exp $" ); |
37 | |
38 | #if defined(_KERNEL_OPT) |
39 | |
40 | #include "opt_modular.h" |
41 | #include "opt_compat_netbsd.h" |
42 | #endif |
43 | |
44 | #include <sys/param.h> |
45 | #include <sys/systm.h> |
46 | #include <sys/kernel.h> |
47 | #include <sys/malloc.h> |
48 | #include <sys/conf.h> |
49 | #include <sys/cprng.h> |
50 | #include <sys/device.h> |
51 | #include <sys/file.h> |
52 | #include <sys/filedesc.h> |
53 | #include <sys/poll.h> |
54 | #include <sys/proc.h> |
55 | #include <sys/select.h> |
56 | #include <sys/sockio.h> |
57 | #include <sys/sysctl.h> |
58 | #include <sys/kauth.h> |
59 | #include <sys/mutex.h> |
60 | #include <sys/intr.h> |
61 | #include <sys/stat.h> |
62 | #include <sys/device.h> |
63 | #include <sys/module.h> |
64 | #include <sys/atomic.h> |
65 | |
66 | #include <net/if.h> |
67 | #include <net/if_dl.h> |
68 | #include <net/if_ether.h> |
69 | #include <net/if_media.h> |
70 | #include <net/if_tap.h> |
71 | #include <net/bpf.h> |
72 | |
73 | #include <compat/sys/sockio.h> |
74 | |
75 | #include "ioconf.h" |
76 | |
77 | /* |
78 | * sysctl node management |
79 | * |
80 | * It's not really possible to use a SYSCTL_SETUP block with |
81 | * current module implementation, so it is easier to just define |
82 | * our own function. |
83 | * |
84 | * The handler function is a "helper" in Andrew Brown's sysctl |
85 | * framework terminology. It is used as a gateway for sysctl |
86 | * requests over the nodes. |
87 | * |
88 | * tap_log allows the module to log creations of nodes and |
89 | * destroy them all at once using sysctl_teardown. |
90 | */ |
91 | static int tap_node; |
92 | static int tap_sysctl_handler(SYSCTLFN_PROTO); |
93 | static void sysctl_tap_setup(struct sysctllog **); |
94 | |
95 | /* |
96 | * Since we're an Ethernet device, we need the 2 following |
97 | * components: a struct ethercom and a struct ifmedia |
98 | * since we don't attach a PHY to ourselves. |
99 | * We could emulate one, but there's no real point. |
100 | */ |
101 | |
102 | struct tap_softc { |
103 | device_t sc_dev; |
104 | struct ifmedia sc_im; |
105 | struct ethercom sc_ec; |
106 | int sc_flags; |
107 | #define TAP_INUSE 0x00000001 /* tap device can only be opened once */ |
108 | #define TAP_ASYNCIO 0x00000002 /* user is using async I/O (SIGIO) on the device */ |
109 | #define TAP_NBIO 0x00000004 /* user wants calls to avoid blocking */ |
110 | #define TAP_GOING 0x00000008 /* interface is being destroyed */ |
111 | struct selinfo sc_rsel; |
112 | pid_t sc_pgid; /* For async. IO */ |
113 | kmutex_t sc_rdlock; |
114 | kmutex_t sc_kqlock; |
115 | void *sc_sih; |
116 | struct timespec sc_atime; |
117 | struct timespec sc_mtime; |
118 | struct timespec sc_btime; |
119 | }; |
120 | |
121 | /* autoconf(9) glue */ |
122 | |
123 | static int tap_match(device_t, cfdata_t, void *); |
124 | static void tap_attach(device_t, device_t, void *); |
125 | static int tap_detach(device_t, int); |
126 | |
127 | CFATTACH_DECL_NEW(tap, sizeof(struct tap_softc), |
128 | tap_match, tap_attach, tap_detach, NULL); |
129 | extern struct cfdriver tap_cd; |
130 | |
131 | /* Real device access routines */ |
132 | static int tap_dev_close(struct tap_softc *); |
133 | static int tap_dev_read(int, struct uio *, int); |
134 | static int tap_dev_write(int, struct uio *, int); |
135 | static int tap_dev_ioctl(int, u_long, void *, struct lwp *); |
136 | static int tap_dev_poll(int, int, struct lwp *); |
137 | static int tap_dev_kqfilter(int, struct knote *); |
138 | |
139 | /* Fileops access routines */ |
140 | static int tap_fops_close(file_t *); |
141 | static int tap_fops_read(file_t *, off_t *, struct uio *, |
142 | kauth_cred_t, int); |
143 | static int tap_fops_write(file_t *, off_t *, struct uio *, |
144 | kauth_cred_t, int); |
145 | static int tap_fops_ioctl(file_t *, u_long, void *); |
146 | static int tap_fops_poll(file_t *, int); |
147 | static int tap_fops_stat(file_t *, struct stat *); |
148 | static int tap_fops_kqfilter(file_t *, struct knote *); |
149 | |
150 | static const struct fileops tap_fileops = { |
151 | .fo_read = tap_fops_read, |
152 | .fo_write = tap_fops_write, |
153 | .fo_ioctl = tap_fops_ioctl, |
154 | .fo_fcntl = fnullop_fcntl, |
155 | .fo_poll = tap_fops_poll, |
156 | .fo_stat = tap_fops_stat, |
157 | .fo_close = tap_fops_close, |
158 | .fo_kqfilter = tap_fops_kqfilter, |
159 | .fo_restart = fnullop_restart, |
160 | }; |
161 | |
162 | /* Helper for cloning open() */ |
163 | static int tap_dev_cloner(struct lwp *); |
164 | |
165 | /* Character device routines */ |
166 | static int tap_cdev_open(dev_t, int, int, struct lwp *); |
167 | static int tap_cdev_close(dev_t, int, int, struct lwp *); |
168 | static int tap_cdev_read(dev_t, struct uio *, int); |
169 | static int tap_cdev_write(dev_t, struct uio *, int); |
170 | static int tap_cdev_ioctl(dev_t, u_long, void *, int, struct lwp *); |
171 | static int tap_cdev_poll(dev_t, int, struct lwp *); |
172 | static int tap_cdev_kqfilter(dev_t, struct knote *); |
173 | |
174 | const struct cdevsw tap_cdevsw = { |
175 | .d_open = tap_cdev_open, |
176 | .d_close = tap_cdev_close, |
177 | .d_read = tap_cdev_read, |
178 | .d_write = tap_cdev_write, |
179 | .d_ioctl = tap_cdev_ioctl, |
180 | .d_stop = nostop, |
181 | .d_tty = notty, |
182 | .d_poll = tap_cdev_poll, |
183 | .d_mmap = nommap, |
184 | .d_kqfilter = tap_cdev_kqfilter, |
185 | .d_discard = nodiscard, |
186 | .d_flag = D_OTHER |
187 | }; |
188 | |
189 | #define TAP_CLONER 0xfffff /* Maximal minor value */ |
190 | |
191 | /* kqueue-related routines */ |
192 | static void tap_kqdetach(struct knote *); |
193 | static int tap_kqread(struct knote *, long); |
194 | |
195 | /* |
196 | * Those are needed by the if_media interface. |
197 | */ |
198 | |
199 | static int tap_mediachange(struct ifnet *); |
200 | static void tap_mediastatus(struct ifnet *, struct ifmediareq *); |
201 | |
202 | /* |
203 | * Those are needed by the ifnet interface, and would typically be |
204 | * there for any network interface driver. |
205 | * Some other routines are optional: watchdog and drain. |
206 | */ |
207 | |
208 | static void tap_start(struct ifnet *); |
209 | static void tap_stop(struct ifnet *, int); |
210 | static int tap_init(struct ifnet *); |
211 | static int tap_ioctl(struct ifnet *, u_long, void *); |
212 | |
213 | /* Internal functions */ |
214 | static int tap_lifaddr(struct ifnet *, u_long, struct ifaliasreq *); |
215 | static void tap_softintr(void *); |
216 | |
217 | /* |
218 | * tap is a clonable interface, although it is highly unrealistic for |
219 | * an Ethernet device. |
220 | * |
221 | * Here are the bits needed for a clonable interface. |
222 | */ |
223 | static int tap_clone_create(struct if_clone *, int); |
224 | static int tap_clone_destroy(struct ifnet *); |
225 | |
226 | struct if_clone tap_cloners = IF_CLONE_INITIALIZER("tap" , |
227 | tap_clone_create, |
228 | tap_clone_destroy); |
229 | |
/* Helper functions shared by the two cloning code paths */
231 | static struct tap_softc * tap_clone_creator(int); |
232 | int tap_clone_destroyer(device_t); |
233 | |
234 | static struct sysctllog *tap_sysctl_clog; |
235 | |
236 | #ifdef _MODULE |
237 | devmajor_t tap_bmajor = -1, tap_cmajor = -1; |
238 | #endif |
239 | |
240 | static u_int tap_count; |
241 | |
/*
 * Pseudo-device attach hook.  The argument is ignored.
 */
void
tapattach(int n)
{
	/*
	 * Deliberately empty: all initialization is performed by the
	 * module initialization code in tapinit() below.
	 */
}
251 | |
252 | static void |
253 | tapinit(void) |
254 | { |
255 | int error = config_cfattach_attach(tap_cd.cd_name, &tap_ca); |
256 | if (error) { |
257 | aprint_error("%s: unable to register cfattach\n" , |
258 | tap_cd.cd_name); |
259 | (void)config_cfdriver_detach(&tap_cd); |
260 | return; |
261 | } |
262 | |
263 | if_clone_attach(&tap_cloners); |
264 | sysctl_tap_setup(&tap_sysctl_clog); |
265 | #ifdef _MODULE |
266 | devsw_attach("tap" , NULL, &tap_bmajor, &tap_cdevsw, &tap_cmajor); |
267 | #endif |
268 | } |
269 | |
270 | static int |
271 | tapdetach(void) |
272 | { |
273 | int error = 0; |
274 | |
275 | if (tap_count != 0) |
276 | return EBUSY; |
277 | |
278 | #ifdef _MODULE |
279 | if (error == 0) |
280 | error = devsw_detach(NULL, &tap_cdevsw); |
281 | #endif |
282 | if (error == 0) |
283 | sysctl_teardown(&tap_sysctl_clog); |
284 | if (error == 0) |
285 | if_clone_detach(&tap_cloners); |
286 | |
287 | if (error == 0) |
288 | error = config_cfattach_detach(tap_cd.cd_name, &tap_ca); |
289 | |
290 | return error; |
291 | } |
292 | |
293 | /* Pretty much useless for a pseudo-device */ |
294 | static int |
295 | tap_match(device_t parent, cfdata_t cfdata, void *arg) |
296 | { |
297 | |
298 | return (1); |
299 | } |
300 | |
301 | void |
302 | tap_attach(device_t parent, device_t self, void *aux) |
303 | { |
304 | struct tap_softc *sc = device_private(self); |
305 | struct ifnet *ifp; |
306 | const struct sysctlnode *node; |
307 | int error; |
308 | uint8_t enaddr[ETHER_ADDR_LEN] = |
309 | { 0xf2, 0x0b, 0xa4, 0xff, 0xff, 0xff }; |
310 | char enaddrstr[3 * ETHER_ADDR_LEN]; |
311 | |
312 | sc->sc_dev = self; |
313 | sc->sc_sih = NULL; |
314 | getnanotime(&sc->sc_btime); |
315 | sc->sc_atime = sc->sc_mtime = sc->sc_btime; |
316 | sc->sc_flags = 0; |
317 | selinit(&sc->sc_rsel); |
318 | |
319 | /* |
320 | * Initialize the two locks for the device. |
321 | * |
322 | * We need a lock here because even though the tap device can be |
323 | * opened only once, the file descriptor might be passed to another |
324 | * process, say a fork(2)ed child. |
325 | * |
326 | * The Giant saves us from most of the hassle, but since the read |
327 | * operation can sleep, we don't want two processes to wake up at |
328 | * the same moment and both try and dequeue a single packet. |
329 | * |
330 | * The queue for event listeners (used by kqueue(9), see below) has |
331 | * to be protected too, so use a spin lock. |
332 | */ |
333 | mutex_init(&sc->sc_rdlock, MUTEX_DEFAULT, IPL_NONE); |
334 | mutex_init(&sc->sc_kqlock, MUTEX_DEFAULT, IPL_VM); |
335 | |
336 | if (!pmf_device_register(self, NULL, NULL)) |
337 | aprint_error_dev(self, "couldn't establish power handler\n" ); |
338 | |
339 | /* |
340 | * In order to obtain unique initial Ethernet address on a host, |
341 | * do some randomisation. It's not meant for anything but avoiding |
342 | * hard-coding an address. |
343 | */ |
344 | cprng_fast(&enaddr[3], 3); |
345 | |
346 | aprint_verbose_dev(self, "Ethernet address %s\n" , |
347 | ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr)); |
348 | |
349 | /* |
350 | * Why 1000baseT? Why not? You can add more. |
351 | * |
352 | * Note that there are 3 steps: init, one or several additions to |
353 | * list of supported media, and in the end, the selection of one |
354 | * of them. |
355 | */ |
356 | ifmedia_init(&sc->sc_im, 0, tap_mediachange, tap_mediastatus); |
357 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T, 0, NULL); |
358 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL); |
359 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX, 0, NULL); |
360 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); |
361 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T, 0, NULL); |
362 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); |
363 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL); |
364 | ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO); |
365 | |
366 | /* |
367 | * One should note that an interface must do multicast in order |
368 | * to support IPv6. |
369 | */ |
370 | ifp = &sc->sc_ec.ec_if; |
371 | strcpy(ifp->if_xname, device_xname(self)); |
372 | ifp->if_softc = sc; |
373 | ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; |
374 | ifp->if_ioctl = tap_ioctl; |
375 | ifp->if_start = tap_start; |
376 | ifp->if_stop = tap_stop; |
377 | ifp->if_init = tap_init; |
378 | IFQ_SET_READY(&ifp->if_snd); |
379 | |
380 | sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU; |
381 | |
382 | /* Those steps are mandatory for an Ethernet driver. */ |
383 | if_initialize(ifp); |
384 | ether_ifattach(ifp, enaddr); |
385 | if_register(ifp); |
386 | |
387 | /* |
388 | * Add a sysctl node for that interface. |
389 | * |
390 | * The pointer transmitted is not a string, but instead a pointer to |
391 | * the softc structure, which we can use to build the string value on |
392 | * the fly in the helper function of the node. See the comments for |
393 | * tap_sysctl_handler for details. |
394 | * |
395 | * Usually sysctl_createv is called with CTL_CREATE as the before-last |
396 | * component. However, we can allocate a number ourselves, as we are |
397 | * the only consumer of the net.link.<iface> node. In this case, the |
398 | * unit number is conveniently used to number the node. CTL_CREATE |
399 | * would just work, too. |
400 | */ |
401 | if ((error = sysctl_createv(NULL, 0, NULL, |
402 | &node, CTLFLAG_READWRITE, |
403 | CTLTYPE_STRING, device_xname(self), NULL, |
404 | tap_sysctl_handler, 0, (void *)sc, 18, |
405 | CTL_NET, AF_LINK, tap_node, device_unit(sc->sc_dev), |
406 | CTL_EOL)) != 0) |
407 | aprint_error_dev(self, "sysctl_createv returned %d, ignoring\n" , |
408 | error); |
409 | } |
410 | |
411 | /* |
412 | * When detaching, we do the inverse of what is done in the attach |
413 | * routine, in reversed order. |
414 | */ |
415 | static int |
416 | tap_detach(device_t self, int flags) |
417 | { |
418 | struct tap_softc *sc = device_private(self); |
419 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
420 | int error; |
421 | int s; |
422 | |
423 | sc->sc_flags |= TAP_GOING; |
424 | s = splnet(); |
425 | tap_stop(ifp, 1); |
426 | if_down(ifp); |
427 | splx(s); |
428 | |
429 | if (sc->sc_sih != NULL) { |
430 | softint_disestablish(sc->sc_sih); |
431 | sc->sc_sih = NULL; |
432 | } |
433 | |
434 | /* |
435 | * Destroying a single leaf is a very straightforward operation using |
436 | * sysctl_destroyv. One should be sure to always end the path with |
437 | * CTL_EOL. |
438 | */ |
439 | if ((error = sysctl_destroyv(NULL, CTL_NET, AF_LINK, tap_node, |
440 | device_unit(sc->sc_dev), CTL_EOL)) != 0) |
441 | aprint_error_dev(self, |
442 | "sysctl_destroyv returned %d, ignoring\n" , error); |
443 | ether_ifdetach(ifp); |
444 | if_detach(ifp); |
445 | ifmedia_delete_instance(&sc->sc_im, IFM_INST_ANY); |
446 | seldestroy(&sc->sc_rsel); |
447 | mutex_destroy(&sc->sc_rdlock); |
448 | mutex_destroy(&sc->sc_kqlock); |
449 | |
450 | pmf_device_deregister(self); |
451 | |
452 | return (0); |
453 | } |
454 | |
455 | /* |
456 | * This function is called by the ifmedia layer to notify the driver |
457 | * that the user requested a media change. A real driver would |
458 | * reconfigure the hardware. |
459 | */ |
/* ifmedia change callback: there is no hardware to reprogram, so succeed. */
static int
tap_mediachange(struct ifnet *ifp)
{
	return 0;
}
465 | |
466 | /* |
467 | * Here the user asks for the currently used media. |
468 | */ |
469 | static void |
470 | tap_mediastatus(struct ifnet *ifp, struct ifmediareq *imr) |
471 | { |
472 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
473 | imr->ifm_active = sc->sc_im.ifm_cur->ifm_media; |
474 | } |
475 | |
476 | /* |
477 | * This is the function where we SEND packets. |
478 | * |
479 | * There is no 'receive' equivalent. A typical driver will get |
480 | * interrupts from the hardware, and from there will inject new packets |
481 | * into the network stack. |
482 | * |
483 | * Once handled, a packet must be freed. A real driver might not be able |
484 | * to fit all the pending packets into the hardware, and is allowed to |
485 | * return before having sent all the packets. It should then use the |
486 | * if_flags flag IFF_OACTIVE to notify the upper layer. |
487 | * |
488 | * There are also other flags one should check, such as IFF_PAUSE. |
489 | * |
490 | * It is our duty to make packets available to BPF listeners. |
491 | * |
492 | * You should be aware that this function is called by the Ethernet layer |
493 | * at splnet(). |
494 | * |
495 | * When the device is opened, we have to pass the packet(s) to the |
496 | * userland. For that we stay in OACTIVE mode while the userland gets |
497 | * the packets, and we send a signal to the processes waiting to read. |
498 | * |
499 | * wakeup(sc) is the counterpart to the tsleep call in |
500 | * tap_dev_read, while selnotify() is used for kevent(2) and |
501 | * poll(2) (which includes select(2)) listeners. |
502 | */ |
503 | static void |
504 | tap_start(struct ifnet *ifp) |
505 | { |
506 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
507 | struct mbuf *m0; |
508 | |
509 | if ((sc->sc_flags & TAP_INUSE) == 0) { |
510 | /* Simply drop packets */ |
511 | for(;;) { |
512 | IFQ_DEQUEUE(&ifp->if_snd, m0); |
513 | if (m0 == NULL) |
514 | return; |
515 | |
516 | ifp->if_opackets++; |
517 | bpf_mtap(ifp, m0); |
518 | |
519 | m_freem(m0); |
520 | } |
521 | } else if (!IFQ_IS_EMPTY(&ifp->if_snd)) { |
522 | ifp->if_flags |= IFF_OACTIVE; |
523 | wakeup(sc); |
524 | selnotify(&sc->sc_rsel, 0, 1); |
525 | if (sc->sc_flags & TAP_ASYNCIO) |
526 | softint_schedule(sc->sc_sih); |
527 | } |
528 | } |
529 | |
530 | static void |
531 | tap_softintr(void *cookie) |
532 | { |
533 | struct tap_softc *sc; |
534 | struct ifnet *ifp; |
535 | int a, b; |
536 | |
537 | sc = cookie; |
538 | |
539 | if (sc->sc_flags & TAP_ASYNCIO) { |
540 | ifp = &sc->sc_ec.ec_if; |
541 | if (ifp->if_flags & IFF_RUNNING) { |
542 | a = POLL_IN; |
543 | b = POLLIN|POLLRDNORM; |
544 | } else { |
545 | a = POLL_HUP; |
546 | b = 0; |
547 | } |
548 | fownsignal(sc->sc_pgid, SIGIO, a, b, NULL); |
549 | } |
550 | } |
551 | |
552 | /* |
553 | * A typical driver will only contain the following handlers for |
554 | * ioctl calls, except SIOCSIFPHYADDR. |
555 | * The latter is a hack I used to set the Ethernet address of the |
556 | * faked device. |
557 | * |
558 | * Note that both ifmedia_ioctl() and ether_ioctl() have to be |
559 | * called under splnet(). |
560 | */ |
561 | static int |
562 | tap_ioctl(struct ifnet *ifp, u_long cmd, void *data) |
563 | { |
564 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
565 | struct ifreq *ifr = (struct ifreq *)data; |
566 | int s, error; |
567 | |
568 | s = splnet(); |
569 | |
570 | switch (cmd) { |
571 | #ifdef OSIOCSIFMEDIA |
572 | case OSIOCSIFMEDIA: |
573 | #endif |
574 | case SIOCSIFMEDIA: |
575 | case SIOCGIFMEDIA: |
576 | error = ifmedia_ioctl(ifp, ifr, &sc->sc_im, cmd); |
577 | break; |
578 | case SIOCSIFPHYADDR: |
579 | error = tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data); |
580 | break; |
581 | default: |
582 | error = ether_ioctl(ifp, cmd, data); |
583 | if (error == ENETRESET) |
584 | error = 0; |
585 | break; |
586 | } |
587 | |
588 | splx(s); |
589 | |
590 | return (error); |
591 | } |
592 | |
593 | /* |
594 | * Helper function to set Ethernet address. This has been replaced by |
595 | * the generic SIOCALIFADDR ioctl on a PF_LINK socket. |
596 | */ |
597 | static int |
598 | tap_lifaddr(struct ifnet *ifp, u_long cmd, struct ifaliasreq *ifra) |
599 | { |
600 | const struct sockaddr *sa = &ifra->ifra_addr; |
601 | |
602 | if (sa->sa_family != AF_LINK) |
603 | return (EINVAL); |
604 | |
605 | if_set_sadl(ifp, sa->sa_data, ETHER_ADDR_LEN, false); |
606 | |
607 | return (0); |
608 | } |
609 | |
610 | /* |
611 | * _init() would typically be called when an interface goes up, |
612 | * meaning it should configure itself into the state in which it |
613 | * can send packets. |
614 | */ |
615 | static int |
616 | tap_init(struct ifnet *ifp) |
617 | { |
618 | ifp->if_flags |= IFF_RUNNING; |
619 | |
620 | tap_start(ifp); |
621 | |
622 | return (0); |
623 | } |
624 | |
625 | /* |
 * _stop() is called when an interface goes down.  It is our
 * responsibility to validate that state by clearing the
 * IFF_RUNNING flag.
629 | * |
630 | * We have to wake up all the sleeping processes to have the pending |
631 | * read requests cancelled. |
632 | */ |
633 | static void |
634 | tap_stop(struct ifnet *ifp, int disable) |
635 | { |
636 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
637 | |
638 | ifp->if_flags &= ~IFF_RUNNING; |
639 | wakeup(sc); |
640 | selnotify(&sc->sc_rsel, 0, 1); |
641 | if (sc->sc_flags & TAP_ASYNCIO) |
642 | softint_schedule(sc->sc_sih); |
643 | } |
644 | |
645 | /* |
646 | * The 'create' command of ifconfig can be used to create |
647 | * any numbered instance of a given device. Thus we have to |
648 | * make sure we have enough room in cd_devs to create the |
649 | * user-specified instance. config_attach_pseudo will do this |
650 | * for us. |
651 | */ |
652 | static int |
653 | tap_clone_create(struct if_clone *ifc, int unit) |
654 | { |
655 | if (tap_clone_creator(unit) == NULL) { |
656 | aprint_error("%s%d: unable to attach an instance\n" , |
657 | tap_cd.cd_name, unit); |
658 | return (ENXIO); |
659 | } |
660 | atomic_inc_uint(&tap_count); |
661 | return (0); |
662 | } |
663 | |
664 | /* |
 * tap(4) can be cloned in two ways:
 *   - using 'ifconfig tap0 create', which will use the network
 *     interface cloning API, and call tap_clone_create above;
 *   - opening the cloning device node, whose minor number is TAP_CLONER.
 *     See below for an explanation of how this part works.
670 | */ |
671 | static struct tap_softc * |
672 | tap_clone_creator(int unit) |
673 | { |
674 | struct cfdata *cf; |
675 | |
676 | cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); |
677 | cf->cf_name = tap_cd.cd_name; |
678 | cf->cf_atname = tap_ca.ca_name; |
679 | if (unit == -1) { |
680 | /* let autoconf find the first free one */ |
681 | cf->cf_unit = 0; |
682 | cf->cf_fstate = FSTATE_STAR; |
683 | } else { |
684 | cf->cf_unit = unit; |
685 | cf->cf_fstate = FSTATE_NOTFOUND; |
686 | } |
687 | |
688 | return device_private(config_attach_pseudo(cf)); |
689 | } |
690 | |
691 | /* |
692 | * The clean design of if_clone and autoconf(9) makes that part |
693 | * really straightforward. The second argument of config_detach |
694 | * means neither QUIET nor FORCED. |
695 | */ |
696 | static int |
697 | tap_clone_destroy(struct ifnet *ifp) |
698 | { |
699 | struct tap_softc *sc = ifp->if_softc; |
700 | int error = tap_clone_destroyer(sc->sc_dev); |
701 | |
702 | if (error == 0) |
703 | atomic_dec_uint(&tap_count); |
704 | return error; |
705 | } |
706 | |
707 | int |
708 | tap_clone_destroyer(device_t dev) |
709 | { |
710 | cfdata_t cf = device_cfdata(dev); |
711 | int error; |
712 | |
713 | if ((error = config_detach(dev, 0)) != 0) |
714 | aprint_error_dev(dev, "unable to detach instance\n" ); |
715 | free(cf, M_DEVBUF); |
716 | |
717 | return (error); |
718 | } |
719 | |
720 | /* |
721 | * tap(4) is a bit of an hybrid device. It can be used in two different |
722 | * ways: |
723 | * 1. ifconfig tapN create, then use /dev/tapN to read/write off it. |
724 | * 2. open /dev/tap, get a new interface created and read/write off it. |
725 | * That interface is destroyed when the process that had it created exits. |
726 | * |
727 | * The first way is managed by the cdevsw structure, and you access interfaces |
728 | * through a (major, minor) mapping: tap4 is obtained by the minor number |
729 | * 4. The entry points for the cdevsw interface are prefixed by tap_cdev_. |
730 | * |
731 | * The second way is the so-called "cloning" device. It's a special minor |
732 | * number (chosen as the maximal number, to allow as much tap devices as |
733 | * possible). The user first opens the cloner (e.g., /dev/tap), and that |
734 | * call ends in tap_cdev_open. The actual place where it is handled is |
735 | * tap_dev_cloner. |
736 | * |
 * A tap device cannot be opened more than once at a time, so the cdevsw
 * part of open() does nothing but note that the interface is being used and
 * hence ready to actually handle packets.
740 | */ |
741 | |
742 | static int |
743 | tap_cdev_open(dev_t dev, int flags, int fmt, struct lwp *l) |
744 | { |
745 | struct tap_softc *sc; |
746 | |
747 | if (minor(dev) == TAP_CLONER) |
748 | return tap_dev_cloner(l); |
749 | |
750 | sc = device_lookup_private(&tap_cd, minor(dev)); |
751 | if (sc == NULL) |
752 | return (ENXIO); |
753 | |
754 | /* The device can only be opened once */ |
755 | if (sc->sc_flags & TAP_INUSE) |
756 | return (EBUSY); |
757 | sc->sc_flags |= TAP_INUSE; |
758 | return (0); |
759 | } |
760 | |
761 | /* |
762 | * There are several kinds of cloning devices, and the most simple is the one |
763 | * tap(4) uses. What it does is change the file descriptor with a new one, |
764 | * with its own fileops structure (which maps to the various read, write, |
 * ioctl functions).  It starts by allocating a new file descriptor with
 * fd_allocfile(), then actually creates the new tap device.
767 | * |
 * Once those two steps are successful, we can re-wire the existing file
 * descriptor to its new self.  This is done with fd_clone(): it fills the fp
 * structure as needed (notably f_devunit gets filled with the fifth parameter
 * passed, the unit of the tap device, which will allow us to identify the
 * device later), and returns EMOVEFD.
773 | * |
774 | * That magic value is interpreted by sys_open() which then replaces the |
775 | * current file descriptor by the new one (through a magic member of struct |
776 | * lwp, l_dupfd). |
777 | * |
778 | * The tap device is flagged as being busy since it otherwise could be |
779 | * externally accessed through the corresponding device node with the cdevsw |
780 | * interface. |
781 | */ |
782 | |
783 | static int |
784 | tap_dev_cloner(struct lwp *l) |
785 | { |
786 | struct tap_softc *sc; |
787 | file_t *fp; |
788 | int error, fd; |
789 | |
790 | if ((error = fd_allocfile(&fp, &fd)) != 0) |
791 | return (error); |
792 | |
793 | if ((sc = tap_clone_creator(-1)) == NULL) { |
794 | fd_abort(curproc, fp, fd); |
795 | return (ENXIO); |
796 | } |
797 | |
798 | sc->sc_flags |= TAP_INUSE; |
799 | |
800 | return fd_clone(fp, fd, FREAD|FWRITE, &tap_fileops, |
801 | (void *)(intptr_t)device_unit(sc->sc_dev)); |
802 | } |
803 | |
804 | /* |
805 | * While all other operations (read, write, ioctl, poll and kqfilter) are |
806 | * really the same whether we are in cdevsw or fileops mode, the close() |
807 | * function is slightly different in the two cases. |
808 | * |
 * As for the others, the core of it is shared in tap_dev_close.  What
 * it does is sufficient for the cdevsw interface, but the cloning interface
 * needs one more thing: the interface is destroyed when the process that
 * created it closes it.
813 | */ |
814 | static int |
815 | tap_cdev_close(dev_t dev, int flags, int fmt, |
816 | struct lwp *l) |
817 | { |
818 | struct tap_softc *sc = |
819 | device_lookup_private(&tap_cd, minor(dev)); |
820 | |
821 | if (sc == NULL) |
822 | return (ENXIO); |
823 | |
824 | return tap_dev_close(sc); |
825 | } |
826 | |
827 | /* |
 * It might happen that the administrator used ifconfig to externally destroy
 * the interface.  In that case, tap_fops_close will be called while
 * tap_detach is already happening.  If we called it again from here, we
 * would deadlock.  TAP_GOING ensures that this situation doesn't happen.
832 | */ |
833 | static int |
834 | tap_fops_close(file_t *fp) |
835 | { |
836 | int unit = fp->f_devunit; |
837 | struct tap_softc *sc; |
838 | int error; |
839 | |
840 | sc = device_lookup_private(&tap_cd, unit); |
841 | if (sc == NULL) |
842 | return (ENXIO); |
843 | |
844 | /* tap_dev_close currently always succeeds, but it might not |
845 | * always be the case. */ |
846 | KERNEL_LOCK(1, NULL); |
847 | if ((error = tap_dev_close(sc)) != 0) { |
848 | KERNEL_UNLOCK_ONE(NULL); |
849 | return (error); |
850 | } |
851 | |
852 | /* Destroy the device now that it is no longer useful, |
853 | * unless it's already being destroyed. */ |
854 | if ((sc->sc_flags & TAP_GOING) != 0) { |
855 | KERNEL_UNLOCK_ONE(NULL); |
856 | return (0); |
857 | } |
858 | |
859 | error = tap_clone_destroyer(sc->sc_dev); |
860 | KERNEL_UNLOCK_ONE(NULL); |
861 | return error; |
862 | } |
863 | |
864 | static int |
865 | tap_dev_close(struct tap_softc *sc) |
866 | { |
867 | struct ifnet *ifp; |
868 | int s; |
869 | |
870 | s = splnet(); |
871 | /* Let tap_start handle packets again */ |
872 | ifp = &sc->sc_ec.ec_if; |
873 | ifp->if_flags &= ~IFF_OACTIVE; |
874 | |
875 | /* Purge output queue */ |
876 | if (!(IFQ_IS_EMPTY(&ifp->if_snd))) { |
877 | struct mbuf *m; |
878 | |
879 | for (;;) { |
880 | IFQ_DEQUEUE(&ifp->if_snd, m); |
881 | if (m == NULL) |
882 | break; |
883 | |
884 | ifp->if_opackets++; |
885 | bpf_mtap(ifp, m); |
886 | m_freem(m); |
887 | } |
888 | } |
889 | splx(s); |
890 | |
891 | if (sc->sc_sih != NULL) { |
892 | softint_disestablish(sc->sc_sih); |
893 | sc->sc_sih = NULL; |
894 | } |
895 | sc->sc_flags &= ~(TAP_INUSE | TAP_ASYNCIO); |
896 | |
897 | return (0); |
898 | } |
899 | |
900 | static int |
901 | tap_cdev_read(dev_t dev, struct uio *uio, int flags) |
902 | { |
903 | return tap_dev_read(minor(dev), uio, flags); |
904 | } |
905 | |
906 | static int |
907 | tap_fops_read(file_t *fp, off_t *offp, struct uio *uio, |
908 | kauth_cred_t cred, int flags) |
909 | { |
910 | int error; |
911 | |
912 | KERNEL_LOCK(1, NULL); |
913 | error = tap_dev_read(fp->f_devunit, uio, flags); |
914 | KERNEL_UNLOCK_ONE(NULL); |
915 | return error; |
916 | } |
917 | |
918 | static int |
919 | tap_dev_read(int unit, struct uio *uio, int flags) |
920 | { |
921 | struct tap_softc *sc = device_lookup_private(&tap_cd, unit); |
922 | struct ifnet *ifp; |
923 | struct mbuf *m, *n; |
924 | int error = 0, s; |
925 | |
926 | if (sc == NULL) |
927 | return (ENXIO); |
928 | |
929 | getnanotime(&sc->sc_atime); |
930 | |
931 | ifp = &sc->sc_ec.ec_if; |
932 | if ((ifp->if_flags & IFF_UP) == 0) |
933 | return (EHOSTDOWN); |
934 | |
935 | /* |
936 | * In the TAP_NBIO case, we have to make sure we won't be sleeping |
937 | */ |
938 | if ((sc->sc_flags & TAP_NBIO) != 0) { |
939 | if (!mutex_tryenter(&sc->sc_rdlock)) |
940 | return (EWOULDBLOCK); |
941 | } else { |
942 | mutex_enter(&sc->sc_rdlock); |
943 | } |
944 | |
945 | s = splnet(); |
946 | if (IFQ_IS_EMPTY(&ifp->if_snd)) { |
947 | ifp->if_flags &= ~IFF_OACTIVE; |
948 | /* |
949 | * We must release the lock before sleeping, and re-acquire it |
950 | * after. |
951 | */ |
952 | mutex_exit(&sc->sc_rdlock); |
953 | if (sc->sc_flags & TAP_NBIO) |
954 | error = EWOULDBLOCK; |
955 | else |
956 | error = tsleep(sc, PSOCK|PCATCH, "tap" , 0); |
957 | splx(s); |
958 | |
959 | if (error != 0) |
960 | return (error); |
961 | /* The device might have been downed */ |
962 | if ((ifp->if_flags & IFF_UP) == 0) |
963 | return (EHOSTDOWN); |
964 | if ((sc->sc_flags & TAP_NBIO)) { |
965 | if (!mutex_tryenter(&sc->sc_rdlock)) |
966 | return (EWOULDBLOCK); |
967 | } else { |
968 | mutex_enter(&sc->sc_rdlock); |
969 | } |
970 | s = splnet(); |
971 | } |
972 | |
973 | IFQ_DEQUEUE(&ifp->if_snd, m); |
974 | ifp->if_flags &= ~IFF_OACTIVE; |
975 | splx(s); |
976 | if (m == NULL) { |
977 | error = 0; |
978 | goto out; |
979 | } |
980 | |
981 | ifp->if_opackets++; |
982 | bpf_mtap(ifp, m); |
983 | |
984 | /* |
985 | * One read is one packet. |
986 | */ |
987 | do { |
988 | error = uiomove(mtod(m, void *), |
989 | min(m->m_len, uio->uio_resid), uio); |
990 | m = n = m_free(m); |
991 | } while (m != NULL && uio->uio_resid > 0 && error == 0); |
992 | |
993 | if (m != NULL) |
994 | m_freem(m); |
995 | |
996 | out: |
997 | mutex_exit(&sc->sc_rdlock); |
998 | return (error); |
999 | } |
1000 | |
1001 | static int |
1002 | tap_fops_stat(file_t *fp, struct stat *st) |
1003 | { |
1004 | int error = 0; |
1005 | struct tap_softc *sc; |
1006 | int unit = fp->f_devunit; |
1007 | |
1008 | (void)memset(st, 0, sizeof(*st)); |
1009 | |
1010 | KERNEL_LOCK(1, NULL); |
1011 | sc = device_lookup_private(&tap_cd, unit); |
1012 | if (sc == NULL) { |
1013 | error = ENXIO; |
1014 | goto out; |
1015 | } |
1016 | |
1017 | st->st_dev = makedev(cdevsw_lookup_major(&tap_cdevsw), unit); |
1018 | st->st_atimespec = sc->sc_atime; |
1019 | st->st_mtimespec = sc->sc_mtime; |
1020 | st->st_ctimespec = st->st_birthtimespec = sc->sc_btime; |
1021 | st->st_uid = kauth_cred_geteuid(fp->f_cred); |
1022 | st->st_gid = kauth_cred_getegid(fp->f_cred); |
1023 | out: |
1024 | KERNEL_UNLOCK_ONE(NULL); |
1025 | return error; |
1026 | } |
1027 | |
1028 | static int |
1029 | tap_cdev_write(dev_t dev, struct uio *uio, int flags) |
1030 | { |
1031 | return tap_dev_write(minor(dev), uio, flags); |
1032 | } |
1033 | |
1034 | static int |
1035 | tap_fops_write(file_t *fp, off_t *offp, struct uio *uio, |
1036 | kauth_cred_t cred, int flags) |
1037 | { |
1038 | int error; |
1039 | |
1040 | KERNEL_LOCK(1, NULL); |
1041 | error = tap_dev_write(fp->f_devunit, uio, flags); |
1042 | KERNEL_UNLOCK_ONE(NULL); |
1043 | return error; |
1044 | } |
1045 | |
1046 | static int |
1047 | tap_dev_write(int unit, struct uio *uio, int flags) |
1048 | { |
1049 | struct tap_softc *sc = |
1050 | device_lookup_private(&tap_cd, unit); |
1051 | struct ifnet *ifp; |
1052 | struct mbuf *m, **mp; |
1053 | int error = 0; |
1054 | int s; |
1055 | |
1056 | if (sc == NULL) |
1057 | return (ENXIO); |
1058 | |
1059 | getnanotime(&sc->sc_mtime); |
1060 | ifp = &sc->sc_ec.ec_if; |
1061 | |
1062 | /* One write, one packet, that's the rule */ |
1063 | MGETHDR(m, M_DONTWAIT, MT_DATA); |
1064 | if (m == NULL) { |
1065 | ifp->if_ierrors++; |
1066 | return (ENOBUFS); |
1067 | } |
1068 | m->m_pkthdr.len = uio->uio_resid; |
1069 | |
1070 | mp = &m; |
1071 | while (error == 0 && uio->uio_resid > 0) { |
1072 | if (*mp != m) { |
1073 | MGET(*mp, M_DONTWAIT, MT_DATA); |
1074 | if (*mp == NULL) { |
1075 | error = ENOBUFS; |
1076 | break; |
1077 | } |
1078 | } |
1079 | (*mp)->m_len = min(MHLEN, uio->uio_resid); |
1080 | error = uiomove(mtod(*mp, void *), (*mp)->m_len, uio); |
1081 | mp = &(*mp)->m_next; |
1082 | } |
1083 | if (error) { |
1084 | ifp->if_ierrors++; |
1085 | m_freem(m); |
1086 | return (error); |
1087 | } |
1088 | |
1089 | ifp->if_ipackets++; |
1090 | m_set_rcvif(m, ifp); |
1091 | |
1092 | bpf_mtap(ifp, m); |
1093 | s = splnet(); |
1094 | if_input(ifp, m); |
1095 | splx(s); |
1096 | |
1097 | return (0); |
1098 | } |
1099 | |
1100 | static int |
1101 | tap_cdev_ioctl(dev_t dev, u_long cmd, void *data, int flags, |
1102 | struct lwp *l) |
1103 | { |
1104 | return tap_dev_ioctl(minor(dev), cmd, data, l); |
1105 | } |
1106 | |
1107 | static int |
1108 | tap_fops_ioctl(file_t *fp, u_long cmd, void *data) |
1109 | { |
1110 | return tap_dev_ioctl(fp->f_devunit, cmd, data, curlwp); |
1111 | } |
1112 | |
1113 | static int |
1114 | tap_dev_ioctl(int unit, u_long cmd, void *data, struct lwp *l) |
1115 | { |
1116 | struct tap_softc *sc = device_lookup_private(&tap_cd, unit); |
1117 | |
1118 | if (sc == NULL) |
1119 | return ENXIO; |
1120 | |
1121 | switch (cmd) { |
1122 | case FIONREAD: |
1123 | { |
1124 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
1125 | struct mbuf *m; |
1126 | int s; |
1127 | |
1128 | s = splnet(); |
1129 | IFQ_POLL(&ifp->if_snd, m); |
1130 | |
1131 | if (m == NULL) |
1132 | *(int *)data = 0; |
1133 | else |
1134 | *(int *)data = m->m_pkthdr.len; |
1135 | splx(s); |
1136 | return 0; |
1137 | } |
1138 | case TIOCSPGRP: |
1139 | case FIOSETOWN: |
1140 | return fsetown(&sc->sc_pgid, cmd, data); |
1141 | case TIOCGPGRP: |
1142 | case FIOGETOWN: |
1143 | return fgetown(sc->sc_pgid, cmd, data); |
1144 | case FIOASYNC: |
1145 | if (*(int *)data) { |
1146 | if (sc->sc_sih == NULL) { |
1147 | sc->sc_sih = softint_establish(SOFTINT_CLOCK, |
1148 | tap_softintr, sc); |
1149 | if (sc->sc_sih == NULL) |
1150 | return EBUSY; /* XXX */ |
1151 | } |
1152 | sc->sc_flags |= TAP_ASYNCIO; |
1153 | } else { |
1154 | sc->sc_flags &= ~TAP_ASYNCIO; |
1155 | if (sc->sc_sih != NULL) { |
1156 | softint_disestablish(sc->sc_sih); |
1157 | sc->sc_sih = NULL; |
1158 | } |
1159 | } |
1160 | return 0; |
1161 | case FIONBIO: |
1162 | if (*(int *)data) |
1163 | sc->sc_flags |= TAP_NBIO; |
1164 | else |
1165 | sc->sc_flags &= ~TAP_NBIO; |
1166 | return 0; |
1167 | #ifdef OTAPGIFNAME |
1168 | case OTAPGIFNAME: |
1169 | #endif |
1170 | case TAPGIFNAME: |
1171 | { |
1172 | struct ifreq *ifr = (struct ifreq *)data; |
1173 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
1174 | |
1175 | strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); |
1176 | return 0; |
1177 | } |
1178 | default: |
1179 | return ENOTTY; |
1180 | } |
1181 | } |
1182 | |
1183 | static int |
1184 | tap_cdev_poll(dev_t dev, int events, struct lwp *l) |
1185 | { |
1186 | return tap_dev_poll(minor(dev), events, l); |
1187 | } |
1188 | |
1189 | static int |
1190 | tap_fops_poll(file_t *fp, int events) |
1191 | { |
1192 | return tap_dev_poll(fp->f_devunit, events, curlwp); |
1193 | } |
1194 | |
1195 | static int |
1196 | tap_dev_poll(int unit, int events, struct lwp *l) |
1197 | { |
1198 | struct tap_softc *sc = |
1199 | device_lookup_private(&tap_cd, unit); |
1200 | int revents = 0; |
1201 | |
1202 | if (sc == NULL) |
1203 | return POLLERR; |
1204 | |
1205 | if (events & (POLLIN|POLLRDNORM)) { |
1206 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
1207 | struct mbuf *m; |
1208 | int s; |
1209 | |
1210 | s = splnet(); |
1211 | IFQ_POLL(&ifp->if_snd, m); |
1212 | |
1213 | if (m != NULL) |
1214 | revents |= events & (POLLIN|POLLRDNORM); |
1215 | else { |
1216 | mutex_spin_enter(&sc->sc_kqlock); |
1217 | selrecord(l, &sc->sc_rsel); |
1218 | mutex_spin_exit(&sc->sc_kqlock); |
1219 | } |
1220 | splx(s); |
1221 | } |
1222 | revents |= events & (POLLOUT|POLLWRNORM); |
1223 | |
1224 | return (revents); |
1225 | } |
1226 | |
/*
 * kqueue filter tables installed by tap_dev_kqfilter(): the first one
 * for EVFILT_READ, the second one for EVFILT_WRITE.  Both detach
 * through tap_kqdetach; they differ only in the last member
 * (tap_kqread vs. filt_seltrue).
 * NOTE(review): the initializers are positional; the leading 1 and NULL
 * are presumably the "is a file descriptor" flag and an unused attach
 * hook -- confirm against the definition of struct filterops.
 */
static struct filterops tap_read_filterops = { 1, NULL, tap_kqdetach,
	tap_kqread };
static struct filterops tap_seltrue_filterops = { 1, NULL, tap_kqdetach,
	filt_seltrue };
1231 | |
1232 | static int |
1233 | tap_cdev_kqfilter(dev_t dev, struct knote *kn) |
1234 | { |
1235 | return tap_dev_kqfilter(minor(dev), kn); |
1236 | } |
1237 | |
1238 | static int |
1239 | tap_fops_kqfilter(file_t *fp, struct knote *kn) |
1240 | { |
1241 | return tap_dev_kqfilter(fp->f_devunit, kn); |
1242 | } |
1243 | |
1244 | static int |
1245 | tap_dev_kqfilter(int unit, struct knote *kn) |
1246 | { |
1247 | struct tap_softc *sc = |
1248 | device_lookup_private(&tap_cd, unit); |
1249 | |
1250 | if (sc == NULL) |
1251 | return (ENXIO); |
1252 | |
1253 | KERNEL_LOCK(1, NULL); |
1254 | switch(kn->kn_filter) { |
1255 | case EVFILT_READ: |
1256 | kn->kn_fop = &tap_read_filterops; |
1257 | break; |
1258 | case EVFILT_WRITE: |
1259 | kn->kn_fop = &tap_seltrue_filterops; |
1260 | break; |
1261 | default: |
1262 | KERNEL_UNLOCK_ONE(NULL); |
1263 | return (EINVAL); |
1264 | } |
1265 | |
1266 | kn->kn_hook = sc; |
1267 | mutex_spin_enter(&sc->sc_kqlock); |
1268 | SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext); |
1269 | mutex_spin_exit(&sc->sc_kqlock); |
1270 | KERNEL_UNLOCK_ONE(NULL); |
1271 | return (0); |
1272 | } |
1273 | |
1274 | static void |
1275 | tap_kqdetach(struct knote *kn) |
1276 | { |
1277 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; |
1278 | |
1279 | KERNEL_LOCK(1, NULL); |
1280 | mutex_spin_enter(&sc->sc_kqlock); |
1281 | SLIST_REMOVE(&sc->sc_rsel.sel_klist, kn, knote, kn_selnext); |
1282 | mutex_spin_exit(&sc->sc_kqlock); |
1283 | KERNEL_UNLOCK_ONE(NULL); |
1284 | } |
1285 | |
1286 | static int |
1287 | tap_kqread(struct knote *kn, long hint) |
1288 | { |
1289 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; |
1290 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
1291 | struct mbuf *m; |
1292 | int s, rv; |
1293 | |
1294 | KERNEL_LOCK(1, NULL); |
1295 | s = splnet(); |
1296 | IFQ_POLL(&ifp->if_snd, m); |
1297 | |
1298 | if (m == NULL) |
1299 | kn->kn_data = 0; |
1300 | else |
1301 | kn->kn_data = m->m_pkthdr.len; |
1302 | splx(s); |
1303 | rv = (kn->kn_data != 0 ? 1 : 0); |
1304 | KERNEL_UNLOCK_ONE(NULL); |
1305 | return rv; |
1306 | } |
1307 | |
1308 | /* |
1309 | * sysctl management routines |
1310 | * You can set the address of an interface through: |
1311 | * net.link.tap.tap<number> |
1312 | * |
1313 | * Note the consistent use of tap_log in order to use |
1314 | * sysctl_teardown at unload time. |
1315 | * |
1316 | * In the kernel you will find a lot of SYSCTL_SETUP blocks. Those |
1317 | * blocks register a function in a special section of the kernel |
1318 | * (called a link set) which is used at init_sysctl() time to cycle |
1319 | * through all those functions to create the kernel's sysctl tree. |
1320 | * |
1321 | * It is not possible to use link sets in a module, so the |
1322 | * easiest is to simply call our own setup routine at load time. |
1323 | * |
1324 | * In the SYSCTL_SETUP blocks you find in the kernel, nodes have the |
1325 | * CTLFLAG_PERMANENT flag, meaning they cannot be removed. Once the |
1326 | * whole kernel sysctl tree is built, it is not possible to add any |
1327 | * permanent node. |
1328 | * |
1329 | * It should be noted that we're not saving the sysctlnode pointer |
1330 | * we are returned when creating the "tap" node. That structure |
1331 | * cannot be trusted once out of the calling function, as it might |
1332 | * get reused. So we just save the MIB number, and always give the |
1333 | * full path starting from the root for later calls to sysctl_createv |
1334 | * and sysctl_destroyv. |
1335 | */ |
1336 | static void |
1337 | sysctl_tap_setup(struct sysctllog **clog) |
1338 | { |
1339 | const struct sysctlnode *node; |
1340 | int error = 0; |
1341 | |
1342 | if ((error = sysctl_createv(clog, 0, NULL, NULL, |
1343 | CTLFLAG_PERMANENT, |
1344 | CTLTYPE_NODE, "link" , NULL, |
1345 | NULL, 0, NULL, 0, |
1346 | CTL_NET, AF_LINK, CTL_EOL)) != 0) |
1347 | return; |
1348 | |
1349 | /* |
1350 | * The first four parameters of sysctl_createv are for management. |
1351 | * |
1352 | * The four that follows, here starting with a '0' for the flags, |
1353 | * describe the node. |
1354 | * |
1355 | * The next series of four set its value, through various possible |
1356 | * means. |
1357 | * |
1358 | * Last but not least, the path to the node is described. That path |
1359 | * is relative to the given root (third argument). Here we're |
1360 | * starting from the root. |
1361 | */ |
1362 | if ((error = sysctl_createv(clog, 0, NULL, &node, |
1363 | CTLFLAG_PERMANENT, |
1364 | CTLTYPE_NODE, "tap" , NULL, |
1365 | NULL, 0, NULL, 0, |
1366 | CTL_NET, AF_LINK, CTL_CREATE, CTL_EOL)) != 0) |
1367 | return; |
1368 | tap_node = node->sysctl_num; |
1369 | } |
1370 | |
1371 | /* |
 * The helper functions make Andrew Brown's interface really
 * shine.  They make it possible to create a value on the fly,
 * whether the sysctl value is being read or written.
1375 | * |
1376 | * As shown as an example in the man page, the first step is to |
1377 | * create a copy of the node to have sysctl_lookup work on it. |
1378 | * |
1379 | * Here, we have more work to do than just a copy, since we have |
1380 | * to create the string. The first step is to collect the actual |
1381 | * value of the node, which is a convenient pointer to the softc |
1382 | * of the interface. From there we create the string and use it |
1383 | * as the value, but only for the *copy* of the node. |
1384 | * |
1385 | * Then we let sysctl_lookup do the magic, which consists in |
1386 | * setting oldp and newp as required by the operation. When the |
1387 | * value is read, that means that the string will be copied to |
1388 | * the user, and when it is written, the new value will be copied |
1389 | * over in the addr array. |
1390 | * |
1391 | * If newp is NULL, the user was reading the value, so we don't |
1392 | * have anything else to do. If a new value was written, we |
1393 | * have to check it. |
1394 | * |
1395 | * If it is incorrect, we can return an error and leave 'node' as |
1396 | * it is: since it is a copy of the actual node, the change will |
1397 | * be forgotten. |
1398 | * |
1399 | * Upon a correct input, we commit the change to the ifnet |
1400 | * structure of our interface. |
1401 | */ |
1402 | static int |
1403 | tap_sysctl_handler(SYSCTLFN_ARGS) |
1404 | { |
1405 | struct sysctlnode node; |
1406 | struct tap_softc *sc; |
1407 | struct ifnet *ifp; |
1408 | int error; |
1409 | size_t len; |
1410 | char addr[3 * ETHER_ADDR_LEN]; |
1411 | uint8_t enaddr[ETHER_ADDR_LEN]; |
1412 | |
1413 | node = *rnode; |
1414 | sc = node.sysctl_data; |
1415 | ifp = &sc->sc_ec.ec_if; |
1416 | (void)ether_snprintf(addr, sizeof(addr), CLLADDR(ifp->if_sadl)); |
1417 | node.sysctl_data = addr; |
1418 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
1419 | if (error || newp == NULL) |
1420 | return (error); |
1421 | |
1422 | len = strlen(addr); |
1423 | if (len < 11 || len > 17) |
1424 | return (EINVAL); |
1425 | |
1426 | /* Commit change */ |
1427 | if (ether_aton_r(enaddr, sizeof(enaddr), addr) != 0) |
1428 | return (EINVAL); |
1429 | if_set_sadl(ifp, enaddr, ETHER_ADDR_LEN, false); |
1430 | return (error); |
1431 | } |
1432 | |
/*
 * Module infrastructure
 *
 * NOTE(review): IF_MODULE comes from if_module.h and presumably expands
 * to the module(9) glue for this driver; the empty string is presumably
 * the list of required modules -- confirm against if_module.h.
 */
#include "if_module.h"

IF_MODULE(MODULE_CLASS_DRIVER, tap, "")
1439 | |