1/* $NetBSD: route.c,v 1.182 2016/11/15 01:50:06 ozaki-r Exp $ */
2
3/*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62/*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93#ifdef _KERNEL_OPT
94#include "opt_inet.h"
95#include "opt_route.h"
96#include "opt_net_mpsafe.h"
97#endif
98
99#include <sys/cdefs.h>
100__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.182 2016/11/15 01:50:06 ozaki-r Exp $");
101
102#include <sys/param.h>
103#ifdef RTFLUSH_DEBUG
104#include <sys/sysctl.h>
105#endif
106#include <sys/systm.h>
107#include <sys/callout.h>
108#include <sys/proc.h>
109#include <sys/mbuf.h>
110#include <sys/socket.h>
111#include <sys/socketvar.h>
112#include <sys/domain.h>
113#include <sys/protosw.h>
114#include <sys/kernel.h>
115#include <sys/ioctl.h>
116#include <sys/pool.h>
117#include <sys/kauth.h>
118#include <sys/workqueue.h>
119#include <sys/syslog.h>
120
121#include <net/if.h>
122#include <net/if_dl.h>
123#include <net/route.h>
124
125#include <netinet/in.h>
126#include <netinet/in_var.h>
127
128#ifdef RTFLUSH_DEBUG
129#define rtcache_debug() __predict_false(_rtcache_debug)
130#else /* RTFLUSH_DEBUG */
131#define rtcache_debug() 0
132#endif /* RTFLUSH_DEBUG */
133
134struct rtstat rtstat;
135
136static int rttrash; /* routes not in table but not freed */
137
138static struct pool rtentry_pool;
139static struct pool rttimer_pool;
140
141static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
142static struct workqueue *rt_timer_wq;
143static struct work rt_timer_wk;
144
145static void rt_timer_init(void);
146static void rt_timer_queue_remove_all(struct rttimer_queue *);
147static void rt_timer_remove_all(struct rtentry *);
148static void rt_timer_timer(void *);
149
150#ifdef RTFLUSH_DEBUG
151static int _rtcache_debug = 0;
152#endif /* RTFLUSH_DEBUG */
153
154static kauth_listener_t route_listener;
155
156static int rtdeletemsg(struct rtentry *);
157static void rtflushall(int);
158
159static void rt_maskedcopy(const struct sockaddr *,
160 struct sockaddr *, const struct sockaddr *);
161
162static void rtcache_clear(struct route *);
163static void rtcache_clear_rtentry(int, struct rtentry *);
164static void rtcache_invalidate(struct dom_rtlist *);
165
166#ifdef DDB
167static void db_print_sa(const struct sockaddr *);
168static void db_print_ifa(struct ifaddr *);
169static int db_show_rtentry(struct rtentry *, void *);
170#endif
171
#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);

/*
 * Create the "rtcache" sysctl node under CTL_NET and, beneath it, a
 * read-write integer leaf "debug" backed by _rtcache_debug.
 *
 * clog is handed straight to sysctl_createv().  Failures are not
 * reported to the caller; if the parent node cannot be created the
 * leaf is not attempted.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *node;
	int error;

	error = sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		return;

	/* Nothing else depends on the leaf, so its result is ignored. */
	(void)sysctl_createv(clog, 0, &node, &node,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
}
#endif /* RTFLUSH_DEBUG */
191
192static inline void
193rt_destroy(struct rtentry *rt)
194{
195 if (rt->_rt_key != NULL)
196 sockaddr_free(rt->_rt_key);
197 if (rt->rt_gateway != NULL)
198 sockaddr_free(rt->rt_gateway);
199 if (rt_gettag(rt) != NULL)
200 sockaddr_free(rt_gettag(rt));
201 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
202}
203
204static inline const struct sockaddr *
205rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
206{
207 if (rt->_rt_key == key)
208 goto out;
209
210 if (rt->_rt_key != NULL)
211 sockaddr_free(rt->_rt_key);
212 rt->_rt_key = sockaddr_dup(key, flags);
213out:
214 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
215 return rt->_rt_key;
216}
217
218struct ifaddr *
219rt_get_ifa(struct rtentry *rt)
220{
221 struct ifaddr *ifa;
222
223 if ((ifa = rt->rt_ifa) == NULL)
224 return ifa;
225 else if (ifa->ifa_getifa == NULL)
226 return ifa;
227#if 0
228 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
229 return ifa;
230#endif
231 else {
232 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
233 if (ifa == NULL)
234 return NULL;
235 rt_replace_ifa(rt, ifa);
236 return ifa;
237 }
238}
239
240static void
241rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
242{
243 rt->rt_ifa = ifa;
244 if (ifa->ifa_seqno != NULL)
245 rt->rt_ifa_seqno = *ifa->ifa_seqno;
246}
247
248/*
249 * Is this route the connected route for the ifa?
250 */
251static int
252rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
253{
254 const struct sockaddr *key, *dst, *odst;
255 struct sockaddr_storage maskeddst;
256
257 key = rt_getkey(rt);
258 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
259 if (dst == NULL ||
260 dst->sa_family != key->sa_family ||
261 dst->sa_len != key->sa_len)
262 return 0;
263 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
264 odst = dst;
265 dst = (struct sockaddr *)&maskeddst;
266 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
267 ifa->ifa_netmask);
268 }
269 return (memcmp(dst, key, dst->sa_len) == 0);
270}
271
272void
273rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
274{
275 if (rt->rt_ifa &&
276 rt->rt_ifa != ifa &&
277 rt->rt_ifa->ifa_flags & IFA_ROUTE &&
278 rt_ifa_connected(rt, rt->rt_ifa))
279 {
280 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
281 "replace deleted IFA_ROUTE\n",
282 (void *)rt->_rt_key, (void *)rt->rt_ifa);
283 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
284 if (rt_ifa_connected(rt, ifa)) {
285 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
286 "replace added IFA_ROUTE\n",
287 (void *)rt->_rt_key, (void *)ifa);
288 ifa->ifa_flags |= IFA_ROUTE;
289 }
290 }
291
292 ifaref(ifa);
293 ifafree(rt->rt_ifa);
294 rt_set_ifa1(rt, ifa);
295}
296
/*
 * Attach ifa to a route that has no ifa yet: take a reference on it
 * and record it (no previous ifa is released here).
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
303
304static int
305route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
306 void *arg0, void *arg1, void *arg2, void *arg3)
307{
308 struct rt_msghdr *rtm;
309 int result;
310
311 result = KAUTH_RESULT_DEFER;
312 rtm = arg1;
313
314 if (action != KAUTH_NETWORK_ROUTE)
315 return result;
316
317 if (rtm->rtm_type == RTM_GET)
318 result = KAUTH_RESULT_ALLOW;
319
320 return result;
321}
322
323void
324rt_init(void)
325{
326
327#ifdef RTFLUSH_DEBUG
328 sysctl_net_rtcache_setup(NULL);
329#endif
330
331 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
332 NULL, IPL_SOFTNET);
333 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
334 NULL, IPL_SOFTNET);
335
336 rn_init(); /* initialize all zeroes, all ones, mask table */
337 rtbl_init();
338
339 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
340 route_listener_cb, NULL);
341}
342
343static void
344rtflushall(int family)
345{
346 struct domain *dom;
347
348 if (rtcache_debug())
349 printf("%s: enter\n", __func__);
350
351 if ((dom = pffinddomain(family)) == NULL)
352 return;
353
354 rtcache_invalidate(&dom->dom_rtcache);
355}
356
357static void
358rtcache(struct route *ro)
359{
360 struct domain *dom;
361
362 rtcache_invariants(ro);
363 KASSERT(ro->_ro_rt != NULL);
364 KASSERT(ro->ro_invalid == false);
365 KASSERT(rtcache_getdst(ro) != NULL);
366
367 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
368 return;
369
370 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
371 rtcache_invariants(ro);
372}
373
#ifdef RT_DEBUG
/*
 * Print a one-line summary of a route entry (pointer, destination,
 * gateway, flags and interface name) for debugging.  Missing fields
 * are printed as "(NULL)".
 */
static void
dump_rt(const struct rtentry *rt)
{
	char buf[512];

	aprint_normal("rt: ");
	aprint_normal("p=%p ", rt);
	if (rt->_rt_key == NULL) {
		aprint_normal("dst=(NULL) ");
	} else {
		sockaddr_format(rt->_rt_key, buf, sizeof(buf));
		aprint_normal("dst=%s ", buf);
	}
	if (rt->rt_gateway == NULL) {
		aprint_normal("gw=(NULL) ");
	} else {
		/* Format the gateway itself, not the destination key. */
		sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
		aprint_normal("gw=%s ", buf);
	}
	aprint_normal("flags=%x ", rt->rt_flags);
	if (rt->rt_ifp == NULL) {
		aprint_normal("if=(NULL) ");
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */
403
404/*
405 * Packet routing routines. If success, refcnt of a returned rtentry
406 * will be incremented. The caller has to rtfree it by itself.
407 */
408struct rtentry *
409rtalloc1(const struct sockaddr *dst, int report)
410{
411 rtbl_t *rtbl;
412 struct rtentry *rt;
413 int s;
414
415 s = splsoftnet();
416 rtbl = rt_gettable(dst->sa_family);
417 if (rtbl == NULL)
418 goto miss;
419
420 rt = rt_matchaddr(rtbl, dst);
421 if (rt == NULL)
422 goto miss;
423
424 rt->rt_refcnt++;
425
426 splx(s);
427 return rt;
428miss:
429 rtstat.rts_unreach++;
430 if (report) {
431 struct rt_addrinfo info;
432
433 memset(&info, 0, sizeof(info));
434 info.rti_info[RTAX_DST] = dst;
435 rt_missmsg(RTM_MISS, &info, 0, 0);
436 }
437 splx(s);
438 return NULL;
439}
440
#if defined(DEBUG) && !defined(NET_MPSAFE)
/*
 * Check the following constraint for each rtcache of the family's
 * domain: if a rtcache holds a rtentry, the rtentry's refcnt is more
 * than zero, i.e., the rtentry should be referenced at least by the
 * rtcache.  Families without a domain are skipped.
 */
static void
rtcache_check_rtrefcnt(int family)
{
	struct domain *dom;
	struct route *ro;

	dom = pffinddomain(family);
	if (dom == NULL)
		return;

	LIST_FOREACH(ro, &dom->dom_rtcache, ro_rtcache_next) {
		KDASSERT(ro->_ro_rt == NULL || ro->_ro_rt->rt_refcnt > 0);
	}
}
#endif
460
461void
462rtfree(struct rtentry *rt)
463{
464 struct ifaddr *ifa;
465
466 KASSERT(rt != NULL);
467 KASSERT(rt->rt_refcnt > 0);
468
469 rt->rt_refcnt--;
470#if defined(DEBUG) && !defined(NET_MPSAFE)
471 if (rt_getkey(rt) != NULL)
472 rtcache_check_rtrefcnt(rt_getkey(rt)->sa_family);
473#endif
474 if (rt->rt_refcnt == 0 && (rt->rt_flags & RTF_UP) == 0) {
475 rt_assert_inactive(rt);
476 rttrash--;
477 ifa = rt->rt_ifa;
478 rt->rt_ifa = NULL;
479 ifafree(ifa);
480 rt->rt_ifp = NULL;
481 rt_destroy(rt);
482 pool_put(&rtentry_pool, rt);
483 }
484}
485
486/*
487 * Force a routing table entry to the specified
488 * destination to go through the given gateway.
489 * Normally called as a result of a routing redirect
490 * message from the network layer.
491 *
492 * N.B.: must be called at splsoftnet
493 */
494void
495rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
496 const struct sockaddr *netmask, int flags, const struct sockaddr *src,
497 struct rtentry **rtp)
498{
499 struct rtentry *rt;
500 int error = 0;
501 uint64_t *stat = NULL;
502 struct rt_addrinfo info;
503 struct ifaddr *ifa;
504 struct psref psref;
505
506 /* verify the gateway is directly reachable */
507 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
508 error = ENETUNREACH;
509 goto out;
510 }
511 rt = rtalloc1(dst, 0);
512 /*
513 * If the redirect isn't from our current router for this dst,
514 * it's either old or wrong. If it redirects us to ourselves,
515 * we have a routing loop, perhaps as a result of an interface
516 * going down recently.
517 */
518 if (!(flags & RTF_DONE) && rt &&
519 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
520 error = EINVAL;
521 else {
522 int s = pserialize_read_enter();
523 struct ifaddr *_ifa;
524
525 _ifa = ifa_ifwithaddr(gateway);
526 if (_ifa != NULL)
527 error = EHOSTUNREACH;
528 pserialize_read_exit(s);
529 }
530 if (error)
531 goto done;
532 /*
533 * Create a new entry if we just got back a wildcard entry
534 * or the lookup failed. This is necessary for hosts
535 * which use routing redirects generated by smart gateways
536 * to dynamically build the routing tables.
537 */
538 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
539 goto create;
540 /*
541 * Don't listen to the redirect if it's
542 * for a route to an interface.
543 */
544 if (rt->rt_flags & RTF_GATEWAY) {
545 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
546 /*
547 * Changing from route to net => route to host.
548 * Create new route, rather than smashing route to net.
549 */
550 create:
551 if (rt != NULL)
552 rtfree(rt);
553 flags |= RTF_GATEWAY | RTF_DYNAMIC;
554 memset(&info, 0, sizeof(info));
555 info.rti_info[RTAX_DST] = dst;
556 info.rti_info[RTAX_GATEWAY] = gateway;
557 info.rti_info[RTAX_NETMASK] = netmask;
558 info.rti_ifa = ifa;
559 info.rti_flags = flags;
560 rt = NULL;
561 error = rtrequest1(RTM_ADD, &info, &rt);
562 if (rt != NULL)
563 flags = rt->rt_flags;
564 stat = &rtstat.rts_dynamic;
565 } else {
566 /*
567 * Smash the current notion of the gateway to
568 * this destination. Should check about netmask!!!
569 */
570 error = rt_setgate(rt, gateway);
571 if (error == 0) {
572 rt->rt_flags |= RTF_MODIFIED;
573 flags |= RTF_MODIFIED;
574 }
575 stat = &rtstat.rts_newgateway;
576 }
577 } else
578 error = EHOSTUNREACH;
579done:
580 if (rt) {
581 if (rtp != NULL && !error)
582 *rtp = rt;
583 else
584 rtfree(rt);
585 }
586out:
587 if (error)
588 rtstat.rts_badredirect++;
589 else if (stat != NULL)
590 (*stat)++;
591 memset(&info, 0, sizeof(info));
592 info.rti_info[RTAX_DST] = dst;
593 info.rti_info[RTAX_GATEWAY] = gateway;
594 info.rti_info[RTAX_NETMASK] = netmask;
595 info.rti_info[RTAX_AUTHOR] = src;
596 rt_missmsg(RTM_REDIRECT, &info, flags, error);
597 ifa_release(ifa, &psref);
598}
599
600/*
601 * Delete a route and generate a message.
602 * It doesn't free a passed rt.
603 */
604static int
605rtdeletemsg(struct rtentry *rt)
606{
607 int error;
608 struct rt_addrinfo info;
609 struct rtentry *retrt;
610
611 /*
612 * Request the new route so that the entry is not actually
613 * deleted. That will allow the information being reported to
614 * be accurate (and consistent with route_output()).
615 */
616 memset(&info, 0, sizeof(info));
617 info.rti_info[RTAX_DST] = rt_getkey(rt);
618 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
619 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
620 info.rti_flags = rt->rt_flags;
621 error = rtrequest1(RTM_DELETE, &info, &retrt);
622
623 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
624
625 if (error == 0)
626 rtfree(retrt);
627 return error;
628}
629
630struct ifaddr *
631ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
632 const struct sockaddr *gateway, struct psref *psref)
633{
634 struct ifaddr *ifa = NULL;
635
636 if ((flags & RTF_GATEWAY) == 0) {
637 /*
638 * If we are adding a route to an interface,
639 * and the interface is a pt to pt link
640 * we should search for the destination
641 * as our clue to the interface. Otherwise
642 * we can use the local address.
643 */
644 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
645 ifa = ifa_ifwithdstaddr_psref(dst, psref);
646 if (ifa == NULL)
647 ifa = ifa_ifwithaddr_psref(gateway, psref);
648 } else {
649 /*
650 * If we are adding a route to a remote net
651 * or host, the gateway may still be on the
652 * other end of a pt to pt link.
653 */
654 ifa = ifa_ifwithdstaddr_psref(gateway, psref);
655 }
656 if (ifa == NULL)
657 ifa = ifa_ifwithnet_psref(gateway, psref);
658 if (ifa == NULL) {
659 int s;
660 struct rtentry *rt;
661
662 rt = rtalloc1(dst, 0);
663 if (rt == NULL)
664 return NULL;
665 /*
666 * Just in case. May not need to do this workaround.
667 * Revisit when working on rtentry MP-ification.
668 */
669 s = pserialize_read_enter();
670 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
671 if (ifa == rt->rt_ifa)
672 break;
673 }
674 if (ifa != NULL)
675 ifa_acquire(ifa, psref);
676 pserialize_read_exit(s);
677 rtfree(rt);
678 if (ifa == NULL)
679 return NULL;
680 }
681 if (ifa->ifa_addr->sa_family != dst->sa_family) {
682 struct ifaddr *nifa;
683 int s;
684
685 s = pserialize_read_enter();
686 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
687 if (nifa != NULL) {
688 ifa_release(ifa, psref);
689 ifa_acquire(nifa, psref);
690 ifa = nifa;
691 }
692 pserialize_read_exit(s);
693 }
694 return ifa;
695}
696
697/*
698 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
699 * The caller has to rtfree it by itself.
700 */
701int
702rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
703 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
704{
705 struct rt_addrinfo info;
706
707 memset(&info, 0, sizeof(info));
708 info.rti_flags = flags;
709 info.rti_info[RTAX_DST] = dst;
710 info.rti_info[RTAX_GATEWAY] = gateway;
711 info.rti_info[RTAX_NETMASK] = netmask;
712 return rtrequest1(req, &info, ret_nrt);
713}
714
715/*
716 * It's a utility function to add/remove a route to/from the routing table
717 * and tell user processes the addition/removal on success.
718 */
719int
720rtrequest_newmsg(const int req, const struct sockaddr *dst,
721 const struct sockaddr *gateway, const struct sockaddr *netmask,
722 const int flags)
723{
724 int error;
725 struct rtentry *ret_nrt = NULL;
726
727 KASSERT(req == RTM_ADD || req == RTM_DELETE);
728
729 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
730 if (error != 0)
731 return error;
732
733 KASSERT(ret_nrt != NULL);
734
735 rt_newmsg(req, ret_nrt); /* tell user process */
736 rtfree(ret_nrt);
737
738 return 0;
739}
740
741struct ifnet *
742rt_getifp(struct rt_addrinfo *info, struct psref *psref)
743{
744 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
745
746 if (info->rti_ifp != NULL)
747 return NULL;
748 /*
749 * ifp may be specified by sockaddr_dl when protocol address
750 * is ambiguous
751 */
752 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
753 struct ifaddr *ifa;
754 int s = pserialize_read_enter();
755
756 ifa = ifa_ifwithnet(ifpaddr);
757 if (ifa != NULL)
758 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
759 psref);
760 pserialize_read_exit(s);
761 }
762
763 return info->rti_ifp;
764}
765
766struct ifaddr *
767rt_getifa(struct rt_addrinfo *info, struct psref *psref)
768{
769 struct ifaddr *ifa = NULL;
770 const struct sockaddr *dst = info->rti_info[RTAX_DST];
771 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
772 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
773 int flags = info->rti_flags;
774 const struct sockaddr *sa;
775
776 if (info->rti_ifa == NULL && ifaaddr != NULL) {
777 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
778 if (ifa != NULL)
779 goto got;
780 }
781
782 sa = ifaaddr != NULL ? ifaaddr :
783 (gateway != NULL ? gateway : dst);
784 if (sa != NULL && info->rti_ifp != NULL)
785 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
786 else if (dst != NULL && gateway != NULL)
787 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
788 else if (sa != NULL)
789 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
790 if (ifa == NULL)
791 return NULL;
792got:
793 if (ifa->ifa_getifa != NULL) {
794 /* FIXME NOMPSAFE */
795 ifa = (*ifa->ifa_getifa)(ifa, dst);
796 if (ifa == NULL)
797 return NULL;
798 ifa_acquire(ifa, psref);
799 }
800 info->rti_ifa = ifa;
801 if (info->rti_ifp == NULL)
802 info->rti_ifp = ifa->ifa_ifp;
803 return ifa;
804}
805
806/*
807 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
808 * The caller has to rtfree it by itself.
809 */
810int
811rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
812{
813 int s = splsoftnet(), ss;
814 int error = 0, rc;
815 struct rtentry *rt;
816 rtbl_t *rtbl;
817 struct ifaddr *ifa = NULL, *ifa2 = NULL;
818 struct sockaddr_storage maskeddst;
819 const struct sockaddr *dst = info->rti_info[RTAX_DST];
820 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
821 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
822 int flags = info->rti_flags;
823 struct psref psref_ifp, psref_ifa;
824 int bound = 0;
825 struct ifnet *ifp = NULL;
826 bool need_to_release_ifa = true;
827#define senderr(x) { error = x ; goto bad; }
828
829 bound = curlwp_bind();
830 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
831 senderr(ESRCH);
832 if (flags & RTF_HOST)
833 netmask = NULL;
834 switch (req) {
835 case RTM_DELETE:
836 if (netmask) {
837 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
838 netmask);
839 dst = (struct sockaddr *)&maskeddst;
840 }
841 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
842 senderr(ESRCH);
843 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
844 senderr(ESRCH);
845 rt->rt_flags &= ~RTF_UP;
846 if ((ifa = rt->rt_ifa)) {
847 if (ifa->ifa_flags & IFA_ROUTE &&
848 rt_ifa_connected(rt, ifa)) {
849 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
850 "deleted IFA_ROUTE\n",
851 (void *)rt->_rt_key, (void *)ifa);
852 ifa->ifa_flags &= ~IFA_ROUTE;
853 }
854 if (ifa->ifa_rtrequest)
855 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
856 ifa = NULL;
857 }
858 rttrash++;
859 rt_timer_remove_all(rt);
860 if (ret_nrt) {
861 *ret_nrt = rt;
862 rt->rt_refcnt++;
863 } else if (rt->rt_refcnt <= 0) {
864 /* Adjust the refcount */
865 rt->rt_refcnt++;
866 rtfree(rt);
867 }
868 rtcache_clear_rtentry(dst->sa_family, rt);
869 break;
870
871 case RTM_ADD:
872 if (info->rti_ifa == NULL) {
873 ifp = rt_getifp(info, &psref_ifp);
874 ifa = rt_getifa(info, &psref_ifa);
875 if (ifa == NULL)
876 senderr(ENETUNREACH);
877 } else {
878 /* Caller should have a reference of ifa */
879 ifa = info->rti_ifa;
880 need_to_release_ifa = false;
881 }
882 rt = pool_get(&rtentry_pool, PR_NOWAIT);
883 if (rt == NULL)
884 senderr(ENOBUFS);
885 memset(rt, 0, sizeof(*rt));
886 rt->rt_flags = RTF_UP | flags;
887 LIST_INIT(&rt->rt_timer);
888
889 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
890 if (netmask) {
891 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
892 netmask);
893 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
894 } else {
895 rt_setkey(rt, dst, M_NOWAIT);
896 }
897 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
898 if (rt_getkey(rt) == NULL ||
899 rt_setgate(rt, gateway) != 0) {
900 pool_put(&rtentry_pool, rt);
901 senderr(ENOBUFS);
902 }
903
904 rt_set_ifa(rt, ifa);
905 if (info->rti_info[RTAX_TAG] != NULL) {
906 const struct sockaddr *tag;
907 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
908 if (tag == NULL)
909 senderr(ENOBUFS);
910 }
911 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
912
913 ss = pserialize_read_enter();
914 if (info->rti_info[RTAX_IFP] != NULL) {
915 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
916 if (ifa2 != NULL)
917 rt->rt_ifp = ifa2->ifa_ifp;
918 else
919 rt->rt_ifp = ifa->ifa_ifp;
920 } else
921 rt->rt_ifp = ifa->ifa_ifp;
922 pserialize_read_exit(ss);
923
924 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
925 rc = rt_addaddr(rtbl, rt, netmask);
926 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
927 if (rc != 0) {
928 ifafree(ifa); /* for rt_set_ifa above */
929 rt_destroy(rt);
930 pool_put(&rtentry_pool, rt);
931 senderr(rc);
932 }
933 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
934 if (ifa->ifa_rtrequest)
935 ifa->ifa_rtrequest(req, rt, info);
936 if (need_to_release_ifa)
937 ifa_release(ifa, &psref_ifa);
938 ifa = NULL;
939 if_put(ifp, &psref_ifp);
940 ifp = NULL;
941 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
942 if (ret_nrt) {
943 *ret_nrt = rt;
944 rt->rt_refcnt++;
945 }
946 rtflushall(dst->sa_family);
947 break;
948 case RTM_GET:
949 if (netmask != NULL) {
950 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
951 netmask);
952 dst = (struct sockaddr *)&maskeddst;
953 }
954 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
955 senderr(ESRCH);
956 if (ret_nrt != NULL) {
957 *ret_nrt = rt;
958 rt->rt_refcnt++;
959 }
960 break;
961 }
962bad:
963 if (need_to_release_ifa)
964 ifa_release(ifa, &psref_ifa);
965 if_put(ifp, &psref_ifp);
966 curlwp_bindx(bound);
967 splx(s);
968 return error;
969}
970
971int
972rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
973{
974 struct sockaddr *new, *old;
975
976 KASSERT(rt->_rt_key != NULL);
977 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
978
979 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
980 if (new == NULL)
981 return ENOMEM;
982
983 old = rt->rt_gateway;
984 rt->rt_gateway = new;
985 if (old != NULL)
986 sockaddr_free(old);
987
988 KASSERT(rt->_rt_key != NULL);
989 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
990
991 if (rt->rt_flags & RTF_GATEWAY) {
992 struct rtentry *gwrt = rtalloc1(gate, 1);
993 /*
994 * If we switched gateways, grab the MTU from the new
995 * gateway route if the current MTU, if the current MTU is
996 * greater than the MTU of gateway.
997 * Note that, if the MTU of gateway is 0, we will reset the
998 * MTU of the route to run PMTUD again from scratch. XXX
999 */
1000 if (gwrt != NULL) {
1001 KASSERT(gwrt->_rt_key != NULL);
1002 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
1003 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
1004 rt->rt_rmx.rmx_mtu &&
1005 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
1006 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
1007 }
1008 rtfree(gwrt);
1009 }
1010 }
1011 KASSERT(rt->_rt_key != NULL);
1012 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1013 return 0;
1014}
1015
1016static void
1017rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1018 const struct sockaddr *netmask)
1019{
1020 const char *netmaskp = &netmask->sa_data[0],
1021 *srcp = &src->sa_data[0];
1022 char *dstp = &dst->sa_data[0];
1023 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1024 const char *srcend = (char *)dst + src->sa_len;
1025
1026 dst->sa_len = src->sa_len;
1027 dst->sa_family = src->sa_family;
1028
1029 while (dstp < maskend)
1030 *dstp++ = *srcp++ & *netmaskp++;
1031 if (dstp < srcend)
1032 memset(dstp, 0, (size_t)(srcend - dstp));
1033}
1034
1035/*
1036 * Inform the routing socket of a route change.
1037 */
1038void
1039rt_newmsg(const int cmd, const struct rtentry *rt)
1040{
1041 struct rt_addrinfo info;
1042
1043 memset((void *)&info, 0, sizeof(info));
1044 info.rti_info[RTAX_DST] = rt_getkey(rt);
1045 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1046 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1047 if (rt->rt_ifp) {
1048 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1049 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1050 }
1051
1052 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1053}
1054
1055/*
1056 * Set up or tear down a routing table entry, normally
1057 * for an interface.
1058 */
1059int
1060rtinit(struct ifaddr *ifa, int cmd, int flags)
1061{
1062 struct rtentry *rt;
1063 struct sockaddr *dst, *odst;
1064 struct sockaddr_storage maskeddst;
1065 struct rtentry *nrt = NULL;
1066 int error;
1067 struct rt_addrinfo info;
1068
1069 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1070 if (cmd == RTM_DELETE) {
1071 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1072 /* Delete subnet route for this interface */
1073 odst = dst;
1074 dst = (struct sockaddr *)&maskeddst;
1075 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1076 }
1077 if ((rt = rtalloc1(dst, 0)) != NULL) {
1078 if (rt->rt_ifa != ifa) {
1079 rtfree(rt);
1080 return (flags & RTF_HOST) ? EHOSTUNREACH
1081 : ENETUNREACH;
1082 }
1083 rtfree(rt);
1084 }
1085 }
1086 memset(&info, 0, sizeof(info));
1087 info.rti_ifa = ifa;
1088 info.rti_flags = flags | ifa->ifa_flags;
1089 info.rti_info[RTAX_DST] = dst;
1090 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1091
1092 /*
1093 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1094 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1095 * variable) when RTF_HOST is 1. still not sure if i can safely
1096 * change it to meet bsdi4 behavior.
1097 */
1098 if (cmd != RTM_LLINFO_UPD)
1099 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1100 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1101 &nrt);
1102 if (error != 0)
1103 return error;
1104
1105 rt = nrt;
1106 switch (cmd) {
1107 case RTM_DELETE:
1108 rt_newmsg(cmd, rt);
1109 break;
1110 case RTM_LLINFO_UPD:
1111 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1112 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1113 rt_newmsg(RTM_CHANGE, rt);
1114 break;
1115 case RTM_ADD:
1116 if (rt->rt_ifa != ifa) {
1117 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1118 rt->rt_ifa);
1119 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1120 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1121 &info);
1122 }
1123 rt_replace_ifa(rt, ifa);
1124 rt->rt_ifp = ifa->ifa_ifp;
1125 if (ifa->ifa_rtrequest != NULL)
1126 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1127 }
1128 rt_newmsg(cmd, rt);
1129 break;
1130 }
1131 rtfree(rt);
1132 return error;
1133}
1134
1135/*
1136 * Create a local route entry for the address.
1137 * Announce the addition of the address and the route to the routing socket.
1138 */
1139int
1140rt_ifa_addlocal(struct ifaddr *ifa)
1141{
1142 struct rtentry *rt;
1143 int e;
1144
1145 /* If there is no loopback entry, allocate one. */
1146 rt = rtalloc1(ifa->ifa_addr, 0);
1147#ifdef RT_DEBUG
1148 if (rt != NULL)
1149 dump_rt(rt);
1150#endif
1151 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1152 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1153 {
1154 struct rt_addrinfo info;
1155 struct rtentry *nrt;
1156
1157 memset(&info, 0, sizeof(info));
1158 info.rti_flags = RTF_HOST | RTF_LOCAL;
1159 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1160 info.rti_flags |= RTF_LLDATA;
1161 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1162 info.rti_info[RTAX_GATEWAY] =
1163 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1164 info.rti_ifa = ifa;
1165 nrt = NULL;
1166 e = rtrequest1(RTM_ADD, &info, &nrt);
1167 if (nrt && ifa != nrt->rt_ifa)
1168 rt_replace_ifa(nrt, ifa);
1169 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1170 if (nrt != NULL) {
1171#ifdef RT_DEBUG
1172 dump_rt(nrt);
1173#endif
1174 rtfree(nrt);
1175 }
1176 } else {
1177 e = 0;
1178 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1179 }
1180 if (rt != NULL)
1181 rtfree(rt);
1182 return e;
1183}
1184
1185/*
1186 * Remove the local route entry for the address.
1187 * Announce the removal of the address and the route to the routing socket.
1188 */
1189int
1190rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1191{
1192 struct rtentry *rt;
1193 int e = 0;
1194
1195 rt = rtalloc1(ifa->ifa_addr, 0);
1196
1197 /*
1198 * Before deleting, check if a corresponding loopbacked
1199 * host route surely exists. With this check, we can avoid
1200 * deleting an interface direct route whose destination is
1201 * the same as the address being removed. This can happen
1202 * when removing a subnet-router anycast address on an
1203 * interface attached to a shared medium.
1204 */
1205 if (rt != NULL &&
1206 (rt->rt_flags & RTF_HOST) &&
1207 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1208 {
1209 /* If we cannot replace the route's ifaddr with the equivalent
1210 * ifaddr of another interface, I believe it is safest to
1211 * delete the route.
1212 */
1213 if (alt_ifa == NULL) {
1214 e = rtdeletemsg(rt);
1215 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1216 } else {
1217 rt_replace_ifa(rt, alt_ifa);
1218 rt_newmsg(RTM_CHANGE, rt);
1219 }
1220 } else
1221 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1222 if (rt != NULL)
1223 rtfree(rt);
1224 return e;
1225}
1226
1227/*
 * Route timer routines. These routines allow functions to be called
1229 * for various routes at any time. This is useful in supporting
1230 * path MTU discovery and redirect route deletion.
1231 *
1232 * This is similar to some BSDI internal functions, but it provides
1233 * for multiple queues for efficiency's sake...
1234 */
1235
/* All timer queues; rt_timer_queue_create() links each new queue here. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set to 1 by rt_timer_init() once it has run. */
static int rt_init_done = 0;
1238
1239/*
1240 * Some subtle order problems with domain initialization mean that
1241 * we cannot count on this being run from rt_init before various
1242 * protocol initializations are done. Therefore, we make sure
1243 * that this is run when the first queue is added...
1244 */
1245
1246static void rt_timer_work(struct work *, void *);
1247
1248static void
1249rt_timer_init(void)
1250{
1251 int error;
1252
1253 assert(rt_init_done == 0);
1254
1255 LIST_INIT(&rttimer_queue_head);
1256 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1257 error = workqueue_create(&rt_timer_wq, "rt_timer",
1258 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1259 if (error)
1260 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1261 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1262 rt_init_done = 1;
1263}
1264
1265struct rttimer_queue *
1266rt_timer_queue_create(u_int timeout)
1267{
1268 struct rttimer_queue *rtq;
1269
1270 if (rt_init_done == 0)
1271 rt_timer_init();
1272
1273 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1274 if (rtq == NULL)
1275 return NULL;
1276 memset(rtq, 0, sizeof(*rtq));
1277
1278 rtq->rtq_timeout = timeout;
1279 TAILQ_INIT(&rtq->rtq_head);
1280 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1281
1282 return rtq;
1283}
1284
1285void
1286rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1287{
1288
1289 rtq->rtq_timeout = timeout;
1290}
1291
1292static void
1293rt_timer_queue_remove_all(struct rttimer_queue *rtq)
1294{
1295 struct rttimer *r;
1296
1297 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1298 LIST_REMOVE(r, rtt_link);
1299 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1300 (*r->rtt_func)(r->rtt_rt, r);
1301 rtfree(r->rtt_rt);
1302 pool_put(&rttimer_pool, r);
1303 if (rtq->rtq_count > 0)
1304 rtq->rtq_count--;
1305 else
1306 printf("rt_timer_queue_remove_all: "
1307 "rtq_count reached 0\n");
1308 }
1309}
1310
1311void
1312rt_timer_queue_destroy(struct rttimer_queue *rtq)
1313{
1314
1315 rt_timer_queue_remove_all(rtq);
1316
1317 LIST_REMOVE(rtq, rtq_link);
1318
1319 /*
1320 * Caller is responsible for freeing the rttimer_queue structure.
1321 */
1322}
1323
1324unsigned long
1325rt_timer_count(struct rttimer_queue *rtq)
1326{
1327 return rtq->rtq_count;
1328}
1329
1330static void
1331rt_timer_remove_all(struct rtentry *rt)
1332{
1333 struct rttimer *r;
1334
1335 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1336 LIST_REMOVE(r, rtt_link);
1337 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1338 if (r->rtt_queue->rtq_count > 0)
1339 r->rtt_queue->rtq_count--;
1340 else
1341 printf("rt_timer_remove_all: rtq_count reached 0\n");
1342 pool_put(&rttimer_pool, r);
1343 rt->rt_refcnt--; /* XXX */
1344 }
1345}
1346
1347int
1348rt_timer_add(struct rtentry *rt,
1349 void (*func)(struct rtentry *, struct rttimer *),
1350 struct rttimer_queue *queue)
1351{
1352 struct rttimer *r;
1353
1354 KASSERT(func != NULL);
1355 /*
1356 * If there's already a timer with this action, destroy it before
1357 * we add a new one.
1358 */
1359 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1360 if (r->rtt_func == func)
1361 break;
1362 }
1363 if (r != NULL) {
1364 LIST_REMOVE(r, rtt_link);
1365 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1366 if (r->rtt_queue->rtq_count > 0)
1367 r->rtt_queue->rtq_count--;
1368 else
1369 printf("rt_timer_add: rtq_count reached 0\n");
1370 rtfree(r->rtt_rt);
1371 } else {
1372 r = pool_get(&rttimer_pool, PR_NOWAIT);
1373 if (r == NULL)
1374 return ENOBUFS;
1375 }
1376
1377 memset(r, 0, sizeof(*r));
1378
1379 rt->rt_refcnt++;
1380 r->rtt_rt = rt;
1381 r->rtt_time = time_uptime;
1382 r->rtt_func = func;
1383 r->rtt_queue = queue;
1384 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1385 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1386 r->rtt_queue->rtq_count++;
1387
1388 return 0;
1389}
1390
1391static void
1392rt_timer_work(struct work *wk, void *arg)
1393{
1394 struct rttimer_queue *rtq;
1395 struct rttimer *r;
1396 int s;
1397
1398 s = splsoftnet();
1399 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1400 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1401 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1402 LIST_REMOVE(r, rtt_link);
1403 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1404 (*r->rtt_func)(r->rtt_rt, r);
1405 rtfree(r->rtt_rt);
1406 pool_put(&rttimer_pool, r);
1407 if (rtq->rtq_count > 0)
1408 rtq->rtq_count--;
1409 else
1410 printf("rt_timer_timer: rtq_count reached 0\n");
1411 }
1412 }
1413 splx(s);
1414
1415 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1416}
1417
1418static void
1419rt_timer_timer(void *arg)
1420{
1421
1422 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
1423}
1424
1425static struct rtentry *
1426_rtcache_init(struct route *ro, int flag)
1427{
1428 rtcache_invariants(ro);
1429 KASSERT(ro->_ro_rt == NULL);
1430
1431 if (rtcache_getdst(ro) == NULL)
1432 return NULL;
1433 ro->ro_invalid = false;
1434 if ((ro->_ro_rt = rtalloc1(rtcache_getdst(ro), flag)) != NULL)
1435 rtcache(ro);
1436
1437 rtcache_invariants(ro);
1438 return ro->_ro_rt;
1439}
1440
/*
 * Fill route cache `ro' via _rtcache_init() with the flag set to 1.
 * Returns the route found, or NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	return (_rtcache_init(ro, 1));
}
1446
/*
 * Fill route cache `ro' via _rtcache_init() with the flag set to 0.
 * Returns the route found, or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	return (_rtcache_init(ro, 0));
}
1452
/*
 * Drop whatever route `ro' currently caches and look its destination
 * up again, passing `clone' through to _rtcache_init().
 * Returns the newly cached route, or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	rtcache_clear(ro);

	return (_rtcache_init(ro, clone));
}
1459
1460void
1461rtcache_copy(struct route *new_ro, const struct route *old_ro)
1462{
1463 struct rtentry *rt;
1464
1465 KASSERT(new_ro != old_ro);
1466 rtcache_invariants(new_ro);
1467 rtcache_invariants(old_ro);
1468
1469 if ((rt = rtcache_validate(old_ro)) != NULL)
1470 rt->rt_refcnt++;
1471
1472 if (rtcache_getdst(old_ro) == NULL ||
1473 rtcache_setdst(new_ro, rtcache_getdst(old_ro)) != 0)
1474 return;
1475
1476 new_ro->ro_invalid = false;
1477 if ((new_ro->_ro_rt = rt) != NULL)
1478 rtcache(new_ro);
1479 rtcache_invariants(new_ro);
1480}
1481
/* Route caches that rtcache_invalidate() has marked invalid are moved here. */
static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1483
1484static void
1485rtcache_invalidate(struct dom_rtlist *rtlist)
1486{
1487 struct route *ro;
1488
1489 while ((ro = LIST_FIRST(rtlist)) != NULL) {
1490 rtcache_invariants(ro);
1491 KASSERT(ro->_ro_rt != NULL);
1492 ro->ro_invalid = true;
1493 LIST_REMOVE(ro, ro_rtcache_next);
1494 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
1495 rtcache_invariants(ro);
1496 }
1497}
1498
1499static void
1500rtcache_clear_rtentry(int family, struct rtentry *rt)
1501{
1502 struct domain *dom;
1503 struct route *ro, *nro;
1504
1505 if ((dom = pffinddomain(family)) == NULL)
1506 return;
1507
1508 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
1509 if (ro->_ro_rt == rt)
1510 rtcache_clear(ro);
1511 }
1512}
1513
1514static void
1515rtcache_clear(struct route *ro)
1516{
1517 rtcache_invariants(ro);
1518 if (ro->_ro_rt == NULL)
1519 return;
1520
1521 LIST_REMOVE(ro, ro_rtcache_next);
1522
1523 rtfree(ro->_ro_rt);
1524 ro->_ro_rt = NULL;
1525 ro->ro_invalid = false;
1526 rtcache_invariants(ro);
1527}
1528
1529struct rtentry *
1530rtcache_lookup2(struct route *ro, const struct sockaddr *dst, int clone,
1531 int *hitp)
1532{
1533 const struct sockaddr *odst;
1534 struct rtentry *rt = NULL;
1535
1536 odst = rtcache_getdst(ro);
1537 if (odst == NULL)
1538 goto miss;
1539
1540 if (sockaddr_cmp(odst, dst) != 0) {
1541 rtcache_free(ro);
1542 goto miss;
1543 }
1544
1545 rt = rtcache_validate(ro);
1546 if (rt == NULL) {
1547 rtcache_clear(ro);
1548 goto miss;
1549 }
1550
1551 *hitp = 1;
1552 rtcache_invariants(ro);
1553
1554 return rt;
1555miss:
1556 *hitp = 0;
1557 if (rtcache_setdst(ro, dst) == 0)
1558 rt = _rtcache_init(ro, clone);
1559
1560 rtcache_invariants(ro);
1561
1562 return rt;
1563}
1564
1565void
1566rtcache_free(struct route *ro)
1567{
1568 rtcache_clear(ro);
1569 if (ro->ro_sa != NULL) {
1570 sockaddr_free(ro->ro_sa);
1571 ro->ro_sa = NULL;
1572 }
1573 rtcache_invariants(ro);
1574}
1575
1576int
1577rtcache_setdst(struct route *ro, const struct sockaddr *sa)
1578{
1579 KASSERT(sa != NULL);
1580
1581 rtcache_invariants(ro);
1582 if (ro->ro_sa != NULL) {
1583 if (ro->ro_sa->sa_family == sa->sa_family) {
1584 rtcache_clear(ro);
1585 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
1586 rtcache_invariants(ro);
1587 return 0;
1588 }
1589 /* free ro_sa, wrong family */
1590 rtcache_free(ro);
1591 }
1592
1593 KASSERT(ro->_ro_rt == NULL);
1594
1595 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
1596 rtcache_invariants(ro);
1597 return ENOMEM;
1598 }
1599 rtcache_invariants(ro);
1600 return 0;
1601}
1602
1603const struct sockaddr *
1604rt_settag(struct rtentry *rt, const struct sockaddr *tag)
1605{
1606 if (rt->rt_tag != tag) {
1607 if (rt->rt_tag != NULL)
1608 sockaddr_free(rt->rt_tag);
1609 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
1610 }
1611 return rt->rt_tag;
1612}
1613
1614struct sockaddr *
1615rt_gettag(const struct rtentry *rt)
1616{
1617 return rt->rt_tag;
1618}
1619
1620int
1621rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
1622{
1623
1624 if ((rt->rt_flags & RTF_REJECT) != 0) {
1625 /* Mimic looutput */
1626 if (ifp->if_flags & IFF_LOOPBACK)
1627 return (rt->rt_flags & RTF_HOST) ?
1628 EHOSTUNREACH : ENETUNREACH;
1629 else if (rt->rt_rmx.rmx_expire == 0 ||
1630 time_uptime < rt->rt_rmx.rmx_expire)
1631 return (rt->rt_flags & RTF_GATEWAY) ?
1632 EHOSTUNREACH : EHOSTDOWN;
1633 }
1634
1635 return 0;
1636}
1637
1638void
1639rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
1640 void *v)
1641{
1642
1643 for (;;) {
1644 int s;
1645 int error;
1646 struct rtentry *rt, *retrt = NULL;
1647
1648 s = splsoftnet();
1649 rt = rtbl_search_matched_entry(family, f, v);
1650 if (rt == NULL) {
1651 splx(s);
1652 return;
1653 }
1654 rt->rt_refcnt++;
1655 splx(s);
1656
1657 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
1658 rt_mask(rt), rt->rt_flags, &retrt);
1659 if (error == 0) {
1660 KASSERT(retrt == rt);
1661 KASSERT((retrt->rt_flags & RTF_UP) == 0);
1662 retrt->rt_ifp = NULL;
1663 rtfree(rt);
1664 rtfree(retrt);
1665 } else if (error == ESRCH) {
1666 /* Someone deleted the entry already. */
1667 rtfree(rt);
1668 } else {
1669 log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
1670 "error = %d\n", rt->rt_ifp->if_xname, rt, error);
1671 /* XXX how to treat this case? */
1672 }
1673 }
1674}
1675
1676#ifdef DDB
1677
1678#include <machine/db_machdep.h>
1679#include <ddb/db_interface.h>
1680#include <ddb/db_output.h>
1681
/* Shorthand for the rmx_expire metric of a route; used by db_show_rtentry(). */
#define rt_expire rt_rmx.rmx_expire
1683
1684static void
1685db_print_sa(const struct sockaddr *sa)
1686{
1687 int len;
1688 const u_char *p;
1689
1690 if (sa == NULL) {
1691 db_printf("[NULL]");
1692 return;
1693 }
1694
1695 p = (const u_char *)sa;
1696 len = sa->sa_len;
1697 db_printf("[");
1698 while (len > 0) {
1699 db_printf("%d", *p);
1700 p++; len--;
1701 if (len) db_printf(",");
1702 }
1703 db_printf("]\n");
1704}
1705
1706static void
1707db_print_ifa(struct ifaddr *ifa)
1708{
1709 if (ifa == NULL)
1710 return;
1711 db_printf(" ifa_addr=");
1712 db_print_sa(ifa->ifa_addr);
1713 db_printf(" ifa_dsta=");
1714 db_print_sa(ifa->ifa_dstaddr);
1715 db_printf(" ifa_mask=");
1716 db_print_sa(ifa->ifa_netmask);
1717 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
1718 ifa->ifa_flags,
1719 ifa->ifa_refcnt,
1720 ifa->ifa_metric);
1721}
1722
1723/*
1724 * Function to pass to rt_walktree().
1725 * Return non-zero error to abort walk.
1726 */
1727static int
1728db_show_rtentry(struct rtentry *rt, void *w)
1729{
1730 db_printf("rtentry=%p", rt);
1731
1732 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
1733 rt->rt_flags, rt->rt_refcnt,
1734 rt->rt_use, (uint64_t)rt->rt_expire);
1735
1736 db_printf(" key="); db_print_sa(rt_getkey(rt));
1737 db_printf(" mask="); db_print_sa(rt_mask(rt));
1738 db_printf(" gw="); db_print_sa(rt->rt_gateway);
1739
1740 db_printf(" ifp=%p ", rt->rt_ifp);
1741 if (rt->rt_ifp)
1742 db_printf("(%s)", rt->rt_ifp->if_xname);
1743 else
1744 db_printf("(NULL)");
1745
1746 db_printf(" ifa=%p\n", rt->rt_ifa);
1747 db_print_ifa(rt->rt_ifa);
1748
1749 db_printf(" gwroute=%p llinfo=%p\n",
1750 rt->rt_gwroute, rt->rt_llinfo);
1751
1752 return 0;
1753}
1754
1755/*
1756 * Function to print all the route trees.
1757 * Use this from ddb: "show routes"
1758 */
1759void
1760db_show_routes(db_expr_t addr, bool have_addr,
1761 db_expr_t count, const char *modif)
1762{
1763 rt_walktree(AF_INET, db_show_rtentry, NULL);
1764}
1765#endif
1766