1/* $NetBSD: mbuf.h,v 1.167 2016/10/04 14:13:21 christos Exp $ */
2
3/*-
4 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)mbuf.h 8.5 (Berkeley) 2/19/95
62 */
63
64#ifndef _SYS_MBUF_H_
65#define _SYS_MBUF_H_
66
67#ifdef _KERNEL_OPT
68#include "opt_mbuftrace.h"
69#endif
70
71#ifndef M_WAITOK
72#include <sys/malloc.h>
73#endif
74#include <sys/pool.h>
75#include <sys/queue.h>
76#if defined(_KERNEL)
77#include <sys/percpu_types.h>
78#include <sys/socket.h> /* for AF_UNSPEC */
79#include <sys/psref.h>
80#endif /* defined(_KERNEL) */
81
82/* For offsetof() */
83#if defined(_KERNEL) || defined(_STANDALONE)
84#include <sys/systm.h>
85#else
86#include <stddef.h>
87#endif
88
89#include <uvm/uvm_param.h> /* for MIN_PAGE_SIZE */
90
91#include <net/if.h>
92
93/*
94 * Mbufs are of a single size, MSIZE (machine/param.h), which
95 * includes overhead. An mbuf may add a single "mbuf cluster" of size
96 * MCLBYTES (also in machine/param.h), which has no additional overhead
97 * and is used instead of the internal data area; this is done when
98 * at least MINCLSIZE of data must be stored.
99 */
100
101/* Packet tags structure */
102struct m_tag {
103 SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */
104 uint16_t m_tag_id; /* Tag ID */
105 uint16_t m_tag_len; /* Length of data */
106};
107
108/* mbuf ownership structure */
109struct mowner {
110 char mo_name[16]; /* owner name (fxp0) */
111 char mo_descr[16]; /* owner description (input) */
112 LIST_ENTRY(mowner) mo_link; /* */
113 struct percpu *mo_counters;
114};
115
116#define MOWNER_INIT(x, y) { .mo_name = x, .mo_descr = y }
117
/*
 * Indices into the per-owner counter arrays
 * (mowner_counter.mc_counter[] and mowner_user.mo_counter[]).
 */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS		= 0,	/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES		= 1,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS	= 2,	/* # of M_CLUSTER mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES	= 3,	/* # of M_CLUSTER mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS	= 4,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES	= 5,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS	= 6,	/* number of counters; keep last */
};
128
#if defined(_KERNEL)
/* Kernel-only counter block: one slot per enum mowner_counter_index value. */
struct mowner_counter {
	u_long	mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif /* defined(_KERNEL) */
134
135/* userland-exported version of struct mowner */
136struct mowner_user {
137 char mo_name[16]; /* owner name (fxp0) */
138 char mo_descr[16]; /* owner description (input) */
139 LIST_ENTRY(mowner) mo_link; /* unused padding; for compatibility */
140 u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
141};
142
/*
 * Type conversion helper.
 *
 * mtod(m, t) yields the mbuf's data pointer (m->m_data) cast to type t.
 */
#define	mtod(m, t)	((t)((m)->m_data))
148
149/* header at beginning of each mbuf: */
150struct m_hdr {
151 struct mbuf *mh_next; /* next buffer in chain */
152 struct mbuf *mh_nextpkt; /* next chain in queue/record */
153 char *mh_data; /* location of data */
154 struct mowner *mh_owner; /* mbuf owner */
155 int mh_len; /* amount of data in this mbuf */
156 int mh_flags; /* flags; see below */
157 paddr_t mh_paddr; /* physical address of mbuf */
158 short mh_type; /* type of data in this mbuf */
159};
160
161/*
162 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
163 *
164 * A note about csum_data: For the out-bound direction, the low 16 bits
165 * indicates the offset after the L4 header where the final L4 checksum value
166 * is to be stored and the high 16 bits is the length of the L3 header (the
 * start of the data to be checksummed). For the in-bound direction, it is only
168 * valid if the M_CSUM_DATA flag is set. In this case, an L4 checksum has been
169 * calculated by hardware, but it is up to software to perform final
170 * verification.
171 *
172 * Note for in-bound TCP/UDP checksums, we expect the csum_data to NOT
173 * be bit-wise inverted (the final step in the calculation of an IP
174 * checksum) -- this is so we can accumulate the checksum for fragmented
175 * packets during reassembly.
176 *
177 * Size ILP32: 36
178 * LP64: 56
179 */
180struct pkthdr {
181 union {
182 void *ctx; /* for M_GETCTX/M_SETCTX */
183 if_index_t index; /* rcv interface index */
184 } _rcvif;
185#define rcvif_index _rcvif.index
186 SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
187 int len; /* total packet length */
188 int csum_flags; /* checksum flags */
189 uint32_t csum_data; /* checksum data */
190 u_int segsz; /* segment size */
191
192 /*
193 * Following three fields are open-coded struct altq_pktattr
194 * to rearrange struct pkthdr fields flexibly.
195 */
196 void *pattr_class; /* ALTQ: sched class set by classifier */
197 void *pattr_hdr; /* ALTQ: saved header position in mbuf */
198 int pattr_af; /* ALTQ: address family */
199};
200
201/*
 * Note: These bits are carefully arranged so that the compiler can have
203 * a prayer of generating a jump table.
204 */
/* Values for pkthdr.csum_flags. */
#define	M_CSUM_TCPv4		0x0001	/* TCP header/payload */
#define	M_CSUM_UDPv4		0x0002	/* UDP header/payload */
#define	M_CSUM_TCP_UDP_BAD	0x0004	/* TCP/UDP checksum bad */
#define	M_CSUM_DATA		0x0008	/* consult csum_data */
#define	M_CSUM_TCPv6		0x0010	/* IPv6 TCP header/payload */
#define	M_CSUM_UDPv6		0x0020	/* IPv6 UDP header/payload */
#define	M_CSUM_IPv4		0x0040	/* IPv4 header */
#define	M_CSUM_IPv4_BAD		0x0080	/* IPv4 header checksum bad */
#define	M_CSUM_TSOv4		0x0100	/* TCPv4 segmentation offload */
#define	M_CSUM_TSOv6		0x0200	/* TCPv6 segmentation offload */

/*
 * Checksum-assist quirks: keep separate from jump-table bits.
 *
 * M_CSUM_NO_PSEUDOHDR: Rx csum_data does not include the UDP/TCP
 * pseudo-hdr, and is not yet 1s-complemented.
 */
#define	M_CSUM_NO_PSEUDOHDR	0x80000000

/* Bit-name format string describing the M_CSUM_* flags above. */
#define	M_CSUM_BITS \
	"\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
	"\11TSOv4\12TSOv6\40NO_PSEUDOHDR"
225
/*
 * Accessors for csum_data on outgoing packets; they carry information
 * from L4/L3 down to L2.  The high 16 bits hold the IP header length
 * and the low 16 bits the checksum offset.
 */
#define	M_CSUM_DATA_IPv4_IPHL(x)	((x) >> 16)
#define	M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
232
/*
 * Macros for M_CSUM_TCPv6 and M_CSUM_UDPv6
 *
 * M_CSUM_DATA_IPv6_HL: length of ip6_hdr + ext header.
 * ie. offset of UDP/TCP header in the packet.
 *
 * M_CSUM_DATA_IPv6_HL_SET: store v as the header length in the high
 * 16 bits of x, leaving the low 16 bits untouched.  The expansion is
 * fully parenthesized so it can be embedded in a larger expression, and
 * v is converted to uint32_t before shifting so that values >= 0x8000
 * are not shifted into the sign bit of an int.
 *
 * M_CSUM_DATA_IPv6_OFFSET: offset of the checksum field in UDP/TCP header.
 */

#define	M_CSUM_DATA_IPv6_HL(x)		((x) >> 16)
#define	M_CSUM_DATA_IPv6_HL_SET(x, v) \
	((x) = ((x) & 0xffff) | ((uint32_t)(v) << 16))
#define	M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
245
/*
 * Upper bound on the number of pages that can be attached to m_ext.
 * Chosen so that SOSEND_LOAN_CHUNK can be handled with the minimum
 * sized page.  Only defined when MIN_PAGE_SIZE is known.
 */
#if defined(MIN_PAGE_SIZE)
#define	M_EXT_MAXPAGES		((65536 / MIN_PAGE_SIZE) + 1)
#endif /* defined(MIN_PAGE_SIZE) */
253
254/* description of external storage mapped into mbuf, valid if M_EXT set */
255struct _m_ext_storage {
256 unsigned int ext_refcnt;
257 int ext_flags;
258 char *ext_buf; /* start of buffer */
259 void (*ext_free) /* free routine if not the usual */
260 (struct mbuf *, void *, size_t, void *);
261 void *ext_arg; /* argument for ext_free */
262 size_t ext_size; /* size of buffer, for ext_free */
263 union {
264 paddr_t extun_paddr; /* physical address (M_EXT_CLUSTER) */
265 /* pages (M_EXT_PAGES) */
266 /*
267 * XXX This is gross, but it doesn't really matter; this is
268 * XXX overlaid on top of the mbuf data area.
269 */
270#ifdef M_EXT_MAXPAGES
271 struct vm_page *extun_pgs[M_EXT_MAXPAGES];
272#endif
273 } ext_un;
274#define ext_paddr ext_un.extun_paddr
275#define ext_pgs ext_un.extun_pgs
276#ifdef DEBUG
277 const char *ext_ofile;
278 const char *ext_nfile;
279 int ext_oline;
280 int ext_nline;
281#endif
282};
283
284struct _m_ext {
285 struct mbuf *ext_ref;
286 struct _m_ext_storage ext_storage;
287};
288
289#define M_PADDR_INVALID POOL_PADDR_INVALID
290
/*
 * Template for "struct mbuf".
 *
 * name  - struct tag to define
 * mhlen - size of the data area that follows a packet header
 * mlen  - size of the data area when there is no packet header
 *
 * Don't change this without understanding how MHLEN/MLEN are defined.
 */
#define	MBUF_DEFINE(name, mhlen, mlen) \
	struct name { \
		struct m_hdr m_hdr; \
		union { \
			struct { \
				struct pkthdr MH_pkthdr; \
				union { \
					struct _m_ext MH_ext; \
					char MH_databuf[(mhlen)]; \
				} MH_dat; \
			} MH; \
			char M_databuf[(mlen)]; \
		} M_dat; \
	}
/* Shorthand for the members of struct m_hdr. */
#define	m_next		m_hdr.mh_next
#define	m_nextpkt	m_hdr.mh_nextpkt
#define	m_data		m_hdr.mh_data
#define	m_owner		m_hdr.mh_owner
#define	m_len		m_hdr.mh_len
#define	m_flags		m_hdr.mh_flags
#define	m_paddr		m_hdr.mh_paddr
#define	m_type		m_hdr.mh_type

/* Shorthand for the members of the M_dat union. */
#define	m_pkthdr	M_dat.MH.MH_pkthdr
#define	m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
#define	m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
/* m_ext goes through m_ext_ref, i.e. it names the referenced mbuf's storage. */
#define	m_ext		m_ext_ref->m_ext_storage
#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define	m_dat		M_dat.M_databuf
323
324/*
325 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
326 * into account inter-structure padding.
327 */
328MBUF_DEFINE(_mbuf_dummy, 1, 1);
329
330/* normal data len */
331#define MLEN (MSIZE - offsetof(struct _mbuf_dummy, m_dat))
332/* data len w/pkthdr */
333#define MHLEN (MSIZE - offsetof(struct _mbuf_dummy, m_pktdat))
334
335#define MINCLSIZE (MHLEN+MLEN+1) /* smallest amount to put in cluster */
336#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
337
338/*
339 * The *real* struct mbuf
340 */
341MBUF_DEFINE(mbuf, MHLEN, MLEN);
342
/* mbuf flags (m_flags) */
#define	M_EXT		0x00000001	/* has associated external storage */
#define	M_PKTHDR	0x00000002	/* start of record */
#define	M_EOR		0x00000004	/* end of record */
#define	M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define	M_AUTHIPHDR	0x00000010	/* data origin authentication for
					 * IP header */
#define	M_DECRYPTED	0x00000020	/* confidentiality */
#define	M_LOOP		0x00000040	/* for Mbuf statistics */
#define	M_AUTHIPDGM	0x00000080	/* data origin authentication */
#define	M_BCAST		0x00000100	/* send/received as link-level
					 * broadcast */
#define	M_MCAST		0x00000200	/* send/received as link-level
					 * multicast */
#define	M_CANFASTFWD	0x00000400	/* used by filters to indicate
					 * packet can be fast-forwarded */
#define	M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

/* link layer specific flags */
#define	M_LINK0		0x00001000
#define	M_LINK1		0x00002000
#define	M_LINK2		0x00004000

#define	M_LINK3		0x00008000
#define	M_LINK4		0x00010000
#define	M_LINK5		0x00020000
#define	M_LINK6		0x00040000
#define	M_LINK7		0x00080000

/* additional flags for M_EXT mbufs */
#define	M_EXT_FLAGS	0xff000000	/* mask covering the M_EXT_* bits */
#define	M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define	M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define	M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define	M_EXT_RW	0x08000000	/* ext storage is writable */

/* for source-level compatibility */
#define	M_CLUSTER	M_EXT_CLUSTER
#define	M_NOTIFICATION	M_PROTO1

/* Bit-name format string describing the m_flags bits above. */
#define	M_FLAGS_BITS \
	"\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10AUTHIPDGM" \
	"\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
	"\21LINK4\22LINK5\23LINK6\24LINK7" \
	"\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"

/* flags copied when copying m_pkthdr */
#define	M_COPYFLAGS \
	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD|M_ANYCAST6| \
	 M_LINK0|M_LINK1|M_LINK2| \
	 M_AUTHIPHDR|M_DECRYPTED|M_LOOP|M_AUTHIPDGM)

/* flag copied when shallow-copying external storage */
#define	M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)
395
/* mbuf types (m_type) */
#define	MT_FREE		0	/* should be on free list */
#define	MT_DATA		1	/* dynamic (data) allocation */
#define	MT_HEADER	2	/* packet header */
#define	MT_SONAME	3	/* socket name */
#define	MT_SOOPTS	4	/* socket options */
#define	MT_FTABLE	5	/* fragment reassembly header */
#define	MT_CONTROL	6	/* extra-data protocol message */
#define	MT_OOBDATA	7	/* expedited data */

#ifdef MBUFTYPES
/* Name table with one entry per MT_* value, in MT_* order. */
static const char * const mbuftypes[] = {
	"mbfree",	/* MT_FREE */
	"mbdata",	/* MT_DATA */
	"mbheader",	/* MT_HEADER */
	"mbsoname",	/* MT_SONAME */
	"mbsopts",	/* MT_SOOPTS */
	"mbftable",	/* MT_FTABLE */
	"mbcontrol",	/* MT_CONTROL */
	"mboobdata",	/* MT_OOBDATA */
};
#endif /* MBUFTYPES */

/* flags to m_get/MGET */
#define	M_DONTWAIT	M_NOWAIT
#define	M_WAIT		M_WAITOK
422
#ifdef MBUFTRACE
/*
 * mbuf allocation tracing
 */
void	mowner_init(struct mbuf *, int);
void	mowner_ref(struct mbuf *, int);
void	mowner_revoke(struct mbuf *, bool, int);
void	mowner_attach(struct mowner *);
void	mowner_detach(struct mowner *);
void	m_claim(struct mbuf *, struct mowner *);
void	m_claimm(struct mbuf *, struct mowner *);
#else /* !MBUFTRACE */
/* Without MBUFTRACE every tracing hook expands to an empty statement. */
#define	mowner_init(m, type)		do { } while (/* CONSTCOND */ 0)
#define	mowner_ref(m, flags)		do { } while (/* CONSTCOND */ 0)
#define	mowner_revoke(m, all, flags)	do { } while (/* CONSTCOND */ 0)
#define	mowner_attach(mo)		do { } while (/* CONSTCOND */ 0)
#define	mowner_detach(mo)		do { } while (/* CONSTCOND */ 0)
#define	m_claim(m, mowner)		do { } while (/* CONSTCOND */ 0)
#define	m_claimm(m, mo)			do { } while (/* CONSTCOND */ 0)
#endif /* MBUFTRACE */

/* Upper-case spellings of the hooks above. */
#define	MCLAIM(m, mo)		m_claim((m), (mo))
#define	MOWNER_ATTACH(mo)	mowner_attach(mo)
#define	MOWNER_DETACH(mo)	mowner_detach(mo)
447
/*
 * mbuf allocation/deallocation macros:
 *
 * MGET(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain internal data.
 *
 * MGETHDR(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain a packet header
 * and internal data.
 *
 * Both store the result of m_get()/m_gethdr() in `m'.  The expansion is
 * a fully parenthesized assignment expression whose value is the new
 * `m', so it may be used as a statement or tested directly, e.g.
 * "if (MGET(m, how, type) == NULL)".
 *
 * If 'how' is M_WAIT, these macros (and the corresponding functions)
 * are guaranteed to return successfully.
 */
#define	MGET(m, how, type)	((m) = m_get((how), (type)))
#define	MGETHDR(m, how, type)	((m) = m_gethdr((how), (type)))
463
464#if defined(_KERNEL)
/* NOTE(review): _M_ is defined empty; its purpose is not evident here. */
#define	_M_
/*
 * Macros for tracking external storage associated with an mbuf.
 *
 * With DEBUG, MCLREFDEBUGN records (file, line) in ext_nfile/ext_nline
 * and MCLREFDEBUGO records them in ext_ofile/ext_oline.  Without DEBUG
 * both expand to nothing.
 */
#if defined(DEBUG)
#define	MCLREFDEBUGN(m, file, line) \
do { \
	(m)->m_ext.ext_nfile = (file); \
	(m)->m_ext.ext_nline = (line); \
} while (/* CONSTCOND */ 0)

#define	MCLREFDEBUGO(m, file, line) \
do { \
	(m)->m_ext.ext_ofile = (file); \
	(m)->m_ext.ext_oline = (line); \
} while (/* CONSTCOND */ 0)
#else /* !DEBUG */
#define	MCLREFDEBUGN(m, file, line)
#define	MCLREFDEBUGO(m, file, line)
#endif /* DEBUG */
485
/*
 * Initialize the external-storage reference of mbuf `m': point m_ext_ref
 * at the mbuf itself and set the reference count to 1.  M_EXT must not
 * be set yet.  The debug owner record is stamped with the expansion
 * site and the "N" record is cleared.
 */
#define	MCLINITREFERENCE(m) \
do { \
	KDASSERT(((m)->m_flags & M_EXT) == 0); \
	/* m_ext resolves through m_ext_ref, so set the reference first. */ \
	(m)->m_ext_ref = (m); \
	(m)->m_ext.ext_refcnt = 1; \
	MCLREFDEBUGO((m), __FILE__, __LINE__); \
	MCLREFDEBUGN((m), NULL, 0); \
} while (/* CONSTCOND */ 0)
494
495/*
496 * Macros for mbuf external storage.
497 *
498 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
499 * the flag M_EXT is set upon success.
500 *
501 * MEXTMALLOC allocates external storage and adds it to
502 * a normal mbuf; the flag M_EXT is set upon success.
503 *
504 * MEXTADD adds pre-allocated external storage to
505 * a normal mbuf; the flag M_EXT is set upon success.
506 */
507
/*
 * Allocate a buffer from `pool_cache' and attach it to mbuf `m' as
 * external cluster storage of `size' bytes.  `how' selects waiting
 * (M_WAIT) or non-waiting allocation.  On failure ext_buf is left NULL
 * and nothing else in the mbuf is modified.
 */
#define	_MCLGET(m, pool_cache, size, how) \
do { \
	(m)->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr( \
	    (pool_cache), \
	    (how) == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : 0, \
	    &(m)->m_ext_storage.ext_paddr); \
	if ((m)->m_ext_storage.ext_buf != NULL) { \
		/* Must precede any use of m_ext and the M_EXT update. */ \
		MCLINITREFERENCE(m); \
		(m)->m_data = (m)->m_ext.ext_buf; \
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
		    (M_EXT|M_CLUSTER|M_EXT_RW); \
		(m)->m_ext.ext_flags = 0; \
		(m)->m_ext.ext_size = (size); \
		(m)->m_ext.ext_free = NULL; \
		(m)->m_ext.ext_arg = (pool_cache); \
		/* ext_paddr was filled in by pool_cache_get_paddr() */ \
		mowner_ref((m), M_EXT|M_CLUSTER); \
	} \
} while (/* CONSTCOND */ 0)

/*
 * MCLGET: _MCLGET using the standard mbuf cluster pool (mcl_cache) and
 * the standard cluster size (MCLBYTES).
 */
#define	MCLGET(m, how)	_MCLGET((m), mcl_cache, MCLBYTES, (how))
532
/*
 * Allocate `size' bytes with malloc() and attach them to mbuf `m' as
 * writable external storage.  `how' is passed to malloc() as its flags
 * argument.  On failure ext_buf is left NULL and nothing else in the
 * mbuf is modified.
 */
#define	MEXTMALLOC(m, size, how) \
do { \
	(m)->m_ext_storage.ext_buf = \
	    (char *)malloc((size), mbtypes[(m)->m_type], (how)); \
	if ((m)->m_ext_storage.ext_buf != NULL) { \
		/* Must precede any use of m_ext and the M_EXT update. */ \
		MCLINITREFERENCE(m); \
		(m)->m_data = (m)->m_ext.ext_buf; \
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
		    (M_EXT|M_EXT_RW); \
		(m)->m_ext.ext_flags = 0; \
		(m)->m_ext.ext_size = (size); \
		(m)->m_ext.ext_free = NULL; \
		(m)->m_ext.ext_arg = NULL; \
		mowner_ref((m), M_EXT); \
	} \
} while (/* CONSTCOND */ 0)
549
/*
 * Attach caller-provided storage `buf' of `size' bytes to mbuf `m'.
 * `free' and `arg' are recorded as ext_free and ext_arg.  Only M_EXT is
 * set: the storage is not flagged M_EXT_RW here.  `type' is not used by
 * this expansion.
 */
#define	MEXTADD(m, buf, size, type, free, arg) \
do { \
	/* Must precede any use of m_ext and the M_EXT update. */ \
	MCLINITREFERENCE(m); \
	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf); \
	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT; \
	(m)->m_ext.ext_flags = 0; \
	(m)->m_ext.ext_size = (size); \
	(m)->m_ext.ext_free = (free); \
	(m)->m_ext.ext_arg = (arg); \
	mowner_ref((m), M_EXT); \
} while (/* CONSTCOND */ 0)
561
/*
 * Point m_data back at the start of the mbuf's buffer: the external
 * buffer if M_EXT is set, otherwise the packet-header data area if
 * M_PKTHDR is set, otherwise the plain data area.
 */
#define	MRESETDATA(m) \
do { \
	(m)->m_data = \
	    (((m)->m_flags & M_EXT) != 0) ? (m)->m_ext.ext_buf : \
	    (((m)->m_flags & M_PKTHDR) != 0) ? (m)->m_pktdat : \
	    (m)->m_dat; \
} while (/* CONSTCOND */ 0)
574
/*
 * Copy the mbuf pkthdr from `from' to `to'.
 * `from' must have M_PKTHDR set, and `to' must be empty.
 *
 * The header is copied by structure assignment, the flags in
 * M_COPYFLAGS are carried over, the tag list of `to' is re-initialized
 * and then filled by m_tag_copy_chain(), and m_data of `to' is pointed
 * at its packet-header data area.
 */
#define	M_COPY_PKTHDR(to, from) \
do { \
	(to)->m_pkthdr = (from)->m_pkthdr; \
	(to)->m_flags = (from)->m_flags & M_COPYFLAGS; \
	/* The struct copy also copied the list head; start afresh. */ \
	SLIST_INIT(&(to)->m_pkthdr.tags); \
	m_tag_copy_chain((to), (from)); \
	(to)->m_data = (to)->m_pktdat; \
} while (/* CONSTCOND */ 0)

/*
 * Move the mbuf pkthdr from `from' to `to'.
 * `from' must have M_PKTHDR set, and `to' must be empty.
 */
#define	M_MOVE_PKTHDR(to, from)	m_move_pkthdr(to, from)
593
/*
 * Advance the m_data pointer of a newly-allocated mbuf (m_get/MGET) so
 * that an object of `len' bytes sits at the end of the mbuf, longword
 * aligned.
 */
#define	M_ALIGN(m, len) \
do { \
	(m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \
} while (/* CONSTCOND */ 0)

/*
 * Same as M_ALIGN, for mbufs allocated with m_gethdr/MGETHDR or
 * initialized by M_COPY_PKTHDR.
 */
#define	MH_ALIGN(m, len) \
do { \
	(m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \
} while (/* CONSTCOND */ 0)
611
/*
 * True if an mbuf's data area must be treated as read-only: it uses
 * external storage that is read-only mapped, or not marked as R/W, or
 * referenced by more than one mbuf.  Put the other way round, external
 * storage is writable only when M_EXT_RW is set, M_EXT_ROMAP is clear
 * and the reference count is at most 1.
 */
#define	M_READONLY(m) \
	(((m)->m_flags & M_EXT) != 0 && \
	 !(((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) == M_EXT_RW && \
	   (m)->m_ext.ext_refcnt <= 1))

/*
 * True if the first __len bytes of __m cannot be written in place:
 * either the mbuf holds fewer than __len bytes or it is read-only.
 */
#define	M_UNWRITABLE(__m, __len) \
	((__m)->m_len < (__len) || M_READONLY((__m)))

/*
 * Determine if an mbuf's data area is read-only at the MMU, i.e. both
 * M_EXT and M_EXT_ROMAP are set.
 */
#define	M_ROMAP(m) \
	((~(m)->m_flags & (M_EXT|M_EXT_ROMAP)) == 0)
629
/*
 * Compute the amount of space available before the current start of
 * data in an mbuf: the distance from the start of the buffer in use
 * (external, packet-header or plain) to m_data.
 */
#define	_M_LEADINGSPACE(m) \
	((m)->m_data - \
	 (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
	  ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat))

/* As _M_LEADINGSPACE, but a read-only mbuf reports no usable space. */
#define	M_LEADINGSPACE(m) \
	(M_READONLY((m)) ? 0 : _M_LEADINGSPACE((m)))

/*
 * Compute the amount of space available after the end of data in an
 * mbuf: the distance from the end of the data (m_data + m_len) to the
 * end of the buffer in use.
 */
#define	_M_TRAILINGSPACE(m) \
	((((m)->m_flags & M_EXT) ? \
	    (m)->m_ext.ext_buf + (m)->m_ext.ext_size : &(m)->m_dat[MLEN]) - \
	 ((m)->m_data + (m)->m_len))

/* As _M_TRAILINGSPACE, but a read-only mbuf reports no usable space. */
#define	M_TRAILINGSPACE(m) \
	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
653
/*
 * Compute the address of an mbuf's internal data area: the
 * packet-header data area when M_PKTHDR is set, the plain one otherwise.
 */
#define	M_BUFADDR(m) \
	((((m)->m_flags & M_PKTHDR) != 0) ? (m)->m_pktdat : (m)->m_dat)

/*
 * Compute the offset, from the start of struct mbuf, of the beginning
 * of the data buffer of a non-ext mbuf.
 */
#define	M_BUFOFFSET(m) \
	((((m)->m_flags & M_PKTHDR) != 0) ? \
	    offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))
667
/*
 * Arrange to prepend space of size plen to mbuf m.
 *
 * When the mbuf has enough writable leading space the data pointer is
 * simply moved back; otherwise m_prepend() is called and its result is
 * stored in m, with `how' specifying whether to wait.
 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
 * is freed and m is set to NULL.
 * On success the packet length is adjusted if m carries a packet header.
 */
#define	M_PREPEND(m, plen, how) \
do { \
	if (M_LEADINGSPACE(m) < (plen)) \
		(m) = m_prepend((m), (plen), (how)); \
	else { \
		(m)->m_data -= (plen); \
		(m)->m_len += (plen); \
	} \
	if ((m) != NULL && ((m)->m_flags & M_PKTHDR) != 0) \
		(m)->m_pkthdr.len += (plen); \
} while (/* CONSTCOND */ 0)
684
/*
 * Change mbuf `m' to the new type `t' and keep the per-type statistics
 * in step: the old type's count is decremented and the new type's count
 * incremented.  `t' must not be MT_FREE.
 *
 * Every use of `t' is parenthesized; note that `t' is still evaluated
 * more than once, so it must not have side effects.
 */
#define	MCHTYPE(m, t) \
do { \
	KASSERT((t) != MT_FREE); \
	mbstat_type_add((m)->m_type, -1); \
	mbstat_type_add((t), 1); \
	(m)->m_type = (t); \
} while (/* CONSTCOND */ 0)
693
/* Length to pass to m_copy to copy everything. */
#define	M_COPYALL	(-1)

/* Compatibility with 4.3: m_copy is m_copym with M_DONTWAIT. */
#define	m_copy(m, o, l)	m_copym((m), (o), (l), M_DONTWAIT)
699
/*
 * Allow drivers and/or protocols to store private context information.
 * The context pointer occupies the _rcvif union of the packet header,
 * so it shares storage with the receive interface index.
 *
 * M_GETCTX(m, t) - fetch the context pointer, cast to type t
 * M_SETCTX(m, c) - store c as the context pointer
 * M_CLEARCTX(m)  - reset the context pointer to NULL
 */
#define	M_GETCTX(m, t)	((t)(m)->m_pkthdr._rcvif.ctx)
#define	M_SETCTX(m, c)	((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
#define	M_CLEARCTX(m)	M_SETCTX((m), NULL)
706
707#endif /* defined(_KERNEL) */
708
/*
 * Simple mbuf queueing system
 *
 * This is basically a SIMPLEQ adapted to mbuf use (ie using
 * m_nextpkt instead of field.sqe_next).
 *
 * m_next is ignored, so queueing chains of mbufs is possible.
 *
 * mq_first is the head of the queue; mq_last points at the pointer that
 * the next enqueued mbuf is stored through (mq_first itself when the
 * queue is empty).
 */
#define MBUFQ_HEAD(name) \
struct name { \
	struct mbuf *mq_first; \
	struct mbuf **mq_last; \
}

/* Make q an empty queue. */
#define MBUFQ_INIT(q) do { \
	(q)->mq_first = NULL; \
	(q)->mq_last = &(q)->mq_first; \
} while (/*CONSTCOND*/0)

/* Append m at the tail of q. */
#define MBUFQ_ENQUEUE(q, m) do { \
	(m)->m_nextpkt = NULL; \
	*(q)->mq_last = (m); \
	(q)->mq_last = &(m)->m_nextpkt; \
} while (/*CONSTCOND*/0)

/* Insert m at the head of q. */
#define MBUFQ_PREPEND(q, m) do { \
	(m)->m_nextpkt = (q)->mq_first; \
	if ((m)->m_nextpkt == NULL) \
		(q)->mq_last = &(m)->m_nextpkt; \
	(q)->mq_first = (m); \
} while (/*CONSTCOND*/0)

/* Remove the head of q into m; m becomes NULL if q is empty. */
#define MBUFQ_DEQUEUE(q, m) do { \
	(m) = (q)->mq_first; \
	if ((m) != NULL) { \
		(q)->mq_first = (m)->m_nextpkt; \
		if ((q)->mq_first == NULL) \
			(q)->mq_last = &(q)->mq_first; \
		else \
			(m)->m_nextpkt = NULL; \
	} \
} while (/*CONSTCOND*/0)

/* Pass every queued mbuf to m_freem() and leave q empty. */
#define MBUFQ_DRAIN(q) do { \
	struct mbuf *__m0; \
	for (__m0 = (q)->mq_first; __m0 != NULL; __m0 = (q)->mq_first) { \
		(q)->mq_first = __m0->m_nextpkt; \
		m_freem(__m0); \
	} \
	(q)->mq_last = &(q)->mq_first; \
} while (/*CONSTCOND*/0)

#define MBUFQ_FIRST(q)	((q)->mq_first)
#define MBUFQ_NEXT(m)	((m)->m_nextpkt)
/* Note: this dereferences mq_last, i.e. it yields the tail link itself. */
#define MBUFQ_LAST(q)	(*(q)->mq_last)
761
762/*
763 * Mbuf statistics.
764 * For statistics related to mbuf and cluster allocations, see also the
765 * pool headers (mb_cache and mcl_cache).
766 */
767struct mbstat {
768 u_long _m_spare; /* formerly m_mbufs */
769 u_long _m_spare1; /* formerly m_clusters */
770 u_long _m_spare2; /* spare field */
771 u_long _m_spare3; /* formely m_clfree - free clusters */
772 u_long m_drops; /* times failed to find space */
773 u_long m_wait; /* times waited for space */
774 u_long m_drain; /* times drained protocols for space */
775 u_short m_mtypes[256]; /* type specific mbuf allocations */
776};
777
778struct mbstat_cpu {
779 u_int m_mtypes[256]; /* type specific mbuf allocations */
780};
781
/*
 * Mbuf sysctl variables.
 */
#define	MBUF_MSIZE		1	/* int: mbuf base size */
#define	MBUF_MCLBYTES		2	/* int: mbuf cluster size */
#define	MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
#define	MBUF_MBLOWAT		4	/* int: mbuf low water mark */
#define	MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
#define	MBUF_STATS		6	/* struct: mbstat */
#define	MBUF_MOWNERS		7	/* struct: m_owner[] */
#define	MBUF_MAXID		8	/* number of valid MBUF ids */

/*
 * Name/type table indexed by the MBUF_* ids above; slot 0 is unused and
 * the two struct-valued entries carry no name.
 */
#define	CTL_MBUF_NAMES { \
	{ 0, 0 }, \
	{ "msize", CTLTYPE_INT }, \
	{ "mclbytes", CTLTYPE_INT }, \
	{ "nmbclusters", CTLTYPE_INT }, \
	{ "mblowat", CTLTYPE_INT }, \
	{ "mcllowat", CTLTYPE_INT }, \
	{ 0 /* "stats" */, CTLTYPE_STRUCT }, \
	{ 0 /* "mowners" */, CTLTYPE_STRUCT }, \
}
804
805#ifdef _KERNEL
806extern struct mbstat mbstat;
807extern int nmbclusters; /* limit on the # of clusters */
808extern int mblowat; /* mbuf low water mark */
809extern int mcllowat; /* mbuf cluster low water mark */
810extern int max_linkhdr; /* largest link-level header */
811extern int max_protohdr; /* largest protocol header */
812extern int max_hdr; /* largest link+protocol header */
813extern int max_datalen; /* MHLEN - max_hdr */
814extern const int msize; /* mbuf base size */
815extern const int mclbytes; /* mbuf cluster size */
816extern pool_cache_t mb_cache;
817extern pool_cache_t mcl_cache;
818#ifdef MBUFTRACE
819LIST_HEAD(mownerhead, mowner);
820extern struct mownerhead mowners;
821extern struct mowner unknown_mowners[];
822extern struct mowner revoked_mowner;
823#endif
824
MALLOC_DECLARE(M_MBUF);
MALLOC_DECLARE(M_SONAME);

/* Routines returning an mbuf (chain). */
struct mbuf	*m_add(struct mbuf *, struct mbuf *);
struct mbuf	*m_copyback_cow(struct mbuf *, int, int, const void *, int);
struct mbuf	*m_copym(struct mbuf *, int, int, int);
struct mbuf	*m_copypacket(struct mbuf *, int);
struct mbuf	*m_copyup(struct mbuf *, int, int);
struct mbuf	*m_defrag(struct mbuf *, int);
struct mbuf	*m_devget(char *, int, int, struct ifnet *,
		    void (*copy)(const void *, void *, size_t));
struct mbuf	*m_dup(struct mbuf *, int, int, int);
struct mbuf	*m_get(int, int);
struct mbuf	*m_getcl(int, int, int);
struct mbuf	*m_getclr(int, int);
struct mbuf	*m_gethdr(int, int);
struct mbuf	*m_getptr(struct mbuf *, int, int *);
struct mbuf	*m_prepend(struct mbuf *, int, int);
struct mbuf	*m_pulldown(struct mbuf *, int, int, int *);
struct mbuf	*m_pullup(struct mbuf *, int);
struct mbuf	*m_split(struct mbuf *, int, int);

/* Routines returning a status or a value. */
int		m_append(struct mbuf *, int, const void *);
int		m_apply(struct mbuf *, int, int,
		    int (*)(void *, void *, unsigned int), void *);
bool		m_ensure_contig(struct mbuf **, int);
int		m_makewritable(struct mbuf **, int, int, int);
char		*m_mapin(struct mbuf *);
int		m_mballoc(int, int);

/* Routines returning nothing. */
void		m_adj(struct mbuf *, int);
void		m_align(struct mbuf *, int);
void		m_cat(struct mbuf *, struct mbuf *);
void		m_clget(struct mbuf *, int);
void		m_copyback(struct mbuf *, int, int, const void *);
void		m_copydata(struct mbuf *, int, int, void *);
void		m_ext_free(struct mbuf *);
void		m_move_pkthdr(struct mbuf *, struct mbuf *);
void		m_reclaim(void *, int);
void		mbinit(void);

/*
 * Freeing.  With DEBUG, m_free/m_freem are macros that pass the calling
 * function name and line number to m__free/m__freem; otherwise they are
 * ordinary functions.
 */
struct mbuf	*m__free(const char *, int, struct mbuf *);
void		m__freem(const char *, int, struct mbuf *);
#ifdef DEBUG
#define	m_free(m)	m__free(__func__, __LINE__, (m))
#define	m_freem(m)	m__freem(__func__, __LINE__, (m))
#else
struct mbuf	*m_free(struct mbuf *);
void		m_freem(struct mbuf *);
#endif /* DEBUG */
873
874/* Inline routines. */
875static __inline u_int m_length(const struct mbuf *) __unused;
876
877/* Statistics */
878void mbstat_type_add(int, int);
879
880/* Packet tag routines */
881struct m_tag *m_tag_get(int, int, int);
882void m_tag_free(struct m_tag *);
883void m_tag_prepend(struct mbuf *, struct m_tag *);
884void m_tag_unlink(struct mbuf *, struct m_tag *);
885void m_tag_delete(struct mbuf *, struct m_tag *);
886void m_tag_delete_chain(struct mbuf *, struct m_tag *);
887void m_tag_delete_nonpersistent(struct mbuf *);
888struct m_tag *m_tag_find(const struct mbuf *, int, struct m_tag *);
889struct m_tag *m_tag_copy(struct m_tag *);
890int m_tag_copy_chain(struct mbuf *, struct mbuf *);
891void m_tag_init(struct mbuf *);
892struct m_tag *m_tag_first(struct mbuf *);
893struct m_tag *m_tag_next(struct mbuf *, struct m_tag *);
894
/*
 * Packet tag types (values for m_tag_id).
 */
#define	PACKET_TAG_NONE			0	/* Nothing */
#define	PACKET_TAG_VLAN			1	/* VLAN ID */
#define	PACKET_TAG_ENCAP		2	/* encapsulation data */
#define	PACKET_TAG_ESP			3	/* ESP information */
#define	PACKET_TAG_PF			11	/* packet filter */
#define	PACKET_TAG_ALTQ_QID		12	/* ALTQ queue id */

/* IPsec processing state */
#define	PACKET_TAG_IPSEC_IN_CRYPTO_DONE		16
#define	PACKET_TAG_IPSEC_IN_DONE		17
#define	PACKET_TAG_IPSEC_OUT_DONE		18
#define	PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED	19	/* NIC IPsec crypto req'ed */
#define	PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO	20	/* NIC notifies IPsec */
#define	PACKET_TAG_IPSEC_PENDING_TDB		21	/* Reminder to do IPsec */

#define	PACKET_TAG_IPSEC_SOCKET		22	/* IPSEC socket ref */
#define	PACKET_TAG_IPSEC_HISTORY	23	/* IPSEC history */

#define	PACKET_TAG_IPSEC_NAT_T_PORTS	25	/* two uint16_t */

#define	PACKET_TAG_INET6		26	/* IPv6 info */

#define	PACKET_TAG_ECO_RETRYPARMS	27	/* Econet retry parameters */

/*
 * Tunnel identification and protocol callback, for loop
 * detection/recovery.
 */
#define	PACKET_TAG_TUNNEL_INFO		28

#define	PACKET_TAG_MPLS			29	/* Indicate it's for MPLS */
925
926/*
927 * Return the number of bytes in the mbuf chain, m.
928 */
929static __inline u_int
930m_length(const struct mbuf *m)
931{
932 const struct mbuf *m0;
933 u_int pktlen;
934
935 if ((m->m_flags & M_PKTHDR) != 0)
936 return m->m_pkthdr.len;
937
938 pktlen = 0;
939 for (m0 = m; m0 != NULL; m0 = m0->m_next)
940 pktlen += m0->m_len;
941 return pktlen;
942}
943
944static __inline void
945m_hdr_init(struct mbuf *m, short type, struct mbuf *next, char *data, int len)
946{
947
948 KASSERT(m != NULL);
949
950 mowner_init(m, type);
951 m->m_ext_ref = m; /* default */
952 m->m_type = type;
953 m->m_len = len;
954 m->m_next = next;
955 m->m_nextpkt = NULL; /* default */
956 m->m_data = data;
957 m->m_flags = 0; /* default */
958}
959
960static __inline void
961m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
962{
963
964 m->m_pkthdr.rcvif_index = ifp->if_index;
965}
966
967static __inline void
968m_reset_rcvif(struct mbuf *m)
969{
970
971 /* A caller may expect whole _rcvif union is zeroed */
972 /* m->m_pkthdr.rcvif_index = 0; */
973 m->m_pkthdr._rcvif.ctx = NULL;
974}
975
976static __inline void
977m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
978{
979
980 m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
981}
982
983static __inline void
984m_pkthdr_init(struct mbuf *m)
985{
986
987 KASSERT(m != NULL);
988
989 m->m_data = m->m_pktdat;
990 m->m_flags = M_PKTHDR;
991
992 m_reset_rcvif(m);
993 m->m_pkthdr.len = 0;
994 m->m_pkthdr.csum_flags = 0;
995 m->m_pkthdr.csum_data = 0;
996 SLIST_INIT(&m->m_pkthdr.tags);
997
998 m->m_pkthdr.pattr_class = NULL;
999 m->m_pkthdr.pattr_af = AF_UNSPEC;
1000 m->m_pkthdr.pattr_hdr = NULL;
1001}
1002
1003void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
1004 __printflike(1, 2));
1005
1006/*
1007 * Get rcvif of a mbuf.
1008 *
1009 * The caller must call m_put_rcvif after using rcvif. The caller cannot
1010 * block or sleep during using rcvif. Insofar as the constraint is satisfied,
1011 * the API ensures a got rcvif isn't be freed until m_put_rcvif is called.
1012 */
1013static __inline struct ifnet *
1014m_get_rcvif(const struct mbuf *m, int *s)
1015{
1016
1017 *s = pserialize_read_enter();
1018 return if_byindex(m->m_pkthdr.rcvif_index);
1019}
1020
/*
 * m_put_rcvif: counterpart of m_get_rcvif().
 *
 * Leaves the pserialize read section using the cookie stored in *s, but
 * only when ifp is non-NULL; with a NULL ifp this is a no-op.
 */
static __inline void
m_put_rcvif(struct ifnet *ifp, int *s)
{
	if (ifp != NULL)
		pserialize_read_exit(*s);
}
1029
1030/*
1031 * Get rcvif of a mbuf.
1032 *
1033 * The caller must call m_put_rcvif_psref after using rcvif. The API ensures
1034 * a got rcvif isn't be freed until m_put_rcvif_psref is called.
1035 */
1036static __inline struct ifnet *
1037m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
1038{
1039
1040 return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
1041}
1042
/*
 * m_put_rcvif_psref: counterpart of m_get_rcvif_psref().
 *
 * Hands ifp and psref to if_put() when ifp is non-NULL; with a NULL ifp
 * this is a no-op.
 */
static __inline void
m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
{
	if (ifp != NULL)
		if_put(ifp, psref);
}
1051
1052/*
1053 * Get rcvif of a mbuf.
1054 *
1055 * This is NOT an MP-safe API and shouldn't be used at where we want MP-safe.
1056 */
1057static __inline struct ifnet *
1058m_get_rcvif_NOMPSAFE(const struct mbuf *m)
1059{
1060
1061 return if_byindex(m->m_pkthdr.rcvif_index);
1062}
1063
1064#endif /* _KERNEL */
1065#endif /* !_SYS_MBUF_H_ */
1066