1/* $NetBSD: buf.h,v 1.125 2016/01/11 08:40:52 martin Exp $ */
2
3/*-
4 * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)buf.h 8.9 (Berkeley) 3/30/95
67 */
68
69#ifndef _SYS_BUF_H_
70#define _SYS_BUF_H_
71
72#include <sys/pool.h>
73#include <sys/queue.h>
74#include <sys/mutex.h>
75#include <sys/condvar.h>
76#include <sys/rbtree.h>
77#if defined(_KERNEL)
78#include <sys/workqueue.h>
79#endif /* defined(_KERNEL) */
80
/*
 * Forward declarations, so that pointers to these types can be used in
 * this header without pulling in the headers that define them.
 */
struct buf;
struct mount;
struct vnode;
struct kauth_cred;
struct uio;		/* named by the physio() prototype */

/*
 * Fixed, non-NULL sentinel value of type `struct buf *'.
 * NOTE(review): presumably marks a buffer that is on no list; no user of
 * this macro is visible in this header -- confirm against the callers.
 */
#define	NOLIST	((struct buf *)0x87654321)
87
88extern kmutex_t bufcache_lock;
89extern kmutex_t buffer_lock;
90
91#if defined(_KERNEL)
92extern void (*biodone_vfs)(buf_t *);
93#endif
94
95/*
96 * The buffer header describes an I/O operation in the kernel.
97 *
98 * Field markings and the corresponding locks:
99 *
100 * b thread of execution that holds BC_BUSY, does not correspond
101 * directly to any particular LWP
102 * c bufcache_lock
103 * o b_objlock
104 *
105 * For buffers associated with a vnode, b_objlock points to vp->v_interlock.
106 * If not associated with a vnode, it points to the generic buffer_lock.
107 */
108
109/* required for the conditional union member below to be ~safe */
110#if defined(_KERNEL)
111__CTASSERT(sizeof(struct work) <= sizeof(TAILQ_ENTRY(buf)));
112#endif
113
114struct buf {
115 union {
116 TAILQ_ENTRY(buf) u_actq;
117 rb_node_t u_rbnode;
118#if defined(_KERNEL)
119 /* u_work is smaller than u_actq */
120 struct work u_work;
121#endif
122 } b_u; /* b: device driver queue */
123#define b_actq b_u.u_actq
124#define b_work b_u.u_work
125 void (*b_iodone)(struct buf *);/* b: call when done */
126 int b_error; /* b: errno value. */
127 int b_resid; /* b: remaining I/O. */
128 u_int b_flags; /* b: B_* flags */
129 int b_prio; /* b: priority for queue */
130 int b_bufsize; /* b: allocated size */
131 int b_bcount; /* b: valid bytes in buffer */
132 dev_t b_dev; /* b: associated device */
133 void *b_data; /* b: fs private data */
134 daddr_t b_blkno; /* b: physical block number
135 (partition relative) */
136 daddr_t b_rawblkno; /* b: raw physical block number
137 (volume relative) */
138 struct proc *b_proc; /* b: proc if BB_PHYS */
139 void *b_saveaddr; /* b: saved b_data for physio */
140
141 /*
142 * b: private data for owner.
143 * - buffer cache buffers are owned by corresponding filesystem.
144 * - non-buffer cache buffers are owned by subsystem which
145 * allocated them. (filesystem, disk driver, etc)
146 */
147 void *b_private;
148 off_t b_dcookie; /* NFS: Offset cookie if dir block */
149
150 kcondvar_t b_busy; /* c: threads waiting on buf */
151 u_int b_refcnt; /* c: refcount for b_busy */
152 void *b_unused; /* : unused */
153 LIST_ENTRY(buf) b_hash; /* c: hash chain */
154 LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */
155 TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */
156 LIST_ENTRY(buf) b_wapbllist; /* c: transaction buffer list */
157 daddr_t b_lblkno; /* c: logical block number */
158 int b_freelistindex;/* c: free list index (BQ_) */
159 u_int b_cflags; /* c: BC_* flags */
160 struct vnode *b_vp; /* c: file vnode */
161
162 kcondvar_t b_done; /* o: waiting on completion */
163 u_int b_oflags; /* o: BO_* flags */
164 kmutex_t *b_objlock; /* o: completion lock */
165};
166
/*
 * For portability with historic industry practice, the cylinder number has
 * to be maintained in the `b_resid' field.  b_cylinder is therefore just
 * another name for b_resid: writing one overwrites the other.
 */
#define	b_cylinder	b_resid		/* Cylinder number for disksort(). */
172
/*
 * These flags are kept in b_cflags (owned by buffer cache).
 * Each flag is a distinct single bit.
 */
#define	BC_AGE		0x00000001	/* Move to age queue when I/O done. */
#define	BC_BUSY		0x00000010	/* I/O in progress. */
#define	BC_INVAL	0x00002000	/* Does not contain valid info. */
#define	BC_NOCACHE	0x00008000	/* Do not cache block after use. */
#define	BC_WANTED	0x00800000	/* Process wants this buffer. */
#define	BC_VFLUSH	0x04000000	/* Buffer is being synced. */
182
/*
 * These flags are kept in b_oflags (owned by associated object).
 */
#define	BO_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
#define	BO_DONE		0x00000200	/* I/O completed. */
188
/*
 * These flags are kept in b_flags (owned by buffer holder).
 * B_WRITE is zero: a write is expressed by the absence of B_READ.
 */
#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
#define	B_COWDONE	0x00000400	/* Copy-on-write already done. */
#define	B_GATHERED	0x00001000	/* LFS: already in a segment. */
#define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
#define	B_PHYS		0x00040000	/* I/O to user memory. */
#define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
#define	B_READ		0x00100000	/* Read buffer. */
#define	B_DEVPRIVATE	0x02000000	/* Device driver private flag. */

/*
 * Bit-name table covering the B_*, BC_* and BO_* flags together.  After the
 * leading \20, each entry is a 1-origin bit number (written as an octal
 * escape) followed by the flag's name; the positions match the flag values
 * defined in this header.
 * NOTE(review): the layout is that of an old-style snprintb(3) format
 * string -- confirm against the consumer.
 *
 * \30WANTED corresponds to BC_WANTED (0x00800000), which previously had no
 * entry.  \4BAD has no matching flag definition in this header and is kept
 * unchanged.
 */
#define	BUF_FLAGBITS \
    "\20\1AGE\3ASYNC\4BAD\5BUSY\10DELWRI" \
    "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
    "\23PHYS\24RAW\25READ\30WANTED\32DEVPRIVATE\33VFLUSH"

/*
 * Avoid weird code due to B_WRITE being a "pseudo flag": test the
 * direction of a transfer through these instead of masking with B_WRITE.
 */
#define	BUF_ISREAD(bp)	(((bp)->b_flags & B_READ) == B_READ)
#define	BUF_ISWRITE(bp)	(((bp)->b_flags & B_READ) == B_WRITE)
210
/*
 * This structure describes a clustered I/O.  It is stored in the b_saveaddr
 * field of the buffer on which I/O is done.  At I/O completion, the cluster
 * callback uses the structure to parcel I/Os out to the individual buffers,
 * and then frees this structure.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	void	*bs_saveaddr;		/* Saved buffer address.
					   NOTE(review): was documented as
					   "saved b_addr", but struct buf has
					   no b_addr; presumably b_data --
					   confirm. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf *bs_children;	/* List of associated buffers. */
};
224
/*
 * Zero out the buffer's data area: clears the first b_bcount bytes at
 * b_data and resets b_resid to 0.
 *
 * `bp' is evaluated more than once, so it must not have side effects.
 * The do/while wrapper lets the macro be used as a single statement.
 */
#define	clrbuf(bp)							\
do {									\
	memset((bp)->b_data, 0, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
} while (/* CONSTCOND */ 0)
233
/*
 * Flags to low-level allocation routines.  Each is a distinct bit, so they
 * may be OR'ed together.
 */
#define	B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
#define	B_SYNC		0x02	/* Do all allocations synchronously. */
#define	B_METAONLY	0x04	/* Return indirect block buffer. */
#define	B_CONTIG	0x08	/* Allocate file contiguously. */

/*
 * Flags to bread() and breadn().  This is a separate flag space from the
 * allocation flags above: B_MODIFY has the same value as B_CLRBUF.
 */
#define	B_MODIFY	0x01	/* Hint: caller might modify buffer */
242
243#ifdef _KERNEL
244
/*
 * Accessors for the I/O priority stored in b_prio.
 *
 * BIO_SETPRIO expands to a fully parenthesized assignment expression, so
 * it can be combined with other operators without the surrounding
 * expression being absorbed into the assigned value.  The macro arguments
 * are evaluated exactly once each.
 */
#define	BIO_GETPRIO(bp)		((bp)->b_prio)
#define	BIO_SETPRIO(bp, prio)	((bp)->b_prio = (prio))
#define	BIO_COPYPRIO(bp1, bp2)	BIO_SETPRIO(bp1, BIO_GETPRIO(bp2))

/*
 * Priority levels, lowest (0) to highest (2).  BPRIO_NPRIO is one more
 * than the highest level, i.e. the number of levels.
 */
#define	BPRIO_NPRIO		3
#define	BPRIO_TIMECRITICAL	2
#define	BPRIO_TIMELIMITED	1
#define	BPRIO_TIMENONCRITICAL	0
#define	BPRIO_DEFAULT		BPRIO_TIMELIMITED
254
255extern u_int nbuf; /* The number of buffer headers */
256
/*
 * Definitions for the buffer free lists.  The BQ_* values are the free
 * list indices (0 .. BQUEUES - 1) recorded in b_freelistindex.
 */
#define	BQUEUES		4	/* number of free buffer queues */

#define	BQ_LOCKED	0	/* super-blocks &c */
#define	BQ_LRU		1	/* lru, useful buffers */
#define	BQ_AGE		2	/* rubbish */
#define	BQ_EMPTY	3	/* buffer headers with no memory */
266
267struct bqueue {
268 TAILQ_HEAD(, buf) bq_queue;
269 uint64_t bq_bytes;
270 buf_t *bq_marker;
271};
272
273extern struct bqueue bufqueues[BQUEUES];
274
275__BEGIN_DECLS
276/*
277 * bufferio(9) ops
278 */
279void biodone(buf_t *);
280int biowait(buf_t *);
281buf_t *getiobuf(struct vnode *, bool);
282void putiobuf(buf_t *);
283void nestiobuf_setup(buf_t *, buf_t *, int, size_t);
284void nestiobuf_done(buf_t *, int, int);
285
286void nestiobuf_iodone(buf_t *);
287int physio(void (*)(buf_t *), buf_t *, dev_t, int,
288 void (*)(buf_t *), struct uio *);
289
290/*
291 * buffercache(9) ops
292 */
293int bread(struct vnode *, daddr_t, int, int, buf_t **);
294int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
295 int, buf_t **);
296int bwrite(buf_t *);
297void bawrite(buf_t *);
298void bdwrite(buf_t *);
299buf_t *getblk(struct vnode *, daddr_t, int, int, int);
300buf_t *geteblk(int);
301buf_t *incore(struct vnode *, daddr_t);
302int allocbuf(buf_t *, int, int);
303void brelsel(buf_t *, int);
304void brelse(buf_t *, int);
305
306/*
307 * So-far indeterminate ops that might belong to either
308 * bufferio(9) or buffercache(9).
309 */
310void bremfree(buf_t *);
311void bufinit(void);
312void bufinit2(void);
313void minphys(buf_t *);
314void brelvp(buf_t *);
315void reassignbuf(buf_t *, struct vnode *);
316void bgetvp(struct vnode *, buf_t *);
317int buf_syncwait(void);
318u_long buf_memcalc(void);
319int buf_drain(int);
320int buf_setvalimit(vsize_t);
321#if defined(DDB) || defined(DEBUGPRINT)
322void vfs_buf_print(buf_t *, int, void (*)(const char *, ...)
323 __printflike(1, 2));
324#endif
325void buf_init(buf_t *);
326void buf_destroy(buf_t *);
327int bbusy(buf_t *, bool, int, kmutex_t *);
328
329
330__END_DECLS
331#endif /* _KERNEL */
332#endif /* !_SYS_BUF_H_ */
333