1 | /* $NetBSD: fss.c,v 1.95 2016/07/31 12:17:36 hannken Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 2003 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Juergen Hannken-Illjes. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | |
32 | /* |
33 | * File system snapshot disk driver. |
34 | * |
35 | * Block/character interface to the snapshot of a mounted file system. |
36 | */ |
37 | |
38 | #include <sys/cdefs.h> |
39 | __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.95 2016/07/31 12:17:36 hannken Exp $" ); |
40 | |
41 | #include <sys/param.h> |
42 | #include <sys/systm.h> |
43 | #include <sys/namei.h> |
44 | #include <sys/proc.h> |
45 | #include <sys/errno.h> |
46 | #include <sys/malloc.h> |
47 | #include <sys/buf.h> |
48 | #include <sys/ioctl.h> |
49 | #include <sys/disklabel.h> |
50 | #include <sys/device.h> |
51 | #include <sys/disk.h> |
52 | #include <sys/stat.h> |
53 | #include <sys/mount.h> |
54 | #include <sys/vnode.h> |
55 | #include <sys/file.h> |
56 | #include <sys/uio.h> |
57 | #include <sys/conf.h> |
58 | #include <sys/kthread.h> |
59 | #include <sys/fstrans.h> |
60 | #include <sys/vfs_syscalls.h> /* For do_sys_unlink(). */ |
61 | |
62 | #include <miscfs/specfs/specdev.h> |
63 | |
64 | #include <dev/fssvar.h> |
65 | |
66 | #include <uvm/uvm.h> |
67 | |
68 | #include "ioconf.h" |
69 | |
70 | dev_type_open(fss_open); |
71 | dev_type_close(fss_close); |
72 | dev_type_read(fss_read); |
73 | dev_type_write(fss_write); |
74 | dev_type_ioctl(fss_ioctl); |
75 | dev_type_strategy(fss_strategy); |
76 | dev_type_dump(fss_dump); |
77 | dev_type_size(fss_size); |
78 | |
79 | static void fss_unmount_hook(struct mount *); |
80 | static int fss_copy_on_write(void *, struct buf *, bool); |
81 | static inline void fss_error(struct fss_softc *, const char *); |
82 | static int fss_create_files(struct fss_softc *, struct fss_set *, |
83 | off_t *, struct lwp *); |
84 | static int fss_create_snapshot(struct fss_softc *, struct fss_set *, |
85 | struct lwp *); |
86 | static int fss_delete_snapshot(struct fss_softc *, struct lwp *); |
87 | static int fss_softc_alloc(struct fss_softc *); |
88 | static void fss_softc_free(struct fss_softc *); |
89 | static int fss_read_cluster(struct fss_softc *, u_int32_t); |
90 | static void fss_bs_thread(void *); |
91 | static int fss_bs_io(struct fss_softc *, fss_io_type, |
92 | u_int32_t, off_t, int, void *); |
93 | static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t); |
94 | |
95 | static kmutex_t fss_device_lock; /* Protect all units. */ |
96 | static int fss_num_attached = 0; /* Number of attached devices. */ |
97 | static struct vfs_hooks fss_vfs_hooks = { |
98 | .vh_unmount = fss_unmount_hook |
99 | }; |
100 | |
101 | const struct bdevsw fss_bdevsw = { |
102 | .d_open = fss_open, |
103 | .d_close = fss_close, |
104 | .d_strategy = fss_strategy, |
105 | .d_ioctl = fss_ioctl, |
106 | .d_dump = fss_dump, |
107 | .d_psize = fss_size, |
108 | .d_discard = nodiscard, |
109 | .d_flag = D_DISK | D_MPSAFE |
110 | }; |
111 | |
112 | const struct cdevsw fss_cdevsw = { |
113 | .d_open = fss_open, |
114 | .d_close = fss_close, |
115 | .d_read = fss_read, |
116 | .d_write = fss_write, |
117 | .d_ioctl = fss_ioctl, |
118 | .d_stop = nostop, |
119 | .d_tty = notty, |
120 | .d_poll = nopoll, |
121 | .d_mmap = nommap, |
122 | .d_kqfilter = nokqfilter, |
123 | .d_discard = nodiscard, |
124 | .d_flag = D_DISK | D_MPSAFE |
125 | }; |
126 | |
127 | static int fss_match(device_t, cfdata_t, void *); |
128 | static void fss_attach(device_t, device_t, void *); |
129 | static int fss_detach(device_t, int); |
130 | |
131 | CFATTACH_DECL_NEW(fss, sizeof(struct fss_softc), |
132 | fss_match, fss_attach, fss_detach, NULL); |
133 | extern struct cfdriver fss_cd; |
134 | |
135 | void |
136 | fssattach(int num) |
137 | { |
138 | |
139 | mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE); |
140 | if (config_cfattach_attach(fss_cd.cd_name, &fss_ca)) |
141 | aprint_error("%s: unable to register\n" , fss_cd.cd_name); |
142 | } |
143 | |
144 | static int |
145 | fss_match(device_t self, cfdata_t cfdata, void *aux) |
146 | { |
147 | return 1; |
148 | } |
149 | |
150 | static void |
151 | fss_attach(device_t parent, device_t self, void *aux) |
152 | { |
153 | struct fss_softc *sc = device_private(self); |
154 | |
155 | sc->sc_dev = self; |
156 | sc->sc_bdev = NODEV; |
157 | mutex_init(&sc->sc_slock, MUTEX_DEFAULT, IPL_NONE); |
158 | mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE); |
159 | cv_init(&sc->sc_work_cv, "fssbs" ); |
160 | cv_init(&sc->sc_cache_cv, "cowwait" ); |
161 | bufq_alloc(&sc->sc_bufq, "fcfs" , 0); |
162 | sc->sc_dkdev = malloc(sizeof(*sc->sc_dkdev), M_DEVBUF, M_WAITOK); |
163 | sc->sc_dkdev->dk_info = NULL; |
164 | disk_init(sc->sc_dkdev, device_xname(self), NULL); |
165 | if (!pmf_device_register(self, NULL, NULL)) |
166 | aprint_error_dev(self, "couldn't establish power handler\n" ); |
167 | |
168 | if (fss_num_attached++ == 0) |
169 | vfs_hooks_attach(&fss_vfs_hooks); |
170 | } |
171 | |
172 | static int |
173 | fss_detach(device_t self, int flags) |
174 | { |
175 | struct fss_softc *sc = device_private(self); |
176 | |
177 | if (sc->sc_flags & FSS_ACTIVE) |
178 | return EBUSY; |
179 | |
180 | if (--fss_num_attached == 0) |
181 | vfs_hooks_detach(&fss_vfs_hooks); |
182 | |
183 | pmf_device_deregister(self); |
184 | mutex_destroy(&sc->sc_slock); |
185 | mutex_destroy(&sc->sc_lock); |
186 | cv_destroy(&sc->sc_work_cv); |
187 | cv_destroy(&sc->sc_cache_cv); |
188 | bufq_drain(sc->sc_bufq); |
189 | bufq_free(sc->sc_bufq); |
190 | disk_destroy(sc->sc_dkdev); |
191 | free(sc->sc_dkdev, M_DEVBUF); |
192 | |
193 | return 0; |
194 | } |
195 | |
196 | int |
197 | fss_open(dev_t dev, int flags, int mode, struct lwp *l) |
198 | { |
199 | int mflag; |
200 | cfdata_t cf; |
201 | struct fss_softc *sc; |
202 | |
203 | mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); |
204 | |
205 | mutex_enter(&fss_device_lock); |
206 | |
207 | sc = device_lookup_private(&fss_cd, minor(dev)); |
208 | if (sc == NULL) { |
209 | cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); |
210 | cf->cf_name = fss_cd.cd_name; |
211 | cf->cf_atname = fss_cd.cd_name; |
212 | cf->cf_unit = minor(dev); |
213 | cf->cf_fstate = FSTATE_STAR; |
214 | sc = device_private(config_attach_pseudo(cf)); |
215 | if (sc == NULL) { |
216 | mutex_exit(&fss_device_lock); |
217 | return ENOMEM; |
218 | } |
219 | } |
220 | |
221 | mutex_enter(&sc->sc_slock); |
222 | |
223 | sc->sc_flags |= mflag; |
224 | |
225 | mutex_exit(&sc->sc_slock); |
226 | mutex_exit(&fss_device_lock); |
227 | |
228 | return 0; |
229 | } |
230 | |
231 | int |
232 | fss_close(dev_t dev, int flags, int mode, struct lwp *l) |
233 | { |
234 | int mflag, error; |
235 | cfdata_t cf; |
236 | struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev)); |
237 | |
238 | mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); |
239 | error = 0; |
240 | |
241 | mutex_enter(&fss_device_lock); |
242 | restart: |
243 | mutex_enter(&sc->sc_slock); |
244 | if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) != mflag) { |
245 | sc->sc_flags &= ~mflag; |
246 | mutex_exit(&sc->sc_slock); |
247 | mutex_exit(&fss_device_lock); |
248 | return 0; |
249 | } |
250 | if ((sc->sc_flags & FSS_ACTIVE) != 0 && |
251 | (sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0) { |
252 | sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE; |
253 | mutex_exit(&sc->sc_slock); |
254 | error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l); |
255 | goto restart; |
256 | } |
257 | if ((sc->sc_flags & FSS_ACTIVE) != 0) { |
258 | mutex_exit(&sc->sc_slock); |
259 | mutex_exit(&fss_device_lock); |
260 | return error; |
261 | } |
262 | |
263 | KASSERT((sc->sc_flags & FSS_ACTIVE) == 0); |
264 | KASSERT((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag); |
265 | mutex_exit(&sc->sc_slock); |
266 | cf = device_cfdata(sc->sc_dev); |
267 | error = config_detach(sc->sc_dev, DETACH_QUIET); |
268 | if (! error) |
269 | free(cf, M_DEVBUF); |
270 | mutex_exit(&fss_device_lock); |
271 | |
272 | return error; |
273 | } |
274 | |
275 | void |
276 | fss_strategy(struct buf *bp) |
277 | { |
278 | const bool write = ((bp->b_flags & B_READ) != B_READ); |
279 | struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev)); |
280 | |
281 | mutex_enter(&sc->sc_slock); |
282 | |
283 | if (write || !FSS_ISVALID(sc)) { |
284 | |
285 | mutex_exit(&sc->sc_slock); |
286 | |
287 | bp->b_error = (write ? EROFS : ENXIO); |
288 | bp->b_resid = bp->b_bcount; |
289 | biodone(bp); |
290 | return; |
291 | } |
292 | |
293 | bp->b_rawblkno = bp->b_blkno; |
294 | bufq_put(sc->sc_bufq, bp); |
295 | cv_signal(&sc->sc_work_cv); |
296 | |
297 | mutex_exit(&sc->sc_slock); |
298 | } |
299 | |
300 | int |
301 | fss_read(dev_t dev, struct uio *uio, int flags) |
302 | { |
303 | return physio(fss_strategy, NULL, dev, B_READ, minphys, uio); |
304 | } |
305 | |
306 | int |
307 | fss_write(dev_t dev, struct uio *uio, int flags) |
308 | { |
309 | return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio); |
310 | } |
311 | |
312 | int |
313 | fss_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) |
314 | { |
315 | int error; |
316 | struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev)); |
317 | struct fss_set _fss; |
318 | struct fss_set *fss = (struct fss_set *)data; |
319 | struct fss_set50 *fss50 = (struct fss_set50 *)data; |
320 | struct fss_get *fsg = (struct fss_get *)data; |
321 | #ifndef _LP64 |
322 | struct fss_get50 *fsg50 = (struct fss_get50 *)data; |
323 | #endif |
324 | |
325 | switch (cmd) { |
326 | case FSSIOCSET50: |
327 | fss = &_fss; |
328 | fss->fss_mount = fss50->fss_mount; |
329 | fss->fss_bstore = fss50->fss_bstore; |
330 | fss->fss_csize = fss50->fss_csize; |
331 | fss->fss_flags = 0; |
332 | /* Fall through */ |
333 | case FSSIOCSET: |
334 | mutex_enter(&sc->sc_lock); |
335 | if ((flag & FWRITE) == 0) |
336 | error = EPERM; |
337 | else if ((sc->sc_flags & FSS_ACTIVE) != 0) |
338 | error = EBUSY; |
339 | else |
340 | error = fss_create_snapshot(sc, fss, l); |
341 | if (error == 0) |
342 | sc->sc_uflags = fss->fss_flags; |
343 | mutex_exit(&sc->sc_lock); |
344 | break; |
345 | |
346 | case FSSIOCCLR: |
347 | mutex_enter(&sc->sc_lock); |
348 | if ((flag & FWRITE) == 0) |
349 | error = EPERM; |
350 | else if ((sc->sc_flags & FSS_ACTIVE) == 0) |
351 | error = ENXIO; |
352 | else |
353 | error = fss_delete_snapshot(sc, l); |
354 | mutex_exit(&sc->sc_lock); |
355 | break; |
356 | |
357 | #ifndef _LP64 |
358 | case FSSIOCGET50: |
359 | mutex_enter(&sc->sc_lock); |
360 | switch (sc->sc_flags & (FSS_PERSISTENT | FSS_ACTIVE)) { |
361 | case FSS_ACTIVE: |
362 | memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN); |
363 | fsg50->fsg_csize = FSS_CLSIZE(sc); |
364 | timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time); |
365 | fsg50->fsg_mount_size = sc->sc_clcount; |
366 | fsg50->fsg_bs_size = sc->sc_clnext; |
367 | error = 0; |
368 | break; |
369 | case FSS_PERSISTENT | FSS_ACTIVE: |
370 | memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN); |
371 | fsg50->fsg_csize = 0; |
372 | timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time); |
373 | fsg50->fsg_mount_size = 0; |
374 | fsg50->fsg_bs_size = 0; |
375 | error = 0; |
376 | break; |
377 | default: |
378 | error = ENXIO; |
379 | break; |
380 | } |
381 | mutex_exit(&sc->sc_lock); |
382 | break; |
383 | #endif /* _LP64 */ |
384 | |
385 | case FSSIOCGET: |
386 | mutex_enter(&sc->sc_lock); |
387 | switch (sc->sc_flags & (FSS_PERSISTENT | FSS_ACTIVE)) { |
388 | case FSS_ACTIVE: |
389 | memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); |
390 | fsg->fsg_csize = FSS_CLSIZE(sc); |
391 | fsg->fsg_time = sc->sc_time; |
392 | fsg->fsg_mount_size = sc->sc_clcount; |
393 | fsg->fsg_bs_size = sc->sc_clnext; |
394 | error = 0; |
395 | break; |
396 | case FSS_PERSISTENT | FSS_ACTIVE: |
397 | memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); |
398 | fsg->fsg_csize = 0; |
399 | fsg->fsg_time = sc->sc_time; |
400 | fsg->fsg_mount_size = 0; |
401 | fsg->fsg_bs_size = 0; |
402 | error = 0; |
403 | break; |
404 | default: |
405 | error = ENXIO; |
406 | break; |
407 | } |
408 | mutex_exit(&sc->sc_lock); |
409 | break; |
410 | |
411 | case FSSIOFSET: |
412 | mutex_enter(&sc->sc_slock); |
413 | sc->sc_uflags = *(int *)data; |
414 | mutex_exit(&sc->sc_slock); |
415 | error = 0; |
416 | break; |
417 | |
418 | case FSSIOFGET: |
419 | mutex_enter(&sc->sc_slock); |
420 | *(int *)data = sc->sc_uflags; |
421 | mutex_exit(&sc->sc_slock); |
422 | error = 0; |
423 | break; |
424 | |
425 | default: |
426 | error = EINVAL; |
427 | break; |
428 | } |
429 | |
430 | return error; |
431 | } |
432 | |
/*
 * Size routine of the block device switch: always reports -1.
 */
int
fss_size(dev_t dev)
{
	const int no_size = -1;

	(void)dev;

	return no_size;
}
438 | |
439 | int |
440 | fss_dump(dev_t dev, daddr_t blkno, void *va, |
441 | size_t size) |
442 | { |
443 | return EROFS; |
444 | } |
445 | |
446 | /* |
447 | * An error occurred reading or writing the snapshot or backing store. |
448 | * If it is the first error log to console and disestablish cow handler. |
449 | * The caller holds the mutex. |
450 | */ |
451 | static inline void |
452 | fss_error(struct fss_softc *sc, const char *msg) |
453 | { |
454 | |
455 | if ((sc->sc_flags & (FSS_ACTIVE | FSS_ERROR)) != FSS_ACTIVE) |
456 | return; |
457 | |
458 | aprint_error_dev(sc->sc_dev, "snapshot invalid: %s\n" , msg); |
459 | if ((sc->sc_flags & FSS_PERSISTENT) == 0) |
460 | fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc); |
461 | sc->sc_flags |= FSS_ERROR; |
462 | } |
463 | |
464 | /* |
465 | * Allocate the variable sized parts of the softc and |
466 | * fork the kernel thread. |
467 | * |
468 | * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size |
469 | * must be initialized. |
470 | */ |
471 | static int |
472 | fss_softc_alloc(struct fss_softc *sc) |
473 | { |
474 | int i, error; |
475 | |
476 | if ((sc->sc_flags & FSS_PERSISTENT) == 0) { |
477 | sc->sc_copied = |
478 | kmem_zalloc(howmany(sc->sc_clcount, NBBY), KM_SLEEP); |
479 | if (sc->sc_copied == NULL) |
480 | return(ENOMEM); |
481 | |
482 | sc->sc_cache = kmem_alloc(sc->sc_cache_size * |
483 | sizeof(struct fss_cache), KM_SLEEP); |
484 | if (sc->sc_cache == NULL) |
485 | return(ENOMEM); |
486 | |
487 | for (i = 0; i < sc->sc_cache_size; i++) { |
488 | sc->sc_cache[i].fc_type = FSS_CACHE_FREE; |
489 | sc->sc_cache[i].fc_data = |
490 | kmem_alloc(FSS_CLSIZE(sc), KM_SLEEP); |
491 | if (sc->sc_cache[i].fc_data == NULL) |
492 | return(ENOMEM); |
493 | cv_init(&sc->sc_cache[i].fc_state_cv, "cowwait1" ); |
494 | } |
495 | |
496 | sc->sc_indir_valid = |
497 | kmem_zalloc(howmany(sc->sc_indir_size, NBBY), KM_SLEEP); |
498 | if (sc->sc_indir_valid == NULL) |
499 | return(ENOMEM); |
500 | |
501 | sc->sc_indir_data = kmem_zalloc(FSS_CLSIZE(sc), KM_SLEEP); |
502 | if (sc->sc_indir_data == NULL) |
503 | return(ENOMEM); |
504 | } else { |
505 | sc->sc_copied = NULL; |
506 | sc->sc_cache = NULL; |
507 | sc->sc_indir_valid = NULL; |
508 | sc->sc_indir_data = NULL; |
509 | } |
510 | |
511 | sc->sc_flags |= FSS_BS_THREAD; |
512 | if ((error = kthread_create(PRI_BIO, KTHREAD_MUSTJOIN, NULL, |
513 | fss_bs_thread, sc, &sc->sc_bs_lwp, |
514 | "%s" , device_xname(sc->sc_dev))) != 0) { |
515 | sc->sc_flags &= ~FSS_BS_THREAD; |
516 | return error; |
517 | } |
518 | |
519 | disk_attach(sc->sc_dkdev); |
520 | |
521 | return 0; |
522 | } |
523 | |
524 | /* |
525 | * Free the variable sized parts of the softc. |
526 | */ |
527 | static void |
528 | fss_softc_free(struct fss_softc *sc) |
529 | { |
530 | int i; |
531 | |
532 | if ((sc->sc_flags & FSS_BS_THREAD) != 0) { |
533 | mutex_enter(&sc->sc_slock); |
534 | sc->sc_flags &= ~FSS_BS_THREAD; |
535 | cv_signal(&sc->sc_work_cv); |
536 | mutex_exit(&sc->sc_slock); |
537 | kthread_join(sc->sc_bs_lwp); |
538 | |
539 | disk_detach(sc->sc_dkdev); |
540 | } |
541 | |
542 | if (sc->sc_copied != NULL) |
543 | kmem_free(sc->sc_copied, howmany(sc->sc_clcount, NBBY)); |
544 | sc->sc_copied = NULL; |
545 | |
546 | if (sc->sc_cache != NULL) { |
547 | for (i = 0; i < sc->sc_cache_size; i++) |
548 | if (sc->sc_cache[i].fc_data != NULL) { |
549 | cv_destroy(&sc->sc_cache[i].fc_state_cv); |
550 | kmem_free(sc->sc_cache[i].fc_data, |
551 | FSS_CLSIZE(sc)); |
552 | } |
553 | kmem_free(sc->sc_cache, |
554 | sc->sc_cache_size*sizeof(struct fss_cache)); |
555 | } |
556 | sc->sc_cache = NULL; |
557 | |
558 | if (sc->sc_indir_valid != NULL) |
559 | kmem_free(sc->sc_indir_valid, howmany(sc->sc_indir_size, NBBY)); |
560 | sc->sc_indir_valid = NULL; |
561 | |
562 | if (sc->sc_indir_data != NULL) |
563 | kmem_free(sc->sc_indir_data, FSS_CLSIZE(sc)); |
564 | sc->sc_indir_data = NULL; |
565 | } |
566 | |
567 | /* |
568 | * Set all active snapshots on this file system into ERROR state. |
569 | */ |
570 | static void |
571 | fss_unmount_hook(struct mount *mp) |
572 | { |
573 | int i; |
574 | struct fss_softc *sc; |
575 | |
576 | mutex_enter(&fss_device_lock); |
577 | for (i = 0; i < fss_cd.cd_ndevs; i++) { |
578 | if ((sc = device_lookup_private(&fss_cd, i)) == NULL) |
579 | continue; |
580 | mutex_enter(&sc->sc_slock); |
581 | if ((sc->sc_flags & FSS_ACTIVE) != 0 && sc->sc_mount == mp) |
582 | fss_error(sc, "forced by unmount" ); |
583 | mutex_exit(&sc->sc_slock); |
584 | } |
585 | mutex_exit(&fss_device_lock); |
586 | } |
587 | |
588 | /* |
589 | * A buffer is written to the snapshotted block device. Copy to |
590 | * backing store if needed. |
591 | */ |
592 | static int |
593 | fss_copy_on_write(void *v, struct buf *bp, bool data_valid) |
594 | { |
595 | int error; |
596 | u_int32_t cl, ch, c; |
597 | struct fss_softc *sc = v; |
598 | |
599 | mutex_enter(&sc->sc_slock); |
600 | if (!FSS_ISVALID(sc)) { |
601 | mutex_exit(&sc->sc_slock); |
602 | return 0; |
603 | } |
604 | |
605 | cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); |
606 | ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); |
607 | error = 0; |
608 | if (curlwp == uvm.pagedaemon_lwp) { |
609 | for (c = cl; c <= ch; c++) |
610 | if (isclr(sc->sc_copied, c)) { |
611 | error = ENOMEM; |
612 | break; |
613 | } |
614 | } |
615 | mutex_exit(&sc->sc_slock); |
616 | |
617 | if (error == 0) |
618 | for (c = cl; c <= ch; c++) { |
619 | error = fss_read_cluster(sc, c); |
620 | if (error) |
621 | break; |
622 | } |
623 | |
624 | return error; |
625 | } |
626 | |
627 | /* |
628 | * Lookup and open needed files. |
629 | * |
630 | * For file system internal snapshot initializes sc_mntname, sc_mount, |
631 | * sc_bs_vp and sc_time. |
632 | * |
633 | * Otherwise returns dev and size of the underlying block device. |
634 | * Initializes sc_mntname, sc_mount, sc_bdev, sc_bs_vp and sc_mount |
635 | */ |
636 | static int |
637 | fss_create_files(struct fss_softc *sc, struct fss_set *fss, |
638 | off_t *bsize, struct lwp *l) |
639 | { |
640 | int error, bits, fsbsize; |
641 | uint64_t numsec; |
642 | unsigned int secsize; |
643 | struct timespec ts; |
644 | /* nd -> nd2 to reduce mistakes while updating only some namei calls */ |
645 | struct pathbuf *pb2; |
646 | struct nameidata nd2; |
647 | struct vnode *vp; |
648 | |
649 | /* |
650 | * Get the mounted file system. |
651 | */ |
652 | |
653 | error = namei_simple_user(fss->fss_mount, |
654 | NSM_FOLLOW_NOEMULROOT, &vp); |
655 | if (error != 0) |
656 | return error; |
657 | |
658 | if ((vp->v_vflag & VV_ROOT) != VV_ROOT) { |
659 | vrele(vp); |
660 | return EINVAL; |
661 | } |
662 | |
663 | sc->sc_mount = vp->v_mount; |
664 | memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN); |
665 | |
666 | vrele(vp); |
667 | |
668 | /* |
669 | * Check for file system internal snapshot. |
670 | */ |
671 | |
672 | error = namei_simple_user(fss->fss_bstore, |
673 | NSM_FOLLOW_NOEMULROOT, &vp); |
674 | if (error != 0) |
675 | return error; |
676 | |
677 | if (vp->v_type == VREG && vp->v_mount == sc->sc_mount) { |
678 | sc->sc_flags |= FSS_PERSISTENT; |
679 | sc->sc_bs_vp = vp; |
680 | |
681 | fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; |
682 | bits = sizeof(sc->sc_bs_bshift)*NBBY; |
683 | for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits; |
684 | sc->sc_bs_bshift++) |
685 | if (FSS_FSBSIZE(sc) == fsbsize) |
686 | break; |
687 | if (sc->sc_bs_bshift >= bits) |
688 | return EINVAL; |
689 | |
690 | sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; |
691 | sc->sc_clshift = 0; |
692 | |
693 | if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { |
694 | error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); |
695 | if (error) |
696 | return error; |
697 | } |
698 | error = vn_lock(vp, LK_EXCLUSIVE); |
699 | if (error != 0) |
700 | return error; |
701 | error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts); |
702 | TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts); |
703 | |
704 | VOP_UNLOCK(sc->sc_bs_vp); |
705 | |
706 | return error; |
707 | } |
708 | vrele(vp); |
709 | |
710 | /* |
711 | * Get the block device it is mounted on and its size. |
712 | */ |
713 | |
714 | error = spec_node_lookup_by_mount(sc->sc_mount, &vp); |
715 | if (error) |
716 | return error; |
717 | sc->sc_bdev = vp->v_rdev; |
718 | |
719 | error = getdisksize(vp, &numsec, &secsize); |
720 | vrele(vp); |
721 | if (error) |
722 | return error; |
723 | |
724 | *bsize = (off_t)numsec*secsize; |
725 | |
726 | /* |
727 | * Get the backing store |
728 | */ |
729 | |
730 | error = pathbuf_copyin(fss->fss_bstore, &pb2); |
731 | if (error) { |
732 | return error; |
733 | } |
734 | NDINIT(&nd2, LOOKUP, FOLLOW, pb2); |
735 | if ((error = vn_open(&nd2, FREAD|FWRITE, 0)) != 0) { |
736 | pathbuf_destroy(pb2); |
737 | return error; |
738 | } |
739 | VOP_UNLOCK(nd2.ni_vp); |
740 | |
741 | sc->sc_bs_vp = nd2.ni_vp; |
742 | |
743 | if (nd2.ni_vp->v_type != VREG && nd2.ni_vp->v_type != VCHR) { |
744 | pathbuf_destroy(pb2); |
745 | return EINVAL; |
746 | } |
747 | pathbuf_destroy(pb2); |
748 | |
749 | if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { |
750 | error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); |
751 | if (error) |
752 | return error; |
753 | } |
754 | if (sc->sc_bs_vp->v_type == VREG) { |
755 | fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; |
756 | if (fsbsize & (fsbsize-1)) /* No power of two */ |
757 | return EINVAL; |
758 | for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32; |
759 | sc->sc_bs_bshift++) |
760 | if (FSS_FSBSIZE(sc) == fsbsize) |
761 | break; |
762 | if (sc->sc_bs_bshift >= 32) |
763 | return EINVAL; |
764 | sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; |
765 | } else { |
766 | sc->sc_bs_bshift = DEV_BSHIFT; |
767 | sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; |
768 | } |
769 | |
770 | return 0; |
771 | } |
772 | |
773 | /* |
774 | * Create a snapshot. |
775 | */ |
776 | static int |
777 | fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l) |
778 | { |
779 | int len, error; |
780 | u_int32_t csize; |
781 | off_t bsize; |
782 | |
783 | bsize = 0; /* XXX gcc */ |
784 | |
785 | /* |
786 | * Open needed files. |
787 | */ |
788 | if ((error = fss_create_files(sc, fss, &bsize, l)) != 0) |
789 | goto bad; |
790 | |
791 | if (sc->sc_flags & FSS_PERSISTENT) { |
792 | fss_softc_alloc(sc); |
793 | sc->sc_flags |= FSS_ACTIVE; |
794 | return 0; |
795 | } |
796 | |
797 | /* |
798 | * Set cluster size. Must be a power of two and |
799 | * a multiple of backing store block size. |
800 | */ |
801 | if (fss->fss_csize <= 0) |
802 | csize = MAXPHYS; |
803 | else |
804 | csize = fss->fss_csize; |
805 | if (bsize/csize > FSS_CLUSTER_MAX) |
806 | csize = bsize/FSS_CLUSTER_MAX+1; |
807 | |
808 | for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; |
809 | sc->sc_clshift++) |
810 | if (FSS_CLSIZE(sc) >= csize) |
811 | break; |
812 | if (sc->sc_clshift >= 32) { |
813 | error = EINVAL; |
814 | goto bad; |
815 | } |
816 | sc->sc_clmask = FSS_CLSIZE(sc)-1; |
817 | |
818 | /* |
819 | * Set number of cache slots. |
820 | */ |
821 | if (FSS_CLSIZE(sc) <= 8192) |
822 | sc->sc_cache_size = 32; |
823 | else if (FSS_CLSIZE(sc) <= 65536) |
824 | sc->sc_cache_size = 8; |
825 | else |
826 | sc->sc_cache_size = 4; |
827 | |
828 | /* |
829 | * Set number of clusters and size of last cluster. |
830 | */ |
831 | sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1; |
832 | sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1; |
833 | |
834 | /* |
835 | * Set size of indirect table. |
836 | */ |
837 | len = sc->sc_clcount*sizeof(u_int32_t); |
838 | sc->sc_indir_size = FSS_BTOCL(sc, len)+1; |
839 | sc->sc_clnext = sc->sc_indir_size; |
840 | sc->sc_indir_cur = 0; |
841 | |
842 | if ((error = fss_softc_alloc(sc)) != 0) |
843 | goto bad; |
844 | |
845 | /* |
846 | * Activate the snapshot. |
847 | */ |
848 | |
849 | if ((error = vfs_suspend(sc->sc_mount, 0)) != 0) |
850 | goto bad; |
851 | |
852 | microtime(&sc->sc_time); |
853 | |
854 | error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc); |
855 | if (error == 0) |
856 | sc->sc_flags |= FSS_ACTIVE; |
857 | |
858 | vfs_resume(sc->sc_mount); |
859 | |
860 | if (error != 0) |
861 | goto bad; |
862 | |
863 | aprint_debug_dev(sc->sc_dev, "%s snapshot active\n" , sc->sc_mntname); |
864 | aprint_debug_dev(sc->sc_dev, |
865 | "%u clusters of %u, %u cache slots, %u indir clusters\n" , |
866 | sc->sc_clcount, FSS_CLSIZE(sc), |
867 | sc->sc_cache_size, sc->sc_indir_size); |
868 | |
869 | return 0; |
870 | |
871 | bad: |
872 | fss_softc_free(sc); |
873 | if (sc->sc_bs_vp != NULL) { |
874 | if (sc->sc_flags & FSS_PERSISTENT) |
875 | vrele(sc->sc_bs_vp); |
876 | else |
877 | vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); |
878 | } |
879 | sc->sc_bs_vp = NULL; |
880 | |
881 | return error; |
882 | } |
883 | |
884 | /* |
885 | * Delete a snapshot. |
886 | */ |
887 | static int |
888 | fss_delete_snapshot(struct fss_softc *sc, struct lwp *l) |
889 | { |
890 | |
891 | if ((sc->sc_flags & (FSS_PERSISTENT | FSS_ERROR)) == 0) |
892 | fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc); |
893 | |
894 | mutex_enter(&sc->sc_slock); |
895 | sc->sc_flags &= ~(FSS_ACTIVE|FSS_ERROR); |
896 | sc->sc_mount = NULL; |
897 | sc->sc_bdev = NODEV; |
898 | mutex_exit(&sc->sc_slock); |
899 | |
900 | fss_softc_free(sc); |
901 | if (sc->sc_flags & FSS_PERSISTENT) |
902 | vrele(sc->sc_bs_vp); |
903 | else |
904 | vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); |
905 | sc->sc_bs_vp = NULL; |
906 | sc->sc_flags &= ~FSS_PERSISTENT; |
907 | |
908 | return 0; |
909 | } |
910 | |
911 | /* |
912 | * Read a cluster from the snapshotted block device to the cache. |
913 | */ |
914 | static int |
915 | fss_read_cluster(struct fss_softc *sc, u_int32_t cl) |
916 | { |
917 | int error, todo, offset, len; |
918 | daddr_t dblk; |
919 | struct buf *bp, *mbp; |
920 | struct fss_cache *scp, *scl; |
921 | |
922 | /* |
923 | * Get a free cache slot. |
924 | */ |
925 | scl = sc->sc_cache+sc->sc_cache_size; |
926 | |
927 | mutex_enter(&sc->sc_slock); |
928 | |
929 | restart: |
930 | if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) { |
931 | mutex_exit(&sc->sc_slock); |
932 | return 0; |
933 | } |
934 | |
935 | for (scp = sc->sc_cache; scp < scl; scp++) |
936 | if (scp->fc_cluster == cl) { |
937 | if (scp->fc_type == FSS_CACHE_VALID) { |
938 | mutex_exit(&sc->sc_slock); |
939 | return 0; |
940 | } else if (scp->fc_type == FSS_CACHE_BUSY) { |
941 | cv_wait(&scp->fc_state_cv, &sc->sc_slock); |
942 | goto restart; |
943 | } |
944 | } |
945 | |
946 | for (scp = sc->sc_cache; scp < scl; scp++) |
947 | if (scp->fc_type == FSS_CACHE_FREE) { |
948 | scp->fc_type = FSS_CACHE_BUSY; |
949 | scp->fc_cluster = cl; |
950 | break; |
951 | } |
952 | if (scp >= scl) { |
953 | cv_wait(&sc->sc_cache_cv, &sc->sc_slock); |
954 | goto restart; |
955 | } |
956 | |
957 | mutex_exit(&sc->sc_slock); |
958 | |
959 | /* |
960 | * Start the read. |
961 | */ |
962 | dblk = btodb(FSS_CLTOB(sc, cl)); |
963 | if (cl == sc->sc_clcount-1) { |
964 | todo = sc->sc_clresid; |
965 | memset((char *)scp->fc_data + todo, 0, FSS_CLSIZE(sc) - todo); |
966 | } else |
967 | todo = FSS_CLSIZE(sc); |
968 | offset = 0; |
969 | mbp = getiobuf(NULL, true); |
970 | mbp->b_bufsize = todo; |
971 | mbp->b_data = scp->fc_data; |
972 | mbp->b_resid = mbp->b_bcount = todo; |
973 | mbp->b_flags = B_READ; |
974 | mbp->b_cflags = BC_BUSY; |
975 | mbp->b_dev = sc->sc_bdev; |
976 | while (todo > 0) { |
977 | len = todo; |
978 | if (len > MAXPHYS) |
979 | len = MAXPHYS; |
980 | if (btodb(FSS_CLTOB(sc, cl)) == dblk && len == todo) |
981 | bp = mbp; |
982 | else { |
983 | bp = getiobuf(NULL, true); |
984 | nestiobuf_setup(mbp, bp, offset, len); |
985 | } |
986 | bp->b_lblkno = 0; |
987 | bp->b_blkno = dblk; |
988 | bdev_strategy(bp); |
989 | dblk += btodb(len); |
990 | offset += len; |
991 | todo -= len; |
992 | } |
993 | error = biowait(mbp); |
994 | putiobuf(mbp); |
995 | |
996 | mutex_enter(&sc->sc_slock); |
997 | scp->fc_type = (error ? FSS_CACHE_FREE : FSS_CACHE_VALID); |
998 | cv_broadcast(&scp->fc_state_cv); |
999 | if (error == 0) { |
1000 | setbit(sc->sc_copied, scp->fc_cluster); |
1001 | cv_signal(&sc->sc_work_cv); |
1002 | } |
1003 | mutex_exit(&sc->sc_slock); |
1004 | |
1005 | return error; |
1006 | } |
1007 | |
1008 | /* |
1009 | * Read/write clusters from/to backing store. |
1010 | * For persistent snapshots must be called with cl == 0. off is the |
1011 | * offset into the snapshot. |
1012 | */ |
1013 | static int |
1014 | fss_bs_io(struct fss_softc *sc, fss_io_type rw, |
1015 | u_int32_t cl, off_t off, int len, void *data) |
1016 | { |
1017 | int error; |
1018 | |
1019 | off += FSS_CLTOB(sc, cl); |
1020 | |
1021 | vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY); |
1022 | |
1023 | error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp, |
1024 | data, len, off, UIO_SYSSPACE, |
1025 | IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_NODELOCKED, |
1026 | sc->sc_bs_lwp->l_cred, NULL, NULL); |
1027 | if (error == 0) { |
1028 | mutex_enter(sc->sc_bs_vp->v_interlock); |
1029 | error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), |
1030 | round_page(off+len), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); |
1031 | } |
1032 | |
1033 | VOP_UNLOCK(sc->sc_bs_vp); |
1034 | |
1035 | return error; |
1036 | } |
1037 | |
1038 | /* |
1039 | * Get a pointer to the indirect slot for this cluster. |
1040 | */ |
1041 | static u_int32_t * |
1042 | fss_bs_indir(struct fss_softc *sc, u_int32_t cl) |
1043 | { |
1044 | u_int32_t icl; |
1045 | int ioff; |
1046 | |
1047 | icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t)); |
1048 | ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t)); |
1049 | |
1050 | if (sc->sc_indir_cur == icl) |
1051 | return &sc->sc_indir_data[ioff]; |
1052 | |
1053 | if (sc->sc_indir_dirty) { |
1054 | if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0, |
1055 | FSS_CLSIZE(sc), (void *)sc->sc_indir_data) != 0) |
1056 | return NULL; |
1057 | setbit(sc->sc_indir_valid, sc->sc_indir_cur); |
1058 | } |
1059 | |
1060 | sc->sc_indir_dirty = 0; |
1061 | sc->sc_indir_cur = icl; |
1062 | |
1063 | if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) { |
1064 | if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0, |
1065 | FSS_CLSIZE(sc), (void *)sc->sc_indir_data) != 0) |
1066 | return NULL; |
1067 | } else |
1068 | memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc)); |
1069 | |
1070 | return &sc->sc_indir_data[ioff]; |
1071 | } |
1072 | |
1073 | /* |
1074 | * The kernel thread (one for every active snapshot). |
1075 | * |
1076 | * After wakeup it cleans the cache and runs the I/O requests. |
1077 | */ |
1078 | static void |
1079 | fss_bs_thread(void *arg) |
1080 | { |
1081 | bool thread_idle, is_valid; |
1082 | int error, i, todo, len, crotor, is_read; |
1083 | long off; |
1084 | char *addr; |
1085 | u_int32_t c, cl, ch, *indirp; |
1086 | struct buf *bp, *nbp; |
1087 | struct fss_softc *sc; |
1088 | struct fss_cache *scp, *scl; |
1089 | |
1090 | sc = arg; |
1091 | scl = sc->sc_cache+sc->sc_cache_size; |
1092 | crotor = 0; |
1093 | thread_idle = false; |
1094 | |
1095 | mutex_enter(&sc->sc_slock); |
1096 | |
1097 | for (;;) { |
1098 | if (thread_idle) |
1099 | cv_wait(&sc->sc_work_cv, &sc->sc_slock); |
1100 | thread_idle = true; |
1101 | if ((sc->sc_flags & FSS_BS_THREAD) == 0) { |
1102 | mutex_exit(&sc->sc_slock); |
1103 | kthread_exit(0); |
1104 | } |
1105 | |
1106 | /* |
1107 | * Process I/O requests (persistent) |
1108 | */ |
1109 | |
1110 | if (sc->sc_flags & FSS_PERSISTENT) { |
1111 | if ((bp = bufq_get(sc->sc_bufq)) == NULL) |
1112 | continue; |
1113 | is_valid = FSS_ISVALID(sc); |
1114 | is_read = (bp->b_flags & B_READ); |
1115 | thread_idle = false; |
1116 | mutex_exit(&sc->sc_slock); |
1117 | |
1118 | if (is_valid) { |
1119 | disk_busy(sc->sc_dkdev); |
1120 | error = fss_bs_io(sc, FSS_READ, 0, |
1121 | dbtob(bp->b_blkno), bp->b_bcount, |
1122 | bp->b_data); |
1123 | disk_unbusy(sc->sc_dkdev, |
1124 | (error ? 0 : bp->b_bcount), is_read); |
1125 | } else |
1126 | error = ENXIO; |
1127 | |
1128 | bp->b_error = error; |
1129 | bp->b_resid = (error ? bp->b_bcount : 0); |
1130 | biodone(bp); |
1131 | |
1132 | mutex_enter(&sc->sc_slock); |
1133 | continue; |
1134 | } |
1135 | |
1136 | /* |
1137 | * Clean the cache |
1138 | */ |
1139 | for (i = 0; i < sc->sc_cache_size; i++) { |
1140 | crotor = (crotor + 1) % sc->sc_cache_size; |
1141 | scp = sc->sc_cache + crotor; |
1142 | if (scp->fc_type != FSS_CACHE_VALID) |
1143 | continue; |
1144 | mutex_exit(&sc->sc_slock); |
1145 | |
1146 | thread_idle = false; |
1147 | indirp = fss_bs_indir(sc, scp->fc_cluster); |
1148 | if (indirp != NULL) { |
1149 | error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext, |
1150 | 0, FSS_CLSIZE(sc), scp->fc_data); |
1151 | } else |
1152 | error = EIO; |
1153 | |
1154 | mutex_enter(&sc->sc_slock); |
1155 | if (error == 0) { |
1156 | *indirp = sc->sc_clnext++; |
1157 | sc->sc_indir_dirty = 1; |
1158 | } else |
1159 | fss_error(sc, "write error on backing store" ); |
1160 | |
1161 | scp->fc_type = FSS_CACHE_FREE; |
1162 | cv_broadcast(&sc->sc_cache_cv); |
1163 | break; |
1164 | } |
1165 | |
1166 | /* |
1167 | * Process I/O requests |
1168 | */ |
1169 | if ((bp = bufq_get(sc->sc_bufq)) == NULL) |
1170 | continue; |
1171 | is_valid = FSS_ISVALID(sc); |
1172 | is_read = (bp->b_flags & B_READ); |
1173 | thread_idle = false; |
1174 | |
1175 | if (!is_valid) { |
1176 | mutex_exit(&sc->sc_slock); |
1177 | |
1178 | bp->b_error = ENXIO; |
1179 | bp->b_resid = bp->b_bcount; |
1180 | biodone(bp); |
1181 | |
1182 | mutex_enter(&sc->sc_slock); |
1183 | continue; |
1184 | } |
1185 | |
1186 | disk_busy(sc->sc_dkdev); |
1187 | |
1188 | /* |
1189 | * First read from the snapshotted block device unless |
1190 | * this request is completely covered by backing store. |
1191 | */ |
1192 | |
1193 | cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); |
1194 | off = FSS_CLOFF(sc, dbtob(bp->b_blkno)); |
1195 | ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); |
1196 | error = 0; |
1197 | bp->b_resid = 0; |
1198 | bp->b_error = 0; |
1199 | for (c = cl; c <= ch; c++) { |
1200 | if (isset(sc->sc_copied, c)) |
1201 | continue; |
1202 | mutex_exit(&sc->sc_slock); |
1203 | |
1204 | /* Not on backing store, read from device. */ |
1205 | nbp = getiobuf(NULL, true); |
1206 | nbp->b_flags = B_READ; |
1207 | nbp->b_resid = nbp->b_bcount = bp->b_bcount; |
1208 | nbp->b_bufsize = bp->b_bcount; |
1209 | nbp->b_data = bp->b_data; |
1210 | nbp->b_blkno = bp->b_blkno; |
1211 | nbp->b_lblkno = 0; |
1212 | nbp->b_dev = sc->sc_bdev; |
1213 | SET(nbp->b_cflags, BC_BUSY); /* mark buffer busy */ |
1214 | |
1215 | bdev_strategy(nbp); |
1216 | |
1217 | error = biowait(nbp); |
1218 | if (error != 0) { |
1219 | bp->b_resid = bp->b_bcount; |
1220 | bp->b_error = nbp->b_error; |
1221 | disk_unbusy(sc->sc_dkdev, 0, is_read); |
1222 | biodone(bp); |
1223 | } |
1224 | putiobuf(nbp); |
1225 | |
1226 | mutex_enter(&sc->sc_slock); |
1227 | break; |
1228 | } |
1229 | if (error) |
1230 | continue; |
1231 | |
1232 | /* |
1233 | * Replace those parts that have been saved to backing store. |
1234 | */ |
1235 | |
1236 | addr = bp->b_data; |
1237 | todo = bp->b_bcount; |
1238 | for (c = cl; c <= ch; c++, off = 0, todo -= len, addr += len) { |
1239 | len = FSS_CLSIZE(sc)-off; |
1240 | if (len > todo) |
1241 | len = todo; |
1242 | if (isclr(sc->sc_copied, c)) |
1243 | continue; |
1244 | mutex_exit(&sc->sc_slock); |
1245 | |
1246 | indirp = fss_bs_indir(sc, c); |
1247 | if (indirp == NULL || *indirp == 0) { |
1248 | /* |
1249 | * Not on backing store. Either in cache |
1250 | * or hole in the snapshotted block device. |
1251 | */ |
1252 | |
1253 | mutex_enter(&sc->sc_slock); |
1254 | for (scp = sc->sc_cache; scp < scl; scp++) |
1255 | if (scp->fc_type == FSS_CACHE_VALID && |
1256 | scp->fc_cluster == c) |
1257 | break; |
1258 | if (scp < scl) |
1259 | memcpy(addr, (char *)scp->fc_data+off, |
1260 | len); |
1261 | else |
1262 | memset(addr, 0, len); |
1263 | continue; |
1264 | } |
1265 | |
1266 | /* |
1267 | * Read from backing store. |
1268 | */ |
1269 | error = |
1270 | fss_bs_io(sc, FSS_READ, *indirp, off, len, addr); |
1271 | |
1272 | mutex_enter(&sc->sc_slock); |
1273 | if (error) { |
1274 | bp->b_resid = bp->b_bcount; |
1275 | bp->b_error = error; |
1276 | break; |
1277 | } |
1278 | } |
1279 | mutex_exit(&sc->sc_slock); |
1280 | |
1281 | disk_unbusy(sc->sc_dkdev, (error ? 0 : bp->b_bcount), is_read); |
1282 | biodone(bp); |
1283 | |
1284 | mutex_enter(&sc->sc_slock); |
1285 | } |
1286 | } |
1287 | |
1288 | #ifdef _MODULE |
1289 | |
1290 | #include <sys/module.h> |
1291 | |
1292 | MODULE(MODULE_CLASS_DRIVER, fss, NULL); |
1293 | CFDRIVER_DECL(fss, DV_DISK, NULL); |
1294 | |
1295 | devmajor_t fss_bmajor = -1, fss_cmajor = -1; |
1296 | |
1297 | static int |
1298 | fss_modcmd(modcmd_t cmd, void *arg) |
1299 | { |
1300 | int error = 0; |
1301 | |
1302 | switch (cmd) { |
1303 | case MODULE_CMD_INIT: |
1304 | mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE); |
1305 | error = config_cfdriver_attach(&fss_cd); |
1306 | if (error) { |
1307 | mutex_destroy(&fss_device_lock); |
1308 | break; |
1309 | } |
1310 | error = config_cfattach_attach(fss_cd.cd_name, &fss_ca); |
1311 | if (error) { |
1312 | config_cfdriver_detach(&fss_cd); |
1313 | mutex_destroy(&fss_device_lock); |
1314 | break; |
1315 | } |
1316 | error = devsw_attach(fss_cd.cd_name, |
1317 | &fss_bdevsw, &fss_bmajor, &fss_cdevsw, &fss_cmajor); |
1318 | |
1319 | if (error) { |
1320 | config_cfattach_detach(fss_cd.cd_name, &fss_ca); |
1321 | config_cfdriver_detach(&fss_cd); |
1322 | mutex_destroy(&fss_device_lock); |
1323 | break; |
1324 | } |
1325 | break; |
1326 | |
1327 | case MODULE_CMD_FINI: |
1328 | devsw_detach(&fss_bdevsw, &fss_cdevsw); |
1329 | error = config_cfattach_detach(fss_cd.cd_name, &fss_ca); |
1330 | if (error) { |
1331 | devsw_attach(fss_cd.cd_name, &fss_bdevsw, &fss_bmajor, |
1332 | &fss_cdevsw, &fss_cmajor); |
1333 | break; |
1334 | } |
1335 | config_cfdriver_detach(&fss_cd); |
1336 | mutex_destroy(&fss_device_lock); |
1337 | break; |
1338 | |
1339 | default: |
1340 | error = ENOTTY; |
1341 | break; |
1342 | } |
1343 | |
1344 | return error; |
1345 | } |
1346 | |
1347 | #endif /* _MODULE */ |
1348 | |