/*	$NetBSD: subr_extent.c,v 1.79 2015/08/24 22:50:32 pooka Exp $	*/

/*-
 * Copyright (c) 1996, 1998, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_extent.c,v 1.79 2015/08/24 22:50:32 pooka Exp $");

#ifdef _KERNEL
#ifdef _KERNEL_OPT
#include "opt_lockdebug.h"
#endif

#include <sys/param.h>
#include <sys/extent.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <uvm/uvm_extern.h>

#elif defined(_EXTENT_TESTING)

/*
 * User-land definitions, so this code can fit into a testing harness.
 */
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/extent.h>

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Use multi-line #defines to avoid screwing up the kernel tags file;
 * without this, ctags produces a tags file where panic() shows up
 * in subr_extent.c rather than subr_prf.c.
 */
#define	\
kmem_alloc(s, flags)		malloc(s)
#define	\
kmem_free(p, s)			free(p)
#define	\
cv_wait_sig(cv, lock)		(EWOULDBLOCK)
#define	\
pool_get(pool, flags)		kmem_alloc((pool)->pr_size,0)
#define	\
pool_put(pool, rp)		kmem_free(rp,0)
#define	\
panic(a)			printf(a)
#define	mutex_init(a, b, c)
#define	mutex_destroy(a)
#define	mutex_enter(l)
#define	mutex_exit(l)
#define	cv_wait(cv, lock)
#define	cv_broadcast(cv)
#define	cv_init(a, b)
#define	cv_destroy(a)
#define	KMEM_IS_RUNNING			(1)
#define	IPL_VM				(0)
#define	MUTEX_DEFAULT			(0)
#endif
static struct pool expool;

/*
 * Macro to align to an arbitrary power-of-two boundary.
 */
#define EXTENT_ALIGN(_start, _align, _skew)		\
	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
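
/*
 * For illustration (values assumed, not from the original source):
 * with _align = 0x10 and _skew = 0, EXTENT_ALIGN(0x1003, 0x10, 0)
 * rounds up to 0x1010; with _skew = 0x4, EXTENT_ALIGN(0x1003, 0x10, 0x4)
 * yields 0x1004, i.e. the smallest value >= 0x1003 that is congruent
 * to the skew modulo the alignment.
 */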

/*
 * Create the extent_region pool.
 */
void
extent_init(void)
{

#if defined(_KERNEL)
	pool_init(&expool, sizeof(struct extent_region), 0, 0, 0,
	    "extent", NULL, IPL_VM);
#else
	expool.pr_size = sizeof(struct extent_region);
#endif
}

/*
 * Allocate an extent region descriptor.  EXTENT MUST NOT BE LOCKED.
 * We will handle any locking we may need.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;
	int exflags, error;

	/*
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	mutex_enter(&ex->ex_lock);
	exflags = ex->ex_flags;
	mutex_exit(&ex->ex_lock);

	if (exflags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		mutex_enter(&ex->ex_lock);
		for (;;) {
			if ((rp = LIST_FIRST(&fex->fex_freelist)) != NULL) {
				/*
				 * Don't muck with flags after pulling it off
				 * the freelist; it may have been dynamically
				 * allocated, and kindly given to us.  We
				 * need to remember that information.
				 */
				LIST_REMOVE(rp, er_link);
				mutex_exit(&ex->ex_lock);
				return (rp);
			}
			if (flags & EX_MALLOCOK) {
				mutex_exit(&ex->ex_lock);
				goto alloc;
			}
			if ((flags & EX_WAITOK) == 0) {
				mutex_exit(&ex->ex_lock);
				return (NULL);
			}
			ex->ex_flags |= EXF_FLWANTED;
			if ((flags & EX_CATCH) != 0)
				error = cv_wait_sig(&ex->ex_cv, &ex->ex_lock);
			else {
				cv_wait(&ex->ex_cv, &ex->ex_lock);
				error = 0;
			}
			if (error != 0) {
				mutex_exit(&ex->ex_lock);
				return (NULL);
			}
		}
	}

 alloc:
	rp = pool_get(&expool, (flags & EX_WAITOK) ? PR_WAITOK : 0);

	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

/*
 * Free an extent region descriptor.  EXTENT _MUST_ BE LOCKED!
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else
				pool_put(&expool, rp);
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

 wake_em_up:
		ex->ex_flags &= ~EXF_FLWANTED;
		cv_broadcast(&ex->ex_cv);
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&expool, rp);
}

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(const char *name, u_long start, u_long end,
    void *storage, size_t storagesize, int flags)
{
	struct extent *ex;
	char *cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifndef _KERNEL
	extent_init();
#endif

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("extent_create: name == NULL");
	if (end < start) {
		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
		    name, start, end);
		panic("extent_create: end < start");
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("extent_create: fixed extent, bad storagesize 0x%lx",
		    (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("extent_create: storage provided for non-fixed");
#endif

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = kmem_alloc(sizeof(*ex),
		    (flags & EX_WAITOK) ? KM_SLEEP : KM_NOSLEEP);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	mutex_init(&ex->ex_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&ex->ex_cv, "extent");
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;
	return (ex);
}
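
/*
 * Illustrative sketch (not part of the original file): creating a
 * fixed extent from caller-supplied storage.  EXTENT_FIXED_STORAGE_SIZE()
 * from <sys/extent.h> sizes the storage for a given number of
 * pre-allocated region descriptors; the names below are assumptions
 * for the example only.
 *
 *	static char demo_storage[EXTENT_FIXED_STORAGE_SIZE(8)];
 *	struct extent *demo_ex;
 *
 *	demo_ex = extent_create("demo", 0x1000, 0x1fff, demo_storage,
 *	    sizeof(demo_storage), EX_NOWAIT);
 */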

/*
 * Destroy an extent map.
 * Since we're freeing the data, there can't be any references
 * so we don't need any locking.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_destroy: NULL extent");
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

	cv_destroy(&ex->ex_cv);
	mutex_destroy(&ex->ex_lock);

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		kmem_free(ex, sizeof(*ex));
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    int flags, struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    (LIST_FIRST(&ex->ex_regions) != NULL) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if ((LIST_NEXT(after, er_link) != NULL) &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
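
/*
 * Illustrative example (values assumed): with regions [0x00-0x0f] and
 * [0x20-0x2f] in the list and coalescing enabled, inserting
 * [0x10-0x1f] exactly fills the gap; the first region is extended to
 * cover it, the second region's descriptor is removed and freed, and
 * the list ends up with a single region [0x00-0x2f].
 */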

/*
 * Allocate a specific region in an extent map.
 */
int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *last, *myrp;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_alloc_region: NULL extent");
	if (size < 1) {
		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
		    ex->ex_name, size);
		panic("extent_alloc_region: bad size");
	}
	if (end < start) {
		printf(
		    "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_alloc_region: overflow");
	}
#endif
#ifdef LOCKDEBUG
	if (flags & EX_WAITSPACE) {
		ASSERT_SLEEPABLE();
	}
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 *
	 * We don't lock to check the range, because those values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
		    ex->ex_name, ex->ex_start, ex->ex_end);
		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
		    start, end);
		panic("extent_alloc_region: region lies outside extent");
#else
		return (EINVAL);
#endif
	}

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.  Don't lock before
	 * here!  This could block.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		    "extent_alloc_region: can't allocate region descriptor\n");
#endif
		return (ENOMEM);
	}

	mutex_enter(&ex->ex_lock);
 alloc_start:

	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				if ((flags & EX_CATCH) != 0)
					error = cv_wait_sig(&ex->ex_cv,
					    &ex->ex_lock);
				else {
					cv_wait(&ex->ex_cv, &ex->ex_lock);
					error = 0;
				}
				if (error == 0)
					goto alloc_start;
				mutex_exit(&ex->ex_lock);
			} else {
				mutex_exit(&ex->ex_lock);
				error = EAGAIN;
			}
			extent_free_region_descriptor(ex, myrp);
			return error;
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, flags, last, myrp);
	mutex_exit(&ex->ex_lock);
	return (0);
}

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
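
/*
 * Illustrative check (assuming a 32-bit u_long): LE_OV(0xfffffff0, 0x20,
 * 0xffffffff) is false because 0xfffffff0 + 0x20 wraps to 0x10, failing
 * the ((x) + (y)) >= (x) clause; a naive (x + y) <= z comparison would
 * wrongly succeed on the wrapped sum.
 */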

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
int
extent_alloc_subregion1(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary,
    int flags, u_long *result)
{
	struct extent_region *rp, *myrp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/*
	 * Check arguments.
	 *
	 * We don't lock to check these, because these values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if (ex == NULL)
		panic("extent_alloc_subregion: NULL extent");
	if (result == NULL)
		panic("extent_alloc_subregion: NULL result pointer");
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    ex->ex_name, ex->ex_start, ex->ex_end);
		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
		    substart, subend);
		panic("extent_alloc_subregion: bad subregion");
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
		    ex->ex_name, size);
		panic("extent_alloc_subregion: bad size");
	}
	if (alignment == 0)
		panic("extent_alloc_subregion: bad alignment");
	if (boundary && (boundary < size)) {
		printf(
		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
		panic("extent_alloc_subregion: bad boundary");
	}
#endif
#ifdef LOCKDEBUG
	if (flags & EX_WAITSPACE) {
		ASSERT_SLEEPABLE();
	}
#endif

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.  Don't lock before
	 * here!  This could block.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		    "extent_alloc_subregion: can't allocate region descriptor\n");
#endif
		return (ENOMEM);
	}

 alloc_start:
	mutex_enter(&ex->ex_lock);

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of end of free region.  This is either the end of extent
	 * or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = EXTENT_ALIGN(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf(
		    "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    ex->ex_name, ex->ex_start, ex->ex_end, alignment);
		mutex_exit(&ex->ex_lock);
		panic("extent_alloc_subregion: overflow after alignment");
#else
		extent_free_region_descriptor(ex, myrp);
		mutex_exit(&ex->ex_lock);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and
		 * see if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = EXTENT_ALIGN(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = EXTENT_ALIGN(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 * There is no chunk large enough to hold the request.
	 *
	 * If EX_FAST was not specified, there is not an
	 * exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		if ((flags & EX_CATCH) != 0) {
			error = cv_wait_sig(&ex->ex_cv, &ex->ex_lock);
		} else {
			cv_wait(&ex->ex_cv, &ex->ex_lock);
			error = 0;
		}
		if (error == 0)
			goto alloc_start;
		mutex_exit(&ex->ex_lock);
	} else {
		mutex_exit(&ex->ex_lock);
		error = EAGAIN;
	}

	extent_free_region_descriptor(ex, myrp);
	return error;

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, flags, last, myrp);
	mutex_exit(&ex->ex_lock);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long start, u_long end, u_long size,
    u_long alignment, u_long boundary, int flags, u_long *result)
{

	return (extent_alloc_subregion1(ex, start, end, size, alignment,
	    0, boundary, flags, result));
}

int
extent_alloc(struct extent *ex, u_long size, u_long alignment, u_long boundary,
    int flags, u_long *result)
{

	return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end,
	    size, alignment, 0, boundary,
	    flags, result));
}

int
extent_alloc1(struct extent *ex, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, u_long *result)
{

	return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end,
	    size, alignment, skew, boundary,
	    flags, result));
}
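
/*
 * Illustrative usage sketch (not from the original source; names and
 * values are assumptions for the example): allocate a 0x100-byte,
 * 0x10-aligned region anywhere in an extent, then release it.  Error
 * handling beyond the return check is elided.
 *
 *	u_long where;
 *
 *	if (extent_alloc(demo_ex, 0x100, 0x10, 0, EX_NOWAIT, &where) == 0) {
 *		... use [where, where + 0xff] ...
 *		extent_free(demo_ex, where, 0x100, EX_NOWAIT);
 *	}
 */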

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int coalesce;

#ifdef DIAGNOSTIC
	/*
	 * Check arguments.
	 *
	 * We don't lock to check these, because these values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if (ex == NULL)
		panic("extent_free: NULL extent");
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: extent `%s', region not within extent",
		    ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: overflow");
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	mutex_enter(&ex->ex_lock);
	coalesce = (ex->ex_flags & EXF_NOCOALESCE) == 0;
	mutex_exit(&ex->ex_lock);

	if (coalesce) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	mutex_enter(&ex->ex_lock);

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
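	/*
	 * For example (values assumed): freeing [0x20, 0x2f] out of a
	 * region [0x10, 0x3f] is case 4; the region is split into
	 * [0x10, 0x1f] and [0x30, 0x3f] using the descriptor allocated
	 * above.
	 */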
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before the chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (!coalesce)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below. */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
	mutex_exit(&ex->ex_lock);
	extent_print(ex);
	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
	panic("extent_free: region not found");

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	cv_broadcast(&ex->ex_cv);
	mutex_exit(&ex->ex_lock);
	return (0);
}

void
extent_print(struct extent *ex)
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("extent_print: NULL extent");

	mutex_enter(&ex->ex_lock);

	printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		printf(" 0x%lx - 0x%lx\n", rp->er_start, rp->er_end);

	mutex_exit(&ex->ex_lock);
}