/* $NetBSD: x86_machdep.c,v 1.76 2016/11/15 15:00:56 maxv Exp $ */

/*-
 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
 * Copyright (c) 2005, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.76 2016/11/15 15:00:56 maxv Exp $");

#include "opt_modular.h"
#include "opt_physmem.h"
#include "opt_splash.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kcore.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/extent.h>
#include <sys/rnd.h>

#include <x86/cpuvar.h>
#include <x86/cputypes.h>
#include <x86/machdep.h>
#include <x86/nmi.h>
#include <x86/pio.h>

#include <dev/splash/splash.h>
#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>
#include <dev/mm.h>

#include <machine/bootinfo.h>
#include <machine/vmparam.h>

#include <uvm/uvm_extern.h>

#include "acpica.h"
#if NACPICA > 0
#include <dev/acpi/acpivar.h>
#endif

#include "opt_md.h"
#if defined(MEMORY_DISK_HOOKS) && defined(MEMORY_DISK_DYNAMIC)
#include <dev/md.h>
#endif

void (*x86_cpu_idle)(void);
static bool x86_cpu_idle_ipi;
static char x86_cpu_idle_text[16];

#ifdef XEN
char module_machine_amd64_xen[] = "amd64-xen";
char module_machine_i386_xen[] = "i386-xen";
char module_machine_i386pae_xen[] = "i386pae-xen";
#endif

/* --------------------------------------------------------------------- */

/*
 * Main bootinfo structure.  This is filled in by the bootstrap process
 * done in locore.S based on the information passed by the boot loader.
 */
struct bootinfo bootinfo;

/* --------------------------------------------------------------------- */

static kauth_listener_t x86_listener;

/*
 * Given the type of a bootinfo entry, looks for a matching item inside
 * the bootinfo structure.  If found, returns a pointer to it (which must
 * then be cast to the appropriate bootinfo_* type); otherwise, returns
 * NULL.
 */
void *
lookup_bootinfo(int type)
{
        bool found;
        int i;
        struct btinfo_common *bic;

        bic = (struct btinfo_common *)(bootinfo.bi_data);
        found = FALSE;
        for (i = 0; i < bootinfo.bi_nentries && !found; i++) {
                if (bic->type == type)
                        found = TRUE;
                else
                        bic = (struct btinfo_common *)
                            ((uint8_t *)bic + bic->len);
        }

        return found ? bic : NULL;
}

#ifdef notyet
/*
 * List the available bootinfo entries.
 */
static const char *btinfo_str[] = {
        BTINFO_STR
};

void
aprint_bootinfo(void)
{
        int i;
        struct btinfo_common *bic;

        aprint_normal("bootinfo:");
        bic = (struct btinfo_common *)(bootinfo.bi_data);
        for (i = 0; i < bootinfo.bi_nentries; i++) {
                if (bic->type >= 0 && bic->type < __arraycount(btinfo_str))
                        aprint_normal(" %s", btinfo_str[bic->type]);
                else
                        aprint_normal(" %d", bic->type);
                bic = (struct btinfo_common *)
                    ((uint8_t *)bic + bic->len);
        }
        aprint_normal("\n");
}
#endif

/*
 * mm_md_physacc: check if given pa is accessible.
 */
int
mm_md_physacc(paddr_t pa, vm_prot_t prot)
{
        extern phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
        extern int mem_cluster_cnt;
        int i;

        for (i = 0; i < mem_cluster_cnt; i++) {
                const phys_ram_seg_t *seg = &mem_clusters[i];
                paddr_t lstart = seg->start;

                if (lstart <= pa && pa - lstart <= seg->size) {
                        return 0;
                }
        }
        return kauth_authorize_machdep(kauth_cred_get(),
            KAUTH_MACHDEP_UNMANAGEDMEM, NULL, NULL, NULL, NULL);
}

#ifdef MODULAR
/*
 * Push any modules loaded by the boot loader.
 */
void
module_init_md(void)
{
        struct btinfo_modulelist *biml;
        struct bi_modulelist_entry *bi, *bimax;

        /* setup module path for XEN kernels */
#ifdef XEN
#if defined(amd64)
        module_machine = module_machine_amd64_xen;
#elif defined(i386)
#ifdef PAE
        module_machine = module_machine_i386pae_xen;
#else
        module_machine = module_machine_i386_xen;
#endif
#endif
#endif

        biml = lookup_bootinfo(BTINFO_MODULELIST);
        if (biml == NULL) {
                aprint_debug("No module info at boot\n");
                return;
        }

        bi = (struct bi_modulelist_entry *)((uint8_t *)biml + sizeof(*biml));
        bimax = bi + biml->num;
        for (; bi < bimax; bi++) {
                switch (bi->type) {
                case BI_MODULE_ELF:
                        aprint_debug("Prep module path=%s len=%d pa=%x\n",
                            bi->path, bi->len, bi->base);
                        KASSERT(trunc_page(bi->base) == bi->base);
                        module_prime(bi->path,
                            (void *)((uintptr_t)bi->base + KERNBASE),
                            bi->len);
                        break;
                case BI_MODULE_IMAGE:
#ifdef SPLASHSCREEN
                        aprint_debug("Splash image path=%s len=%d pa=%x\n",
                            bi->path, bi->len, bi->base);
                        KASSERT(trunc_page(bi->base) == bi->base);
                        splash_setimage(
                            (void *)((uintptr_t)bi->base + KERNBASE), bi->len);
#endif
                        break;
                case BI_MODULE_RND:
                        aprint_debug("Random seed data path=%s len=%d pa=%x\n",
                            bi->path, bi->len, bi->base);
                        KASSERT(trunc_page(bi->base) == bi->base);
                        rnd_seed(
                            (void *)((uintptr_t)bi->base + KERNBASE),
                            bi->len);
                        break;
                case BI_MODULE_FS:
                        aprint_debug("File-system image path=%s len=%d pa=%x\n",
                            bi->path, bi->len, bi->base);
                        KASSERT(trunc_page(bi->base) == bi->base);
#if defined(MEMORY_DISK_HOOKS) && defined(MEMORY_DISK_DYNAMIC)
                        md_root_setconf(
                            (void *)((uintptr_t)bi->base + KERNBASE),
                            bi->len);
#endif
                        break;
                default:
                        aprint_debug("Skipping non-ELF module\n");
                        break;
                }
        }
}
#endif /* MODULAR */

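/*
 * cpu_need_resched: note that a reschedule is wanted for the LWP running
 * on CPU "ci".  Records the resched flags and, depending on them and on
 * whether the target is the current CPU, posts an AST, triggers the kernel
 * preemption soft interrupt, or kicks the remote CPU with an IPI.
 */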
void
cpu_need_resched(struct cpu_info *ci, int flags)
{
        struct cpu_info *cur;
        lwp_t *l;

        KASSERT(kpreempt_disabled());
        cur = curcpu();
        l = ci->ci_data.cpu_onproc;
        ci->ci_want_resched |= flags;

        if (__predict_false((l->l_pflag & LP_INTR) != 0)) {
                /*
                 * No point doing anything, it will switch soon.
                 * Also here to prevent an assertion failure in
                 * kpreempt() due to preemption being set on a
                 * soft interrupt LWP.
                 */
                return;
        }

        if (l == ci->ci_data.cpu_idlelwp) {
                if (ci == cur)
                        return;
                if (x86_cpu_idle_ipi != false) {
                        cpu_kick(ci);
                }
                return;
        }

        if ((flags & RESCHED_KPREEMPT) != 0) {
#ifdef __HAVE_PREEMPTION
                atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE);
                if (ci == cur) {
                        softint_trigger(1 << SIR_PREEMPT);
                } else {
                        x86_send_ipi(ci, X86_IPI_KPREEMPT);
                }
                return;
#endif
        }

        aston(l, X86_AST_PREEMPT);
        if (ci == cur) {
                return;
        }
        if ((flags & RESCHED_IMMED) != 0) {
                cpu_kick(ci);
        }
}

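/*
 * cpu_signotify: notify the LWP "l" that it has a pending signal: post an
 * AST, and kick its CPU if that is not the current one.
 */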
void
cpu_signotify(struct lwp *l)
{

        KASSERT(kpreempt_disabled());
        aston(l, X86_AST_GENERIC);
        if (l->l_cpu != curcpu())
                cpu_kick(l->l_cpu);
}

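/*
 * cpu_need_proftick: arrange for the current LWP to be charged with a
 * pending profiling tick the next time it returns to user mode.
 */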
void
cpu_need_proftick(struct lwp *l)
{

        KASSERT(kpreempt_disabled());
        KASSERT(l->l_cpu == curcpu());

        l->l_pflag |= LP_OWEUPC;
        aston(l, X86_AST_GENERIC);
}

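/*
 * cpu_intr_p: return true if the current CPU is running in (hardware)
 * interrupt context.
 */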
bool
cpu_intr_p(void)
{
        int idepth;

        kpreempt_disable();
        idepth = curcpu()->ci_idepth;
        kpreempt_enable();
        return (idepth >= 0);
}

#ifdef __HAVE_PREEMPTION
/*
 * Called to check MD conditions that would prevent preemption, and to
 * arrange for those conditions to be rechecked later.
 */
bool
cpu_kpreempt_enter(uintptr_t where, int s)
{
        struct pcb *pcb;
        lwp_t *l;

        KASSERT(kpreempt_disabled());
        l = curlwp;

        /*
         * If SPL raised, can't go.  Note this implies that spin
         * mutexes at IPL_NONE are _not_ valid to use.
         */
        if (s > IPL_PREEMPT) {
                softint_trigger(1 << SIR_PREEMPT);
                aston(l, X86_AST_PREEMPT); /* paranoid */
                return false;
        }

        /* Must save cr2 or it could be clobbered. */
        pcb = lwp_getpcb(l);
        pcb->pcb_cr2 = rcr2();

        return true;
}

/*
 * Called after returning from a kernel preemption, and called with
 * preemption disabled.
 */
void
cpu_kpreempt_exit(uintptr_t where)
{
        extern char x86_copyfunc_start, x86_copyfunc_end;
        struct pcb *pcb;

        KASSERT(kpreempt_disabled());

        /*
         * If we interrupted any of the copy functions we must reload
         * the pmap when resuming, as they cannot tolerate it being
         * swapped out.
         */
        if (where >= (uintptr_t)&x86_copyfunc_start &&
            where < (uintptr_t)&x86_copyfunc_end) {
                pmap_load();
        }

        /* Restore cr2 only after the pmap, as pmap_load can block. */
        pcb = lwp_getpcb(curlwp);
        lcr2(pcb->pcb_cr2);
}

/*
 * Return true if preemption is disabled for MD reasons.  Must be called
 * with preemption disabled, and thus is only for diagnostic checks.
 */
bool
cpu_kpreempt_disabled(void)
{

        return curcpu()->ci_ilevel > IPL_NONE;
}
#endif /* __HAVE_PREEMPTION */

SYSCTL_SETUP(sysctl_machdep_cpu_idle, "sysctl machdep cpu_idle")
{
        const struct sysctlnode *mnode, *node;

        sysctl_createv(NULL, 0, NULL, &mnode,
            CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
            NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

        sysctl_createv(NULL, 0, &mnode, &node,
            CTLFLAG_PERMANENT, CTLTYPE_STRING, "idle-mechanism",
            SYSCTL_DESCR("Mechanism used for the idle loop."),
            NULL, 0, x86_cpu_idle_text, 0,
            CTL_CREATE, CTL_EOL);
}

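/*
 * x86_cpu_idle_init: choose the idle mechanism for this machine.  MWAIT is
 * preferred when the CPU advertises MONITOR/MWAIT and is not an AMD part;
 * otherwise plain HLT is used.  Xen kernels always use the Xen-specific
 * idle routine.
 */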
void
x86_cpu_idle_init(void)
{

#ifndef XEN
        if ((cpu_feature[1] & CPUID2_MONITOR) == 0 ||
            cpu_vendor == CPUVENDOR_AMD)
                x86_cpu_idle_set(x86_cpu_idle_halt, "halt", true);
        else
                x86_cpu_idle_set(x86_cpu_idle_mwait, "mwait", false);
#else
        x86_cpu_idle_set(x86_cpu_idle_xen, "xen", true);
#endif
}

void
x86_cpu_idle_get(void (**func)(void), char *text, size_t len)
{

        *func = x86_cpu_idle;

        (void)strlcpy(text, x86_cpu_idle_text, len);
}

void
x86_cpu_idle_set(void (*func)(void), const char *text, bool ipi)
{

        x86_cpu_idle = func;
        x86_cpu_idle_ipi = ipi;

        (void)strlcpy(x86_cpu_idle_text, text, sizeof(x86_cpu_idle_text));
}

#ifndef XEN

#define KBTOB(x)        ((size_t)(x) * 1024UL)
#define MBTOB(x)        ((size_t)(x) * 1024UL * 1024UL)

static struct {
        int freelist;
        uint64_t limit;
} x86_freelists[VM_NFREELIST] = {
        { VM_FREELIST_DEFAULT, 0 },
#ifdef VM_FREELIST_FIRST1T
        /* 40-bit addresses needed for modern graphics. */
        { VM_FREELIST_FIRST1T, 1ULL * 1024 * 1024 * 1024 * 1024 },
#endif
#ifdef VM_FREELIST_FIRST64G
        /* 36-bit addresses needed for oldish graphics. */
        { VM_FREELIST_FIRST64G, 64ULL * 1024 * 1024 * 1024 },
#endif
#ifdef VM_FREELIST_FIRST4G
        /* 32-bit addresses needed for PCI 32-bit DMA and old graphics. */
        { VM_FREELIST_FIRST4G, 4ULL * 1024 * 1024 * 1024 },
#endif
        /* 30-bit addresses needed for ancient graphics. */
        { VM_FREELIST_FIRST1G, 1ULL * 1024 * 1024 * 1024 },
        /* 24-bit addresses needed for ISA DMA. */
        { VM_FREELIST_FIRST16, 16 * 1024 * 1024 },
};

extern paddr_t avail_start, avail_end;

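/*
 * x86_select_freelist: return the VM freelist to allocate from for pages
 * that must lie below "maxaddr".  Returns VM_NFREELIST when all of
 * physical memory already fits below that limit, and panics if no
 * configured freelist can satisfy the constraint.
 */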
int
x86_select_freelist(uint64_t maxaddr)
{
        unsigned int i;

        if (avail_end <= maxaddr)
                return VM_NFREELIST;

        for (i = 0; i < __arraycount(x86_freelists); i++) {
                if ((x86_freelists[i].limit - 1) <= maxaddr)
                        return x86_freelists[i].freelist;
        }

        panic("no freelist for maximum address %"PRIx64, maxaddr);
}

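/*
 * x86_add_cluster: add one memory map segment.  The range is reserved in
 * the iomem extent map; BIM_Memory segments are additionally page-aligned,
 * clipped against the PHYSMEM_MAX_* limits and appended to mem_clusters[],
 * updating physmem and avail_end.
 */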
static int
x86_add_cluster(struct extent *iomem_ex, uint64_t seg_start, uint64_t seg_end,
    uint32_t type)
{
        uint64_t new_physmem = 0;
        phys_ram_seg_t *cluster;
        int i;

#ifdef i386
#ifdef PAE
#define TOPLIMIT        0x1000000000ULL         /* 64GB */
#else
#define TOPLIMIT        0x100000000ULL          /* 4GB */
#endif
#else
#define TOPLIMIT        0x100000000000ULL       /* 16TB */
#endif

        if (seg_end > TOPLIMIT) {
                aprint_verbose("WARNING: skipping large memory map entry: "
                    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n",
                    seg_start, (seg_end - seg_start), type);
                return 0;
        }

        /*
         * XXX: Chop the last page off the size so that it can fit in
         * avail_end.
         */
        if (seg_end == TOPLIMIT)
                seg_end -= PAGE_SIZE;

        if (seg_end <= seg_start)
                return 0;

        for (i = 0; i < mem_cluster_cnt; i++) {
                cluster = &mem_clusters[i];
                if ((cluster->start == round_page(seg_start)) &&
                    (cluster->size == trunc_page(seg_end) - cluster->start)) {
#ifdef DEBUG_MEMLOAD
                        printf("WARNING: skipping duplicate segment entry\n");
#endif
                        return 0;
                }
        }

        /*
         * Allocate the physical addresses used by RAM from the iomem extent
         * map.  This is done before the addresses are page rounded just to
         * make sure we get them all.
         */
        if (seg_start < 0x100000000ULL) {
                uint64_t io_end;

                if (seg_end > 0x100000000ULL)
                        io_end = 0x100000000ULL;
                else
                        io_end = seg_end;

                if (iomem_ex != NULL && extent_alloc_region(iomem_ex, seg_start,
                    io_end - seg_start, EX_NOWAIT)) {
                        /* XXX What should we do? */
                        printf("WARNING: CAN'T ALLOCATE MEMORY SEGMENT "
                            "(0x%"PRIx64"/0x%"PRIx64"/0x%x) FROM "
                            "IOMEM EXTENT MAP!\n",
                            seg_start, seg_end - seg_start, type);
                        return 0;
                }
        }

        /* If it's not free memory, skip it. */
        if (type != BIM_Memory)
                return 0;

        if (mem_cluster_cnt >= VM_PHYSSEG_MAX) {
                panic("%s: too many memory segments (increase VM_PHYSSEG_MAX)",
                    __func__);
        }

#ifdef PHYSMEM_MAX_ADDR
        if (seg_start >= MBTOB(PHYSMEM_MAX_ADDR))
                return 0;
        if (seg_end > MBTOB(PHYSMEM_MAX_ADDR))
                seg_end = MBTOB(PHYSMEM_MAX_ADDR);
#endif

        seg_start = round_page(seg_start);
        seg_end = trunc_page(seg_end);

        if (seg_start == seg_end)
                return 0;

        cluster = &mem_clusters[mem_cluster_cnt];
        cluster->start = seg_start;
        if (iomem_ex != NULL)
                new_physmem = physmem + atop(seg_end - seg_start);

#ifdef PHYSMEM_MAX_SIZE
        if (iomem_ex != NULL) {
                if (physmem >= atop(MBTOB(PHYSMEM_MAX_SIZE)))
                        return 0;
                if (new_physmem > atop(MBTOB(PHYSMEM_MAX_SIZE))) {
                        seg_end = seg_start + MBTOB(PHYSMEM_MAX_SIZE) -
                            ptoa(physmem);
                        new_physmem = atop(MBTOB(PHYSMEM_MAX_SIZE));
                }
        }
#endif

        cluster->size = seg_end - seg_start;

        if (iomem_ex != NULL) {
                if (avail_end < seg_end)
                        avail_end = seg_end;
                physmem = new_physmem;
        }
        mem_cluster_cnt++;

        return 0;
}

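/*
 * x86_parse_clusters: walk the BIOS memory map passed via bootinfo and feed
 * each RAM/ACPI/NVS entry to x86_add_cluster(), splitting entries that
 * overlap the ISA compatibility holes (0xa0000-0xfffff).
 */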
static int
x86_parse_clusters(struct btinfo_memmap *bim, struct extent *iomem_ex)
{
        uint64_t seg_start, seg_end;
        uint64_t addr, size;
        uint32_t type;
        int x;

        KASSERT(bim != NULL);
        KASSERT(bim->num > 0);

#ifdef DEBUG_MEMLOAD
        printf("BIOS MEMORY MAP (%d ENTRIES):\n", bim->num);
#endif

        for (x = 0; x < bim->num; x++) {
                addr = bim->entry[x].addr;
                size = bim->entry[x].size;
                type = bim->entry[x].type;
#ifdef DEBUG_MEMLOAD
                printf(" addr 0x%"PRIx64" size 0x%"PRIx64" type 0x%x\n",
                    addr, size, type);
#endif

                /* If the segment is not memory, skip it. */
                switch (type) {
                case BIM_Memory:
                case BIM_ACPI:
                case BIM_NVS:
                        break;
                default:
                        continue;
                }

                /* If the segment is smaller than a page, skip it. */
                if (size < PAGE_SIZE)
                        continue;

                seg_start = addr;
                seg_end = addr + size;

                /*
                 * XXX XXX: Avoid compatibility holes.
                 *
                 * Holes within memory space that allow access to be directed
                 * to the PC-compatible frame buffer (0xa0000-0xbffff), to
                 * adapter ROM space (0xc0000-0xdffff), and to system BIOS
                 * space (0xe0000-0xfffff).
                 *
                 * Some laptops (for example, the Toshiba Satellite 2550X)
                 * report this area in their memory map, which has caused
                 * problems, so we avoid it.
                 */
                if (seg_start < 0x100000 && seg_end > 0xa0000) {
                        printf("WARNING: memory map entry overlaps "
                            "with ``Compatibility Holes'': "
                            "0x%"PRIx64"/0x%"PRIx64"/0x%x\n", seg_start,
                            seg_end - seg_start, type);

                        x86_add_cluster(iomem_ex, seg_start, 0xa0000, type);
                        x86_add_cluster(iomem_ex, 0x100000, seg_end, type);
                } else {
                        x86_add_cluster(iomem_ex, seg_start, seg_end, type);
                }
        }

        return 0;
}

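/*
 * x86_fake_clusters: build a two-entry cluster list (base and extended
 * memory) from the BIOS memory counts, for machines where no usable
 * memory map was provided by the boot loader.
 */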
static int
x86_fake_clusters(struct extent *iomem_ex)
{
        phys_ram_seg_t *cluster;

        KASSERT(mem_cluster_cnt == 0);

        /*
         * Allocate the physical addresses used by RAM from the iomem extent
         * map.  This is done before the addresses are page rounded just to
         * make sure we get them all.
         */
        if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem), EX_NOWAIT)) {
                /* XXX What should we do? */
                printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
                    "IOMEM EXTENT MAP!\n");
        }

        cluster = &mem_clusters[0];
        cluster->start = 0;
        cluster->size = trunc_page(KBTOB(biosbasemem));
        physmem += atop(cluster->size);

        if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
            EX_NOWAIT)) {
                /* XXX What should we do? */
                printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
                    "IOMEM EXTENT MAP!\n");
        }

#if NISADMA > 0
        /*
         * Some motherboards/BIOSes remap the 384K of RAM that would
         * normally be covered by the ISA hole to the end of memory
         * so that it can be used.  However, on a 16M system, this
         * would cause bounce buffers to be allocated and used.
         * This is not desirable behaviour, as more than 384K of
         * bounce buffers might be allocated.  As a work-around,
         * we round memory down to the nearest 1M boundary if
         * we're using any isadma devices and the remapped memory
         * is what puts us over 16M.
         */
        if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
                char pbuf[9];

                format_bytes(pbuf, sizeof(pbuf), biosextmem - (15*1024));
                printf("Warning: ignoring %s of remapped memory\n", pbuf);
                biosextmem = (15*1024);
        }
#endif

        cluster = &mem_clusters[1];
        cluster->start = IOM_END;
        cluster->size = trunc_page(KBTOB(biosextmem));
        physmem += atop(cluster->size);

        mem_cluster_cnt = 2;

        avail_end = IOM_END + trunc_page(KBTOB(biosextmem));

        return 0;
}

/*
 * x86_load_region: load the physical memory region from seg_start to seg_end
 * into the VM system.
 */
static void
x86_load_region(uint64_t seg_start, uint64_t seg_end)
{
        unsigned int i;
        uint64_t tmp;

        i = __arraycount(x86_freelists);
        while (i--) {
                if (x86_freelists[i].limit <= seg_start)
                        continue;
                if (x86_freelists[i].freelist == VM_FREELIST_DEFAULT)
                        continue;
                tmp = MIN(x86_freelists[i].limit, seg_end);
                if (tmp == seg_start)
                        continue;

#ifdef DEBUG_MEMLOAD
                printf("loading freelist %d 0x%"PRIx64"-0x%"PRIx64
                    " (0x%"PRIx64"-0x%"PRIx64")\n", x86_freelists[i].freelist,
                    seg_start, tmp, (uint64_t)atop(seg_start),
                    (uint64_t)atop(tmp));
#endif

                uvm_page_physload(atop(seg_start), atop(tmp), atop(seg_start),
                    atop(tmp), x86_freelists[i].freelist);
                seg_start = tmp;
        }

        if (seg_start != seg_end) {
#ifdef DEBUG_MEMLOAD
                printf("loading default 0x%"PRIx64"-0x%"PRIx64
                    " (0x%"PRIx64"-0x%"PRIx64")\n", seg_start, seg_end,
                    (uint64_t)atop(seg_start), (uint64_t)atop(seg_end));
#endif
                uvm_page_physload(atop(seg_start), atop(seg_end),
                    atop(seg_start), atop(seg_end), VM_FREELIST_DEFAULT);
        }
}

/*
 * init_x86_clusters: retrieve the memory clusters provided by the BIOS, and
 * initialize mem_clusters.
 */
void
init_x86_clusters(void)
{
        extern struct extent *iomem_ex;
        struct btinfo_memmap *bim;

        /*
         * Check to see if we have a memory map from the BIOS (passed to us by
         * the boot program).
         */
#ifdef i386
        extern int biosmem_implicit;
        bim = lookup_bootinfo(BTINFO_MEMMAP);
        if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) &&
            bim != NULL && bim->num > 0)
                x86_parse_clusters(bim, iomem_ex);
#else
#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
        bim = lookup_bootinfo(BTINFO_MEMMAP);
        if (bim != NULL && bim->num > 0)
                x86_parse_clusters(bim, iomem_ex);
#else
        (void)bim, (void)iomem_ex;
#endif
#endif

        if (mem_cluster_cnt == 0) {
                /*
                 * If x86_parse_clusters didn't find any valid segment, create
                 * fake clusters.
                 */
                x86_fake_clusters(iomem_ex);
        }
}

/*
 * init_x86_vm: initialize the VM system on x86.  We basically internalize as
 * many physical pages as we can, starting at avail_start, but we don't
 * internalize the kernel physical pages (from IOM_END to pa_kend).
 */
int
init_x86_vm(paddr_t pa_kend)
{
        uint64_t seg_start, seg_end;
        uint64_t seg_start1, seg_end1;
        int x;
        unsigned i;

        for (i = 0; i < __arraycount(x86_freelists); i++) {
                if (avail_end < x86_freelists[i].limit)
                        x86_freelists[i].freelist = VM_FREELIST_DEFAULT;
        }

#ifdef amd64
        extern vaddr_t kern_end;
        extern vaddr_t module_start, module_end;

        module_start = kern_end;
        module_end = KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2;
#endif

        /*
         * Now, load the memory clusters (which have already been rounded and
         * truncated) into the VM system.
         *
         * NOTE: we assume that memory starts at 0 and that the kernel is
         * loaded at IOM_END (1MB).
         */
        for (x = 0; x < mem_cluster_cnt; x++) {
                const phys_ram_seg_t *cluster = &mem_clusters[x];

                seg_start = cluster->start;
                seg_end = cluster->start + cluster->size;
                seg_start1 = 0;
                seg_end1 = 0;

                /* Skip memory before our available starting point. */
                if (seg_end <= avail_start)
                        continue;

                if (seg_start <= avail_start && avail_start < seg_end) {
                        seg_start = avail_start;
                        if (seg_start == seg_end)
                                continue;
                }

                /*
                 * If this segment contains the kernel, split it in two, around
                 * the kernel.
                 */
                if (seg_start <= IOM_END && pa_kend <= seg_end) {
                        seg_start1 = pa_kend;
                        seg_end1 = seg_end;
                        seg_end = IOM_END;
                        KASSERT(seg_end < seg_end1);
                }

                /* First hunk */
                if (seg_start != seg_end) {
                        x86_load_region(seg_start, seg_end);
                }

                /* Second hunk */
                if (seg_start1 != seg_end1) {
                        x86_load_region(seg_start1, seg_end1);
                }
        }

        return 0;
}

#endif /* !XEN */

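/*
 * x86_reset: reset the machine, trying in turn the ACPI reset register,
 * the keyboard controller, the 0xcf9 reset control register and finally
 * the port 0x92 "Fast A20 and Init" register.
 */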
void
x86_reset(void)
{
        uint8_t b;

#if NACPICA > 0
        /*
         * If ACPI is active, try to reset using the reset register
         * defined in the FADT.
         */
        if (acpi_active) {
                if (acpi_reset() == 0) {
                        delay(500000); /* wait 0.5 sec to see if that did it */
                }
        }
#endif

        /*
         * The keyboard controller has 4 random output pins, one of which is
         * connected to the RESET pin on the CPU in many PCs.  We tell the
         * keyboard controller to pulse this line a couple of times.
         */
        outb(IO_KBD + KBCMDP, KBC_PULSE0);
        delay(100000);
        outb(IO_KBD + KBCMDP, KBC_PULSE0);
        delay(100000);

        /*
         * Attempt to force a reset via the Reset Control register at
         * I/O port 0xcf9.  Bit 2 forces a system reset when it
         * transitions from 0 to 1.  Bit 1 selects the type of reset
         * to attempt: 0 selects a "soft" reset, and 1 selects a
         * "hard" reset.  We try a "hard" reset.  The first write sets
         * bit 1 to select a "hard" reset and clears bit 2.  The
         * second write forces a 0 -> 1 transition in bit 2 to trigger
         * a reset.
         */
        outb(0xcf9, 0x2);
        outb(0xcf9, 0x6);
        DELAY(500000); /* wait 0.5 sec to see if that did it */

        /*
         * Attempt to force a reset via the Fast A20 and Init register
         * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
         * Bit 0 asserts INIT# when set to 1.  We are careful to only
         * preserve bit 1 while setting bit 0.  We also must clear bit
         * 0 before setting it if it isn't already clear.
         */
        b = inb(0x92);
        if (b != 0xff) {
                if ((b & 0x1) != 0)
                        outb(0x92, b & 0xfe);
                outb(0x92, b | 0x1);
                DELAY(500000); /* wait 0.5 sec to see if that did it */
        }
}

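/*
 * x86_listener_cb: kauth(9) machdep scope listener.  I/O permission, LDT
 * and MTRR queries (and LDT updates) are allowed unconditionally; all
 * other requests are deferred to the remaining listeners.
 */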
static int
x86_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
        int result;

        result = KAUTH_RESULT_DEFER;

        switch (action) {
        case KAUTH_MACHDEP_IOPERM_GET:
        case KAUTH_MACHDEP_LDT_GET:
        case KAUTH_MACHDEP_LDT_SET:
        case KAUTH_MACHDEP_MTRR_GET:
                result = KAUTH_RESULT_ALLOW;
                break;

        default:
                break;
        }

        return result;
}

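/*
 * machdep_init: register the machdep kauth listener.
 */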
void
machdep_init(void)
{

        x86_listener = kauth_listen_scope(KAUTH_SCOPE_MACHDEP,
            x86_listener_cb, NULL);
}

/*
 * x86_startup: x86 common startup routine
 *
 * called by cpu_startup.
 */

void
x86_startup(void)
{

#if !defined(XEN)
        nmi_init();
#endif /* !defined(XEN) */
}

/*
 * machine dependent system variables.
 */
static int
sysctl_machdep_booted_kernel(SYSCTLFN_ARGS)
{
        struct btinfo_bootpath *bibp;
        struct sysctlnode node;

        bibp = lookup_bootinfo(BTINFO_BOOTPATH);
        if (!bibp)
                return ENOENT; /* ??? */

        node = *rnode;
        node.sysctl_data = bibp->bootpath;
        node.sysctl_size = sizeof(bibp->bootpath);
        return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static int
sysctl_machdep_diskinfo(SYSCTLFN_ARGS)
{
        struct sysctlnode node;
        extern struct bi_devmatch *x86_alldisks;
        extern int x86_ndisks;

        if (x86_alldisks == NULL)
                return EOPNOTSUPP;

        node = *rnode;
        node.sysctl_data = x86_alldisks;
        node.sysctl_size = sizeof(struct disklist) +
            (x86_ndisks - 1) * sizeof(struct nativedisk_info);
        return sysctl_lookup(SYSCTLFN_CALL(&node));
}

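/*
 * const_sysctl: helper to create a read-only machdep sysctl node carrying
 * an immediate constant value.
 */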
static void
const_sysctl(struct sysctllog **clog, const char *name, int type,
    u_quad_t value, int tag)
{
        (sysctl_createv)(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
            type, name, NULL, NULL, value, NULL, 0,
            CTL_MACHDEP, tag, CTL_EOL);
}

SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
        extern uint64_t tsc_freq;
        extern int sparse_dump;

        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_NODE, "machdep", NULL,
            NULL, 0, NULL, 0,
            CTL_MACHDEP, CTL_EOL);

        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_STRUCT, "console_device", NULL,
            sysctl_consdev, 0, NULL, sizeof(dev_t),
            CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_STRING, "booted_kernel", NULL,
            sysctl_machdep_booted_kernel, 0, NULL, 0,
            CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_STRUCT, "diskinfo", NULL,
            sysctl_machdep_diskinfo, 0, NULL, 0,
            CTL_MACHDEP, CPU_DISKINFO, CTL_EOL);

        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_STRING, "cpu_brand", NULL,
            NULL, 0, cpu_brand_string, 0,
            CTL_MACHDEP, CTL_CREATE, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
            CTLTYPE_INT, "sparse_dump", NULL,
            NULL, 0, &sparse_dump, 0,
            CTL_MACHDEP, CTL_CREATE, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_QUAD, "tsc_freq", NULL,
            NULL, 0, &tsc_freq, 0,
            CTL_MACHDEP, CTL_CREATE, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_INT, "pae",
            SYSCTL_DESCR("Whether the kernel uses PAE"),
            NULL, 0, &use_pae, 0,
            CTL_MACHDEP, CTL_CREATE, CTL_EOL);

        /* None of these can ever change once the system has booted. */
        const_sysctl(clog, "fpu_present", CTLTYPE_INT, i386_fpu_present,
            CPU_FPU_PRESENT);
        const_sysctl(clog, "osfxsr", CTLTYPE_INT, i386_use_fxsave,
            CPU_OSFXSR);
        const_sysctl(clog, "sse", CTLTYPE_INT, i386_has_sse,
            CPU_SSE);
        const_sysctl(clog, "sse2", CTLTYPE_INT, i386_has_sse2,
            CPU_SSE2);

        const_sysctl(clog, "fpu_save", CTLTYPE_INT, x86_fpu_save,
            CTL_CREATE);
        const_sysctl(clog, "fpu_save_size", CTLTYPE_INT, x86_fpu_save_size,
            CTL_CREATE);
        const_sysctl(clog, "xsave_features", CTLTYPE_QUAD, x86_xsave_features,
            CTL_CREATE);

#ifndef XEN
        const_sysctl(clog, "biosbasemem", CTLTYPE_INT, biosbasemem,
            CPU_BIOSBASEMEM);
        const_sysctl(clog, "biosextmem", CTLTYPE_INT, biosextmem,
            CPU_BIOSEXTMEM);
#endif
}