/* $NetBSD: acpi_cpu_md.c,v 1.77 2014/04/17 16:01:24 christos Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.77 2014/04/17 16:01:24 christos Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpufreq.h>
#include <sys/device.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>
#include <x86/x86/tsc.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>

/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)
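/*
 * Note that bit 16 enables Enhanced SpeedStep, whereas bit 38
 * works the other way around: setting it disables Turbo Boost.
 */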

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)
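/*
 * With C1E the hardware may enter a deeper sleep than the C1 that
 * was requested, behind the back of the operating system. The
 * function acpicpu_md_quirk_c1e() below clears the triggers.
 */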

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0, 5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0, 5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0, 5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0, 5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))
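/*
 * For example, FID 0x02 maps to VCO FID 0x0C, whereas FIDs of
 * 8 and above are used as-is (cf. the AMD family 0Fh BKDG).
 */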

static char native_idle_text[16];
void (*native_idle)(void) = NULL;

static int acpicpu_md_quirk_piix4(const struct pci_attach_args *);
static void acpicpu_md_pstate_hwf_reset(void *, void *);
static int acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
    uint32_t *);
static int acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
    uint32_t, uint32_t);
static int acpicpu_md_pstate_sysctl_init(void);
static int acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog *acpicpu_log = NULL;

struct cpu_info *
acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	if (strcmp(cfaa->name, "frequency") != 0)
		return NULL;

	return cfaa->ci;
}

struct cpu_info *
acpicpu_md_attach(device_t parent, device_t self, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	return cfaa->ci;
}

uint32_t
acpicpu_md_flags(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];
	uint64_t msr;

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that the local APIC timer
	 * as well as TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	/*
	 * Detect whether TSC is invariant. If it is not, we keep the flag to
	 * note that TSC will not run at constant rate. Depending on the CPU,
	 * this may affect P- and T-state changes, but especially relevant
	 * are C-states; with variant TSC, states larger than C1 may
	 * completely stop the counter.
	 */
	if (tsc_is_invariant())
		val &= ~ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x00000006, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HWF;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/*
			 * Evaluate support for the "FID/VID
			 * algorithm" also used by powernow(4).
			 */
			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/* FALLTHROUGH */

		case 0x12:
		case 0x14: /* AMD Fusion */
		case 0x15: /* AMD Bulldozer */

			/*
			 * Like with Intel, detect MSR-based P-states,
			 * and AMD's "turbo" (Core Performance Boost),
			 * respectively.
			 */
			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			/*
			 * Also check for APERF and MPERF,
			 * first available in the family 10h.
			 */
			if (cpuid_level >= 0x06) {

				x86_cpuid(0x00000006, regs);

				if ((regs[2] & CPUID_DSPM_HWF) != 0)
					val |= ACPICPU_FLAG_P_HWF;
			}

			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

void
acpicpu_md_quirk_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

int
acpicpu_md_cstate_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	/*
	 * Save the cpu_idle(9) loop used by default.
	 */
	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

int
acpicpu_md_cstate_stop(void)
{
	static char text[16];
	void (*func)(void);
	uint64_t xc;
	bool ipi;

	x86_cpu_idle_get(&func, text, sizeof(text));

	if (func == native_idle)
		return EALREADY;

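	/*
	 * An IPI is needed only if the native loop is halt-based,
	 * as halted CPUs must be woken up to notice the switch.
	 */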
	ipi = (native_idle == x86_cpu_idle_halt);
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts enabled.
 */
void
acpicpu_md_cstate_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	switch (method) {

	case ACPICPU_C_STATE_FFH:

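		/*
		 * In the MWAIT hint the target C-state occupies
		 * bits 7:4, with the value 0 denoting C1; hence
		 * (state - 1) << 4 below selects ACPI Cx.
		 */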
		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		x86_disable_intr();

		if (__predict_false(ci->ci_want_resched != 0)) {
			x86_enable_intr();
			return;
		}

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	uint64_t xc, val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {

			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & MSR_MISC_ENABLE_EST) == 0) {

				val |= MSR_MISC_ENABLE_EST;
				wrmsr(MSR_MISC_ENABLE, val);
				val = rdmsr(MSR_MISC_ENABLE);

				if ((val & MSR_MISC_ENABLE_EST) == 0)
					return ENOTTY;
			}
		}
	}

	/*
	 * Reset the APERF and MPERF counters.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
		xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
		xc_wait(xc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log == NULL)
		return EALREADY;

	sysctl_teardown(&acpicpu_log);
	acpicpu_log = NULL;

	return 0;
}

int
acpicpu_md_pstate_init(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	struct acpicpu_pstate *ps, msr;
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 * It is shown as the P1 frequency + 1 MHz.
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if (sc->sc_pstate_count >= 2 &&
		   (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {

			ps = &sc->sc_pstate[0];

			if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
				ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
		}

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x12:
		case 0x14:
		case 0x15:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:
			/*
			 * If we have an unknown AMD CPU, rely on XPSS.
			 */
			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Read the IA32_APERF and IA32_MPERF counters. The latter
 * increments at the rate of the fixed maximum frequency
 * configured during the boot, whereas IA32_APERF counts at
 * the rate of the actual frequency. Note that the MSRs must
 * be read without delay, and that only the ratio between
 * IA32_APERF and IA32_MPERF is architecturally defined.
 *
 * The function thus returns the percentage of the actual
 * frequency in terms of the maximum frequency of the calling
 * CPU since the last call. A value zero implies an error.
 *
 * For further details, refer to:
 *
 *	Intel Corporation: Intel 64 and IA-32 Architectures
 *	Software Developer's Manual. Section 13.2, Volume 3A:
 *	System Programming Guide, Part 1. July, 2008.
 *
 *	Advanced Micro Devices: BIOS and Kernel Developer's
 *	Guide (BKDG) for AMD Family 10h Processors. Section
 *	2.4.5, Revision 3.48, April 2010.
 */
uint8_t
acpicpu_md_pstate_hwf(struct cpu_info *ci)
{
	struct acpicpu_softc *sc;
	uint64_t aperf, mperf;
	uint8_t rv = 0;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0))
		return 0;

	aperf = sc->sc_pstate_aperf;
	mperf = sc->sc_pstate_mperf;

	x86_disable_intr();

	sc->sc_pstate_aperf = rdmsr(MSR_APERF);
	sc->sc_pstate_mperf = rdmsr(MSR_MPERF);

	x86_enable_intr();

	aperf = sc->sc_pstate_aperf - aperf;
	mperf = sc->sc_pstate_mperf - mperf;

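	/*
	 * For instance, if APERF advanced by 50 while MPERF
	 * advanced by 100, the CPU ran at 50 % of its maximum.
	 */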
	if (__predict_true(mperf != 0))
		rv = (aperf * 100) / mperf;

	return rv;
}

static void
acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return;

	x86_disable_intr();

	wrmsr(MSR_APERF, 0);
	wrmsr(MSR_MPERF, 0);

	x86_enable_intr();

	sc->sc_pstate_aperf = 0;
	sc->sc_pstate_mperf = 0;
}

int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	/*
	 * Pick any P-state for the status address.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	/*
	 * Search for the value from known P-states.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	/*
	 * If the value was not found, try APERF/MPERF.
	 * The state is P0 if the return value is 100 %.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {

		KASSERT(sc->sc_pstate_count > 0);
		KASSERT(sc->sc_pstate[0].ps_freq != 0);

		if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) {
			*freq = sc->sc_pstate[0].ps_freq;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	uint64_t val = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	/*
	 * If the mask is set, do a read-modify-write.
	 */
	if (__predict_true(ps->ps_control_mask != 0)) {
		val = rdmsr(ps->ps_control_addr);
		val &= ~ps->ps_control_mask;
	}

	val |= ps->ps_control;

	wrmsr(ps->ps_control_addr, val);
	DELAY(ps->ps_latency);

	return 0;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it wants to use ACPI, it does not
	 * comply with the ACPI specifications.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);
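
	/*
	 * Convert the raw values for DELAY(): the voltage
	 * stabilization time (VST) is given in 20 us steps and
	 * the isochronous relief time (IRT) is 10 us times a
	 * power of two, while the PLL lock time is likewise
	 * scaled to microseconds.
	 */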

	/*
	 * Phase 1.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3.
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i < 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	uint64_t val = 0;

	val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

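	/*
	 * Writing the control MSR with the change bit set
	 * initiates the FID/VID transition; the timeout gives
	 * the hardware time to settle before the status MSR
	 * is read back by the caller.
	 */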
	wrmsr(MSR_0FH_CONTROL, val);
	DELAY(tmo);
}

int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	uint64_t val;
	uint8_t i;

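	/*
	 * Only the low bits, i.e. the on-demand clock modulation
	 * duty cycle and its enable bit, are passed through to
	 * MSR_THERM_CONTROL.
	 */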
	val = ts->ts_control;
	val = val & __BITS(0, 4);

	wrmsr(MSR_THERM_CONTROL, val);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return 0;

		DELAY(ts->ts_latency);
	}

	return EAGAIN;
}

/*
 * A kludge for backwards compatibility.
 */
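/*
 * The resulting tree is, e.g. with an Intel CPU:
 *
 *	machdep.est.frequency.target	(read-write)
 *	machdep.est.frequency.current	(read-only)
 *	machdep.est.frequency.available	(read-only)
 */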
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	cpufreq_set_all(freq);

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		if (len >= sizeof(buf))
			break;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}