kern_clock.c source code [src/src/sys/kern/kern_clock.c]

1	/*	$NetBSD: kern_clock.c,v 1.134 2015/04/22 16:46:58 pooka Exp $	*/
2
3	/*-
4	* Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5	* All rights reserved.
6	*
7	* This code is derived from software contributed to The NetBSD Foundation
8	* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9	* NASA Ames Research Center.
10	* This code is derived from software contributed to The NetBSD Foundation
11	* by Charles M. Hannum.
12	*
13	* Redistribution and use in source and binary forms, with or without
14	* modification, are permitted provided that the following conditions
15	* are met:
16	* 1. Redistributions of source code must retain the above copyright
17	* notice, this list of conditions and the following disclaimer.
18	* 2. Redistributions in binary form must reproduce the above copyright
19	* notice, this list of conditions and the following disclaimer in the
20	* documentation and/or other materials provided with the distribution.
21	*
22	* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24	* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32	* POSSIBILITY OF SUCH DAMAGE.
33	*/
34
35	/*-
36	* Copyright (c) 1982, 1986, 1991, 1993
37	* The Regents of the University of California. All rights reserved.
38	* (c) UNIX System Laboratories, Inc.
39	* All or some portions of this file are derived from material licensed
40	* to the University of California by American Telephone and Telegraph
41	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
42	* the permission of UNIX System Laboratories, Inc.
43	*
44	* Redistribution and use in source and binary forms, with or without
45	* modification, are permitted provided that the following conditions
46	* are met:
47	* 1. Redistributions of source code must retain the above copyright
48	* notice, this list of conditions and the following disclaimer.
49	* 2. Redistributions in binary form must reproduce the above copyright
50	* notice, this list of conditions and the following disclaimer in the
51	* documentation and/or other materials provided with the distribution.
52	* 3. Neither the name of the University nor the names of its contributors
53	* may be used to endorse or promote products derived from this software
54	* without specific prior written permission.
55	*
56	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66	* SUCH DAMAGE.
67	*
68	* @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
69	*/
70
71	#include <sys/cdefs.h>
72	__KERNEL_RCSID(`0`, "$NetBSD: kern_clock.c,v 1.134 2015/04/22 16:46:58 pooka Exp $");
73
74	#ifdef _KERNEL_OPT
75	#include "opt_dtrace.h"
76	#include "opt_perfctrs.h"
77	#endif
78
79	#include <sys/param.h>
80	#include <sys/systm.h>
81	#include <sys/callout.h>
82	#include <sys/kernel.h>
83	#include <sys/proc.h>
84	#include <sys/resourcevar.h>
85	#include <sys/signalvar.h>
86	#include <sys/sysctl.h>
87	#include <sys/timex.h>
88	#include <sys/sched.h>
89	#include <sys/time.h>
90	#include <sys/timetc.h>
91	#include <sys/cpu.h>
92	#include <sys/atomic.h>
93
94	#ifdef GPROF
95	#include <sys/gmon.h>
96	#endif
97
98	#ifdef KDTRACE_HOOKS
99	#include <sys/dtrace_bsd.h>
100	#include <sys/cpu.h>
101
102	cyclic_clock_func_t cyclic_clock_func[MAXCPUS];
103	#endif
104
105	static int sysctl_kern_clockrate(SYSCTLFN_PROTO);
106
107	/*
108	* Clock handling routines.
109	*
110	* This code is written to operate with two timers that run independently of
111	* each other. The main clock, running hz times per second, is used to keep
112	* track of real time. The second timer handles kernel and user profiling,
113	* and does resource use estimation. If the second timer is programmable,
114	* it is randomized to avoid aliasing between the two clocks. For example,
115	* the randomization prevents an adversary from always giving up the CPU
116	* just before its quantum expires. Otherwise, it would never accumulate
117	* CPU ticks. The mean frequency of the second timer is stathz.
118	*
119	* If no second timer exists, stathz will be zero; in this case we drive
120	* profiling and statistics off the main clock. This WILL NOT be accurate;
121	* do not do it unless absolutely necessary.
122	*
123	* The statistics clock may (or may not) be run at a higher rate while
124	* profiling. This profile clock runs at profhz. We require that profhz
125	* be an integral multiple of stathz.
126	*
127	* If the statistics clock is running fast, it must be divided by the ratio
128	* profhz/stathz for statistics. (For profiling, every tick counts.)
129	*/
130
/* Mean frequency of the statistics clock; zero if no second timer exists. */
int	stathz;

/* Rate of the profiling clock. */
int	profhz;

/* Profiling source selector; statclock() compares it to PROFSRC_CLOCK. */
int	profsrc;

/* Rate of a separate scheduler clock; if zero, hardclock() calls schedclock(). */
int	schedhz;

/* Number of processes that currently have PST_PROFIL set. */
int	profprocs;

/* Number of hardclock() ticks counted on the primary CPU. */
int	hardclock_ticks;

/* hard => sched divider (used if schedhz == 0) */
static int hardscheddiv;

/* prof => stat divider */
static int psdiv;

/* ratio: prof / stat */
int	psratio;
140
141	static u_int get_intr_timecount(struct timecounter *);
142
143	static struct timecounter intr_timecounter = {
144	get_intr_timecount, / get_timecount /
145	`0`, / no poll_pps /
146	~`0u`, / counter_mask /
147	`0`, / frequency /
148	"clockinterrupt", / name /
149	`0`, / quality - minimum implementation level for a clock /
150	NULL, / prev /
151	NULL, / next /
152	};
153
154	static u_int
155	get_intr_timecount(struct timecounter *tc)
156	{
157
158	return (u_int)hardclock_ticks;
159	}
160
161	/*
162	* Initialize clock frequencies and start both clocks running.
163	*/
164	void
165	initclocks(void)
166	{
167	static struct sysctllog *clog;
168	int i;
169
170	/*
171	* Set divisors to 1 (normal case) and let the machine-specific
172	* code do its bit.
173	*/
174	psdiv = `1`;
175	/*
176	* provide minimum default time counter
177	* will only run at interrupt resolution
178	*/
179	intr_timecounter.tc_frequency = hz;
180	tc_init(&intr_timecounter);
181	cpu_initclocks();
182
183	/*
184	* Compute profhz and stathz, fix profhz if needed.
185	*/
186	i = stathz ? stathz : hz;
187	if (profhz == `0`)
188	profhz = i;
189	psratio = profhz / i;
190	if (schedhz == `0`) {
191	/ 16Hz is best /
192	hardscheddiv = hz / `16`;
193	if (hardscheddiv <= `0`)
194	panic("hardscheddiv");
195	}
196
197	sysctl_createv(&clog, `0`, NULL, NULL,
198	CTLFLAG_PERMANENT,
199	CTLTYPE_STRUCT, "clockrate",
200	SYSCTL_DESCR("Kernel clock rates"),
201	sysctl_kern_clockrate, `0`, NULL,
202	sizeof(struct clockinfo),
203	CTL_KERN, KERN_CLOCKRATE, CTL_EOL);
204	sysctl_createv(&clog, `0`, NULL, NULL,
205	CTLFLAG_PERMANENT,
206	CTLTYPE_INT, "hardclock_ticks",
207	SYSCTL_DESCR("Number of hardclock ticks"),
208	NULL, `0`, &hardclock_ticks, sizeof(hardclock_ticks),
209	CTL_KERN, KERN_HARDCLOCK_TICKS, CTL_EOL);
210	}
211
212	/*
213	* The real-time timer, interrupting hz times per second.
214	*/
215	void
216	hardclock(struct clockframe *frame)
217	{
218	struct lwp *l;
219	struct cpu_info *ci;
220
221	ci = curcpu();
222	l = ci->ci_data.cpu_onproc;
223
224	timer_tick(l, CLKF_USERMODE(frame));
225
226	/*
227	* If no separate statistics clock is available, run it from here.
228	*/
229	if (stathz == `0`)
230	statclock(frame);
231	/*
232	* If no separate schedclock is provided, call it here
233	* at about 16 Hz.
234	*/
235	if (schedhz == `0`) {
236	if ((int)(--ci->ci_schedstate.spc_schedticks) <= `0`) {
237	schedclock(l);
238	ci->ci_schedstate.spc_schedticks = hardscheddiv;
239	}
240	}
241	if ((--ci->ci_schedstate.spc_ticks) <= `0`)
242	sched_tick(ci);
243
244	if (CPU_IS_PRIMARY(ci)) {
245	hardclock_ticks++;
246	tc_ticktock();
247	}
248
249	/*
250	* Update real-time timeout queue.
251	*/
252	callout_hardclock();
253
254	#ifdef KDTRACE_HOOKS
255	cyclic_clock_func_t func = cyclic_clock_func[cpu_index(ci)];
256	if (func) {
257	(func)((struct* clockframe *)frame);
258	}
259	#endif
260	}
261
262	/*
263	* Start profiling on a process.
264	*
265	* Kernel profiling passes proc0 which never exits and hence
266	* keeps the profile clock running constantly.
267	*/
268	void
269	startprofclock(struct proc *p)
270	{
271
272	KASSERT(mutex_owned(&p->p_stmutex));
273
274	if ((p->p_stflag & PST_PROFIL) == `0`) {
275	p->p_stflag \|= PST_PROFIL;
276	/*
277	* This is only necessary if using the clock as the
278	* profiling source.
279	*/
280	if (++profprocs == `1` && stathz != `0`)
281	psdiv = psratio;
282	}
283	}
284
285	/*
286	* Stop profiling on a process.
287	*/
288	void
289	stopprofclock(struct proc *p)
290	{
291
292	KASSERT(mutex_owned(&p->p_stmutex));
293
294	if (p->p_stflag & PST_PROFIL) {
295	p->p_stflag &= ~PST_PROFIL;
296	/*
297	* This is only necessary if using the clock as the
298	* profiling source.
299	*/
300	if (--profprocs == `0` && stathz != `0`)
301	psdiv = `1`;
302	}
303	}
304
#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 *
 * frame: interrupt frame describing where the CPU was when the
 * profiling event fired.
 *
 * In user mode the sample is charged to the current process if it is
 * being profiled.  In kernel mode the sample goes to the GPROF kernel
 * histogram (when built with GPROF) and, where LWP_PC is available, to
 * the profiled process at its saved user PC.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		/*
		 * p is NULL when this CPU has no LWP recorded; there is
		 * nothing to charge then, and its mutex must not be
		 * touched.
		 */
		if (p == NULL)
			return;
		mutex_spin_enter(&p->p_stmutex);
		if (p->p_stflag & PST_PROFIL)
			addupc_intr(l, CLKF_PC(frame));
		mutex_spin_exit(&p->p_stmutex);
	} else {
#ifdef GPROF
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			/* Offset of the sampled PC from the profiled text base. */
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, LWP_PC(l));
#endif
	}
}
#endif
346
347	void
348	schedclock(struct lwp *l)
349	{
350	if ((l->l_flag & LW_IDLE) != `0`)
351	return;
352
353	sched_schedclock(l);
354	}
355
356	/*
357	* Statistics clock. Grab profile sample, and if divider reaches 0,
358	* do process and kernel statistics.
359	*/
360	void
361	statclock(struct clockframe *frame)
362	{
363	#ifdef GPROF
364	struct gmonparam *g;
365	intptr_t i;
366	#endif
367	struct cpu_info *ci = curcpu();
368	struct schedstate_percpu *spc = &ci->ci_schedstate;
369	struct proc *p;
370	struct lwp *l;
371
372	/*
373	* Notice changes in divisor frequency, and adjust clock
374	* frequency accordingly.
375	*/
376	if (spc->spc_psdiv != psdiv) {
377	spc->spc_psdiv = psdiv;
378	spc->spc_pscnt = psdiv;
379	if (psdiv == `1`) {
380	setstatclockrate(stathz);
381	} else {
382	setstatclockrate(profhz);
383	}
384	}
385	l = ci->ci_data.cpu_onproc;
386	if ((l->l_flag & LW_IDLE) != `0`) {
387	/*
388	* don't account idle lwps as swapper.
389	*/
390	p = NULL;
391	} else {
392	p = l->l_proc;
393	mutex_spin_enter(&p->p_stmutex);
394	}
395
396	if (CLKF_USERMODE(frame)) {
397	if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
398	addupc_intr(l, CLKF_PC(frame));
399	if (--spc->spc_pscnt > `0`) {
400	mutex_spin_exit(&p->p_stmutex);
401	return;
402	}
403
404	/*
405	* Came from user mode; CPU was in user state.
406	* If this process is being profiled record the tick.
407	*/
408	p->p_uticks++;
409	if (p->p_nice > NZERO)
410	spc->spc_cp_time[CP_NICE]++;
411	else
412	spc->spc_cp_time[CP_USER]++;
413	} else {
414	#ifdef GPROF
415	/*
416	* Kernel statistics are just like addupc_intr, only easier.
417	*/
418	g = &_gmonparam;
419	if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
420	i = CLKF_PC(frame) - g->lowpc;
421	if (i < g->textsize) {
422	i /= HISTFRACTION * sizeof(*g->kcount);
423	g->kcount[i]++;
424	}
425	}
426	#endif
427	#ifdef LWP_PC
428	if (p != NULL && profsrc == PROFSRC_CLOCK &&
429	(p->p_stflag & PST_PROFIL)) {
430	addupc_intr(l, LWP_PC(l));
431	}
432	#endif
433	if (--spc->spc_pscnt > `0`) {
434	if (p != NULL)
435	mutex_spin_exit(&p->p_stmutex);
436	return;
437	}
438	/*
439	* Came from kernel mode, so we were:
440	* - handling an interrupt,
441	* - doing syscall or trap work on behalf of the current
442	* user process, or
443	* - spinning in the idle loop.
444	* Whichever it is, charge the time as appropriate.
445	* Note that we charge interrupts to the current process,
446	* regardless of whether they are ``for'' that process,
447	* so that we know how much of its real time was spent
448	* in ``non-process'' (i.e., interrupt) work.
449	*/
450	if (CLKF_INTR(frame) \|\| (curlwp->l_pflag & LP_INTR) != `0`) {
451	if (p != NULL) {
452	p->p_iticks++;
453	}
454	spc->spc_cp_time[CP_INTR]++;
455	} else if (p != NULL) {
456	p->p_sticks++;
457	spc->spc_cp_time[CP_SYS]++;
458	} else {
459	spc->spc_cp_time[CP_IDLE]++;
460	}
461	}
462	spc->spc_pscnt = psdiv;
463
464	if (p != NULL) {
465	atomic_inc_uint(&l->l_cpticks);
466	mutex_spin_exit(&p->p_stmutex);
467	}
468	}
469
470	/*
471	* sysctl helper routine for kern.clockrate. Assembles a struct on
472	* the fly to be returned to the caller.
473	*/
474	static int
475	sysctl_kern_clockrate(SYSCTLFN_ARGS)
476	{
477	struct clockinfo clkinfo;
478	struct sysctlnode node;
479
480	clkinfo.tick = tick;
481	clkinfo.tickadj = tickadj;
482	clkinfo.hz = hz;
483	clkinfo.profhz = profhz;
484	clkinfo.stathz = stathz ? stathz : hz;
485
486	node = *rnode;
487	node.sysctl_data = &clkinfo;
488	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
489	}
490

Browse the source code of src/src/sys/kern/kern_clock.c