27 #include <errno.h> |
27 #include <errno.h> |
28 #include <unistd.h> |
28 #include <unistd.h> |
29 #include <stdlib.h> |
29 #include <stdlib.h> |
30 #include <limits.h> |
30 #include <limits.h> |
31 #include <string.h> |
31 #include <string.h> |
|
32 #include <getopt.h> |
32 #include <libgen.h> |
33 #include <libgen.h> |
33 #include <syslog.h> |
34 #include <syslog.h> |
34 #include <kstat.h> |
35 #include <kstat.h> |
35 #include <sys/processor.h> |
36 #include <sys/processor.h> |
|
37 #include <sys/modhash.h> |
36 |
38 |
37 #include "intrs.h" |
39 #include "intrs.h" |
38 |
40 |
|
41 /* Interrupt vector info */ |
39 typedef struct ivec { |
42 typedef struct ivec { |
40 int cookie; |
43 int cookie; |
41 hrtime_t time; |
44 uint64_t time; |
42 hrtime_t crtime; |
45 hrtime_t crtime; |
43 int pil; |
46 int pil; |
44 int ino; |
47 int ino; |
45 int ihs; |
48 int ihs; |
46 int num_ino; |
49 int num_ino; |
47 int origcpu; |
50 int origcpu; |
48 int nowcpu; |
51 int nowcpu; |
49 int inum; |
52 int inum; |
50 } ivec_t; |
53 } ivec_t; |
51 |
54 |
52 typedef struct bus_stat { |
55 uu_list_pool_t *ivec_pool; |
53 bus_stat_t *next; |
56 |
|
57 /* MSI device info */ |
|
58 typedef struct msi_dev { |
|
59 msi_dev_t *next; |
|
60 char *devpath[MAXPATHLEN]; |
|
61 int num_intr; |
|
62 *ivec_t ivecs; |
|
63 } msi_dev_t; |
|
64 |
|
65 uu_list_pool_t *msi_dev_pool; |
|
66 |
|
67 /* Bus info */ |
|
68 typedef struct bus_dev { |
|
69 bus_dev_t *next; |
54 char *buspath[MAXPATHLEN]; |
70 char *buspath[MAXPATHLEN]; |
55 int num_intr; |
71 int num_intr; |
56 ivec_t *ivecs; |
72 ivec_t *ivecs; |
57 } bus_stat_t; |
73 int is_pcplusmp; |
58 |
74 msi_dev_t *msi_head; |
|
75 } bus_dev_t; |
|
76 |
|
77 uu_list_pool_t *bus_dev_pool; |
|
78 |
|
79 /* Per-CPU statistics */ |
59 typedef struct cpu_stat { |
80 typedef struct cpu_stat { |
60 int state; |
81 int state; |
61 uint64_t tot; |
82 uint64_t tot; |
62 hrtime_t crtime; |
83 hrtime_t crtime; |
63 bus_stat_t *bus_stats; |
84 bus_dev_t *bus_head; |
64 } cpu_stat_t; |
85 } cpu_stat_t; |
65 |
86 |
|
87 uu_list_pool_t *cpu_stat_pool; |
|
88 |
|
89 /* Interrupt statistics */ |
66 type def struct intr_stat { |
90 type def struct intr_stat { |
67 double snaptime; |
91 hrtime_t snaptime; |
68 *cpu_stat_t *cpus; |
92 *cpu_stat_t *cpus; |
69 } intr_stat_t; |
93 } intr_stat_t; |
70 |
94 |
|
95 uu_list_pool_t *intr_stat_pool; |
|
96 |
/*
 * Sleep intervals, in seconds, selectable for the global `sleeptime`.
 */
typedef enum sleeptime {
	NORMAL_SLEEPTIME = 10,		/* time to sleep between samples */
	IDLE_SLEEPTIME = 45,		/* time to sleep when idle */
	ONECPU_SLEEPTIME = 60 * 15	/* used if only 1 CPU on system */
} sleeptime_t;
76 |
102 |
77 int using_scengen; /* 1 if using scenario simulator */ |
103 int using_scengen; /* 1 if using scenario simulator */ |
78 int debug; |
104 int debug; |
79 int foreground; |
105 int foreground; |
80 |
106 |
81 int max_cpus; |
107 int max_cpus; |
82 |
108 |
83 sleeptime_t sleeptime = NORMAL_SLEEPTIME; /* either normal_ or idle_ or onecpu_ */ |
109 sleeptime_t sleeptime = NORMAL_SLEEPTIME; |
84 |
110 |
85 float idle_intrload = 0.1; /* idle if interrupt load < 10% */ |
111 float idle_intrload = 0.1; /* idle if interrupt load < 10% */ |
86 |
112 |
87 float timerange_toohi = 0.1; |
113 float timerange_toohi = 0.1; |
88 int statslen = 60; /* time period (in secs) to keep in @deltas */ |
114 int statslen = 60; /* time period (in secs) to keep in @deltas */ |
89 |
115 |
90 int main(int argc, char **argv) |
116 int main(int argc, char **argv) |
91 { |
117 { |
92 const char *cmdname; |
118 const char *cmdname; |
93 kstat_ctl_t *kc; |
119 kstat_ctl_t *kc; |
94 kstat_t *ksp; |
120 kstat_t *ksp; |
95 intr_stat_t stat; |
121 intr_stat_t stat; |
|
122 char c; |
96 |
123 |
97 max_cpus = sysconf(_SC_CPUID_MAX) + 1; |
124 max_cpus = sysconf(_SC_CPUID_MAX) + 1; |
98 |
125 |
99 cmdname = basename(argv[0]); |
126 cmdname = basename(argv[0]); |
100 /* |
127 /* |
101 * Parse arguments. intrd does not accept any public arguments; the two |
128 * Parse arguments. intrd does not accept any public arguments; the two |
102 * arguments below are meant for testing purposes. -D generates a significant |
129 * arguments below are meant for testing purposes. -D generates a significant |
103 * amount of syslog output. -S <filename> loads the filename as a perl |
130 * amount of syslog output. -S <filename> loads the filename as a perl |
104 * script. That file is expected to implement a kstat "simulator" which |
131 * script. That file is expected to implement a kstat "simulator" which |
105 * can be used to feed information to intrd and verify intrd's responses. |
132 * can be used to feed information to intrd and verify intrd's responses. |
106 */ |
133 */ |
107 for (; --argc > 0; ++argv) { |
134 while ((c = getopt(argc, argv, "S:Df")) != EOF) { |
108 if (argv[1][0] != '-' || argv[1][1] == '\0' || |
135 switch (c) { |
109 argv[1][2] != '\0') { |
136 case 'S': |
110 continue; |
137 using_scengen = 1; |
111 } |
138 foreground = 1; |
112 |
139 load_simulator(optarg); |
113 switch (argv[1][1]) { |
140 break; |
114 case 'S': |
141 case 'D': |
115 using_scengen = 1; |
142 debug = 1; |
116 foreground = 1; |
143 break; |
117 if (argc > 1) { |
144 case 'f': |
118 --argc; |
145 foreground = 1; |
119 load_simulator(++argv[1]); |
146 break; |
120 } |
147 default: |
121 break; |
|
122 case 'D': |
|
123 debug = 1; |
|
124 break; |
|
125 case 'f': |
|
126 foreground = 1; |
|
127 break; |
|
128 default: |
|
129 } |
148 } |
130 } |
149 } |
131 |
150 |
132 if (!foreground) { |
151 if (!foreground) { |
133 if (daemon(0, 0) == -1) { |
152 if (daemon(0, 0) == -1) { |
153 /* |
172 /* |
154 * If no pci_intrs kstats were found, we need to exit, but we can't because |
173 * If no pci_intrs kstats were found, we need to exit, but we can't because |
155 * SMF will restart us and/or report an error to the administrator. But |
174 * SMF will restart us and/or report an error to the administrator. But |
156 * there's nothing an administrator can do. So print out a message to syslog |
175 * there's nothing an administrator can do. So print out a message to syslog |
157 * and silently pause forever. |
176 * and silently pause forever. |
158 */ |
177 */ |
159 for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) { |
178 for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) { |
160 if ((ksp->ks_type == KSTAT_TYPE_NAMED) && |
179 if ((ksp->ks_type == KSTAT_TYPE_NAMED) && |
161 !strcmp(ksp->ks_module, "pci_intrs")) { |
180 (strcmp(ksp->ks_module, "pci_intrs") == 0)) { |
162 break; |
181 break; |
163 } |
182 } |
164 } |
183 } |
165 if (ksp == NULL) { |
184 if (ksp == NULL) { |
166 kstat_close(kc); |
185 kstat_close(kc); |
167 syslog(LOG_INFO, "no interrupts were found: " \ |
186 syslog(LOG_INFO, "no interrupts were found: " \ |
168 "your I/O bus may not yet be supported\n"); |
187 "your I/O bus may not yet be supported\n"); |
169 do {} while (!sleep(ONECPU_SLEEPTIME)); |
188 do {} while (sleep(ONECPU_SLEEPTIME) == 0); |
170 return 0; |
189 return 0; |
171 } |
190 } |
172 |
191 |
173 if ((stat.cpus = malloc(sizeof(cpu_stat_t) * max_cpus)) == NULL) { |
192 stat.cpus = malloc(sizeof (cpu_stat_t) * max_cpus); |
|
193 if (stat.cpus == NULL) { |
174 return 1; |
194 return 1; |
175 } |
195 } |
176 } |
196 } |
177 |
197 |
178 |
198 |
208 int do_reconfig_cpu($$$); |
228 int do_reconfig_cpu($$$); |
209 */ |
229 */ |
210 |
230 |
211 |
231 |
/*
 *
 * What follows are the basic data-structure routines of intrd.
 *
 * getstat() is responsible for reading the kstats and generating a "stat" hash.
 *
 * generate_delta() is responsible for taking two "stat" hashes and creating
 * a new "delta" hash that represents what has changed over time.
 *
 * compress_deltas() is responsible for taking a list of deltas and generating
 * a single delta hash that encompasses all the time periods described by the
 * deltas.
 */
225 |
245 |
226 |
246 |
227 /* |
247 /* |
228 # |
248 * |
229 * getstat() is handed a reference to a kstat and generates a hash, returned |
249 * getstat() is handed a reference to a kstat and generates a hash, returned |
230 * by reference, containing all the fields from the kstats which we need. |
250 * by reference, containing all the fields from the kstats which we need. |
231 * If it returns the scalar 0, it failed to gather the kstats, and the caller |
251 * If it returns the scalar 0, it failed to gather the kstats, and the caller |
232 * should react accordingly. |
252 * should react accordingly. |
233 # |
253 * |
234 * getstat() is also responsible for maintaining a reasonable $sleeptime. |
254 * getstat() is also responsible for maintaining a reasonable $sleeptime. |
235 # |
255 * |
236 * {"snaptime"} kstat's snaptime |
256 * {"snaptime"} kstat's snaptime |
237 * {<cpuid>} one hash reference per online cpu |
257 * {<cpuid>} one hash reference per online cpu |
238 * ->{"tot"} == cpu:<cpuid>:sys:cpu_nsec_{user + kernel + idle} |
258 * ->{"tot"} == cpu:<cpuid>:sys:cpu_nsec_{user + kernel + idle} |
239 * ->{"crtime"} == cpu:<cpuid>:sys:crtime |
259 * ->{"crtime"} == cpu:<cpuid>:sys:crtime |
240 * ->{"ivecs"} |
260 * ->{"ivecs"} |
247 * Will be > 1 on pcplusmp X86 systems for devices |
267 * Will be > 1 on pcplusmp X86 systems for devices |
248 * with multiple MSI interrupts. |
268 * with multiple MSI interrupts. |
249 * ->{"buspath"} == pci_intrs:<ivec#>:<nexus>:buspath |
269 * ->{"buspath"} == pci_intrs:<ivec#>:<nexus>:buspath |
250 * ->{"name"} == pci_intrs:<ivec#>:<nexus>:name |
270 * ->{"name"} == pci_intrs:<ivec#>:<nexus>:name |
251 * ->{"ihs"} == pci_intrs:<ivec#>:<nexus>:ihs |
271 * ->{"ihs"} == pci_intrs:<ivec#>:<nexus>:ihs |
252 # |
272 * |
253 */ |
273 */ |
254 |
274 |
255 int getstat(kstat_ctl_t *kc, intr_stat_t *stat) |
275 int getstat(kstat_ctl_t *kc, intr_stat_t *stat) |
256 { |
276 { |
257 int cpucnt = 0; |
277 int cpucnt = 0; |
258 kstat_t *ksp; |
278 kstat_t *ksp; |
259 double minsnap, maxsnap; |
279 hrtime_t minsnap, maxsnap; |
260 |
280 |
261 /* Hash of hash which matches (MSI device, ino) combos to kstats. */ |
281 /* Hash of hash which matches (MSI device, ino) combos to kstats. */ |
262 int *msidevs; |
282 msi_dev_t *msidevs; |
263 |
283 |
264 /* |
284 /* |
265 * kstats are not generated atomically. Each kstat hierarchy will |
285 * kstats are not generated atomically. Each kstat hierarchy will |
266 * have been generated within the kernel at a different time. On a |
286 * have been generated within the kernel at a different time. On a |
267 * thrashing system, we may not run quickly enough in order to get |
287 * thrashing system, we may not run quickly enough in order to get |
284 * |
304 * |
285 * record cpu:<cpuid>:sys:snaptime, and check $minsnap/$maxsnap. |
305 * record cpu:<cpuid>:sys:snaptime, and check $minsnap/$maxsnap. |
286 */ |
306 */ |
287 |
307 |
288 cpu_stats = stat->cpus; |
308 cpu_stats = stat->cpus; |
289 bzero(cpu_stats, sizeof(cpu_stat_t) * max_cpus); |
309 bzero(cpu_stats, sizeof (cpu_stat_t) * max_cpus); |
290 |
310 |
291 for (ksp = kc->kc_chain; ksp != null; ksp = ksp->ks_next) { |
311 for (ksp = kc->kc_chain; ksp != null; ksp = ksp->ks_next) { |
292 kstat_t *ksp_sys; |
312 kstat_t *ksp_sys; |
293 kstat_named_t *knp; |
313 kstat_named_t *knp; |
294 int cpu; |
314 int cpu; |
295 double snaptime; |
315 hrtime_t snaptime; |
296 |
316 |
297 if ((ksp->ks_type != kstat_type_named) || |
317 if ((ksp->ks_type != kstat_type_named) || |
298 strcmp(ksp->ks_module, "cpu_info") || |
318 strcmp(ksp->ks_module, "cpu_info") || |
299 (kstat_read(kc, ksp) == -1)) { |
319 (kstat_read(kc, ksp) == -1)) { |
300 continue; |
320 continue; |
301 } |
321 } |
302 knp = kstat_data_lookup(ksp, "state"); |
322 knp = kstat_data_lookup(ksp, "state"); |
303 if ((knp == NULL) || strcmp(knp->name, PS_ONLINE)) { |
323 if ((knp == NULL) || strcmp(knp->name, PS_ONLINE) || |
304 continue; |
324 ((cpu = ksp->ks_instance) >= max_cpus)) { |
305 } |
325 continue; |
306 cpu = ksp->ks_instance; |
326 } |
307 ksp_sys = kstat_lookup(kc, "cpu", cpu, "sys"); |
327 ksp_sys = kstat_lookup(kc, "cpu", cpu, "sys"); |
308 if ((ksp_sys == NULL) || (kstat_read(kc, ksp_sys, NULL) == -1)) { |
328 if ((ksp_sys == NULL) || (kstat_read(kc, ksp_sys) == -1)) { |
309 continue; |
329 continue; |
310 } |
330 } |
311 cpu_stats[cpu].state = P_ONLINE; |
331 cpu_stats[cpu].state = P_ONLINE; |
312 knp = ksp_sys->ks_data; |
332 knp = ksp_sys->ks_data; |
313 for (i = 0; i < ksp_sys->ks_ndata; i++) { |
333 for (i = 0; i < ksp_sys->ks_ndata; i++) { |
314 if (!strcmp(knp[i].name, "cpu_nsec_idle") || |
334 if ((strcmp(knp[i].name, "cpu_nsec_idle") == 0) || |
315 !strcmp(knp[i].name, "cpu_nsec_user") || |
335 (strcmp(knp[i].name, "cpu_nsec_user") == 0) || |
316 !strcmp(knp[i].name, "cpu_nsec_kernel")) { |
336 (strcmp(knp[i].name, "cpu_nsec_kernel") == 0)) { |
317 cpu_stats[cpu].tot += knp[i].value.ui64; |
337 cpu_stats[cpu].tot += knp[i].value.ui64; |
318 } |
338 } |
319 cpu_stats[cpu].crtime = ksp_sys->crtime; |
339 cpu_stats[cpu].crtime = ksp_sys->crtime; |
320 snaptime = ksp_sys->snaptime; |
340 snaptime = ksp_sys->snaptime; |
321 if (minsnap == -1 || snaptime < minsnap) { |
341 if (minsnap == -1 || snaptime < minsnap) { |
342 */ |
362 */ |
343 |
363 |
344 for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) { |
364 for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) { |
345 kstat_named_t *knp; |
365 kstat_named_t *knp; |
346 int cpu; |
366 int cpu; |
347 double snaptime; |
367 int ino; |
|
368 cpu_stat_t *cpup; |
|
369 bus_dev_t *busp; |
|
370 bus_dev_t *bus_last; |
|
371 ivec_t *ivecp; |
|
372 hrtime_t snaptime; |
348 |
373 |
349 if ((ksp->ks_type != KSTAT_TYPE_NAMED) || |
374 if ((ksp->ks_type != KSTAT_TYPE_NAMED) || |
350 strcmp(ksp->ks_module, "pci_intrs") || |
375 strcmp(ksp->ks_module, "pci_intrs") || |
351 (kstat_read(kc, ksp) == -1)) { |
376 (kstat_read(kc, ksp) == -1)) { |
352 continue; |
377 continue; |
353 } |
378 } |
354 knp = kstat_data_lookup(ksp, "cpu"); |
379 knp = kstat_data_lookup(ksp, "cpu"); |
355 if ((knp == NULL) || ((cpu = knp->value.ui32) > max_cpus) || |
380 if ((knp == NULL) || ((cpu = knp->value.ui32) >= max_cpus) || |
356 (cpu_stats[cpu].state != P_ONLINE)) { |
381 (cpu_stats[cpu].state != P_ONLINE)) { |
357 continue; |
382 continue; |
358 } |
383 } |
|
384 cpup = &cpu_stats[cpu]; |
359 knp = kstat_data_lookup(ksp, "type"); |
385 knp = kstat_data_lookup(ksp, "type"); |
360 if ((knp == NULL) || strcmp(knp->value.c, "disabled")) { |
386 if ((knp == NULL) || strcmp(knp->value.c, "disabled")) { |
361 continue; |
387 continue; |
362 } |
388 } |
363 knp = kstat_data_lookup(ksp, "buspath"); |
389 knp = kstat_data_lookup(ksp, "buspath"); |
364 if (knp == NULL) { |
390 if (knp == NULL) { |
365 continue; |
391 continue; |
366 } |
392 } |
|
393 |
|
394 for (bus_last = NULL, busp = cpup->bus_head; busp != NULL; |
|
395 bus_last = busp, busp = busp->next) { |
|
396 if (strcmp(knp->value.c, busp->buspath) == 0) { |
|
397 break; |
|
398 } |
|
399 } |
|
400 |
|
401 if (busp == NULL) { |
|
402 busp = malloc(sizeof (bus_dev_t)); |
|
403 if (busp == NULL) { |
|
404 return -1; |
|
405 } |
|
406 |
|
407 busp->next = NULL; |
|
408 |
|
409 strlcpy(busp->buspath, knp->value.c, MAXPATHLEN); |
|
410 busp->is_pcplusmp = |
|
411 intrinfo(busp->buspath, &(busp->num_intr)); |
|
412 |
|
413 busp->ivecs = malloc(sizeof (ivec_t) * busp->num_intr); |
|
414 if (busp->ivecs == NULL) { |
|
415 free(busp); |
|
416 return -1; |
|
417 } |
|
418 bzero(busp->ivecs, sizeof (ivec_t) * busp->num_intr); |
|
419 |
|
420 if (bus_last == NULL) { |
|
421 cpup->bus_head = busp; |
|
422 } else { |
|
423 bus_last->next = busp; |
|
424 } |
|
425 } |
|
426 knp = kstat_data_lookup(ksp, "ino"); |
|
427 if ((knp == NULL) || |
|
428 ((ino = knp->value.ui32) >= busp->num_intr)) { |
|
429 continue; |
|
430 } |
|
431 ivecp = &(busp->ivecs[ino]); |
|
432 |
|
433 knp = kstat_data_lookup(ksp, "time"); |
|
434 if (knp == NULL) { |
|
435 continue; |
|
436 } |
|
437 ivecp->time = knp->value.ui64; |
|
438 |
|
439 if (busp->is_pcplusmp) { |
|
440 knp = kstat_data_lookup(ksp, "type"); |
|
441 if (knp == NULL) { |
|
442 continue; |
|
443 } |
|
444 if (strcmp(knp->value.c, "msi") == 0) { |
|
445 for (msi_last = NULL, msip = busp->msi_head; |
|
446 msip != NULL; |
|
447 msi_last = msip, msip = msip->next) { |
|
448 if (ivecp->cookie == msip->cookie) { |
|
449 break; |
|
450 } |
|
451 } |
|
452 |
|
453 } |
|
454 |
|
455 ivecp->num_ino = 1; |
|
456 ivecp->crtime = ksp->crtime; |
367 snaptime = ksp->snaptime; |
457 snaptime = ksp->snaptime; |
368 if (minsnap == -1 || snaptime < minsnap) { |
458 if (minsnap == -1 || snaptime < minsnap) { |
369 minsnap = snaptime; |
459 minsnap = snaptime; |
370 } |
460 } |
371 if (snaptime > maxsnap) { |
461 if (snaptime > maxsnap) { |