intrd.c
changeset 4 a42e422f55c0
parent 3 380ada8fd621
equal deleted inserted replaced
3:380ada8fd621 4:a42e422f55c0
    27 #include <errno.h>
    27 #include <errno.h>
    28 #include <unistd.h>
    28 #include <unistd.h>
    29 #include <stdlib.h>
    29 #include <stdlib.h>
    30 #include <limits.h>
    30 #include <limits.h>
    31 #include <string.h>
    31 #include <string.h>
       
    32 #include <getopt.h>
    32 #include <libgen.h>
    33 #include <libgen.h>
    33 #include <syslog.h>
    34 #include <syslog.h>
    34 #include <kstat.h>
    35 #include <kstat.h>
    35 #include <sys/processor.h>
    36 #include <sys/processor.h>
       
    37 #include <sys/modhash.h>
    36 
    38 
    37 #include "intrs.h"
    39 #include "intrs.h"
    38 
    40 
       
    41 /* Interrupt vector info */
    /*
     * ivec_t: one interrupt vector as recorded by getstat().
     *
     * Comparison-view note: every line below is shown twice, first with its
     * line number in parent revision 3 and then with its number in revision 4.
     * The only member whose declaration differs between the two is `time`.
     * Members without a note are not assigned in the visible part of the file.
     */
    39 typedef struct ivec {
    42 typedef struct ivec {
    /*
     * cookie: getstat() compares this against msip->cookie while walking the
     * MSI device list.
     * NOTE(review): nothing in the visible code assigns it - confirm where it
     * is meant to be set.
     */
    40 	int cookie;
    43 	int cookie;
    /*
     * time: hrtime_t in revision 3, uint64_t in revision 4. getstat() stores
     * the ui64 member of the "time" kstat value here.
     */
    41 	hrtime_t time;
    44 	uint64_t time;
    /* crtime: getstat() copies the crtime of the pci_intrs kstat here. */
    42 	hrtime_t crtime;
    45 	hrtime_t crtime;
    43 	int pil;
    46 	int pil;
    /*
     * ino: getstat() uses the "ino" kstat value as the index into
     * bus_dev_t.ivecs.
     * NOTE(review): the visible code never stores that value into this
     * member - confirm.
     */
    44 	int ino;
    47 	int ino;
    /* ihs: the getstat() header comment maps this to pci_intrs:...:ihs. */
    45 	int ihs;
    48 	int ihs;
    /* num_ino: getstat() sets this to 1 for every vector it records. */
    46 	int num_ino;
    49 	int num_ino;
    47 	int origcpu;
    50 	int origcpu;
    48 	int nowcpu;
    51 	int nowcpu;
    49 	int inum;
    52 	int inum;
    50 } ivec_t;
    53 } ivec_t;
    51 
    54 
    52 typedef struct bus_stat {
    55 uu_list_pool_t *ivec_pool;
    53 	bus_stat_t *next;
    56 
       
    57 /* MSI device info */
       
    /*
     * msi_dev_t: MSI device info.
     *
     * Node of a singly linked list; bus_dev_t.msi_head points at the first
     * node and getstat() follows `next` until it reaches NULL.
     *
     * Fixes relative to the text inserted by revision 4:
     *  - `next` is declared through the struct tag, because the typedef name
     *    msi_dev_t does not exist until the closing line of this declaration.
     *  - `devpath` is a character buffer of MAXPATHLEN bytes instead of an
     *    array of MAXPATHLEN char pointers (the same shape getstat() needs
     *    for bus_dev_t.buspath, which it fills with strlcpy()).
     *  - `ivecs` had its `*` in front of the type name, which does not parse.
     *    struct ivec is the tag behind ivec_t, so the member type is ivec_t *.
     *
     * NOTE(review): getstat() reads msip->cookie, but this struct has no
     * cookie member - decide whether the member or the comparison is wrong.
     */
    typedef struct msi_dev {
    	struct msi_dev *next;		/* next device, NULL at end of list */
    	char devpath[MAXPATHLEN];	/* presumably the device path - confirm */
    	int num_intr;			/* presumably the length of ivecs - confirm */
    	struct ivec *ivecs;		/* presumably this device's vectors - confirm */
    } msi_dev_t;
       
    64 
       
    65 uu_list_pool_t *msi_dev_pool;
       
    66 
       
    67 /* Bus info */
       
    /*
     * bus_dev_t: one bus as seen from one CPU; nodes hang off
     * cpu_stat_t.bus_head and getstat() looks them up by buspath.
     *
     * Comparison-view note: gutter numbers 68-75 are revision 4 lines; the
     * interleaved lines numbered 54-58 are the tail of the revision 3
     * bus_stat_t struct that this declaration replaces.
     *
     * NOTE(review), revision 4 side:
     *  - `bus_dev_t *next;` uses the typedef name before it is declared; it
     *    has to be `struct bus_dev *next;`.
     *  - `char *buspath[MAXPATHLEN];` is an array of MAXPATHLEN char pointers,
     *    but getstat() passes busp->buspath to strlcpy() and strcmp() as a
     *    string buffer; that needs `char buspath[MAXPATHLEN];`.
     *  - getstat() never initialises msi_head after malloc() (it sets next,
     *    buspath, is_pcplusmp, num_intr and ivecs only) and later walks the
     *    list starting from it.
     */
    68 typedef struct bus_dev {
       
    69 	bus_dev_t *next;
    54 	char *buspath[MAXPATHLEN];
    70 	char *buspath[MAXPATHLEN];
    /* num_intr: written by intrinfo(); getstat() uses it to size ivecs. */
    55 	int num_intr;
    71 	int num_intr;
    /* ivecs: zeroed array of num_intr entries, indexed by the "ino" kstat. */
    56 	ivec_t *ivecs;
    72 	ivec_t *ivecs;
    /* is_pcplusmp: receives the return value of intrinfo() in getstat(). */
    57 } bus_stat_t;
    73 	int is_pcplusmp;
    58 
    /* msi_head: first msi_dev_t of this bus; walked only when is_pcplusmp. */
    74 	msi_dev_t *msi_head;
       
    75 } bus_dev_t;
       
    76 
       
    77 uu_list_pool_t *bus_dev_pool;
       
    78 
       
    79 /* Per-CPU statistics */
    /*
     * cpu_stat_t: per-CPU statistics. main() allocates an array of max_cpus
     * of these and getstat() indexes that array by CPU id.
     *
     * Comparison-view note: each line is shown twice, revision 3 numbering
     * first and revision 4 second. Revision 4 replaces
     * `bus_stat_t *bus_stats` with `bus_dev_t *bus_head`.
     */
    59 typedef struct cpu_stat {
    80 typedef struct cpu_stat {
    /* state: getstat() sets this to P_ONLINE for each CPU it could read. */
    60 	int state;
    81 	int state;
    /* tot: sum of the cpu_nsec_idle, cpu_nsec_user and cpu_nsec_kernel values. */
    61 	uint64_t tot;
    82 	uint64_t tot;
    /* crtime: copied from the cpu:<id>:sys kstat in getstat(). */
    62 	hrtime_t crtime;
    83 	hrtime_t crtime;
    /*
     * bus_head: first node of the per-CPU bus_dev_t list that getstat()
     * searches by buspath and appends to.
     * NOTE(review): getstat() bzero()s the whole cpu_stat_t array on entry,
     * which also clears this pointer; confirm the nodes are freed first.
     */
    63 	bus_stat_t *bus_stats;
    84 	bus_dev_t *bus_head;
    64 } cpu_stat_t;
    85 } cpu_stat_t;
    65 
    86 
       
    87 uu_list_pool_t *cpu_stat_pool;
       
    88 
       
    89 /* Interrupt statistics */
    /*
     * intr_stat_t: a snapshot time plus the per-CPU statistics array that
     * main() allocates and getstat() fills.
     *
     * Comparison-view note: each line is shown twice, revision 3 numbering
     * first and revision 4 second.
     *
     * NOTE(review): both revisions carry the same two syntax errors here:
     *  - `type def` must be the single keyword `typedef`;
     *  - `*cpu_stat_t *cpus;` must be `cpu_stat_t *cpus;` (main() assigns the
     *    result of malloc(sizeof (cpu_stat_t) * max_cpus) to it and getstat()
     *    indexes it as an array).
     */
    66 type def struct intr_stat {
    90 type def struct intr_stat {
    /* snaptime: double in revision 3, hrtime_t in revision 4. */
    67 	double snaptime;
    91 	hrtime_t snaptime;
    68 	*cpu_stat_t *cpus;
    92 	*cpu_stat_t *cpus;
    69 } intr_stat_t;
    93 } intr_stat_t;
    70 
    94 
       
    95 uu_list_pool_t *intr_stat_pool;
       
    96 
    /*
     * sleeptime_t: the intervals the daemon sleeps for. main() passes
     * ONECPU_SLEEPTIME straight to sleep(), so the values are presumably all
     * in seconds (10 s, 45 s, 15 min).
     *
     * Comparison-view note: each line is shown twice, revision 3 numbering
     * first and revision 4 second; revision 4 only changes the whitespace in
     * front of the trailing comments.
     */
    71 typedef enum sleeptime {
    97 typedef enum sleeptime {
    72 	NORMAL_SLEEPTIME = 10,			/* time to sleep between samples */
    98 	NORMAL_SLEEPTIME = 10,		/* time to sleep between samples */
    73 	IDLE_SLEEPTIME = 45,			/* time to sleep when idle */
    99 	IDLE_SLEEPTIME = 45,		/* time to sleep when idle */
    /* Also the pause interval main() loops on when no pci_intrs kstat exists. */
    74 	ONECPU_SLEEPTIME = 60 * 15,		/* used if only 1 CPU on system */
   100 	ONECPU_SLEEPTIME = 60 * 15,	/* used if only 1 CPU on system */
    75 } sleeptime_t;
   101 } sleeptime_t;
    76 
   102 
    /*
     * File-scope configuration and state.
     *
     * Comparison-view note: each line is shown twice, revision 3 numbering
     * first and revision 4 second. Revision 4 changes comment whitespace and
     * drops the trailing comment on `sleeptime`; the declarations are the same.
     */
    /* using_scengen: main() sets this (and foreground) when -S is given. */
    77 int using_scengen;	/* 1 if using scenario simulator */
   103 int using_scengen;			/* 1 if using scenario simulator */
    /* debug: set to 1 by the -D option in main(). */
    78 int debug;
   104 int debug;
    /* foreground: set by -f or -S; when it is 0 main() calls daemon(0, 0). */
    79 int foreground;
   105 int foreground;
    80 
   106 
    /*
     * max_cpus: main() sets this to sysconf(_SC_CPUID_MAX) + 1; it sizes the
     * cpu_stat_t array and bounds the CPU ids accepted by getstat().
     */
    81 int max_cpus;
   107 int max_cpus;
    82 
   108 
    83 sleeptime_t sleeptime = NORMAL_SLEEPTIME;	/* either normal_ or idle_ or onecpu_ */
   109 sleeptime_t sleeptime = NORMAL_SLEEPTIME;
    84 
   110 
    85 float idle_intrload = 0.1; 			/*  idle if interrupt load < 10% */
   111 float idle_intrload = 0.1; 		/*  idle if interrupt load < 10% */
    86 
   112 
    /* timerange_toohi: not referenced in the visible part of the file. */
    87 float timerange_toohi = 0.1;
   113 float timerange_toohi = 0.1;
    /*
     * NOTE(review): "@deltas" looks like a Perl array name (nearby comments
     * also use Perl sigils); no C object of that name is visible - confirm
     * what this period applies to in the C version.
     */
    88 int statslen = 60;	/* time period (in secs) to keep in @deltas */
   114 int statslen = 60;	/* time period (in secs) to keep in @deltas */
    89 
   115 
    90 int main(int argc, char **argv)
   116 int main(int argc, char **argv)
    91 {
   117 {
    92 	const char *cmdname;
   118 	const char *cmdname;
    93 	kstat_ctl_t *kc;
   119 	kstat_ctl_t *kc;
    94 	kstat_t *ksp;
   120 	kstat_t *ksp;
    95 	intr_stat_t stat;
   121 	intr_stat_t stat;
       
   122 	char c;
    96 
   123 
    97 	max_cpus = sysconf(_SC_CPUID_MAX) + 1;
   124 	max_cpus = sysconf(_SC_CPUID_MAX) + 1;
    98 
   125 
    99 	cmdname = basename(argv[0]);
   126 	cmdname = basename(argv[0]);
   100 /*
   127 /*
   101  * Parse arguments. intrd does not accept any public arguments; the two
   128  * Parse arguments. intrd does not accept any public arguments; the two
   102  * arguments below are meant for testing purposes. -D generates a significant
   129  * arguments below are meant for testing purposes. -D generates a significant
   103  * amount of syslog output. -S <filename> loads the filename as a perl
   130  * amount of syslog output. -S <filename> loads the filename as a perl
   104  * script. That file is expected to implement a kstat "simulator" which
   131  * script. That file is expected to implement a kstat "simulator" which
   105  * can be used to feed information to intrd and verify intrd's responses.
   132  * can be used to feed information to intrd and verify intrd's responses.
   106 */
   133  */
   107 	for (; --argc > 0; ++argv) {
   134  	while ((c = getopt(argc, argv, "S:Df")) != EOF) {
   108 		if (argv[1][0] != '-' || argv[1][1] == '\0' ||
   135 		switch (c) {
   109 		    argv[1][2] != '\0') {
   136 		case 'S':
   110 			continue;
   137 			using_scengen = 1;
   111 		}
   138 			foreground = 1;
   112 
   139 			load_simulator(optarg);
   113 		switch (argv[1][1]) {
   140 			break;
   114 			case 'S':
   141 		case 'D':
   115 				using_scengen = 1;
   142 			debug = 1;
   116 				foreground = 1;
   143 			break;
   117 				if (argc > 1) {
   144 		case 'f':
   118 					--argc;
   145 			foreground = 1;
   119 					load_simulator(++argv[1]);
   146 			break;
   120 				}
   147 		default:
   121 				break;
       
   122 			case 'D':
       
   123 				debug = 1;
       
   124 				break;
       
   125 			case 'f':
       
   126 				foreground = 1;
       
   127 				break;
       
   128 			default:
       
   129 		}
   148 		}
   130 	}
   149 	}
   131 
   150 
   132 	if (!foreground) {
   151 	if (!foreground) {
   133 		if (daemon(0, 0) == -1) {
   152 		if (daemon(0, 0) == -1) {
   153 /*
   172 /*
   154  * If no pci_intrs kstats were found, we need to exit, but we can't because
   173  * If no pci_intrs kstats were found, we need to exit, but we can't because
   155  * SMF will restart us and/or report an error to the administrator. But
   174  * SMF will restart us and/or report an error to the administrator. But
   156  * there's nothing an administrator can do. So print out a message to syslog
   175  * there's nothing an administrator can do. So print out a message to syslog
   157  * and silently pause forever.
   176  * and silently pause forever.
   158 */
   177  */
   159 	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
   178 	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
   160 		if ((ksp->ks_type == KSTAT_TYPE_NAMED) &&
   179 		if ((ksp->ks_type == KSTAT_TYPE_NAMED) &&
   161 		    !strcmp(ksp->ks_module, "pci_intrs")) {
   180 		    (strcmp(ksp->ks_module, "pci_intrs") == 0)) {
   162 		    break;
   181 		    break;
   163 		}
   182 		}
   164 	}
   183 	}
   165 	if (ksp == NULL) {
   184 	if (ksp == NULL) {
   166 		kstat_close(kc);
   185 		kstat_close(kc);
   167 		syslog(LOG_INFO, "no interrupts were found: " \
   186 		syslog(LOG_INFO, "no interrupts were found: " \
   168 			"your I/O bus may not yet be supported\n");
   187 			"your I/O bus may not yet be supported\n");
   169 		do {} while (!sleep(ONECPU_SLEEPTIME));
   188 		do {} while (sleep(ONECPU_SLEEPTIME) == 0);
   170 		return 0;
   189 		return 0;
   171 	}
   190 	}
   172 
   191 
   173 	if ((stat.cpus = malloc(sizeof(cpu_stat_t) * max_cpus)) == NULL) {
   192 	stat.cpus = malloc(sizeof (cpu_stat_t) * max_cpus);
       
   193 	if (stat.cpus == NULL) {
   174 		return 1;
   194 		return 1;
   175 	}
   195 	}
   176 }
   196 }
   177 
   197 
   178 
   198 
   208 int do_reconfig_cpu($$$);	
   228 int do_reconfig_cpu($$$);	
   209 */
   229 */
   210 
   230 
   211 
   231 
   212 /*
   232 /*
   213 #
   233  *
   214  * What follows are the basic data-structure routines of intrd.
   234  * What follows are the basic data-structure routines of intrd.
   215 #
   235  *
   216  * getstat() is responsible for reading the kstats and generating a "stat" hash.
   236  * getstat() is responsible for reading the kstats and generating a "stat" hash.
   217 #
   237  *
   218  * generate_delta() is responsible for taking two "stat" hashes and creating
   238  * generate_delta() is responsible for taking two "stat" hashes and creating
   219  * a new "delta" hash that represents what has changed over time.
   239  * a new "delta" hash that represents what has changed over time.
   220 #
   240  *
   221  * compress_deltas() is responsible for taking a list of deltas and generating
   241  * compress_deltas() is responsible for taking a list of deltas and generating
   222  * a single delta hash that encompasses all the time periods described by the
   242  * a single delta hash that encompasses all the time periods described by the
   223  * deltas.
   243  * deltas.
   224 */
   244 */
   225 
   245 
   226 
   246 
   227 /*
   247 /*
   228 #
   248  *
   229  * getstat() is handed a reference to a kstat and generates a hash, returned
   249  * getstat() is handed a reference to a kstat and generates a hash, returned
   230  * by reference, containing all the fields from the kstats which we need.
   250  * by reference, containing all the fields from the kstats which we need.
   231  * If it returns the scalar 0, it failed to gather the kstats, and the caller
   251  * If it returns the scalar 0, it failed to gather the kstats, and the caller
   232  * should react accordingly.
   252  * should react accordingly.
   233 #
   253  *
   234  * getstat() is also responsible for maintaining a reasonable $sleeptime.
   254  * getstat() is also responsible for maintaining a reasonable $sleeptime.
   235 #
   255  *
   236  * {"snaptime"}          kstat's snaptime
   256  * {"snaptime"}          kstat's snaptime
   237  * {<cpuid>}             one hash reference per online cpu
   257  * {<cpuid>}             one hash reference per online cpu
   238  *  ->{"tot"}            == cpu:<cpuid>:sys:cpu_nsec_{user + kernel + idle}
   258  *  ->{"tot"}            == cpu:<cpuid>:sys:cpu_nsec_{user + kernel + idle}
   239  *  ->{"crtime"}         == cpu:<cpuid>:sys:crtime
   259  *  ->{"crtime"}         == cpu:<cpuid>:sys:crtime
   240  *  ->{"ivecs"}
   260  *  ->{"ivecs"}
   247  *				Will be > 1 on pcplusmp X86 systems for devices
   267  *				Will be > 1 on pcplusmp X86 systems for devices
   248  *				with multiple MSI interrupts.
   268  *				with multiple MSI interrupts.
   249  *        ->{"buspath"}  == pci_intrs:<ivec#>:<nexus>:buspath
   269  *        ->{"buspath"}  == pci_intrs:<ivec#>:<nexus>:buspath
   250  *        ->{"name"}     == pci_intrs:<ivec#>:<nexus>:name
   270  *        ->{"name"}     == pci_intrs:<ivec#>:<nexus>:name
   251  *        ->{"ihs"}      == pci_intrs:<ivec#>:<nexus>:ihs
   271  *        ->{"ihs"}      == pci_intrs:<ivec#>:<nexus>:ihs
   252 #
   272  *
   253 */
   273 */
   254 
   274 
   255 int getstat(kstat_ctl_t *kc, intr_stat_t *stat)
   275 int getstat(kstat_ctl_t *kc, intr_stat_t *stat)
   256 {
   276 {
   257 	int cpucnt = 0;
   277 	int cpucnt = 0;
   258 	kstat_t *ksp;
   278 	kstat_t *ksp;
   259 	double minsnap, maxsnap;
   279 	hrtime_t minsnap, maxsnap;
   260 
   280 
   261 	/* Hash of hash which matches (MSI device, ino) combos to kstats. */
   281 	/* Hash of hash which matches (MSI device, ino) combos to kstats. */
   262 	int *msidevs;
   282 	msi_dev_t *msidevs;
   263 
   283 
   264 	/*
   284 	/*
   265 	 * kstats are not generated atomically. Each kstat hierarchy will
   285 	 * kstats are not generated atomically. Each kstat hierarchy will
   266 	 * have been generated within the kernel at a different time. On a
   286 	 * have been generated within the kernel at a different time. On a
   267 	 * thrashing system, we may not run quickly enough in order to get
   287 	 * thrashing system, we may not run quickly enough in order to get
   284 	 *
   304 	 *
   285 	 * record cpu:<cpuid>:sys:snaptime, and check $minsnap/$maxsnap.
   305 	 * record cpu:<cpuid>:sys:snaptime, and check $minsnap/$maxsnap.
   286 	 */
   306 	 */
   287 
   307 
   288 	cpu_stats = stat->cpus;
   308 	cpu_stats = stat->cpus;
   289 	bzero(cpu_stats, sizeof(cpu_stat_t) * max_cpus);
   309 	bzero(cpu_stats, sizeof (cpu_stat_t) * max_cpus);
   290 
   310 
   291 	for (ksp = kc->kc_chain; ksp != null; ksp = ksp->ks_next) {
   311 	for (ksp = kc->kc_chain; ksp != null; ksp = ksp->ks_next) {
   292 		kstat_t *ksp_sys;
   312 		kstat_t *ksp_sys;
   293 		kstat_named_t *knp;
   313 		kstat_named_t *knp;
   294 		int cpu;
   314 		int cpu;
   295 		double snaptime;
   315 		hrtime_t snaptime;
   296 
   316 
   297 		if ((ksp->ks_type != kstat_type_named) ||
   317 		if ((ksp->ks_type != kstat_type_named) ||
   298 		    strcmp(ksp->ks_module, "cpu_info") ||
   318 		    strcmp(ksp->ks_module, "cpu_info") ||
   299 		    (kstat_read(kc, ksp) == -1)) {
   319 		    (kstat_read(kc, ksp) == -1)) {
   300 		    continue;
   320 		    continue;
   301 		}
   321 		}
   302 		knp = kstat_data_lookup(ksp, "state");
   322 		knp = kstat_data_lookup(ksp, "state");
   303 		if ((knp == NULL) || strcmp(knp->name, PS_ONLINE)) {
   323 		if ((knp == NULL) || strcmp(knp->name, PS_ONLINE) ||
   304 			continue;
   324 		    ((cpu = ksp->ks_instance) >= max_cpus)) {
   305 		}
   325 			continue;
   306 		cpu = ksp->ks_instance;
   326 		}
   307 		ksp_sys = kstat_lookup(kc, "cpu", cpu, "sys");
   327 		ksp_sys = kstat_lookup(kc, "cpu", cpu, "sys");
   308 		if ((ksp_sys == NULL) || (kstat_read(kc, ksp_sys, NULL) == -1)) {
   328 		if ((ksp_sys == NULL) || (kstat_read(kc, ksp_sys) == -1)) {
   309 			continue;
   329 			continue;
   310 		}
   330 		}
   311 		cpu_stats[cpu].state = P_ONLINE;
   331 		cpu_stats[cpu].state = P_ONLINE;
   312 		knp = ksp_sys->ks_data;
   332 		knp = ksp_sys->ks_data;
   313 		for (i = 0; i < ksp_sys->ks_ndata; i++) {
   333 		for (i = 0; i < ksp_sys->ks_ndata; i++) {
   314 			if (!strcmp(knp[i].name, "cpu_nsec_idle") ||
   334 			if ((strcmp(knp[i].name, "cpu_nsec_idle") == 0) ||
   315 			    !strcmp(knp[i].name, "cpu_nsec_user") ||
   335 			    (strcmp(knp[i].name, "cpu_nsec_user") == 0) ||
   316 			    !strcmp(knp[i].name, "cpu_nsec_kernel")) {
   336 			    (strcmp(knp[i].name, "cpu_nsec_kernel") == 0)) {
   317 				cpu_stats[cpu].tot += knp[i].value.ui64;
   337 				cpu_stats[cpu].tot += knp[i].value.ui64;
   318 		}
   338 		}
   319 		cpu_stats[cpu].crtime = ksp_sys->crtime;
   339 		cpu_stats[cpu].crtime = ksp_sys->crtime;
   320 		snaptime = ksp_sys->snaptime;
   340 		snaptime = ksp_sys->snaptime;
   321 		if (minsnap == -1 || snaptime < minsnap) {
   341 		if (minsnap == -1 || snaptime < minsnap) {
   342 	 */
   362 	 */
   343 
   363 
   344 	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
   364 	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
   345 		kstat_named_t *knp;
   365 		kstat_named_t *knp;
   346 		int cpu;
   366 		int cpu;
   347 		double snaptime;
   367 		int ino;
       
   368 		cpu_stat_t *cpup;
       
   369 		bus_dev_t *busp;
       
   370 		bus_dev_t *bus_last;
       
   371 		ivec_t *ivecp;
       
   372 		hrtime_t snaptime;
   348 
   373 
   349 		if ((ksp->ks_type != KSTAT_TYPE_NAMED) ||
   374 		if ((ksp->ks_type != KSTAT_TYPE_NAMED) ||
   350 		    strcmp(ksp->ks_module, "pci_intrs") ||
   375 		    strcmp(ksp->ks_module, "pci_intrs") ||
   351 		    (kstat_read(kc, ksp) == -1)) {
   376 		    (kstat_read(kc, ksp) == -1)) {
   352 			continue;
   377 			continue;
   353 		}
   378 		}
   354 		knp = kstat_data_lookup(ksp, "cpu");
   379 		knp = kstat_data_lookup(ksp, "cpu");
   355 		if ((knp == NULL) || ((cpu = knp->value.ui32) > max_cpus) ||
   380 		if ((knp == NULL) || ((cpu = knp->value.ui32) >= max_cpus) ||
   356 		    (cpu_stats[cpu].state != P_ONLINE)) {
   381 		    (cpu_stats[cpu].state != P_ONLINE)) {
   357 			continue;
   382 			continue;
   358 		}
   383 		}
       
   384 		cpup = &cpu_stats[cpu];
   359 		knp = kstat_data_lookup(ksp, "type");
   385 		knp = kstat_data_lookup(ksp, "type");
   360 		if ((knp == NULL) || strcmp(knp->value.c, "disabled")) {
   386 		if ((knp == NULL) || strcmp(knp->value.c, "disabled")) {
   361 			continue;
   387 			continue;
   362 		}
   388 		}
   363 		knp = kstat_data_lookup(ksp, "buspath");
   389 		knp = kstat_data_lookup(ksp, "buspath");
   364 		if (knp == NULL) {
   390 		if (knp == NULL) {
   365 			continue;
   391 			continue;
   366 		}
   392 		}
       
   393 		
       
   394 		for (bus_last = NULL, busp = cpup->bus_head; busp != NULL;
       
   395 		     bus_last = busp, busp = busp->next) {
       
   396 			if (strcmp(knp->value.c, busp->buspath) == 0) {
       
   397 				break;
       
   398 			}
       
   399 		}
       
   400 
       
   401 		if (busp == NULL) {
       
   402 			busp = malloc(sizeof (bus_dev_t));
       
   403 			if (busp == NULL) {
       
   404 				return -1;
       
   405 			}
       
   406 
       
   407 			busp->next = NULL;
       
   408 
       
   409 			strlcpy(busp->buspath, knp->value.c, MAXPATHLEN);
       
   410 			busp->is_pcplusmp =
       
   411 			    intrinfo(busp->buspath, &(busp->num_intr));
       
   412 
       
   413 			busp->ivecs = malloc(sizeof (ivec_t) * busp->num_intr);
       
   414 			if (busp->ivecs == NULL) {
       
   415 				free(busp);
       
   416 				return -1;
       
   417 			}
       
   418 			bzero(busp->ivecs, sizeof (ivec_t) * busp->num_intr);
       
   419 
       
   420 			if (bus_last == NULL) {
       
   421 				cpup->bus_head = busp;
       
   422 			} else {
       
   423 				bus_last->next = busp;
       
   424 			}
       
   425 		}
       
   426 		knp = kstat_data_lookup(ksp, "ino");
       
   427 		if ((knp == NULL) ||
       
   428 		    ((ino = knp->value.ui32) >= busp->num_intr)) {
       
   429 			continue;
       
   430 		}
       
   431 		ivecp = &(busp->ivecs[ino]);
       
   432 
       
   433 		knp = kstat_data_lookup(ksp, "time");
       
   434 		if (knp == NULL) {
       
   435 			continue;
       
   436 		}
       
   437 		ivecp->time = knp->value.ui64;
       
   438 
       
   439 		if (busp->is_pcplusmp) {
       
   440 			knp = kstat_data_lookup(ksp, "type");
       
   441 			if (knp == NULL) {
       
   442 				continue;
       
   443 			}
       
   444 			if (strcmp(knp->value.c, "msi") == 0) {
       
   445 				for (msi_last = NULL, msip = busp->msi_head;
       
   446 				     msip != NULL;
       
   447 				     msi_last = msip, msip = msip->next) {
       
   448 					if (ivecp->cookie == msip->cookie) {
       
   449 						break;
       
   450 					}
       
   451 			}
       
   452 
       
   453 		}
       
   454 			
       
   455 		ivecp->num_ino = 1;
       
   456 		ivecp->crtime = ksp->crtime;
   367 		snaptime = ksp->snaptime;
   457 		snaptime = ksp->snaptime;
   368 		if (minsnap == -1 || snaptime < minsnap) {
   458 		if (minsnap == -1 || snaptime < minsnap) {
   369 			minsnap = snaptime;
   459 			minsnap = snaptime;
   370 		}
   460 		}
   371 		if (snaptime > maxsnap) {
   461 		if (snaptime > maxsnap) {