Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 2 Jun 2010 17:27:21 +0200
From:      Ulrich =?utf-8?B?U3DDtnJsZWlu?= <uqs@spoerlein.net>
To:        Gustau =?utf-8?B?UMOpcmV6?= <gperez@entel.upc.edu>
Cc:        freebsd-stable@freebsd.org, Mikolaj Golub <to.my.trociny@gmail.com>
Subject:   Re: bsnmpd returns incorrect hrProcessorLoad values
Message-ID:  <20100602152721.GA3594@acme.spoerlein.net>
In-Reply-To: <4B69C572.1020601@entel.upc.edu>
References:  <4B62C890.3020802@entel.upc.edu> <86ljfg7hl3.fsf@kopusha.onet> <4B69C572.1020601@entel.upc.edu>

next in thread | previous in thread | raw e-mail | index | archive | help

--jRHKVT23PllUwdXP
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline
Content-Transfer-Encoding: 8bit

On Wed, 03.02.2010 at 19:50:26 +0100, Gustau Pérez wrote:
> En/na Mikolaj Golub ha escrit:
> > On Fri, 29 Jan 2010 12:37:52 +0100 Gustau Pérez wrote:
> >
> >   
> >>   Hi,
> >>
> >>   I'm using cacti to monitor some servers running FBSD. I was using 7.2
> >> with SCHED_4BSD. With this configuration : bsnmpd+bsnmp-ucd was
> >> returning right values for the cores' load.
> >>
> >>    I recently updated the servers (via csup) to RELENG_8 and bsnmpd is
> >> returning negative values for the cores' load. If I try something like
> >> in a 4-core system :
> >>
> >>               snmpwalk -v 2c -c community server .1.3.6.1.2.1.25.3.3.1
> >>
> >>    what I get is :
> >>
> >>         .1.3.6.1.2.1.25.3.3.1.1.6 = OID: .0.0
> >>         .1.3.6.1.2.1.25.3.3.1.1.10 = OID: .0.0
> >>         .1.3.6.1.2.1.25.3.3.1.1.14 = OID: .0.0
> >>         .1.3.6.1.2.1.25.3.3.1.1.18 = OID: .0.0
> >>         .1.3.6.1.2.1.25.3.3.1.2.6 = INTEGER: -182
> >>         .1.3.6.1.2.1.25.3.3.1.2.10 = INTEGER: -182
> >>         .1.3.6.1.2.1.25.3.3.1.2.14 = INTEGER: -182
> >>         .1.3.6.1.2.1.25.3.3.1.2.18 = INTEGER: -182

Guys,

Can you please try the attached patch? I haven't yet tried it on a UP
(uniprocessor) system, but it should mostly work. It is not finished, though.

Regards,
Uli

--jRHKVT23PllUwdXP
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="bsnmpd.diff"

Index: usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c
===================================================================
--- usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c	(revision 208628)
+++ usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c	(working copy)
@@ -63,6 +63,7 @@
 
 	/* the samples from the last minute, as required by MIB */
 	double		samples[MAX_CPU_SAMPLES];
+	long		states[MAX_CPU_SAMPLES][CPUSTATES];
 
 	/* current sample to fill in next time, must be < MAX_CPU_SAMPLES */
 	uint32_t	cur_sample_idx;
@@ -112,6 +113,43 @@
 	return ((int)floor((double)sum/(double)e->sample_cnt));
 }
 
+static int
+get_avg_usage(struct processor_entry *e)
+{
+	u_int i, oldest;
+	long delta = 0;
+	double load = 0.0;
+
+	assert(e != NULL);
+
+	/* Need two samples to perform delta calculation */
+	if (e->sample_cnt <= 1)
+		return (0);
+
+	/* oldest usable index */
+	if (e->sample_cnt == MAX_CPU_SAMPLES)
+		oldest = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
+	else
+		oldest = 0;
+
+	/* FIXME handle wrap around */
+	for (i = 0; i < CPUSTATES; i++) {
+		delta += e->states[e->cur_sample_idx][i];
+		delta -= e->states[oldest][i];
+	}
+	if (delta == 0)
+		return 0;
+
+	/* XXX idle time is in the last index always?!? */
+	load = (double)(e->states[e->cur_sample_idx][CPUSTATES-1] -
+	    e->states[oldest][CPUSTATES-1]) / delta;
+	load = 100 - (load*100);
+	HRDBG("CPU no. %d delta ticks %ld pct usage %.2f", e->cpu_no,
+	    delta, load);
+
+	return (floor(load));
+}
+
 /*
  * Stolen from /usr/src/bin/ps/print.c. The idle process should never
  * be swapped out :-)
@@ -132,11 +170,15 @@
  * Save a new sample
  */
 static void
-save_sample(struct processor_entry *e, struct kinfo_proc *kp)
+save_sample(struct processor_entry *e, struct kinfo_proc *kp, long *cp_times)
 {
+	int i;
 
+	for (i = 0; cp_times != NULL && i < CPUSTATES; i++)
+		e->states[e->cur_sample_idx][i] = cp_times[i];
+
 	e->samples[e->cur_sample_idx] = 100.0 - processor_getpcpu(kp);
-	e->load = get_avg_load(e);
+	e->load = get_avg_usage(e);
 	e->cur_sample_idx = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
 
 	if (++e->sample_cnt > MAX_CPU_SAMPLES)
@@ -241,8 +283,6 @@
 		entry->idle_pid = kp->ki_pid;
 		HRDBG("CPU no. %d with SNMP index=%d has idle PID %d",
 		    entry->cpu_no, entry->index, entry->idle_pid);
-
-		save_sample(entry, kp);
 	}
 }
 
@@ -386,12 +426,22 @@
 refresh_processor_tbl(void)
 {
 	struct processor_entry *entry;
-	int need_pids;
+	int need_pids, nproc;
 	struct kinfo_proc *plist;
-	int nproc;
+	size_t size;
 
 	processor_refill_tbl();
 
+	long pcpu_cp_times[hw_ncpu * CPUSTATES];
+	memset(pcpu_cp_times, 0, sizeof(pcpu_cp_times));
+
+	size = hw_ncpu * CPUSTATES * sizeof(long);
+	/* FIXME: assert entry->ncpu <= hw_ncpu <= length of cp_times */
+	if (sysctlbyname("kern.cp_times", pcpu_cp_times, &size, NULL, 0) == -1) {
+		syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.cp_times) failed");
+		return;
+	}
+
 	need_pids = 0;
 	TAILQ_FOREACH(entry, &processor_tbl, link) {
 		if (entry->idle_pid <= 0) {
@@ -410,7 +460,7 @@
 			need_pids = 1;
 			continue;
 		}
-		save_sample(entry, plist);
+		save_sample(entry, plist, &pcpu_cp_times[entry->cpu_no * CPUSTATES]);
 	}
 
 	if (need_pids == 1)
Index: usr.sbin/bsnmpd/modules/snmp_hostres/Makefile
===================================================================
--- usr.sbin/bsnmpd/modules/snmp_hostres/Makefile	(revision 208628)
+++ usr.sbin/bsnmpd/modules/snmp_hostres/Makefile	(working copy)
@@ -48,7 +48,8 @@
 	printcap.c
 
 #Not having NDEBUG defined will enable assertions and a lot of output on stderr
-CFLAGS+= -DNDEBUG -I${LPRSRC}
+WARNS?=	1
+CFLAGS+= -I${LPRSRC}
 XSYM=	host hrStorageOther hrStorageRam hrStorageVirtualMemory \
 	hrStorageFixedDisk hrStorageRemovableDisk hrStorageFloppyDisk \
 	hrStorageCompactDisc hrStorageRamDisk hrStorageFlashMemory \

--jRHKVT23PllUwdXP--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100602152721.GA3594>