Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 08 Feb 2005 19:55:48 -0800
From:      Nate Lawson <nate@root.org>
To:        "Stephane E. Potvin" <sepotvin@videotron.ca>
Cc:        acpi@freebsd.org
Subject:   Re: HEADSUP: cpufreq import complete, acpi_throttling changed
Message-ID:  <420989C4.2040701@root.org>
In-Reply-To: <4206D5C6.1060109@videotron.ca>
References:  <42068A5C.1030300@root.org> <4206D5C6.1060109@videotron.ca>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------040809040900060008000000
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Stephane E. Potvin wrote:
> I've lost cpu throttling with this commit on my Dell Inspiron XPS. The
> acpi_throttle driver doesn't probe correctly on the HT cpu which seems
> to cause some problems.
> 
> ...
> acpi0: <DELL CPi R  > on motherboard
> cpu0: <ACPI CPU (3 Cx states)> on acpi0
> acpi_throttle0: <ACPI CPU Throttling> on cpu0
> cpu1: <ACPI CPU (3 Cx states)> on acpi0
> cpu1: acpi_throttle: add child failed
> ...
> 
> If you need anything (acpi ASL dump, ...) I'll send them offline as they
> are quite large.

Try the attached patch.  It adds support for MP frequency switching so 
that acpi_throttle0 and 1 will attach and the sysctl can be used to 
switch both simultaneously.

After compiling it in, please send the output of devinfo -rv and sysctl 
dev.cpu.

-- 
Nate

--------------040809040900060008000000
Content-Type: text/plain;
 name="cpufreq_mp.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="cpufreq_mp.diff"

Index: kern/kern_cpu.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_cpu.c,v
retrieving revision 1.2
diff -u -r1.2 kern_cpu.c
--- kern/kern_cpu.c	6 Feb 2005 21:08:35 -0000	1.2
+++ kern/kern_cpu.c	9 Feb 2005 03:51:22 -0000
@@ -171,7 +171,8 @@
 {
 	struct cpufreq_softc *sc;
 	const struct cf_setting *set;
-	int error, i;
+	struct pcpu *pc;
+	int cpu_id, error, i;
 
 	sc = device_get_softc(dev);
 
@@ -179,6 +180,18 @@
 	if (CPUFREQ_CMP(sc->curr_level.total_set.freq, level->total_set.freq))
 		return (0);
 
+	/* If the setting is for a different CPU, switch to it. */
+	cpu_id = PCPU_GET(cpuid);
+	pc = cpu_get_pcpu(dev);
+	KASSERT(pc, ("NULL pcpu for dev %p", dev));
+#ifdef SMP
+	if (cpu_id != pc->pc_cpuid) {
+		mtx_lock_spin(&sched_lock);
+		sched_bind(curthread, pc->pc_cpuid);
+		mtx_unlock_spin(&sched_lock);
+	}
+#endif
+
 	/* First, set the absolute frequency via its driver. */
 	set = &level->abs_set;
 	if (set->dev) {
@@ -212,6 +225,14 @@
 	error = 0;
 
 out:
+#ifdef SMP
+	/* If we switched to another CPU, switch back before exiting. */
+	if (cpu_id != pc->pc_cpuid) {
+		mtx_lock_spin(&sched_lock);
+		sched_unbind(curthread);
+		mtx_unlock_spin(&sched_lock);
+	}
+#endif
 	if (error)
 		device_printf(set->dev, "set freq failed, err %d\n", error);
 	return (error);
@@ -329,7 +350,7 @@
 		if (error || set_count == 0)
 			continue;
 
-		switch (type) {
+		switch (type & CPUFREQ_TYPE_MASK) {
 		case CPUFREQ_TYPE_ABSOLUTE:
 			error = cpufreq_insert_abs(sc, sets, set_count);
 			break;
@@ -567,11 +588,12 @@
 {
 	struct cpufreq_softc *sc;
 	struct cf_level *levels;
-	int count, error, freq, i;
+	int count, devcount, error, freq, i, x;
+	device_t *devs;
 
+	devs = NULL;
 	sc = oidp->oid_arg1;
-	count = CF_MAX_LEVELS;
-	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+	levels = malloc(CF_MAX_LEVELS * sizeof(*levels), M_TEMP, M_NOWAIT);
 	if (levels == NULL)
 		return (ENOMEM);
 
@@ -583,20 +605,35 @@
 	if (error != 0 || req->newptr == NULL)
 		goto out;
 
-	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
+	/*
+	 * While we only call cpufreq_get() on one device (assuming all
+	 * CPUs have equal levels), we call cpufreq_set() on all CPUs.
+	 * This is needed for some MP systems, including HT.
+	 */
+	error = devclass_get_devices(cpufreq_dc, &devs, &devcount);
 	if (error)
 		goto out;
-	for (i = 0; i < count; i++) {
-		if (CPUFREQ_CMP(levels[i].total_set.freq, freq)) {
-			error = CPUFREQ_SET(sc->dev, &levels[i],
-			    CPUFREQ_PRIO_USER);
+	for (x = 0; x < devcount; x++) {
+		count = CF_MAX_LEVELS;
+		error = CPUFREQ_LEVELS(devs[x], levels, &count);
+		if (error)
+			break;
+		for (i = 0; i < count; i++) {
+			if (CPUFREQ_CMP(levels[i].total_set.freq, freq)) {
+				error = CPUFREQ_SET(devs[x], &levels[i],
+				    CPUFREQ_PRIO_USER);
+				break;
+			}
+		}
+		if (i == count) {
+			error = EINVAL;
 			break;
 		}
 	}
-	if (i == count)
-		error = EINVAL;
 
 out:
+	if (devs)
+		free(devs, M_TEMP);
 	if (levels)
 		free(levels, M_TEMP);
 	return (error);
@@ -645,17 +682,16 @@
 	device_t cf_dev, cpu_dev;
 
 	/*
-	 * Only add one cpufreq device (on cpu0) for all control.  Once
-	 * independent multi-cpu control appears, we can assign one cpufreq
-	 * device per cpu.
+	 * Add only one cpufreq device to each CPU.  Currently, all CPUs
+	 * must offer the same levels and be switched at the same time.
 	 */
-	cf_dev = devclass_get_device(cpufreq_dc, 0);
-	if (cf_dev)
+	cpu_dev = device_get_parent(dev);
+	KASSERT(cpu_dev != NULL, ("no parent for %p", dev));
+	if (device_find_child(cpu_dev, "cpufreq", -1))
 		return (0);
 
-	/* Add the child device and sysctls. */
-	cpu_dev = devclass_get_device(devclass_find("cpu"), 0);
-	cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", 0);
+	/* Add the child device and possibly sysctls. */
+	cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", -1);
 	if (cf_dev == NULL)
 		return (ENOMEM);
 	device_quiet(cf_dev);
@@ -688,9 +724,8 @@
 		if (CPUFREQ_DRV_SETTINGS(devs[i], &set, &count, &type) == 0)
 			cfcount++;
 	}
-	if (cfcount <= 1) {
+	if (cfcount <= 1)
 		device_delete_child(device_get_parent(cf_dev), cf_dev);
-	}
 	free(devs, M_TEMP);
 
 	return (0);
Index: sys/cpu.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/cpu.h,v
retrieving revision 1.1
diff -u -r1.1 cpu.h
--- sys/cpu.h	4 Feb 2005 05:31:10 -0000	1.1
+++ sys/cpu.h	9 Feb 2005 03:49:49 -0000
@@ -48,7 +48,7 @@
 
 /* Each driver's CPU frequency setting is exported in this format. */
 struct cf_setting {
-	int	freq;	/* Processor clock in Mhz or percent (in 100ths.) */
+	int	freq;	/* CPU clock in Mhz or 100ths of a percent. */
 	int	volts;	/* Voltage in mV. */
 	int	power;	/* Power consumed in mW. */
 	int	lat;	/* Transition latency in us. */
@@ -82,9 +82,18 @@
  * frequency settings of 100, 200 and 300, 400 and a relative driver that
  * provides settings of 50%, 100%.  The cpufreq core would export frequency
  * levels of 50, 100, 150, 200, 300, 400.
+ *
+ * The "info only" flag signifies that settings returned by
+ * CPUFREQ_DRV_SETTINGS cannot be passed to the CPUFREQ_DRV_SET method and
+ * are only informational.  This is for some drivers that can return
+ * information about settings but rely on another machine-dependent driver
+ * for actually performing the frequency transition (e.g., ACPI performance
+ * states of type "functional fixed hardware.")
  */
 #define CPUFREQ_TYPE_RELATIVE	(1<<0)
 #define CPUFREQ_TYPE_ABSOLUTE	(1<<1)
+#define CPUFREQ_FLAG_INFO_ONLY	(1<<16)
+#define CPUFREQ_TYPE_MASK	0xffff
 
 /*
  * When setting a level, the caller indicates the priority of this request.
Index: dev/acpica/acpi_perf.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/acpi_perf.c,v
retrieving revision 1.5
diff -u -r1.5 acpi_perf.c
--- dev/acpica/acpi_perf.c	7 Feb 2005 04:03:06 -0000	1.5
+++ dev/acpica/acpi_perf.c	8 Feb 2005 03:09:14 -0000
@@ -136,7 +136,7 @@
 	ACPI_HANDLE handle;
 
 	/* Make sure we're not being doubly invoked. */
-	if (device_find_child(parent, "acpi_perf", 0) != NULL)
+	if (device_find_child(parent, "acpi_perf", -1) != NULL)
 		return;
 
 	/* Get the handle for the Processor object and check for perf states. */
@@ -145,7 +145,7 @@
 		return;
 	if (ACPI_FAILURE(AcpiEvaluateObject(handle, "_PSS", NULL, NULL)))
 		return;
-	if (BUS_ADD_CHILD(parent, 0, "acpi_perf", 0) == NULL)
+	if (BUS_ADD_CHILD(parent, 0, "acpi_perf", -1) == NULL)
 		device_printf(parent, "add acpi_perf child failed\n");
 }
 
Index: dev/acpica/acpi_throttle.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/acpi_throttle.c,v
retrieving revision 1.1
diff -u -r1.1 acpi_throttle.c
--- dev/acpica/acpi_throttle.c	6 Feb 2005 21:09:51 -0000	1.1
+++ dev/acpica/acpi_throttle.c	8 Feb 2005 03:09:29 -0000
@@ -127,19 +127,32 @@
 static void
 acpi_throttle_identify(driver_t *driver, device_t parent)
 {
+	ACPI_BUFFER buf;
+	ACPI_HANDLE handle;
+	ACPI_OBJECT *obj;
 
 	/* Make sure we're not being doubly invoked. */
-	if (device_find_child(parent, "acpi_throttle", 0) != NULL)
+	if (device_find_child(parent, "acpi_throttle", -1) != NULL)
 		return;
 
 	/* Check for a valid duty width and parent CPU type. */
-	if (acpi_get_handle(parent) == NULL)
+	handle = acpi_get_handle(parent);
+	if (handle == NULL)
 		return;
 	if (AcpiGbl_FADT->DutyWidth == 0 ||
 	    acpi_get_type(parent) != ACPI_TYPE_PROCESSOR)
 		return;
-	if (BUS_ADD_CHILD(parent, 0, "acpi_throttle", 0) == NULL)
-		device_printf(parent, "acpi_throttle: add child failed\n");
+
+	/* If there's a NULL P_BLK or incorrect length, don't add a child. */
+	buf.Pointer = NULL;
+	buf.Length = ACPI_ALLOCATE_BUFFER;
+	if (ACPI_FAILURE(AcpiEvaluateObject(handle, NULL, NULL, &buf)))
+		return;
+	obj = (ACPI_OBJECT *)buf.Pointer;
+	if (obj->Processor.PblkAddress && obj->Processor.PblkLength >= 4 &&
+	    BUS_ADD_CHILD(parent, 0, "acpi_throttle", -1) == NULL)
+		device_printf(parent, "add throttle child failed\n");
+	AcpiOsFree(obj);
 }
 
 static int
@@ -155,7 +168,7 @@
 {
 	struct acpi_throttle_softc *sc;
 	ACPI_BUFFER buf;
-	ACPI_OBJECT*obj;
+	ACPI_OBJECT *obj;
 	ACPI_STATUS status;
 
 	sc = device_get_softc(dev);

--------------040809040900060008000000--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?420989C4.2040701>