Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 28 Mar 2009 08:54:47 +0000 (UTC)
From:      Michael Reifenberger <mr@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r190501 - in head/sys: conf i386/cpufreq modules/cpufreq
Message-ID:  <200903280854.n2S8slQ2090245@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mr
Date: Sat Mar 28 08:54:47 2009
New Revision: 190501
URL: http://svn.freebsd.org/changeset/base/190501

Log:
  Add support for Phenom (Family 10h) to cpufreq.
  It's a newer version, provided by the author, than the one in the PR.
  
  PR:		kern/128575
  Submitted by:	Gen Otsuji annona2 [at] gmail.com

Added:
  head/sys/i386/cpufreq/hwpstate.c   (contents, props changed)
Modified:
  head/sys/conf/files.amd64
  head/sys/conf/files.i386
  head/sys/modules/cpufreq/Makefile

Modified: head/sys/conf/files.amd64
==============================================================================
--- head/sys/conf/files.amd64	Sat Mar 28 07:44:08 2009	(r190500)
+++ head/sys/conf/files.amd64	Sat Mar 28 08:54:47 2009	(r190501)
@@ -283,6 +283,7 @@ i386/bios/smbios.c		optional	smbios
 i386/bios/vpd.c			optional	vpd
 i386/cpufreq/powernow.c		optional	cpufreq
 i386/cpufreq/est.c		optional	cpufreq
+i386/cpufreq/hwpstate.c		optional	cpufreq
 i386/cpufreq/p4tcc.c		optional	cpufreq
 #
 libkern/memmove.c		standard

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386	Sat Mar 28 07:44:08 2009	(r190500)
+++ head/sys/conf/files.i386	Sat Mar 28 08:54:47 2009	(r190501)
@@ -242,6 +242,7 @@ i386/bios/smapi_bios.S		optional smapi
 i386/bios/smbios.c		optional smbios
 i386/bios/vpd.c			optional vpd
 i386/cpufreq/est.c		optional cpufreq
+i386/cpufreq/hwpstate.c		optional cpufreq
 i386/cpufreq/p4tcc.c		optional cpufreq
 i386/cpufreq/powernow.c		optional cpufreq
 i386/cpufreq/smist.c		optional cpufreq

Added: head/sys/i386/cpufreq/hwpstate.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/i386/cpufreq/hwpstate.c	Sat Mar 28 08:54:47 2009	(r190501)
@@ -0,0 +1,515 @@
+/*-
+ * Copyright (c) 2005 Nate Lawson
+ * Copyright (c) 2004 Colin Percival
+ * Copyright (c) 2004-2005 Bruno Durcot
+ * Copyright (c) 2004 FUKUDA Nobuhiko
+ * Copyright (c) 2009 Michael Reifenberger
+ * Copyright (c) 2009 Norikatsu Shigemura
+ * Copyright (c) 2008-2009 Gen Otsuji
+ *
+ * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
+ * in various parts. The authors of these files are
+ * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko.
+ * This code contains patches by Michael Reifenberger and Norikatsu Shigemura.
+ * Thank you.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * For more info:
+ * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors
+ * 31116 Rev 3.20  February 04, 2009
+ * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors
+ * 41256 Rev 3.00 - July 07, 2008
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/pcpu.h>
+#include <sys/smp.h>
+#include <sys/sched.h>
+
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
+#include <machine/specialreg.h>
+
+#include <contrib/dev/acpica/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include "acpi_if.h"
+#include "cpufreq_if.h"
+
+/* P-state related MSR numbers used by this driver. */
+#define	MSR_AMD_10H_11H_LIMIT	0xc0010061
+#define	MSR_AMD_10H_11H_CONTROL	0xc0010062
+#define	MSR_AMD_10H_11H_STATUS	0xc0010063
+#define	MSR_AMD_10H_11H_CONFIG	0xc0010064
+
+/* Capacity of the per-device P-state table (hwpstate_settings[]). */
+#define	AMD_10H_11H_MAX_STATES	16
+
+/*
+ * Field extractors for MSR_AMD_10H_11H_LIMIT (C001_0061):
+ * bits 6:4 give the highest P-state number, bits 2:0 the current limit.
+ */
+#define	AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)	(((msr) >> 4) & 0x7)
+#define	AMD_10H_11H_GET_PSTATE_LIMIT(msr)	(((msr)) & 0x7)
+/*
+ * Field extractors for the per-P-state config MSRs, starting at
+ * MSR_AMD_10H_11H_CONFIG (10h: C001_0064..68 / 11h: C001_0064..6B):
+ * VID is bits 15:9, DID is bits 8:6, FID is bits 5:0.
+ */
+#define	AMD_10H_11H_CUR_VID(msr)		(((msr) >> 9) & 0x7F)
+#define	AMD_10H_11H_CUR_DID(msr)		(((msr) >> 6) & 0x07)
+#define	AMD_10H_11H_CUR_FID(msr)		((msr) & 0x3F)
+
+/* Select the architecture-specific macro that extracts the CPU family. */
+#if defined(__amd64__)
+#define CPU_FAMILY(id)	AMD64_CPU_FAMILY(id)
+#elif defined(__i386__)
+#define CPU_FAMILY(id)	I386_CPU_FAMILY(id)
+#endif
+
+/*
+ * Print a debug message through device_printf(), but only when
+ * hwpstate_verbose is non-zero.
+ */
+#define	HWPSTATE_DEBUG(dev, msg...)			\
+	do{						\
+		if(hwpstate_verbose)			\
+			device_printf(dev, msg);	\
+	}while(0)
+
+/*
+ * One P-state as tracked by this driver: the values handed back through
+ * the cpufreq interface plus the P-state number passed to
+ * hwpstate_goto_pstate().
+ */
+struct hwpstate_setting {
+	int	freq;		/* CPU clock in Mhz or 100ths of a percent. */
+	int	volts;		/* Voltage in mV. */
+	int	power;		/* Power consumed in mW. */
+	int	lat;		/* Transition latency in us. */
+	int	pstate_id;	/* P-State id */
+};
+
+/* Per-device driver state. */
+struct hwpstate_softc {
+	device_t		dev;	/* Back pointer, set in hwpstate_probe(). */
+	/* P-state table; only the first cfnum entries are filled in. */
+	struct hwpstate_setting	hwpstate_settings[AMD_10H_11H_MAX_STATES];
+	int			cfnum;	/* Number of valid table entries. */
+};
+
+static void	hwpstate_identify(driver_t *driver, device_t parent);
+static int	hwpstate_probe(device_t dev);
+static int	hwpstate_attach(device_t dev);
+static int	hwpstate_detach(device_t dev);
+static int	hwpstate_set(device_t dev, const struct cf_setting *cf);
+static int	hwpstate_get(device_t dev, struct cf_setting *cf);
+static int	hwpstate_settings(device_t dev, struct cf_setting *sets, int *count);
+static int	hwpstate_type(device_t dev, int *type);
+static int	hwpstate_shutdown(device_t dev);
+static int	hwpstate_features(driver_t *driver, u_int *features);
+static int	hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev);
+static int	hwpstate_get_info_from_msr(device_t dev);
+static int	hwpstate_goto_pstate(device_t dev, int pstate_id);
+
+/* Debug switch tested by HWPSTATE_DEBUG(); exported under the debug sysctl node. */
+static int	hwpstate_verbose = 0;
+SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RDTUN,
+       &hwpstate_verbose, 0, "Debug hwpstate");
+
+/* Method table binding the device, cpufreq and ACPI interfaces to this driver. */
+static device_method_t hwpstate_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_identify,	hwpstate_identify),
+	DEVMETHOD(device_probe,		hwpstate_probe),
+	DEVMETHOD(device_attach,	hwpstate_attach),
+	DEVMETHOD(device_detach,	hwpstate_detach),
+	DEVMETHOD(device_shutdown,	hwpstate_shutdown),
+
+	/* cpufreq interface */
+	DEVMETHOD(cpufreq_drv_set,	hwpstate_set),
+	DEVMETHOD(cpufreq_drv_get,	hwpstate_get),
+	DEVMETHOD(cpufreq_drv_settings,	hwpstate_settings),
+	DEVMETHOD(cpufreq_drv_type,	hwpstate_type),
+
+	/* ACPI interface */
+	DEVMETHOD(acpi_get_features,	hwpstate_features),
+
+	/* Table terminator. */
+	{0, 0}
+};
+
+static devclass_t hwpstate_devclass;
+/* Driver description: name, method table and softc size. */
+static driver_t hwpstate_driver = {
+	"hwpstate",
+	hwpstate_methods,
+	sizeof(struct hwpstate_softc),
+};
+
+/* Register the driver as a child of the "cpu" bus. */
+DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);
+
+/*
+ * Go to Px-state on all cpus considering the limit.
+ *
+ * The requested P-state number is first compared with the current limit
+ * read from MSR_AMD_10H_11H_LIMIT; a numerically larger limit replaces the
+ * request.  The calling thread is then bound to each cpu in turn, writes
+ * the P-state number to MSR_AMD_10H_11H_CONTROL and polls
+ * MSR_AMD_10H_11H_STATUS until it reports that number or the retries run
+ * out.
+ *
+ * Returns 0 if every cpu reported the requested P-state.  Returns ENXIO
+ * if pcpu_find() fails for a cpu (the loop stops there and later cpus are
+ * not touched) or if any cpu's status MSR still differs after polling.
+ */
+static int
+hwpstate_goto_pstate(device_t dev, int pstate)
+{
+	struct hwpstate_softc *sc;
+	struct pcpu *pc;
+	int i;
+	uint64_t msr;
+	int j;
+	int limit;
+	int id = pstate;
+	int error;
+	
+	sc = device_get_softc(dev);
+	/* get the current pstate limit */
+	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+	limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
+	/* Never request a P-state numerically below the limit. */
+	if(limit > id)
+		id = limit;
+
+	error = 0;
+	/*
+	 * We are going to the same Px-state on all cpus.
+	 */
+	for (i = 0; i < mp_ncpus; i++) {
+		/* Find each cpu. */
+		pc = pcpu_find(i);
+		if (pc == NULL)
+			return (ENXIO);
+		thread_lock(curthread);
+		/* Bind to each cpu. */
+		sched_bind(curthread, pc->pc_cpuid);
+		thread_unlock(curthread);
+		HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n",
+			id, PCPU_GET(cpuid));
+		/* Go To Px-state */
+		wrmsr(MSR_AMD_10H_11H_CONTROL, id);
+		/* wait loop (100*100 usec is enough ?) */
+		for(j = 0; j < 100; j++){
+			msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+			if(msr == id){
+				break;
+			}
+			DELAY(100);
+		}
+		/* get the result. not assure msr=id */
+		msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+		HWPSTATE_DEBUG(dev, "result  P%d-state on cpu%d\n",
+		    (int)msr, PCPU_GET(cpuid));
+		/* Record the failure but keep going with the remaining cpus. */
+		if (msr != id) {
+			HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
+			error = ENXIO;
+		}
+		thread_lock(curthread);
+		sched_unbind(curthread);
+		thread_unlock(curthread);
+	}
+	return (error);
+}
+
+/*
+ * cpufreq_drv_set method: switch to the P-state whose table frequency
+ * matches the requested setting according to CPUFREQ_CMP().
+ *
+ * Returns EINVAL if cf is NULL or no table entry matches cf->freq;
+ * otherwise returns the result of hwpstate_goto_pstate().
+ */
+static int
+hwpstate_set(device_t dev, const struct cf_setting *cf)
+{
+	struct hwpstate_softc *softc;
+	int idx;
+
+	if (cf == NULL)
+		return (EINVAL);
+
+	softc = device_get_softc(dev);
+	for (idx = 0; idx < softc->cfnum; idx++) {
+		/* The first matching entry wins. */
+		if (CPUFREQ_CMP(cf->freq, softc->hwpstate_settings[idx].freq))
+			return (hwpstate_goto_pstate(dev,
+			    softc->hwpstate_settings[idx].pstate_id));
+	}
+
+	/* No table entry matched the requested frequency. */
+	return (EINVAL);
+}
+
+/*
+ * cpufreq_drv_get method: report the setting for the P-state currently
+ * shown by MSR_AMD_10H_11H_STATUS.
+ *
+ * The status value is used directly as an index into the P-state table.
+ * Returns EINVAL if cf is NULL or the status value is not below cfnum,
+ * 0 otherwise.
+ */
+static int
+hwpstate_get(device_t dev, struct cf_setting *cf)
+{
+	struct hwpstate_softc *softc;
+	const struct hwpstate_setting *cur;
+	uint64_t status;
+
+	softc = device_get_softc(dev);
+	if (cf == NULL)
+		return (EINVAL);
+
+	status = rdmsr(MSR_AMD_10H_11H_STATUS);
+	if (status >= softc->cfnum)
+		return (EINVAL);
+
+	cur = &softc->hwpstate_settings[status];
+	cf->freq = cur->freq;
+	cf->volts = cur->volts;
+	cf->power = cur->power;
+	cf->lat = cur->lat;
+	cf->dev = dev;
+	return (0);
+}
+
+/*
+ * cpufreq_drv_settings method: copy every known P-state into the caller's
+ * array.
+ *
+ * On entry *count is the capacity of sets; on success it is replaced by
+ * the number of entries written.  Returns EINVAL if either pointer is
+ * NULL, E2BIG if the array cannot hold cfnum entries, 0 otherwise.
+ */
+static int
+hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
+{
+	struct hwpstate_softc *softc;
+	const struct hwpstate_setting *src;
+	int idx;
+
+	if (sets == NULL || count == NULL)
+		return (EINVAL);
+
+	softc = device_get_softc(dev);
+	if (*count < softc->cfnum)
+		return (E2BIG);
+
+	for (idx = 0; idx < softc->cfnum; idx++) {
+		src = &softc->hwpstate_settings[idx];
+		sets[idx].freq = src->freq;
+		sets[idx].volts = src->volts;
+		sets[idx].power = src->power;
+		sets[idx].lat = src->lat;
+		sets[idx].dev = dev;
+	}
+	*count = softc->cfnum;
+
+	return (0);
+}
+
+/*
+ * cpufreq_drv_type method: report this driver's type, which is always
+ * CPUFREQ_TYPE_ABSOLUTE.  Returns EINVAL if type is NULL, 0 otherwise.
+ */
+static int
+hwpstate_type(device_t dev, int *type)
+{
+
+	if (type == NULL)
+		return (EINVAL);
+
+	*type = CPUFREQ_TYPE_ABSOLUTE;
+	return (0);
+}
+
+/*
+ * device_identify method: add a "hwpstate" child to the parent unless one
+ * already exists, the CPU is not an AMD part of family 10h or later, the
+ * hardware P-state bit is missing from amd_pminfo, or the device has been
+ * disabled through resource hints.
+ */
+static void
+hwpstate_identify(driver_t *driver, device_t parent)
+{
+	device_t child;
+
+	/* Do nothing if a hwpstate child is already present. */
+	if (device_find_child(parent, "hwpstate", -1) != NULL)
+		return;
+
+	if (cpu_vendor_id != CPU_VENDOR_AMD || CPU_FAMILY(cpu_id) < 0x10)
+		return;
+
+	/*
+	 * Check if hardware pstate enable bit is set.
+	 */
+	if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) {
+		HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n");
+		return;
+	}
+
+	if (resource_disabled("hwpstate", 0))
+		return;
+
+	/* A failure to add the child is only reported, not propagated. */
+	if ((child = BUS_ADD_CHILD(parent, 10, "hwpstate", -1)) == NULL)
+		device_printf(parent, "hwpstate: add child failed\n");
+}
+
+/*
+ * device_probe method: build the P-state table for unit 0.
+ *
+ * If an attached acpi_perf sibling exists and is not INFO_ONLY, the probe
+ * fails with ENXIO so that acpi_perf handles transitions.  If it is
+ * INFO_ONLY, its settings are copied and cross-checked against the P-state
+ * count in MSR_AMD_10H_11H_LIMIT.  Whenever the ACPI path is unavailable
+ * or fails, the table is built from the MSRs instead.
+ *
+ * Returns 0 on success, ENXIO for units other than 0 or when acpi_perf
+ * takes over, or the error from hwpstate_get_info_from_msr().
+ */
+static int
+hwpstate_probe(device_t dev)
+{
+	struct hwpstate_softc *sc;
+	device_t perf_dev;
+	uint64_t msr;
+	int error, type;
+
+	/*
+	 * Only hwpstate0.
+	 * It goes well with acpi_throttle.
+	 */
+	if (device_get_unit(dev) != 0)
+		return (ENXIO);
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+
+	/*
+	 * Check if acpi_perf has INFO only flag.
+	 */
+	perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1);
+	/* Non-zero here means "no usable ACPI info yet", not an errno. */
+	error = TRUE;
+	if (perf_dev && device_is_attached(perf_dev)) {
+		error = CPUFREQ_DRV_TYPE(perf_dev, &type);
+		if (error == 0) {
+			if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) {
+				/*
+				 * If acpi_perf doesn't have INFO_ONLY flag,
+				 * it will take care of pstate transitions.
+				 */
+				HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
+				return (ENXIO);
+			} else {
+				/*
+				 * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
+				 * we can get _PSS info from acpi_perf
+				 * without going into ACPI.
+				 */
+				HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
+				error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
+			}
+		}
+	}
+
+	if (error == 0) {
+		/*
+		 * Now we get _PSS info from acpi_perf without error.
+		 * Let's check it.
+		 */
+		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+		if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
+			HWPSTATE_DEBUG(dev, "msr and acpi _PSS count mismatch.\n");
+			/* Force the MSR fallback below. */
+			error = TRUE;
+		}
+	}
+
+	/*
+	 * If we cannot get info from acpi_perf,
+	 * Let's get info from MSRs.
+	 */
+	if (error)
+		error = hwpstate_get_info_from_msr(dev);
+	if (error)
+		return (error);
+
+	device_set_desc(dev, "Cool`n'Quiet 2.0");
+	return (0);
+}
+
+/*
+ * device_attach method: register the device with cpufreq and return the
+ * result of cpufreq_register().
+ */
+static int
+hwpstate_attach(device_t dev)
+{
+
+	return (cpufreq_register(dev));
+}
+
+/*
+ * Build the P-state table directly from the MSRs.
+ *
+ * The number of P-states comes from MSR_AMD_10H_11H_LIMIT; one config MSR
+ * is read per P-state, starting at MSR_AMD_10H_11H_CONFIG.  Only the
+ * frequency is derived (from the FID and DID fields); volts, power and
+ * latency are set to CPUFREQ_VAL_UNKNOWN.
+ *
+ * Returns 0 on success, ENXIO if a config MSR does not have bit 63 set or
+ * the CPU family is neither 10h nor 11h.  sc->cfnum is already updated
+ * when an error is returned.
+ */
+static int
+hwpstate_get_info_from_msr(device_t dev)
+{
+	struct hwpstate_softc *sc;
+	struct hwpstate_setting *hwpstate_set;
+	uint64_t msr;
+	int family, i, fid, did;
+
+	family = CPU_FAMILY(cpu_id);
+	sc = device_get_softc(dev);
+	/* Get pstate count */
+	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+	sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
+	hwpstate_set = sc->hwpstate_settings;
+	for (i = 0; i < sc->cfnum; i++) {
+		msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
+		/* Bit 63 must be set for the entry to be accepted. */
+		if ((msr & ((uint64_t)1 << 63)) != ((uint64_t)1 << 63)) {
+			HWPSTATE_DEBUG(dev, "msr is not valid.\n");
+			return (ENXIO);
+		}
+		did = AMD_10H_11H_CUR_DID(msr);
+		fid = AMD_10H_11H_CUR_FID(msr);
+		/* The two families differ only in the offset added to fid. */
+		switch(family) {
+		case 0x11:
+			/* fid/did to frequency */
+			hwpstate_set[i].freq = 100 * (fid + 0x08) / (1 << did);
+			break;
+		case 0x10:
+			/* fid/did to frequency */
+			hwpstate_set[i].freq = 100 * (fid + 0x10) / (1 << did);
+			break;
+		default:
+			HWPSTATE_DEBUG(dev, "get_info_from_msr: AMD family %d CPU's are not implemented yet. sorry.\n", family);
+			return (ENXIO);
+			break;
+		}
+		/* The table index doubles as the P-state number. */
+		hwpstate_set[i].pstate_id = i;
+		/* There was volts calculation, but deleted it. */
+		hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN;
+		hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN;
+		hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN;
+	}
+	return (0);
+}
+
+/*
+ * Fill the softc's P-state table from the settings exported by acpi_perf.
+ *
+ * dev is the hwpstate device whose softc is filled in; perf_dev is the
+ * acpi_perf device queried through CPUFREQ_DRV_SETTINGS().
+ *
+ * Each returned setting must carry its own position in spec[0]; that
+ * position is used as the P-state id.  sc->cfnum is updated only after
+ * every entry has been accepted.
+ *
+ * Returns 0 on success, ENOMEM if the temporary buffer cannot be
+ * allocated, the error from CPUFREQ_DRV_SETTINGS() if the query fails, or
+ * ENXIO if the reported count does not fit the table or an entry's
+ * spec[0] does not match its position.
+ */
+static int
+hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev)
+{
+	struct hwpstate_softc *sc;
+	struct cf_setting *perf_set;
+	struct hwpstate_setting *hwpstate_set;
+	int count, error, i;
+
+	perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT);
+	if (perf_set == NULL) {
+		HWPSTATE_DEBUG(dev, "nomem\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Fetch settings from acpi_perf.
+	 * Now it is attached, and has info only flag.
+	 */
+	count = MAX_SETTINGS;
+	error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count);
+	if (error) {
+		HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n");
+		goto out;
+	}
+	/*
+	 * The temporary buffer holds MAX_SETTINGS entries but the softc
+	 * table holds only AMD_10H_11H_MAX_STATES; refuse anything that
+	 * would write past the end of the table.
+	 */
+	if (count < 0 || count > AMD_10H_11H_MAX_STATES) {
+		HWPSTATE_DEBUG(dev, "ACPI _PSS count is out of range.\n");
+		error = ENXIO;
+		goto out;
+	}
+	sc = device_get_softc(dev);
+	hwpstate_set = sc->hwpstate_settings;
+	for (i = 0; i < count; i++) {
+		/* spec[0] must equal the entry's position in the list. */
+		if (i != perf_set[i].spec[0]) {
+			HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n");
+			error = ENXIO;
+			goto out;
+		}
+		hwpstate_set[i].pstate_id = i;
+		hwpstate_set[i].freq = perf_set[i].freq;
+		hwpstate_set[i].volts = perf_set[i].volts;
+		hwpstate_set[i].power = perf_set[i].power;
+		hwpstate_set[i].lat = perf_set[i].lat;
+	}
+	/* Publish the count only once the whole table is valid. */
+	sc->cfnum = count;
+out:
+	free(perf_set, M_TEMP);
+	return (error);
+}
+
+/*
+ * device_detach method: request P-state 0 on all cpus, then unregister
+ * from cpufreq.  The result of the P-state request is ignored; the return
+ * value is that of cpufreq_unregister().
+ */
+static int
+hwpstate_detach(device_t dev)
+{
+
+	hwpstate_goto_pstate(dev, 0);
+	return (cpufreq_unregister(dev));
+}
+
+/*
+ * device_shutdown method: currently does nothing and returns 0.  The call
+ * that would switch to P-state 0 is commented out.
+ */
+static int
+hwpstate_shutdown(device_t dev)
+{
+
+	/* hwpstate_goto_pstate(dev, 0); */
+	return (0);
+}
+
+/*
+ * acpi_get_features method: store ACPI_CAP_PERF_MSRS in *features and
+ * return 0.
+ */
+static int
+hwpstate_features(driver_t *driver, u_int *features)
+{
+
+	/* Notify the ACPI CPU that we support direct access to MSRs */
+	*features = ACPI_CAP_PERF_MSRS;
+	return (0);
+}

Modified: head/sys/modules/cpufreq/Makefile
==============================================================================
--- head/sys/modules/cpufreq/Makefile	Sat Mar 28 07:44:08 2009	(r190500)
+++ head/sys/modules/cpufreq/Makefile	Sat Mar 28 08:54:47 2009	(r190501)
@@ -12,7 +12,7 @@ SRCS+=	bus_if.h cpufreq_if.h device_if.h
 CFLAGS+= -I${.CURDIR}/../../contrib/dev/acpica
 
 SRCS+=	acpi_if.h opt_acpi.h
-SRCS+=	est.c p4tcc.c powernow.c
+SRCS+=	est.c hwpstate.c p4tcc.c powernow.c
 .endif
 
 .if ${MACHINE} == "i386"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200903280854.n2S8slQ2090245>