From owner-svn-src-projects@FreeBSD.ORG Sat Oct 18 06:19:53 2008 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 796A5106568E; Sat, 18 Oct 2008 06:19:53 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 65E408FC18; Sat, 18 Oct 2008 06:19:53 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id m9I6JrMm008786; Sat, 18 Oct 2008 06:19:53 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id m9I6Jqlj008773; Sat, 18 Oct 2008 06:19:52 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200810180619.m9I6Jqlj008773@svn.freebsd.org> From: Kip Macy Date: Sat, 18 Oct 2008 06:19:52 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r184016 - in projects/release_6_3_xen/sys/i386: conf i386 include isa pci X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 18 Oct 2008 06:19:53 -0000 Author: kmacy Date: Sat Oct 18 06:19:52 2008 New Revision: 184016 URL: http://svn.freebsd.org/changeset/base/184016 Log: merge i386 xen support Modified: projects/release_6_3_xen/sys/i386/conf/DEFAULTS projects/release_6_3_xen/sys/i386/i386/busdma_machdep.c projects/release_6_3_xen/sys/i386/i386/genassym.c projects/release_6_3_xen/sys/i386/i386/intr_machdep.c projects/release_6_3_xen/sys/i386/i386/machdep.c projects/release_6_3_xen/sys/i386/i386/support.s projects/release_6_3_xen/sys/i386/i386/swtch.s projects/release_6_3_xen/sys/i386/i386/sys_machdep.c projects/release_6_3_xen/sys/i386/i386/trap.c projects/release_6_3_xen/sys/i386/i386/vm_machdep.c projects/release_6_3_xen/sys/i386/include/asmacros.h projects/release_6_3_xen/sys/i386/include/cpufunc.h projects/release_6_3_xen/sys/i386/include/param.h projects/release_6_3_xen/sys/i386/include/pcpu.h projects/release_6_3_xen/sys/i386/include/pmap.h projects/release_6_3_xen/sys/i386/include/segments.h projects/release_6_3_xen/sys/i386/include/vmparam.h projects/release_6_3_xen/sys/i386/isa/npx.c projects/release_6_3_xen/sys/i386/pci/pci_cfgreg.c projects/release_6_3_xen/sys/i386/pci/pci_pir.c Modified: projects/release_6_3_xen/sys/i386/conf/DEFAULTS ============================================================================== --- projects/release_6_3_xen/sys/i386/conf/DEFAULTS Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/conf/DEFAULTS Sat Oct 18 06:19:52 2008 (r184016) @@ -15,3 +15,5 @@ device npx # Pseudo devices. device mem # Memory and kernel memory devices device io # I/O device + +options NATIVE Modified: projects/release_6_3_xen/sys/i386/i386/busdma_machdep.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/busdma_machdep.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/busdma_machdep.c Sat Oct 18 06:19:52 2008 (r184016) @@ -140,6 +140,11 @@ static bus_addr_t add_bounce_page(bus_dm static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); +#ifdef XEN +#undef pmap_kextract +#define pmap_kextract pmap_kextract_ma +#endif + /* * Return true if a match is made. * Modified: projects/release_6_3_xen/sys/i386/i386/genassym.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/genassym.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/genassym.c Sat Oct 18 06:19:52 2008 (r184016) @@ -227,3 +227,9 @@ ASSYM(MTX_RECURSECNT, offsetof(struct mt ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); #endif + +#ifdef XEN +#include +ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3)); +ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START); +#endif Modified: projects/release_6_3_xen/sys/i386/i386/intr_machdep.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/intr_machdep.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/intr_machdep.c Sat Oct 18 06:19:52 2008 (r184016) @@ -283,7 +283,12 @@ intr_execute_handlers(struct intsrc *isr /* Schedule the ithread if needed. */ if (thread) { error = intr_event_schedule_thread(ie); +#ifndef XEN KASSERT(error == 0, ("bad stray interrupt")); +#else + if (error != 0) + log(LOG_CRIT, "bad stray interrupt %d", vector); +#endif } critical_exit(); td->td_intr_nesting_level--; Modified: projects/release_6_3_xen/sys/i386/i386/machdep.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/machdep.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/machdep.c Sat Oct 18 06:19:52 2008 (r184016) @@ -141,6 +141,24 @@ int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif +#ifdef XEN +/* XEN includes */ +#include +#include +#include +#include +#include + +void Xhypervisor_callback(void); +void failsafe_callback(void); + +extern trap_info_t trap_table[]; +struct proc_ldt default_proc_ldt; +extern int init_first; +int running_xen = 1; +extern unsigned long physfree; +#endif /* XEN */ + /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); @@ -280,8 +298,9 @@ cpu_startup(dummy) */ bufinit(); vm_pager_bufferinit(); - +#ifndef XEN cpu_setregs(); +#endif } /* @@ -1106,6 +1125,25 @@ cpu_est_clockrate(int cpu_id, uint64_t * return (0); } +static int cpu_idle_hlt = 1; +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, + &cpu_idle_hlt, 0, "Idle loop HLT enable"); +#ifdef XEN + +void +cpu_halt(void) +{ + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +} + +static void +cpu_idle_default(void) +{ + idle_block(); +} + +#else + /* * Shutdown the CPU as much as possible */ @@ -1131,9 +1169,6 @@ cpu_halt(void) * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ -static int cpu_idle_hlt = 1; -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) @@ -1145,6 +1180,7 @@ cpu_idle_default(void) */ __asm __volatile("sti; hlt"); } +#endif /* !XEN */ /* * Note that we have to be careful here to avoid a race between checking @@ -1156,7 +1192,7 @@ void cpu_idle(void) { -#ifdef SMP +#if defined(SMP) && !defined(XEN) if (mp_grab_cpu_hlt()) return; #endif @@ -1315,10 +1351,16 @@ SYSCTL_ULONG(_machdep, OID_AUTO, guessed */ int _default_ldt; + +#ifdef XEN +union descriptor *gdt; +union descriptor *ldt; +#else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ +union descriptor ldt[NLDT]; /* local descriptor table */ +#endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ -union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ int private_tss; /* flag indicating private tss */ @@ -1353,7 +1395,7 @@ struct soft_segment_descriptor gdt_segs[ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ @@ -1380,7 +1422,7 @@ struct soft_segment_descriptor gdt_segs[ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ @@ -1389,7 +1431,7 @@ struct soft_segment_descriptor gdt_segs[ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ @@ -1416,11 +1458,12 @@ struct soft_segment_descriptor gdt_segs[ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, +#ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ @@ -1512,6 +1555,7 @@ struct soft_segment_descriptor gdt_segs[ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, +#endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { @@ -1680,7 +1724,17 @@ getmemsize(int first) goto physmap_done; } #endif - +#ifdef XEN + has_smap = 0; + Maxmem = xen_start_info->nr_pages - init_first; + physmem = Maxmem; + basemem = 0; + physmap[0] = init_first << PAGE_SHIFT; + physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE); + physmap_idx = 0; + goto physmap_done; +#endif + hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(vmf)); @@ -1858,7 +1912,7 @@ int15e820: vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; -#else +#elif !defined(XEN) /* * Prefer the RTC value for extended memory. */ @@ -1948,7 +2002,7 @@ physmap_done: if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; - +#ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. @@ -2066,7 +2120,10 @@ do_next: } *pte = 0; invltlb(); - +#else + phys_avail[0] = physfree; + phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; +#endif /* * XXX * The last chunk must contain at least one page plus the message @@ -2088,6 +2145,257 @@ do_next: avail_end = phys_avail[pa_indx]; } +#ifdef XEN + +#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) +void +init386(int first) +{ + int error, gsel_tss, metadata_missing, x; + unsigned long off, gdtmachpfn; + struct pcpu *pc; + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, + }; + + thread0.td_kstack = proc0kstack; + thread0.td_pcb = (struct pcb *) + (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + + /* + * This may be done better later if it gets more high level + * components in it. If so just link td->td_proc here. + */ + proc_linkup(&proc0, &ksegrp0, &thread0); + + metadata_missing = 0; + if (xen_start_info->mod_start) { + preload_metadata = (caddr_t)xen_start_info->mod_start; + preload_bootstrap_relocate(KERNBASE); + } else { + metadata_missing = 1; + } + if (envmode == 1) + kern_envp = static_env; + else if ((caddr_t)xen_start_info->cmd_line) + kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); + + boothowto |= xen_boothowto(kern_envp); + + /* Init basic tunables, hz etc */ + init_param1(); + + /* + * XEN occupies a portion of the upper virtual address space + * At its base it manages an array mapping machine page frames + * to physical page frames - hence we need to be able to + * access 4GB - (64MB - 4MB + 64k) + */ + gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + + pc = &__pcpu[0]; + gdt_segs[GPRIV_SEL].ssd_base = (int) pc; + gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; + + PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); + bzero(gdt, PAGE_SIZE); + for (x = 0; x < NGDT; x++) + ssdtosd(&gdt_segs[x], &gdt[x].sd); + + + printk("gdt=%p\n", gdt); + printk("PTmap=%p\n", PTmap); + printk("addr=%p\n", *vtopte((unsigned long)gdt) & ~PG_RW); + + gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; + PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~(PG_RW|PG_M|PG_A)); + PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); + lgdt(&r_gdt /* unused */); + gdtset = 1; + + if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { + panic("set_trap_table failed - error %d\n", error); + } + + error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); + if (error == 0) + error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); +#if CONFIG_XEN_COMPAT <= 0x030002 + if (error == -ENOXENSYS) + HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), + (unsigned long)Xhypervisor_callback, + GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); +#endif + pcpu_init(pc, 0, sizeof(struct pcpu)); + PCPU_SET(prvspace, pc); + PCPU_SET(curthread, &thread0); + PCPU_SET(curpcb, thread0.td_pcb); + PCPU_SET(pdir, (unsigned long)IdlePTD); + + /* + * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + mutex_init(); + mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); + + /* make ldt memory segments */ + PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); + bzero(ldt, PAGE_SIZE); + ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); + ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); + for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) + ssdtosd(&ldt_segs[x], &ldt[x].sd); + + default_proc_ldt.ldt_base = (caddr_t)ldt; + default_proc_ldt.ldt_len = 6; + _default_ldt = (int)&default_proc_ldt; + PCPU_SET(currentldt, _default_ldt) + PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); + xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); + +#ifdef XBOX + /* + * The following code queries the PCI ID of 0:0:0. For the XBOX, + * This should be 0x10de / 0x02a5. + * + * This is exactly what Linux does. + */ + outl(0xcf8, 0x80000000); + if (inl(0xcfc) == 0x02a510de) { + arch_i386_is_xbox = 1; + pic16l_setled(XBOX_LED_GREEN); + + /* + * We are an XBOX, but we may have either 64MB or 128MB of + * memory. The PCI host bridge should be programmed for this, + * so we just query it. + */ + outl(0xcf8, 0x80000084); + arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; + } +#endif /* XBOX */ +#if defined (XEN_PRIVILEGED) + /* + * Initialize the i8254 before the console so that console + * initialization can use DELAY(). + */ + i8254_init(); +#endif + /* + * Initialize the console before we print anything out. + */ + cninit(); + + if (metadata_missing) + printf("WARNING: loader(8) metadata is missing!\n"); + +#ifdef DEV_ISA + if (xen_start_info->flags & SIF_PRIVILEGED) { + elcr_probe(); +#ifdef DEV_ATPIC + atpic_startup(); +#endif + } +#endif + +#ifdef DDB + ksym_start = bootinfo.bi_symtab; + ksym_end = bootinfo.bi_esymtab; +#endif + + kdb_init(); + +#ifdef KDB + if (boothowto & RB_KDB) + kdb_enter("Boot flags requested debugger"); +#endif + + finishidentcpu(); /* Final stage of CPU initialization */ + setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + initializecpu(); /* Initialize CPU registers */ + + /* make an initial tss so cpu can get interrupt stack on syscall! */ + /* Note: -16 is so we can grow the trapframe if we came from vm86 */ + PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), + PCPU_GET(common_tss.tss_esp0)); + + + /* pointer to selector slot for %fs/%gs */ + PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); + + dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = + dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; + dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = + dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else + dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif + dblfault_tss.tss_eip = (int)dblfault_handler; + dblfault_tss.tss_eflags = PSL_KERNEL; + dblfault_tss.tss_ds = dblfault_tss.tss_es = + dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); + dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); + dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); + dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + + vm86_initialize(); + getmemsize(first); + init_param2(physmem); + + + /* Map the message buffer. */ + for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) + pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); + + /* now running on new page tables, configured,and u/iom is accessible */ + + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* transfer to user mode */ + + _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); + _udatasel = GSEL(GUDATA_SEL, SEL_UPL); + + /* setup proc 0's pcb */ + thread0.td_pcb->pcb_flags = 0; +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else + thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif + thread0.td_pcb->pcb_ext = 0; + thread0.td_frame = &proc0_tf; + thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; + thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; +} + +#else void init386(first) int first; @@ -2353,6 +2661,7 @@ init386(first) thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } +#endif /* !XEN */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) Modified: projects/release_6_3_xen/sys/i386/i386/support.s ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/support.s Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/support.s Sat Oct 18 06:19:52 2008 (r184016) @@ -1426,10 +1426,11 @@ ENTRY(bcmp) */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) +#ifndef XEN /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) - +#endif /* flush the prefetch q */ jmp 1f nop Modified: projects/release_6_3_xen/sys/i386/i386/swtch.s ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/swtch.s Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/swtch.s Sat Oct 18 06:19:52 2008 (r184016) @@ -71,7 +71,7 @@ ENTRY(cpu_throw) movl 8(%esp),%ecx /* New thread */ movl TD_PCB(%ecx),%edx movl PCB_CR3(%edx),%eax - movl %eax,%cr3 /* new address space */ + LOAD_CR3(%eax) /* new address space */ /* set bit in new pm_active */ movl TD_PROC(%ecx),%eax movl P_VMSPACE(%eax), %ebx @@ -114,11 +114,13 @@ ENTRY(cpu_switch) movl %gs,PCB_GS(%edx) pushfl /* PSL */ popl PCB_PSL(%edx) +#ifndef XEN /* Check to see if we need to call a switchout function. */ movl PCB_SWITCHOUT(%edx),%eax cmpl $0, %eax je 1f call *%eax +#endif 1: /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -171,7 +173,7 @@ ENTRY(cpu_switch) movl %cr3,%ebx /* The same address space? */ cmpl %ebx,%eax je sw1 - movl %eax,%cr3 /* new address space */ + LOAD_CR3(%eax) /* new address space */ /* Release bit from old pmap->pm_active */ movl PCPU(CURPMAP), %ebx @@ -191,6 +193,18 @@ ENTRY(cpu_switch) btsl %esi, PM_ACTIVE(%ebx) /* set new */ sw1: +#ifdef XEN + pushl %eax + pushl %ecx + pushl %edx + call xen_handle_thread_switch + popl %edx + popl %ecx + popl %eax + /* + * XXX set IOPL + */ +#else /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. @@ -238,7 +252,7 @@ sw1: movl 12(%esi), %ebx movl %eax, 8(%edi) movl %ebx, 12(%edi) - +#endif /* Restore context. */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp @@ -263,7 +277,7 @@ sw1: movl _default_ldt,%eax cmpl PCPU(CURRENTLDT),%eax je 2f - lldt _default_ldt + LLDT(_default_ldt) movl %eax,PCPU(CURRENTLDT) jmp 2f 1: @@ -366,7 +380,7 @@ ENTRY(savectx) * parent's npx state for forks by forgetting to reload. */ pushfl - cli + CLI movl PCPU(FPCURTHREAD),%eax testl %eax,%eax je 1f Modified: projects/release_6_3_xen/sys/i386/i386/sys_machdep.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/sys_machdep.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/sys_machdep.c Sat Oct 18 06:19:52 2008 (r184016) @@ -58,6 +58,25 @@ __FBSDID("$FreeBSD$"); #include /* for kernel_map */ +#ifdef XEN +#include + +void i386_reset_ldt(struct proc_ldt *pldt); + +void +i386_reset_ldt(struct proc_ldt *pldt) +{ + xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); +} +#define SEG_VIRT_END (HYPERVISOR_VIRT_START >> 12) & 0xffff +#define SET_DESCRIPTOR(index, sd) \ + HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[index]), *(uint64_t *)&(sd)); +#else +#define i386_reset_ldt(x) +#define SEG_VIRT_END 0xffff +#define SET_DESCRIPTOR(index, sd) PCPU_GET(fsgs_gdt)[index] = (sd); +#endif + #define MAX_LD 8192 #define LD_PER_PAGE 512 #define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) @@ -163,7 +182,7 @@ sysarch(td, uap) */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; - sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ + sd.sd_lolimit = SEG_VIRT_END; /* 4GB limit, wraps */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; @@ -173,7 +192,7 @@ sysarch(td, uap) sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_fsd = sd; - PCPU_GET(fsgs_gdt)[0] = sd; + SET_DESCRIPTOR(0, sd); critical_exit(); td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); } @@ -193,7 +212,7 @@ sysarch(td, uap) */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; - sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ + sd.sd_lolimit = SEG_VIRT_END; /* 4GB limit, wraps */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; @@ -203,7 +222,7 @@ sysarch(td, uap) sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_gsd = sd; - PCPU_GET(fsgs_gdt)[1] = sd; + SET_DESCRIPTOR(1, sd); critical_exit(); load_gs(GSEL(GUGS_SEL, SEL_UPL)); } @@ -364,6 +383,10 @@ set_user_ldt(struct mdproc *mdp) struct proc_ldt *pldt; pldt = mdp->md_ldt; +#ifdef XEN + i386_reset_ldt(pldt); + PCPU_SET(currentldt, (int)pldt); +#else #ifdef SMP gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd; #else @@ -371,6 +394,7 @@ set_user_ldt(struct mdproc *mdp) #endif lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); +#endif /* !XEN */ } #ifdef SMP @@ -385,6 +409,39 @@ set_user_ldt_rv(struct thread *td) } #endif +#ifdef XEN + +struct proc_ldt * +user_ldt_alloc(struct mdproc *mdp, int len) +{ + struct proc_ldt *pldt, *new_ldt; + + MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt), + M_SUBPROC, M_WAITOK); + + new_ldt->ldt_len = len = NEW_MAX_LD(len); + new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, + round_page(len * sizeof(union descriptor))); + if (new_ldt->ldt_base == NULL) { + FREE(new_ldt, M_SUBPROC); + return NULL; + } + new_ldt->ldt_refcnt = 1; + new_ldt->ldt_active = 0; + + if ((pldt = mdp->md_ldt)) { + if (len > pldt->ldt_len) + len = pldt->ldt_len; + bcopy(pldt->ldt_base, new_ldt->ldt_base, + len * sizeof(union descriptor)); + } else { + bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); + } + pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, + new_ldt->ldt_len*sizeof(union descriptor)); + return new_ldt; +} +#else /* * Must be called with either sched_lock free or held but not recursed. * If it does not return NULL, it will return with it owned. @@ -425,6 +482,7 @@ user_ldt_alloc(struct mdproc *mdp, int l } return new_ldt; } +#endif /* * Must be called either with sched_lock free or held but not recursed. @@ -443,8 +501,11 @@ user_ldt_free(struct thread *td) mtx_lock_spin(&sched_lock); mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); if (td == PCPU_GET(curthread)) { +#ifndef XEN lldt(_default_ldt); +#endif PCPU_SET(currentldt, _default_ldt); + i386_reset_ldt((struct proc_ldt *)_default_ldt); } mdp->md_ldt = NULL; @@ -549,6 +610,9 @@ i386_set_ldt(td, uap, descs) } if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { +#ifdef XEN + load_gs(0); /* XXX check if we really still need this */ +#endif /* complain a for a while if using old methods */ if (ldt_warnings++ < NUM_LDT_WARNINGS) { printf("Warning: pid %d used static ldt allocation.\n", @@ -671,6 +735,23 @@ again: return (error); } +#ifdef XEN +static int +i386_set_ldt_data(struct thread *td, int start, int num, + union descriptor *descs) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = mdp->md_ldt; + int i, error; + + for (i = 0; i < num; i++) { + error = HYPERVISOR_update_descriptor(vtomach(&((union descriptor *)(pldt->ldt_base))[start + i]), *(uint64_t *)(descs + i)); + if (error) + panic("failed to update ldt: %d", error); + } + return (0); +} +#else static int i386_set_ldt_data(struct thread *td, int start, int num, union descriptor *descs) @@ -686,6 +767,7 @@ i386_set_ldt_data(struct thread *td, int num * sizeof(union descriptor)); return (0); } +#endif static int i386_ldt_grow(struct thread *td, int len) Modified: projects/release_6_3_xen/sys/i386/i386/trap.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/trap.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/trap.c Sat Oct 18 06:19:52 2008 (r184016) @@ -215,6 +215,7 @@ trap(frame) goto out; #endif +#ifndef XEN if ((frame.tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled @@ -245,6 +246,7 @@ trap(frame) enable_intr(); } } +#endif eva = 0; code = frame.tf_err; Modified: projects/release_6_3_xen/sys/i386/i386/vm_machdep.c ============================================================================== --- projects/release_6_3_xen/sys/i386/i386/vm_machdep.c Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/i386/vm_machdep.c Sat Oct 18 06:19:52 2008 (r184016) @@ -89,6 +89,9 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef XEN +#include +#endif #ifdef PC98 #include #else @@ -264,7 +267,7 @@ cpu_fork(td1, p2, td2, flags) /* Setup to release sched_lock in fork_exit(). */ td2->td_md.md_spinlock_count = 1; - td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; + td2->td_md.md_saved_flags = PSL_USER; /* * Now, cpu_switch() can schedule the new process. @@ -436,7 +439,7 @@ cpu_set_upcall(struct thread *td, struct /* Setup to release sched_lock in fork_exit(). */ td->td_md.md_spinlock_count = 1; - td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; + td->td_md.md_saved_flags = PSL_USER; } /* @@ -593,6 +596,9 @@ cpu_reset_real() int b; #endif +#ifdef XEN + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +#endif disable_intr(); #ifdef CPU_ELAN if (elan_mmcr != NULL) @@ -759,8 +765,11 @@ sf_buf_alloc(struct vm_page *m, int flag */ ptep = vtopte(sf->kva); opte = *ptep; +#ifdef XEN + PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag | PG_RW | PG_V); +#else *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V; - +#endif /* * Avoid unnecessary TLB invalidations: If the sf_buf's old * virtual-to-physical mapping was not used, then any processor @@ -809,6 +818,14 @@ sf_buf_free(struct sf_buf *sf) if (sf->ref_count == 0) { TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); nsfbufsused--; +#ifdef XEN + /* + * Xen doesn't like having dangling R/W mappings + */ + pmap_qremove(sf->kva, 1); + sf->m = NULL; + LIST_REMOVE(sf, list_entry); +#endif if (sf_buf_alloc_want > 0) wakeup_one(&sf_buf_freelist); } Modified: projects/release_6_3_xen/sys/i386/include/asmacros.h ============================================================================== --- projects/release_6_3_xen/sys/i386/include/asmacros.h Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/include/asmacros.h Sat Oct 18 06:19:52 2008 (r184016) @@ -134,6 +134,46 @@ #define MEXITCOUNT #endif /* GPROF */ +/* + * Setup the kernel segment registers. + */ +#define SET_KERNEL_SREGS \ + movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ + movl %eax, %ds ; \ + movl %eax, %es ; \ + movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ + movl %eax, %fs + +#ifdef XEN +#define LOAD_CR3(reg) \ + movl reg,PCPU(CR3); \ + pushl %ecx ; \ + pushl %edx ; \ + pushl %esi ; \ + pushl reg ; \ + call xen_load_cr3 ; \ + addl $4,%esp ; \ + popl %esi ; \ + popl %edx ; \ + popl %ecx ; \ + +#define READ_CR3(reg) movl PCPU(CR3),reg; +#define LLDT(arg) \ + pushl %edx ; \ + pushl %eax ; \ + xorl %eax,%eax ; \ + movl %eax,%gs ; \ + call i386_reset_ldt ; \ + popl %eax ; \ + popl %edx +#define CLI call ni_cli +#else +#define LOAD_CR3(reg) movl reg,%cr3; +#define READ_CR3(reg) movl %cr3,reg; +#define LLDT(arg) lldt arg; +#define CLI cli +#endif /* !XEN */ + #ifdef LOCORE /* * Convenience macros for declaring interrupt entry points and trap @@ -145,4 +185,30 @@ #endif /* LOCORE */ +#ifdef __STDC__ +#define ELFNOTE(name, type, desctype, descdata...) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz #name ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection +#else /* !__STDC__, i.e. -traditional */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection +#endif /* __STDC__ */ + #endif /* !_MACHINE_ASMACROS_H_ */ Modified: projects/release_6_3_xen/sys/i386/include/cpufunc.h ============================================================================== --- projects/release_6_3_xen/sys/i386/include/cpufunc.h Sat Oct 18 06:19:12 2008 (r184015) +++ projects/release_6_3_xen/sys/i386/include/cpufunc.h Sat Oct 18 06:19:52 2008 (r184016) @@ -42,6 +42,17 @@ #error this file needs sys/cdefs.h as a prerequisite #endif +#ifdef XEN +extern void xen_cli(void); +extern void xen_sti(void); +extern u_int xen_rcr2(void); +extern void xen_load_cr3(u_int data); +extern void xen_tlb_flush(void); +extern void xen_invlpg(u_int addr); +extern int xen_save_and_cli(void); +extern void xen_restore_flags(u_int eflags); +#endif + struct region_descriptor; #define readb(va) (*(volatile u_int8_t *) (va)) @@ -81,7 +92,11 @@ bsrl(u_int mask) static __inline void disable_intr(void) { +#ifdef XEN + xen_cli(); +#else __asm __volatile("cli" : : : "memory"); +#endif } static __inline void *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***