# HG changeset patch # User sos22@douglas.cl.cam.ac.uk # Node ID e66707bec7a952ece26574908493e4d853f37271 # Parent 5afb142646294a6c446e275c5bef60ff7d477881 PV-on-HVM patch diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri May 5 17:38:45 2006 +0100 @@ -46,6 +46,7 @@ #include #include #include +#include #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri May 5 17:38:45 2006 +0100 @@ -42,6 +42,12 @@ #include #include #include +#include + +#ifndef CONFIG_XEN +#include +#include +#endif #if 1 #define ASSERT(_p) \ @@ -389,6 +395,7 @@ int gnttab_resume(void) { +#ifdef CONFIG_XEN gnttab_setup_table_t setup; unsigned long frames[NR_GRANT_FRAMES]; int rc; @@ -422,23 +429,37 @@ printk("grant table at %p\n", shared); #endif +#else + unsigned long frames; + unsigned long alloc_xen_mmio(unsigned long len); + frames = alloc_xen_mmio(PAGE_SIZE * NR_GRANT_FRAMES); + shared = ioremap(frames, PAGE_SIZE * NR_GRANT_FRAMES); + if(!shared){ + printk("error to ioremap gnttab share frames\n"); + return -1; + } + BUG_ON(HYPERVISOR_virtual_device_op(VDOP_setup_gnttab_table, (long)frames, NR_GRANT_FRAMES) != 0); +#endif return 0; } int gnttab_suspend(void) { - #ifndef __ia64__ +#ifdef CONFIG_XEN apply_to_page_range(&init_mm, (unsigned long)shared, PAGE_SIZE * NR_GRANT_FRAMES, unmap_pte_fn, NULL); -#endif - - return 0; -} - -static int __init +#else + iounmap(shared); +#endif +#endif + + return 0; +} + +int __init gnttab_init(void) { int i; @@ -455,10 +476,13 @@ gnttab_free_head = NR_RESERVED_ENTRIES; printk("Grant table initialized\n"); - return 0; -} - + + return 0; +} 
+ +#ifdef CONFIG_XEN core_initcall(gnttab_init); +#endif /* * Local variables: diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c --- a/linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c Fri May 5 17:38:45 2006 +0100 @@ -1,4 +1,5 @@ +#include #include #include #include @@ -12,6 +13,7 @@ panic("Couldn't create /proc/xen"); return create_proc_entry(name, mode, xen_base); } +EXPORT_SYMBOL(create_xen_proc_entry); void remove_xen_proc_entry(const char *name) { diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/netback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri May 5 17:38:45 2006 +0100 @@ -57,6 +57,7 @@ /* Unique identifier for this interface. */ domid_t domid; unsigned int handle; + unsigned int copyall; u8 fe_dev_addr[6]; @@ -99,7 +100,8 @@ void netif_disconnect(netif_t *netif); -netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]); +netif_t *alloc_netif(domid_t domid, unsigned int handle, + unsigned int copyall, u8 be_mac[ETH_ALEN]); void free_netif(netif_t *netif); int netif_map(netif_t *netif, unsigned long tx_ring_ref, unsigned long rx_ring_ref, unsigned int evtchn); diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Fri May 5 17:38:45 2006 +0100 @@ -78,7 +78,8 @@ .set_tx_csum = ethtool_op_set_tx_csum, }; -netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]) +netif_t *alloc_netif(domid_t domid, unsigned int handle, + unsigned int copyall, u8 be_mac[ETH_ALEN]) { int err = 0, i; struct net_device *dev; @@ -97,6 +98,7 @@ netif->domid = domid; netif->handle = handle; netif->status = 
DISCONNECTED; + netif->copyall = copyall; atomic_set(&netif->refcnt, 0); netif->dev = dev; diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri May 5 17:38:45 2006 +0100 @@ -50,7 +50,8 @@ s8 st, u16 offset, u16 size, - u16 flags); + u16 flags, + int do_prod); static void net_tx_action(unsigned long unused); static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); @@ -63,13 +64,17 @@ #define MAX_PENDING_REQS 256 static struct sk_buff_head rx_queue; -static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; +static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3]; static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; -static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; +static gnttab_transfer_t grant_rx_trans_op[NET_RX_RING_SIZE]; +static gnttab_map_grant_ref_t grant_rx_map_op[NET_RX_RING_SIZE]; +static gnttab_unmap_grant_ref_t grant_rx_unmap_op[NET_RX_RING_SIZE]; static unsigned char rx_notify[NR_IRQS]; static unsigned long mmap_vstart; #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + +static void *rx_mmap_area; #define PKT_PROT_LEN 64 @@ -96,13 +101,18 @@ static struct list_head net_schedule_list; static spinlock_t net_schedule_list_lock; +static int skb_needs_copy_p(struct sk_buff *skb, + netif_t *netif) +{ + return netif->copyall; +} + +static unsigned long alloc_mfn(void) +{ #define MAX_MFN_ALLOC 64 -static unsigned long mfn_list[MAX_MFN_ALLOC]; -static unsigned int alloc_index = 0; -static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED; - -static unsigned long alloc_mfn(void) -{ + static unsigned long mfn_list[MAX_MFN_ALLOC]; + static unsigned int alloc_index = 0; + static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED; unsigned long mfn = 0, flags; struct xen_memory_reservation reservation = { .nr_extents = MAX_MFN_ALLOC, @@ -216,73 +226,115 @@ u16 size, id, irq, flags; multicall_entry_t 
*mcl; mmu_update_t *mmu; - gnttab_transfer_t *gop; + gnttab_transfer_t *flip_gop; + gnttab_map_grant_ref_t *map_gop; + gnttab_unmap_grant_ref_t *unmap_gop; unsigned long vdata, old_mfn, new_mfn; - struct sk_buff_head rxq; + struct sk_buff_head flip_rxq, copy_rxq, copy_rxq2; struct sk_buff *skb; u16 notify_list[NET_RX_RING_SIZE]; int notify_nr = 0; int ret; - - skb_queue_head_init(&rxq); + void *rx_mmap_ptr; + netif_rx_request_t *rx_req_p; + void *remote_data; + unsigned notify; + + skb_queue_head_init(&flip_rxq); + skb_queue_head_init(&copy_rxq); + skb_queue_head_init(&copy_rxq2); mcl = rx_mcl; mmu = rx_mmu; - gop = grant_rx_op; - + flip_gop = grant_rx_trans_op; + map_gop = grant_rx_map_op; + rx_mmap_ptr = rx_mmap_area; + + /* Split the incoming skbs according to whether they need to + be page flipped or copied, and build up the first set of + hypercall arguments. */ while ((skb = skb_dequeue(&rx_queue)) != NULL) { netif = netdev_priv(skb->dev); - vdata = (unsigned long)skb->data; - old_mfn = virt_to_mfn(vdata); - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Memory squeeze? Back off for an arbitrary while. */ - if ((new_mfn = alloc_mfn()) == 0) { - if ( net_ratelimit() ) - WPRINTK("Memory squeeze in netback " - "driver.\n"); - mod_timer(&net_timer, jiffies + HZ); - skb_queue_head(&rx_queue, skb); + size = skb->tail - skb->data; + if (skb_needs_copy_p(skb, netif)) { + if (map_gop - grant_rx_map_op == + ARRAY_SIZE(grant_rx_map_op)) break; + rx_req_p = RING_GET_REQUEST(&netif->rx, + netif->rx.req_cons); + map_gop->host_addr = (unsigned long)rx_mmap_ptr; + map_gop->dom = netif->domid; + map_gop->ref = rx_req_p->gref; + map_gop->flags = GNTMAP_host_map; + map_gop++; + rx_mmap_ptr += PAGE_SIZE; + + memcpy(skb->cb, rx_req_p, sizeof(*rx_req_p)); + + netif->rx.req_cons++; + __skb_queue_tail(&copy_rxq, skb); + } else { + /* Filled the batch queue? 
*/ + if ((flip_gop - grant_rx_trans_op) == + ARRAY_SIZE(grant_rx_trans_op)) + break; + + vdata = (unsigned long)skb->data; + old_mfn = virt_to_mfn(vdata); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Memory squeeze? Back off for an + * arbitrary while. */ + if ((new_mfn = alloc_mfn()) == 0) { + if ( net_ratelimit() ) + WPRINTK("Memory squeeze in netback " + "driver.\n"); + mod_timer(&net_timer, jiffies + HZ); + skb_queue_head(&rx_queue, skb); + break; + } + /* + * Set the new P2M table entry before + * reassigning the old data page. Heed + * the comment in + * pgtable-2level.h:pte_page(). :-) + */ + set_phys_to_machine( + __pa(skb->data) >> PAGE_SHIFT, + new_mfn); + + MULTI_update_va_mapping(mcl, vdata, + pfn_pte_ma(new_mfn, + PAGE_KERNEL), 0); + mcl++; + + mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE; + mmu->val = __pa(vdata) >> PAGE_SHIFT; + mmu++; } - /* - * Set the new P2M table entry before reassigning - * the old data page. Heed the comment in - * pgtable-2level.h:pte_page(). :-) - */ - set_phys_to_machine( - __pa(skb->data) >> PAGE_SHIFT, - new_mfn); - - MULTI_update_va_mapping(mcl, vdata, - pfn_pte_ma(new_mfn, - PAGE_KERNEL), 0); - mcl++; - - mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE; - mmu->val = __pa(vdata) >> PAGE_SHIFT; - mmu++; - } - - gop->mfn = old_mfn; - gop->domid = netif->domid; - gop->ref = RING_GET_REQUEST( - &netif->rx, netif->rx.req_cons)->gref; - netif->rx.req_cons++; - gop++; - - __skb_queue_tail(&rxq, skb); - - /* Filled the batch queue? 
*/ - if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op)) - break; - } - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - if (mcl == rx_mcl) - return; - + + flip_gop->mfn = old_mfn; + flip_gop->domid = netif->domid; + flip_gop->ref = RING_GET_REQUEST( + &netif->rx, netif->rx.req_cons)->gref; + flip_gop++; + + netif->rx.req_cons++; + __skb_queue_tail(&flip_rxq, skb); + } + + netif->stats.tx_bytes += size; + netif->stats.tx_packets++; + } + + if (flip_gop == grant_rx_trans_op && map_gop == grant_rx_map_op) { + /* Nothing to do */ + return; + } + + if (mcl != rx_mcl) { + /* Did some unmaps -> need a TLB flush */ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; if (mmu - rx_mmu) { @@ -294,26 +346,32 @@ mcl++; } - ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); - BUG_ON(ret != 0); - } - - ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, - gop - grant_rx_op); + BUG_ON(flip_gop == grant_rx_trans_op); + MULTI_grant_table_op(mcl, GNTTABOP_transfer, + grant_rx_trans_op, + flip_gop - grant_rx_trans_op); + mcl++; + } + if (map_gop != grant_rx_map_op) { + MULTI_grant_table_op(mcl, GNTTABOP_map_grant_ref, + grant_rx_map_op, + map_gop - grant_rx_map_op); + mcl++; + } + + ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); BUG_ON(ret != 0); + /* Now do all of the page flips */ mcl = rx_mcl; - gop = grant_rx_op; - while ((skb = __skb_dequeue(&rxq)) != NULL) { + flip_gop = grant_rx_trans_op; + while ((skb = __skb_dequeue(&flip_rxq)) != NULL) { netif = netdev_priv(skb->dev); size = skb->tail - skb->data; atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->frag_list = NULL; - - netif->stats.tx_bytes += size; - netif->stats.tx_packets++; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* The update_va_mapping() must not fail. */ @@ -323,14 +381,14 @@ /* Check the reassignment error code. 
*/ status = NETIF_RSP_OKAY; - if (gop->status != 0) { + if (flip_gop->status != 0) { DPRINTK("Bad status %d from grant transfer to DOM%u\n", - gop->status, netif->domid); + flip_gop->status, netif->domid); /* * Page no longer belongs to us unless GNTST_bad_page, * but that should be a fatal error anyway. */ - BUG_ON(gop->status == GNTST_bad_page); + BUG_ON(flip_gop->status == GNTST_bad_page); status = NETIF_RSP_ERROR; } irq = netif->irq; @@ -342,7 +400,7 @@ flags |= NETRXF_data_validated; if (make_rx_response(netif, id, status, (unsigned long)skb->data & ~PAGE_MASK, - size, flags) && + size, flags, 1) && (rx_notify[irq] == 0)) { rx_notify[irq] = 1; notify_list[notify_nr++] = irq; @@ -350,7 +408,72 @@ netif_put(netif); dev_kfree_skb(skb); - gop++; + flip_gop++; + } + + /* Now do all of the copies */ + map_gop = grant_rx_map_op; + unmap_gop = grant_rx_unmap_op; + while ((skb = __skb_dequeue(&copy_rxq)) != NULL) { + netif = netdev_priv(skb->dev); + size = skb->tail - skb->data; + + rx_req_p = (netif_rx_request_t *)skb->cb; + + if (rx_req_p->offset + rx_req_p->maxsize > PAGE_SIZE || + rx_req_p->maxsize < size || + map_gop->status) { + status = NETIF_RSP_ERROR; + } else { + remote_data = + (void *)(unsigned long)map_gop->host_addr; + memcpy(remote_data + rx_req_p->offset, + skb->data, + size); + status = NETIF_RSP_OKAY; + } + + unmap_gop->host_addr = map_gop->host_addr; + unmap_gop->dev_bus_addr = 0; + unmap_gop->handle = map_gop->handle; + + flags = 0; + if (skb->ip_summed == CHECKSUM_HW) + flags |= NETRXF_csum_blank; + if (skb->proto_data_valid) + flags |= NETRXF_data_validated; + make_rx_response(netif, rx_req_p->id, status, + rx_req_p->offset, size, + flags, 0); + + map_gop++; + unmap_gop++; + + __skb_queue_tail(&copy_rxq2, skb); + } + + /* Unmap the packets we just copied into */ + if (unmap_gop != grant_rx_unmap_op) { + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + grant_rx_unmap_op, + unmap_gop - grant_rx_unmap_op); + BUG_ON(ret); + /* And notify the other 
side. */ + while ((skb = __skb_dequeue(&copy_rxq2)) != NULL) { + netif = netdev_priv(skb->dev); + irq = netif->irq; + if (!rx_notify[irq]) { + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, + notify); + if (notify) { + rx_notify[irq] = 1; + notify_list[notify_nr++] = irq; + } + } + + netif_put(netif); + dev_kfree_skb(skb); + } } while (notify_nr != 0) { @@ -708,6 +831,12 @@ netif_idx_release(pending_idx); } +static void netif_rx_page_release(struct page *page) +{ + /* Ready for next use. */ + set_page_count(page, 1); +} + irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) { netif_t *netif = dev_id; @@ -748,11 +877,12 @@ s8 st, u16 offset, u16 size, - u16 flags) + u16 flags, + int do_push) { RING_IDX i = netif->rx.rsp_prod_pvt; netif_rx_response_t *resp; - int notify; + int notify = 0; resp = RING_GET_RESPONSE(&netif->rx, i); resp->offset = offset; @@ -763,7 +893,8 @@ resp->status = (s16)st; netif->rx.rsp_prod_pvt = ++i; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); + if (do_push) + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); return notify; } @@ -827,6 +958,16 @@ SetPageForeign(page, netif_page_release); } + page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE); + BUG_ON(page == NULL); + rx_mmap_area = pfn_to_kaddr(page_to_pfn(page)); + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE)); + set_page_count(page, 1); + SetPageForeign(page, netif_rx_page_release); + } + pending_cons = 0; pending_prod = MAX_PENDING_REQS; for (i = 0; i < MAX_PENDING_REQS; i++) diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri May 5 17:38:45 2006 +0100 @@ -154,6 +154,7 @@ struct backend_info *be = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; + unsigned copyall; 
DPRINTK(""); @@ -169,10 +170,17 @@ return; } + err = xenbus_scanf(XBT_NULL, dev->nodename, "copyall", "%u", &copyall); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading copyall flag"); + return; + } + if (be->netif == NULL) { u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 }; - be->netif = alloc_netif(dev->otherend_id, handle, be_mac); + be->netif = alloc_netif(dev->otherend_id, handle, + copyall, be_mac); if (IS_ERR(be->netif)) { err = PTR_ERR(be->netif); be->netif = NULL; diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri May 5 17:38:45 2006 +0100 @@ -62,6 +62,7 @@ #include #include #include +#include #define GRANT_INVALID_REF 0 @@ -91,6 +92,7 @@ unsigned int handle; unsigned int evtchn, irq; + unsigned int copyall; /* What is the status of our connection to the remote backend? */ #define BEST_CLOSED 0 @@ -170,7 +172,7 @@ static int talk_to_backend(struct xenbus_device *, struct netfront_info *); static int setup_device(struct xenbus_device *, struct netfront_info *); -static int create_netdev(int, struct xenbus_device *, struct net_device **); +static int create_netdev(int, int, struct xenbus_device *, struct net_device **); static void netfront_closing(struct xenbus_device *); @@ -212,14 +214,20 @@ struct net_device *netdev; struct netfront_info *info; unsigned int handle; + unsigned int copyall; err = xenbus_scanf(XBT_NULL, dev->nodename, "handle", "%u", &handle); if (err != 1) { xenbus_dev_fatal(dev, err, "reading handle"); return err; } - - err = create_netdev(handle, dev, &netdev); + err = xenbus_scanf(XBT_NULL, dev->nodename, "copyall", "%u", &copyall); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading copyall flag"); + return err; + } + + err = create_netdev(handle, copyall, dev, &netdev); if (err) { xenbus_dev_fatal(dev, err, "creating netdev"); return err; @@ -574,7 
+582,7 @@ np->rx_target = np->rx_max_target; refill: - for (i = 0; ; i++) { + for (nr_flips = i = 0; ; i++) { if ((skb = __skb_dequeue(&np->rx_batch)) == NULL) break; @@ -584,17 +592,78 @@ np->rx_skbs[id] = skb; - RING_GET_REQUEST(&np->rx, req_prod + i)->id = id; ref = gnttab_claim_grant_reference(&np->gref_rx_head); BUG_ON((signed short)ref < 0); np->grant_rx_ref[id] = ref; - gnttab_grant_foreign_transfer_ref(ref, - np->xbdev->otherend_id, - __pa(skb->head) >> PAGE_SHIFT); - RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref; - np->rx_pfn_array[i] = virt_to_mfn(skb->head); + + if ( !np->copyall ) { + gnttab_grant_foreign_transfer_ref(ref, + np->xbdev->otherend_id, + __pa(skb->head) >> PAGE_SHIFT); + np->rx_pfn_array[nr_flips] = virt_to_mfn(skb->head); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page from map before + * passing back to Xen. */ + set_phys_to_machine(__pa(skb->head) >> + PAGE_SHIFT, + INVALID_P2M_ENTRY); + + MULTI_update_va_mapping(np->rx_mcl+nr_flips, + (unsigned long)skb->head, + __pte(0), 0); + } + nr_flips++; + } else { + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + virt_to_mfn(skb->head), + 0); + } + req = RING_GET_REQUEST(&np->rx, req_prod + i); + req->offset = (unsigned long)skb->data & ~PAGE_MASK; + req->gref = ref; + req->maxsize = skb->end - skb->data; + req->id = id; + } + + if ( nr_flips != 0 ) { + set_xen_guest_handle(reservation.extent_start, + np->rx_pfn_array); + reservation.nr_extents = nr_flips; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; + + /* Tell the ballon driver what is going on. */ + balloon_update_driver_allowance(nr_flips); if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* After all PTEs have been zapped, flush the + * TLB. */ + np->rx_mcl[nr_flips-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. 
*/ + np->rx_mcl[nr_flips].op = __HYPERVISOR_memory_op; + np->rx_mcl[nr_flips].args[0] = + XENMEM_decrease_reservation; + np->rx_mcl[nr_flips].args[1] = + (unsigned long)&reservation; + + /* Zap PTEs and give away pages in one big + * multicall. */ + (void)HYPERVISOR_multicall(np->rx_mcl, nr_flips + 1); + + /* Check return status of + * HYPERVISOR_memory_op(). */ + if (unlikely(np->rx_mcl[nr_flips].result != nr_flips)) + panic("Unable to reduce memory reservation (%ld,%d)\n", + np->rx_mcl[nr_flips].result, nr_flips); + } else { + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation) != i) + panic("Unable to reduce memory reservation\n"); /* Remove this page before passing back to Xen. */ set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, INVALID_P2M_ENTRY); @@ -602,37 +671,9 @@ (unsigned long)skb->head, __pte(0), 0); } - } - - /* Tell the ballon driver what is going on. */ - balloon_update_driver_allowance(i); - - set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array); - reservation.nr_extents = i; - reservation.extent_order = 0; - reservation.address_bits = 0; - reservation.domid = DOMID_SELF; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* After all PTEs have been zapped, flush the TLB. */ - np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = - UVMF_TLB_FLUSH|UVMF_ALL; - - /* Give away a batch of pages. */ - np->rx_mcl[i].op = __HYPERVISOR_memory_op; - np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; - np->rx_mcl[i].args[1] = (unsigned long)&reservation; - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(np->rx_mcl, i+1); - - /* Check return status of HYPERVISOR_memory_op(). 
*/ - if (unlikely(np->rx_mcl[i].result != i)) - panic("Unable to reduce memory reservation\n"); - } else - if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation) != i) - panic("Unable to reduce memory reservation\n"); + } else { + wmb(); + } /* Above is a suitable barrier to ensure backend will see requests. */ np->rx.req_prod_pvt = req_prod + i; @@ -697,8 +738,10 @@ tx->flags = 0; if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; +#ifdef CONFIG_XEN if (skb->proto_data_valid) /* remote but checksummed? */ tx->flags |= NETTXF_data_validated; +#endif np->tx.req_prod_pvt = i + 1; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); @@ -757,6 +800,8 @@ unsigned long flags; unsigned long mfn; grant_ref_t ref; + unsigned long ret; + netif_rx_request_t *req; spin_lock(&np->rx_lock); @@ -788,25 +833,50 @@ continue; } - /* Memory pressure, insufficient buffer headroom, ... */ - if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) { - if (net_ratelimit()) - WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", - rx->id, rx->status); - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = - rx->id; - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = - ref; - np->rx.req_prod_pvt++; - RING_PUSH_REQUESTS(&np->rx); - work_done--; - continue; + skb = np->rx_skbs[rx->id]; + + if ( !np->copyall ) { + /* Memory pressure, insufficient buffer + * headroom, ... */ + if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) + { + if (net_ratelimit()) + WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", + rx->id, rx->status); + req = RING_GET_REQUEST(&np->rx, + np->rx.req_prod_pvt); + req->id = rx->id; + req->gref = ref; + np->rx.req_prod_pvt++; + RING_PUSH_REQUESTS(&np->rx); + work_done--; + continue; + } + /* Remap the page. 
*/ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + MULTI_update_va_mapping(mcl, + (unsigned long)skb->head, + pfn_pte_ma(mfn, + PAGE_KERNEL), + 0); + mcl++; + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = __pa(skb->head) >> PAGE_SHIFT; + mmu++; + + set_phys_to_machine(__pa(skb->head) + >> PAGE_SHIFT, + mfn); + } + } else { + ret = gnttab_end_foreign_access_ref(ref, 0); + BUG_ON(!ret); } gnttab_release_grant_reference(&np->gref_rx_head, ref); np->grant_rx_ref[rx->id] = GRANT_INVALID_REF; - skb = np->rx_skbs[rx->id]; add_id_to_freelist(np->rx_skbs, rx->id); /* NB. We handle skb overflow later. */ @@ -820,30 +890,20 @@ */ if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) { skb->ip_summed = CHECKSUM_UNNECESSARY; +#ifdef CONFIG_XEN skb->proto_data_valid = 1; +#endif } else { skb->ip_summed = CHECKSUM_NONE; +#ifdef CONFIG_XEN skb->proto_data_valid = 0; +#endif } +#ifdef CONFIG_XEN skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank); - +#endif np->stats.rx_packets++; np->stats.rx_bytes += rx->status; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Remap the page. */ - MULTI_update_va_mapping(mcl, (unsigned long)skb->head, - pfn_pte_ma(mfn, PAGE_KERNEL), - 0); - mcl++; - mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) - | MMU_MACHPHYS_UPDATE; - mmu->val = __pa(skb->head) >> PAGE_SHIFT; - mmu++; - - set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, - mfn); - } __skb_queue_tail(&rxq, skb); } @@ -973,7 +1033,9 @@ struct netfront_info *np; int i, requeue_idx; netif_tx_request_t *tx; + netif_rx_request_t *req; struct sk_buff *skb; + grant_ref_t gref; np = netdev_priv(dev); spin_lock_irq(&np->tx_lock); @@ -1022,8 +1084,10 @@ tx->flags = 0; if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; +#ifdef CONFIG_XEN if (skb->proto_data_valid) /* remote but checksummed? 
*/ tx->flags |= NETTXF_data_validated; +#endif np->stats.tx_bytes += skb->len; np->stats.tx_packets++; @@ -1036,13 +1100,23 @@ for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) { if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET) continue; - gnttab_grant_foreign_transfer_ref( - np->grant_rx_ref[i], np->xbdev->otherend_id, - __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT); - RING_GET_REQUEST(&np->rx, requeue_idx)->gref = - np->grant_rx_ref[i]; - RING_GET_REQUEST(&np->rx, requeue_idx)->id = i; - requeue_idx++; + gref = np->grant_rx_ref[i]; + skb = np->rx_skbs[i]; + if ( !np->copyall ) { + gnttab_grant_foreign_transfer_ref( + gref, np->xbdev->otherend_id, + __pa(skb->data) >> PAGE_SHIFT); + } else { + gnttab_grant_foreign_access_ref( + gref, np->xbdev->otherend_id, + virt_to_mfn(skb->data), 0); + } + req = RING_GET_REQUEST(&np->rx, requeue_idx); + req->gref = gref; + req->offset = (unsigned long)skb->data & ~PAGE_MASK; + req->maxsize = skb->end - skb->data; + req->id = i; + requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; @@ -1107,7 +1181,7 @@ * @param val return parameter for created device * @return 0 on success, error code otherwise */ -static int create_netdev(int handle, struct xenbus_device *dev, +static int create_netdev(int handle, int copyall, struct xenbus_device *dev, struct net_device **val) { int i, err = 0; @@ -1126,6 +1200,7 @@ np->user_state = UST_CLOSED; np->handle = handle; np->xbdev = dev; + np->copyall = copyall; spin_lock_init(&np->tx_lock); spin_lock_init(&np->rx_lock); @@ -1174,7 +1249,11 @@ netdev->set_multicast_list = network_set_multicast_list; netdev->uninit = netif_uninit; netdev->weight = 64; +#ifdef CONFIG_XEN netdev->features = NETIF_F_IP_CSUM; +#else + netdev->features = 0; +#endif SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); SET_MODULE_OWNER(netdev); diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri May 5 00:27:10 2006 +0100 
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri May 5 17:38:45 2006 +0100 @@ -49,7 +49,7 @@ static inline struct xenstore_domain_interface *xenstore_domain_interface(void) { - return mfn_to_virt(xen_start_info->store_mfn); + return xenstore_page_virt(); } static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs) diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Fri May 5 17:38:45 2006 +0100 @@ -40,6 +40,19 @@ int xs_input_avail(void); extern wait_queue_head_t xb_waitq; +#ifdef CONFIG_XEN +static inline void *xenstore_page_virt(void) +{ + return mfn_to_virt(xen_start_info->store_mfn); +} +#else +extern void *shared_xenstore_buf; +static inline void *xenstore_page_virt(void) +{ + return shared_xenstore_buf; +} +#endif + #endif /* _XENBUS_COMMS_H */ /* diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Fri May 5 17:38:45 2006 +0100 @@ -232,7 +232,7 @@ .poll = xenbus_dev_poll, }; -static int __init +int __init xenbus_dev_init(void) { xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400); diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri May 5 17:38:45 2006 +0100 @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -958,6 +959,7 @@ return len; } +void *shared_xenstore_buf; static int __init xenbus_probe_init(void) { @@ -1018,6 +1020,12 @@ } else xenstored_ready = 1; +#ifndef CONFIG_XEN + /* We need to explicitly map the shared page */ + 
shared_xenstore_buf = ioremap(xen_start_info->store_mfn << PAGE_SHIFT, PAGE_SIZE); + + xenbus_dev_init(); +#endif /* Initialize the interface to xenstore. */ err = xs_init(); if (err) { @@ -1049,8 +1057,10 @@ } postcore_initcall(xenbus_probe_init); - - +MODULE_LICENSE("Dual BSD/GPL"); + + +#ifndef MODULE /* * On a 10 second timeout, wait for all devices currently configured. We need * to do this to guarantee that the filesystems and / or network devices @@ -1080,7 +1090,7 @@ } late_initcall(wait_for_devices); - +#endif /* * Local variables: diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Fri May 5 17:38:45 2006 +0100 @@ -42,6 +42,7 @@ #define __STR(x) #x #define STR(x) __STR(x) +#ifdef CONFIG_XEN #define _hypercall0(type, name) \ ({ \ long __res; \ @@ -114,6 +115,92 @@ : "memory" ); \ (type)__res; \ }) +#else +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "movl hypercall_page, %%eax\n" \ + "addl $"STR(__HYPERVISOR_##name)" * 32, %%eax\n"\ + "call *%%eax" \ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "movl hypercall_page, %%eax\n" \ + "addl $"STR(__HYPERVISOR_##name)" * 32, %%eax\n"\ + "call *%%eax" \ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "movl hypercall_page, %%eax\n" \ + "addl $"STR(__HYPERVISOR_##name)" * 32, %%eax\n"\ + "call *%%eax" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, 
__ign3; \ + asm volatile ( \ + "movl hypercall_page, %%eax\n" \ + "addl $"STR(__HYPERVISOR_##name)" * 32, %%eax\n"\ + "call *%%eax" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + "movl hypercall_page, %%eax\n" \ + "addl $"STR(__HYPERVISOR_##name)" * 32, %%eax\n"\ + "call *%%eax" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + "movl hypercall_page, %%eax\n" \ + "addl $"STR(__HYPERVISOR_##name)" * 32, %%eax\n"\ + "call *%%eax" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) +#endif static inline int HYPERVISOR_set_trap_table( @@ -343,6 +430,13 @@ unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); +} + +static inline unsigned long +HYPERVISOR_virtual_device_op( + int op, unsigned long arg1, unsigned long arg2) +{ + return _hypercall3(unsigned long, virtual_device_op, op, arg1, arg2); } static inline int diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri May 5 17:38:45 2006 +0100 @@ -200,6 +200,16 @@ } static inline void +MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd, + void *uop, 
unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +static inline void MULTI_update_va_mapping_otherdomain( multicall_entry_t *mcl, unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri May 5 17:38:45 2006 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ @@ -59,114 +60,6 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) - -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -#define INVALID_P2M_ENTRY (~0UL) -#define FOREIGN_FRAME_BIT (1UL<<31) -#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) - -extern unsigned long *phys_to_machine_mapping; - -static inline unsigned long pfn_to_mfn(unsigned long pfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; -} - -static inline int phys_to_machine_mapping_valid(unsigned long pfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 1; - return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); -} - -static inline unsigned long mfn_to_pfn(unsigned long mfn) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return mfn; - - /* - * The array access can fail (e.g., device space beyond end of RAM). - * In such cases it doesn't matter what we return (we return garbage), - * but we must handle the fault without crashing! 
- */ - asm ( - "1: movl %1,%0\n" - "2:\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,2b\n" - ".previous" - : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) ); - - return pfn; -} - -/* - * We detect special mappings in one of two ways: - * 1. If the MFN is an I/O page then Xen will set the m2p entry - * to be outside our maximum possible pseudophys range. - * 2. If the MFN belongs to a different domain then we will certainly - * not have MFN in our p2m table. Conversely, if the page is ours, - * then we'll have p2m(m2p(MFN))==MFN. - * If we detect a special mapping then it doesn't have a 'struct page'. - * We force !pfn_valid() by returning an out-of-range pointer. - * - * NB. These checks require that, for any MFN that is not in our reservation, - * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if - * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. - * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. - * - * NB2. When deliberately mapping foreign pages into the p2m table, you *must* - * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. In all the cases we care about, the FOREIGN_FRAME bit is - * masked (e.g., pfn_to_mfn()) so behaviour there is correct. - */ -static inline unsigned long mfn_to_local_pfn(unsigned long mfn) -{ - extern unsigned long max_mapnr; - unsigned long pfn = mfn_to_pfn(mfn); - if ((pfn < max_mapnr) - && !xen_feature(XENFEAT_auto_translated_physmap) - && (phys_to_machine_mapping[pfn] != mfn)) - return max_mapnr; /* force !pfn_valid() */ - return pfn; -} - -static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } - phys_to_machine_mapping[pfn] = mfn; -} - -/* Definitions for machine and pseudophysical addresses. 
*/ -#ifdef CONFIG_X86_PAE -typedef unsigned long long paddr_t; -typedef unsigned long long maddr_t; -#else -typedef unsigned long paddr_t; -typedef unsigned long maddr_t; -#endif - -static inline maddr_t phys_to_machine(paddr_t phys) -{ - maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); - machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); - return machine; -} -static inline paddr_t machine_to_phys(maddr_t machine) -{ - paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); - phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); - return phys; -} /* * These are used to make use of C type-checking.. @@ -245,7 +138,6 @@ #define pgprot_val(x) ((x).pgprot) -#define __pte_ma(x) ((pte_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) #endif /* !__ASSEMBLY__ */ @@ -313,11 +205,6 @@ ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -/* VIRT <-> MACHINE conversion */ -#define virt_to_machine(v) (phys_to_machine(__pa(v))) -#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT)) -#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) - #define __HAVE_ARCH_GATE_AREA 1 #endif /* __KERNEL__ */ diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h Fri May 5 00:27:10 2006 +0100 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h Fri May 5 17:38:45 2006 +0100 @@ -45,7 +45,6 @@ #define pte_none(x) (!(x).pte_low) #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) /* diff -r 5afb14264629 -r e66707bec7a9 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h Fri May 5 00:27:10 2006 +0100 +++ 
b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h Fri May 5 17:38:45 2006 +0100 @@ -151,18 +151,6 @@ extern unsigned long long __supported_pte_mask; -static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) -{ - pte_t pte; - - pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \ - (pgprot_val(pgprot) >> 32); - pte.pte_high &= (__supported_pte_mask >> 32); - pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \ - __supported_pte_mask; - return pte; -} - static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { return pfn_pte_ma(pfn_to_mfn(page_nr), pgprot); diff -r 5afb14264629 -r e66707bec7a9 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Fri May 5 00:27:10 2006 +0100 +++ b/tools/ioemu/hw/pc.c Fri May 5 17:38:45 2006 +0100 @@ -538,6 +538,8 @@ } if (pci_enabled) { + if (evtchn_enabled) + pci_xen_evtchn_init(pci_bus); for(i = 0; i < nb_nics; i++) { if (nic_ne2000) pci_ne2000_init(pci_bus, &nd_table[i]); @@ -545,6 +547,9 @@ pci_pcnet_init(pci_bus, &nd_table[i]); } pci_piix3_ide_init(pci_bus, bs_table); +#ifdef APIC_SUPPORT + IOAPICInit(); +#endif } else { nb_nics1 = nb_nics; if (nb_nics1 > NE2000_NB_MAX) diff -r 5afb14264629 -r e66707bec7a9 tools/ioemu/target-i386-dm/Makefile --- a/tools/ioemu/target-i386-dm/Makefile Fri May 5 00:27:10 2006 +0100 +++ b/tools/ioemu/target-i386-dm/Makefile Fri May 5 17:38:45 2006 +0100 @@ -276,7 +276,7 @@ endif # Hardware support -VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o +VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o xen_evtchn.o VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259_stub.o pc.o port-e9.o VL_OBJS+= cirrus_vga.o pcnet.o VL_OBJS+= $(SOUND_HW) $(AUDIODRV) mixeng.o diff -r 5afb14264629 -r e66707bec7a9 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Fri May 5 00:27:10 2006 +0100 +++ b/tools/ioemu/target-i386-dm/helper2.c Fri May 5 17:38:45 2006 +0100 @@ -21,8 +21,8 @@ /* * Main cpu loop for handling I/O requests coming from a virtual machine * - * 
Copyright © 2004, Intel Corporation. - * Copyright © 2005, International Business Machines Corporation. + * Copyright © 2004, Intel Corporation. + * Copyright © 2005, International Business Machines Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU Lesser General Public License, @@ -125,6 +125,11 @@ //the evtchn fd for polling int evtchn_fd = -1; +//the evtchn port for polling the notification, +//should be passed in as a bochs parameter +#define NR_CPUS 32 +evtchn_port_t ioreq_local_port[NR_CPUS]; + //which vcpu we are serving int send_vcpu = 0; @@ -136,10 +141,10 @@ for ( i = 0; i < vcpus; i++ ) { req = &(shared_page->vcpu_iodata[i].vp_ioreq); - term_printf("vcpu %d: event port %d\n", - i, shared_page->vcpu_iodata[i].vp_eport); + term_printf("vcpu %d: event port %u\n", + i, ioreq_local_port[i]); term_printf(" req state: %x, pvalid: %x, addr: %"PRIx64", " - "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", + "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", req->state, req->pdata_valid, req->addr, req->u.data, req->count, req->size); term_printf(" IO totally occurred on this vcpu: %"PRIx64"\n", @@ -156,12 +161,6 @@ if ( req->state == STATE_IOREQ_READY ) return req; - - fprintf(logfile, "False I/O request ... 
in-service already: " - "%x, pvalid: %x, port: %"PRIx64", " - "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", - req->state, req->pdata_valid, req->addr, - req->u.data, req->count, req->size); return NULL; } @@ -176,7 +175,7 @@ rc = read(evtchn_fd, &port, sizeof(port)); if ( rc == sizeof(port) ) { for ( i = 0; i < vcpus; i++ ) - if ( shared_page->vcpu_iodata[i].dm_eport == port ) + if ( ioreq_local_port[i] == port ) break; if ( i == vcpus ) { @@ -469,16 +468,14 @@ FD_SET(evtchn_fd, &wakeup_rfds); tun_receive_handler(&rfds); - if ( FD_ISSET(evtchn_fd, &rfds) ) { - cpu_handle_ioreq(env); - } + cpu_handle_ioreq(env); main_loop_wait(0); if (env->send_event) { struct ioctl_evtchn_notify notify; env->send_event = 0; - notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport; + notify.port = ioreq_local_port[send_vcpu]; (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify); } } @@ -533,16 +530,18 @@ return NULL; } - /* FIXME: how about if we overflow the page here? */ - bind.remote_domain = domid; + bind.remote_domain = DOMID_XEN; for ( i = 0; i < vcpus; i++ ) { - bind.remote_port = shared_page->vcpu_iodata[i].vp_eport; - rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); - if ( rc == -1 ) { + bind.remote_port = shared_page->vcpu_iodata[i].vp_xen_port; + fprintf(logfile, + "vcpu %d: Bind to Xen port %d.\n", i, bind.remote_port); + rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); + if ( rc == -1 ) { fprintf(logfile, "bind interdomain ioctl error %d\n", errno); return NULL; - } - shared_page->vcpu_iodata[i].dm_eport = rc; + } + fprintf(logfile, "Local port %d -> %d.\n", i, rc); + ioreq_local_port[i] = rc; } return env; diff -r 5afb14264629 -r e66707bec7a9 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri May 5 00:27:10 2006 +0100 +++ b/tools/ioemu/vl.c Fri May 5 17:38:45 2006 +0100 @@ -137,6 +137,7 @@ int adlib_enabled = 1; int gus_enabled = 1; int pci_enabled = 1; +int evtchn_enabled = 1; int prep_enabled = 0; int rtc_utc = 1; int cirrus_vga_enabled = 
1; @@ -926,6 +927,85 @@ IOCanRWHandler *fd_can_read, IOReadHandler *fd_read, void *opaque) { +} + +static int tcps_chr_write(CharDriverState *chr, const uint8_t *buf, int len) +{ + int trans, p; + int fd = (long)chr->opaque; + if (fd < 0) { + errno = EBADF; + return -1; + } + for (trans = 0; trans < len; trans += p) { + p = write(fd, buf + trans, len - trans); + if (p <= 0) { + chr->opaque = (void *)-1l; + if (p == 0) + errno = EBADF; + return -1; + } + } + return trans; +} + +static void tcps_chr_add_read_handler(CharDriverState *chr, + IOCanRWHandler *fd_can_read, + IOReadHandler *fd_read, void *opaque) +{ + qemu_add_fd_read_handler((long)chr->opaque, fd_can_read, fd_read, opaque); +} + +CharDriverState *qemu_chr_open_tcps(const char *descr) +{ + char *e; + int port; + CharDriverState *chr; + int listen_fd, remote_fd; + struct sockaddr_in saddr; + + port = strtol(descr, &e, 0); + if ( *e != 0 ) + return NULL; + if (port != (unsigned short)port) + return NULL; + + /* Listen for incoming connections */ + listen_fd = socket(PF_INET, SOCK_STREAM, 0); + if (listen_fd < 0) + return NULL; + saddr.sin_family = AF_INET; + saddr.sin_port = htons(port); + saddr.sin_addr.s_addr = INADDR_ANY; + if (bind(listen_fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { + close(listen_fd); + return NULL; + } + if (listen(listen_fd, 1) < 0) { + close(listen_fd); + return NULL; + } + + /* Accept the connection from the remote */ + remote_fd = accept(listen_fd, NULL, 0); + + /* Stop listening */ + close(listen_fd); + + if (remote_fd < 0) { + return NULL; + } + + chr = qemu_mallocz(sizeof(CharDriverState)); + if ( !chr ) { + close(remote_fd); + return NULL; + } + chr->opaque = (void *)(long)remote_fd; + chr->chr_write = tcps_chr_write; + chr->chr_add_read_handler = tcps_chr_add_read_handler; + + return chr; } CharDriverState *qemu_chr_open_null(void) @@ -1264,6 +1344,8 @@ return text_console_init(&display_state); } else if (!strcmp(filename, "null")) { return qemu_chr_open_null(); + } else if 
(!strncmp(filename, "tcps:", 5)) { + return qemu_chr_open_tcps(filename + 5); } else #ifndef _WIN32 if (!strcmp(filename, "pty")) { @@ -2264,6 +2346,7 @@ #ifdef TARGET_I386 "-isa simulate an ISA-only system (default is PCI system)\n" "-std-vga simulate a standard VGA card with VESA Bochs Extensions\n" " (default is CL-GD5446 PCI VGA)\n" + "-xen-evtchn enable xen event channel fake pci device\n" "-vgaacc [0|1] 1 to accelerate CL-GD5446 speed, default is 1\n" "-no-repeatkey disable key repeat feature for SDL keyboard simulation" @@ -2350,6 +2433,7 @@ QEMU_OPTION_k, QEMU_OPTION_localtime, QEMU_OPTION_cirrusvga, + QEMU_OPTION_evtchn, QEMU_OPTION_g, QEMU_OPTION_std_vga, QEMU_OPTION_monitor, @@ -2436,6 +2520,7 @@ { "pci", 0, QEMU_OPTION_pci }, { "nic-ne2000", 0, QEMU_OPTION_nic_ne2000 }, { "cirrusvga", 0, QEMU_OPTION_cirrusvga }, + { "xen-evtchn", 0, QEMU_OPTION_evtchn }, { "vgaacc", HAS_ARG, QEMU_OPTION_vgaacc }, { "no-repeatkey", 0, QEMU_OPTION_repeatkey }, { NULL }, @@ -2985,6 +3070,9 @@ break; case QEMU_OPTION_cirrusvga: cirrus_vga_enabled = 1; + break; + case QEMU_OPTION_evtchn: + evtchn_enabled = 1; break; case QEMU_OPTION_vgaacc: { diff -r 5afb14264629 -r e66707bec7a9 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Fri May 5 00:27:10 2006 +0100 +++ b/tools/ioemu/vl.h Fri May 5 17:38:45 2006 +0100 @@ -435,6 +435,8 @@ extern int pci_enabled; +extern int evtchn_enabled; + extern target_phys_addr_t pci_mem_base; typedef struct PCIBus PCIBus; diff -r 5afb14264629 -r e66707bec7a9 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Fri May 5 00:27:10 2006 +0100 +++ b/tools/libxc/xc_hvm_build.c Fri May 5 17:38:45 2006 +0100 @@ -29,6 +29,8 @@ #define E820_IO 16 #define E820_SHARED_PAGE 17 #define E820_XENSTORE 18 +#define E820_SHAREDINFO 19 +#define E820_STARTINFO 20 #define E820_MAP_PAGE 0x00090000 #define E820_MAP_NR_OFFSET 0x000001E8 @@ -78,7 +80,7 @@ e820entry[nr_map].type = E820_RESERVED; nr_map++; -#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */ +#define 
STATIC_PAGES 4 /* for ioreq_t and store_mfn and start_info*/ /* Most of the ram goes here */ e820entry[nr_map].addr = 0x100000; e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE; @@ -87,16 +89,26 @@ /* Statically allocated special pages */ + /* Shared ioreq_t page */ + e820entry[nr_map].addr = mem_size - PAGE_SIZE; + e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].type = E820_SHARED_PAGE; + nr_map++; + /* For xenstore */ e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE; e820entry[nr_map].size = PAGE_SIZE; e820entry[nr_map].type = E820_XENSTORE; nr_map++; - /* Shared ioreq_t page */ - e820entry[nr_map].addr = mem_size - PAGE_SIZE; + e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE; e820entry[nr_map].size = PAGE_SIZE; - e820entry[nr_map].type = E820_SHARED_PAGE; + e820entry[nr_map].type = E820_SHAREDINFO; + nr_map++; + + e820entry[nr_map].addr = mem_size - 4 * PAGE_SIZE; + e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].type = E820_STARTINFO; nr_map++; e820entry[nr_map].addr = mem_size; @@ -182,7 +194,7 @@ unsigned long count, i; unsigned long long ptr; xc_mmu_t *mmu = NULL; - + start_info_t *start_info; shared_info_t *shared_info; void *e820_page; unsigned char e820_map_nr; @@ -191,6 +203,7 @@ unsigned long long v_end; unsigned long shared_page_frame = 0; + unsigned long startinfo_page_frame = 0; shared_iopage_t *sp; memset(&dsi, 0, sizeof(struct domain_setup_info)); @@ -263,6 +276,8 @@ e820_map_nr = build_e820map(e820_page, v_end); munmap(e820_page, PAGE_SIZE); + startinfo_page_frame = (v_end >> PAGE_SHIFT) - 4; + /* shared_info page starts its life empty. */ if ( (shared_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, @@ -272,29 +287,31 @@ /* Mask all upcalls... 
*/ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; + shared_info->arch.start_info_mfn = startinfo_page_frame; munmap(shared_info, PAGE_SIZE); - /* Populate the event channel port in the shared page */ + /* Paranoia */ shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1]; if ( (sp = (shared_iopage_t *) xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, shared_page_frame)) == 0 ) goto error_out; memset(sp, 0, PAGE_SIZE); - - /* FIXME: how about if we overflow the page here? */ - for ( i = 0; i < vcpus; i++ ) { - unsigned int vp_eport; - - vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0); - if ( vp_eport < 0 ) { - PERROR("Couldn't get unbound port from VMX guest.\n"); - goto error_out; - } - sp->vcpu_iodata[i].vp_eport = vp_eport; - } - munmap(sp, PAGE_SIZE); + + start_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[startinfo_page_frame]); + if (!start_info) { + PERROR("Can't map start info"); + goto error_out; + } + memset(start_info, 0, PAGE_SIZE); + start_info->nr_pages = nr_pages; + start_info->shared_info = (v_end >> PAGE_SHIFT) - 3; + start_info->store_mfn = (v_end >> PAGE_SHIFT) - 2; + start_info->store_evtchn = store_evtchn; + munmap(start_info, PAGE_SIZE); *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2]; if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) ) diff -r 5afb14264629 -r e66707bec7a9 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Fri May 5 00:27:10 2006 +0100 +++ b/tools/libxc/xc_linux_build.c Fri May 5 17:38:45 2006 +0100 @@ -995,7 +995,8 @@ /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; - + shared_info->arch.start_info_mfn = + page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]; munmap(shared_info, PAGE_SIZE); /* Send the page update requests down to the hypervisor. 
*/ @@ -1009,8 +1010,11 @@ hypercall_pfn = strtoul(p, NULL, 16); if ( hypercall_pfn >= nr_pages ) goto error_out; - op.u.hypercall_init.domain = (domid_t)dom; - op.u.hypercall_init.mfn = page_array[hypercall_pfn]; + op.u.hypercall_init.domain = (domid_t)dom; + if ( shadow_mode_enabled ) + op.u.hypercall_init.mfn = hypercall_pfn; + else + op.u.hypercall_init.mfn = page_array[hypercall_pfn]; op.cmd = DOM0_HYPERCALL_INIT; if ( xc_dom0_op(xc_handle, &op) ) goto error_out; diff -r 5afb14264629 -r e66707bec7a9 tools/libxc/xc_pagetab.c --- a/tools/libxc/xc_pagetab.c Fri May 5 00:27:10 2006 +0100 +++ b/tools/libxc/xc_pagetab.c Fri May 5 17:38:45 2006 +0100 @@ -44,6 +44,25 @@ #define L2_PAGETABLE_MASK L2_PAGETABLE_MASK_PAE #endif + +static unsigned long xc_gmfn_to_mfn(int xc_handle, uint32_t dom, + unsigned long gmfn) +{ + unsigned long mfn = 0xf001; + int rc; + + if ( (rc = xc_domain_translate_gpfn_list(xc_handle, dom, 1, &gmfn, &mfn)) + < 0 ) + { + if (errno == EINVAL) /* Probably a non-translated domain -> + mfn == gmfn */ + return gmfn; + fprintf(stderr, "Failed to translate %lx (%d, %d).\n", gmfn, rc, + errno); + return rc; + } + return mfn; +} unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, int vcpu, unsigned long long virt ) @@ -85,7 +104,9 @@ #if defined(__i386__) pmle = cr3; #elif defined(__x86_64__) - pml = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, cr3 >> PAGE_SHIFT); + pml = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + xc_gmfn_to_mfn(xc_handle, dom, + cr3 >> PAGE_SHIFT)); if (pml == NULL) { fprintf(stderr, "failed to map PML4\n"); goto out; @@ -100,7 +121,9 @@ /* Page Directory Pointer Table */ if (pt_levels >= 3) { - pdppage = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, pmle >> PAGE_SHIFT); + pdppage = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + xc_gmfn_to_mfn(xc_handle, dom, + pmle >> PAGE_SHIFT)); if (pdppage == NULL) { fprintf(stderr, "failed to map PDP\n"); goto 
out_unmap_pml; @@ -123,7 +146,9 @@ /* Page Directory */ - pd = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, pdpe >> PAGE_SHIFT); + pd = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + xc_gmfn_to_mfn(xc_handle, dom, + pdpe >> PAGE_SHIFT)); if (pd == NULL) { fprintf(stderr, "failed to map PD\n"); goto out_unmap_pdp; @@ -132,7 +157,7 @@ if (pt_levels >= 3) pde = *(unsigned long long *)(pd + 8 * ((virt >> L2_PAGETABLE_SHIFT_PAE) & L2_PAGETABLE_MASK_PAE)); else - pde = *(unsigned long long *)(pd + 4 * ((virt >> L2_PAGETABLE_SHIFT) & L2_PAGETABLE_MASK)); + pde = *(unsigned long *)(pd + 4 * ((virt >> L2_PAGETABLE_SHIFT) & L2_PAGETABLE_MASK)); if ((pde & 1) == 0) { fprintf(stderr, "page entry not present in PD\n"); @@ -146,7 +171,8 @@ exit(-1); } else { /* 4k page */ pt = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, - pde >> PAGE_SHIFT); + xc_gmfn_to_mfn(xc_handle, dom, + (unsigned long)pde >> PAGE_SHIFT)); if (pt == NULL) { fprintf(stderr, "failed to map PT\n"); @@ -156,7 +182,7 @@ if (pt_levels >= 3) pte = *(unsigned long long *)(pt + 8 * ((virt >> L1_PAGETABLE_SHIFT_PAE) & L1_PAGETABLE_MASK_PAE)); else - pte = *(unsigned long long *)(pt + 4 * ((virt >> L1_PAGETABLE_SHIFT) & L1_PAGETABLE_MASK)); + pte = *(unsigned long *)(pt + 4 * ((virt >> L1_PAGETABLE_SHIFT) & L1_PAGETABLE_MASK)); if ((pte & 0x00000001) == 0) { fprintf(stderr, "page entry not present in PT\n"); @@ -178,7 +204,7 @@ out_unmap_pml: munmap(pml, PAGE_SIZE); out: - return mfn; + return xc_gmfn_to_mfn(xc_handle, dom, mfn); } /* diff -r 5afb14264629 -r e66707bec7a9 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri May 5 00:27:10 2006 +0100 +++ b/tools/python/xen/xend/image.py Fri May 5 17:38:45 2006 +0100 @@ -243,7 +243,7 @@ # xm config file def parseDeviceModelArgs(self, imageConfig, deviceConfig): dmargs = [ 'cdrom', 'boot', 'fda', 'fdb', 'ne2000', 'audio', - 'localtime', 'serial', 'stdvga', 'isa', 'vcpus'] + 'localtime', 'serial', 'stdvga', 
'isa', 'vcpus', 'evtchn'] ret = [] for a in dmargs: v = sxp.child_value(imageConfig, a) @@ -254,7 +254,7 @@ if a == 'audio': a = 'enable-audio' # Handle booleans gracefully - if a in ['localtime', 'std-vga', 'isa', 'nic-ne2000', 'enable-audio']: + if a in ['localtime', 'std-vga', 'isa', 'nic-ne2000', 'enable-audio', 'xen-evtchn']: if v != None: v = int(v) if v: ret.append("-%s" % a) else: diff -r 5afb14264629 -r e66707bec7a9 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Fri May 5 00:27:10 2006 +0100 +++ b/tools/python/xen/xend/server/netif.py Fri May 5 17:38:45 2006 +0100 @@ -156,6 +156,7 @@ bridge = sxp.child_value(config, 'bridge') mac = sxp.child_value(config, 'mac') vifname = sxp.child_value(config, 'vifname') + copyall = sxp.child_value(config, 'copyall') or '0' rate = sxp.child_value(config, 'rate') ipaddr = _get_config_ipaddr(config) @@ -166,14 +167,16 @@ back = { 'script' : script, 'mac' : mac, - 'handle' : "%i" % devid } - + 'handle' : "%i" % devid, + 'copyall': copyall } if typ == 'ioemu': front = {} back['type'] = 'ioemu' else: front = { 'handle' : "%i" % devid, - 'mac' : mac } + 'mac' : mac, + 'copyall': copyall + } if ipaddr: back['ip'] = ' '.join(ipaddr) if bridge: @@ -191,8 +194,9 @@ result = DevController.configuration(self, devid) - (script, ip, bridge, mac, typ, vifname, rate) = self.readBackend( - devid, 'script', 'ip', 'bridge', 'mac', 'type', 'vifname', 'rate') + (script, ip, bridge, mac, typ, vifname, rate, copyall) = self.readBackend( + devid, 'script', 'ip', 'bridge', 'mac', 'type', 'vifname', + 'rate', 'copyall') if script: result.append(['script', @@ -209,6 +213,7 @@ result.append(['type', typ]) if vifname: result.append(['vifname', vifname]) + result.append(['copyall', copyall]) if rate: result.append(['rate', formatRate(rate)]) diff -r 5afb14264629 -r e66707bec7a9 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri May 5 00:27:10 2006 +0100 +++ b/tools/python/xen/xm/create.py Fri May 
5 17:38:45 2006 +0100 @@ -269,7 +269,7 @@ use="""Add a physical USB port to a domain, as specified by the path to that port. This option may be repeated to add more than one port.""") -gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT,backend=DOM,vifname=NAME", +gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT,backend=DOM,vifname=NAME,copyall=VAL", fn=append_value, default=[], use="""Add a network interface with the given MAC address and bridge. The vif is configured by calling the given configuration script. @@ -596,7 +596,7 @@ def f(k): if k not in ['backend', 'bridge', 'ip', 'mac', 'script', 'type', - 'vifname', 'rate']: + 'vifname', 'rate', 'copyall']: err('Invalid vif option: ' + k) config_vif.append([k, d[k]]) diff -r 5afb14264629 -r e66707bec7a9 tools/xentrace/Makefile --- a/tools/xentrace/Makefile Fri May 5 00:27:10 2006 +0100 +++ b/tools/xentrace/Makefile Fri May 5 17:38:45 2006 +0100 @@ -21,7 +21,7 @@ MAN8 = $(wildcard *.8) ifeq ($(XEN_TARGET_ARCH),x86_32) -LIBBIN += xenctx +LIBBIN += xenctx xenmem endif ifeq ($(XEN_TARGET_ARCH),x86_64) diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/dom0_ops.c Fri May 5 17:38:45 2006 +0100 @@ -436,6 +436,7 @@ op->u.hypercall_init.domain)) == NULL) ) break; + mfn = gmfn_to_mfn(d, mfn); ret = -EACCES; if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) @@ -447,7 +448,7 @@ ret = 0; hypercall_page = map_domain_page(mfn); - hypercall_page_initialise(hypercall_page); + hypercall_page_initialise(d, hypercall_page); unmap_domain_page(hypercall_page); put_page_and_type(mfn_to_page(mfn)); @@ -478,6 +479,7 @@ /* IOPL privileges are virtualised: merge back into returned eflags. 
*/ BUG_ON((c->user_regs.eflags & EF_IOPL) != 0); c->user_regs.eflags |= v->arch.iopl << 12; + c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table); } c->flags = 0; @@ -487,8 +489,6 @@ c->flags |= VGCF_IN_KERNEL; if ( hvm_guest(v) ) c->flags |= VGCF_HVM_GUEST; - - c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table); c->vm_assist = v->domain->vm_assist; } diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/domain.c Fri May 5 17:38:45 2006 +0100 @@ -226,6 +226,8 @@ virt_to_page(d->shared_info), d, XENSHARE_writable); } + d->arch.hvm_domain.callback_irq = 0; + return 0; fail_nomem: diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/domain_build.c Fri May 5 17:38:45 2006 +0100 @@ -698,7 +698,7 @@ return -1; } - hypercall_page_initialise((void *)hypercall_page); + hypercall_page_initialise(d, (void *)hypercall_page); } /* Copy the initial ramdisk. 
*/ diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/hvm.c Fri May 5 17:38:45 2006 +0100 @@ -45,6 +45,7 @@ #include #include #include +#include int hvm_enabled = 0; @@ -67,18 +68,27 @@ } } -static void hvm_map_io_shared_page(struct domain *d) +static void e820_zap_iommu_callback(struct domain *d, + struct e820entry *e, + void *ign) +{ + if ( e->type == E820_IO ) + hvm_zap_mmio_range(d, e->addr >> PAGE_SHIFT, e->size >> PAGE_SHIFT); +} + +static void e820_foreach(struct domain *d, + void (*cb)(struct domain *d, + struct e820entry *e, + void *data), + void *data) { int i; unsigned char e820_map_nr; struct e820entry *e820entry; unsigned char *p; unsigned long mfn; - unsigned long gpfn = 0; - - local_flush_tlb_pge(); - - mfn = get_mfn_from_gpfn(E820_MAP_PAGE >> PAGE_SHIFT); + + mfn = gmfn_to_mfn(d, E820_MAP_PAGE >> PAGE_SHIFT); if (mfn == INVALID_MFN) { printk("Can not find E820 memory map page for HVM domain.\n"); domain_crash_synchronous(); @@ -95,27 +105,41 @@ for ( i = 0; i < e820_map_nr; i++ ) { - if ( e820entry[i].type == E820_SHARED_PAGE ) - gpfn = (e820entry[i].addr >> PAGE_SHIFT); - if ( e820entry[i].type == E820_IO ) - hvm_zap_mmio_range( - d, - e820entry[i].addr >> PAGE_SHIFT, - e820entry[i].size >> PAGE_SHIFT); - } - - if ( gpfn == 0 ) { - printk("Can not get io request shared page" - " from E820 memory map for HVM domain.\n"); - unmap_domain_page(p); - domain_crash_synchronous(); - } + cb(d, e820entry + i, data); + } + unmap_domain_page(p); - - /* Initialise shared page */ - mfn = get_mfn_from_gpfn(gpfn); - if (mfn == INVALID_MFN) { - printk("Can not find io request shared page for HVM domain.\n"); +} + +static void hvm_zap_iommu_pages(struct domain *d) +{ + e820_foreach(d, e820_zap_iommu_callback, NULL); +} + +static void e820_map_io_shared_callback(struct domain *d, + struct e820entry *e, + void *data) +{ + unsigned long *mfn = data; + if ( e->type == 
E820_SHARED_PAGE ) { + ASSERT(*mfn == INVALID_MFN); + *mfn = gmfn_to_mfn(d, e->addr >> PAGE_SHIFT); + } +} + +void hvm_map_io_shared_page(struct vcpu *v) +{ + unsigned long mfn = INVALID_MFN; + void *p; + struct domain *d = v->domain; + + if ( d->arch.hvm_domain.shared_page_va ) + return; + + e820_foreach(d, e820_map_io_shared_callback, &mfn); + + if ( mfn == INVALID_MFN ) { + printf("Could not find shared IO page.\n"); domain_crash_synchronous(); } @@ -154,7 +178,7 @@ unsigned long mfn; struct hvm_info_table *t; - mfn = get_mfn_from_gpfn(HVM_INFO_PFN); + mfn = gmfn_to_mfn(d, HVM_INFO_PFN); if ( mfn == INVALID_MFN ) { printk("Can not get info page mfn for HVM domain.\n"); domain_crash_synchronous(); @@ -182,6 +206,21 @@ unmap_domain_page(p); } +static void evtchn_callback_func(void *v) +{ + hvm_assist_complete(v); +} + +void hvm_create_event_channels(struct vcpu *v) +{ + vcpu_iodata_t *p; + p = get_vio(v->domain, v->vcpu_id); + v->arch.hvm_vcpu.xen_port = p->vp_xen_port = + alloc_xen_event_channel(evtchn_callback_func, + v, + dom0); +} + void hvm_setup_platform(struct domain* d) { struct hvm_domain *platform; @@ -191,7 +230,8 @@ shadow_direct_map_init(d); - hvm_map_io_shared_page(d); + hvm_zap_iommu_pages(d); + hvm_get_info(d); platform = &d->arch.hvm_domain; @@ -310,6 +350,36 @@ *index = 0; } else pbuf[(*index)++] = c; +} + +/* Initialise a hypercall transfer page for a VMX domain using + paravirtualised drivers. 
*/ +void hvm_hypercall_page_initialise(struct domain *d, + void *hypercall_page) +{ + char *p; + int i; + + memset(hypercall_page, 0, PAGE_SIZE); + + for ( i = 0; i < (PAGE_SIZE / 32); i++ ) + { + p = (char *)(hypercall_page + (i * 32)); + *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */ + *(u32 *)(p + 1) = i; + *(u8 *)(p + 5) = 0x0f; /* vmcall */ + *(u8 *)(p + 6) = 0x01; + *(u8 *)(p + 7) = 0xc1; + *(u8 *)(p + 8) = 0xc3; /* ret */ + } + + /* Don't support HYPERVISOR_iret at the moment */ + *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */ +} + +void hvm_release_assist_channel(struct vcpu *v) +{ + release_xen_event_channel(v->arch.hvm_vcpu.xen_port); } /* diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/intercept.c Fri May 5 17:38:45 2006 +0100 @@ -211,7 +211,7 @@ { struct vcpu *v = data; - evtchn_set_pending(v, iopacket_port(v)); + hvm_prod_vcpu(v); } diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/io.c Fri May 5 17:38:45 2006 +0100 @@ -667,88 +667,19 @@ vio = get_vio(v->domain, v->vcpu_id); if (vio == 0) { - HVM_DBG_LOG(DBG_LEVEL_1, - "bad shared page: %lx", (unsigned long) vio); printf("bad shared page: %lx\n", (unsigned long) vio); domain_crash_synchronous(); } p = &vio->vp_ioreq; - /* clear IO wait HVM flag */ - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - if (p->state == STATE_IORESP_READY) { - p->state = STATE_INVALID; - clear_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); - - if (p->type == IOREQ_TYPE_PIO) - hvm_pio_assist(regs, p, mmio_opp); - else - hvm_mmio_assist(v, regs, p, mmio_opp); - } - /* else an interrupt send event raced us */ - } -} - -/* - * On exit from hvm_wait_io, we're guaranteed not to be waiting on - * I/O response from the device model. 
- */ -void hvm_wait_io(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(v); - - for ( ; ; ) - { - /* Clear master flag, selector flag, event flag each in turn. */ - v->vcpu_info->evtchn_upcall_pending = 0; - clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - smp_mb__after_clear_bit(); - if ( test_and_clear_bit(port, &d->shared_info->evtchn_pending[0]) ) - hvm_io_assist(v); - - /* Need to wait for I/O responses? */ - if ( !test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - break; - - do_sched_op_compat(SCHEDOP_block, 0); - } - - /* - * Re-set the selector and master flags in case any other notifications - * are pending. - */ - if ( d->shared_info->evtchn_pending[port/BITS_PER_LONG] ) - set_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - if ( v->vcpu_info->evtchn_pending_sel ) - v->vcpu_info->evtchn_upcall_pending = 1; -} - -void hvm_safe_block(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(v); - - for ( ; ; ) - { - /* Clear master flag & selector flag so we will wake from block. */ - v->vcpu_info->evtchn_upcall_pending = 0; - clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - smp_mb__after_clear_bit(); - - /* Event pending already? */ - if ( test_bit(port, &d->shared_info->evtchn_pending[0]) ) - break; - - do_sched_op_compat(SCHEDOP_block, 0); - } - - /* Reflect pending event in selector and master flags. 
*/ - set_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - v->vcpu_info->evtchn_upcall_pending = 1; + if (p->state == STATE_IORESP_READY) { + p->state = STATE_INVALID; + if (p->type == IOREQ_TYPE_PIO) + hvm_pio_assist(regs, p, mmio_opp); + else + hvm_mmio_assist(v, regs, p, mmio_opp); + } } /* diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/platform.c Fri May 5 17:38:45 2006 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -630,6 +631,33 @@ return inst_len; } +static void hvm_send_assist_req(struct vcpu *v) +{ + ioreq_t *p; + + ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); + atomic_inc(&v->pausecnt); + set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); + mb(); + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + if (p->state != STATE_INVALID) { + printf("Badness, state %d.\n", p->state); + /* XXXSOS22: Can this ever happen? 
*/ + clear_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); + atomic_dec(&v->pausecnt); + return; + } + vcpu_sleep_nosync(v); + p->state = STATE_IOREQ_READY; + notify_xen_event_channel(v->arch.hvm_vcpu.xen_port); +} + +/* Wake up a vcpu which is waiting for interrupts to come in */ +void hvm_prod_vcpu(struct vcpu *v) +{ + vcpu_unblock(v); +} + void send_pio_req(struct cpu_user_regs *regs, unsigned long port, unsigned long count, int size, long value, int dir, int pvalid) { @@ -642,12 +670,6 @@ printk("bad shared page: %lx\n", (unsigned long) vio); domain_crash_synchronous(); } - - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - printf("HVM I/O has not yet completed\n"); - domain_crash_synchronous(); - } - set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); p = &vio->vp_ioreq; p->dir = dir; @@ -675,15 +697,11 @@ return; } - p->state = STATE_IOREQ_READY; - - evtchn_send(iopacket_port(v)); - hvm_wait_io(); -} - -void send_mmio_req( - unsigned char type, unsigned long gpa, - unsigned long count, int size, long value, int dir, int pvalid) + hvm_send_assist_req(v); +} + +void send_mmio_req(unsigned char type, unsigned long gpa, + unsigned long count, int size, long value, int dir, int pvalid) { struct vcpu *v = current; vcpu_iodata_t *vio; @@ -700,12 +718,6 @@ p = &vio->vp_ioreq; - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - printf("HVM I/O has not yet completed\n"); - domain_crash_synchronous(); - } - - set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); p->dir = dir; p->pdata_valid = pvalid; @@ -731,10 +743,7 @@ return; } - p->state = STATE_IOREQ_READY; - - evtchn_send(iopacket_port(v)); - hvm_wait_io(); + hvm_send_assist_req(v); } static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, @@ -966,6 +975,110 @@ } } +void hvm_assist_complete(struct vcpu *v) +{ + ioreq_t *p; + /* The device model just sent an event channel message to us. 
Either: + + a) It just finished processing a request, or + b) it wants us to send an interrupt into the guest. + + We only need to handle case (b) explicitly if there is no pending + IO request from us to the device model (since if there is, we'll + pick up the interrupt when the request completes). */ + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + if (p->state == STATE_IORESP_READY) { + /* There's a race here, in that the device model could set + p->state while we're not looking, but we don't care, since + that would imply that *this* notification is not related to + that state transition, and so there'll be another one along + shortly. */ + if (test_and_clear_bit(ARCH_HVM_IO_WAIT, + &v->arch.hvm_vcpu.ioflags)) { + /* Just completed a wait-for-io, so we can unpause the + vcpu. It'll pick up the response when it returns. */ + vcpu_unpause(v); + return; + } else { + /* Someone got in and processed the response before us. + Just to be on the safe side, treat this as an interrupt + delivery. */ + /* (the other path implicitly does interrupt delivery as + the vcpu returns to the guest) */ + printf("Weirdness 1\n"); + } + } + + /* Evtchn message must have been for interrupt delivery. */ + hvm_prod_vcpu(v); + smp_send_event_check_cpu(v->processor); +} + +#define MIN(x,y) ((x)<(y)?(x):(y)) + +/* Note that copy_{to,from}_user_hvm don't set the A and D bits on + PTEs, and require the PTE to be writable even when they're only + trying to read from it. The guest is expected to deal with + this. 
*/ +unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len) +{ + unsigned long mfn; + unsigned long va; + void *map; + unsigned long off_in_page; + unsigned long chunk_size; + + ASSERT(hvm_guest(current)); + va = (unsigned long)to; + off_in_page = va % PAGE_SIZE; + while (len != 0) { + mfn = gva_to_mfn(va); + if (!mfn) + break; + map = map_domain_page(mfn); + if (!map) + break; + chunk_size = MIN(len, PAGE_SIZE - off_in_page); + memcpy(map + off_in_page, from, chunk_size); + unmap_domain_page(map); + off_in_page = 0; + len -= chunk_size; + from += chunk_size; + va += chunk_size; + } + return len; +} + +unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len) +{ + unsigned long mfn; + unsigned long va; + void *map; + unsigned long off_in_page; + unsigned long chunk_size; + + ASSERT(hvm_guest(current)); + va = (unsigned long)from; + off_in_page = va % PAGE_SIZE; + while (len != 0) { + mfn = gva_to_mfn(va); + if (!mfn) + break; + map = map_domain_page(mfn); + if (!map) + break; + chunk_size = MIN(len, PAGE_SIZE - off_in_page); + memcpy(to, map + off_in_page, chunk_size); + unmap_domain_page(map); + off_in_page = 0; + len -= chunk_size; + to += chunk_size; + va += chunk_size; + } + return len; +} + + /* * Local variables: * mode: C diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Fri May 5 17:38:45 2006 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -844,7 +845,7 @@ return 1; } - result = shadow_fault(va, regs); + result = shadow_fault(va, regs->error_code, regs); if( result ) { /* Let's make sure that the Guest TLB is flushed */ @@ -1938,7 +1939,7 @@ next_wakeup = next_pit; if ( next_wakeup != - 1 ) set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup); - hvm_safe_block(); + do_sched_op_compat(SCHEDOP_block, 0); } diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/svm/vmcb.c --- 
a/xen/arch/x86/hvm/svm/vmcb.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/svm/vmcb.c Fri May 5 17:38:45 2006 +0100 @@ -371,18 +371,6 @@ if (v->vcpu_id == 0) hvm_setup_platform(v->domain); - if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) - { - printk("HVM domain bind port %d to vcpu %d failed!\n", - iopacket_port(v), v->vcpu_id); - domain_crash_synchronous(); - } - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); - - clear_bit(iopacket_port(v), - &v->domain->shared_info->evtchn_mask[0]); - if (hvm_apic_support(v->domain)) vlapic_init(v); init_timer(&v->arch.hvm_svm.hlt_timer, @@ -463,9 +451,10 @@ pickup_deactive_ticks(vpit); } - if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || - test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - hvm_wait_io(); + if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { + hvm_io_assist(v); + ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); + } /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/vlapic.c Fri May 5 17:38:45 2006 +0100 @@ -223,7 +223,7 @@ vector); set_bit(vector, &vlapic->tmr[0]); } - evtchn_set_pending(v, iopacket_port(v)); + hvm_prod_vcpu(v); result = 1; break; @@ -365,7 +365,7 @@ return 1; } -void vlapic_ipi(struct vlapic *vlapic) +static void vlapic_ipi(struct vlapic *vlapic) { unsigned int dest = (vlapic->icr_high >> 24) & 0xff; unsigned int short_hand = (vlapic->icr_low >> 18) & 3; diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/io.c Fri May 5 17:38:45 2006 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -153,6 +154,7 @@ struct hvm_domain *plat=&v->domain->arch.hvm_domain; struct 
hvm_time_info *time_info = &plat->vpit.time_info; struct hvm_virpic *pic= &plat->vpic; + int callback_irq; unsigned int idtv_info_field; unsigned long inst_len; int has_ext_irq; @@ -163,6 +165,16 @@ if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) { pic_set_irq(pic, 0, 0); pic_set_irq(pic, 0, 1); + } + + callback_irq = v->domain->arch.hvm_domain.callback_irq; + if ( callback_irq != 0 && + event_pending(v) && + !v->vcpu_info->evtchn_upcall_mask ) { + /*inject para-device call back irq*/ + v->vcpu_info->evtchn_upcall_mask = 1; + pic_set_irq(pic, callback_irq, 0); + pic_set_irq(pic, callback_irq, 1); } has_ext_irq = cpu_has_pending_irq(v); @@ -224,7 +236,7 @@ void vmx_do_resume(struct vcpu *v) { - struct domain *d = v->domain; + ioreq_t *p; struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; struct hvm_time_info *time_info = &vpit->time_info; @@ -240,9 +252,12 @@ pickup_deactive_ticks(vpit); } - if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || - test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - hvm_wait_io(); + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + if (p->state == STATE_IORESP_READY) { + hvm_io_assist(v); + } + if (p->state != STATE_INVALID) + printf("Weird... 
state %d.\n", p->state); /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Fri May 5 17:38:45 2006 +0100 @@ -199,18 +199,6 @@ if (v->vcpu_id == 0) hvm_setup_platform(v->domain); - if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) - { - printk("VMX domain bind port %d to vcpu %d failed!\n", - iopacket_port(v), v->vcpu_id); - domain_crash_synchronous(); - } - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); - - clear_bit(iopacket_port(v), - &v->domain->shared_info->evtchn_mask[0]); - __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : ); error |= __vmwrite(GUEST_CR0, cr0); diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri May 5 17:38:45 2006 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,7 @@ kill_timer(&VLAPIC(v)->vlapic_timer); xfree(VLAPIC(v)); } + hvm_release_assist_channel(v); } kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer); @@ -782,7 +784,7 @@ return 1; } - result = shadow_fault(va, regs); + result = shadow_fault(va, regs->error_code, regs); TRACE_VMEXIT (2,result); #if 0 if ( !result ) @@ -1457,12 +1459,13 @@ v->arch.hvm_vmx.cpu_cr3, mfn); } - if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled ) + if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled ) { if ( v->arch.hvm_vmx.cpu_cr3 ) { put_page(mfn_to_page(get_mfn_from_gpfn( v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT))); v->arch.guest_table = mk_pagetable(0); } + } /* * VMX does not implement real-mode virtualization. 
We emulate @@ -1943,7 +1946,7 @@ next_wakeup = next_pit; if ( next_wakeup != - 1 ) set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup); - hvm_safe_block(); + do_sched_op_compat(SCHEDOP_block, 0); } static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs) @@ -2001,6 +2004,33 @@ } } +#if defined(__i386__) +typedef unsigned long vmx_hypercall_handler(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long); +extern vmx_hypercall_handler *vmx_hypercall_table[]; + +void vmx_do_hypercall(struct cpu_user_regs *pregs) +{ +#ifdef VMX_GUEST_DEBUG + if(pregs->eax == 0x99) { + vmx_print_line(pregs->ebx, current); + return; + } +#endif + + pregs->eax = vmx_hypercall_table[pregs->eax](pregs->ebx, pregs->ecx, + pregs->edx, pregs->esi, + pregs->edi); + return; +} +#else +void vmx_do_hypercall(struct cpu_user_regs *pregs) +{ + printk("not supported yet!\n"); +} +#endif + #if defined (__x86_64__) void store_cpu_user_regs(struct cpu_user_regs *regs) { @@ -2060,9 +2090,8 @@ unsigned int exit_reason; unsigned long exit_qualification, eip, inst_len = 0; struct vcpu *v = current; - int error; - - if ((error = __vmread(VM_EXIT_REASON, &exit_reason))) + + if (__vmread(VM_EXIT_REASON, &exit_reason)) __hvm_bug(&regs); perfc_incra(vmexits, exit_reason); @@ -2090,11 +2119,10 @@ * (1) We can get an exception (e.g. 
#PG) in the guest, or * (2) NMI */ - int error; unsigned int vector; unsigned long va; - if ((error = __vmread(VM_EXIT_INTR_INFO, &vector)) + if (__vmread(VM_EXIT_INTR_INFO, &vector) || !(vector & INTR_INFO_VALID_MASK)) __hvm_bug(&regs); vector &= INTR_INFO_VECTOR_MASK; @@ -2159,7 +2187,7 @@ (unsigned long)regs.esi, (unsigned long)regs.edi); v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs; - if (!(error = vmx_do_page_fault(va, &regs))) { + if (!vmx_do_page_fault(va, &regs)) { /* * Inject #PG using Interruption-Information Fields */ @@ -2209,16 +2237,16 @@ __update_guest_eip(inst_len); break; } -#if 0 /* keep this for debugging */ case EXIT_REASON_VMCALL: + { __get_instruction_length(inst_len); __vmread(GUEST_RIP, &eip); __vmread(EXIT_QUALIFICATION, &exit_qualification); - hvm_print_line(v, regs.eax); /* provides the current domain */ + vmx_do_hypercall(&regs); __update_guest_eip(inst_len); break; -#endif + } case EXIT_REASON_CR_ACCESS: { __vmread(GUEST_RIP, &eip); @@ -2259,7 +2287,6 @@ case EXIT_REASON_MWAIT_INSTRUCTION: __hvm_bug(&regs); break; - case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -2291,6 +2318,72 @@ #endif } +static void vmx_gnttab_setup_table(unsigned frame_pa, unsigned long nr_frames) +{ + unsigned long mfn, gpa; + struct domain *d; + int i; + + d = current->domain; + ASSERT(shadow_mode_external(d)); + + gpa = (unsigned long)frame_pa; + for ( i = 0; i < nr_frames; i++ ) + { + mfn = (virt_to_maddr(d->grant_table->shared) >> PAGE_SHIFT) + + i, + printf("gpa:%lx share mfn:%lx\n", gpa, mfn); + guest_physmap_add_page(current->domain, gpa >> PAGE_SHIFT, mfn); + gpa = gpa + PAGE_SIZE; + } +} + +asmlinkage unsigned long do_virtual_device_op(unsigned long op, + unsigned long arg1, + unsigned long arg2) +{ + int rc = 0; + unsigned long cur_mfn; + + switch (op) + { + case VDOP_set_callback_irq: + { + current->domain->arch.hvm_domain.callback_irq = arg1; + DPRINTK("set callback irq:%ld\n",arg1); + break; + } + case 
VDOP_setup_gnttab_table: + { + vmx_gnttab_setup_table(arg1, arg2); + break; + } + case VDOP_setup_shared_info: + { + printf("setup shared info, %lx -> %lx.\n", arg1, + virt_to_maddr(current->domain->shared_info) >> PAGE_SHIFT); + cur_mfn = gmfn_to_mfn(current->domain, arg1); + if (cur_mfn != INVALID_MFN) { + /* XXX should probably free cur_mfn at this point */ + printf("Flipping %lx from %lx to %lx.\n", + arg1, cur_mfn, + virt_to_maddr(current->domain->shared_info) >> PAGE_SHIFT); + guest_physmap_remove_page(current->domain, arg1, cur_mfn); + } + guest_physmap_add_page(current->domain, arg1, + virt_to_maddr(current->domain->shared_info) + >> PAGE_SHIFT); + break; + } + default: + { + printf("Bad device op %ld.\n", op); + rc = -EINVAL; + } + } + return rc; +} + asmlinkage void vmx_trace_vmentry (void) { TRACE_5D(TRC_VMX_VMENTRY, diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Fri May 5 17:38:45 2006 +0100 @@ -126,7 +126,7 @@ call vm_resume_fail .endif /* Should never reach here */ - hlt + ud2 ALIGN .if \initialized diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/mm.c Fri May 5 17:38:45 2006 +0100 @@ -3603,6 +3603,50 @@ map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags); } +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +unsigned long add_new_p2m_page(unsigned long phys) +{ + l2_pgentry_t mpl2e; + l1_pgentry_t *vl2tab; + struct vcpu *v = current; + struct page_info *newpg; + + mpl2e = v->arch.monitor_vtable[l2_table_offset(RO_MPT_VIRT_START)]; + vl2tab = map_domain_page(l2e_get_pfn(mpl2e)); + v->domain->max_pages++; + newpg = alloc_domheap_page(v->domain); + if(!newpg) + { + MEM_LOG("alloc_domheap_page fail\n"); + return 0; + } + 
vl2tab[l2_table_offset(phys)] = l1e_from_paddr(page_to_maddr(newpg), L2_PROT); + unmap_domain_page(vl2tab); + return page_to_maddr(newpg); +} + +unsigned long set_phystomachine(unsigned long pfn, + unsigned long mfn) +{ + l1_pgentry_t pte; + unsigned long oldmfn; + + if (__copy_from_user(&pte, (phys_to_machine_mapping + pfn), + sizeof(pte))) { + if(!add_new_p2m_page(pfn<error_code & 2 ) + if ( error_code & 2 ) { int allow_writes = 0; if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) ) { - if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) ) + if ( regs && + shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) ) { allow_writes = 1; l1e_add_flags(gpte, _PAGE_RW); @@ -2971,7 +2973,7 @@ } /* User access violation in guest? */ - if ( unlikely((regs->error_code & 4) && + if ( unlikely((error_code & 4) && !(l1e_get_flags(gpte) & _PAGE_USER))) { SH_VVLOG("shadow_fault - EXIT: wr fault on super page (%" PRIpte ")", @@ -2994,7 +2996,7 @@ else { /* Read-protection violation in guest? */ - if ( unlikely((regs->error_code & 1) )) + if ( unlikely((error_code & 1) )) { SH_VVLOG("shadow_fault - EXIT: read fault on super page (%" PRIpte ")", l1e_get_intpte(gpte)); diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/traps.c Fri May 5 17:38:45 2006 +0100 @@ -586,14 +586,14 @@ if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) { if ( shadow_mode_external(d) && guest_mode(regs) ) - return shadow_fault(addr, regs); + return shadow_fault(addr, regs->error_code, regs); if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( addr - GDT_LDT_VIRT_START, regs); } else if ( unlikely(shadow_mode_enabled(d)) ) { - return shadow_fault(addr, regs); + return shadow_fault(addr, regs->error_code, regs); } else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) { diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Fri 
May 5 00:27:10 2006 +0100 +++ b/xen/arch/x86/x86_32/entry.S Fri May 5 17:38:45 2006 +0100 @@ -644,7 +644,8 @@ .long do_acm_op .long do_nmi_op .long do_arch_sched_op - .long do_callback_op /* 30 */ + .long do_virtual_device_op /* 30 */ /* virtual device op for VMX */ + .long do_callback_op .long do_xenoprof_op .long do_event_channel_op .long do_physdev_op @@ -652,6 +653,46 @@ .long do_ni_hypercall .endr +ENTRY(vmx_hypercall_table) + .long do_ni_hypercall /* 0 */ + .long do_mmu_update + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall /* 5 */ + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall /* 10 */ + .long do_ni_hypercall + .long do_memory_op + .long do_multicall + .long do_update_va_mapping + .long do_ni_hypercall /* 15 */ + .long do_event_channel_op_compat + .long do_xen_version + .long do_ni_hypercall + .long do_ni_hypercall + .long do_grant_table_op /* 20 */ + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall /* 25 */ + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_ni_hypercall + .long do_virtual_device_op /* 30 */ + .long do_ni_hypercall + .long do_ni_hypercall + .long do_event_channel_op + .long do_ni_hypercall + .rept NR_hypercalls-((.-vmx_hypercall_table)/4) + .long do_ni_hypercall + .endr + ENTRY(hypercall_args_table) .byte 1 /* do_set_trap_table */ /* 0 */ .byte 4 /* do_mmu_update */ @@ -683,7 +724,8 @@ .byte 1 /* do_acm_op */ .byte 2 /* do_nmi_op */ .byte 2 /* do_arch_sched_op */ - .byte 2 /* do_callback_op */ /* 30 */ + .byte 3 /* do_virtual_device_op */ /* 30 */ + .byte 2 /* do_callback_op */ .byte 2 /* do_xenoprof_op */ .byte 2 /* do_event_channel_op */ .byte 2 /* do_physdev_op */ diff -r 5afb14264629 -r e66707bec7a9 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Fri May 5 00:27:10 2006 +0100 +++ 
b/xen/arch/x86/x86_32/traps.c Fri May 5 17:38:45 2006 +0100 @@ -512,8 +512,14 @@ *(u16 *)(p+ 6) = 0x82cd; /* int $0x82 */ } -void hypercall_page_initialise(void *hypercall_page) -{ +void hypercall_page_initialise(struct domain *d, void *hypercall_page) +{ + if ( hvm_guest(d->vcpu[0]) ) + { + hvm_hypercall_page_initialise(d, hypercall_page); + return; + } + if ( supervisor_mode_kernel ) hypercall_page_initialise_ring0_kernel(hypercall_page); else diff -r 5afb14264629 -r e66707bec7a9 xen/common/event_channel.c --- a/xen/common/event_channel.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/common/event_channel.c Fri May 5 17:38:45 2006 +0100 @@ -46,6 +46,104 @@ goto out; \ } while ( 0 ) +#define NR_XEN_EVENT_CHANNELS 32 +#define XECS_FREE 0 /* Not in use at all */ +#define XECS_UNBOUND 1 /* Allocated but not bound to */ +#define XECS_BOUND 2 /* Bound to somewhere in domain-space */ +#define XECS_HBOUND 3 /* Half bound: Xen is trying to tear this + down, but a domain is still attached */ +struct xen_evtchn { + int state; + + void (*fire)(void *d); /* called when dom0 tries to send on this + event channel. */ + void *data; + + struct domain *dom; /* Who is allowed to bind/currently bound */ + int dom_port; +}; + +static struct xen_evtchn xen_event_channels[NR_XEN_EVENT_CHANNELS]; +/* Leaf lock protecting the xen_event_channels array. 
*/ +static spinlock_t xen_event_channel_lock = SPIN_LOCK_UNLOCKED; + +int alloc_xen_event_channel(void (*f)(void *d), + void *data, + struct domain *d) +{ + int ind; + + spin_lock(&xen_event_channel_lock); + for (ind = 0; ind < NR_XEN_EVENT_CHANNELS; ind++) + if ( xen_event_channels[ind].state == XECS_FREE ) + break; + if ( ind == NR_XEN_EVENT_CHANNELS ) { + printf("Out of Xen event channels?\n"); + ind = -1; + goto out; + } + xen_event_channels[ind].state = XECS_UNBOUND; + xen_event_channels[ind].fire = f; + xen_event_channels[ind].data = data; + xen_event_channels[ind].dom = d; + out: + spin_unlock(&xen_event_channel_lock); + return ind; +} + +void release_xen_event_channel(int ind) +{ + spin_lock(&xen_event_channel_lock); + switch ( xen_event_channels[ind].state ) { + case XECS_UNBOUND: + xen_event_channels[ind].state = XECS_FREE; + break; + case XECS_BOUND: + xen_event_channels[ind].state = XECS_HBOUND; + break; + case XECS_HBOUND: + panic("Double free of Xen event channel.\n"); + case XECS_FREE: + printf("Attempt to free non-allocated Xen event channel %d?\n", + ind); + default: + BUG(); + } + + spin_unlock(&xen_event_channel_lock); +} + +void notify_xen_event_channel(int port) +{ + struct xen_evtchn *xchn = xen_event_channels + port; + struct domain *d = NULL; + struct evtchn *chn; + + /* We rely on our caller to ensure that nobody's trying to tear + the channel down from inside Xen while it's being signalled on. + That means that the only transition the channel could make is + from BOUND to UNBOUND or vice-versa. Neither of those change + the dom field, so we can read it without taking a lock. This + simplifies the lock ordering a bit. 
*/ + d = xchn->dom; + ASSERT(d); + if ( !get_domain(d) ) + return; + spin_lock(&d->evtchn_lock); + spin_lock(&xen_event_channel_lock); + if ( xchn->state != XECS_UNBOUND ) { + BUG_ON(xchn->state != XECS_BOUND); + BUG_ON(d != xchn->dom); + chn = evtchn_from_port(d, xchn->dom_port); + if ( chn->state == ECS_XEN ) + evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], + xchn->dom_port); + } else + printf("Send on unbound Xen event channel?\n"); + + spin_unlock(&d->evtchn_lock); + spin_unlock(&xen_event_channel_lock); +} static int virq_is_global(int virq) { @@ -131,6 +229,44 @@ } +static long evtchn_bind_xen(struct domain *ld, int xen_port) +{ + long rc = 0; + struct evtchn *lchn; + struct xen_evtchn *rchn; + int lport; + + if ( xen_port < 0 || xen_port >= NR_XEN_EVENT_CHANNELS ) + return -EINVAL; + + spin_lock(&ld->evtchn_lock); + spin_lock(&xen_event_channel_lock); + + rchn = xen_event_channels + xen_port; + if ( rchn->state != XECS_UNBOUND || rchn->dom != ld ) + ERROR_EXIT(-EINVAL); + + if ( (lport = get_free_port(ld)) < 0 ) + ERROR_EXIT(lport); + lchn = evtchn_from_port(ld, lport); + lchn->state = ECS_XEN; + lchn->u.xen_port = xen_port; + + rchn->state = XECS_BOUND; + rchn->dom_port = lport; + + /* Somewhat ugly hack to avoid lost wakeups if we've tried to + notify this port before anyone got around to binding it. 
*/ + evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport); + rc = lport; + + out: + spin_unlock(&xen_event_channel_lock); + spin_unlock(&ld->evtchn_lock); + + return rc; +} + static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) { struct evtchn *lchn, *rchn; @@ -144,6 +280,15 @@ if ( rdom == DOMID_SELF ) rdom = current->domain->domain_id; + + if ( rdom == DOMID_XEN ) { + rc = evtchn_bind_xen(ld, rport); + if ( rc >= 0 ) { + bind->local_port = rc; + rc = 0; + } + return rc; + } if ( (rd = find_domain_by_id(rdom)) == NULL ) return -ESRCH; @@ -314,11 +459,12 @@ static long __evtchn_close(struct domain *d1, int port1) { - struct domain *d2 = NULL; - struct vcpu *v; - struct evtchn *chn1, *chn2; - int port2; - long rc = 0; + struct domain *d2 = NULL; + struct vcpu *v; + struct evtchn *chn1, *chn2; + int port2; + long rc = 0; + struct xen_evtchn *xchn; again: spin_lock(&d1->evtchn_lock); @@ -406,6 +552,19 @@ chn2->u.unbound.remote_domid = d1->domain_id; break; + case ECS_XEN: + spin_lock(&xen_event_channel_lock); + xchn = xen_event_channels + chn1->u.xen_port; + BUG_ON(xchn->dom != d1); + if ( xchn->state == XECS_HBOUND ) + xchn->state = XECS_FREE; + else if (xchn->state == XECS_BOUND) + xchn->state = XECS_UNBOUND; + else + BUG(); + spin_unlock(&xen_event_channel_lock); + break; + default: BUG(); } @@ -439,6 +598,7 @@ struct evtchn *lchn, *rchn; struct domain *ld = current->domain, *rd; int rport, ret = 0; + struct xen_evtchn *xchn; spin_lock(&ld->evtchn_lock); @@ -462,6 +622,16 @@ break; case ECS_UNBOUND: /* silently drop the notification */ + break; + case ECS_XEN: + xchn = xen_event_channels + lchn->u.xen_port; + spin_lock(&xen_event_channel_lock); + if ( xchn->state != XECS_HBOUND ) + { + BUG_ON(xchn->state != XECS_BOUND); + xchn->fire(xchn->data); + } + spin_unlock(&xen_event_channel_lock); break; default: ret = -EINVAL; @@ -589,6 +759,11 @@ chn->u.interdomain.remote_dom->domain_id; status->u.interdomain.port = chn->u.interdomain.remote_port; 
break; + case ECS_XEN: + status->status = EVTCHNSTAT_interdomain; + status->u.interdomain.dom = DOMID_XEN; + status->u.interdomain.port = chn->u.xen_port; + break; case ECS_PIRQ: status->status = EVTCHNSTAT_pirq; status->u.pirq = chn->u.pirq; @@ -642,6 +817,7 @@ case ECS_UNBOUND: case ECS_INTERDOMAIN: case ECS_PIRQ: + case ECS_XEN: chn->notify_vcpu_id = vcpu_id; break; default: diff -r 5afb14264629 -r e66707bec7a9 xen/common/kernel.c --- a/xen/common/kernel.c Fri May 5 00:27:10 2006 +0100 +++ b/xen/common/kernel.c Fri May 5 17:38:45 2006 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include int tainted; diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/domain.h Fri May 5 17:38:45 2006 +0100 @@ -55,7 +55,7 @@ * Initialise a hypercall-transfer page. The given pointer must be mapped * in Xen virtual address space (accesses are not validated or checked). */ -extern void hypercall_page_initialise(void *); +extern void hypercall_page_initialise(struct domain *d, void *); struct arch_domain { diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/guest_access.h --- a/xen/include/asm-x86/guest_access.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/guest_access.h Fri May 5 17:38:45 2006 +0100 @@ -8,6 +8,8 @@ #define __ASM_X86_GUEST_ACCESS_H__ #include +#include +#include /* Is the guest handle a NULL reference? */ #define guest_handle_is_null(hnd) ((hnd).p == NULL) @@ -28,6 +30,8 @@ #define copy_to_guest_offset(hnd, off, ptr, nr) ({ \ const typeof(ptr) _x = (hnd).p; \ const typeof(ptr) _y = (ptr); \ + hvm_guest(current) ? \ + copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \ copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ }) @@ -38,6 +42,8 @@ #define copy_from_guest_offset(ptr, hnd, off, nr) ({ \ const typeof(ptr) _x = (hnd).p; \ const typeof(ptr) _y = (ptr); \ + hvm_guest(current) ? 
\ + copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\ copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ }) @@ -45,6 +51,8 @@ #define copy_field_to_guest(hnd, ptr, field) ({ \ const typeof(&(ptr)->field) _x = &(hnd).p->field; \ const typeof(&(ptr)->field) _y = &(ptr)->field; \ + hvm_guest(current) ? \ + copy_to_user_hvm(_x, _y, sizeof(*_x)) : \ copy_to_user(_x, _y, sizeof(*_x)); \ }) @@ -52,6 +60,8 @@ #define copy_field_from_guest(ptr, hnd, field) ({ \ const typeof(&(ptr)->field) _x = &(hnd).p->field; \ const typeof(&(ptr)->field) _y = &(ptr)->field; \ + hvm_guest(current) ? \ + copy_from_user_hvm(_y, _x, sizeof(*_x)) : \ copy_from_user(_y, _x, sizeof(*_x)); \ }) @@ -60,29 +70,37 @@ * Allows use of faster __copy_* functions. */ #define guest_handle_okay(hnd, nr) \ - array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)) + (hvm_guest(current) || array_access_ok((hnd).p, (nr), sizeof(*(hnd).p))) #define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \ const typeof(ptr) _x = (hnd).p; \ const typeof(ptr) _y = (ptr); \ + hvm_guest(current) ? \ + copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \ __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ }) #define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \ const typeof(ptr) _x = (hnd).p; \ const typeof(ptr) _y = (ptr); \ + hvm_guest(current) ? \ + copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \ __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ }) #define __copy_field_to_guest(hnd, ptr, field) ({ \ const typeof(&(ptr)->field) _x = &(hnd).p->field; \ const typeof(&(ptr)->field) _y = &(ptr)->field; \ + hvm_guest(current) ? \ + copy_to_user_hvm(_x, _y, sizeof(*_x)) : \ __copy_to_user(_x, _y, sizeof(*_x)); \ }) #define __copy_field_from_guest(ptr, hnd, field) ({ \ const typeof(&(ptr)->field) _x = &(hnd).p->field; \ const typeof(&(ptr)->field) _y = &(ptr)->field; \ + hvm_guest(current) ? 
\ + copy_from_user_hvm(_y, _x, sizeof(*_x)) : /* (to, from), same order as __copy_from_user */ \ __copy_from_user(_y, _x, sizeof(*_x)); \ }) diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/hvm/domain.h Fri May 5 17:38:45 2006 +0100 @@ -46,6 +46,8 @@ spinlock_t round_robin_lock; int interrupt_request; + unsigned callback_irq; + int pbuf_index; char pbuf[HVM_PBUF_SIZE]; }; diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/hvm/hvm.h Fri May 5 17:38:45 2006 +0100 @@ -75,12 +75,20 @@ hvm_funcs.disable(); } +void hvm_create_event_channels(struct vcpu *v); +void hvm_map_io_shared_page(struct vcpu *v); + static inline int hvm_initialize_guest_resources(struct vcpu *v) { - if ( hvm_funcs.initialize_guest_resources ) - return hvm_funcs.initialize_guest_resources(v); - return 0; + int ret = 1; + if (hvm_funcs.initialize_guest_resources) + ret = hvm_funcs.initialize_guest_resources(v); + if (ret == 1) { + hvm_map_io_shared_page(v); + hvm_create_event_channels(v); + } + return ret; } static inline void @@ -121,6 +129,9 @@ return hvm_funcs.instruction_length(v); } +void hvm_hypercall_page_initialise(struct domain *d, + void *hypercall_page); + static inline unsigned long hvm_get_guest_ctrl_reg(struct vcpu *v, unsigned int num) { diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/hvm/io.h Fri May 5 17:38:45 2006 +0100 @@ -149,14 +149,14 @@ #endif extern void handle_mmio(unsigned long, unsigned long); -extern void hvm_wait_io(void); -extern void hvm_safe_block(void); extern void hvm_io_assist(struct vcpu *v); extern void pic_irq_request(void *data, int level); extern void hvm_pic_assist(struct vcpu *v); extern int cpu_get_interrupt(struct vcpu *v, int *type); extern int cpu_has_pending_irq(struct vcpu
*v); +void hvm_release_assist_channel(struct vcpu *v); + // XXX - think about this, maybe use bit 30 of the mfn to signify an MMIO frame. #define mmio_space(gpa) (!VALID_MFN(get_mfn_from_gpfn((gpa) >> PAGE_SHIFT))) diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/hvm/support.h Fri May 5 17:38:45 2006 +0100 @@ -42,11 +42,6 @@ static inline vcpu_iodata_t *get_vio(struct domain *d, unsigned long cpu) { return &get_sp(d)->vcpu_iodata[cpu]; -} - -static inline int iopacket_port(struct vcpu *v) -{ - return get_vio(v->domain, v->vcpu_id)->vp_eport; } /* XXX these are really VMX specific */ @@ -149,4 +144,7 @@ extern void hvm_print_line(struct vcpu *v, const char c); extern void hlt_timer_fn(void *data); +void hvm_prod_vcpu(struct vcpu *v); +void hvm_assist_complete(struct vcpu *v); + #endif /* __ASM_X86_HVM_SUPPORT_H__ */ diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/hvm/vcpu.h Fri May 5 17:38:45 2006 +0100 @@ -34,6 +34,7 @@ struct vlapic *vlapic; /* For AP startup */ unsigned long init_sipi_sipi_state; + int xen_port; union { struct arch_vmx_struct vmx; diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/mm.h Fri May 5 17:38:45 2006 +0100 @@ -292,6 +292,9 @@ return mfn; } +unsigned long set_phystomachine(unsigned long pfn, + unsigned long mfn); + #ifdef MEMORY_GUARD void memguard_init(void); void memguard_guard_range(void *p, unsigned long l); diff -r 5afb14264629 -r e66707bec7a9 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/asm-x86/shadow.h Fri May 5 17:38:45 2006 +0100 @@ -71,6 +71,11 @@ // easy access to the hl2 table (for translated but not external modes only) #define 
__linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \ (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))) + +/* Flags which can be propagated from guest PTEs to shadow PTEs. Note + that not all of these will propagate in every case; this is just + the maximum safe set. */ +#define SHADOW_PROPAGATE_PTE_FLAGS (_PAGE_RW|_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_USER) /* * For now we use the per-domain BIGLOCK rather than a shadow-specific lock. @@ -121,7 +126,8 @@ unsigned long vpa, struct cpu_user_regs *regs); extern void shadow_mode_init(void); extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc); -extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs); +extern int shadow_fault(unsigned long va, unsigned long error_code, + struct cpu_user_regs *regs); extern int shadow_mode_enable(struct domain *p, unsigned int mode); extern void shadow_invlpg(struct vcpu *, unsigned long); extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync( @@ -784,6 +790,7 @@ l1_pgentry_t spte; unsigned long gpfn = l1e_get_pfn(gpte); unsigned long gmfn = gmfn_to_mfn(d, gpfn); + int flags; //printk("l1pte_write_fault gmfn=%lx\n", gmfn); @@ -796,7 +803,8 @@ ASSERT(l1e_get_flags(gpte) & _PAGE_RW); l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED); - spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL); + flags = l1e_get_flags(gpte) & SHADOW_PROPAGATE_PTE_FLAGS; + spte = l1e_from_pfn(gmfn, flags); SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte, l1e_get_intpte(spte), l1e_get_intpte(gpte)); @@ -819,6 +827,7 @@ l1_pgentry_t spte = *spte_p; unsigned long pfn = l1e_get_pfn(gpte); unsigned long mfn = gmfn_to_mfn(d, pfn); + unsigned long flags; if ( unlikely(!VALID_MFN(mfn)) ) { @@ -828,7 +837,9 @@ } l1e_add_flags(gpte, _PAGE_ACCESSED); - spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL); + flags = l1e_get_flags(gpte) & SHADOW_PROPAGATE_PTE_FLAGS; + + spte = l1e_from_pfn(mfn, 
flags); if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) || mfn_is_page_table(mfn) ) @@ -850,6 +861,7 @@ { unsigned long mfn; l1_pgentry_t spte; + unsigned long flags; spte = l1e_empty(); @@ -857,8 +869,8 @@ (_PAGE_PRESENT|_PAGE_ACCESSED)) && VALID_MFN(mfn = gmfn_to_mfn(d, l1e_get_pfn(gpte))) ) { - spte = l1e_from_pfn( - mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL)); + flags = guest_l1e_get_flags(gpte) & SHADOW_PROPAGATE_PTE_FLAGS; + spte = l1e_from_pfn(mfn, flags); if ( shadow_mode_log_dirty(d) || !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) || @@ -1692,6 +1704,28 @@ return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK); } + +static inline unsigned long gva_to_mfn(unsigned long gva) +{ + l1_pgentry_t l1e; + + if (__copy_from_user(&l1e, &shadow_linear_pg_table[l1_linear_offset(gva)], + sizeof(l1e)) || + (l1e_get_flags(l1e) & (_PAGE_PRESENT | _PAGE_RW)) != + (_PAGE_PRESENT | _PAGE_RW) ) { + /* Error code -> write */ + shadow_fault(gva, 3, NULL); + if (__copy_from_user(&l1e, + &shadow_linear_pg_table[l1_linear_offset(gva)], + sizeof(l1e)) || + (l1e_get_flags(l1e) & (_PAGE_PRESENT | _PAGE_RW)) != + (_PAGE_PRESENT | _PAGE_RW) ) { + return 0; + } + } + return l1e_get_pfn(l1e); +} + #endif /************************************************************************/ diff -r 5afb14264629 -r e66707bec7a9 xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/public/arch-x86_32.h Fri May 5 17:38:45 2006 +0100 @@ -63,7 +63,12 @@ #define FLAT_USER_SS FLAT_RING3_SS /* And the trap vector is... */ +#if defined (CONFIG_VMX_GUEST) +/*for VMX paravirtualized driver*/ +#define TRAP_INSTR ".byte 0x0f,0x01,0xc1\n" +#else #define TRAP_INSTR "int $0x82" +#endif /* * Virtual addresses beyond this are not modifiable by guest OSes. The @@ -160,6 +165,8 @@ /* Frame containing list of mfns containing list of mfns containing p2m. 
*/ unsigned long pfn_to_mfn_frame_list_list; unsigned long nmi_reason; + /* Start info gmfn */ + unsigned long start_info_mfn; } arch_shared_info_t; typedef struct { diff -r 5afb14264629 -r e66707bec7a9 xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/public/grant_table.h Fri May 5 17:38:45 2006 +0100 @@ -131,7 +131,7 @@ /* * Reference to a grant entry in a specified domain's grant table. */ -typedef uint32_t grant_ref_t; +typedef uint16_t grant_ref_t; /* * Handle to track a mapping created via a grant reference. diff -r 5afb14264629 -r e66707bec7a9 xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/public/hvm/ioreq.h Fri May 5 17:38:45 2006 +0100 @@ -71,9 +71,7 @@ typedef struct { ioreq_t vp_ioreq; - /* Event channel port */ - unsigned int vp_eport; /* VMX vcpu uses this to notify DM */ - unsigned int dm_eport; /* DM uses this to notify VMX vcpu */ + int vp_xen_port; } vcpu_iodata_t; typedef struct { diff -r 5afb14264629 -r e66707bec7a9 xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/public/io/netif.h Fri May 5 17:38:45 2006 +0100 @@ -43,6 +43,8 @@ typedef struct { uint16_t id; /* Echoed in response message. */ grant_ref_t gref; /* Reference to incoming granted frame */ + uint16_t offset; /* Offset in page to deliver at */ + uint16_t maxsize; /* Maximum packet size */ } netif_rx_request_t; /* Packet data has been validated against protocol checksum. 
*/ diff -r 5afb14264629 -r e66707bec7a9 xen/include/public/xen.h --- a/xen/include/public/xen.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/public/xen.h Fri May 5 17:38:45 2006 +0100 @@ -60,10 +60,11 @@ #define __HYPERVISOR_acm_op 27 #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op 29 -#define __HYPERVISOR_callback_op 30 -#define __HYPERVISOR_xenoprof_op 31 -#define __HYPERVISOR_event_channel_op 32 -#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_virtual_device_op 30 +#define __HYPERVISOR_callback_op 31 +#define __HYPERVISOR_xenoprof_op 32 +#define __HYPERVISOR_event_channel_op 33 +#define __HYPERVISOR_physdev_op 34 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -237,6 +238,14 @@ #define VMASST_TYPE_4gb_segments_notify 1 #define VMASST_TYPE_writable_pagetables 2 #define MAX_VMASST_TYPE 2 + +/* + * Commands to HYPERVISOR_virtual_device_op(). + */ + +#define VDOP_set_callback_irq 1 +#define VDOP_setup_gnttab_table 3 +#define VDOP_setup_shared_info 4 #ifndef __ASSEMBLY__ diff -r 5afb14264629 -r e66707bec7a9 xen/include/xen/event.h --- a/xen/include/xen/event.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/xen/event.h Fri May 5 17:38:45 2006 +0100 @@ -47,4 +47,10 @@ /* Bind a local event-channel port to the specified VCPU. */ extern long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id); +int alloc_xen_event_channel(void (*f)(void *d), + void *data, + struct domain *d); +void release_xen_event_channel(int ind); +void notify_xen_event_channel(int port); + #endif /* __XEN_EVENT_H__ */ diff -r 5afb14264629 -r e66707bec7a9 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Fri May 5 00:27:10 2006 +0100 +++ b/xen/include/xen/sched.h Fri May 5 17:38:45 2006 +0100 @@ -36,6 +36,7 @@ #define ECS_PIRQ 4 /* Channel is bound to a physical IRQ line. */ #define ECS_VIRQ 5 /* Channel is bound to a virtual IRQ line. */ #define ECS_IPI 6 /* Channel is bound to a virtual IPI line. 
+#define ECS_XEN 7 /* Channel ends in Xen */ u16 state; /* ECS_* */ u16 notify_vcpu_id; /* VCPU for local delivery notification */ union { @@ -48,6 +49,7 @@ } interdomain; /* state == ECS_INTERDOMAIN */ u16 pirq; /* state == ECS_PIRQ */ u16 virq; /* state == ECS_VIRQ */ + int xen_port; /* state == ECS_XEN */ } u; }; @@ -321,10 +323,15 @@ unsigned long hypercall_create_continuation( unsigned int op, const char *format, ...); +#if 0 +/*XXXSOS22*/ #define hypercall_preempt_check() (unlikely( \ softirq_pending(smp_processor_id()) | \ event_pending(current) \ )) +#else +#define hypercall_preempt_check() 0 +#endif /* This domain_hash and domain_list are protected by the domlist_lock. */ #define DOMAIN_HASH_SIZE 256 diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/Makefile --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/Makefile Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,22 @@ +include $(M)/overrides.mk + +obj-$(CONFIG_XEN_EVTCHN_PCI) += evtchn-pci/ +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ +obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ +obj-m += xenbus/ + + +debug: + chmod +x compile.sh + chmod +x mkbuildtree + echo $(XEN_DRIVERS_ROOT) + echo $(EXTRA_CFLAGS) + ./compile.sh + +clean: + find . -name "*.o" |xargs rm -f + find . -name "*.ko" |xargs rm -f + find . -name "*.mod.c" |xargs rm -f + find . -name ".*.cmd" |xargs rm -f + rm .tmp_versions -rf + diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/README --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/README Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,12 @@ +To build the Xen frontend drivers as modules for an unmodified Linux kernel, +modify compile.sh to point to the correct kernel source, +e.g.: +make -C /usr/src/linux-2.6.12 M=$PWD V=1 $* +then run ./compile.sh + +There will be three modules: xen-evtchn-pci.ko xen-vbd.ko xen-vnif.ko +xen-evtchn-pci.ko is the base module, and xen-vbd.ko is for VBD, xen-vnif.ko +is for VNIF.
+ +In the configuration file, the vbd configuration is the same as for xen-linux. + XF diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/blkfront/Kbuild --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/blkfront/Kbuild Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,6 @@ +include $(M)/overrides.mk + +obj-m += xen-vbd.o + +xen-vbd-objs := blkfront.o vbd.o + diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/compile.sh --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/compile.sh Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,7 @@ +chmod +x mkbuildtree +./mkbuildtree +if [ -z "$KERNEL_DIR" ]; then + echo "Please set KERNEL_DIR to your kernel source directory" + exit 1 +fi +make -C "$KERNEL_DIR" M="$PWD" V=1 "$@" diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/evtchn-pci/Kbuild --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/evtchn-pci/Kbuild Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,8 @@ +include $(M)/overrides.mk + +obj-m := xen-evtchn-pci.o + +EXTRA_CFLAGS += -I$(M)/evtchn-pci + +xen-evtchn-pci-objs := evtchn.o evtchn-pci.o gnttab.o xen_proc.o xen_support.o\ + features.o diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/evtchn-pci/debuginfo.h --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/evtchn-pci/debuginfo.h Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,56 @@ +#ifndef __DEBUG_INFO__ +#define __DEBUG_INFO__ +//#define INSERT_TEST +//#define VMX_DEBUG_INFO +//#define KERNEL_DEBUG_INFO +//#define FREQ_PRINT + +#define infotime(seconds, x, a...) \ +{ \ +static unsigned long prevjiffy = 0; \ + if(time_after(jiffies, prevjiffy + (seconds)*HZ)) { \ + prevjiffy = jiffies; \ + vmx_printk(x, ##a); \ + } \ +} + +#ifdef KERNEL_DEBUG_INFO +#define dprintk(x, a...) \ + printk(" " x, ##a) +#define dprintknl(x, a...) \ + printk(x, ##a) +#define dprintkentry(x, a...) \ + printk(" " x "\n", ##a) +#define dprintkexit(x, a...)
\ + printk(" " x "\n", ##a) +#ifdef FREQ_PRINT +#define dprintkfreq(x, a...) \ + printk(" " x, ##a) +#else +#define dprintkfreq(x, a...) +#endif +#elif defined(VMX_DEBUG_INFO) +#define dprintk(x, a...) \ + vmx_printk(" " x, ##a) +#define dprintknl(x, a...) \ + vmx_printk(x, ##a) +#define dprintkentry(x, a...) \ + vmx_printk(" " x "\n", ##a) +#define dprintkexit(x, a...) \ + vmx_printk(" " x "\n", ##a) +#ifdef FREQ_PRINT +#define dprintkfreq(x, a...) \ + vmx_printk(" " x, ##a) +#else +#define dprintkfreq(x, a...) +#endif + +#else +#define dprintk(x, a...) +#define dprintkentry(x, a...) +#define dprintkexit(x, a...) +#define dprintkfreq(x, a...) +#define dprintknl(x, a...) +#endif +int vmx_printk(const char *fmt, ...); +#endif diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/evtchn-pci/evtchn-pci.c --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/evtchn-pci/evtchn-pci.c Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,323 @@ +/****************************************************************************** + * evtchn-pci.c + * xen event channel fake PCI device driver + * Copyright (C) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "evtchn-pci.h" + +#define DRV_NAME "xen-evtchn-pci" +#define DRV_VERSION "0.10" +#define DRV_RELDATE "03/03/2005" + +extern void *hypercall_page; + +static int callbackirq = 3; /* legacy mode irq */ +static int nopci = 0; +static char version[] __devinitdata = + KERN_INFO DRV_NAME ":version " DRV_VERSION " " DRV_RELDATE + " Xiaofeng. Ling\n"; + +MODULE_AUTHOR("xiaofeng.ling@intel.com"); +MODULE_DESCRIPTION("Xen evtchn PCI device"); +MODULE_LICENSE("GPL"); + +MODULE_PARM(nopci, "i"); +MODULE_PARM(callbackirq, "i"); +MODULE_PARM_DESC(callbackirq, "callback irq number for xen event channel"); + +#define XEN_EVTCHN_VENDOR_ID 0xfffd +#define XEN_EVTCHN_DEVICE_ID 0x0101 + +static struct pci_device_id evtchn_pci_tbl[] __devinitdata = { + {XEN_EVTCHN_VENDOR_ID, XEN_EVTCHN_DEVICE_ID, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0,} +}; + +MODULE_DEVICE_TABLE(pci, evtchn_pci_tbl); + +start_info_t start_info_buf; +start_info_t *xen_start_info; +EXPORT_SYMBOL(xen_start_info); + +unsigned long *phys_to_machine_mapping; +EXPORT_SYMBOL(phys_to_machine_mapping); + +static int __init init_xen_info(void) +{ + int x; + void *start_info; + unsigned long start_info_phys, shared_info_frame; + + setup_xen_features(); + start_info_phys = virt_to_phys(high_memory); + start_info = ioremap(start_info_phys, PAGE_SIZE); + if (!start_info) + { + printk(KERN_ERR DRV_NAME "ioremap for start info fail\n"); + return -1; + } + + dprintk("%p, start_info_phys:%p, start_info:%p\n", + high_memory, start_info_phys, start_info); + + xen_start_info = &start_info_buf; + memcpy(xen_start_info, start_info, sizeof (start_info_t)); + iounmap(start_info); + + shared_info_frame = alloc_xen_mmio(PAGE_SIZE) >> PAGE_SHIFT; + BUG_ON(HYPERVISOR_virtual_device_op(VDOP_setup_shared_info, + shared_info_frame, + 1)); + printk("<0>shared info pfn at %lx.\n", shared_info_frame); + 
HYPERVISOR_shared_info = + ioremap(shared_info_frame << PAGE_SHIFT, PAGE_SIZE); + + if (!HYPERVISOR_shared_info) + panic("can't map shared info\n"); + + for (x = 0; x < 1024; x++) + printk("<0>%lx -> %lx\n", x, + ((unsigned long *)HYPERVISOR_shared_info)[x]); + dprintk("ioremap shared_info successful\n"); + + phys_to_machine_mapping = NULL; + + gnttab_init(); + evtchn_init(); + + return 0; +} + +extern irqreturn_t evtchn_interrupt(int irq, void *devid, struct pt_regs *regs); + +static void __devexit evtchn_pci_remove(struct pci_dev *pdev) +{ + long ioaddr, iolen; + + /* if there is an I/O region, don't forget to release it */ + ioaddr = pci_resource_start(pdev, 0); + iolen = pci_resource_len(pdev, 0); + if (ioaddr != 0) + { + release_region(ioaddr, iolen); + } + + pci_set_drvdata(pdev, NULL); + free_irq(pdev->irq, evtchn_interrupt); /* dev_id must match request_irq() */ +} + +unsigned long evtchn_mmio = 0xc000000; +unsigned long evtchn_mmio_alloc; +unsigned long evtchn_mmiolen = 0x1000000; + +unsigned long alloc_xen_mmio(unsigned long len) +{ + unsigned long addr; + + addr = 0; + if (evtchn_mmio_alloc + len <= evtchn_mmiolen) + { + addr = evtchn_mmio + evtchn_mmio_alloc; + evtchn_mmio_alloc += len; + } else { + panic("ran out of xen mmio space"); + } + return addr; +} + +static int __devinit evtchn_pci_init(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int i, ret, irq; + long ioaddr, iolen; + long mmio_addr, mmio_len; + + printk(KERN_INFO DRV_NAME ":found evtchn pci device model, do init\n"); + +#ifndef MODULE + static int printed_version; + if (!printed_version++) + printk(version); +#endif + + i = pci_enable_device(pdev); + if (i) + return i; + + ioaddr = pci_resource_start(pdev, 0); + iolen = pci_resource_len(pdev, 0); + + mmio_addr = pci_resource_start(pdev, 1); + mmio_len = pci_resource_len(pdev, 1); + + if (mmio_addr != 0) + { + if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) + { + printk(KERN_ERR DRV_NAME ":MEM I/O resource 0x%lx @ 0x%lx busy\n", + mmio_len, mmio_addr); + return -EBUSY; + } +
evtchn_mmio = mmio_addr; + evtchn_mmiolen = mmio_len; + } + else + { + printk(KERN_WARNING DRV_NAME ":no MMIO found!\n"); + } + + irq = pdev->irq; + callbackirq = irq; + + /* + * maybe some day we may use I/O port for checking status + * when sharing interrupts + */ + if (ioaddr != 0) + { + if (request_region(ioaddr, iolen, DRV_NAME) == NULL) + { + printk(KERN_ERR DRV_NAME ":I/O resource 0x%lx @ 0x%lx busy\n", + iolen, ioaddr); + return -EBUSY; + } + + printk("<0>ioaddr %lx, iolen %lx.\n", ioaddr, iolen); + hypercall_page = (void *)__get_free_page(GFP_KERNEL); + if (!hypercall_page) + panic("Cannot get hypercall page.\n"); + printk("<0>mfn %lx.\n", + virt_to_phys(hypercall_page) >> PAGE_SHIFT); + memset(hypercall_page, 0xcc, PAGE_SIZE); + asm volatile("outl %%eax, %%dx\n" + : + : "a" (virt_to_phys(hypercall_page) >> PAGE_SHIFT), + "d" (ioaddr) + : "memory"); + printk("<0>Requested hypercallpage setup.\n"); + } + printk(KERN_INFO DRV_NAME ":use irq %d for event channel\n", irq); + + if ((ret = request_irq(irq, evtchn_interrupt, SA_SHIRQ, + "xen-evtchn-pci", evtchn_interrupt))) { + goto out; + } + + if ((ret = init_xen_info())) + goto out; + + if ((ret = set_callback_irq(irq))) + goto out; + + out: + if (ret && hypercall_page) + free_page((unsigned long)hypercall_page); + return ret; /* propagate probe failure instead of always reporting success */ +} + +static struct pci_driver evtchn_driver = { + name:DRV_NAME, + probe:evtchn_pci_init, + remove:__devexit_p(evtchn_pci_remove), + id_table:evtchn_pci_tbl, +}; + +int __init setup_xen_callback(void) +{ + int rc = 0; + /* two ways for call back from hypervisor */ + + printk(KERN_INFO DRV_NAME ":legacy driver request irq :%d\n", callbackirq); + rc = request_irq(callbackirq, evtchn_interrupt, SA_SHIRQ, + "xen-evtchn", evtchn_interrupt); + if (rc != 0) + printk(":request irq error:%d!", rc); + rc = set_callback_irq(callbackirq); + if (rc != 0) + printk(KERN_ERR DRV_NAME ":set call back irq error:%d!", rc); + return rc; +} + +static int __init evtchn_pci_module_init(void) +{ + int rc; + +
printk(KERN_INFO DRV_NAME ":do xen module support init\n"); + +/* when a module, this is printed whether or not devices are found in probe */ +#ifdef MODULE + printk(version); +#endif + + if (!nopci) + { + rc = pci_module_init(&evtchn_driver); + if (rc) + printk(KERN_INFO DRV_NAME ":No evtchn pci device model found," + "use legacy mode\n"); + } + else + { + printk(KERN_INFO DRV_NAME ":disable evtchn pci device model " + "by module arguments,use legacy mode\n"); + rc = 1; + } + + if (rc) + { + /*No Pci device, try legacy mode */ + rc = init_xen_info(); + if (rc) + return rc; + rc = setup_xen_callback(); /* keep the result so the check below is live */ + if (rc) + printk(KERN_ERR DRV_NAME ":setup xen legacy callback fail\n"); + } + + return rc; +} + +static void __exit evtchn_pci_module_cleanup(void) +{ + printk(KERN_INFO DRV_NAME ":Do evtchn module cleanup\n"); + /* tell the hypervisor to stop delivering the callback irq */ + set_callback_irq(0); + + free_irq(callbackirq, evtchn_interrupt); /* dev_id must match request_irq() */ + + /*TODO: unmap hypercall param share page */ + + pci_unregister_driver(&evtchn_driver); +} + +module_init(evtchn_pci_module_init); +module_exit(evtchn_pci_module_cleanup); diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/evtchn-pci/evtchn-pci.h --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/evtchn-pci/evtchn-pci.h Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,50 @@ +/****************************************************************************** + * evtchn-pci.h + * module driver support in unmodified Linux + * Copyright (C) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details.
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#ifndef __XEN_SUPPORT_H +#define __XEN_SUPPORT_H +#include +#include +#include "debuginfo.h" + +extern unsigned long *phys_to_machine_mapping; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#else +#define __user +#endif + +static inline int set_callback_irq(int irq) +{ + return HYPERVISOR_virtual_device_op(VDOP_set_callback_irq, + irq, 0l); +} + +#define L2_PAGETABLE_SHIFT 22 +unsigned long alloc_xen_mmio(unsigned long len); + +int gnttab_init(void); +void evtchn_init(void); +void ctrl_if_init(void); + +void xen_machphys_update(unsigned long mfn, unsigned long pfn); +int xen_do_init(void); + +#endif diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/evtchn-pci/evtchn.c --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/evtchn-pci/evtchn.c Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,200 @@ +/****************************************************************************** + * evtchn.c + * + * A simplified event channel for para-drivers in unmodified linux + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "evtchn-pci.h" + +void *hypercall_page; + +#define cpu_from_evtchn(port) (0) +#define MAX_EVTCHN 256 +static struct +{ + irqreturn_t(*handler) (int, void *, struct pt_regs *); + void *dev_id; +} evtchns[MAX_EVTCHN]; + +void mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, &s->evtchn_mask[0]); +} +EXPORT_SYMBOL(mask_evtchn); + +void unmask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + unsigned int cpu = smp_processor_id(); + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + evtchn_unmask_t op = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, + &op); + return; + } + + synch_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just + * like a real IO-APIC we 'lose the interrupt edge' if the channel is + * masked. 
+ */ + if (synch_test_bit(port, &s->evtchn_pending[0]) && + !synch_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) { + vcpu_info->evtchn_upcall_pending = 1; + if (!vcpu_info->evtchn_upcall_mask) + force_evtchn_callback(); + } +} +EXPORT_SYMBOL(unmask_evtchn); + +unsigned int bind_virq_to_evtchn(int virq) +{ + evtchn_bind_virq_t op; + + op.virq = virq; + op.vcpu = 0; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op) != 0) + BUG(); + + return op.port; +} + +int +bind_evtchn_to_irqhandler(unsigned int evtchn, + irqreturn_t(*handler) (int, void *, + struct pt_regs *), + unsigned long irqflags, const char *devname, + void *dev_id) +{ + if (evtchn >= MAX_EVTCHN) + return -EINVAL; + evtchns[evtchn].handler = handler; + evtchns[evtchn].dev_id = dev_id; + unmask_evtchn(evtchn); + return evtchn; +} + +EXPORT_SYMBOL(bind_evtchn_to_irqhandler); + +void unbind_from_irqhandler(unsigned int evtchn, void *dev_id) +{ + if (evtchn >= MAX_EVTCHN) + return; + + mask_evtchn(evtchn); + evtchns[evtchn].handler = NULL; +} + +EXPORT_SYMBOL(unbind_from_irqhandler); + +void notify_remote_via_irq(int irq) +{ + int evtchn = irq; + notify_remote_via_evtchn(evtchn); +} + +EXPORT_SYMBOL(notify_remote_via_irq); + +void unbind_evtchn_from_irq(unsigned int evtchn) +{ + return; +} + +EXPORT_SYMBOL(unbind_evtchn_from_irq); + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) + +irqreturn_t evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int cpu = smp_processor_id(); + irqreturn_t(*handler) (int, void *, struct pt_regs *); + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + vcpu_info->evtchn_upcall_pending = 0; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. 
*/ + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + while (l1 != 0) + { + l1i = __ffs(l1); + l1 &= ~(1UL << l1i); /* l1 is unsigned long: shift an unsigned long, not an int */ + + while ((l2 = active_evtchns(cpu, s, l1i)) != 0) + { + l2i = __ffs(l2); + + port = (l1i * BITS_PER_LONG) + l2i; + + if (port < MAX_EVTCHN && (handler = evtchns[port].handler) != NULL) /* evtchns[] has MAX_EVTCHN slots */ + { + clear_evtchn(port); + handler(port, evtchns[port].dev_id, regs); + } + else + { + evtchn_device_upcall(port); + } + } + } + + return IRQ_HANDLED; +} + +void force_evtchn_callback(void) +{ + evtchn_interrupt(0, NULL, NULL); +} + +EXPORT_SYMBOL(force_evtchn_callback); + +void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ +} + +void __init evtchn_init(void) +{ + +} + +EXPORT_SYMBOL(hypercall_page); diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/evtchn-pci/xen_support.c --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/evtchn-pci/xen_support.c Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,53 @@ +/****************************************************************************** + * support.c + * Xen module support functions. + * Copyright (C) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "evtchn-pci.h"
+
+shared_info_t *HYPERVISOR_shared_info = NULL;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+EXPORT_SYMBOL(xen_machphys_update);
+void xen_machphys_update(unsigned long mfn, unsigned long pfn)
+{
+	mmu_update_t u;
+	u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;	/* widen before shifting: a 32-bit unsigned long would drop the high bits of large mfns */
+	u.val = pfn;
+	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+
+void balloon_update_driver_allowance(long delta)
+{
+}
+
+EXPORT_SYMBOL(balloon_update_driver_allowance);
+
+void evtchn_device_upcall(int port)
+{
+	printk("Error,no device upcall in guest domain (%d)!\n", port);
+	clear_evtchn(port);
+}
+
+EXPORT_SYMBOL (evtchn_device_upcall);
diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/mkbuildtree
--- /dev/null Thu Jan 1 00:00:00 1970 +0000
+++ b/unmodified-drivers/linux-2.6/mkbuildtree Fri May 5 17:38:45 2006 +0100
@@ -0,0 +1,31 @@
+#! /bin/sh
+
+C=$PWD
+
+XEN=$C/../../xen
+XL=$C/../../linux-2.6-xen-sparse
+
+for d in $(find ${XL}/drivers/xen/ -maxdepth 1 -type d | sed -e 1d); do
+    if ! echo $d | egrep -q back; then
+        lndir $d $(basename $d) > /dev/null 2>&1
+    fi
+done
+
+ln -sf ${XL}/drivers/xen/net_driver_util.c netfront
+
+ln -sf ${XL}/drivers/xen/core/gnttab.c evtchn-pci
+ln -sf ${XL}/drivers/xen/core/features.c evtchn-pci
+ln -sf ${XL}/drivers/xen/core/xen_proc.c evtchn-pci
+
+lndir -silent ${XL}/include/xen include/xen
+ln -sf ${XEN}/include/public include/xen/interface
+
+# Need to be quite careful here: we don't want the files we link in to
+# risk overriding the native Linux ones (in particular, system.h must
+# be native and not xenolinux).
+ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypervisor.h include/asm +ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypercall.h include/asm +ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypercall-vmx.h include/asm +ln -sf ${XL}/include/asm-i386/mach-xen/asm/synch_bitops.h include/asm +ln -sf ${XL}/include/asm-i386/mach-xen/asm/maddr.h include/asm + diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/netfront/Kbuild --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/netfront/Kbuild Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,4 @@ +include $(M)/overrides.mk + +obj-m = xen-vnif.o +xen-vnif-objs := netfront.o net_driver_util.o diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/overrides.mk --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/overrides.mk Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,16 @@ +# Hack: we need to use the config which was used to build the kernel, +# but that config won't have the right headers etc., so duplicate +# some of the mach-xen infrastructure in here. +# +# (i.e. 
we need the native config for things like -mregparm, but +# a Xen kernel to find the right headers) +CONFIG_X86_XEN=y +CONFIG_XEN_EVTCHN_PCI = m +CONFIG_XEN_BLKDEV_FRONTEND = m +CONFIG_XEN_NETDEV_FRONTEND = m +EXTRA_CFLAGS += -DCONFIG_VMX -DCONFIG_VMX_GUEST -DCONFIG_X86_XEN +EXTRA_CFLAGS += -DCONFIG_XEN_SHADOW_MODE -DCONFIG_XEN_SHADOW_TRANSLATE +EXTRA_CFLAGS += -DCONFIG_XEN_BLKDEV_GRANT -DXEN_EVTCHN_MASK_OPS +EXTRA_CFLAGS += -DCONFIG_XEN_NETDEV_GRANT_RX -DCONFIG_XEN_NETDEV_GRANT_TX +EXTRA_CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030202 +EXTRA_CFLAGS += -I$(M)/include diff -r 5afb14264629 -r e66707bec7a9 unmodified-drivers/linux-2.6/xenbus/Kbuild --- /dev/null Thu Jan 1 00:00:00 1970 +0000 +++ b/unmodified-drivers/linux-2.6/xenbus/Kbuild Fri May 5 17:38:45 2006 +0100 @@ -0,0 +1,9 @@ +include $(M)/overrides.mk + +obj-m += xenbus.o +xenbus-objs = +xenbus-objs += xenbus_comms.o +xenbus-objs += xenbus_xs.o +xenbus-objs += xenbus_probe.o +xenbus-objs += xenbus_dev.o +xenbus-objs += xenbus_client.o