之前在helloworld中主要分析了hugepage的使用,這回在l2fwd中主要分析一下uio和PMD的實現
main函數中首先調用了rte_eal_init初始化eal環境,其中主要是hugepage的初始化;
/*
 * Excerpt from main(): initialise the EAL from the command line.
 * A negative return from rte_eal_init() is treated as fatal and the
 * application exits through rte_exit().
 */
ret = rte_eal_init(argc, argv);
if (ret < 0)
	rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
接著創建了mbuf pool
/* create the mbuf pool */l2fwd_pktmbuf_pool = rte_mempool_create("mbuf_pool", NB_MBUF, MBUF_SIZE, 32, sizeof(struct rte_pktmbuf_pool_PRivate), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL, rte_socket_id(), 0);if (l2fwd_pktmbuf_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot init mbuf pool/n");
然后是PMD驅動的注冊和PCI設備驅動加載
/*
 * Excerpt from main(): register the poll-mode drivers, then probe the
 * PCI bus. Either step failing (negative return) aborts the application.
 */
if (rte_pmd_init_all() < 0)
	rte_exit(EXIT_FAILURE, "Cannot init pmd\n");

if (rte_eal_pci_probe() < 0)
	rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");
首先是PMD驅動的注冊,目前DPDK支持igb igbvf em ixgbe ixgbevf virtio vmxnet3;不過這些具體是什么還不清楚,后面以虛擬機環境中使用的em驅動為例子分析;
static inlineint rte_pmd_init_all(void){ int ret = -ENODEV;#ifdef RTE_LIBRTE_IGB_PMD if ((ret = rte_igb_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init igb PMD/n"); return (ret); } if ((ret = rte_igbvf_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init igbvf PMD/n"); return (ret); }#endif /* RTE_LIBRTE_IGB_PMD */#ifdef RTE_LIBRTE_EM_PMD if ((ret = rte_em_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init em PMD/n"); return (ret); }#endif /* RTE_LIBRTE_EM_PMD */#ifdef RTE_LIBRTE_IXGBE_PMD if ((ret = rte_ixgbe_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init ixgbe PMD/n"); return (ret); } if ((ret = rte_ixgbevf_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init ixgbevf PMD/n"); return (ret); }#endif /* RTE_LIBRTE_IXGBE_PMD */#ifdef RTE_LIBRTE_VIRTIO_PMD if ((ret = rte_virtio_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init virtio PMD/n"); return (ret); }#endif /* RTE_LIBRTE_VIRTIO_PMD */#ifdef RTE_LIBRTE_VMXNET3_PMD if ((ret = rte_vmxnet3_pmd_init()) != 0) { RTE_LOG(ERR, PMD, "Cannot init vmxnet3 PMD/n"); return (ret); }#endif /* RTE_LIBRTE_VMXNET3_PMD */ if (ret == -ENODEV) RTE_LOG(ERR, PMD, "No PMD(s) are configured/n"); return (ret);}
注冊EM驅動
intrte_em_pmd_init(void){ rte_eth_driver_register(&rte_em_pmd); return 0;}/** * Register an Ethernet [Poll Mode] driver. * * Function invoked by the initialization function of an Ethernet driver * to simultaneously register itself as a PCI driver and as an Ethernet * Poll Mode Driver. * Invokes the rte_eal_pci_register() function to register the *pci_drv* * structure embedded in the *eth_drv* structure, after having stored the * address of the rte_eth_dev_init() function in the *devinit* field of * the *pci_drv* structure. * During the PCI probing phase, the rte_eth_dev_init() function is * invoked for each PCI [Ethernet device] matching the embedded PCI * identifiers provided by the driver. */voidrte_eth_driver_register(struct eth_driver *eth_drv){ eth_drv->pci_drv.devinit = rte_eth_dev_init; rte_eal_pci_register(ð_drv->pci_drv);}/* register a driver */voidrte_eal_pci_register(struct rte_pci_driver *driver){ TAILQ_INSERT_TAIL(&driver_list, driver, next);}
這里PMD驅動結構包含了PMD驅動部分和PCI驅動部分
/**
 * @internal
 * The structure associated with a PMD Ethernet driver.
 *
 * Each Ethernet driver acts as a PCI driver and is represented by a generic
 * *eth_driver* structure that holds:
 *
 * - An *rte_pci_driver* structure (which must be the first field, because
 *   rte_eth_dev_init() casts the rte_pci_driver pointer it receives back
 *   to a struct eth_driver pointer).
 *
 * - The *eth_dev_init* function invoked for each matching PCI device.
 *
 * - The size of the private data to allocate for each matching device.
 */
struct eth_driver {
	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
	eth_dev_init_t eth_dev_init;      /**< Device init function. */
	unsigned int dev_private_size;    /**< Size of device private data. */
};
接下來,如果不存在白名單則加載每個device的所有驅動;在白名單中的device加載驅動失敗直接退出;
/*
 * Scan the content of the PCI bus, and call the devinit() function for
 * all registered drivers that have a matching entry in its id_table
 * for discovered devices.
 *
 * Without a whitelist every device is probed and probe failures are
 * ignored. With a whitelist only whitelisted devices are probed, and a
 * failure on one of them terminates the application via rte_exit().
 *
 * Returns 0 (fatal conditions do not return).
 */
int
rte_eal_pci_probe(void)
{
	struct rte_pci_device *dev = NULL;

	TAILQ_FOREACH(dev, &device_list, next) {
		if (!eal_dev_whitelist_exists()) {
			/* best effort: the result is deliberately not checked */
			pci_probe_all_drivers(dev);
		} else if (pcidev_is_whitelisted(dev) &&
			   pci_probe_all_drivers(dev) < 0) {
			rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT
				 " cannot be used\n", dev->addr.domain,
				 dev->addr.bus, dev->addr.devid,
				 dev->addr.function);
		}
	}

	return 0;
}
對于每個device,嘗試是否可以加載driver,RTE_PCI_DRV_MULTIPLE標記的驅動需要加載多次,第三方驅動可能需要;
/* * If vendor/device ID match, call the devinit() function of all * registered driver for the given device. Return -1 if no driver is * found for this device. * For drivers with the RTE_PCI_DRV_MULTIPLE flag enabled, register * the same device multiple times until failure to do so. * It is required for non-Intel NIC drivers provided by third-parties such * as 6WIND. */static intpci_probe_all_drivers(struct rte_pci_device *dev){ struct rte_pci_driver *dr = NULL; int rc; dev->blacklisted = !!is_blacklisted(dev); TAILQ_FOREACH(dr, &driver_list, next) { rc = rte_eal_pci_probe_one_driver(dr, dev); if (rc < 0) /* negative value is an error */ break; if (rc > 0) /* positive value means driver not found */ continue; /* initialize subsequent driver instances for this device */ if ((dr->drv_flags & RTE_PCI_DRV_MULTIPLE) && (!dev->blacklisted)) while (rte_eal_pci_probe_one_driver(dr, dev) == 0) ; return 0; } return -1;}
驅動加載
/* * If vendor/device ID match, call the devinit() function of the * driver. */intrte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev){ struct rte_pci_id *id_table; /* id table位于rte_pci_dev_ids.h */ for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) { /* check if device's identifiers match the driver's ones */ if (id_table->vendor_id != dev->id.vendor_id && id_table->vendor_id != PCI_ANY_ID) continue; if (id_table->device_id != dev->id.device_id && id_table->device_id != PCI_ANY_ID) continue; if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && id_table->subsystem_vendor_id != PCI_ANY_ID) continue; if (id_table->subsystem_device_id != dev->id.subsystem_device_id && id_table->subsystem_device_id != PCI_ANY_ID) continue; /* 當(dāng)前driver與device匹配 */ struct rte_pci_addr *loc = &dev->addr; RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i/n", loc->domain, loc->bus, loc->devid, loc->function, dev->numa_node); RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s/n", dev->id.vendor_id, dev->id.device_id, dr->name); /* 黑名單設(shè)備不加載 */ /* no initialization when blacklisted, return without error */ if (dev->blacklisted) { RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing/n"); return 0; }#ifdef RTE_EAL_UNBIND_PORTS if (dr->drv_flags & RTE_PCI_DRV_NEED_IGB_UIO) { /* unbind driver and load uio resources for Intel NICs */ if (pci_switch_module(dr, dev, 1, IGB_UIO_NAME) < 0) return -1; } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && rte_eal_process_type() == RTE_PROC_PRIMARY) { /* unbind current driver */ if (pci_unbind_kernel_driver(dev) < 0) return -1; }#else /* 首先獲取設(shè)備的uio映射地址和大小,然后映射到/dev/uiox上 */ if (dr->drv_flags & RTE_PCI_DRV_NEED_IGB_UIO) /* just map resources for Intel NICs */ if (pci_uio_map_resource(dev) < 0) return -1;#endif /* reference driver structure */ dev->driver = dr; /* 調(diào)用PCI驅(qū)動(dòng)的初始化函數(shù) */ /* call the driver devinit() function */ return 
dr->devinit(dr, dev); } /* return positive value if driver is not found */ return 1;}
映射PCI地址空間到用戶空間的過程
/* map the PCI resource of a PCI device in virtual memory */static intpci_uio_map_resource(struct rte_pci_device *dev){ int i, j; char dirname[PATH_MAX]; char filename[PATH_MAX]; char devname[PATH_MAX]; /* contains the /dev/uioX */ void *mapaddr; int uio_num; unsigned long start,size; uint64_t phaddr; uint64_t offset; uint64_t pagesz; ssize_t nb_maps; struct rte_pci_addr *loc = &dev->addr; struct uio_resource *uio_res; struct uio_map *maps; dev->intr_handle.fd = -1; /* PRIMARY進(jìn)程才做映射 */ /* secondary processes - use already recorded details */ if ((rte_eal_process_type() != RTE_PROC_PRIMARY) && (dev->id.vendor_id != PCI_VENDOR_ID_QUMRANET)) return (pci_uio_map_secondary(dev)); /* 通過(guò)/sys/bus/pci/devices/0000:02:01.0/uio/uio0找到與當(dāng)前device關(guān)聯(lián)的uio設(shè)備ID */ /* find uio resource */ uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname)); if (uio_num < 0) { RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " "skipping/n", loc->domain, loc->bus, loc->devid, loc->function); return -1; } /* 忽略 */ if(dev->id.vendor_id == PCI_VENDOR_ID_QUMRANET) { /* get portio size */ rte_snprintf(filename, sizeof(filename), "%s/portio/port0/size", dirname); if (eal_parse_sysfs_value(filename, &size) < 0) { RTE_LOG(ERR, EAL, "%s(): cannot parse size/n", __func__); return -1; } /* get portio start */ rte_snprintf(filename, sizeof(filename), "%s/portio/port0/start", dirname); if (eal_parse_sysfs_value(filename, &start) < 0) { RTE_LOG(ERR, EAL, "%s(): cannot parse portio start/n", __func__); return -1; } dev->mem_resource[0].addr = (void *)(uintptr_t)start; dev->mem_resource[0].len = (uint64_t)size; RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx with size=0x%lx/n", start, size); /* rte_virtio_pmd does not need any other bar even if available */ return (0); } /* allocate the mapping details for secondary processes*/ if ((uio_res = rte_zmalloc("UIO_RES", sizeof (*uio_res), 0)) == NULL) { RTE_LOG(ERR, EAL, "%s(): cannot store uio mmap details/n", 
__func__); return (-1); } rte_snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname); memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr)); /* uio設(shè)備所有map記錄到uio_res->maps中,并返回map的個(gè)數(shù) */ /* collect info about device mappings */ if ((nb_maps = pci_uio_get_mappings(dirname, uio_res->maps, sizeof (uio_res->maps) / sizeof (uio_res->maps[0]))) < 0) return (nb_maps); uio_res->nb_maps = nb_maps; /* Map all BARs */ pagesz = sysconf(_SC_PAGESIZE); maps = uio_res->maps; for (i = 0; i != PCI_MAX_RESOURCE; i++) { /* rte_eal_init -> rte_eal_pci_init 中初始化了dev->mem_resource */ /* /sys/bus/pci/devices/0000:02:01.0/resource 文件中讀取 物理地址起始地址 物理地址結(jié)束 FLAG(第10個(gè)bit表示IO memory) 0x00000000fd5a0000 0x00000000fd5bffff 0x0000000000140204 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x00000000fdff0000 0x00000000fdffffff 0x0000000000140204 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000002000 0x000000000000203f 0x0000000000040101 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x00000000e7b00000 0x00000000e7b0ffff 0x000000000004e200 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 */ /* skip empty BAR */ if ((phaddr = dev->mem_resource[i].phys_addr) == 0) continue; /* 查找PCI IO地址和uio匹配的 */ for (j = 0; j != nb_maps && (phaddr != maps[j].phaddr || dev->mem_resource[i].len != maps[j].size); j++) ; /* 打開/dev/uiox,把它的內(nèi)存映射到用戶空間 */ /* if matching map is found, then use it */ if (j != nb_maps) { offset = j * pagesz; if (maps[j].addr != NULL || (mapaddr = pci_map_resource(dev, NULL, devname, (off_t)offset, (size_t)maps[j].size)) == NULL) { return (-1); } maps[j].addr = 
mapaddr; maps[j].offset = offset; dev->mem_resource[i].addr = mapaddr; } }
/* uio_res加入uio_res_list鏈表 */ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); return (0);}
回到pci驅動的初始化rte_eth_dev_init
static intrte_eth_dev_init(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev){ struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; int diag; eth_drv = (struct eth_driver *)pci_drv; /* 分配或查找名為rte_eth_dev_data的memzone,并從全局?jǐn)?shù)組rte_eth_devices中返回當(dāng)前端口的entry */ eth_dev = rte_eth_dev_allocate(); if (eth_dev == NULL) return -ENOMEM; if (rte_eal_process_type() == RTE_PROC_PRIMARY){ /* 分配PMD驅(qū)動(dòng)的private內(nèi)存 */ eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", eth_drv->dev_private_size, CACHE_LINE_SIZE); if (eth_dev->data->dev_private == NULL) rte_panic("Cannot allocate memzone for private port data/n"); } eth_dev->pci_dev = pci_dev; eth_dev->driver = eth_drv; eth_dev->data->rx_mbuf_alloc_failed = 0; /* init user callbacks */ TAILQ_INIT(&(eth_dev->callbacks)); /* * Set the default maximum frame size. */ eth_dev->data->max_frame_size = ETHER_MAX_LEN; /* 這次調(diào)用的是PMD驅(qū)動(dòng)的初始化, 當(dāng)前函數(shù)的上下文為PCI驅(qū)動(dòng)的初始化函數(shù) */ /* Invoke PMD device initialization function */ diag = (*eth_drv->eth_dev_init)(eth_drv, eth_dev); if (diag == 0) return (0); /* 初始化出錯(cuò),回收內(nèi)存,端口數(shù)修正 */ PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x)" " failed/n", pci_drv->name, (unsigned) pci_dev->id.vendor_id, (unsigned) pci_dev->id.device_id); if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(eth_dev->data->dev_private); nb_ports--; return diag;}
PMD驅動的初始化過程
static inteth_em_dev_init(__attribute__((unused)) struct eth_driver *eth_drv, struct rte_eth_dev *eth_dev){ struct rte_pci_device *pci_dev; struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct e1000_vfta * shadow_vfta = E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private); pci_dev = eth_dev->pci_dev; eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different * RX function */ if (rte_eal_process_type() != RTE_PROC_PRIMARY){ if (eth_dev->data->scattered_rx) eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_scattered_pkts; return 0; } hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; hw->device_id = pci_dev->id.device_id; /* For ICH8 support we'll need to map the Flash memory BAR */ if (e1000_setup_init_funcs(hw, TRUE) != E1000_SUCCESS || em_hw_init(hw) != 0) { PMD_INIT_LOG(ERR, "port_id %d vendorID=0x%x deviceID=0x%x: " "failed to init HW", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); return -(ENODEV); } /* Allocate memory for storing MAC addresses */ eth_dev->data->mac_addrs = rte_zmalloc("e1000", ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0); if (eth_dev->data->mac_addrs == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d bytes needed to " "store MAC addresses", ETHER_ADDR_LEN * hw->mac.rar_entry_count); return -(ENOMEM); } /* Copy the permanent MAC address */ ether_addr_copy((struct ether_addr *) hw->mac.addr, eth_dev->data->mac_addrs); /* initialize the vfta */ memset(shadow_vfta, 0, sizeof(*shadow_vfta)); PMD_INIT_LOG(INFO, "port_id %d vendorID=0x%x deviceID=0x%x/n", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); rte_intr_callback_register(&(pci_dev->intr_handle), eth_em_interrupt_handler, (void *)eth_dev); return (0);}
PMD驅動初始化主要是一些硬件相關的寄存器初始化以及函數的初始化,細節就不再分析了;函數的最后注冊了一個中斷處理函數,下面主要分析中斷處理的過程;
/**
 * Register a callback for an interrupt source.
 *
 * The callback is appended to the callback list of the interrupt source
 * whose fd equals intr_handle->fd; if no such source exists yet, one is
 * created (holding a copy of *intr_handle) and appended to
 * intr_sources. All list manipulation happens under intr_lock.
 *
 * When the source gains its first callback, one byte is written to
 * intr_pipe.writefd so that the interrupt thread rebuilds its epoll
 * wait list.
 *
 * @param intr_handle
 *   Interrupt handle; must be non-NULL with fd >= 0.
 * @param cb
 *   Callback function; must be non-NULL.
 * @param cb_arg
 *   Opaque argument stored alongside the callback.
 * @return
 *   0 on success, -EINVAL on invalid parameters, -ENOMEM when an
 *   allocation fails, -EPIPE when notifying the interrupt thread fails.
 */
int
rte_intr_callback_register(struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb, void *cb_arg)
{
	int ret, wake_thread;
	struct rte_intr_source *src;
	struct rte_intr_callback *callback;

	wake_thread = 0;

	/*
	 * first do parameter checking (per the article author,
	 * intr_handle.fd is the descriptor of the /dev/uioX file backing
	 * the PCI mapping)
	 */
	if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
		RTE_LOG(ERR, EAL,
			"Registering with invalid input parameter\n");
		return -EINVAL;
	}

	/* allocate a new interrupt callback entity */
	callback = rte_zmalloc("interrupt callback list",
				sizeof(*callback), 0);
	if (callback == NULL) {
		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
		return -ENOMEM;
	}
	callback->cb_fn = cb;
	callback->cb_arg = cb_arg;

	rte_spinlock_lock(&intr_lock);

	/* check if there is at least one callback registered for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (src->intr_handle.fd == intr_handle->fd) {
			/* we had no interrupts for this */
			if (TAILQ_EMPTY(&src->callbacks))
				wake_thread = 1;

			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			ret = 0;
			break;
		}
	}

	/* no existing callbacks for this - add new source */
	if (src == NULL) {
		if ((src = rte_zmalloc("interrupt source list",
				sizeof(*src), 0)) == NULL) {
			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
			rte_free(callback);
			ret = -ENOMEM;
		} else {
			src->intr_handle = *intr_handle;
			TAILQ_INIT(&src->callbacks);
			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			TAILQ_INSERT_TAIL(&intr_sources, src, next);
			wake_thread = 1;
			ret = 0;
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/**
	 * check if need to notify the pipe fd waited by epoll_wait to
	 * rebuild the wait list.
	 */
	if (wake_thread)
		if (write(intr_pipe.writefd, "1", 1) < 0)
			return -EPIPE;

	return (ret);
}
在rte_eal_init初始化過程中調用了rte_eal_intr_init, rte_eal_intr_init里面會初始化一個中斷處理線程
/**
 * Set up EAL interrupt handling.
 *
 * Initialises the global interrupt source list, creates the pipe used
 * to tell the interrupt thread to rebuild its wait list, and starts
 * that thread (eal_intr_thread_main).
 *
 * @return
 *   0 on success, -1 when the pipe cannot be created, or the negated
 *   pthread_create() error code when the thread cannot be started.
 */
int
rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe which will be waited by epoll and notified to
	 * rebuild the wait list of epoll.
	 */
	if (pipe(intr_pipe.pipefd) < 0)
		return -1;

	/*
	 * create the host thread to wait/handle the interrupt: it watches
	 * the fds recorded in intr_sources and runs their callbacks
	 */
	ret = pthread_create(&intr_thread, NULL,
			eal_intr_thread_main, NULL);
	if (ret != 0)
		RTE_LOG(ERR, EAL,
			"Failed to create thread for interrupt handling\n");

	return -ret;
}
/**
 * It builds/rebuilds up the epoll file descriptor with all the
 * file descriptors being waited on. Then handles the interrupts.
 *
 * Each iteration creates a fresh epoll instance, adds the notification
 * pipe's read end plus the fd of every interrupt source that has at
 * least one callback, and hands the instance to
 * eal_intr_handle_interrupts(). When that call returns, the epoll fd is
 * closed and the wait list is rebuilt from scratch.
 *
 * Any failure to create the epoll instance or to add an fd to it is
 * fatal (rte_panic).
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never return;
 */
static __attribute__((noreturn)) void *
eal_intr_thread_main(__rte_unused void *arg)
{
	struct epoll_event ev;

	/* host thread, never break out */
	for (;;) {
		/* build up the epoll fd with all descriptors we are to
		 * wait on then pass it to the handle_interrupts function
		 */
		static struct epoll_event pipe_event = {
			.events = EPOLLIN | EPOLLPRI,
		};
		struct rte_intr_source *src;
		unsigned numfds = 0;

		/* create epoll fd */
		int pfd = epoll_create(1);
		if (pfd < 0)
			rte_panic("Cannot create epoll instance\n");

		/*
		 * rte_intr_callback_register() writes to the pipe when a
		 * source gains its first callback; the pipe's read end is
		 * watched here so that this thread re-reads intr_sources and
		 * rebuilds the epoll set.
		 */
		pipe_event.data.fd = intr_pipe.readfd;
		/**
		 * add pipe fd into wait list, this pipe is used to
		 * rebuild the wait list.
		 */
		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
						&pipe_event) < 0) {
			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
					intr_pipe.readfd, strerror(errno));
		}
		numfds++;

		rte_spinlock_lock(&intr_lock);

		/* add the fd of every interrupt source to the epoll set */
		TAILQ_FOREACH(src, &intr_sources, next) {
			if (src->callbacks.tqh_first == NULL)
				continue; /* skip those with no callbacks */
			ev.events = EPOLLIN | EPOLLPRI;
			ev.data.fd = src->intr_handle.fd;

			/**
			 * add all the uio device file descriptor
			 * into wait list.
			 */
			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
					src->intr_handle.fd, &ev) < 0){
				rte_panic("Error adding fd %d epoll_ctl, %s\n",
					src->intr_handle.fd, strerror(errno));
			}
			else
				numfds++;
		}
		rte_spinlock_unlock(&intr_lock);

		/* serve the interrupt: wait for fd events and run callbacks */
		eal_intr_handle_interrupts(pfd, numfds);

		/**
		 * when we return, we need to rebuild the
		 * list of fds to monitor.
		 */
		close(pfd);
	}
}
static inteal_intr_process_interrupts(struct epoll_event *events, int nfds){ int n, bytes_read; struct rte_intr_source *src; struct rte_intr_callback *cb; union rte_intr_read_buffer buf; struct rte_intr_callback active_cb; for (n = 0; n < nfds; n++) { /* 如果是intr_pipe.readfd,則表示需要重新建epoll的fd等待鏈表,本循環(huán)退出 */ /** * if the pipe fd is ready to read, return out to * rebuild the wait list. */ if (events[n].data.fd == intr_pipe.readfd){ int r = read(intr_pipe.readfd, buf.charbuf, sizeof(buf.charbuf)); RTE_SET_USED(r); return -1; } /* 中斷fd */ rte_spinlock_lock(&intr_lock); TAILQ_FOREACH(src, &intr_sources, next) if (src->intr_handle.fd == events[n].data.fd) break; if (src == NULL){ rte_spinlock_unlock(&intr_lock); continue; } /* mark this interrupt source as active and release the lock. */ src->active = 1; rte_spinlock_unlock(&intr_lock); /* EM的中斷只需要處理這兩個(gè) */ /* set the length to be read dor different handle type */ switch (src->intr_handle.type) { case RTE_INTR_HANDLE_UIO: bytes_read = 4; break; case RTE_INTR_HANDLE_ALARM: bytes_read = sizeof(uint64_t); break; default: bytes_read = 1; break; } /** * read out to clear the ready-to-be-read flag * for epoll_wait. */ bytes_read = read(events[n].data.fd, &buf, bytes_read); if (bytes_read < 0) RTE_LOG(ERR, EAL, "Error reading from file " "descriptor %d: %s/n", events[n].data.fd, strerror(errno)); else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d/n", events[n].data.fd); /* callback調(diào)用 */ /* grab a lock, again to call callbacks and update status. */ rte_spinlock_lock(&intr_lock); if (bytes_read > 0) { /* Finally, call all callbacks. */ TAILQ_FOREACH(cb, &src->callbacks, next) { /* make a copy and unlock. */ active_cb = *cb; rte_spinlock_unlock(&intr_lock); /* call the actual callback */ active_cb.cb_fn(&src->intr_handle, active_cb.cb_arg); /*get the lcok back. */ rte_spinlock_lock(&intr_lock); } } /* we done with that interrupt source, release it. 
*/ src->active = 0; rte_spinlock_unlock(&intr_lock); } return 0;}
對于E1000的驅動注冊的callback eth_em_interrupt_handler里面處理了link狀態的回調, link down消息則關閉收發包, link up開啟收發包;
/*
 * Interrupt callback installed by the em PMD.
 *
 * param is the rte_eth_dev pointer that was supplied as the callback
 * argument at registration time; the interrupt handle itself is unused.
 * The three steps below run in a fixed order.
 */
static void
eth_em_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
			 void *param)
{
	struct rte_eth_dev *eth_dev = param;

	/*
	 * 1. Fetch the interrupt status (per the article author: read the
	 *    register to see whether the hardware state changed).
	 */
	eth_em_interrupt_get_status(eth_dev);

	/*
	 * 2. Act on it (per the article author: program the RX/TX
	 *    registers according to the link state).
	 */
	eth_em_interrupt_action(eth_dev);

	/*
	 * 3. Run the user callbacks for the LSC event; per the article
	 *    author, applications subscribe with
	 *    rte_eth_dev_callback_register().
	 */
	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC);
}
后面還有收發包隊列的初始化, 待分析;
新聞熱點
疑難解答
圖片精選