本文共 4138 字,大约阅读时间需要 13 分钟。
上面的原型是epoll的fd所维护的主结构,下面是每一个具体的fd结构。
以后每一个fd加入到epoll中,就会创建一个struct epitem结构,并插入至红黑树中。
接着是epoll_ctl函数的实现(有删节):
- /*
-  * sys_epoll_ctl - apply operation @op for target descriptor @fd on the
-  * epoll instance referred to by @epfd.
-  *
-  * @epfd:  descriptor of the epoll file
-  * @op:    requested operation (only EPOLL_CTL_ADD is shown in this excerpt)
-  * @fd:    target descriptor
-  * @event: user-space event description; copied in when ep_op_has_event(op)
-  *
-  * Returns the result of the selected operation or a negative errno:
-  *   -EFAULT  @event could not be copied from user space
-  *   -EBADF   @epfd or @fd does not resolve to an open file
-  *   -EPERM   the target file has no poll method
-  *   -EINVAL  @epfd and @fd are the same file, @epfd is not an epoll file,
-  *            or no case of the switch replaced the initial value
-  *   -EEXIST  EPOLL_CTL_ADD for a target that is already registered
-  *
-  * Every exit path drops the file references taken with fget(), and the
-  * paths that took ep->mtx release it before returning.
-  */
- asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
- {
-     int error;
-     struct file *file, *tfile;
-     struct eventpoll *ep;
-     struct epitem *epi;
-     struct epoll_event epds;
-
-     error = -EFAULT;
-     /* Validate the user-supplied event argument. */
-     if (ep_op_has_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event)))
-         goto error_return;
-
-     error = -EBADF;
-     file = fget(epfd);
-     if (!file)
-         goto error_return;
-
-     tfile = fget(fd);
-     if (!tfile)
-         goto error_fput;
-
-     error = -EPERM;
-     /* The target must provide a poll method. */
-     if (!tfile->f_op || !tfile->f_op->poll)
-         goto error_tgt_fput;
-
-     error = -EINVAL;
-     /* Refuse to let the epoll file watch itself; @epfd must be an epoll file. */
-     if (file == tfile || !is_file_epoll(file))
-         goto error_tgt_fput;
-     /* Per the original note: stored at create time, fetched back here. */
-     ep = file->private_data;
-
-     mutex_lock(&ep->mtx);
-     /* Look up an existing item for (tfile, fd) so duplicates can be rejected. */
-     epi = ep_find(ep, tfile, fd);
-     error = -EINVAL;
-
-     switch (op)
-     {
- ….....
-     case EPOLL_CTL_ADD:
-         if (!epi)
-         {
-             /* These two events are always added to the requested mask. */
-             epds.events |= EPOLLERR | EPOLLHUP;
-             error = ep_insert(ep, &epds, tfile, fd);
-         } else
-             error = -EEXIST;
-         break;
- …....
-     }
-     mutex_unlock(&ep->mtx);
-
- error_tgt_fput:
-     fput(tfile);
- error_fput:
-     fput(file);
- error_return:
-     return error;
- }
下面就是插入代码:
- /*
-  * ep_insert - create an epitem for (tfile, fd) and attach it to @ep.
-  *
-  * @ep:    epoll instance receiving the new item
-  * @event: event mask and user data, copied into the item
-  * @tfile: target file
-  * @fd:    target descriptor number
-  *
-  * Returns 0 on the success path shown here. The error paths
-  * (error_unregister / error_return) are elided in this excerpt; `error`
-  * is set to -ENOMEM before the allocation attempt.
-  */
- static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-                      struct file *tfile, int fd)
- {
-     int error, revents, pwake = 0;
-     unsigned long flags;
-     struct epitem *epi;
-     /*
-      * Carrier that pairs the poll_table with the item being inserted:
-      *
-      * struct ep_pqueue {
-      *     poll_table pt;
-      *     struct epitem *epi;
-      * };
-      */
-     struct ep_pqueue epq;
-
-     /* Allocate one epitem to represent the descriptor being added. */
-     error = -ENOMEM;
-     if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
-         goto error_return;
-     /* Initialize the new item. */
-     ep_rb_initnode(&epi->rbn);
-     INIT_LIST_HEAD(&epi->rdllink);
-     INIT_LIST_HEAD(&epi->fllink);
-     INIT_LIST_HEAD(&epi->pwqlist);
-     epi->ep = ep;
-     ep_set_ffd(&epi->ffd, tfile, fd);
-     epi->event = *event;
-     epi->nwait = 0;
-     epi->next = EP_UNACTIVE_PTR;
-
-     epq.epi = epi;
-     /* Install the poll queueing callback. */
-     init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
-     /*
-      * Call the target's poll to fetch the current event bits; the call is
-      * also what ends up invoking ep_ptable_queue_proc to do the registration.
-      */
-     revents = tfile->f_op->poll(tfile, &epq.pt);
-
-     /* A negative nwait is how ep_ptable_queue_proc reports a failure. */
-     if (epi->nwait < 0)
-         goto error_unregister;
-
-     /* Link the item into the target file's list. */
-     spin_lock(&tfile->f_ep_lock);
-     list_add_tail(&epi->fllink, &tfile->f_ep_links);
-     spin_unlock(&tfile->f_ep_lock);
-
-     ep_rbtree_insert(ep, epi);
-     spin_lock_irqsave(&ep->lock, flags);
-
-     /* Already-pending requested events: queue the item as ready right away. */
-     if ((revents & event->events) && !ep_is_linked(&epi->rdllink))
-     {
-         list_add_tail(&epi->rdllink, &ep->rdllist);
-         if (waitqueue_active(&ep->wq))
-             __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
-
-         /* The poll_wait wakeup is deferred until ep->lock is released. */
-         if (waitqueue_active(&ep->poll_wait))
-             pwake++;
-     }
-
-     spin_unlock_irqrestore(&ep->lock, flags);
-     if (pwake)
-         ep_poll_safewake(&psw, &ep->poll_wait);
- …....
-
-     return 0;
-
- …...
-
- }
- /*
-  * ep_ptable_queue_proc - poll_table queueing callback.
-  *
-  * Installed with init_poll_funcptr() in ep_insert and reached through the
-  * target file's poll method. For each wait queue head handed in, it
-  * allocates an eppoll_entry, hooks it onto @whead with ep_poll_callback as
-  * the wake function, and links it into the item's pwqlist.
-  *
-  * @file:  target file (not used in the body shown)
-  * @whead: wait queue head to hook onto
-  * @pt:    poll_table from which the owning epitem is recovered
-  *
-  * On failure (nwait already negative, or allocation failed) epi->nwait is
-  * set to -1; on success it is incremented.
-  */
- static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
-                                  poll_table *pt)
- {
-     /* Recover the epitem from the struct ep_pqueue used at registration. */
-     struct epitem *epi = ep_item_from_epqueue(pt);
-     /*
-      * Per-item wait entry, chained through epi->pwqlist:
-      *
-      * struct eppoll_entry {
-      *     struct list_head llink;
-      *     void *base;
-      *     wait_queue_t wait;
-      *     wait_queue_head_t *whead;
-      * };
-      */
-     struct eppoll_entry *entry;
-
-     /* Bail out if an earlier call already failed or no entry can be allocated. */
-     if (epi->nwait < 0 || !(entry = kmem_cache_alloc(pwq_cache, GFP_KERNEL)))
-     {
-         epi->nwait = -1;
-         return;
-     }
-
-     /* ep_poll_callback is the function that runs when @whead is woken. */
-     init_waitqueue_func_entry(&entry->wait, ep_poll_callback);
-     entry->whead = whead;
-     entry->base = epi;
-     /* Hook the entry onto the wait queue supplied by the target. */
-     add_wait_queue(whead, &entry->wait);
-     /* Remember the entry on the item's own list as well. */
-     list_add_tail(&entry->llink, &epi->pwqlist);
-     epi->nwait++;
- }
- /*
-  * ep_poll_callback - wait-queue wake function registered by
-  * ep_ptable_queue_proc; runs when a watched wait queue is woken.
-  *
-  * @wait: wait queue entry from which the owning epitem is recovered
-  * @mode, @sync, @key: not used in the body shown
-  *
-  * Under ep->lock it either parks the item on ep->ovflist or appends it to
-  * ep->rdllist, then wakes waiters on ep->wq. The wakeup of ep->poll_wait
-  * is issued after the lock is dropped. Always returns 1.
-  */
- static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
- {
-     struct epitem *epi = ep_item_from_wait(wait);
-     struct eventpoll *ep = epi->ep;
-     unsigned long irqflags;
-     int wake_poll = 0;
-
-     spin_lock_irqsave(&ep->lock, irqflags);
-
-     /*
-      * Check the registered interest mask.
-      * #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
-      * If nothing besides those private bits is set, there is nothing to report.
-      */
-     if (!(epi->event.events & ~EP_PRIVATE_BITS))
-         goto out_unlock;
-
-     /*
-      * When ep->ovflist is active, chain the item there (once) instead of
-      * touching the ready list, and skip the wakeups.
-      * NOTE(review): presumably the ready list is being consumed elsewhere
-      * at this point -- confirm against the rest of the file.
-      */
-     if (unlikely(ep->ovflist != EP_UNACTIVE_PTR))
-     {
-         if (epi->next == EP_UNACTIVE_PTR)
-         {
-             epi->next = ep->ovflist;
-             ep->ovflist = epi;
-         }
-         goto out_unlock;
-     }
-
-     /* The key step: put the item on the epoll ready list if it is not there yet. */
-     if (!ep_is_linked(&epi->rdllink))
-         list_add_tail(&epi->rdllink, &ep->rdllist);
-
-     if (waitqueue_active(&ep->wq))
-         __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
-     if (waitqueue_active(&ep->poll_wait))
-         wake_poll++;
-
- out_unlock:
-     spin_unlock_irqrestore(&ep->lock, irqflags);
-
-     if (wake_poll)
-         ep_poll_safewake(&psw, &ep->poll_wait);
-     return 1;
- }
这里采用了两级回调方式,流程如下:
目前为止,整个数据结构就可以描述如下:
转载地址:http://wzafb.baihongyu.com/