1、 local_irq_save(flags); queue = &_get_cpu_var (softnet_data ); /* 获取当前 CPU 的 softnet_data 数据 */ _get_cpu_var(netdev_rx_stat ).total +; /* 当前 CPU 接收的帧数加 1 */ if (queue-input_pkt_queue .qlen input_pkt_queue, skb); local_irq_restore(flags); return net_rx_success; /* 当队列为空时,表明该队列还没有被软中断调度(schedule),因此需要将此队列加入到软中断的处理链表中。 */
2、可以看到加入的正好是 backlog;由于调用 netif_rx 的是非 NAPI 的驱动,因此 backlog 的轮询函数就是初始化时设置的 process_backlog 函数。 napi_schedule(&backlog); goto enqueue; _get_cpu_var(netdev_rx_stat ).dropped +; kfree_skb(skb); 上面代码中用到一个关键的数据结构 softnet_data。在网卡收发数据的时候,需要维护一个缓冲区队列,来缓存可能存在的突发数据;在协议栈中用一个队列层来表示该缓冲区,队列层位于数据链路层和网络层之间。softnet_data 就是数据链路层中的数据
3、结构,它是一个 per-CPU 变量,每个 CPU 都有一个独立的实例。 /* * netif_receive_skb - process receive buffer from network buffer to process * netif_receive_skb() is the main receive data processing function. * it always succeeds. the buffer may be dropped during processing * for congestion control or by the protocol layers. * this fu
4、nction may only be called from softirq context and interrupts * should be enabled. * return values (usually ignored): * net_rx_success: no congestion * net_rx_drop: packet was dropped */ netif_receive_skb 是 netif_rx 在 NAPI 下的对等函数,它向内核递交一个报文。当一个兼容 NAPI 的驱动已经处理完所有已接收的报文时,它应当重新打开中断,并且调用 netif_rx_complete(现
5、在是 __napi_complete())来停止轮询。 int netif_receive_skb( struct sk_buff * skb) struct packet_type * ptype, *pt_prev ; struct net_device * orig_dev; struct net_device * master; struct net_device * null_or_orig; struct net_device * null_or_bond; int ret = net_rx_drop; _be16 type;tstamp .tv64 ) if (vlan_tx_tag
6、_present (skb ) & vlan_hwaccel_do_receive(skb) /* if weve gotten here through napi, check netpoll */ if (netpoll_receive_skb (skb )skb_iif ) skb-skb_iif = skb -dev- ifindex;/ 记录帧的入口 null_or_orig = null; orig_dev = skb-dev; master = access_once (orig_dev -master); if (master) if (skb_bond_should_drop
7、 (skb , master ) null_or_orig = orig_dev ; /* deliver only exact match */ elsedev = master ; skb_reset_network_header(skb); skb_reset_transport_header(skb);mac_len = skb -network_header - skb-mac_header ; pt_prev = null; rcu_read_lock();#ifdef config_net_cls_act if (skb-tc_verd & tc_ncls) tc_verd =
8、clr_tc_ncls( skb-tc_verd ); goto ncls;#endif /* 处理 ptype_all 上所有的 packet_type->func()。这里先提一下:Linux 根据 packet_type,通过 dev_add_pack() 函数来注册相应的处理函数;后面会讲如何注册,以及每种包对应哪个处理函数。 */ static struct list_head ptype_all _read_mostly; list_for_each_entry_rcu(ptype, &ptype_all , list ) if (ptype-dev = null_or_orig | ptype
9、-dev = skb- dev | ptype-dev = orig_dev) if (pt_prev) ret = deliver_skb (skb , pt_prev , orig_dev ); /* 调用相应的包处理函数 */ pt_prev = ptype; skb = handle_ing (skb , &pt_prev , &ret , orig_dev );skb) goto out;ncls: /* 若编译内核时选上了 bridge,下面会执行网桥模块 */ skb = handle_bridge (skb , & /* 只有编译内核时选上了 macvlan 模块,下面才会执行 */ skb = handle_macvl
10、an (skb , & * make sure frames received on vlan interfaces stacked on * bonding interfaces still make their way to any base bonding * device that may have registered for a specific ptype. the * handler may have to adjust skb-dev and orig_dev. null_or_bond = null; if (skb-priv_flags & iff_802_1q_vlan
11、) & (vlan_dev_real_dev( skb-dev)- iff_bonding) null_or_bond = vlan_dev_real_dev (skb -dev); /* 最后,type = skb->protocol,取 &ptype_base[ntohs(type) & 15],处理 ptype_base[ntohs(type) & 15] 上所有的 packet_type->func();根据第二层的不同协议进入不同的钩子函数,重要的有:ip_rcv()、arp_rcv() */ type = skb-protocol ; list_for_each_entry_rcu(ptype,ptype_base
12、ntohs (type ) & ptype_hash_mask, list) type = type & (ptype -dev = null_or_orig | dev | ptype-dev = orig_dev |dev = null_or_bond) if (pt_prev) ret = pt_prev -func( skb, skb-dev, pt_prev , orig_dev ); else /* jamal, now you will not able to escape explaining * me how you were going to use this. :-) r
13、et = net_rx_drop ;out: rcu_read_unlock(); return ret; * dev_queue_xmit - transmit a buffer buffer to transmit * queue a buffer for transmission to a network device. the caller must * have set the device and priority and built the buffer before calling * this function. the function can be called from
14、 an interrupt. * a negative errno code is returned on a failure. a success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. * - * i notice this method can also return errors from the queue disciplines, * including net_xmit_drop, which is
15、 a positive value. so, errors can also * be positive. * regardless of the return value, the skb is consumed, so it is currently * difficult to retry a send to this method. (you can bump the ref count * before sending to hold a reference for retry if you are careful.) * when calling this method, inte
16、rrupts must be enabled. this is because * the bh enable code must have irqs enabled so that it will not deadlock. * -blgint dev_queue_xmit( struct sk_buff * skb) struct net_device * dev = skb- struct netdev_queue * txq; struct qdisc * q; int rc = - enomem; /* gso will handle the following emulations
17、 directly. */ if (netif_needs_gso (dev , skb ) /* 如果是 GSO 数据包,且设备无法直接处理该 GSO 数据包(需要由软件完成分段) */ goto gso; /* convert a paged skb to linear, if required */ if (skb_needs_linearize (skb , dev ) & _skb_linearize(skb) goto out_kfree_skb; /* if packet is not checksummed and device does not support * checksumming for this protoc
18、ol, complete checksumming here.ip_summed = checksum_partial) skb_set_transport_header(skb, skb-csum_start - skb_headroom(skb);dev_can_checksum (dev , skb ) & skb_checksum_help(skb)gso: /* disable soft irqs for various locks below. also * stops preemption for rcu. rcu_read_lock_bh(); txq = dev_pick_t
19、x (dev , skb ); q = rcu_dereference_bh(txq-qdisc );tc_verd = set_tc_at( skb-tc_verd , at_egress ); if (q-enqueue ) rc = _dev_xmit_skb (skb , q , dev , txq ); /* the device has no queue. common case for software devices: loopback, all the sorts of tunnels. really, it is unlikely that netif_tx_lock pr
20、otection is necessary here. (f.e. loopback and ip tunnels are clean ignoring statistics counters.) however, it is possible, that they rely on protection made by us here. check this and shot the lock. it is not prone from deadlocks. either shot noqueue qdisc, it is even simpler 8) if (dev-flags & iff
21、_up) int cpu = smp_processor_id(); /* ok because bhs are off */ if (txq-xmit_lock_owner != cpu) hard_tx_lock(dev, txq, cpu);netif_tx_queue_stopped (txq ) rc = dev_hard_start_xmit (skb , dev , txq ); if (dev_xmit_complete (rc ) hard_tx_unlock(dev, txq); if (net_ratelimit () printk(kern_crit virtual d
22、evice %s asks to queue packet!n , dev -name); /* recursion is detected! it is possible, * unfortunately */dead loop on virtual device %s, fix it urgently! rc = -enetdown ; rcu_read_unlock_bh();out_kfree_skb: return rc; 谈到数据链路层,就不得不提 struct net_device 及其相关结构。在 2.6.29 之后,net_device 结构进行了调整,操作函数被重构到了 net_devic
23、e_ops 中。下面简要分析一下:struct net_device /*this first field, name, is the beginning of the visible part of this structure. it contains the string that is the name of the interface. by visible, we mean that this part of the data structure is generic and doesnt contain any private areas specific to a partic
24、ular type of device.*/ char nameifnamsiz ; /* device name hash chain */ struct hlist_node name_hlist; /* snmp alias */ char *ifalias ; * i/o specific fields * fixme: merge these and struct ifmap into one unsigned long mem_end; /* shared mem end */ unsigned long mem_start; /* shared mem start */ unsigned long base_addr; /* device i/o address */
copyright@ 2008-2023 冰点文库 网站版权所有
经营许可证编号:鄂ICP备19020893号-2