diff options
author | David S. Miller <davem@davemloft.net> | 2013-04-25 01:22:53 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-04-25 01:22:53 -0400 |
commit | 660f7d22298ceb82facd2088c197f2de5cbfb7d6 (patch) | |
tree | 2413117b5a07e3632a392e4f03a76ebb37e9c157 | |
parent | 92dea7c06656f709a3957aacef20574ce3dbe6fc (diff) | |
parent | 2940b26bec9fe5bf183c994678e62b55d35717e6 (diff) | |
download | linux-660f7d22298ceb82facd2088c197f2de5cbfb7d6.tar.bz2 |
Merge branch 'af_packet-timestamp'
Daniel Borkmann says:
====================
This is a joint effort with Willem to i) bring optional tx hw/sw
timestamping into PF_PACKET, as reported by Paul Chavent,
and ii) expose the type of timestamp to the user, which currently
cannot be distinguished with the RX_RING
and TX_RING API (but can be distinguished through the normal timestamping
API), as reported by Richard Cochran. This set is based on top of
``packet: account statistics only in tpacket_stats_u''. Related
discussion can be found in: http://patchwork.ozlabs.org/patch/238125/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/packet_mmap.txt | 41 | ||||
-rw-r--r-- | include/uapi/linux/if_packet.h | 27 | ||||
-rw-r--r-- | net/core/skbuff.c | 12 | ||||
-rw-r--r-- | net/packet/af_packet.c | 87 |
4 files changed, 122 insertions, 45 deletions
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 65efb85e49de..23dd80e82b8e 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -1016,10 +1016,11 @@ retry_block: ------------------------------------------------------------------------------- The PACKET_TIMESTAMP setting determines the source of the timestamp in -the packet meta information. If your NIC is capable of timestamping -packets in hardware, you can request those hardware timestamps to used. -Note: you may need to enable the generation of hardware timestamps with -SIOCSHWTSTAMP. +the packet meta information for mmap(2)ed RX_RING and TX_RINGs. If your +NIC is capable of timestamping packets in hardware, you can request those +hardware timestamps to be used. Note: you may need to enable the generation +of hardware timestamps with SIOCSHWTSTAMP (see related information from +Documentation/networking/timestamping.txt). PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE @@ -1031,8 +1032,36 @@ SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set. req |= SOF_TIMESTAMPING_SYS_HARDWARE; setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req)) -If PACKET_TIMESTAMP is not set, a software timestamp generated inside -the networking stack is used (the behavior before this setting was added). +For the mmap(2)ed ring buffers, such timestamps are stored in the +tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine +what kind of timestamp has been reported, the tp_status field is binary |'ed +with the following possible bits ... + + TP_STATUS_TS_SYS_HARDWARE + TP_STATUS_TS_RAW_HARDWARE + TP_STATUS_TS_SOFTWARE + +... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the +RX_RING, if none of those 3 are set (i.e. 
PACKET_TIMESTAMP is not set), +then this means that a software fallback was invoked *within* PF_PACKET's +processing code (less precise). + +Getting timestamps for the TX_RING works as follows: i) fill the ring frames, +ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant +frames to be updated resp. the frame handed over to the application, iv) walk +through the frames to pick up the individual hw/sw timestamps. + +Only (!) if transmit timestamping is enabled, then these bits are combined +with binary | with TP_STATUS_AVAILABLE, so you must check for that in your +application (e.g. !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)) +in a first step to see if the frame belongs to the application, and then +one can extract the type of timestamp in a second step from tp_status)! + +If you don't care about them, thus having it disabled, checking for +TP_STATUS_AVAILABLE resp. TP_STATUS_WRONG_FORMAT is sufficient. If in the +TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec +members do not contain a valid value. For TX_RINGs, by default no timestamp +is generated! See include/linux/net_tstamp.h and Documentation/networking/timestamping for more information on hardware timestamps. 
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 8136658ea477..b950c02030c0 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -86,19 +86,24 @@ struct tpacket_auxdata { }; /* Rx ring - header status */ -#define TP_STATUS_KERNEL 0x0 -#define TP_STATUS_USER 0x1 -#define TP_STATUS_COPY 0x2 -#define TP_STATUS_LOSING 0x4 -#define TP_STATUS_CSUMNOTREADY 0x8 -#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ -#define TP_STATUS_BLK_TMO 0x20 +#define TP_STATUS_KERNEL 0 +#define TP_STATUS_USER (1 << 0) +#define TP_STATUS_COPY (1 << 1) +#define TP_STATUS_LOSING (1 << 2) +#define TP_STATUS_CSUMNOTREADY (1 << 3) +#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */ +#define TP_STATUS_BLK_TMO (1 << 5) /* Tx ring - header status */ -#define TP_STATUS_AVAILABLE 0x0 -#define TP_STATUS_SEND_REQUEST 0x1 -#define TP_STATUS_SENDING 0x2 -#define TP_STATUS_WRONG_FORMAT 0x4 +#define TP_STATUS_AVAILABLE 0 +#define TP_STATUS_SEND_REQUEST (1 << 0) +#define TP_STATUS_SENDING (1 << 1) +#define TP_STATUS_WRONG_FORMAT (1 << 2) + +/* Rx and Tx ring - header status */ +#define TP_STATUS_TS_SOFTWARE (1 << 29) +#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) +#define TP_STATUS_TS_RAW_HARDWARE (1 << 31) /* Rx ring - feature request bits */ #define TP_FT_REQ_FILL_RXHASH 0x1 diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 898cf5c566f9..af9185d0be6a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3327,12 +3327,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) - return; - if (hwtstamps) { - *skb_hwtstamps(skb) = + *skb_hwtstamps(orig_skb) = *hwtstamps; } else { /* @@ -3340,9 +3336,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, * so keep the shared tx_flags and only * store software time stamp */ - skb->tstamp = ktime_get_real(); + orig_skb->tstamp = ktime_get_real(); } + skb = skb_clone(orig_skb, GFP_ATOMIC); 
+ if (!skb) + return; + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7e387ff64465..ba8309a3e01b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -339,6 +339,59 @@ static int __packet_get_status(struct packet_sock *po, void *frame) } } +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, + unsigned int flags) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + + if (shhwtstamps) { + if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) + return TP_STATUS_TS_SYS_HARDWARE; + if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return TP_STATUS_TS_RAW_HARDWARE; + } + + if (ktime_to_timespec_cond(skb->tstamp, ts)) + return TP_STATUS_TS_SOFTWARE; + + return 0; +} + +static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, + struct sk_buff *skb) +{ + union tpacket_uhdr h; + struct timespec ts; + __u32 ts_status; + + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + return 0; + + h.raw = frame; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_sec = ts.tv_sec; + h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; + break; + case TPACKET_V2: + h.h2->tp_sec = ts.tv_sec; + h.h2->tp_nsec = ts.tv_nsec; + break; + case TPACKET_V3: + default: + WARN(1, "TPACKET version not supported.\n"); + BUG(); + } + + /* one flush is safe, as both fields always lie on the same cacheline */ + flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); + smp_wmb(); + + return ts_status; +} + static void *packet_lookup_frame(struct packet_sock *po, struct packet_ring_buffer *rb, unsigned int position, @@ -1657,26 +1710,6 @@ drop: return 0; } -static void tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, - unsigned int flags) -{ - struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - - if 
(shhwtstamps) { - if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) - return; - if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) - return; - } - - if (ktime_to_timespec_cond(skb->tstamp, ts)) - return; - - getnstimeofday(ts); -} - static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1691,6 +1724,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timespec ts; + __u32 ts_status; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1773,7 +1807,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); - tpacket_get_timestamp(skb, &ts, po->tp_tstamp); + + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + getnstimeofday(&ts); + + status |= ts_status; switch (po->tp_version) { case TPACKET_V1: @@ -1874,10 +1912,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb) void *ph; if (likely(po->tx_ring.pg_vec)) { + __u32 ts; + ph = skb_shinfo(skb)->destructor_arg; BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); - __packet_set_status(po, ph, TP_STATUS_AVAILABLE); + + ts = __packet_set_timestamp(po, ph, skb); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); } sock_wfree(skb); @@ -1900,6 +1942,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; + sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; switch (po->tp_version) { |