diff options
Diffstat (limited to 'Documentation')
20 files changed, 317 insertions, 74 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 416c5d59f52e..d322b0581194 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -1,3 +1,14 @@ +What: /sys/class/net/<iface>/name_assign_type +Date: July 2014 +KernelVersion: 3.17 +Contact: netdev@vger.kernel.org +Description: + Indicates the name assignment type. Possible values are: + 1: enumerated by the kernel, possibly in an unpredictable way + 2: predictably named by the kernel + 3: named by userspace + 4: renamed + What: /sys/class/net/<iface>/addr_assign_type Date: July 2010 KernelVersion: 3.2 diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt index d01ed63d3ebb..42409bfe04c4 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt @@ -8,10 +8,16 @@ Required properties: - SerDes integration registers (1/2) - SerDes integration registers (2/2) +Optional properties: +- amd,speed-set: Speed capabilities of the device + 0 - 1GbE and 10GbE (default) + 1 - 2.5GbE and 10GbE + Example: xgbe_phy@e1240800 { compatible = "amd,xgbe-phy-seattle-v1a", "ethernet-phy-ieee802.3-c45"; reg = <0 0xe1240800 0 0x00400>, <0 0xe1250000 0 0x00060>, <0 0xe1250080 0 0x00004>; + amd,speed-set = <0>; }; diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt index ea0c7908a3b8..41354f730beb 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt @@ -8,16 +8,21 @@ Required properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - interrupts: Should contain the amd-xgbe interrupt -- clocks: Should be the DMA clock for the amd-xgbe device (used for - calculating the correct Rx interrupt watchdog timer value on a DMA - channel for coalescing) -- clock-names: Should be the name of the DMA clock, "dma_clk" +- clocks: + - DMA clock for the amd-xgbe device (used for calculating the + correct Rx interrupt watchdog timer value on a DMA channel + for coalescing) + - PTP clock for the amd-xgbe device +- clock-names: Should be the names of the clocks + - "dma_clk" for the DMA clock + - "ptp_clk" for the PTP clock - phy-handle: See ethernet.txt file in the same directory - phy-mode: See ethernet.txt file in the same directory Optional properties: - mac-address: mac address to be assigned to the device. Can be overridden by UEFI. +- dma-coherent: Present if dma operations are coherent Example: xgbe@e0700000 { @@ -26,8 +31,8 @@ Example: <0 0xe0780000 0 0x80000>; interrupt-parent = <&gic>; interrupts = <0 325 4>; - clocks = <&xgbe_clk>; - clock-names = "dma_clk"; + clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>; + clock-names = "dma_clk", "ptp_clk"; phy-handle = <&phy>; phy-mode = "xgmii"; mac-address = [ 02 a1 a2 a3 a4 a5 ]; diff --git a/Documentation/devicetree/bindings/net/broadcom-systemport.txt b/Documentation/devicetree/bindings/net/broadcom-systemport.txt index c183ea90d9bc..aa7ad622259d 100644 --- a/Documentation/devicetree/bindings/net/broadcom-systemport.txt +++ b/Documentation/devicetree/bindings/net/broadcom-systemport.txt @@ -4,7 +4,8 @@ Required properties: - compatible: should be one of "brcm,systemport-v1.00" or "brcm,systemport" - reg: address and length of the register set for the device. - interrupts: interrupts for the device, first cell must be for the the rx - interrupts, and the second cell should be for the transmit queues + interrupts, and the second cell should be for the transmit queues. An + optional third interrupt cell for Wake-on-LAN can be specified - local-mac-address: Ethernet MAC address (48 bits) of this adapter - phy-mode: Should be a string describing the PHY interface to the Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt diff --git a/Documentation/devicetree/bindings/net/davinci-mdio.txt b/Documentation/devicetree/bindings/net/davinci-mdio.txt index 72efaaf764f7..0369e25aabd2 100644 --- a/Documentation/devicetree/bindings/net/davinci-mdio.txt +++ b/Documentation/devicetree/bindings/net/davinci-mdio.txt @@ -1,8 +1,8 @@ -TI SoC Davinci MDIO Controller Device Tree Bindings +TI SoC Davinci/Keystone2 MDIO Controller Device Tree Bindings --------------------------------------------------- Required properties: -- compatible : Should be "ti,davinci_mdio" +- compatible : Should be "ti,davinci_mdio" or "ti,keystone_mdio" - reg : physical base address and size of the davinci mdio registers map - bus_freq : Mdio Bus frequency @@ -19,7 +19,7 @@ file. Examples: mdio: davinci_mdio@4A101000 { - compatible = "ti,cpsw"; + compatible = "ti,davinci_mdio"; reg = <0x4A101000 0x1000>; bus_freq = <1000000>; }; @@ -27,7 +27,7 @@ Examples: (or) mdio: davinci_mdio@4A101000 { - compatible = "ti,cpsw"; + compatible = "ti,davinci_mdio"; ti,hwmods = "davinci_mdio"; bus_freq = <1000000>; }; diff --git a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt new file mode 100644 index 000000000000..0071883c08d8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt @@ -0,0 +1,29 @@ +*CC2520 IEEE 802.15.4 Compatible Radio* + +Required properties: + - compatible: should be "ti,cc2520" + - spi-max-frequency: maximal bus speed (8000000), should be set to 4000000 depends + sync or async operation mode + - reg: the chipselect index + - pinctrl-0: pin control group to be used for this controller. + - pinctrl-names: must contain a "default" entry. + - fifo-gpio: GPIO spec for the FIFO pin + - fifop-gpio: GPIO spec for the FIFOP pin + - sfd-gpio: GPIO spec for the SFD pin + - cca-gpio: GPIO spec for the CCA pin + - vreg-gpio: GPIO spec for the VREG pin + - reset-gpio: GPIO spec for the RESET pin +Example: + cc2520@0 { + compatible = "ti,cc2520"; + reg = <0>; + spi-max-frequency = <4000000>; + pinctrl-names = "default"; + pinctrl-0 = <&cc2520_cape_pins>; + fifo-gpio = <&gpio1 18 0>; + fifop-gpio = <&gpio1 19 0>; + sfd-gpio = <&gpio1 13 0>; + cca-gpio = <&gpio1 16 0>; + vreg-gpio = <&gpio0 31 0>; + reset-gpio = <&gpio1 12 0>; + }; diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt new file mode 100644 index 000000000000..aa4f4230bfd7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -0,0 +1,61 @@ +* Marvell Armada 375 Ethernet Controller (PPv2) + +Required properties: + +- compatible: should be "marvell,armada-375-pp2" +- reg: addresses and length of the register sets for the device. + Must contain the following register sets: + - common controller registers + - LMS registers + In addition, at least one port register set is required. +- clocks: a pointer to the reference clocks for this device, consequently: + - main controller clock + - GOP clock +- clock-names: names of used clocks, must be "pp_clk" and "gop_clk". + +The ethernet ports are represented by subnodes. At least one port is +required. + +Required properties (port): + +- interrupts: interrupt for the port +- port-id: should be '0' or '1' for ethernet ports, and '2' for the + loopback port +- phy-mode: See ethernet.txt file in the same directory + +Optional properties (port): + +- marvell,loopback: port is loopback mode +- phy: a phandle to a phy node defining the PHY address (as the reg + property, a single integer). Note: if this property isn't present, + then fixed link is assumed, and the 'fixed-link' property is + mandatory. + +Example: + +ethernet@f0000 { + compatible = "marvell,armada-375-pp2"; + reg = <0xf0000 0xa000>, + <0xc0000 0x3060>, + <0xc4000 0x100>, + <0xc5000 0x100>; + clocks = <&gateclk 3>, <&gateclk 19>; + clock-names = "pp_clk", "gop_clk"; + status = "okay"; + + eth0: eth0@c4000 { + interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>; + port-id = <0>; + status = "okay"; + phy = <&phy0>; + phy-mode = "gmii"; + }; + + eth1: eth1@c5000 { + interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; + port-id = <1>; + status = "okay"; + phy = <&phy3>; + phy-mode = "gmii"; + }; +}; diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt new file mode 100644 index 000000000000..3b58ae480344 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt @@ -0,0 +1,33 @@ +* STMicroelectronics SAS. ST21NFCB NFC Controller + +Required properties: +- compatible: Should be "st,st21nfcb_i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- reset-gpios: Output GPIO pin used to reset the ST21NFCB + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): + +&i2c2 { + + status = "okay"; + + st21nfcb: st21nfcb@8 { + + compatible = "st,st21nfcb_i2c"; + + reg = <0x08>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio5>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + + reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + }; +}; diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index e7106b50dbdc..34d4db1a4e25 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -9,6 +9,7 @@ Required properties: "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. "renesas,ether-r8a7791" if the device is a part of R8A7791 SoC. + "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC. "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC. - reg: offset and length of (1) the E-DMAC/feLic register block (required), (2) the TSU register block (optional). diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index a2acd2b26baf..9b03c57563a4 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -25,6 +25,10 @@ Required properties: - snps,force_sf_dma_mode Force DMA to use the Store and Forward mode for both tx and rx. This flag is ignored if force_thresh_dma_mode is set. +- snps,multicast-filter-bins: Number of multicast filter hash bins + supported by this device instance +- snps,perfect-filter-entries: Number of perfect filter entries supported + by this device instance Optional properties: - resets: Should contain a phandle to the STMMAC reset signal, if any @@ -47,6 +51,8 @@ Examples: mac-address = [000000000000]; /* Filled in by U-Boot */ max-frame-size = <3800>; phy-mode = "gmii"; + snps,multicast-filter-bins = <256>; + snps,perfect-filter-entries = <128>; clocks = <&clock>; clock-names = "stmmaceth">; }; diff --git a/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt b/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt new file mode 100644 index 000000000000..5dbf169cd81c --- /dev/null +++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt @@ -0,0 +1,41 @@ +Broadcom BCM43xx Fullmac wireless SDIO devices + +This node provides properties for controlling the Broadcom wireless device. The +node is expected to be specified as a child node to the SDIO controller that +connects the device to the system. + +Required properties: + + - compatible : Should be "brcm,bcm4329-fmac". + +Optional properties: + - brcm,drive-strength : drive strength used for SDIO pins on device in mA + (default = 6). + - interrupt-parent : the phandle for the interrupt controller to which the + device interrupts are connected. + - interrupts : specifies attributes for the out-of-band interrupt (host-wake). + When not specified the device will use in-band SDIO interrupts. + - interrupt-names : name of the out-of-band interrupt, which must be set + to "host-wake". + +Example: + +mmc3: mmc@01c12000 { + #address-cells = <1>; + #size-cells = <0>; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins_a>; + vmmc-supply = <®_vmmc3>; + bus-width = <4>; + non-removable; + status = "okay"; + + brcmf: bcrmf@1 { + reg = <1>; + compatible = "brcm,bcm4329-fmac"; + interrupt-parent = <&pio>; + interrupts = <10 8>; /* PH10 / EINT10 */ + interrupt-names = "host-wake"; + }; +}; diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 9c723ecd0025..eeb5b2e97bed 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -542,10 +542,10 @@ mode XOR policy: Transmit based on the selected transmit hash policy. The default policy is a simple [(source - MAC address XOR'd with destination MAC address) modulo - slave count]. Alternate transmit policies may be - selected via the xmit_hash_policy option, described - below. + MAC address XOR'd with destination MAC address XOR + packet type ID) modulo slave count]. Alternate transmit + policies may be selected via the xmit_hash_policy option, + described below. This mode provides load balancing and fault tolerance. @@ -801,10 +801,11 @@ xmit_hash_policy layer2 - Uses XOR of hardware MAC addresses to generate the - hash. The formula is + Uses XOR of hardware MAC addresses and packet type ID + field to generate the hash. The formula is - (source MAC XOR destination MAC) modulo slave count + hash = source MAC XOR destination MAC XOR packet type ID + slave number = hash modulo slave count This algorithm will place all traffic to a particular network peer on the same slave. @@ -819,7 +820,7 @@ xmit_hash_policy Uses XOR of hardware MAC addresses and IP addresses to generate the hash. The formula is - hash = source MAC XOR destination MAC + hash = source MAC XOR destination MAC XOR packet type ID hash = hash XOR source IP XOR destination IP hash = hash XOR (hash RSHIFT 16) hash = hash XOR (hash RSHIFT 8) @@ -2301,13 +2302,13 @@ broadcast: Like active-backup, there is not much advantage to this bandwidth. Additionally, the linux bonding 802.3ad implementation - distributes traffic by peer (using an XOR of MAC addresses), - so in a "gatewayed" configuration, all outgoing traffic will - generally use the same device. Incoming traffic may also end - up on a single device, but that is dependent upon the - balancing policy of the peer's 8023.ad implementation. In a - "local" configuration, traffic will be distributed across the - devices in the bond. + distributes traffic by peer (using an XOR of MAC addresses + and packet type ID), so in a "gatewayed" configuration, all + outgoing traffic will generally use the same device. Incoming + traffic may also end up on a single device, but that is + dependent upon the balancing policy of the peer's 8023.ad + implementation. In a "local" configuration, traffic will be + distributed across the devices in the bond. Finally, the 802.3ad mode mandates the use of the MII monitor, therefore, the ARP monitor is not available in this mode. diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index ee78eba78a9d..c48a9704bda8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -586,12 +586,12 @@ team driver's classifier for its load-balancing mode, netfilter's xt_bpf extension, PTP dissector/classifier, and much more. They are all internally converted by the kernel into the new instruction set representation and run in the eBPF interpreter. For in-kernel handlers, this all works transparently -by using sk_unattached_filter_create() for setting up the filter, resp. -sk_unattached_filter_destroy() for destroying it. The macro -SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed -code to run the filter. 'filter' is a pointer to struct sk_filter that we -got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. -skb pointer). All constraints and restrictions from sk_chk_filter() apply +by using bpf_prog_create() for setting up the filter, resp. +bpf_prog_destroy() for destroying it. The macro +BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed +code to run the filter. 'filter' is a pointer to struct bpf_prog that we +got from bpf_prog_create(), and 'ctx' the given context (e.g. +skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most of the diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt index f737273c6dc1..a251bf4fe9c9 100644 --- a/Documentation/networking/i40e.txt +++ b/Documentation/networking/i40e.txt @@ -69,8 +69,11 @@ Additional Configurations FCoE ---- - Fiber Channel over Ethernet (FCoE) hardware offload is not currently - supported. + The driver supports Fiber Channel over Ethernet (FCoE) and Data Center + Bridging (DCB) functionality. Configuring DCB and FCoE is outside the scope + of this driver doc. Refer to http://www.open-fcoe.org/ for FCoE project + information and http://www.open-lldp.org/ or email list + e1000-eedc@lists.sourceforge.net for DCB information. MAC and VLAN anti-spoofing feature ---------------------------------- diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab42c95f9985..29a93518bf18 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -101,19 +101,17 @@ ipfrag_high_thresh - INTEGER Maximum memory used to reassemble IP fragments. When ipfrag_high_thresh bytes of memory is allocated for this purpose, the fragment handler will toss packets until ipfrag_low_thresh - is reached. + is reached. This also serves as a maximum limit to namespaces + different from the initial one. ipfrag_low_thresh - INTEGER - See ipfrag_high_thresh + Maximum memory used to reassemble IP fragments before the kernel + begins to remove incomplete fragment queues to free up resources. + The kernel still accepts new fragments for defragmentation. ipfrag_time - INTEGER Time in seconds to keep an IP fragment in memory. -ipfrag_secret_interval - INTEGER - Regeneration interval (in seconds) of the hash secret (or lifetime - for the hash secret) for IP fragments. - Default: 600 - ipfrag_max_dist - INTEGER ipfrag_max_dist is a non-negative integer value which defines the maximum "disorder" which is allowed among fragments which share a @@ -1132,6 +1130,15 @@ flowlabel_consistency - BOOLEAN FALSE: disabled Default: TRUE +auto_flowlabels - BOOLEAN + Automatically generate flow labels based based on a flow hash + of the packet. This allows intermediate devices, such as routers, + to idenfify packet flows for mechanisms like Equal Cost Multipath + Routing (see RFC 6438). + TRUE: enabled + FALSE: disabled + Default: false + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply @@ -1153,11 +1160,6 @@ ip6frag_low_thresh - INTEGER ip6frag_time - INTEGER Time in seconds to keep an IPv6 fragment in memory. -ip6frag_secret_interval - INTEGER - Regeneration interval (in seconds) of the hash secret (or lifetime - for the hash secret) for IPv6 fragments. - Default: 600 - conf/default/*: Change the interface-specific default settings. @@ -1210,6 +1212,18 @@ accept_ra_defrtr - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_from_local - BOOLEAN + Accept RA with source-address that is found on local machine + if the RA is otherwise proper and able to be accepted. + Default is to NOT accept these as it may be an un-intended + network loop. + + Functional default: + enabled if accept_ra_from_local is enabled + on a specific interface. + disabled if accept_ra_from_local is disabled + on a specific interface. + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 38112d512f47..a6d7cb91069e 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -1008,14 +1008,9 @@ hardware timestamps to be used. Note: you may need to enable the generation of hardware timestamps with SIOCSHWTSTAMP (see related information from Documentation/networking/timestamping.txt). -PACKET_TIMESTAMP accepts the same integer bit field as -SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE -and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by -PACKET_TIMESTAMP. SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over -SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set. - - int req = 0; - req |= SOF_TIMESTAMPING_SYS_HARDWARE; +PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING: + + int req = SOF_TIMESTAMPING_RAW_HARDWARE; setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req)) For the mmap(2)ed ring buffers, such timestamps are stored in the @@ -1023,14 +1018,13 @@ tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine what kind of timestamp has been reported, the tp_status field is binary |'ed with the following possible bits ... - TP_STATUS_TS_SYS_HARDWARE TP_STATUS_TS_RAW_HARDWARE TP_STATUS_TS_SOFTWARE ... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the -RX_RING, if none of those 3 are set (i.e. PACKET_TIMESTAMP is not set), -then this means that a software fallback was invoked *within* PF_PACKET's -processing code (less precise). +RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a +software fallback was invoked *within* PF_PACKET's processing code (less +precise). Getting timestamps for the TX_RING works as follows: i) fill the ring frames, ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt index 3544c98401fd..e839e7efc835 100644 --- a/Documentation/networking/phy.txt +++ b/Documentation/networking/phy.txt @@ -272,6 +272,8 @@ Writing a PHY driver txtsamp: Requests a transmit timestamp at the PHY level for a 'skb' set_wol: Enable Wake-on-LAN at the PHY level get_wol: Get the Wake-on-LAN status at the PHY level + read_mmd_indirect: Read PHY MMD indirect register + write_mmd_indirect: Write PHY MMD indirect register Of these, only config_aneg and read_status are required to be assigned by the driver code. The rest are optional. Also, it is @@ -284,7 +286,21 @@ Writing a PHY driver Feel free to look at the Marvell, Cicada, and Davicom drivers in drivers/net/phy/ for examples (the lxt and qsemi drivers have - not been tested as of this writing) + not been tested as of this writing). + + The PHY's MMD register accesses are handled by the PAL framework + by default, but can be overridden by a specific PHY driver if + required. This could be the case if a PHY was released for + manufacturing before the MMD PHY register definitions were + standardized by the IEEE. Most modern PHYs will be able to use + the generic PAL framework for accessing the PHY's MMD registers. + An example of such usage is for Energy Efficient Ethernet support, + implemented in the PAL. This support uses the PAL to access MMD + registers for EEE query and configuration if the PHY supports + the IEEE standard access mechanisms, or can use the PHY's specific + access interfaces if overridden by the specific PHY driver. See + the Micrel driver in drivers/net/phy/ for an example of how this + can be implemented. Board Fixups diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 0e30c7845b2b..0dffc6e37902 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -24,6 +24,34 @@ For monitoring and control pktgen creates: /proc/net/pktgen/ethX +Tuning NIC for max performance +============================== + +The default NIC setting are (likely) not tuned for pktgen's artificial +overload type of benchmarking, as this could hurt the normal use-case. + +Specifically increasing the TX ring buffer in the NIC: + # ethtool -G ethX tx 1024 + +A larger TX ring can improve pktgen's performance, while it can hurt +in the general case, 1) because the TX ring buffer might get larger +than the CPUs L1/L2 cache, 2) because it allow more queueing in the +NIC HW layer (which is bad for bufferbloat). + +One should be careful to conclude, that packets/descriptors in the HW +TX ring cause delay. Drivers usually delay cleaning up the +ring-buffers (for various performance reasons), thus packets stalling +the TX ring, might just be waiting for cleanup. + +This cleanup issues is specifically the case, for the driver ixgbe +(Intel 82599 chip). This driver (ixgbe) combine TX+RX ring cleanups, +and the cleanup interval is affected by the ethtool --coalesce setting +of parameter "rx-usecs". + +For ixgbe use e.g "30" resulting in approx 33K interrupts/sec (1/30*10^6): + # ethtool -C ethX rx-usecs 30 + + Viewing threads =============== /proc/net/pktgen/kpktgend_0 diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index bc3554124903..897f942b976b 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -40,7 +40,7 @@ the set bits correspond to data that is available, then the control message will not be generated: SOF_TIMESTAMPING_SOFTWARE: report systime if available -SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available +SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available (deprecated) SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available It is worth noting that timestamps may be collected for reasons other @@ -88,13 +88,12 @@ hwtimeraw is the original hardware time stamp. Filled in if SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its relation to system time should be made. -hwtimetrans is the hardware time stamp transformed so that it -corresponds as good as possible to system time. This correlation is -not perfect; as a consequence, sorting packets received via different -NICs by their hwtimetrans may differ from the order in which they were -received. hwtimetrans may be non-monotonic even for the same NIC. -Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support -by the network device and will be empty without that support. +hwtimetrans is always zero. This field is deprecated. It used to hold +hw timestamps converted to system time. Instead, expose the hardware +clock device on the NIC directly as a HW PTP clock source, to allow +time conversion in userspace and optionally synchronize system time +with a userspace PTP stack such as linuxptp. For the PTP clock API, +see Documentation/ptp/ptp.txt. SIOCSHWTSTAMP, SIOCGHWTSTAMP: @@ -185,7 +184,6 @@ struct skb_shared_hwtstamps { * since arbitrary point in time */ ktime_t hwtstamp; - ktime_t syststamp; /* hwtstamp transformed to system time base */ }; Time stamps for outgoing packets are to be generated as follows: diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c index 8ba82bfe6a33..5cdfd743447b 100644 --- a/Documentation/networking/timestamping/timestamping.c +++ b/Documentation/networking/timestamping/timestamping.c @@ -76,7 +76,6 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" - " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" " SIOCGSTAMP - check last socket time stamp\n" " SIOCGSTAMPNS - more accurate socket time stamp\n"); @@ -202,9 +201,7 @@ static void printpacket(struct msghdr *msg, int res, (long)stamp->tv_sec, (long)stamp->tv_nsec); stamp++; - printf("HW transformed %ld.%09ld ", - (long)stamp->tv_sec, - (long)stamp->tv_nsec); + /* skip deprecated HW transformed */ stamp++; printf("HW raw %ld.%09ld", (long)stamp->tv_sec, @@ -361,8 +358,6 @@ int main(int argc, char **argv) so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; else |