533 files changed, 22392 insertions, 10019 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-janz-cmodio b/Documentation/ABI/testing/sysfs-bus-pci-drivers-janz-cmodio new file mode 100644 index 000000000000..4d08f28dc871 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-janz-cmodio @@ -0,0 +1,8 @@ +What: /sys/bus/pci/drivers/janz-cmodio/.../modulbus_number +Date: May 2010 +KernelVersion: 2.6.35 +Contact: Ira W. Snyder <ira.snyder@gmail.com> +Description: + Value representing the HEX switch S2 of the Janz carrier board CMOD-IO or CAN-PCI2 + + Read-only: value of the configuration switch (0..15) diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 5ecfd72ba684..668604fc8e06 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -39,6 +39,25 @@ Description: Format is a string, e.g: 00:11:22:33:44:55 for an Ethernet MAC address. +What: /sys/class/net/<bridge iface>/bridge/group_fwd_mask +Date: January 2012 +KernelVersion: 3.2 +Contact: netdev@vger.kernel.org +Description: + Bitmask to allow forwarding of link local frames with address + 01-80-C2-00-00-0X on a bridge device. Only values that set bits + not matching BR_GROUPFWD_RESTRICTED in net/bridge/br_private.h + are allowed. + The default value 0 does not forward any link local frames. + + Restricted bits: + 0: 01-80-C2-00-00-00 Bridge Group Address used for STP + 1: 01-80-C2-00-00-01 (MAC Control) 802.3 used for MAC PAUSE + 2: 01-80-C2-00-00-02 (Link Aggregation) 802.3ad + + Any values not setting these bits can be used. Take special + care when forwarding control frames, e.g. 802.1X-PAE or LLDP. + What: /sys/class/net/<iface>/broadcast Date: April 2005 KernelVersion: 2.6.12 diff --git a/Documentation/ABI/testing/sysfs-class-net-janz-ican3 b/Documentation/ABI/testing/sysfs-class-net-janz-ican3 new file mode 100644 index 000000000000..fdbc03a2b8f8 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-janz-ican3 @@ -0,0 +1,19 @@ +What: /sys/class/net/<iface>/termination +Date: May 2010 +KernelVersion: 2.6.35 +Contact: Ira W. Snyder <ira.snyder@gmail.com> +Description: + Value representing the CAN bus termination + + Default: 1 (termination active) + Reading: get the actual termination state + Writing: set the actual termination state (0=no termination, 1=termination active) + +What: /sys/class/net/<iface>/fwinfo +Date: May 2015 +KernelVersion: 3.19 +Contact: Andreas Gröger <andreas24groeger@gmail.com> +Description: + Firmware stamp of the ican3 module + Read-only: 32-byte string identification of the ICAN3 module + (known values: "JANZ-ICAN3 ICANOS 1.xx", "JANZ-ICAN3 CAL/CANopen 1.xx") diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt deleted file mode 100644 index 8db32384a486..000000000000 --- a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt +++ /dev/null @@ -1,48 +0,0 @@ -* AMD 10GbE PHY driver (amd-xgbe-phy) - -Required properties: -- compatible: Should be "amd,xgbe-phy-seattle-v1a" and - "ethernet-phy-ieee802.3-c45" -- reg: Address and length of the register sets for the device - - SerDes Rx/Tx registers - - SerDes integration registers (1/2) - - SerDes integration registers (2/2) -- interrupt-parent: Should be the phandle for the interrupt controller - that services interrupts for this device -- interrupts: Should contain the amd-xgbe-phy interrupt.
- -Optional properties: -- amd,speed-set: Speed capabilities of the device - 0 - 1GbE and 10GbE (default) - 1 - 2.5GbE and 10GbE - -The following optional properties are represented by an array with each -value corresponding to a particular speed. The first array value represents -the setting for the 1GbE speed, the second value for the 2.5GbE speed and -the third value for the 10GbE speed. All three values are required if the -property is used. -- amd,serdes-blwc: Baseline wandering correction enablement - 0 - Off - 1 - On -- amd,serdes-cdr-rate: CDR rate speed selection -- amd,serdes-pq-skew: PQ (data sampling) skew -- amd,serdes-tx-amp: TX amplitude boost -- amd,serdes-dfe-tap-config: DFE taps available to run -- amd,serdes-dfe-tap-enable: DFE taps to enable - -Example: - xgbe_phy@e1240800 { - compatible = "amd,xgbe-phy-seattle-v1a", "ethernet-phy-ieee802.3-c45"; - reg = <0 0xe1240800 0 0x00400>, - <0 0xe1250000 0 0x00060>, - <0 0xe1250080 0 0x00004>; - interrupt-parent = <&gic>; - interrupts = <0 323 4>; - amd,speed-set = <0>; - amd,serdes-blwc = <1>, <1>, <0>; - amd,serdes-cdr-rate = <2>, <2>, <7>; - amd,serdes-pq-skew = <10>, <10>, <30>; - amd,serdes-tx-amp = <15>, <15>, <10>; - amd,serdes-dfe-tap-config = <3>, <3>, <1>; - amd,serdes-dfe-tap-enable = <0>, <0>, <127>; - }; diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt index 26efd526d16c..4bb624a73b54 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt @@ -5,12 +5,16 @@ Required properties: - reg: Address and length of the register sets for the device - MAC registers - PCS registers + - SerDes Rx/Tx registers + - SerDes integration registers (1/2) + - SerDes integration registers (2/2) - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - interrupts: Should contain the amd-xgbe interrupt(s). The first interrupt listed is required and is the general device interrupt. If the optional amd,per-channel-interrupt property is specified, then one additional - interrupt for each DMA channel supported by the device should be specified + interrupt for each DMA channel supported by the device should be specified. + The last interrupt listed should be the PCS auto-negotiation interrupt. - clocks: - DMA clock for the amd-xgbe device (used for calculating the correct Rx interrupt watchdog timer value on a DMA channel @@ -19,7 +23,6 @@ Required properties: - clock-names: Should be the names of the clocks - "dma_clk" for the DMA clock - "ptp_clk" for the PTP clock -- phy-handle: See ethernet.txt file in the same directory - phy-mode: See ethernet.txt file in the same directory Optional properties: @@ -29,19 +32,46 @@ Optional properties: - amd,per-channel-interrupt: Indicates that Rx and Tx complete will generate a unique interrupt for each DMA channel - this requires an additional interrupt be configured for each DMA channel +- amd,speed-set: Speed capabilities of the device + 0 - 1GbE and 10GbE (default) + 1 - 2.5GbE and 10GbE + +The following optional properties are represented by an array with each +value corresponding to a particular speed. The first array value represents +the setting for the 1GbE speed, the second value for the 2.5GbE speed and +the third value for the 10GbE speed. All three values are required if the +property is used. 
+- amd,serdes-blwc: Baseline wandering correction enablement + 0 - Off + 1 - On +- amd,serdes-cdr-rate: CDR rate speed selection +- amd,serdes-pq-skew: PQ (data sampling) skew +- amd,serdes-tx-amp: TX amplitude boost +- amd,serdes-dfe-tap-config: DFE taps available to run +- amd,serdes-dfe-tap-enable: DFE taps to enable Example: xgbe@e0700000 { compatible = "amd,xgbe-seattle-v1a"; reg = <0 0xe0700000 0 0x80000>, - <0 0xe0780000 0 0x80000>; + <0 0xe0780000 0 0x80000>, + <0 0xe1240800 0 0x00400>, + <0 0xe1250000 0 0x00060>, + <0 0xe1250080 0 0x00004>; interrupt-parent = <&gic>; interrupts = <0 325 4>, - <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>; + <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>, + <0 323 4>; amd,per-channel-interrupt; clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>; clock-names = "dma_clk", "ptp_clk"; - phy-handle = <&phy>; phy-mode = "xgmii"; mac-address = [ 02 a1 a2 a3 a4 a5 ]; + amd,speed-set = <0>; + amd,serdes-blwc = <1>, <1>, <0>; + amd,serdes-cdr-rate = <2>, <2>, <7>; + amd,serdes-pq-skew = <10>, <10>, <30>; + amd,serdes-tx-amp = <15>, <15>, <10>; + amd,serdes-dfe-tap-config = <3>, <3>, <1>; + amd,serdes-dfe-tap-enable = <0>, <0>, <127>; }; diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index ba19d671e808..8ec5fdf444e9 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -9,6 +9,7 @@ Required properties: the Cadence GEM, or the generic form: "cdns,gem". Use "cdns,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs. Use "cdns,sama5d4-gem" for the Gigabit IP available on Atmel sama5d4 SoCs. + Use "cdns,zynqmp-gem" for Zynq Ultrascale+ MPSoC. - reg: Address and length of the register set for the device - interrupts: Should contain macb interrupt - phy-mode: See ethernet.txt file in the same directory. diff --git a/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.txt b/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.txt new file mode 100644 index 000000000000..7edba1264f6f --- /dev/null +++ b/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.txt @@ -0,0 +1,20 @@ +* NXP LPC1850 GMAC ethernet controller + +This device is a platform glue layer for stmmac. +Please see stmmac.txt for the other unchanged properties. + +Required properties: + - compatible: Should contain "nxp,lpc1850-dwmac" + +Examples: + +mac: ethernet@40010000 { + compatible = "nxp,lpc1850-dwmac", "snps,dwmac-3.611", "snps,dwmac"; + reg = <0x40010000 0x2000>; + interrupts = <5>; + interrupt-names = "macirq"; + clocks = <&ccu1 CLK_CPU_ETHERNET>; + clock-names = "stmmaceth"; + resets = <&rgu 22>; + reset-names = "stmmaceth"; +}; diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt index 40831fbaff72..525e1658f2da 100644 --- a/Documentation/devicetree/bindings/net/phy.txt +++ b/Documentation/devicetree/bindings/net/phy.txt @@ -30,6 +30,9 @@ Optional Properties: - max-speed: Maximum PHY supported speed (10, 100, 1000...) +- broken-turn-around: If set, indicates the PHY device does not correctly + release the turn-around line low at the end of an MDIO transaction.
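For context on how a kernel MDIO scan path might consume this flag (this is not part of the binding): of_property_read_bool() is the standard OF helper, and a bus can record the flagged PHY address so its read path skips the turn-around check. The phy_ignore_ta_mask bitmap below follows the mii_bus convention of this era, but treat the field name and helper as assumptions of this sketch.

    /* Hedged sketch: flag a PHY with a broken MDIO turn-around while
     * scanning its DT node.  of_property_read_bool() is the standard OF
     * helper; phy_ignore_ta_mask is assumed to be a per-address bitmap
     * in struct mii_bus that the bus read path consults.
     */
    #include <linux/of.h>
    #include <linux/phy.h>

    static void sketch_flag_broken_ta(struct mii_bus *bus,
                                      struct device_node *phy_node, u32 addr)
    {
            /* PHYs that never release the TA line are recorded so the
             * MDIO read path can ignore turn-around for this address.
             */
            if (of_property_read_bool(phy_node, "broken-turn-around"))
                    bus->phy_ignore_ta_mask |= 1 << addr;
    }
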
+ Example: ethernet-phy@0 { diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 83bf4986baea..334b49ef02d1 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -51,6 +51,7 @@ Table of Contents 3.4 Configuring Bonding Manually via Sysfs 3.5 Configuration with Interfaces Support 3.6 Overriding Configuration for Special Cases +3.7 Configuring LACP for 802.3ad mode in a more secure way 4. Querying Bonding Configuration 4.1 Bonding Configuration @@ -178,6 +179,27 @@ active_slave active slave, or the empty string if there is no active slave or the current mode does not use an active slave. +ad_actor_sys_prio + + In an AD system, this specifies the system priority. The allowed range + is 1 - 65535. If the value is not specified, it takes 65535 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + the SysFs interface. + +ad_actor_system + + In an AD system, this specifies the mac-address for the actor in + protocol packet exchanges (LACPDUs). The value cannot be NULL or + multicast. It is preferred to have the local-admin bit set for this + mac but the driver does not enforce it. If the value is not given then + the system defaults to using the master's mac address as the actor's + system address. + + This parameter has effect only in 802.3ad mode and is available through + the SysFs interface. + ad_select Specifies the 802.3ad aggregation selection logic to use. The @@ -220,6 +242,21 @@ ad_select This option was added in bonding version 3.4.0. +ad_user_port_key + + In an AD system, the port-key has three parts as shown below - + + Bits Use + 00 Duplex + 01-05 Speed + 06-15 User-defined + + This defines the upper 10 bits of the port key. The values can be + from 0 - 1023. If not given, the system defaults to 0. + + This parameter has effect only in 802.3ad mode and is available through + the SysFs interface. + all_slaves_active Specifies that duplicate frames (received on inactive ports) should be @@ -1622,6 +1659,53 @@ output port selection. This feature first appeared in bonding driver version 3.7.0 and support for output slave selection was limited to round-robin and active-backup modes. +3.7 Configuring LACP for 802.3ad mode in a more secure way +---------------------------------------------------------- + +When using 802.3ad bonding mode, the Actor (host) and Partner (switch) +exchange LACPDUs. These LACPDUs cannot be sniffed, because they are +destined to link local mac addresses (which switches/bridges are not +supposed to forward). However, most of the values are easily predictable +or are simply the machine's MAC address (which is trivially known to all +other hosts in the same L2). This implies that other machines in the L2 +domain can spoof LACPDU packets from other hosts to the switch and potentially +cause mayhem by joining (from the point of view of the switch) another +machine's aggregate, thus receiving a portion of that host's incoming +traffic and / or spoofing traffic from that machine themselves (potentially +even successfully terminating some portion of flows). Though this is not +a likely scenario, one could avoid this possibility by simply configuring +a few bonding parameters: + + (a) ad_actor_system : You can set a random mac-address that can be used for + these LACPDU exchanges. The value cannot be NULL or multicast. Also + it's preferable to set the local-admin bit. The following shell code + generates a random mac-address as described above.
+ + # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) + # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system + + (b) ad_actor_sys_prio : Randomize the system priority. The default value + is 65535, but the system can take any value from 1 - 65535. The following + shell code generates a random priority and sets it. + + # sys_prio=$(( 1 + RANDOM + RANDOM )) + # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio + + (c) ad_user_port_key : Use the user portion of the port-key. The default + keeps this empty. These are the upper 10 bits of the port-key and the + value ranges from 0 - 1023. The following shell code generates these 10 + bits and sets them. + + # usr_port_key=$(( RANDOM & 0x3FF )) + # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key + + 4 Querying Bonding Configuration ================================= diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 5abad1e921ca..b48d4a149411 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -268,6 +268,9 @@ solution for a couple of reasons: struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[8] __attribute__((aligned(8))); }; diff --git a/Documentation/networking/dctcp.txt b/Documentation/networking/dctcp.txt index 0d5dfbc89ec9..13a857753208 100644 --- a/Documentation/networking/dctcp.txt +++ b/Documentation/networking/dctcp.txt @@ -8,6 +8,7 @@ the data center network to provide multi-bit feedback to the end hosts. To enable it on end hosts: sysctl -w net.ipv4.tcp_congestion_control=dctcp + sysctl -w net.ipv4.tcp_ecn_fallback=0 (optional) All switches in the data center network running DCTCP must support ECN marking and be configured for marking when reaching defined switch buffer diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 071fb18dc57c..cb083e0d682c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -267,6 +267,15 @@ tcp_ecn - INTEGER but do not request ECN on outgoing connections. Default: 2 +tcp_ecn_fallback - BOOLEAN + If the kernel detects that an ECN connection misbehaves, enable + fallback to non-ECN. Currently, this knob implements the fallback + from RFC3168, section 6.1.1.1, but additional detection mechanisms + may be implemented under this knob in the future. The value is not + used if tcp_ecn or per-route (or congestion control) ECN settings + are disabled. + Default: 1 (fallback enabled) + tcp_fack - BOOLEAN Enable FACK congestion avoidance and fast retransmission. The value is not used, if tcp_sack is not enabled. @@ -1213,6 +1222,14 @@ auto_flowlabels - BOOLEAN FALSE: disabled Default: false +flowlabel_state_ranges - BOOLEAN + Split the flow label number space into two ranges. 0-0x7FFFF is + reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF + is reserved for stateless flow labels as described in RFC6437.
+ TRUE: enabled + FALSE: disabled + Default: true + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 0344f1d45b37..f4be85e96005 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -1,6 +1,6 @@ - HOWTO for the linux packet generator + HOWTO for the linux packet generator ------------------------------------ Enable CONFIG_NET_PKTGEN to compile and build pktgen either in-kernel or as a module # ethtool -C ethX rx-usecs 30 -Viewing threads -=============== -/proc/net/pktgen/kpktgend_0 -Name: kpktgend_0 max_before_softirq: 10000 -Running: -Stopped: eth1 -Result: OK: max_before_softirq=10000 +Kernel threads +============== +Pktgen creates a thread for each CPU with affinity to that CPU. +Each thread is controlled through the procfile /proc/net/pktgen/kpktgend_X. + +Example: /proc/net/pktgen/kpktgend_0 + + Running: + Stopped: eth4@0 + Result: OK: add_device=eth4@0 + +Most important are the devices assigned to the thread. + +The two basic thread commands are: + * add_device DEVICE@NAME -- adds a single device + * rem_device_all -- remove all associated devices +When adding a device to a thread, a corresponding procfile is created +which is used for configuring this device. Thus, device names need to +be unique. -Most important are the devices assigned to the thread. Note that a -device can only belong to one thread. +To support adding the same device to multiple threads, which is useful +with multi-queue NICs, the device naming scheme is extended with "@": + device@something +The part after "@" can be anything, but it is customary to use the thread +number. Viewing devices =============== The Params section holds configured information. The Current section holds running statistics. The Result is printed after a run or after interruption.
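The thread and device proc files accept plain-text commands; the pgset helper used throughout this HOWTO simply echoes command strings into them. As a hedged aside that is not part of the original HOWTO, the same interface can also be driven from a small C program; the device name eth4@0 and the command values below are only examples, and pktgen must already be loaded:

    /* Illustrative only: drive pktgen by writing command strings to its
     * proc files, exactly as the pgset shell helper does.
     */
    #include <stdio.h>
    #include <stdlib.h>

    static void pgset(const char *procfile, const char *cmd)
    {
            FILE *f = fopen(procfile, "w");

            if (!f) {
                    perror(procfile);
                    exit(1);
            }
            fprintf(f, "%s\n", cmd);      /* one command per write */
            fclose(f);
    }

    int main(void)
    {
            pgset("/proc/net/pktgen/kpktgend_0", "rem_device_all");
            pgset("/proc/net/pktgen/kpktgend_0", "add_device eth4@0");
            pgset("/proc/net/pktgen/eth4@0", "count 100000");
            pgset("/proc/net/pktgen/eth4@0", "pkt_size 60");
            pgset("/proc/net/pktgen/pgctrl", "start"); /* blocks until done */
            return 0;
    }

The proc-file dump that follows shows what such a configured device looks like.
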
Example: -/proc/net/pktgen/eth1 +/proc/net/pktgen/eth4@0 -Params: count 10000000 min_pkt_size: 60 max_pkt_size: 60 - frags: 0 delay: 0 clone_skb: 1000000 ifname: eth1 + Params: count 100000 min_pkt_size: 60 max_pkt_size: 60 + frags: 0 delay: 0 clone_skb: 64 ifname: eth4@0 flows: 0 flowlen: 0 - dst_min: 10.10.11.2 dst_max: - src_min: src_max: - src_mac: 00:00:00:00:00:00 dst_mac: 00:04:23:AC:FD:82 - udp_src_min: 9 udp_src_max: 9 udp_dst_min: 9 udp_dst_max: 9 - src_mac_count: 0 dst_mac_count: 0 - Flags: -Current: - pkts-sofar: 10000000 errors: 39664 - started: 1103053986245187us stopped: 1103053999346329us idle: 880401us - seq_num: 10000011 cur_dst_mac_offset: 0 cur_src_mac_offset: 0 - cur_saddr: 0x10a0a0a cur_daddr: 0x20b0a0a - cur_udp_dst: 9 cur_udp_src: 9 + queue_map_min: 0 queue_map_max: 0 + dst_min: 192.168.81.2 dst_max: + src_min: src_max: + src_mac: 90:e2:ba:0a:56:b4 dst_mac: 00:1b:21:3c:9d:f8 + udp_src_min: 9 udp_src_max: 109 udp_dst_min: 9 udp_dst_max: 9 + src_mac_count: 0 dst_mac_count: 0 + Flags: UDPSRC_RND NO_TIMESTAMP QUEUE_MAP_CPU + Current: + pkts-sofar: 100000 errors: 0 + started: 623913381008us stopped: 623913396439us idle: 25us + seq_num: 100001 cur_dst_mac_offset: 0 cur_src_mac_offset: 0 + cur_saddr: 192.168.8.3 cur_daddr: 192.168.81.2 + cur_udp_dst: 9 cur_udp_src: 42 + cur_queue_map: 0 flows: 0 -Result: OK: 13101142(c12220741+d880401) usec, 10000000 (60byte,0frags) - 763292pps 390Mb/sec (390805504bps) errors: 39664 + Result: OK: 15430(c15405+d25) usec, 100000 (60byte,0frags) + 6480562pps 3110Mb/sec (3110669760bps) errors: 0 -Configuring threads and devices -================================ + +Configuring devices +=================== This is done via the /proc interface, and most easily done via pgset as defined in the sample scripts. @@ -126,7 +145,7 @@ Examples: To select queue 1 of a given device, use queue_map_min=1 and queue_map_max=1 - pgset "src_mac_count 1" Sets the number of MACs we'll range through. + pgset "src_mac_count 1" Sets the number of MACs we'll range through. The 'minimum' MAC is what you set with srcmac. pgset "dst_mac_count 1" Sets the number of MACs we'll range through. @@ -145,6 +164,7 @@ Examples: UDPCSUM, IPSEC # IPsec encapsulation (needs CONFIG_XFRM) NODE_ALLOC # node specific memory allocation + NO_TIMESTAMP # disable timestamping pgset spi SPI_VALUE Set specific SA used to transform packet. @@ -192,24 +212,43 @@ Examples: pgset "rate 300M" set rate to 300 Mb/s pgset "ratep 1000000" set rate to 1Mpps + pgset "xmit_mode netif_receive" RX inject into stack netif_receive_skb() + Works with "burst" but not with "clone_skb". + Default xmit_mode is "start_xmit". + Sample scripts ============== -A collection of small tutorial scripts for pktgen is in the -samples/pktgen directory: +A collection of tutorial scripts and helpers for pktgen is in the +samples/pktgen directory. The helper parameters.sh file supports easy +and consistent parameter parsing across the sample scripts. + +Usage example and help: + ./pktgen_sample01_simple.sh -i eth4 -m 00:1B:21:3C:9D:F8 -d 192.168.8.2 + +Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX + -i : ($DEV) output interface/device (required) + -s : ($PKT_SIZE) packet size + -d : ($DEST_IP) destination IP + -m : ($DST_MAC) destination MAC-addr + -t : ($THREADS) threads to start + -c : ($SKB_CLONE) SKB clones send before alloc new SKB + -b : ($BURST) HW level bursting of SKBs + -v : ($VERBOSE) verbose + -x : ($DEBUG) debug + +The global variables being set are also listed. E.g.
the required +interface/device parameter "-i" sets variable $DEV. Copy the +pktgen_sampleXX scripts and modify them to fit your own needs. + +The old scripts: -pktgen.conf-1-1 # 1 CPU 1 dev pktgen.conf-1-2 # 1 CPU 2 dev -pktgen.conf-2-1 # 2 CPU's 1 dev -pktgen.conf-2-2 # 2 CPU's 2 dev pktgen.conf-1-1-rdos # 1 CPU 1 dev w. route DoS pktgen.conf-1-1-ip6 # 1 CPU 1 dev ipv6 pktgen.conf-1-1-ip6-rdos # 1 CPU 1 dev ipv6 w. route DoS pktgen.conf-1-1-flows # 1 CPU 1 dev multiple flows. -Run in shell: ./pktgen.conf-X-Y -This does all the setup including sending. - Interrupt affinity =================== @@ -217,6 +256,9 @@ Note that when adding devices to a specific CPU it is a good idea to also assign /proc/irq/XX/smp_affinity so that the TX interrupts are bound to the same CPU. This reduces cache bouncing when freeing skbs. +Also use the device flag QUEUE_MAP_CPU, which maps the SKB's TX queue +to the running thread's CPU (directly from smp_processor_id()). + Enable IPsec ============ Default IPsec transformation with ESP encapsulation plus transport mode @@ -237,18 +279,19 @@ Current commands and configuration options start stop +reset ** Thread commands: add_device rem_device_all -max_before_softirq ** Device commands: count clone_skb +burst debug frags @@ -257,10 +300,17 @@ delay src_mac_count dst_mac_count -pkt_size +pkt_size min_pkt_size max_pkt_size +queue_map_min +queue_map_max +skb_priority + +tos (ipv4) +traffic_class (ipv6) + mpls udp_src_min @@ -269,6 +319,8 @@ udp_src_max udp_dst_min udp_dst_max +node + flag IPSRC_RND IPDST_RND @@ -287,6 +339,9 @@ flag UDPCSUM IPSEC NODE_ALLOC + NO_TIMESTAMP + +spi (ipsec) dst_min dst_max @@ -299,8 +354,10 @@ src_mac clear_counters -dst6 src6 +dst6 +dst6_max +dst6_min flows flowlen @@ -308,6 +365,17 @@ flowlen rate ratep +xmit_mode <start_xmit|netif_receive> + +vlan_cfi +vlan_id +vlan_p + +svlan_cfi +svlan_id +svlan_p + + References: ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/ ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/ diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index f981a9295a39..616f89267d23 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -1,59 +1,358 @@ -Switch (and switch-ish) device drivers HOWTO -=========================== - -Please note that the word "switch" is here used in very generic meaning. -This include devices supporting L2/L3 but also various flow offloading chips, -including switches embedded into SR-IOV NICs. - -Lets describe a topology a bit. Imagine the following example: - - +----------------------------+ +---------------+ - | SOME switch chip | | CPU | - +----------------------------+ +---------------+ - port1 port2 port3 port4 MNGMNT | PCI-E | - | | | | | +---------------+ - PHY PHY | | | | NIC0 NIC1 - | | | | | | - | | +- PCI-E -+ | | - | +------- MII -------+ | - +------------- MII ------------+ - -In this example, there are two independent lines between the switch silicon -and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are -separate from the switch driver. SOME switch chip is by managed by a driver -via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be -connected to some other type of bus.
- -Now, for the previous example show the representation in kernel: - - +----------------------------+ +---------------+ - | SOME switch chip | | CPU | - +----------------------------+ +---------------+ - sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT | PCI-E | - | | | | | +---------------+ - PHY PHY | | | | eth0 eth1 - | | | | | | - | | +- PCI-E -+ | | - | +------- MII -------+ | - +------------- MII ------------+ - -Lets call the example switch driver for SOME switch chip "SOMEswitch". This -driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX -created for each port of a switch. These netdevices are instances -of "SOMEswitch" driver. sw0pX netdevices serve as a "representation" -of the switch chip. eth0 and eth1 are instances of some other existing driver. - -The only difference of the switch-port netdevice from the ordinary netdevice -is that is implements couple more NDOs: - - ndo_switch_parent_id_get - This returns the same ID for two port netdevices - of the same physical switch chip. This is - mandatory to be implemented by all switch drivers - and serves the caller for recognition of a port - netdevice. - ndo_switch_parent_* - Functions that serve for a manipulation of the switch - chip itself (it can be though of as a "parent" of the - port, therefore the name). They are not port-specific. - Caller might use arbitrary port netdevice of the same - switch and it will make no difference. - ndo_switch_port_* - Functions that serve for a port-specific manipulation. +Ethernet switch device driver model (switchdev) +=============================================== +Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> +Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> + + +The Ethernet switch device driver model (switchdev) is an in-kernel driver +model for switch devices which offload the forwarding (data) plane from the +kernel. + +Figure 1 is a block diagram showing the components of the switchdev model for +an example setup using a data-center-class switch ASIC chip. Other setups +with SR-IOV or soft switches, such as OVS, are possible. 
+ + +                             User-space tools                                  +                                                                               +       user space                   |                                          +      +-------------------------------------------------------------------+    +       kernel                       | Netlink                                  +                                    |                                          +                     +--------------+-------------------------------+          +                     |         Network stack                        |          +                     |           (Linux)                            |          +                     |                                              |          +                     +----------------------------------------------+          +                                                                               + sw1p2 sw1p4 sw1p6 +                      sw1p1  + sw1p3 +  sw1p5 +         eth1              +                        +    |    +    |    +    |            +                +                        |    |    |    |    |    |            |                +                     +--+----+----+----+-+--+----+---+  +-----+-----+          +                     |         Switch driver         |  |    mgmt   |          +                     |        (this document)        |  |   driver  |          +                     |                               |  |           |          +                     +--------------+----------------+  +-----------+          +                                    |                                          +       kernel                       | HW bus (eg PCI)                          +      +-------------------------------------------------------------------+    +       hardware                     |                                          +                     +--------------+---+------------+                         +                     |         Switch device (sw1)   |                         +                     |  +----+                       +--------+                +                     |  |    v offloaded data path   | mgmt port               +                     |  |    |                       |                         +                     +--|----|----+----+----+----+---+                         +                        |    |    |    |    |    |                             +                        +    +    +    +    +    +                             +                       p1   p2   p3   p4   p5   p6 +                                        +                             front-panel ports                                 +                                                                               + + Fig 1. + + +Include Files +------------- + +#include <linux/netdevice.h> +#include <net/switchdev.h> + + +Configuration +------------- + +Use "depends NET_SWITCHDEV" in driver's Kconfig to ensure switchdev model +support is built for driver. + + +Switch Ports +------------ + +On switchdev driver initialization, the driver will allocate and register a +struct net_device (using register_netdev()) for each enumerated physical switch +port, called the port netdev. 
A port netdev is the software representation of +the physical port and provides a conduit for control traffic to/from the +controller (the kernel) and the network, as well as an anchor point for higher +level constructs such as bridges, bonds, VLANs, tunnels, and L3 routers. Using +standard netdev tools (iproute2, ethtool, etc), the port netdev can also +provide the user access to the physical properties of the switch port such +as PHY link state and I/O statistics. + +There is (currently) no higher-level kernel object for the switch beyond the +port netdevs. All of the switchdev driver ops are netdev ops or switchdev ops. + +A switch management port is outside the scope of the switchdev driver model. +Typically, the management port does not participate in the offloaded data plane +and is loaded with a different driver, such as a NIC driver, on the management +port device. + +Port Netdev Naming +^^^^^^^^^^^^^^^^^^ + +Udev rules should be used for port netdev naming, using some unique attribute +of the port as a key, for example the port MAC address or the port PHYS name. +Hard-coding of kernel netdev names within the driver is discouraged; let the +kernel pick the default netdev name, and let udev set the final name based on a +port attribute. + +Using port PHYS name (ndo_get_phys_port_name) for the key is particularly +useful for dynamically-named ports where the device names its ports based on +external configuration. For example, if a physical 40G port is split logically +into 4 10G ports, resulting in 4 port netdevs, the device can give a unique +name for each port using port PHYS name. The udev rule would be: + +SUBSYSTEM=="net", ACTION=="add", DRIVER="<driver>", ATTR{phys_port_name}!="", \ + NAME="$attr{phys_port_name}" + +The suggested naming convention is "swXpYsZ", where X is the switch name or ID, Y +is the port name or ID, and Z is the sub-port name or ID. For example, sw1p1s0 +would be sub-port 0 on port 1 on switch 1. + +Switch ID +^^^^^^^^^ + +The switchdev driver must implement the switchdev op switchdev_port_attr_get for +SWITCHDEV_ATTR_PORT_PARENT_ID for each port netdev, returning the same physical ID +for each port of a switch. The ID must be unique between switches on the same +system. The ID does not need to be unique between switches on different +systems. + +The switch ID is used to locate ports on a switch and to know if aggregated +ports belong to the same switch; a minimal sketch of this getter appears below, +after the Port Topology overview. + +Port Features +^^^^^^^^^^^^^ + +NETIF_F_NETNS_LOCAL + +If the switchdev driver (and device) only supports offloading of the default +network namespace (netns), the driver should set this feature flag to prevent +the port netdev from being moved out of the default netns. A netns-aware +driver/device would not set this flag and would be responsible for partitioning +hardware to preserve netns containment. This means hardware cannot forward +traffic from a port in one namespace to another port in another namespace. + +Port Topology +^^^^^^^^^^^^^ + +The port netdevs representing the physical switch ports can be organized into +higher-level switching constructs. The default construct is a standalone +router port, used to offload L3 forwarding. Two or more ports can be bonded +together to form a LAG. Two or more ports (or LAGs) can be bridged to form +L2 networks. VLANs can be applied to sub-divide L2 networks. L2-over-L3 +tunnels can be built on ports. These constructs are built using standard Linux +tools such as the bridge driver, the bonding/team drivers, and netlink-based +tools such as iproute2.
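As promised in the Switch ID section above, here is a minimal, hypothetical sketch of a parent-ID getter. The my_port/my_switch structures are inventions of this sketch; the switchdev_ops layout and SWITCHDEV_ATTR_PORT_PARENT_ID follow the 4.1-era net/switchdev.h this document describes:

    #include <linux/netdevice.h>
    #include <net/switchdev.h>

    /* Hypothetical driver-private structs for the sketch. */
    struct my_switch { u64 id; };
    struct my_port { struct my_switch *sw; };

    static int my_port_attr_get(struct net_device *dev,
                                struct switchdev_attr *attr)
    {
            struct my_port *port = netdev_priv(dev);

            switch (attr->id) {
            case SWITCHDEV_ATTR_PORT_PARENT_ID:
                    /* Same physical ID for every port of this switch. */
                    attr->u.ppid.id_len = sizeof(port->sw->id);
                    memcpy(&attr->u.ppid.id, &port->sw->id,
                           attr->u.ppid.id_len);
                    return 0;
            default:
                    return -EOPNOTSUPP;
            }
    }

    static const struct switchdev_ops my_switchdev_ops = {
            .switchdev_port_attr_get = my_port_attr_get,
    };

Each port netdev would point dev->switchdev_ops at this structure before register_netdev().
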
+ +The switchdev driver can know a particular port's position in the topology by +monitoring NETDEV_CHANGEUPPER notifications. For example, a port moved into a +bond will see its upper master change. If that bond is moved into a bridge, +the bond's upper master will change. And so on. The driver will track such +movements to know each port's position in the overall topology by +registering for netdevice events and acting on NETDEV_CHANGEUPPER. + +L2 Forwarding Offload +--------------------- + +The idea is to offload the L2 data forwarding (switching) path from the kernel +to the switchdev device by mirroring bridge FDB entries down to the device. An +FDB entry is the {port, MAC, VLAN} tuple forwarding destination. + +To offload L2 bridging, the switchdev driver/device should support: + + - Static FDB entries installed on a bridge port + - Notification of learned/forgotten src mac/vlans from device + - STP state changes on the port + - VLAN flooding of multicast/broadcast and unknown unicast packets + +Static FDB Entries +^^^^^^^^^^^^^^^^^^ + +The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump +to support static FDB entries installed to the device. Static bridge FDB +entries are installed, for example, using the iproute2 bridge command: + + bridge fdb add ADDR dev DEV [vlan VID] [self] + +XXX: what should be done if offloading this rule to hardware fails (for +example, due to full capacity in hardware tables)? + +Note: by default, the bridge does not filter on VLAN and only bridges untagged +traffic. To enable VLAN support, turn on VLAN filtering: + + echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering + +Notification of Learned/Forgotten Source MAC/VLANs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The switch device will learn/forget source MAC address/VLAN on ingress packets +and notify the switch driver of the mac/vlan/port tuples. The switch driver, +in turn, will notify the bridge driver using the switchdev notifier call: + + err = call_switchdev_notifiers(val, dev, info); + +Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when forgetting, and +info points to a struct switchdev_notifier_fdb_info. On SWITCHDEV_FDB_ADD, the bridge +driver will install the FDB entry into the bridge's FDB and mark the entry as +NTF_EXT_LEARNED. The iproute2 bridge command will label these entries +"offload": + + $ bridge fdb + 52:54:00:12:35:01 dev sw1p1 master br0 permanent + 00:02:00:00:02:00 dev sw1p1 master br0 offload + 00:02:00:00:02:00 dev sw1p1 self + 52:54:00:12:35:02 dev sw1p2 master br0 permanent + 00:02:00:00:03:00 dev sw1p2 master br0 offload + 00:02:00:00:03:00 dev sw1p2 self + 33:33:00:00:00:01 dev eth0 self permanent + 01:00:5e:00:00:01 dev eth0 self permanent + 33:33:ff:00:00:00 dev eth0 self permanent + 01:80:c2:00:00:0e dev eth0 self permanent + 33:33:00:00:00:01 dev br0 self permanent + 01:00:5e:00:00:01 dev br0 self permanent + 33:33:ff:12:35:01 dev br0 self permanent + +Learning on the port should be disabled on the bridge using the bridge command: + + bridge link set dev DEV learning off + +Learning on the device port should be enabled, as well as learning_sync: + + bridge link set dev DEV learning on self + bridge link set dev DEV learning_sync on self + +The learning_sync attribute enables syncing of learned/forgotten FDB entries to +the bridge's FDB. It's possible, but not optimal, to enable learning on the +device port and on the bridge port, and disable learning_sync.
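As a sketch of the notification path just described: my_port_learn and its arguments are hypothetical, while the notifier call and info struct follow the 4.1-era net/switchdev.h quoted above.

    #include <linux/netdevice.h>
    #include <net/switchdev.h>

    /* Hypothetical: called by the driver (typically under rtnl) when the
     * device reports a newly learned {mac, vlan} on a port.  The bridge
     * installs the entry as NTF_EXT_LEARNED, as described above.
     */
    static void my_port_learn(struct net_device *port_dev,
                              const unsigned char *mac, u16 vid)
    {
            struct switchdev_notifier_fdb_info info = {
                    .addr = mac,
                    .vid  = vid,
            };

            /* info.info.dev is filled in by the notifier helper. */
            call_switchdev_notifiers(SWITCHDEV_FDB_ADD, port_dev, &info.info);
    }

A forgotten entry would use the same call with SWITCHDEV_FDB_DEL.
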
+ +To support learning and learning_sync port attributes, the driver implements +the switchdev op switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS. The driver +should initialize the attributes to the hardware defaults. + +FDB Ageing +^^^^^^^^^^ + +There are two FDB ageing models supported: 1) ageing by the device, and 2) +ageing by the kernel. Ageing by the device is preferred if many FDB entries +are supported. The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL, ...) to +age out the FDB entry. In this model, ageing by the kernel should be turned +off. XXX: how to turn off ageing in the kernel on a per-port basis or otherwise +prevent the kernel from ageing out the FDB entry? + +In the kernel ageing model, the standard bridge ageing mechanism is used to age +out stale FDB entries. To keep an FDB entry "alive", the driver should refresh +the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The +notification will reset the FDB entry's last-used time to now. The driver +should rate limit refresh notifications, for example, no more than once a +second. If the FDB entry expires, ndo_fdb_del is called to remove the entry from +the device. XXX: this last part isn't currently correct: ndo_fdb_del isn't +called, so the stale entry remains in the device...this needs to be fixed. + +FDB Flush +^^^^^^^^^ + +XXX: Unimplemented. Need to support FDB flush by the bridge driver for a port and +remove both static and learned FDB entries. + +STP State Change on Port +^^^^^^^^^^^^^^^^^^^^^^^^ + +Internally or with a third-party STP protocol implementation (e.g. mstpd), the +bridge driver maintains the STP state for ports, and will notify the switch +driver of STP state change on a port using the switchdev op switchdev_port_attr_set for +SWITCHDEV_ATTR_PORT_STP_UPDATE. + +State is one of BR_STATE_*. The switch driver can use STP state updates to +update the ingress packet filter list for the port. For example, if the port is +DISABLED, no packets should pass, but if the port moves to BLOCKED, then STP BPDUs +and other IEEE 01:80:c2:xx:xx:xx link-local multicast packets can pass. + +Note that STP BPDUs are untagged and STP state applies to all VLANs on the port, +so packet filters should be applied consistently across untagged and tagged +VLANs on the port. + +Flooding L2 domain +^^^^^^^^^^^^^^^^^^ + +For a given L2 VLAN domain, the switch device should flood multicast/broadcast +and unknown unicast packets to all ports in the domain, if allowed by the port's +current STP state. The switch driver, knowing which ports are within which +vlan L2 domain, can program the switch device for flooding. The packet should +also be sent to the port netdev for processing by the bridge driver. The +bridge should not reflood the packet to the same ports the device flooded. +XXX: the mechanism to avoid duplicate flood packets is being discussed. + +It is possible for the switch device to not handle flooding and to push the +packets up to the bridge driver for flooding. This is not ideal as the number +of ports in the L2 domain scales, since the device is much more efficient at +flooding packets than software. + +IGMP Snooping +^^^^^^^^^^^^^ + +XXX: complete this section + + +L3 routing +---------- + +Offloading L3 routing requires that the device be programmed with FIB entries from +the kernel, with the device doing the FIB lookup and forwarding. The device +does a longest prefix match (LPM) on FIB entries matching the route prefix and +forwards the packet to the matching FIB entry's nexthop(s) egress ports.
To +program the device, the switchdev driver is called with add/delete ops for IPv4 +and IPv6 FIB entries. For IPv4, the driver implements switchdev ops: + + int (*switchdev_fib_ipv4_add)(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id); + + int (*switchdev_fib_ipv4_del)(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); + +to add/delete the IPv4 dst/dst_len prefix on table tb_id. The *fi structure holds +details on the route and route's nexthops. *dev is one of the port netdevs +mentioned in the route's nexthop list. If the output port netdevs referenced +in the route's nexthop list don't all have the same switch ID, the driver is +not called to add/delete the FIB entry. + +Routes offloaded to the device are labeled with "offload" in the ip route +listing: + + $ ip route show + default via 192.168.0.2 dev eth0 + 11.0.0.0/30 dev sw1p1 proto kernel scope link src 11.0.0.2 offload + 11.0.0.4/30 via 11.0.0.1 dev sw1p1 proto zebra metric 20 offload + 11.0.0.8/30 dev sw1p2 proto kernel scope link src 11.0.0.10 offload + 11.0.0.12/30 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload + 12.0.0.2 proto zebra metric 30 offload + nexthop via 11.0.0.1 dev sw1p1 weight 1 + nexthop via 11.0.0.9 dev sw1p2 weight 1 + 12.0.0.3 via 11.0.0.1 dev sw1p1 proto zebra metric 20 offload + 12.0.0.4 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload + 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 + +XXX: add/del IPv6 FIB API + +Nexthop Resolution +^^^^^^^^^^^^^^^^^^ + +The FIB entry's nexthop list contains the nexthop tuple (gateway, dev), but for +the switch device to forward the packet with the correct dst mac address, the +nexthop gateways must be resolved to the neighbor's mac address. Neighbor mac +address discovery comes via the ARP (or ND) process and is available via the +arp_tbl neighbor table. To resolve the route's nexthop gateways, the driver +should trigger the kernel's neighbor resolution process. See the rocker +driver's rocker_port_ipv4_resolve() for an example. + +The driver can monitor for updates to arp_tbl using the netevent notifier +NETEVENT_NEIGH_UPDATE. The device can be programmed with resolved nexthops +for the routes as arp_tbl updates arrive (see the sketch below). diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt index 70d6cf608251..f37814693ad3 100644 --- a/Documentation/networking/tc-actions-env-rules.txt +++ b/Documentation/networking/tc-actions-env-rules.txt @@ -8,14 +8,8 @@ For example if your action queues a packet to be processed later, or intentionally branches by redirecting a packet, then you need to clone the packet. -There are certain fields in the skb tc_verd that need to be reset so we -avoid loops, etc. A few are generic enough that skb_act_clone() -resets them for you, so invoke skb_act_clone() rather than skb_clone(). - 2) If you munge any packet thou shalt call pskb_expand_head in the case someone else is referencing the skb. After that you "own" the skb. -You must also tell us if it is ok to munge the packet (TC_OK2MUNGE), -this way any action downstream can stomp on the packet. 3) Dropping packets you don't own is a no-no. You simply return TC_ACT_SHOT to the caller and they will drop it.
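To make the arp_tbl monitoring mentioned in the Nexthop Resolution section concrete, here is a minimal sketch of such a hook. The my_netevent_event callback and the "update hardware" step are hypothetical; register_netevent_notifier and NETEVENT_NEIGH_UPDATE are the mainline API, as used by the rocker driver.

    #include <net/netevent.h>
    #include <net/arp.h>
    #include <net/neighbour.h>

    /* Hypothetical consumer: on a neighbour update, a switchdev driver
     * can reprogram resolved nexthops (n->ha holds the MAC) for routes
     * going out n->dev.
     */
    static int my_netevent_event(struct notifier_block *nb,
                                 unsigned long event, void *ptr)
    {
            struct neighbour *n = ptr;

            if (event != NETEVENT_NEIGH_UPDATE || n->tbl != &arp_tbl)
                    return NOTIFY_DONE;

            /* ... update hardware nexthop entries for n->dev here ... */
            return NOTIFY_DONE;
    }

    static struct notifier_block my_netevent_nb = {
            .notifier_call = my_netevent_event,
    };

    /* At driver init: register_netevent_notifier(&my_netevent_nb); */
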
diff --git a/Documentation/s390/qeth.txt b/Documentation/s390/qeth.txt index 74122ada9949..aa06fcf5f8c2 100644 --- a/Documentation/s390/qeth.txt +++ b/Documentation/s390/qeth.txt @@ -1,6 +1,6 @@ IBM s390 QDIO Ethernet Driver -HiperSockets Bridge Port Support +OSA and HiperSockets Bridge Port Support Uevents @@ -8,7 +8,7 @@ To generate the events the device must be assigned a role of either a primary or a secondary Bridge Port. For more information, see "z/VM Connectivity, SC24-6174". -When run on HiperSockets Bridge Capable Port hardware, and the state +When run on OSA or HiperSockets Bridge Capable Port hardware, and the state of some configured Bridge Port device on the channel changes, a udev event with ACTION=CHANGE is emitted on behalf of the corresponding ccwgroup device. The event has the following attributes: diff --git a/MAINTAINERS b/MAINTAINERS index 60d24f224271..df106f87a3ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -652,7 +652,6 @@ M: Tom Lendacky <thomas.lendacky@amd.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/amd/xgbe/ -F: drivers/net/phy/amd-xgbe-phy.c AMS (Apple Motion Sensor) DRIVER M: Michael Hanselmann <linux-kernel@hansmi.ch> diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e0e23582c8b4..4550d247e308 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -873,6 +873,16 @@ b_epilogue: off = offsetof(struct sk_buff, queue_mapping); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); break; + case BPF_LDX | BPF_W | BPF_ABS: + /* + * load a 32bit word from struct seccomp_data. + * seccomp_check_filter() will already have checked + * that k is 32bit aligned and lies within the + * struct seccomp_data. + */ + ctx->seen |= SEEN_SKB; + emit(ARM_LDR_I(r_A, r_skb, k), ctx); + break; default: return -1; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99f76103c6b7..2ca777635d8e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -12,6 +12,7 @@ #include <linux/filter.h> #include <linux/if_vlan.h> #include <asm/cacheflush.h> +#include <linux/bpf.h> int bpf_jit_enable __read_mostly; @@ -37,7 +38,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) return ptr + len; } -#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) +#define EMIT(bytes, len) \ + do { prog = emit_code(prog, bytes, len); cnt += len; } while (0) #define EMIT1(b1) EMIT(b1, 1) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) @@ -186,31 +188,31 @@ struct jit_context { #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 -static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, - int oldproglen, struct jit_context *ctx) +#define STACKSIZE \ + (MAX_BPF_STACK + \ + 32 /* space for rbx, r13, r14, r15 */ + \ + 8 /* space for skb_copy_bits() buffer */) + +#define PROLOGUE_SIZE 51 + +/* emit x64 prologue code for BPF program and check its size.
+ * bpf_tail_call helper will skip it while jumping into another program + */ +static void emit_prologue(u8 **pprog) { - struct bpf_insn *insn = bpf_prog->insnsi; - int insn_cnt = bpf_prog->len; - bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); - bool seen_exit = false; - u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; - int i; - int proglen = 0; - u8 *prog = temp; - int stacksize = MAX_BPF_STACK + - 32 /* space for rbx, r13, r14, r15 */ + - 8 /* space for skb_copy_bits() buffer */; + u8 *prog = *pprog; + int cnt = 0; EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ - /* sub rsp, stacksize */ - EMIT3_off32(0x48, 0x81, 0xEC, stacksize); + /* sub rsp, STACKSIZE */ + EMIT3_off32(0x48, 0x81, 0xEC, STACKSIZE); /* all classic BPF filters use R6(rbx) save it */ /* mov qword ptr [rbp-X],rbx */ - EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); + EMIT3_off32(0x48, 0x89, 0x9D, -STACKSIZE); /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and @@ -221,16 +223,112 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ /* mov qword ptr [rbp-X],r13 */ - EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8); + EMIT3_off32(0x4C, 0x89, 0xAD, -STACKSIZE + 8); /* mov qword ptr [rbp-X],r14 */ - EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16); + EMIT3_off32(0x4C, 0x89, 0xB5, -STACKSIZE + 16); /* mov qword ptr [rbp-X],r15 */ - EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24); + EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24); /* clear A and X registers */ EMIT2(0x31, 0xc0); /* xor eax, eax */ EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + /* clear tail_cnt: mov qword ptr [rbp-X], rax */ + EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32); + + BUILD_BUG_ON(cnt != PROLOGUE_SIZE); + *pprog = prog; +} + +/* generate the following code: + * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 
+ * if (index >= array->map.max_entries) + * goto out; + * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + * prog = array->prog[index]; + * if (prog == NULL) + * goto out; + * goto *(prog->bpf_func + prologue_size); + * out: + */ +static void emit_bpf_tail_call(u8 **pprog) +{ + u8 *prog = *pprog; + int label1, label2, label3; + int cnt = 0; + + /* rdi - pointer to ctx + * rsi - pointer to bpf_array + * rdx - index in bpf_array + */ + + /* if (index >= array->map.max_entries) + * goto out; + */ + EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + offsetof(struct bpf_array, map.max_entries)); + EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ +#define OFFSET1 44 /* number of bytes to jump */ + EMIT2(X86_JBE, OFFSET1); /* jbe out */ + label1 = cnt; + + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +#define OFFSET2 33 + EMIT2(X86_JA, OFFSET2); /* ja out */ + label2 = cnt; + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ + + /* prog = array->prog[index]; */ + EMIT4(0x48, 0x8D, 0x44, 0xD6); /* lea rax, [rsi + rdx * 8 + 0x50] */ + EMIT1(offsetof(struct bpf_array, prog)); + EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ + + /* if (prog == NULL) + * goto out; + */ + EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ +#define OFFSET3 10 + EMIT2(X86_JE, OFFSET3); /* je out */ + label3 = cnt; + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT4(0x48, 0x8B, 0x40, /* mov rax, qword ptr [rax + 32] */ + offsetof(struct bpf_prog, bpf_func)); + EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ + + /* now we're ready to jump into next BPF program + * rdi == ctx (1st arg) + * rax == prog->bpf_func + prologue_size + */ + EMIT2(0xFF, 0xE0); /* jmp rax */ + + /* out: */ + BUILD_BUG_ON(cnt - label1 != OFFSET1); + BUILD_BUG_ON(cnt - label2 != OFFSET2); + BUILD_BUG_ON(cnt - label3 != OFFSET3); + *pprog = prog; +} + +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, + int oldproglen, struct jit_context *ctx) +{ + struct bpf_insn *insn = bpf_prog->insnsi; + int insn_cnt = bpf_prog->len; + bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); + bool seen_exit = false; + u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; + int i, cnt = 0; + int proglen = 0; + u8 *prog = temp; + + emit_prologue(&prog); + if (seen_ld_abs) { /* r9d : skb->len - skb->data_len (headlen) * r10 : skb->data @@ -739,6 +837,10 @@ xadd: if (is_imm8(insn->off)) } break; + case BPF_JMP | BPF_CALL | BPF_X: + emit_bpf_tail_call(&prog); + break; + /* cond jump */ case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JNE | BPF_X: @@ -891,13 +993,13 @@ common_load: /* update cleanup_addr */ ctx->cleanup_addr = proglen; /* mov rbx, qword ptr [rbp-X] */ - EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize); + EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE); /* mov r13, qword ptr [rbp-X] */ - EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8); + EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8); /* mov r14, qword ptr [rbp-X] */ - EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16); + EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16); /* mov r15, qword ptr [rbp-X] */ - EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24); + EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24); EMIT1(0xC9); /* leave */ EMIT1(0xC3); /* ret */ diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f22cc56fd1b3..5ad0d5354535 
100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -244,7 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); err = -ENOMEM; if (!sk2) goto unlock; @@ -324,7 +324,7 @@ static int alg_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto); + sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto, kern); if (!sk) goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cee20354ac37..c097909c589c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection) memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce memcpy(&my_addr, &connection->my_addr, my_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 8dc7290089bb..0d29b5a6356d 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -601,14 +601,14 @@ static const struct proto_ops data_sock_ops = { }; static int -data_sock_create(struct net *net, struct socket *sock, int protocol) +data_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -756,14 +756,14 @@ static const struct proto_ops base_sock_ops = { static int -base_sock_create(struct net *net, struct socket *sock, int protocol) +base_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -785,7 +785,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) switch (proto) { case ISDN_P_BASE: - err = base_sock_create(net, sock, proto); + err = base_sock_create(net, sock, proto, kern); break; case ISDN_P_TE_S0: case ISDN_P_NT_S0: @@ -799,7 +799,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) case ISDN_P_B_L2DTMF: case ISDN_P_B_L2DSP: case ISDN_P_B_L2DSPHDLC: - err = data_sock_create(net, sock, proto); + err = data_sock_create(net, sock, proto, kern); break; default: return err; diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c index 433f823037dd..ec1f46a6be3a 100644 --- a/drivers/mfd/janz-cmodio.c +++ b/drivers/mfd/janz-cmodio.c @@ -267,6 +267,10 @@ static void cmodio_pci_remove(struct pci_dev *dev) static const struct pci_device_id cmodio_pci_ids[] = { { PCI_VENDOR_ID_PLX, 
PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_JANZ, 0x0101 }, { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_JANZ, 0x0100 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_JANZ, 0x0201 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_JANZ, 0x0202 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_JANZ, 0x0201 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_JANZ, 0x0202 }, { 0, } }; MODULE_DEVICE_TABLE(pci, cmodio_pci_ids); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index df51d6025a90..019fceffc9e5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -179,6 +179,20 @@ config VXLAN To compile this driver as a module, choose M here: the module will be called vxlan. +config GENEVE + tristate "Generic Network Virtualization Encapsulation netdev" + depends on INET && GENEVE_CORE + select NET_IP_TUNNEL + ---help--- + This allows one to create geneve virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. GENEVE is often used + to tunnel virtual network infrastructure in virtualized environments. + For more information see: + http://tools.ietf.org/html/draft-gross-geneve-02 + + To compile this driver as a module, choose M here: the module + will be called geneve. + config NETCONSOLE tristate "Network console logging support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index e25fdd7d905e..c12cb22478a7 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o +obj-$(CONFIG_GENEVE) += geneve.o obj-$(CONFIG_NLMON) += nlmon.o # diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index fbd54f0e32e8..7fde4d5c2b28 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -75,10 +75,10 @@ /* Port Key definitions * key is determined according to the link speed, duplex and * user key (which is yet not supported) - * -------------------------------------------------------------- - * Port key : | User key | Speed | Duplex | - * -------------------------------------------------------------- - * 16 6 1 0 + * -------------------------------------------------------------- + * Port key | User key (10 bits) | Speed (5 bits) | Duplex| + * -------------------------------------------------------------- + * |15 6|5 1|0 */ #define AD_DUPLEX_KEY_MASKS 0x1 #define AD_SPEED_KEY_MASKS 0x3E @@ -1908,8 +1908,14 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) BOND_AD_INFO(bond).aggregator_identifier = 0; - BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; - BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); + BOND_AD_INFO(bond).system.sys_priority = + bond->params.ad_actor_sys_prio; + if (is_zero_ether_addr(bond->params.ad_actor_system)) + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->dev->dev_addr); + else + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->params.ad_actor_system); /* initialize how many times this module is called in one * second (should be about every 100ms) @@ -1945,10 +1951,10 @@ void bond_3ad_bind_slave(struct slave *slave) port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; - /* key is determined according to the link speed, duplex and user key(which - * is yet not supported) + /* key is determined according to the link speed, duplex and + * user key */ - 
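The reworked key diagram above pins down the 16-bit 802.3ad port key layout: duplex in bit 0, the speed code in bits 5:1, and the new user key in bits 15:6, which is why the bond_3ad_bind_slave() change that continues just below shifts ad_user_port_key left by 6. A standalone pack/unpack sketch using the same masks (helper names invented):

```c
/* Sketch of the 16-bit port key layout documented above; masks
 * mirror AD_DUPLEX_KEY_MASKS/AD_SPEED_KEY_MASKS.
 */
#include <stdio.h>
#include <stdint.h>

#define DUPLEX_MASK  0x0001		/* bit 0 */
#define SPEED_MASK   0x003e		/* bits 5:1 */
#define USERKEY_MASK 0xffc0		/* bits 15:6 */

static uint16_t make_port_key(uint16_t user_key, uint8_t speed, uint8_t duplex)
{
	/* same composition as the bond_3ad_bind_slave() hunk below */
	return (uint16_t)(((user_key << 6) & USERKEY_MASK) |
			  ((speed << 1) & SPEED_MASK) |
			  (duplex & DUPLEX_MASK));
}

int main(void)
{
	uint16_t key = make_port_key(7, 0x0d /* arbitrary speed code */, 1);

	printf("key=0x%04x user=%u speed=%u duplex=%u\n", key,
	       (key & USERKEY_MASK) >> 6, (key & SPEED_MASK) >> 1,
	       key & DUPLEX_MASK);
	return 0;
}
```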
port->actor_admin_port_key = 0; + port->actor_admin_port_key = bond->params.ad_user_port_key << 6; port->actor_admin_port_key |= __get_duplex(port); port->actor_admin_port_key |= (__get_link_speed(port) << 1); port->actor_oper_port_key = port->actor_admin_port_key; @@ -1959,6 +1965,8 @@ void bond_3ad_bind_slave(struct slave *slave) port->sm_vars &= ~AD_PORT_LACP_ENABLED; /* actor system is the bond's system */ port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + port->actor_system_priority = + BOND_AD_INFO(bond).system.sys_priority; /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d5fe5d5f490f..2268438f3f63 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -76,7 +76,7 @@ #include <net/netns/generic.h> #include <net/pkt_sched.h> #include <linux/rculist.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #include <net/switchdev.h> #include <net/bonding.h> #include <net/bond_3ad.h> @@ -1015,10 +1015,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t mask; struct slave *slave; - /* If any slave has the offload feature flag set, - * set the offload flag on the bond. - */ - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; @@ -3054,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, int noff, proto = -1; if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect(skb, fk); + return skb_flow_dissect_flow_keys(skb, fk); - fk->ports = 0; + fk->ports.ports = 0; noff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) return false; iph = ip_hdr(skb); - fk->src = iph->saddr; - fk->dst = iph->daddr; + fk->addrs.src = iph->saddr; + fk->addrs.dst = iph->daddr; noff += iph->ihl << 2; if (!ip_is_fragment(iph)) proto = iph->protocol; @@ -3071,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) return false; iph6 = ipv6_hdr(skb); - fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); - fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr); + fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); noff += sizeof(*iph6); proto = iph6->nexthdr; } else { return false; } if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports = skb_flow_get_ports(skb, noff, proto); + fk->ports.ports = skb_flow_get_ports(skb, noff, proto); return true; } @@ -3105,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) hash = bond_eth_hash(skb); else - hash = (__force u32)flow.ports; - hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash = (__force u32)flow.ports.ports; + hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src; hash ^= (hash >> 16); hash ^= (hash >> 8); @@ -4039,8 +4036,12 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = 
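The bond_xmit_hash() hunk above keeps the same folding scheme while moving to the new flow_keys layout: XOR the port pair with both addresses, then mix the high bits down. A user-space rendition (field names mirror the new struct, values arbitrary):

```c
/* User-space sketch of the layer3+4 transmit hash shown above;
 * not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_flow_keys {
	uint32_t src, dst;	/* fk->addrs.src / fk->addrs.dst */
	uint32_t ports;		/* fk->ports.ports */
};

static uint32_t toy_xmit_hash(const struct toy_flow_keys *fk)
{
	uint32_t hash = fk->ports;

	hash ^= fk->dst ^ fk->src;
	hash ^= hash >> 16;
	hash ^= hash >> 8;
	return hash;
}

int main(void)
{
	struct toy_flow_keys fk = {
		.src = 0xc0a80001,		/* 192.168.0.1 */
		.dst = 0xc0a80002,		/* 192.168.0.2 */
		.ports = (12345u << 16) | 80,	/* sport:dport */
	};

	/* the slave index would then be hash % slave_count */
	printf("hash=0x%08x -> slave %u of 4\n", toy_xmit_hash(&fk),
	       toy_xmit_hash(&fk) % 4);
	return 0;
}
```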
switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_features_check = passthru_features_check, }; @@ -4140,6 +4141,8 @@ static int bond_check_params(struct bond_params *params) struct bond_opt_value newval; const struct bond_opt_value *valptr; int arp_all_targets_value; + u16 ad_actor_sys_prio = 0; + u16 ad_user_port_key = 0; /* Convert string parameters. */ if (mode) { @@ -4434,6 +4437,24 @@ static int bond_check_params(struct bond_params *params) fail_over_mac_value = BOND_FOM_NONE; } + bond_opt_initstr(&newval, "default"); + valptr = bond_opt_parse( + bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO), + &newval); + if (!valptr) { + pr_err("Error: No ad_actor_sys_prio default value"); + return -EINVAL; + } + ad_actor_sys_prio = valptr->value; + + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY), + &newval); + if (!valptr) { + pr_err("Error: No ad_user_port_key default value"); + return -EINVAL; + } + ad_user_port_key = valptr->value; + if (lp_interval == 0) { pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); @@ -4462,6 +4483,9 @@ static int bond_check_params(struct bond_params *params) params->lp_interval = lp_interval; params->packets_per_slave = packets_per_slave; params->tlb_dynamic_lb = 1; /* Default value */ + params->ad_actor_sys_prio = ad_actor_sys_prio; + eth_zero_addr(params->ad_actor_system); + params->ad_user_port_key = ad_user_port_key; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 7b1124366011..f7015eb4f8db 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -94,6 +94,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, + [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, + [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, + [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, + .len = ETH_ALEN }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -379,6 +383,36 @@ static int bond_changelink(struct net_device *bond_dev, if (err) return err; } + if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { + int actor_sys_prio = + nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); + + bond_opt_initval(&newval, actor_sys_prio); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_USER_PORT_KEY]) { + int port_key = + nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); + + bond_opt_initval(&newval, port_key); + err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { + if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) + return -EINVAL; + + bond_opt_initval(&newval, + nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); + if (err) + return err; + } return 0; } @@ -426,6 +460,9 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) 
+ /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ + nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ 0; } @@ -551,6 +588,19 @@ static int bond_fill_info(struct sk_buff *skb, if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; + if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, + bond->params.ad_actor_sys_prio)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, + bond->params.ad_user_port_key)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, + sizeof(bond->params.ad_actor_system), + &bond->params.ad_actor_system)) + goto nla_put_failure; + if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index e8d3c1d35453..e9c624d54dd4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -70,6 +70,12 @@ static int bond_option_slaves_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); +static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval); +static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -186,6 +192,18 @@ static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { + { "minval", 1, BOND_VALFLAG_MIN}, + { "maxval", 65535, BOND_VALFLAG_MAX | BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0}, +}; + +static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { + { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, + { "maxval", 1023, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -379,6 +397,29 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_tlb_dynamic_lb_tbl, .flags = BOND_OPTFLAG_IFDOWN, .set = bond_option_tlb_dynamic_lb_set, + }, + [BOND_OPT_AD_ACTOR_SYS_PRIO] = { + .id = BOND_OPT_AD_ACTOR_SYS_PRIO, + .name = "ad_actor_sys_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_ad_actor_sys_prio_tbl, + .set = bond_option_ad_actor_sys_prio_set, + }, + [BOND_OPT_AD_ACTOR_SYSTEM] = { + .id = BOND_OPT_AD_ACTOR_SYSTEM, + .name = "ad_actor_system", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_RAWVAL | BOND_OPTFLAG_IFDOWN, + .set = bond_option_ad_actor_system_set, + }, + [BOND_OPT_AD_USER_PORT_KEY] = { + .id = BOND_OPT_AD_USER_PORT_KEY, + .name = "ad_user_port_key", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_ad_user_port_key_tbl, + .set = bond_option_ad_user_port_key_set, } }; @@ -1349,3 +1390,53 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, return 0; } + +static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n", + newval->value); + + 
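The bond_ad_actor_sys_prio_tbl and bond_ad_user_port_key_tbl tables above encode an option's range and default as flagged rows rather than separate fields. A guess at the spirit of the lookup (flag names borrowed, logic simplified; not a copy of bond_opt_parse()):

```c
/* Toy model of flagged option-value tables: MIN/MAX rows bound the
 * accepted range, a DEFAULT flag marks the fallback row.
 */
#include <stdio.h>

#define VALFLAG_MIN     (1 << 0)
#define VALFLAG_MAX     (1 << 1)
#define VALFLAG_DEFAULT (1 << 2)

struct opt_value {
	const char *string;
	long long value;
	int flags;
};

static const struct opt_value ad_actor_sys_prio_tbl[] = {
	{ "minval", 1,     VALFLAG_MIN },
	{ "maxval", 65535, VALFLAG_MAX | VALFLAG_DEFAULT },
	{ NULL,     -1,    0 },
};

static const struct opt_value *parse_num(const struct opt_value *tbl,
					 long long val)
{
	static struct opt_value res;
	const struct opt_value *min = NULL, *max = NULL;
	int i;

	for (i = 0; tbl[i].string; i++) {
		if (tbl[i].flags & VALFLAG_MIN)
			min = &tbl[i];
		if (tbl[i].flags & VALFLAG_MAX)
			max = &tbl[i];
	}
	if ((min && val < min->value) || (max && val > max->value))
		return NULL;		/* out of range */
	res.value = val;
	return &res;
}

int main(void)
{
	printf("0     -> %s\n", parse_num(ad_actor_sys_prio_tbl, 0) ? "ok" : "rejected");
	printf("65535 -> %s\n", parse_num(ad_actor_sys_prio_tbl, 65535) ? "ok" : "rejected");
	return 0;
}
```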
bond->params.ad_actor_sys_prio = newval->value; + return 0; +} + +static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + u8 macaddr[ETH_ALEN]; + u8 *mac; + int i; + + if (newval->string) { + i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &macaddr[0], &macaddr[1], &macaddr[2], + &macaddr[3], &macaddr[4], &macaddr[5]); + if (i != ETH_ALEN) + goto err; + mac = macaddr; + } else { + mac = (u8 *)&newval->value; + } + + if (!is_valid_ether_addr(mac)) + goto err; + + netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac); + ether_addr_copy(bond->params.ad_actor_system, mac); + return 0; + +err: + netdev_err(bond->dev, "Invalid MAC address.\n"); + return -EINVAL; +} + +static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n", + newval->value); + + bond->params.ad_user_port_key = newval->value; + return 0; +} diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index b20b35acb47d..e7f3047a26df 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -135,6 +135,10 @@ static void bond_info_show_master(struct seq_file *seq) bond->params.ad_select); seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", optval->string); + seq_printf(seq, "System priority: %d\n", + BOND_AD_INFO(bond).system.sys_priority); + seq_printf(seq, "System MAC address: %pM\n", + &BOND_AD_INFO(bond).system.sys_mac_addr); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", @@ -198,6 +202,8 @@ static void bond_info_show_slave(struct seq_file *seq, seq_puts(seq, "details actor lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->actor_system_priority); + seq_printf(seq, " system mac address: %pM\n", + &port->actor_system); seq_printf(seq, " port key: %d\n", port->actor_oper_port_key); seq_printf(seq, " port priority: %d\n", @@ -210,6 +216,8 @@ static void bond_info_show_slave(struct seq_file *seq, seq_puts(seq, "details partner lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->partner_oper.system_priority); + seq_printf(seq, " system mac address: %pM\n", + &port->partner_oper.system); seq_printf(seq, " oper key: %d\n", port->partner_oper.key); seq_printf(seq, " port priority: %d\n", diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 7e9e151d4d61..143a2abd1c1c 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -692,6 +692,49 @@ static ssize_t bonding_show_packets_per_slave(struct device *d, static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR, bonding_show_packets_per_slave, bonding_sysfs_store_option); +static ssize_t bonding_show_ad_actor_sys_prio(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%hu\n", bond->params.ad_actor_sys_prio); + + return 0; +} +static DEVICE_ATTR(ad_actor_sys_prio, S_IRUGO | S_IWUSR, + bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option); + +static ssize_t bonding_show_ad_actor_system(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%pM\n", bond->params.ad_actor_system); + + return 0; +} + +static DEVICE_ATTR(ad_actor_system, S_IRUGO | S_IWUSR, + 
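bond_option_ad_actor_system_set() above accepts either a binary value or a colon-separated string, parsed with sscanf() and rejected unless it is a valid unicast MAC. The same parse-and-validate step as a standalone program (is_valid_ether_addr() reimplemented here from its usual definition, non-zero with the group bit clear):

```c
/* Standalone version of the MAC parsing shown above. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define ETH_ALEN 6

static int is_valid_ether_addr(const uint8_t *a)
{
	static const uint8_t zero[ETH_ALEN];

	/* multicast: group bit (LSB of first octet) set; zero: all 0 */
	return !(a[0] & 0x01) && memcmp(a, zero, ETH_ALEN) != 0;
}

static int parse_mac(const char *s, uint8_t mac[ETH_ALEN])
{
	int n = sscanf(s, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		       &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);

	return (n == ETH_ALEN && is_valid_ether_addr(mac)) ? 0 : -1;
}

int main(void)
{
	uint8_t mac[ETH_ALEN];
	const char *tests[] = { "02:00:00:aa:bb:cc", "01:00:5e:00:00:01",
				"00:00:00:00:00:00", "garbage" };
	unsigned int i;

	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		printf("%-18s -> %s\n", tests[i],
		       parse_mac(tests[i], mac) ? "rejected" : "ok");
	return 0;
}
```

The multicast test case is rejected for the same reason the kernel helper rejects it: an actor system address with the group bit set would be meaningless in LACPDUs.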
bonding_show_ad_actor_system, bonding_sysfs_store_option); + +static ssize_t bonding_show_ad_user_port_key(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%hu\n", bond->params.ad_user_port_key); + + return 0; +} +static DEVICE_ATTR(ad_user_port_key, S_IRUGO | S_IWUSR, + bonding_show_ad_user_port_key, bonding_sysfs_store_option); + static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, @@ -725,6 +768,9 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_lp_interval.attr, &dev_attr_packets_per_slave.attr, &dev_attr_tlb_dynamic_lb.attr, + &dev_attr_ad_actor_sys_prio.attr, + &dev_attr_ad_actor_system.attr, + &dev_attr_ad_user_port_key.attr, NULL, }; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ad0a7e8c2c2b..6201c5a1a884 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -93,13 +93,13 @@ (FLEXCAN_CTRL_ERR_BUS | FLEXCAN_CTRL_ERR_STATE) /* FLEXCAN control register 2 (CTRL2) bits */ -#define FLEXCAN_CRL2_ECRWRE BIT(29) -#define FLEXCAN_CRL2_WRMFRZ BIT(28) -#define FLEXCAN_CRL2_RFFN(x) (((x) & 0x0f) << 24) -#define FLEXCAN_CRL2_TASD(x) (((x) & 0x1f) << 19) -#define FLEXCAN_CRL2_MRP BIT(18) -#define FLEXCAN_CRL2_RRS BIT(17) -#define FLEXCAN_CRL2_EACEN BIT(16) +#define FLEXCAN_CTRL2_ECRWRE BIT(29) +#define FLEXCAN_CTRL2_WRMFRZ BIT(28) +#define FLEXCAN_CTRL2_RFFN(x) (((x) & 0x0f) << 24) +#define FLEXCAN_CTRL2_TASD(x) (((x) & 0x1f) << 19) +#define FLEXCAN_CTRL2_MRP BIT(18) +#define FLEXCAN_CTRL2_RRS BIT(17) +#define FLEXCAN_CTRL2_EACEN BIT(16) /* FLEXCAN memory error control register (MECR) bits */ #define FLEXCAN_MECR_ECRWRDIS BIT(31) @@ -158,7 +158,6 @@ FLEXCAN_IFLAG_BUF(FLEXCAN_TX_BUF_ID)) /* FLEXCAN message buffers */ -#define FLEXCAN_MB_CNT_CODE(x) (((x) & 0xf) << 24) #define FLEXCAN_MB_CODE_RX_INACTIVE (0x0 << 24) #define FLEXCAN_MB_CODE_RX_EMPTY (0x4 << 24) #define FLEXCAN_MB_CODE_RX_FULL (0x2 << 24) @@ -184,14 +183,14 @@ * FLEXCAN hardware feature flags * * Below is some version info we got: - * SOC Version IP-Version Glitch- [TR]WRN_INT Memory err - * Filter? connected? detection - * MX25 FlexCAN2 03.00.00.00 no no no - * MX28 FlexCAN2 03.00.04.00 yes yes no - * MX35 FlexCAN2 03.00.00.00 no no no - * MX53 FlexCAN2 03.00.00.00 yes no no - * MX6s FlexCAN3 10.00.12.00 yes yes no - * VF610 FlexCAN3 ? no yes yes + * SOC Version IP-Version Glitch- [TR]WRN_INT Memory err RTR re- + * Filter? connected? detection ception in MB + * MX25 FlexCAN2 03.00.00.00 no no no no + * MX28 FlexCAN2 03.00.04.00 yes yes no no + * MX35 FlexCAN2 03.00.00.00 no no no no + * MX53 FlexCAN2 03.00.00.00 yes no no no + * MX6s FlexCAN3 10.00.12.00 yes yes no yes + * VF610 FlexCAN3 ? no yes yes yes? * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. 
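Beyond the CRL2 to CTRL2 spelling fix, the macros above are ordinary shift-and-mask field builders. A quick compile-and-run check that composing a CTRL2 value lands each field where the definitions say (BIT() spelled out, field values arbitrary):

```c
/* Check of the CTRL2 field builders from the hunk above. */
#include <stdio.h>
#include <stdint.h>

#define BIT(n)			(1u << (n))
#define FLEXCAN_CTRL2_ECRWRE	BIT(29)
#define FLEXCAN_CTRL2_RFFN(x)	(((x) & 0x0f) << 24)
#define FLEXCAN_CTRL2_TASD(x)	(((x) & 0x1f) << 19)

int main(void)
{
	/* enable MECR writes, RFFN=1, TASD=0x16 (arbitrary examples) */
	uint32_t ctrl2 = FLEXCAN_CTRL2_ECRWRE | FLEXCAN_CTRL2_RFFN(1) |
			 FLEXCAN_CTRL2_TASD(0x16);

	printf("ctrl2 = 0x%08x\n", ctrl2);	/* 0x21b00000 */
	return 0;
}
```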
*/ @@ -221,7 +220,7 @@ struct flexcan_regs { u32 imask1; /* 0x28 */ u32 iflag2; /* 0x2c */ u32 iflag1; /* 0x30 */ - u32 crl2; /* 0x34 */ + u32 ctrl2; /* 0x34 */ u32 esr2; /* 0x38 */ u32 imeur; /* 0x3c */ u32 lrfr; /* 0x40 */ @@ -230,6 +229,16 @@ struct flexcan_regs { u32 rxfir; /* 0x4c */ u32 _reserved3[12]; /* 0x50 */ struct flexcan_mb cantxfg[64]; /* 0x80 */ + /* FIFO-mode: + * MB + * 0x080...0x08f 0 RX message buffer + * 0x090...0x0df 1-5 reserverd + * 0x0e0...0x0ff 6-7 8 entry ID table + * (mx25, mx28, mx35, mx53) + * 0x0e0...0x2df 6-7..37 8..128 entry ID table + * size conf'ed via ctrl2::RFFN + * (mx6, vf610) + */ u32 _reserved4[408]; u32 mecr; /* 0xae0 */ u32 erriar; /* 0xae4 */ @@ -468,7 +477,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) struct flexcan_regs __iomem *regs = priv->base; struct can_frame *cf = (struct can_frame *)skb->data; u32 can_id; - u32 ctrl = FLEXCAN_MB_CNT_CODE(0xc) | (cf->can_dlc << 16); + u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | (cf->can_dlc << 16); if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -815,7 +824,7 @@ static int flexcan_chip_start(struct net_device *dev) { struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->base; - u32 reg_mcr, reg_ctrl, reg_crl2, reg_mecr; + u32 reg_mcr, reg_ctrl, reg_ctrl2, reg_mecr; int err, i; /* enable module */ @@ -918,9 +927,9 @@ static int flexcan_chip_start(struct net_device *dev) * and Correction of Memory Errors" to write to * MECR register */ - reg_crl2 = flexcan_read(®s->crl2); - reg_crl2 |= FLEXCAN_CRL2_ECRWRE; - flexcan_write(reg_crl2, ®s->crl2); + reg_ctrl2 = flexcan_read(®s->ctrl2); + reg_ctrl2 |= FLEXCAN_CTRL2_ECRWRE; + flexcan_write(reg_ctrl2, ®s->ctrl2); reg_mecr = flexcan_read(®s->mecr); reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 4dd183a3643a..c1e85368a198 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -40,6 +40,7 @@ #define MSYNC_PEER 0x00 /* ICAN only */ #define MSYNC_LOCL 0x01 /* host only */ #define TARGET_RUNNING 0x02 +#define FIRMWARE_STAMP 0x60 /* big endian firmware stamp */ #define MSYNC_RB0 0x01 #define MSYNC_RB1 0x02 @@ -83,6 +84,7 @@ #define MSG_COFFREQ 0x42 #define MSG_CONREQ 0x43 #define MSG_CCONFREQ 0x47 +#define MSG_LMTS 0xb4 /* * Janz ICAN3 CAN Inquiry Message Types @@ -165,6 +167,12 @@ /* SJA1000 Clock Input */ #define ICAN3_CAN_CLOCK 8000000 +/* Janz ICAN3 firmware types */ +enum ican3_fwtype { + ICAN3_FWTYPE_ICANOS, + ICAN3_FWTYPE_CAL_CANOPEN, +}; + /* Driver Name */ #define DRV_NAME "janz-ican3" @@ -215,6 +223,10 @@ struct ican3_dev { struct completion buserror_comp; struct can_berr_counter bec; + /* firmware type */ + enum ican3_fwtype fwtype; + char fwinfo[32]; + /* old and new style host interface */ unsigned int iftype; @@ -750,13 +762,61 @@ static int ican3_set_id_filter(struct ican3_dev *mod, bool accept) */ static int ican3_set_bus_state(struct ican3_dev *mod, bool on) { + struct can_bittiming *bt = &mod->can.bittiming; struct ican3_msg msg; + u8 btr0, btr1; + int res; - memset(&msg, 0, sizeof(msg)); - msg.spec = on ? 
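The offsets annotated in struct flexcan_regs above can be verified at compile time. The model below trims the struct to the region this patch touches and derives the padding from the annotated offsets, assuming the usual 16-byte FlexCAN message buffer; the arithmetic confirms ctrl2 at 0x34 and mecr at 0xae0:

```c
/* Compile-time check of the register offsets annotated above.
 * The pad sizes are derived from those annotations; the 16-byte
 * message buffer layout is an assumption of this sketch.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdint.h>

struct flexcan_mb {
	uint32_t can_ctrl;
	uint32_t can_id;
	uint32_t data[2];
};					/* 16 bytes per buffer */

struct flexcan_regs_model {
	uint32_t head[13];		/* 0x00..0x33: mcr .. iflag1 */
	uint32_t ctrl2;			/* 0x34 */
	uint32_t mid[18];		/* 0x38..0x7f: esr2 .. pad */
	struct flexcan_mb cantxfg[64];	/* 0x80 */
	uint32_t _reserved4[408];
	uint32_t mecr;			/* 0xae0 */
};

_Static_assert(offsetof(struct flexcan_regs_model, ctrl2) == 0x34, "ctrl2");
_Static_assert(offsetof(struct flexcan_regs_model, cantxfg) == 0x80, "mb");
_Static_assert(offsetof(struct flexcan_regs_model, mecr) == 0xae0, "mecr");

int main(void)
{
	printf("ctrl2 at 0x%zx, mecr at 0x%zx\n",
	       offsetof(struct flexcan_regs_model, ctrl2),
	       offsetof(struct flexcan_regs_model, mecr));
	return 0;
}
```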
MSG_CONREQ : MSG_COFFREQ; - msg.len = cpu_to_le16(0); + /* This algorithm was stolen from drivers/net/can/sja1000/sja1000.c */ + /* The bittiming register command for the ICAN3 just sets the bit timing */ + /* registers on the SJA1000 chip directly */ + btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6); + btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) | + (((bt->phase_seg2 - 1) & 0x7) << 4); + if (mod->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) + btr1 |= 0x80; - return ican3_send_msg(mod, &msg); + if (mod->fwtype == ICAN3_FWTYPE_ICANOS) { + if (on) { + /* set bittiming */ + memset(&msg, 0, sizeof(msg)); + msg.spec = MSG_CBTRREQ; + msg.len = cpu_to_le16(4); + msg.data[0] = 0x00; + msg.data[1] = 0x00; + msg.data[2] = btr0; + msg.data[3] = btr1; + + res = ican3_send_msg(mod, &msg); + if (res) + return res; + } + + /* can-on/off request */ + memset(&msg, 0, sizeof(msg)); + msg.spec = on ? MSG_CONREQ : MSG_COFFREQ; + msg.len = cpu_to_le16(0); + + return ican3_send_msg(mod, &msg); + + } else if (mod->fwtype == ICAN3_FWTYPE_CAL_CANOPEN) { + memset(&msg, 0, sizeof(msg)); + msg.spec = MSG_LMTS; + if (on) { + msg.len = cpu_to_le16(4); + msg.data[0] = 0; + msg.data[1] = 0; + msg.data[2] = btr0; + msg.data[3] = btr1; + } else { + msg.len = cpu_to_le16(2); + msg.data[0] = 1; + msg.data[1] = 0; + } + + return ican3_send_msg(mod, &msg); + } + return -ENOTSUPP; } static int ican3_set_termination(struct ican3_dev *mod, bool on) @@ -1402,7 +1462,7 @@ static int ican3_reset_module(struct ican3_dev *mod) return 0; msleep(10); - } while (time_before(jiffies, start + HZ / 4)); + } while (time_before(jiffies, start + HZ / 2)); netdev_err(mod->ndev, "failed to reset CAN module\n"); return -ETIMEDOUT; @@ -1427,6 +1487,17 @@ static int ican3_startup_module(struct ican3_dev *mod) return ret; } + /* detect firmware */ + memcpy_fromio(mod->fwinfo, mod->dpm + FIRMWARE_STAMP, sizeof(mod->fwinfo) - 1); + if (strncmp(mod->fwinfo, "JANZ-ICAN3", 10)) { + netdev_err(mod->ndev, "ICAN3 not detected (found %s)\n", mod->fwinfo); + return -ENODEV; + } + if (strstr(mod->fwinfo, "CAL/CANopen")) + mod->fwtype = ICAN3_FWTYPE_CAL_CANOPEN; + else + mod->fwtype = ICAN3_FWTYPE_ICANOS; + /* re-enable interrupts so we can send messages */ iowrite8(1 << mod->num, &mod->ctrl->int_enable); @@ -1615,36 +1686,6 @@ static const struct can_bittiming_const ican3_bittiming_const = { .brp_inc = 1, }; -/* - * This routine was stolen from drivers/net/can/sja1000/sja1000.c - * - * The bittiming register command for the ICAN3 just sets the bit timing - * registers on the SJA1000 chip directly - */ -static int ican3_set_bittiming(struct net_device *ndev) -{ - struct ican3_dev *mod = netdev_priv(ndev); - struct can_bittiming *bt = &mod->can.bittiming; - struct ican3_msg msg; - u8 btr0, btr1; - - btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6); - btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) | - (((bt->phase_seg2 - 1) & 0x7) << 4); - if (mod->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) - btr1 |= 0x80; - - memset(&msg, 0, sizeof(msg)); - msg.spec = MSG_CBTRREQ; - msg.len = cpu_to_le16(4); - msg.data[0] = 0x00; - msg.data[1] = 0x00; - msg.data[2] = btr0; - msg.data[3] = btr1; - - return ican3_send_msg(mod, &msg); -} - static int ican3_set_mode(struct net_device *ndev, enum can_mode mode) { struct ican3_dev *mod = netdev_priv(ndev); @@ -1730,11 +1771,22 @@ static ssize_t ican3_sysfs_set_term(struct device *dev, return count; } +static ssize_t ican3_sysfs_show_fwinfo(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + 
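The btr0/btr1 computation folded into ican3_set_bus_state() above packs the SJA1000 bus-timing registers: BRP-1 in BTR0 bits 5:0 and SJW-1 in bits 7:6; TSEG1-1 in BTR1 bits 3:0, TSEG2-1 in bits 6:4, and triple sampling in bit 7. Standalone, with illustrative timing values:

```c
/* Standalone version of the SJA1000 BTR0/BTR1 packing shown above;
 * the example bittiming values are invented for illustration.
 */
#include <stdio.h>
#include <stdint.h>

struct bittiming {
	uint32_t brp, sjw, prop_seg, phase_seg1, phase_seg2;
	int three_samples;
};

static void sja1000_btr(const struct bittiming *bt, uint8_t *btr0, uint8_t *btr1)
{
	*btr0 = (uint8_t)(((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6));
	*btr1 = (uint8_t)(((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) |
			  (((bt->phase_seg2 - 1) & 0x7) << 4));
	if (bt->three_samples)
		*btr1 |= 0x80;
}

int main(void)
{
	/* 8 MHz CAN clock, brp=1, 16 tq per bit: 500 kbit/s-ish */
	struct bittiming bt = { .brp = 1, .sjw = 1, .prop_seg = 6,
				.phase_seg1 = 7, .phase_seg2 = 2 };
	uint8_t btr0, btr1;

	sja1000_btr(&bt, &btr0, &btr1);
	printf("BTR0=0x%02x BTR1=0x%02x\n", btr0, btr1);	/* 0x00 0x1c */
	return 0;
}
```

Moving this computation from a do_set_bittiming() callback into the bus-on path is what lets the CAL/CANopen firmware, which only takes timing as part of its MSG_LMTS start command, share the code.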
struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); + + return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo); +} + static DEVICE_ATTR(termination, S_IWUSR | S_IRUGO, ican3_sysfs_show_term, ican3_sysfs_set_term); +static DEVICE_ATTR(fwinfo, S_IRUSR | S_IRUGO, ican3_sysfs_show_fwinfo, NULL); static struct attribute *ican3_sysfs_attrs[] = { &dev_attr_termination.attr, + &dev_attr_fwinfo.attr, NULL, }; @@ -1794,7 +1846,6 @@ static int ican3_probe(struct platform_device *pdev) mod->can.clock.freq = ICAN3_CAN_CLOCK; mod->can.bittiming_const = &ican3_bittiming_const; - mod->can.do_set_bittiming = ican3_set_bittiming; mod->can.do_set_mode = ican3_set_mode; mod->can.do_get_berr_counter = ican3_get_berr_counter; mod->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES @@ -1866,7 +1917,7 @@ static int ican3_probe(struct platform_device *pdev) goto out_free_irq; } - dev_info(dev, "module %d: registered CAN device\n", pdata->modno); + netdev_info(mod->ndev, "module %d: registered CAN device\n", pdata->modno); return 0; out_free_irq: diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 18550c7ebe6f..7ad0a4d8e475 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -37,22 +37,22 @@ config NET_DSA_MV88E6123_61_65 ethernet switch chips. config NET_DSA_MV88E6171 - tristate "Marvell 88E6171/6172 ethernet switch chip support" + tristate "Marvell 88E6171/6175/6350/6351 ethernet switch chip support" depends on NET_DSA select NET_DSA_MV88E6XXX select NET_DSA_TAG_EDSA ---help--- - This enables support for the Marvell 88E6171/6172 ethernet switch - chips. + This enables support for the Marvell 88E6171/6175/6350/6351 + ethernet switches chips. config NET_DSA_MV88E6352 - tristate "Marvell 88E6176/88E6352 ethernet switch chip support" + tristate "Marvell 88E6172/88E6176/88E6352 ethernet switch chip support" depends on NET_DSA select NET_DSA_MV88E6XXX select NET_DSA_TAG_EDSA ---help--- - This enables support for the Marvell 88E6176 and 88E6352 ethernet - switch chips. + This enables support for the Marvell 88E6172, 88E6176 and 88E6352 + ethernet switch chips. config NET_DSA_BCM_SF2 tristate "Broadcom Starfighter 2 Ethernet switch support" diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cedb572bf25a..103fde3da476 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -911,6 +911,13 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, */ if (port == 7) { status->link = priv->port_sts[port].link; + /* For MoCA interfaces, also force a link down notification + * since some version of the user-space daemon (mocad) use + * cmd->autoneg to force the link, which messes up the PHY + * state machine and make it go in PHY_FORCING state instead. + */ + if (!status->link) + netif_carrier_off(ds->ports[port]); status->duplex = 1; } else { status->link = 1; diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c index b4af6d5aff7c..71a29a7ce538 100644 --- a/drivers/net/dsa/mv88e6123_61_65.c +++ b/drivers/net/dsa/mv88e6123_61_65.c @@ -54,192 +54,40 @@ static char *mv88e6123_61_65_probe(struct device *host_dev, int sw_addr) static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) { + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Disable the PHY polling unit (since there won't be any * external PHYs to poll), don't discard packets with * excessive collisions, and mask all interrupt sources. 
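The firmware handling added above reads a NUL-padded stamp out of the module's memory, refuses anything that is not an ICAN3 image, and picks the message dialect from a substring. A sketch of just the classification step (stamp strings modeled on the strncmp()/strstr() checks above; everything else is hypothetical):

```c
/* Classification sketch for the ICAN3 firmware stamp. */
#include <stdio.h>
#include <string.h>

enum fwtype { FWTYPE_ICANOS, FWTYPE_CAL_CANOPEN, FWTYPE_UNKNOWN };

static enum fwtype classify_fw(const char *fwinfo)
{
	if (strncmp(fwinfo, "JANZ-ICAN3", 10) != 0)
		return FWTYPE_UNKNOWN;		/* not an ICAN3 image */
	if (strstr(fwinfo, "CAL/CANopen"))
		return FWTYPE_CAL_CANOPEN;	/* bus on/off via MSG_LMTS */
	return FWTYPE_ICANOS;			/* classic MSG_CONREQ/MSG_COFFREQ */
}

int main(void)
{
	const char *stamps[] = {
		"JANZ-ICAN3 ICANOS 1.28",
		"JANZ-ICAN3 CAL/CANopen 1.11",
		"something else",
	};
	unsigned int i;

	for (i = 0; i < sizeof(stamps) / sizeof(stamps[0]); i++)
		printf("%-29s -> %d\n", stamps[i], classify_fw(stamps[i]));
	return 0;
}
```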
*/ - REG_WRITE(REG_GLOBAL, 0x04, 0x0000); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, 0x0000); /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames * are to be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < 6; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* Initialise cross-chip port VLAN table to reset defaults. */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f); return 0; } -static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); - else - REG_WRITE(addr, 0x01, 0x0003); - - /* Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. 
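The rewritten mv88e6123 global setup replaces the old write of dsa_upstream_port(ds) * 0x1110 with named shifts into GLOBAL_MONITOR_CONTROL. Assuming the ingress/egress/ARP fields sit at bits 12, 8 and 4, which is exactly what the 0x1110 multiplier encoded, the two forms produce identical register values:

```c
/* Demonstrates that the old multiplier trick and the new explicit
 * shifts agree for every 4-bit port number; the shift values are
 * assumptions inferred from the 0x1110 constant.
 */
#include <assert.h>
#include <stdio.h>
#include <stdint.h>

#define MONITOR_INGRESS_SHIFT 12
#define MONITOR_EGRESS_SHIFT   8
#define MONITOR_ARP_SHIFT      4

int main(void)
{
	uint32_t port;

	for (port = 0; port < 16; port++) {
		uint32_t via_mul   = port * 0x1110;
		uint32_t via_shift = port << MONITOR_INGRESS_SHIFT |
				     port << MONITOR_EGRESS_SHIFT |
				     port << MONITOR_ARP_SHIFT;

		assert(via_mul == via_shift);
	}
	printf("port * 0x1110 matches the explicit shifts for all ports\n");
	return 0;
}
```

The shifted form is the better one precisely because it stops being a coincidence of non-overlapping nibbles and names each field.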
- */ - REG_WRITE(addr, 0x02, 0x0000); - - /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. - */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* Egress rate control: disable egress rate control. */ - REG_WRITE(addr, 0x09, 0x0001); - - /* Egress rate control 2: disable egress rate control. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* Priority Override: disable DA, SA and VTU priority override. */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* Port Ethertype: use the Ethertype DSA Ethertype value. */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. 
- */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - static int mv88e6123_61_65_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; int ret; ret = mv88e6xxx_setup_common(ds); @@ -262,19 +110,11 @@ static int mv88e6123_61_65_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6123_61_65_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - ret = mv88e6123_61_65_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; + return mv88e6xxx_setup_ports(ds); } struct dsa_switch_driver mv88e6123_61_65_switch_driver = { diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c index e54824fa0d95..32f4a08e9bc9 100644 --- a/drivers/net/dsa/mv88e6131.c +++ b/drivers/net/dsa/mv88e6131.c @@ -37,6 +37,8 @@ static char *mv88e6131_probe(struct device *host_dev, int sw_addr) return "Marvell 88E6131 (B2)"; if (ret_masked == PORT_SWITCH_ID_6131) return "Marvell 88E6131"; + if (ret_masked == PORT_SWITCH_ID_6185) + return "Marvell 88E6185"; } return NULL; @@ -44,186 +46,62 @@ static char *mv88e6131_probe(struct device *host_dev, int sw_addr) static int mv88e6131_setup_global(struct dsa_switch *ds) { + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Enable the PHY polling unit, don't discard packets with * excessive collisions, use a weighted fair queueing scheme * to arbitrate between packet queues, set the maximum frame * size to 1632, and mask all interrupt sources. */ - REG_WRITE(REG_GLOBAL, 0x04, 0x4400); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_MAX_FRAME_1632); /* Set the VLAN ethertype to 0x8100. */ - REG_WRITE(REG_GLOBAL, 0x19, 0x8100); + REG_WRITE(REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100); /* Disable ARP mirroring, and configure the upstream port as * the port to which ingress and egress monitor frames are to * be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + GLOBAL_MONITOR_CONTROL_ARP_DISABLED; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable cascade port functionality unless this device * is used in a cascade configuration, and set the switch's * DSA device number. */ if (ds->dst->pd->nr_chips > 1) - REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f)); + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); else - REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. 
*/ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (ds->pd->rtable && - i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_NO_CASCADE | + (ds->index & 0x1f)); /* Force the priority of IGMP/MLD snoop frames and ARP frames * to the highest setting. */ - REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff); + REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, + GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP | + 7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT | + GLOBAL2_PRIO_OVERRIDE_FORCE_ARP | + 7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT); return 0; } -static int mv88e6131_setup_port(struct dsa_switch *ds, int p) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * (100 Mb/s on 6085) full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - if (ps->id == PORT_SWITCH_ID_6085) - REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */ - else - REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */ - else - REG_WRITE(addr, 0x01, 0x0003); - - /* Port Control: disable Core Tag, disable Drop-on-Lock, - * transmit frames unmodified, disable Header mode, - * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN - * tunneling, determine priority by looking at 802.1p and - * IP priority fields (IP prio has precedence), and set STP - * state to Forwarding. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts, and enable DSA tagging - * mode. - * - * If this is the link to another switch, use DSA tagging - * mode, but do not enable forwarding of unknown unicasts. - */ - val = 0x0433; - if (p == dsa_upstream_port(ds)) { - val |= 0x0104; - /* On 6085, unknown multicast forward is controlled - * here rather than in Port Control 2 register. - */ - if (ps->id == PORT_SWITCH_ID_6085) - val |= 0x0008; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, don't use - * VLAN-based, source address-based or destination - * address-based priority overrides, don't let the switch - * add or strip 802.1q tags, don't discard tagged or - * untagged frames on this port, do a destination address - * lookup on received packets as usual, don't send a copy - * of all transmitted/received frames on this port to the - * CPU, and configure the upstream port number. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown multicast addresses. - */ - if (ps->id == PORT_SWITCH_ID_6085) - /* on 6085, bits 3:0 are reserved, bit 6 control ARP - * mirroring, and multicast forward is handled in - * Port Control register. - */ - REG_WRITE(addr, 0x08, 0x0080); - else { - val = 0x0080 | dsa_upstream_port(ds); - if (p == dsa_upstream_port(ds)) - val |= 0x0040; - REG_WRITE(addr, 0x08, val); - } - - /* Rate Control: disable ingress rate limiting. */ - REG_WRITE(addr, 0x09, 0x0000); - - /* Rate Control 2: disable egress rate limiting. 
*/ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - static int mv88e6131_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; int ret; ret = mv88e6xxx_setup_common(ds); @@ -234,6 +112,7 @@ static int mv88e6131_setup(struct dsa_switch *ds) switch (ps->id) { case PORT_SWITCH_ID_6085: + case PORT_SWITCH_ID_6185: ps->num_ports = 10; break; case PORT_SWITCH_ID_6095: @@ -251,19 +130,11 @@ static int mv88e6131_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6131_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - ret = mv88e6131_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; + return mv88e6xxx_setup_ports(ds); } static int mv88e6131_port_to_phy_addr(struct dsa_switch *ds, int port) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 9104efea0e3e..1c7808495a9d 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -1,4 +1,4 @@ -/* net/dsa/mv88e6171.c - Marvell 88e6171/8826172 switch chip support +/* net/dsa/mv88e6171.c - Marvell 88e6171 switch chip support * Copyright (c) 2008-2009 Marvell Semiconductor * Copyright (c) 2014 Claudio Leite <leitec@staticky.com> * @@ -29,8 +29,12 @@ static char *mv88e6171_probe(struct device *host_dev, int sw_addr) if (ret >= 0) { if ((ret & 0xfff0) == PORT_SWITCH_ID_6171) return "Marvell 88E6171"; - if ((ret & 0xfff0) == PORT_SWITCH_ID_6172) - return "Marvell 88E6172"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6175) + return "Marvell 88E6175"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6350) + return "Marvell 88E6350"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6351) + return "Marvell 88E6351"; } return NULL; @@ -38,196 +42,41 @@ static char *mv88e6171_probe(struct device *host_dev, int sw_addr) static int mv88e6171_setup_global(struct dsa_switch *ds) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Discard packets with excessive collisions, mask all * interrupt sources, enable PPU. */ - REG_WRITE(REG_GLOBAL, 0x04, 0x6000); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS); /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames * are to be sent. 
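The mv88e6171 probe rewrite above matches products by masking off the low nibble of the switch-ID register, which carries the revision; product IDs such as 0x1710 for the 88E6171 appear elsewhere in this patch. A toy classifier over made-up register readings (treat the ID table as an illustration, not a datasheet):

```c
/* Sketch of revision-masked switch-ID probing. */
#include <stdio.h>

struct sw_id { int id; const char *name; };

static const struct sw_id known[] = {
	{ 0x1710, "Marvell 88E6171" },
	{ 0x1750, "Marvell 88E6175" },
	{ 0x3710, "Marvell 88E6350" },
	{ 0x3750, "Marvell 88E6351" },
};

static const char *probe(int reg)
{
	unsigned int i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
		if ((reg & 0xfff0) == known[i].id)	/* drop revision */
			return known[i].name;
	return NULL;
}

int main(void)
{
	const char *hit = probe(0x1712);	/* 6171, revision 2 */
	const char *miss = probe(0xdead);

	printf("0x1712 -> %s\n", hit ? hit : "unknown");
	printf("0xdead -> %s\n", miss ? miss : "unknown");
	return 0;
}
```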
*/ - if (REG_READ(REG_PORT(0), 0x03) == 0x1710) - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1111)); - else - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < ps->num_ports; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < 6; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* Initialise cross-chip port VLAN table to reset defaults. */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f); return 0; } -static int mv88e6171_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - val = REG_READ(addr, 0x01); - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, val | 0x003e); - else - REG_WRITE(addr, 0x01, val | 0x0003); - - /* Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. - */ - REG_WRITE(addr, 0x02, 0x0000); - - /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. 
- * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. - */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* Egress rate control: disable egress rate control. */ - REG_WRITE(addr, 0x09, 0x0001); - - /* Egress rate control 2: disable egress rate control. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* Priority Override: disable DA, SA and VTU priority override. */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* Port Ethertype: use the Ethertype DSA Ethertype value. */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. 
- */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - static int mv88e6171_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; int ret; ret = mv88e6xxx_setup_common(ds); @@ -240,44 +89,11 @@ static int mv88e6171_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6171_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - if (!(dsa_is_cpu_port(ds, i) || ds->phys_port_mask & (1 << i))) - continue; - - ret = mv88e6171_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; -} - -static int mv88e6171_get_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (ps->id == PORT_SWITCH_ID_6172) - return mv88e6xxx_get_eee(ds, port, e); - - return -EOPNOTSUPP; -} - -static int mv88e6171_set_eee(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (ps->id == PORT_SWITCH_ID_6172) - return mv88e6xxx_set_eee(ds, port, phydev, e); - - return -EOPNOTSUPP; + return mv88e6xxx_setup_ports(ds); } struct dsa_switch_driver mv88e6171_switch_driver = { @@ -292,8 +108,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .get_strings = mv88e6xxx_get_strings, .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, .get_sset_count = mv88e6xxx_get_sset_count, - .set_eee = mv88e6171_set_eee, - .get_eee = mv88e6171_get_eee, #ifdef CONFIG_NET_DSA_HWMON .get_temp = mv88e6xxx_get_temp, #endif @@ -308,4 +122,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { }; MODULE_ALIAS("platform:mv88e6171"); -MODULE_ALIAS("platform:mv88e6172"); +MODULE_ALIAS("platform:mv88e6175"); +MODULE_ALIAS("platform:mv88e6350"); +MODULE_ALIAS("platform:mv88e6351"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index 126c11b81e75..632815c10a40 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -32,6 +32,8 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), PORT_SWITCH_ID); if (ret >= 0) { + if ((ret & 0xfff0) == PORT_SWITCH_ID_6172) + return "Marvell 88E6172"; if ((ret & 0xfff0) == PORT_SWITCH_ID_6176) return "Marvell 88E6176"; if (ret == PORT_SWITCH_ID_6352_A0) @@ -47,187 +49,37 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr) static int mv88e6352_setup_global(struct dsa_switch *ds) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Discard packets with excessive collisions, * mask all interrupt sources, enable PPU (bit 14, undocumented). */ - REG_WRITE(REG_GLOBAL, 0x04, 0x6000); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS); /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames * are to be sent. 
*/ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable remote management for now, and set the switch's * DSA device number. */ REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop = 0x1f; - - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7f); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < ps->num_ports; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* Initialise cross-chip port VLAN table to reset defaults. */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ - return 0; } -static int mv88e6352_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); - else - REG_WRITE(addr, 0x01, 0x0003); - - /* Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. - */ - REG_WRITE(addr, 0x02, 0x0000); - - /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. 
- */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* Egress rate control: disable egress rate control. */ - REG_WRITE(addr, 0x09, 0x0001); - - /* Egress rate control 2: disable egress rate control. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* Priority Override: disable DA, SA and VTU priority override. */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* Port Ethertype: use the Ethertype DSA Ethertype value. */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - #ifdef CONFIG_NET_DSA_HWMON static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp) @@ -292,7 +144,6 @@ static int mv88e6352_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - int i; ret = mv88e6xxx_setup_common(ds); if (ret < 0) @@ -306,19 +157,11 @@ static int mv88e6352_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6352_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - ret = mv88e6352_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; + return mv88e6xxx_setup_ports(ds); } static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr) @@ -552,3 +395,4 @@ struct dsa_switch_driver mv88e6352_switch_driver = { }; MODULE_ALIAS("platform:mv88e6352"); +MODULE_ALIAS("platform:mv88e6172"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index cf309aa92802..7fba330ce702 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -19,6 +19,34 @@ #include <net/dsa.h> #include "mv88e6xxx.h" +/* MDIO bus access can be nested in the case of PHYs connected to the + * internal MDIO bus of the switch, which is accessed via MDIO bus of + * the Ethernet interface. Avoid lockdep false positives by using + * mutex_lock_nested(). 
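With the per-port and global boilerplate hoisted into mv88e6xxx.c, each chip driver's setup collapses to essentially the same shape (the 6171 hunk above and the 6352 hunk below both read this way; resets and other chip particulars are elided here, and the chip name is illustrative):

static int mv88e63xx_setup(struct dsa_switch *ds)
{
	int ret;

	ret = mv88e6xxx_setup_common(ds);	/* SMI mutex init, switch ID */
	if (ret < 0)
		return ret;

	/* ... chip reset and other chip-specific steps elided ... */

	ret = mv88e63xx_setup_global(ds);	/* chip-specific global regs,
						 * calls mv88e6xxx_setup_global() */
	if (ret < 0)
		return ret;

	return mv88e6xxx_setup_ports(ds);	/* shared per-port loop */
}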
+ */ +static int mv88e6xxx_mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) +{ + int ret; + + mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + ret = bus->read(bus, addr, regnum); + mutex_unlock(&bus->mdio_lock); + + return ret; +} + +static int mv88e6xxx_mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, + u16 val) +{ + int ret; + + mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + ret = bus->write(bus, addr, regnum, val); + mutex_unlock(&bus->mdio_lock); + + return ret; +} + /* If the switch's ADDR[4:0] strap pins are strapped to zero, it will * use all 32 SMI bus addresses on its SMI bus, and all switch registers * will be directly accessible on some {device address,register address} @@ -33,7 +61,7 @@ static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) int i; for (i = 0; i < 16; i++) { - ret = mdiobus_read(bus, sw_addr, SMI_CMD); + ret = mv88e6xxx_mdiobus_read(bus, sw_addr, SMI_CMD); if (ret < 0) return ret; @@ -49,7 +77,7 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) int ret; if (sw_addr == 0) - return mdiobus_read(bus, addr, reg); + return mv88e6xxx_mdiobus_read(bus, addr, reg); /* Wait for the bus to become free. */ ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); @@ -57,8 +85,8 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) return ret; /* Transmit the read command. */ - ret = mdiobus_write(bus, sw_addr, SMI_CMD, - SMI_CMD_OP_22_READ | (addr << 5) | reg); + ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_CMD, + SMI_CMD_OP_22_READ | (addr << 5) | reg); if (ret < 0) return ret; @@ -68,7 +96,7 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) return ret; /* Read the data. */ - ret = mdiobus_read(bus, sw_addr, SMI_DATA); + ret = mv88e6xxx_mdiobus_read(bus, sw_addr, SMI_DATA); if (ret < 0) return ret; @@ -112,7 +140,7 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, int ret; if (sw_addr == 0) - return mdiobus_write(bus, addr, reg, val); + return mv88e6xxx_mdiobus_write(bus, addr, reg, val); /* Wait for the bus to become free. */ ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); @@ -120,13 +148,13 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, return ret; /* Transmit the data to write. */ - ret = mdiobus_write(bus, sw_addr, SMI_DATA, val); + ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_DATA, val); if (ret < 0) return ret; /* Transmit the write command. */ - ret = mdiobus_write(bus, sw_addr, SMI_CMD, - SMI_CMD_OP_22_WRITE | (addr << 5) | reg); + ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_CMD, + SMI_CMD_OP_22_WRITE | (addr << 5) | reg); if (ret < 0) return ret; @@ -165,24 +193,6 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) return ret; } -int mv88e6xxx_config_prio(struct dsa_switch *ds) -{ - /* Configure the IP ToS mapping registers. */ - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); - - /* Configure the IEEE 802.1p priority mapping register. 
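On the wrappers above: note they call bus->read()/bus->write() directly rather than mdiobus_read()/mdiobus_write(), because the mdiobus_* helpers take mdio_lock themselves. When the switch's internal PHYs are reached through the Ethernet controller's MDIO bus, two mdio_lock instances of the same lock class end up nested, and lockdep flags that unless the inner acquisition is annotated as one deliberate level of recursion:

	/* Outer access: phy_read(switch-internal PHY)
	 *                 -> takes the master bus's mdio_lock
	 * Inner access: switch SMI register op on the same lock class
	 */
	mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING);
	ret = bus->read(bus, addr, regnum);	/* raw op, no re-locking */
	mutex_unlock(&bus->mdio_lock);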
*/ - REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); - - return 0; -} - int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) { REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]); @@ -217,20 +227,20 @@ int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) return 0; } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) { if (addr >= 0) - return mv88e6xxx_reg_read(ds, addr, regnum); + return _mv88e6xxx_reg_read(ds, addr, regnum); return 0xffff; } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) { if (addr >= 0) - return mv88e6xxx_reg_write(ds, addr, regnum, val); + return _mv88e6xxx_reg_write(ds, addr, regnum, val); return 0; } @@ -434,26 +444,113 @@ void mv88e6xxx_poll_link(struct dsa_switch *ds) } } +static bool mv88e6xxx_6065_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6031: + case PORT_SWITCH_ID_6061: + case PORT_SWITCH_ID_6035: + case PORT_SWITCH_ID_6065: + return true; + } + return false; +} + +static bool mv88e6xxx_6095_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6092: + case PORT_SWITCH_ID_6095: + return true; + } + return false; +} + +static bool mv88e6xxx_6097_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6046: + case PORT_SWITCH_ID_6085: + case PORT_SWITCH_ID_6096: + case PORT_SWITCH_ID_6097: + return true; + } + return false; +} + +static bool mv88e6xxx_6165_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6123: + case PORT_SWITCH_ID_6161: + case PORT_SWITCH_ID_6165: + return true; + } + return false; +} + +static bool mv88e6xxx_6185_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6121: + case PORT_SWITCH_ID_6122: + case PORT_SWITCH_ID_6152: + case PORT_SWITCH_ID_6155: + case PORT_SWITCH_ID_6182: + case PORT_SWITCH_ID_6185: + case PORT_SWITCH_ID_6108: + case PORT_SWITCH_ID_6131: + return true; + } + return false; +} + +static bool mv88e6xxx_6351_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6171: + case PORT_SWITCH_ID_6175: + case PORT_SWITCH_ID_6350: + case PORT_SWITCH_ID_6351: + return true; + } + return false; +} + static bool mv88e6xxx_6352_family(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); switch (ps->id) { - case PORT_SWITCH_ID_6352: case PORT_SWITCH_ID_6172: case PORT_SWITCH_ID_6176: + case PORT_SWITCH_ID_6240: + case PORT_SWITCH_ID_6352: return true; } return false; } -static int mv88e6xxx_stats_wait(struct dsa_switch *ds) +/* Must be called with SMI mutex held */ +static int _mv88e6xxx_stats_wait(struct dsa_switch *ds) { int ret; int i; for (i = 0; i < 10; i++) { - ret = REG_READ(REG_GLOBAL, GLOBAL_STATS_OP); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_OP); if ((ret & GLOBAL_STATS_OP_BUSY) == 0) return 0; } @@ -461,7 +558,8 @@ static int mv88e6xxx_stats_wait(struct dsa_switch *ds) return -ETIMEDOUT; } -static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +/* Must be 
called with SMI mutex held */ +static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) { int ret; @@ -469,42 +567,45 @@ static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ - REG_WRITE(REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_CAPTURE_PORT | - GLOBAL_STATS_OP_HIST_RX_TX | port); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_CAPTURE_PORT | + GLOBAL_STATS_OP_HIST_RX_TX | port); + if (ret < 0) + return ret; /* Wait for the snapshotting to complete. */ - ret = mv88e6xxx_stats_wait(ds); + ret = _mv88e6xxx_stats_wait(ds); if (ret < 0) return ret; return 0; } -static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +/* Must be called with SMI mutex held */ +static void _mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) { u32 _val; int ret; *val = 0; - ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_READ_CAPTURED | - GLOBAL_STATS_OP_HIST_RX_TX | stat); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_READ_CAPTURED | + GLOBAL_STATS_OP_HIST_RX_TX | stat); if (ret < 0) return; - ret = mv88e6xxx_stats_wait(ds); + ret = _mv88e6xxx_stats_wait(ds); if (ret < 0) return; - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); if (ret < 0) return; _val = ret << 16; - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); if (ret < 0) return; @@ -587,11 +688,11 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int ret; int i; - mutex_lock(&ps->stats_mutex); + mutex_lock(&ps->smi_mutex); - ret = mv88e6xxx_stats_snapshot(ds, port); + ret = _mv88e6xxx_stats_snapshot(ds, port); if (ret < 0) { - mutex_unlock(&ps->stats_mutex); + mutex_unlock(&ps->smi_mutex); return; } @@ -608,8 +709,8 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, goto error; low = ret; if (s->sizeof_stat == 4) { - ret = mv88e6xxx_reg_read(ds, REG_PORT(port), - s->reg - 0x100 + 1); + ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), + s->reg - 0x100 + 1); if (ret < 0) goto error; high = ret; @@ -617,14 +718,14 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, data[i] = (((u64)high) << 16) | low; continue; } - mv88e6xxx_stats_read(ds, s->reg, &low); + _mv88e6xxx_stats_read(ds, s->reg, &low); if (s->sizeof_stat == 8) - mv88e6xxx_stats_read(ds, s->reg + 1, &high); + _mv88e6xxx_stats_read(ds, s->reg + 1, &high); data[i] = (((u64)high) << 32) | low; } error: - mutex_unlock(&ps->stats_mutex); + mutex_unlock(&ps->smi_mutex); } /* All the statistics in the table */ @@ -694,7 +795,7 @@ int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) *temp = 0; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x6); if (ret < 0) @@ -727,19 +828,23 @@ int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) error: _mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x0); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } #endif /* CONFIG_NET_DSA_HWMON */ -static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) +/* Must be called with SMI lock held */ +static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, + u16 mask) { unsigned long timeout = jiffies + HZ / 10; while (time_before(jiffies, timeout)) { int 
ret; - ret = REG_READ(reg, offset); + ret = _mv88e6xxx_reg_read(ds, reg, offset); + if (ret < 0) + return ret; if (!(ret & mask)) return 0; @@ -748,10 +853,22 @@ static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) return -ETIMEDOUT; } -int mv88e6xxx_phy_wait(struct dsa_switch *ds) +static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = _mv88e6xxx_wait(ds, reg, offset, mask); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +static int _mv88e6xxx_phy_wait(struct dsa_switch *ds) { - return mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_BUSY); + return _mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_BUSY); } int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds) @@ -767,56 +884,46 @@ int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) } /* Must be called with SMI lock held */ -static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) -{ - unsigned long timeout = jiffies + HZ / 10; - - while (time_before(jiffies, timeout)) { - int ret; - - ret = _mv88e6xxx_reg_read(ds, reg, offset); - if (ret < 0) - return ret; - if (!(ret & mask)) - return 0; - - usleep_range(1000, 2000); - } - return -ETIMEDOUT; -} - -/* Must be called with SMI lock held */ static int _mv88e6xxx_atu_wait(struct dsa_switch *ds) { return _mv88e6xxx_wait(ds, REG_GLOBAL, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum) { int ret; - REG_WRITE(REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_READ | (addr << 5) | regnum); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_22_READ | (addr << 5) | + regnum); + if (ret < 0) + return ret; - ret = mv88e6xxx_phy_wait(ds); + ret = _mv88e6xxx_phy_wait(ds); if (ret < 0) return ret; - return REG_READ(REG_GLOBAL2, GLOBAL2_SMI_DATA); + return _mv88e6xxx_reg_read(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA); } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, int regnum, u16 val) { - REG_WRITE(REG_GLOBAL2, GLOBAL2_SMI_DATA, val); - REG_WRITE(REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | regnum); + int ret; - return mv88e6xxx_phy_wait(ds); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA, val); + if (ret < 0) + return ret; + + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | + regnum); + + return _mv88e6xxx_phy_wait(ds); } int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) @@ -824,7 +931,7 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int reg; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); reg = _mv88e6xxx_phy_read_indirect(ds, port, 16); if (reg < 0) @@ -833,7 +940,7 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - reg = mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); + reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); if (reg < 0) goto out; @@ -841,7 +948,7 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) reg = 0; out: - 
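The indirect PHY path above brokers PHY traffic through Global2: post the operation to GLOBAL2_SMI_OP, poll its BUSY bit via _mv88e6xxx_phy_wait(), then move the data word through GLOBAL2_SMI_DATA. One nit in _mv88e6xxx_phy_write_indirect() as converted: the second _mv88e6xxx_reg_write()'s result is stored in ret but never tested before the final wait. Presumably the intended sequence is:

	ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA, val);
	if (ret < 0)
		return ret;

	ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP,
				   GLOBAL2_SMI_OP_22_WRITE | (addr << 5) |
				   regnum);
	if (ret < 0)		/* this check is missing in the hunk */
		return ret;

	return _mv88e6xxx_phy_wait(ds);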
mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return reg; } @@ -852,7 +959,7 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, int reg; int ret; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read_indirect(ds, port, 16); if (ret < 0) @@ -866,7 +973,7 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, ret = _mv88e6xxx_phy_write_indirect(ds, port, 16, reg); out: - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1241,13 +1348,212 @@ static void mv88e6xxx_bridge_work(struct work_struct *work) } } -int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) +static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret, fid; + u16 reg; mutex_lock(&ps->smi_mutex); + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || + mv88e6xxx_6065_family(ds)) { + /* MAC Forcing register: don't force link, speed, + * duplex or flow control state to any particular + * values on physical ports, but force the CPU port + * and all DSA ports to their maximum bandwidth and + * full duplex. + */ + reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); + if (dsa_is_cpu_port(ds, port) || + ds->dsa_port_mask & (1 << port)) { + reg |= PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX; + if (mv88e6xxx_6065_family(ds)) + reg |= PORT_PCS_CTRL_100; + else + reg |= PORT_PCS_CTRL_1000; + } else { + reg |= PORT_PCS_CTRL_UNFORCED; + } + + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_PCS_CTRL, reg); + if (ret) + goto abort; + } + + /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, + * disable Header mode, enable IGMP/MLD snooping, disable VLAN + * tunneling, determine priority by looking at 802.1p and IP + * priority fields (IP prio has precedence), and set STP state + * to Forwarding. + * + * If this is the CPU link, use DSA or EDSA tagging depending + * on which tagging mode was configured. + * + * If this is a link to another switch, use DSA tagging mode. + * + * If this is the upstream port for this switch, enable + * forwarding of unknown unicasts and multicasts. 
+ */ + reg = 0; + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || + mv88e6xxx_6185_family(ds)) + reg = PORT_CONTROL_IGMP_MLD_SNOOP | + PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | + PORT_CONTROL_STATE_FORWARDING; + if (dsa_is_cpu_port(ds, port)) { + if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) + reg |= PORT_CONTROL_DSA_TAG; + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) + reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; + else + reg |= PORT_CONTROL_FRAME_MODE_DSA; + } + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || + mv88e6xxx_6185_family(ds)) { + if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) + reg |= PORT_CONTROL_EGRESS_ADD_TAG; + } + } + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds)) { + if (ds->dsa_port_mask & (1 << port)) + reg |= PORT_CONTROL_FRAME_MODE_DSA; + if (port == dsa_upstream_port(ds)) + reg |= PORT_CONTROL_FORWARD_UNKNOWN | + PORT_CONTROL_FORWARD_UNKNOWN_MC; + } + if (reg) { + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_CONTROL, reg); + if (ret) + goto abort; + } + + /* Port Control 2: don't force a good FCS, set the maximum + * frame size to 10240 bytes, don't let the switch add or + * strip 802.1q tags, don't discard tagged or untagged frames + * on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. + */ + reg = 0; + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds)) + reg = PORT_CONTROL_2_MAP_DA; + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds)) + reg |= PORT_CONTROL_2_JUMBO_10240; + + if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) { + /* Set the upstream port this port should use */ + reg |= dsa_upstream_port(ds); + /* enable forwarding of unknown multicast addresses to + * the upstream port + */ + if (port == dsa_upstream_port(ds)) + reg |= PORT_CONTROL_2_FORWARD_UNKNOWN; + } + + if (reg) { + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_CONTROL_2, reg); + if (ret) + goto abort; + } + + /* Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_ASSOC_VECTOR, + 1 << port); + if (ret) + goto abort; + + /* Egress rate control 2: disable egress rate control. */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_RATE_CONTROL_2, + 0x0000); + if (ret) + goto abort; + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + /* Do not limit the period of time that this port can + * be paused for by the remote end or the period of + * time that this port can pause the remote end. 
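For a 6352-family CPU port running EDSA tagging, the branches above compose exactly the value the deleted mv88e6352_setup_port() hard-coded. Worked out with the bit definitions from mv88e6xxx.h:

	reg = PORT_CONTROL_IGMP_MLD_SNOOP |	/* BIT(10) -> 0x0400 */
	      PORT_CONTROL_USE_TAG |		/* BIT(4)  -> 0x0010 */
	      PORT_CONTROL_USE_IP |		/* BIT(5)  -> 0x0020 */
	      PORT_CONTROL_STATE_FORWARDING;	/*            0x0003 */
	reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA;	/* 0x3 << 8  */
	reg |= PORT_CONTROL_EGRESS_ADD_TAG;		/* 0x3 << 12 */
	/* reg == 0x3733, i.e. the old "val = 0x0433; ... val |= 0x3300" */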
+ */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_PAUSE_CTRL, 0x0000); + if (ret) + goto abort; + + /* Port ATU control: disable limiting the number of + * address database entries that this port is allowed + * to use. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_ATU_CONTROL, 0x0000); + /* Priority Override: disable DA, SA and VTU priority + * override. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_PRI_OVERRIDE, 0x0000); + if (ret) + goto abort; + + /* Port Ethertype: use the Ethertype DSA Ethertype + * value. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_ETH_TYPE, ETH_P_EDSA); + if (ret) + goto abort; + /* Tag Remap: use an identity 802.1p prio -> switch + * prio mapping. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_TAG_REGMAP_0123, 0x3210); + if (ret) + goto abort; + + /* Tag Remap 2: use an identity 802.1p prio -> switch + * prio mapping. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_TAG_REGMAP_4567, 0x7654); + if (ret) + goto abort; + } + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) { + /* Rate Control: disable ingress rate limiting. */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_RATE_CONTROL, 0x0001); + if (ret) + goto abort; + } + /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ @@ -1281,13 +1587,25 @@ abort: return ret; } +int mv88e6xxx_setup_ports(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + int i; + + for (i = 0; i < ps->num_ports; i++) { + ret = mv88e6xxx_setup_port(ds, i); + if (ret < 0) + return ret; + } + return 0; +} + int mv88e6xxx_setup_common(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); mutex_init(&ps->smi_mutex); - mutex_init(&ps->stats_mutex); - mutex_init(&ps->phy_mutex); ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0; @@ -1298,6 +1616,104 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds) return 0; } +int mv88e6xxx_setup_global(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int i; + + /* Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, + 0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL); + + /* Configure the IP ToS mapping registers. */ + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + + /* Configure the IEEE 802.1p priority mapping register. */ + REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + + /* Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff); + + /* Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the highest priority. 
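One more dropped check in the mv88e6xxx_setup_port() hunk above: the PORT_ATU_CONTROL write stores its result in ret, but the PORT_PRI_OVERRIDE write overwrites it before anything is tested, so a failed ATU-control write goes unnoticed. Every sibling write in this function checks and aborts, so presumably:

		ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
					   PORT_ATU_CONTROL, 0x0000);
		if (ret)		/* missing above */
			goto abort;

		ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
					   PORT_PRI_OVERRIDE, 0x0000);
		if (ret)
			goto abort;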
+ */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, + 0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 | + GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI); + + /* Program the DSA routing table. */ + for (i = 0; i < 32; i++) { + int nexthop = 0x1f; + + if (ds->pd->rtable && + i != ds->index && i < ds->dst->pd->nr_chips) + nexthop = ds->pd->rtable[i] & 0x1f; + + REG_WRITE(REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, + GLOBAL2_DEVICE_MAPPING_UPDATE | + (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | + nexthop); + } + + /* Clear all trunk masks. */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MASK, + 0x8000 | (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) | + ((1 << ps->num_ports) - 1)); + + /* Clear all trunk mappings. */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, + GLOBAL2_TRUNK_MAPPING_UPDATE | + (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + /* Send all frames with destination addresses matching + * 01:80:c2:00:00:2x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff); + + /* Initialise cross-chip port VLAN table to reset + * defaults. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000); + + /* Clear the priority override table. */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, + 0x8000 | (i << 8)); + } + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) { + /* Disable ingress rate limiting by resetting all + * ingress rate limit registers to their initial + * state. + */ + for (i = 0; i < ps->num_ports; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_INGRESS_OP, + 0x9000 | (i << 8)); + } + + return 0; +} + int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -1343,14 +1759,14 @@ int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg) struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); if (ret < 0) goto error; ret = _mv88e6xxx_phy_read_indirect(ds, port, reg); error: _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1360,7 +1776,7 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); if (ret < 0) goto error; @@ -1368,7 +1784,7 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val); error: _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1391,9 +1807,9 @@ mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum) if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read(ds, addr, regnum); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1407,9 +1823,9 @@ mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write(ds, 
addr, regnum, val); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1423,9 +1839,9 @@ mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum) if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read_indirect(ds, addr, regnum); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1440,9 +1856,9 @@ mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int port, int regnum, if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write_indirect(ds, addr, regnum, val); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index e045154f3364..e10ccdb4ffbc 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -40,9 +40,31 @@ #define PORT_STATUS_TX_PAUSED BIT(5) #define PORT_STATUS_FLOW_CTRL BIT(4) #define PORT_PCS_CTRL 0x01 +#define PORT_PCS_CTRL_FC BIT(7) +#define PORT_PCS_CTRL_FORCE_FC BIT(6) +#define PORT_PCS_CTRL_LINK_UP BIT(5) +#define PORT_PCS_CTRL_FORCE_LINK BIT(4) +#define PORT_PCS_CTRL_DUPLEX_FULL BIT(3) +#define PORT_PCS_CTRL_FORCE_DUPLEX BIT(2) +#define PORT_PCS_CTRL_10 0x00 +#define PORT_PCS_CTRL_100 0x01 +#define PORT_PCS_CTRL_1000 0x02 +#define PORT_PCS_CTRL_UNFORCED 0x03 +#define PORT_PAUSE_CTRL 0x02 #define PORT_SWITCH_ID 0x03 +#define PORT_SWITCH_ID_6031 0x0310 +#define PORT_SWITCH_ID_6035 0x0350 +#define PORT_SWITCH_ID_6046 0x0480 +#define PORT_SWITCH_ID_6061 0x0610 +#define PORT_SWITCH_ID_6065 0x0650 #define PORT_SWITCH_ID_6085 0x04a0 +#define PORT_SWITCH_ID_6092 0x0970 #define PORT_SWITCH_ID_6095 0x0950 +#define PORT_SWITCH_ID_6096 0x0980 +#define PORT_SWITCH_ID_6097 0x0990 +#define PORT_SWITCH_ID_6108 0x1070 +#define PORT_SWITCH_ID_6121 0x1040 +#define PORT_SWITCH_ID_6122 0x1050 #define PORT_SWITCH_ID_6123 0x1210 #define PORT_SWITCH_ID_6123_A1 0x1212 #define PORT_SWITCH_ID_6123_A2 0x1213 @@ -58,13 +80,38 @@ #define PORT_SWITCH_ID_6165_A2 0x1653 #define PORT_SWITCH_ID_6171 0x1710 #define PORT_SWITCH_ID_6172 0x1720 +#define PORT_SWITCH_ID_6175 0x1750 #define PORT_SWITCH_ID_6176 0x1760 #define PORT_SWITCH_ID_6182 0x1a60 #define PORT_SWITCH_ID_6185 0x1a70 +#define PORT_SWITCH_ID_6240 0x2400 +#define PORT_SWITCH_ID_6320 0x1250 +#define PORT_SWITCH_ID_6350 0x3710 +#define PORT_SWITCH_ID_6351 0x3750 #define PORT_SWITCH_ID_6352 0x3520 #define PORT_SWITCH_ID_6352_A0 0x3521 #define PORT_SWITCH_ID_6352_A1 0x3522 #define PORT_CONTROL 0x04 +#define PORT_CONTROL_USE_CORE_TAG BIT(15) +#define PORT_CONTROL_DROP_ON_LOCK BIT(14) +#define PORT_CONTROL_EGRESS_UNMODIFIED (0x0 << 12) +#define PORT_CONTROL_EGRESS_UNTAGGED (0x1 << 12) +#define PORT_CONTROL_EGRESS_TAGGED (0x2 << 12) +#define PORT_CONTROL_EGRESS_ADD_TAG (0x3 << 12) +#define PORT_CONTROL_HEADER BIT(11) +#define PORT_CONTROL_IGMP_MLD_SNOOP BIT(10) +#define PORT_CONTROL_DOUBLE_TAG BIT(9) +#define PORT_CONTROL_FRAME_MODE_NORMAL (0x0 << 8) +#define PORT_CONTROL_FRAME_MODE_DSA (0x1 << 8) +#define PORT_CONTROL_FRAME_MODE_PROVIDER (0x2 << 8) +#define PORT_CONTROL_FRAME_ETHER_TYPE_DSA (0x3 << 8) +#define PORT_CONTROL_DSA_TAG BIT(8) +#define PORT_CONTROL_VLAN_TUNNEL BIT(7) +#define PORT_CONTROL_TAG_IF_BOTH BIT(6) +#define PORT_CONTROL_USE_IP BIT(5) +#define PORT_CONTROL_USE_TAG BIT(4) +#define PORT_CONTROL_FORWARD_UNKNOWN_MC BIT(3) +#define PORT_CONTROL_FORWARD_UNKNOWN BIT(2) #define PORT_CONTROL_STATE_MASK 0x03 #define 
PORT_CONTROL_STATE_DISABLED 0x00 #define PORT_CONTROL_STATE_BLOCKING 0x01 @@ -74,15 +121,32 @@ #define PORT_BASE_VLAN 0x06 #define PORT_DEFAULT_VLAN 0x07 #define PORT_CONTROL_2 0x08 +#define PORT_CONTROL_2_IGNORE_FCS BIT(15) +#define PORT_CONTROL_2_VTU_PRI_OVERRIDE BIT(14) +#define PORT_CONTROL_2_SA_PRIO_OVERRIDE BIT(13) +#define PORT_CONTROL_2_DA_PRIO_OVERRIDE BIT(12) +#define PORT_CONTROL_2_JUMBO_1522 (0x00 << 12) +#define PORT_CONTROL_2_JUMBO_2048 (0x01 << 12) +#define PORT_CONTROL_2_JUMBO_10240 (0x02 << 12) +#define PORT_CONTROL_2_DISCARD_TAGGED BIT(9) +#define PORT_CONTROL_2_DISCARD_UNTAGGED BIT(8) +#define PORT_CONTROL_2_MAP_DA BIT(7) +#define PORT_CONTROL_2_DEFAULT_FORWARD BIT(6) +#define PORT_CONTROL_2_FORWARD_UNKNOWN BIT(6) +#define PORT_CONTROL_2_EGRESS_MONITOR BIT(5) +#define PORT_CONTROL_2_INGRESS_MONITOR BIT(4) #define PORT_RATE_CONTROL 0x09 #define PORT_RATE_CONTROL_2 0x0a #define PORT_ASSOC_VECTOR 0x0b +#define PORT_ATU_CONTROL 0x0c +#define PORT_PRI_OVERRIDE 0x0d +#define PORT_ETH_TYPE 0x0f #define PORT_IN_DISCARD_LO 0x10 #define PORT_IN_DISCARD_HI 0x11 #define PORT_IN_FILTERED 0x12 #define PORT_OUT_FILTERED 0x13 -#define PORT_TAG_REGMAP_0123 0x19 -#define PORT_TAG_REGMAP_4567 0x1a +#define PORT_TAG_REGMAP_0123 0x18 +#define PORT_TAG_REGMAP_4567 0x19 #define REG_GLOBAL 0x1b #define GLOBAL_STATUS 0x00 @@ -102,7 +166,7 @@ #define GLOBAL_CONTROL_DISCARD_EXCESS BIT(13) /* 6352 */ #define GLOBAL_CONTROL_SCHED_PRIO BIT(11) /* 6152 */ #define GLOBAL_CONTROL_MAX_FRAME_1632 BIT(10) /* 6152 */ -#define GLOBAL_CONTROL_RELOAD_EEPROM BIT(9) /* 6152 */ +#define GLOBAL_CONTROL_RELOAD_EEPROM BIT(9) /* 6152 */ #define GLOBAL_CONTROL_DEVICE_EN BIT(7) #define GLOBAL_CONTROL_STATS_DONE_EN BIT(6) #define GLOBAL_CONTROL_VTU_PROBLEM_EN BIT(5) @@ -117,6 +181,7 @@ #define GLOBAL_VTU_DATA_4_7 0x08 #define GLOBAL_VTU_DATA_8_11 0x09 #define GLOBAL_ATU_CONTROL 0x0a +#define GLOBAL_ATU_CONTROL_LEARN2ALL BIT(3) #define GLOBAL_ATU_OP 0x0b #define GLOBAL_ATU_OP_BUSY BIT(15) #define GLOBAL_ATU_OP_NOP (0 << 12) @@ -151,7 +216,15 @@ #define GLOBAL_IEEE_PRI 0x18 #define GLOBAL_CORE_TAG_TYPE 0x19 #define GLOBAL_MONITOR_CONTROL 0x1a +#define GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT 12 +#define GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT 8 +#define GLOBAL_MONITOR_CONTROL_ARP_SHIFT 4 +#define GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT 0 +#define GLOBAL_MONITOR_CONTROL_ARP_DISABLED (0xf0) #define GLOBAL_CONTROL_2 0x1c +#define GLOBAL_CONTROL_2_NO_CASCADE 0xe000 +#define GLOBAL_CONTROL_2_MULTIPLE_CASCADE 0xf000 + #define GLOBAL_STATS_OP 0x1d #define GLOBAL_STATS_OP_BUSY BIT(15) #define GLOBAL_STATS_OP_NOP (0 << 12) @@ -172,9 +245,20 @@ #define GLOBAL2_MGMT_EN_0X 0x03 #define GLOBAL2_FLOW_CONTROL 0x04 #define GLOBAL2_SWITCH_MGMT 0x05 +#define GLOBAL2_SWITCH_MGMT_USE_DOUBLE_TAG_DATA BIT(15) +#define GLOBAL2_SWITCH_MGMT_PREVENT_LOOPS BIT(14) +#define GLOBAL2_SWITCH_MGMT_FLOW_CONTROL_MSG BIT(13) +#define GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI BIT(7) +#define GLOBAL2_SWITCH_MGMT_RSVD2CPU BIT(3) #define GLOBAL2_DEVICE_MAPPING 0x06 +#define GLOBAL2_DEVICE_MAPPING_UPDATE BIT(15) +#define GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT 8 #define GLOBAL2_TRUNK_MASK 0x07 +#define GLOBAL2_TRUNK_MASK_UPDATE BIT(15) +#define GLOBAL2_TRUNK_MASK_NUM_SHIFT 12 #define GLOBAL2_TRUNK_MAPPING 0x08 +#define GLOBAL2_TRUNK_MAPPING_UPDATE BIT(15) +#define GLOBAL2_TRUNK_MAPPING_ID_SHIFT 11 #define GLOBAL2_INGRESS_OP 0x09 #define GLOBAL2_INGRESS_DATA 0x0a #define GLOBAL2_PVT_ADDR 0x0b @@ -183,6 +267,10 @@ #define GLOBAL2_SWITCH_MAC_BUSY BIT(15) #define GLOBAL2_ATU_STATS 
0x0e #define GLOBAL2_PRIO_OVERRIDE 0x0f +#define GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP BIT(7) +#define GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT 4 +#define GLOBAL2_PRIO_OVERRIDE_FORCE_ARP BIT(3) +#define GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT 0 #define GLOBAL2_EEPROM_OP 0x14 #define GLOBAL2_EEPROM_OP_BUSY BIT(15) #define GLOBAL2_EEPROM_OP_LOAD BIT(11) @@ -260,14 +348,14 @@ struct mv88e6xxx_hw_stat { }; int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active); -int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port); +int mv88e6xxx_setup_ports(struct dsa_switch *ds); int mv88e6xxx_setup_common(struct dsa_switch *ds); +int mv88e6xxx_setup_global(struct dsa_switch *ds); int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, int reg, u16 val); int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); -int mv88e6xxx_config_prio(struct dsa_switch *ds); int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); int mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum); @@ -289,7 +377,6 @@ int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port); void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p); int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp); -int mv88e6xxx_phy_wait(struct dsa_switch *ds); int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds); int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds); int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum); diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 426916036151..acd53173fcc0 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -179,10 +179,8 @@ config SUNLANCE config AMD_XGBE tristate "AMD 10GbE Ethernet driver" - depends on (OF_NET || ACPI) && HAS_IOMEM && HAS_DMA + depends on ((OF_NET && OF_ADDRESS) || ACPI) && HAS_IOMEM && HAS_DMA depends on ARM64 || COMPILE_TEST - select PHYLIB - select AMD_XGBE_PHY select BITREVERSE select CRC32 select PTP_1588_CLOCK diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 34c28aac767f..b6fa89102526 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -857,6 +857,48 @@ */ #define PCS_MMD_SELECT 0xff +/* SerDes integration register offsets */ +#define SIR0_KR_RT_1 0x002c +#define SIR0_STATUS 0x0040 +#define SIR1_SPEED 0x0000 + +/* SerDes integration register entry bit positions and sizes */ +#define SIR0_KR_RT_1_RESET_INDEX 11 +#define SIR0_KR_RT_1_RESET_WIDTH 1 +#define SIR0_STATUS_RX_READY_INDEX 0 +#define SIR0_STATUS_RX_READY_WIDTH 1 +#define SIR0_STATUS_TX_READY_INDEX 8 +#define SIR0_STATUS_TX_READY_WIDTH 1 +#define SIR1_SPEED_CDR_RATE_INDEX 12 +#define SIR1_SPEED_CDR_RATE_WIDTH 4 +#define SIR1_SPEED_DATARATE_INDEX 4 +#define SIR1_SPEED_DATARATE_WIDTH 2 +#define SIR1_SPEED_PLLSEL_INDEX 3 +#define SIR1_SPEED_PLLSEL_WIDTH 1 +#define SIR1_SPEED_RATECHANGE_INDEX 6 +#define SIR1_SPEED_RATECHANGE_WIDTH 1 +#define SIR1_SPEED_TXAMP_INDEX 8 +#define SIR1_SPEED_TXAMP_WIDTH 4 +#define SIR1_SPEED_WORDMODE_INDEX 0 +#define SIR1_SPEED_WORDMODE_WIDTH 3 + +/* SerDes RxTx register offsets */ +#define RXTX_REG6 0x0018 +#define RXTX_REG20 0x0050 +#define RXTX_REG22 0x0058 +#define RXTX_REG114 0x01c8 +#define 
RXTX_REG129 0x0204 + +/* SerDes RxTx register entry bit positions and sizes */ +#define RXTX_REG6_RESETB_RXD_INDEX 8 +#define RXTX_REG6_RESETB_RXD_WIDTH 1 +#define RXTX_REG20_BLWC_ENA_INDEX 2 +#define RXTX_REG20_BLWC_ENA_WIDTH 1 +#define RXTX_REG114_PQ_REG_INDEX 9 +#define RXTX_REG114_PQ_REG_WIDTH 7 +#define RXTX_REG129_RXDFE_CONFIG_INDEX 14 +#define RXTX_REG129_RXDFE_CONFIG_WIDTH 2 + /* Descriptor/Packet entry bit positions and sizes */ #define RX_PACKET_ERRORS_CRC_INDEX 2 #define RX_PACKET_ERRORS_CRC_WIDTH 1 @@ -973,10 +1015,47 @@ #define TX_NORMAL_DESC2_VLAN_INSERT 0x2 /* MDIO undefined or vendor specific registers */ +#ifndef MDIO_PMA_10GBR_PMD_CTRL +#define MDIO_PMA_10GBR_PMD_CTRL 0x0096 +#endif + +#ifndef MDIO_PMA_10GBR_FECCTRL +#define MDIO_PMA_10GBR_FECCTRL 0x00ab +#endif + +#ifndef MDIO_AN_XNP +#define MDIO_AN_XNP 0x0016 +#endif + +#ifndef MDIO_AN_LPX +#define MDIO_AN_LPX 0x0019 +#endif + #ifndef MDIO_AN_COMP_STAT #define MDIO_AN_COMP_STAT 0x0030 #endif +#ifndef MDIO_AN_INTMASK +#define MDIO_AN_INTMASK 0x8001 +#endif + +#ifndef MDIO_AN_INT +#define MDIO_AN_INT 0x8002 +#endif + +#ifndef MDIO_CTRL1_SPEED1G +#define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) +#endif + +/* MDIO mask values */ +#define XGBE_XNP_MCF_NULL_MESSAGE 0x001 +#define XGBE_XNP_ACK_PROCESSED BIT(12) +#define XGBE_XNP_MP_FORMATTED BIT(13) +#define XGBE_XNP_NP_EXCHANGE BIT(15) + +#define XGBE_KR_TRAINING_START BIT(0) +#define XGBE_KR_TRAINING_ENABLE BIT(1) + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable @@ -1119,6 +1198,82 @@ do { \ ioread32((_pdata)->xpcs_regs + (_off)) /* Macros for building, reading or writing register values or bits + * within the register values of SerDes integration registers. + */ +#define XSIR_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XSIR_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XSIR0_IOREAD(_pdata, _reg) \ + ioread16((_pdata)->sir0_regs + _reg) + +#define XSIR0_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XSIR0_IOREAD((_pdata), _reg), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XSIR0_IOWRITE(_pdata, _reg, _val) \ + iowrite16((_val), (_pdata)->sir0_regs + _reg) + +#define XSIR0_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u16 reg_val = XSIR0_IOREAD((_pdata), _reg); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XSIR0_IOWRITE((_pdata), _reg, reg_val); \ +} while (0) + +#define XSIR1_IOREAD(_pdata, _reg) \ + ioread16((_pdata)->sir1_regs + _reg) + +#define XSIR1_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XSIR1_IOREAD((_pdata), _reg), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XSIR1_IOWRITE(_pdata, _reg, _val) \ + iowrite16((_val), (_pdata)->sir1_regs + _reg) + +#define XSIR1_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u16 reg_val = XSIR1_IOREAD((_pdata), _reg); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XSIR1_IOWRITE((_pdata), _reg, reg_val); \ +} while (0) + +/* Macros for building, reading or writing register values or bits + * within the register values of SerDes RxTx registers. 
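The token-pasting field macros above turn a (register, field) pair into a masked read-modify-write: XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, v) expands against SIR1_SPEED_CDR_RATE_INDEX/_WIDTH. Usage sketch (the field value here is illustrative):

	/* Set the CDR rate, bits [15:12] of SIR1_SPEED, to 0x2:
	 * read SIR1_SPEED, clear the 4-bit field at index 12,
	 * OR in 0x2 << 12, write the result back.
	 */
	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, 0x2);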
+ */ +#define XRXTX_IOREAD(_pdata, _reg) \ + ioread16((_pdata)->rxtx_regs + _reg) + +#define XRXTX_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XRXTX_IOREAD((_pdata), _reg), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XRXTX_IOWRITE(_pdata, _reg, _val) \ + iowrite16((_val), (_pdata)->rxtx_regs + _reg) + +#define XRXTX_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u16 reg_val = XRXTX_IOREAD((_pdata), _reg); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XRXTX_IOWRITE((_pdata), _reg, reg_val); \ +} while (0) + +/* Macros for building, reading or writing register values or bits * using MDIO. Different from above because of the use of standardized * Linux include values. No shifting is performed with the bit * operations, everything works on mask values. diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c index 8a50b01c2686..a6b9899e285f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c @@ -150,9 +150,12 @@ static int xgbe_dcb_ieee_setets(struct net_device *netdev, tc_ets = 0; tc_ets_weight = 0; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - DBGPR(" TC%u: tx_bw=%hhu, rx_bw=%hhu, tsa=%hhu\n", i, - ets->tc_tx_bw[i], ets->tc_rx_bw[i], ets->tc_tsa[i]); - DBGPR(" PRIO%u: TC=%hhu\n", i, ets->prio_tc[i]); + netif_dbg(pdata, drv, netdev, + "TC%u: tx_bw=%hhu, rx_bw=%hhu, tsa=%hhu\n", i, + ets->tc_tx_bw[i], ets->tc_rx_bw[i], + ets->tc_tsa[i]); + netif_dbg(pdata, drv, netdev, "PRIO%u: TC=%hhu\n", i, + ets->prio_tc[i]); if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && (i >= pdata->hw_feat.tc_cnt)) @@ -214,8 +217,9 @@ static int xgbe_dcb_ieee_setpfc(struct net_device *netdev, { struct xgbe_prv_data *pdata = netdev_priv(netdev); - DBGPR(" cap=%hhu, en=%hhx, mbc=%hhu, delay=%hhu\n", - pfc->pfc_cap, pfc->pfc_en, pfc->mbc, pfc->delay); + netif_dbg(pdata, drv, netdev, + "cap=%hhu, en=%#hhx, mbc=%hhu, delay=%hhu\n", + pfc->pfc_cap, pfc->pfc_en, pfc->mbc, pfc->delay); if (!pdata->pfc) { pdata->pfc = devm_kzalloc(pdata->dev, sizeof(*pdata->pfc), @@ -238,9 +242,10 @@ static u8 xgbe_dcb_getdcbx(struct net_device *netdev) static u8 xgbe_dcb_setdcbx(struct net_device *netdev, u8 dcbx) { + struct xgbe_prv_data *pdata = netdev_priv(netdev); u8 support = xgbe_dcb_getdcbx(netdev); - DBGPR(" DCBX=%#hhx\n", dcbx); + netif_dbg(pdata, drv, netdev, "DCBX=%#hhx\n", dcbx); if (dcbx & ~support) return 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index d81fc6bd4759..dd03ad865caf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -208,8 +208,9 @@ static int xgbe_init_ring(struct xgbe_prv_data *pdata, if (!ring->rdata) return -ENOMEM; - DBGPR(" rdesc=0x%p, rdesc_dma=0x%llx, rdata=0x%p\n", - ring->rdesc, ring->rdesc_dma, ring->rdata); + netif_dbg(pdata, drv, pdata->netdev, + "rdesc=%p, rdesc_dma=%pad, rdata=%p\n", + ring->rdesc, &ring->rdesc_dma, ring->rdata); DBGPR("<--xgbe_init_ring\n"); @@ -226,7 +227,9 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata) channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { - DBGPR(" %s - tx_ring:\n", channel->name); + netif_dbg(pdata, drv, pdata->netdev, "%s - Tx ring:\n", + channel->name); + ret = xgbe_init_ring(pdata, channel->tx_ring, pdata->tx_desc_count); if (ret) { @@ -235,12 +238,14 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata) goto err_ring; } - 
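From here on the patch converts the xgbe driver's DBGPR() tracing to netif_dbg()/netif_info(), which gates each message class on pdata->msg_enable at runtime instead of a compile-time define. A sketch of how such a mask is typically seeded (the module-parameter plumbing is an assumption, not part of this patch):

static int debug = -1;		/* -1: take the driver's default mask (assumed) */
module_param(debug, int, 0);

	/* e.g. in probe: */
	pdata->msg_enable = netif_msg_init(debug,
					   NETIF_MSG_DRV | NETIF_MSG_PROBE |
					   NETIF_MSG_LINK);

Individual classes can then be toggled per interface at runtime, e.g. with `ethtool -s <iface> msglvl drv on`.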
DBGPR(" %s - rx_ring:\n", channel->name); + netif_dbg(pdata, drv, pdata->netdev, "%s - Rx ring:\n", + channel->name); + ret = xgbe_init_ring(pdata, channel->rx_ring, pdata->rx_desc_count); if (ret) { netdev_alert(pdata->netdev, - "error initializing Tx ring\n"); + "error initializing Rx ring\n"); goto err_ring; } } @@ -476,8 +481,6 @@ static void xgbe_unmap_rdata(struct xgbe_prv_data *pdata, if (rdata->state_saved) { rdata->state_saved = 0; - rdata->state.incomplete = 0; - rdata->state.context_next = 0; rdata->state.skb = NULL; rdata->state.len = 0; rdata->state.error = 0; @@ -518,8 +521,6 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) rdata = XGBE_GET_DESC_DATA(ring, cur_index); if (tso) { - DBGPR(" TSO packet\n"); - /* Map the TSO header */ skb_dma = dma_map_single(pdata->dev, skb->data, packet->header_len, DMA_TO_DEVICE); @@ -529,6 +530,9 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) } rdata->skb_dma = skb_dma; rdata->skb_dma_len = packet->header_len; + netif_dbg(pdata, tx_queued, pdata->netdev, + "skb header: index=%u, dma=%pad, len=%u\n", + cur_index, &skb_dma, packet->header_len); offset = packet->header_len; @@ -550,8 +554,9 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) } rdata->skb_dma = skb_dma; rdata->skb_dma_len = len; - DBGPR(" skb data: index=%u, dma=0x%llx, len=%u\n", - cur_index, skb_dma, len); + netif_dbg(pdata, tx_queued, pdata->netdev, + "skb data: index=%u, dma=%pad, len=%u\n", + cur_index, &skb_dma, len); datalen -= len; offset += len; @@ -563,7 +568,8 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - DBGPR(" mapping frag %u\n", i); + netif_dbg(pdata, tx_queued, pdata->netdev, + "mapping frag %u\n", i); frag = &skb_shinfo(skb)->frags[i]; offset = 0; @@ -582,8 +588,9 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) rdata->skb_dma = skb_dma; rdata->skb_dma_len = len; rdata->mapped_as_page = 1; - DBGPR(" skb data: index=%u, dma=0x%llx, len=%u\n", - cur_index, skb_dma, len); + netif_dbg(pdata, tx_queued, pdata->netdev, + "skb frag: index=%u, dma=%pad, len=%u\n", + cur_index, &skb_dma, len); datalen -= len; offset += len; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 21d9497518fd..506e832c9e9a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -710,7 +710,8 @@ static int xgbe_set_promiscuous_mode(struct xgbe_prv_data *pdata, if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == val) return 0; - DBGPR(" %s promiscuous mode\n", enable ? "entering" : "leaving"); + netif_dbg(pdata, drv, pdata->netdev, "%s promiscuous mode\n", + enable ? "entering" : "leaving"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, val); return 0; @@ -724,7 +725,8 @@ static int xgbe_set_all_multicast_mode(struct xgbe_prv_data *pdata, if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PM) == val) return 0; - DBGPR(" %s allmulti mode\n", enable ? "entering" : "leaving"); + netif_dbg(pdata, drv, pdata->netdev, "%s allmulti mode\n", + enable ? 
"entering" : "leaving"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PM, val); return 0; @@ -749,8 +751,9 @@ static void xgbe_set_mac_reg(struct xgbe_prv_data *pdata, mac_addr[0] = ha->addr[4]; mac_addr[1] = ha->addr[5]; - DBGPR(" adding mac address %pM at 0x%04x\n", ha->addr, - *mac_reg); + netif_dbg(pdata, drv, pdata->netdev, + "adding mac address %pM at %#x\n", + ha->addr, *mac_reg); XGMAC_SET_BITS(mac_addr_hi, MAC_MACA1HR, AE, 1); } @@ -907,23 +910,6 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, else mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); - /* If the PCS is changing modes, match the MAC speed to it */ - if (((mmd_address >> 16) == MDIO_MMD_PCS) && - ((mmd_address & 0xffff) == MDIO_CTRL2)) { - struct phy_device *phydev = pdata->phydev; - - if (mmd_data & MDIO_PCS_CTRL2_TYPE) { - /* KX mode */ - if (phydev->supported & SUPPORTED_1000baseKX_Full) - xgbe_set_gmii_speed(pdata); - else - xgbe_set_gmii_2500_speed(pdata); - } else { - /* KR mode */ - xgbe_set_xgmii_speed(pdata); - } - } - /* The PCS registers are accessed using mmio. The underlying APB3 * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two @@ -1322,7 +1308,8 @@ static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: - DBGPR(" TC%u using SP\n", i); + netif_dbg(pdata, drv, pdata->netdev, + "TC%u using SP\n", i); XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, MTL_TSA_SP); break; @@ -1330,7 +1317,8 @@ static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) weight = total_weight * ets->tc_tx_bw[i] / 100; weight = clamp(weight, min_weight, total_weight); - DBGPR(" TC%u using DWRR (weight %u)\n", i, weight); + netif_dbg(pdata, drv, pdata->netdev, + "TC%u using DWRR (weight %u)\n", i, weight); XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, MTL_TSA_ETS); XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW, @@ -1359,7 +1347,8 @@ static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata) } mask &= 0xff; - DBGPR(" TC%u PFC mask=%#x\n", tc, mask); + netif_dbg(pdata, drv, pdata->netdev, "TC%u PFC mask=%#x\n", + tc, mask); reg = MTL_TCPM0R + (MTL_TCPM_INC * (tc / MTL_TCPM_TC_PER_REG)); reg_val = XGMAC_IOREAD(pdata, reg); @@ -1457,8 +1446,9 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) /* Create a context descriptor if this is a TSO packet */ if (tso_context || vlan_context) { if (tso_context) { - DBGPR(" TSO context descriptor, mss=%u\n", - packet->mss); + netif_dbg(pdata, tx_queued, pdata->netdev, + "TSO context descriptor, mss=%u\n", + packet->mss); /* Set the MSS size */ XGMAC_SET_BITS_LE(rdesc->desc2, TX_CONTEXT_DESC2, @@ -1476,8 +1466,9 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) } if (vlan_context) { - DBGPR(" VLAN context descriptor, ctag=%u\n", - packet->vlan_ctag); + netif_dbg(pdata, tx_queued, pdata->netdev, + "VLAN context descriptor, ctag=%u\n", + packet->vlan_ctag); /* Mark it as a CONTEXT descriptor */ XGMAC_SET_BITS_LE(rdesc->desc3, TX_CONTEXT_DESC3, @@ -1533,6 +1524,8 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) packet->tcp_payload_len); XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, TCPHDRLEN, packet->tcp_header_len / 4); + + pdata->ext_stats.tx_tso_packets++; } else { /* Enable CRC and Pad Insertion */ XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CPC, 0); @@ -1594,9 +1587,9 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) rdesc = 
rdata->rdesc; XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN, 1); -#ifdef XGMAC_ENABLE_TX_DESC_DUMP - xgbe_dump_tx_desc(ring, start_index, packet->rdesc_count, 1); -#endif + if (netif_msg_tx_queued(pdata)) + xgbe_dump_tx_desc(pdata, ring, start_index, + packet->rdesc_count, 1); /* Make sure ownership is written to the descriptor */ dma_wmb(); @@ -1618,11 +1611,12 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) static int xgbe_dev_read(struct xgbe_channel *channel) { + struct xgbe_prv_data *pdata = channel->pdata; struct xgbe_ring *ring = channel->rx_ring; struct xgbe_ring_data *rdata; struct xgbe_ring_desc *rdesc; struct xgbe_packet_data *packet = &ring->packet_data; - struct net_device *netdev = channel->pdata->netdev; + struct net_device *netdev = pdata->netdev; unsigned int err, etlt, l34t; DBGPR("-->xgbe_dev_read: cur = %d\n", ring->cur); @@ -1637,9 +1631,8 @@ static int xgbe_dev_read(struct xgbe_channel *channel) /* Make sure descriptor fields are read after reading the OWN bit */ dma_rmb(); -#ifdef XGMAC_ENABLE_RX_DESC_DUMP - xgbe_dump_rx_desc(ring, rdesc, ring->cur); -#endif + if (netif_msg_rx_status(pdata)) + xgbe_dump_rx_desc(pdata, ring, ring->cur); if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CTXT)) { /* Timestamp Context Descriptor */ @@ -1661,9 +1654,12 @@ static int xgbe_dev_read(struct xgbe_channel *channel) CONTEXT_NEXT, 1); /* Get the header length */ - if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) + if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) { rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2, RX_NORMAL_DESC2, HL); + if (rdata->rx.hdr_len) + pdata->ext_stats.rx_split_header_packets++; + } /* Get the RSS hash */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, RSV)) { @@ -1700,14 +1696,14 @@ static int xgbe_dev_read(struct xgbe_channel *channel) INCOMPLETE, 0); /* Set checksum done indicator as appropriate */ - if (channel->pdata->netdev->features & NETIF_F_RXCSUM) + if (netdev->features & NETIF_F_RXCSUM) XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CSUM_DONE, 1); /* Check for errors (only valid in last descriptor) */ err = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ES); etlt = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ETLT); - DBGPR(" err=%u, etlt=%#x\n", err, etlt); + netif_dbg(pdata, rx_status, netdev, "err=%u, etlt=%#x\n", err, etlt); if (!err || !etlt) { /* No error if err is 0 or etlt is 0 */ @@ -1718,7 +1714,8 @@ static int xgbe_dev_read(struct xgbe_channel *channel) packet->vlan_ctag = XGMAC_GET_BITS_LE(rdesc->desc0, RX_NORMAL_DESC0, OVT); - DBGPR(" vlan-ctag=0x%04x\n", packet->vlan_ctag); + netif_dbg(pdata, rx_status, netdev, "vlan-ctag=%#06x\n", + packet->vlan_ctag); } } else { if ((etlt == 0x05) || (etlt == 0x06)) @@ -2026,9 +2023,9 @@ static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata) for (i = 0; i < pdata->tx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo_size); - netdev_notice(pdata->netdev, - "%d Tx hardware queues, %d byte fifo per queue\n", - pdata->tx_q_count, ((fifo_size + 1) * 256)); + netif_info(pdata, drv, pdata->netdev, + "%d Tx hardware queues, %d byte fifo per queue\n", + pdata->tx_q_count, ((fifo_size + 1) * 256)); } static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata) @@ -2042,9 +2039,9 @@ static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata) for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo_size); - netdev_notice(pdata->netdev, - "%d Rx hardware queues, %d 
byte fifo per queue\n", - pdata->rx_q_count, ((fifo_size + 1) * 256)); + netif_info(pdata, drv, pdata->netdev, + "%d Rx hardware queues, %d byte fifo per queue\n", + pdata->rx_q_count, ((fifo_size + 1) * 256)); } static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) @@ -2063,14 +2060,16 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) for (i = 0, queue = 0; i < pdata->hw_feat.tc_cnt; i++) { for (j = 0; j < qptc; j++) { - DBGPR(" TXq%u mapped to TC%u\n", queue, i); + netif_dbg(pdata, drv, pdata->netdev, + "TXq%u mapped to TC%u\n", queue, i); XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, Q2TCMAP, i); pdata->q2tc_map[queue++] = i; } if (i < qptc_extra) { - DBGPR(" TXq%u mapped to TC%u\n", queue, i); + netif_dbg(pdata, drv, pdata->netdev, + "TXq%u mapped to TC%u\n", queue, i); XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, Q2TCMAP, i); pdata->q2tc_map[queue++] = i; @@ -2088,13 +2087,15 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) for (i = 0, prio = 0; i < prio_queues;) { mask = 0; for (j = 0; j < ppq; j++) { - DBGPR(" PRIO%u mapped to RXq%u\n", prio, i); + netif_dbg(pdata, drv, pdata->netdev, + "PRIO%u mapped to RXq%u\n", prio, i); mask |= (1 << prio); pdata->prio2q_map[prio++] = i; } if (i < ppq_extra) { - DBGPR(" PRIO%u mapped to RXq%u\n", prio, i); + netif_dbg(pdata, drv, pdata->netdev, + "PRIO%u mapped to RXq%u\n", prio, i); mask |= (1 << prio); pdata->prio2q_map[prio++] = i; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index db84ddcfec84..e4521799ba9c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -183,9 +183,10 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) channel->rx_ring = rx_ring++; } - DBGPR(" %s: queue=%u, dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n", - channel->name, channel->queue_index, channel->dma_regs, - channel->dma_irq, channel->tx_ring, channel->rx_ring); + netif_dbg(pdata, drv, pdata->netdev, + "%s: dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n", + channel->name, channel->dma_regs, channel->dma_irq, + channel->tx_ring, channel->rx_ring); } pdata->channel = channel_mem; @@ -235,7 +236,8 @@ static int xgbe_maybe_stop_tx_queue(struct xgbe_channel *channel, struct xgbe_prv_data *pdata = channel->pdata; if (count > xgbe_tx_avail_desc(ring)) { - DBGPR(" Tx queue stopped, not enough descriptors available\n"); + netif_info(pdata, drv, pdata->netdev, + "Tx queue stopped, not enough descriptors available\n"); netif_stop_subqueue(pdata->netdev, channel->queue_index); ring->tx.queue_stopped = 1; @@ -330,7 +332,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (!dma_isr) goto isr_done; - DBGPR(" DMA_ISR = %08x\n", dma_isr); + netif_dbg(pdata, intr, pdata->netdev, "DMA_ISR=%#010x\n", dma_isr); for (i = 0; i < pdata->channel_count; i++) { if (!(dma_isr & (1 << i))) @@ -339,7 +341,8 @@ static irqreturn_t xgbe_isr(int irq, void *data) channel = pdata->channel + i; dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR); - DBGPR(" DMA_CH%u_ISR = %08x\n", i, dma_ch_isr); + netif_dbg(pdata, intr, pdata->netdev, "DMA_CH%u_ISR=%#010x\n", + i, dma_ch_isr); /* The TI or RI interrupt bits may still be set even if using * per channel DMA interrupts. 
Check to be sure those are not @@ -386,8 +389,6 @@ static irqreturn_t xgbe_isr(int irq, void *data) } } - DBGPR(" DMA_ISR = %08x\n", XGMAC_IOREAD(pdata, DMA_ISR)); - isr_done: return IRQ_HANDLED; } @@ -436,43 +437,61 @@ static void xgbe_tx_timer(unsigned long data) DBGPR("<--xgbe_tx_timer\n"); } -static void xgbe_init_tx_timers(struct xgbe_prv_data *pdata) +static void xgbe_service(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + service_work); + + pdata->phy_if.phy_status(pdata); +} + +static void xgbe_service_timer(unsigned long data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + + schedule_work(&pdata->service_work); + + mod_timer(&pdata->service_timer, jiffies + HZ); +} + +static void xgbe_init_timers(struct xgbe_prv_data *pdata) { struct xgbe_channel *channel; unsigned int i; - DBGPR("-->xgbe_init_tx_timers\n"); + setup_timer(&pdata->service_timer, xgbe_service_timer, + (unsigned long)pdata); channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { if (!channel->tx_ring) break; - DBGPR(" %s adding tx timer\n", channel->name); setup_timer(&channel->tx_timer, xgbe_tx_timer, (unsigned long)channel); } +} - DBGPR("<--xgbe_init_tx_timers\n"); +static void xgbe_start_timers(struct xgbe_prv_data *pdata) +{ + mod_timer(&pdata->service_timer, jiffies + HZ); } -static void xgbe_stop_tx_timers(struct xgbe_prv_data *pdata) +static void xgbe_stop_timers(struct xgbe_prv_data *pdata) { struct xgbe_channel *channel; unsigned int i; - DBGPR("-->xgbe_stop_tx_timers\n"); + del_timer_sync(&pdata->service_timer); channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { if (!channel->tx_ring) break; - DBGPR(" %s deleting tx timer\n", channel->name); del_timer_sync(&channel->tx_timer); } - - DBGPR("<--xgbe_stop_tx_timers\n"); } void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) @@ -759,112 +778,12 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata) DBGPR("<--xgbe_free_rx_data\n"); } -static void xgbe_adjust_link(struct net_device *netdev) -{ - struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct xgbe_hw_if *hw_if = &pdata->hw_if; - struct phy_device *phydev = pdata->phydev; - int new_state = 0; - - if (!phydev) - return; - - if (phydev->link) { - /* Flow control support */ - if (pdata->pause_autoneg) { - if (phydev->pause || phydev->asym_pause) { - pdata->tx_pause = 1; - pdata->rx_pause = 1; - } else { - pdata->tx_pause = 0; - pdata->rx_pause = 0; - } - } - - if (pdata->tx_pause != pdata->phy_tx_pause) { - hw_if->config_tx_flow_control(pdata); - pdata->phy_tx_pause = pdata->tx_pause; - } - - if (pdata->rx_pause != pdata->phy_rx_pause) { - hw_if->config_rx_flow_control(pdata); - pdata->phy_rx_pause = pdata->rx_pause; - } - - /* Speed support */ - if (phydev->speed != pdata->phy_speed) { - new_state = 1; - - switch (phydev->speed) { - case SPEED_10000: - hw_if->set_xgmii_speed(pdata); - break; - - case SPEED_2500: - hw_if->set_gmii_2500_speed(pdata); - break; - - case SPEED_1000: - hw_if->set_gmii_speed(pdata); - break; - } - pdata->phy_speed = phydev->speed; - } - - if (phydev->link != pdata->phy_link) { - new_state = 1; - pdata->phy_link = 1; - } - } else if (pdata->phy_link) { - new_state = 1; - pdata->phy_link = 0; - pdata->phy_speed = SPEED_UNKNOWN; - } - - if (new_state) - phy_print_status(phydev); -} - static int xgbe_phy_init(struct xgbe_prv_data *pdata) { - struct net_device *netdev = pdata->netdev; - struct phy_device *phydev = pdata->phydev; - int 
ret; - pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; - pdata->phy_tx_pause = pdata->tx_pause; - pdata->phy_rx_pause = pdata->rx_pause; - ret = phy_connect_direct(netdev, phydev, &xgbe_adjust_link, - pdata->phy_mode); - if (ret) { - netdev_err(netdev, "phy_connect_direct failed\n"); - return ret; - } - - if (!phydev->drv || (phydev->drv->phy_id == 0)) { - netdev_err(netdev, "phy_id not valid\n"); - ret = -ENODEV; - goto err_phy_connect; - } - DBGPR(" phy_connect_direct succeeded for PHY %s, link=%d\n", - dev_name(&phydev->dev), phydev->link); - - return 0; - -err_phy_connect: - phy_disconnect(phydev); - - return ret; -} - -static void xgbe_phy_exit(struct xgbe_prv_data *pdata) -{ - if (!pdata->phydev) - return; - - phy_disconnect(pdata->phydev); + return pdata->phy_if.phy_reset(pdata); } int xgbe_powerdown(struct net_device *netdev, unsigned int caller) @@ -889,13 +808,14 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) netif_tx_stop_all_queues(netdev); + xgbe_stop_timers(pdata); + flush_workqueue(pdata->dev_workqueue); + hw_if->powerdown_tx(pdata); hw_if->powerdown_rx(pdata); xgbe_napi_disable(pdata, 0); - phy_stop(pdata->phydev); - pdata->power_down = 1; spin_unlock_irqrestore(&pdata->lock, flags); @@ -924,8 +844,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) pdata->power_down = 0; - phy_start(pdata->phydev); - xgbe_napi_enable(pdata, 0); hw_if->powerup_tx(pdata); @@ -936,6 +854,8 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) netif_tx_start_all_queues(netdev); + xgbe_start_timers(pdata); + spin_unlock_irqrestore(&pdata->lock, flags); DBGPR("<--xgbe_powerup\n"); @@ -946,6 +866,7 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) static int xgbe_start(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; + struct xgbe_phy_if *phy_if = &pdata->phy_if; struct net_device *netdev = pdata->netdev; int ret; @@ -953,7 +874,9 @@ static int xgbe_start(struct xgbe_prv_data *pdata) hw_if->init(pdata); - phy_start(pdata->phydev); + ret = phy_if->phy_start(pdata); + if (ret) + goto err_phy; xgbe_napi_enable(pdata, 1); @@ -964,10 +887,11 @@ static int xgbe_start(struct xgbe_prv_data *pdata) hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); - xgbe_init_tx_timers(pdata); - netif_tx_start_all_queues(netdev); + xgbe_start_timers(pdata); + schedule_work(&pdata->service_work); + DBGPR("<--xgbe_start\n"); return 0; @@ -975,8 +899,9 @@ static int xgbe_start(struct xgbe_prv_data *pdata) err_napi: xgbe_napi_disable(pdata, 1); - phy_stop(pdata->phydev); + phy_if->phy_stop(pdata); +err_phy: hw_if->exit(pdata); return ret; @@ -985,6 +910,7 @@ err_napi: static void xgbe_stop(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; + struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_channel *channel; struct net_device *netdev = pdata->netdev; struct netdev_queue *txq; @@ -994,7 +920,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) netif_tx_stop_all_queues(netdev); - xgbe_stop_tx_timers(pdata); + xgbe_stop_timers(pdata); + flush_workqueue(pdata->dev_workqueue); hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); @@ -1003,7 +930,7 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) xgbe_napi_disable(pdata, 1); - phy_stop(pdata->phydev); + phy_if->phy_stop(pdata); hw_if->exit(pdata); @@ -1374,7 +1301,7 @@ static int xgbe_open(struct net_device *netdev) ret = clk_prepare_enable(pdata->sysclk); if (ret) { netdev_alert(netdev, "dma clk_prepare_enable failed\n"); - goto err_phy_init; 
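[Editor's note, not part of the patch] The xgbe_service_timer()/xgbe_service() pair added above replaces the phylib adjust_link callback with driver-owned link polling: a self-re-arming timer defers the real work to process context once a second. A minimal sketch of that timer-plus-workqueue pattern, assuming the same pre-4.15 timer API the patch uses (the "example" names are illustrative, not driver symbols):

	#include <linux/timer.h>
	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	struct example {
		struct timer_list timer;
		struct work_struct work;	/* does the actual link polling */
	};

	static void example_timer(unsigned long data)
	{
		struct example *ex = (struct example *)data;

		schedule_work(&ex->work);		/* poll in process context */
		mod_timer(&ex->timer, jiffies + HZ);	/* re-arm for ~1s later */
	}

	/* At init: setup_timer(&ex->timer, example_timer, (unsigned long)ex);
	 * then mod_timer() to start it, as xgbe_init_timers() and
	 * xgbe_start_timers() do above.
	 */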
+ return ret; } ret = clk_prepare_enable(pdata->ptpclk); @@ -1399,14 +1326,17 @@ static int xgbe_open(struct net_device *netdev) if (ret) goto err_channels; - /* Initialize the device restart and Tx timestamp work struct */ + INIT_WORK(&pdata->service_work, xgbe_service); INIT_WORK(&pdata->restart_work, xgbe_restart); INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp); + xgbe_init_timers(pdata); ret = xgbe_start(pdata); if (ret) goto err_rings; + clear_bit(XGBE_DOWN, &pdata->dev_state); + DBGPR("<--xgbe_open\n"); return 0; @@ -1423,9 +1353,6 @@ err_ptpclk: err_sysclk: clk_disable_unprepare(pdata->sysclk); -err_phy_init: - xgbe_phy_exit(pdata); - return ret; } @@ -1449,8 +1376,7 @@ static int xgbe_close(struct net_device *netdev) clk_disable_unprepare(pdata->ptpclk); clk_disable_unprepare(pdata->sysclk); - /* Release the phy */ - xgbe_phy_exit(pdata); + set_bit(XGBE_DOWN, &pdata->dev_state); DBGPR("<--xgbe_close\n"); @@ -1478,7 +1404,8 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_OK; if (skb->len == 0) { - netdev_err(netdev, "empty skb received from stack\n"); + netif_err(pdata, tx_err, netdev, + "empty skb received from stack\n"); dev_kfree_skb_any(skb); goto tx_netdev_return; } @@ -1494,7 +1421,8 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) ret = xgbe_prep_tso(skb, packet); if (ret) { - netdev_err(netdev, "error processing TSO packet\n"); + netif_err(pdata, tx_err, netdev, + "error processing TSO packet\n"); dev_kfree_skb_any(skb); goto tx_netdev_return; } @@ -1513,9 +1441,8 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) /* Configure required descriptor fields for transmission */ hw_if->dev_xmit(channel); -#ifdef XGMAC_ENABLE_TX_PKT_DUMP - xgbe_print_pkt(netdev, skb, true); -#endif + if (netif_msg_pktdata(pdata)) + xgbe_print_pkt(netdev, skb, true); /* Stop the queue in advance if there may not be enough descriptors */ xgbe_maybe_stop_tx_queue(channel, ring, XGBE_TX_MAX_DESCS); @@ -1710,7 +1637,8 @@ static int xgbe_setup_tc(struct net_device *netdev, u8 tc) (pdata->q2tc_map[queue] == i)) queue++; - DBGPR(" TC%u using TXq%u-%u\n", i, offset, queue - 1); + netif_dbg(pdata, drv, netdev, "TC%u using TXq%u-%u\n", + i, offset, queue - 1); netdev_set_tc_queue(netdev, i, queue - offset, offset); offset = queue; } @@ -1820,9 +1748,10 @@ static void xgbe_rx_refresh(struct xgbe_channel *channel) lower_32_bits(rdata->rdesc_dma)); } -static struct sk_buff *xgbe_create_skb(struct napi_struct *napi, +static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, + struct napi_struct *napi, struct xgbe_ring_data *rdata, - unsigned int *len) + unsigned int len) { struct sk_buff *skb; u8 *packet; @@ -1832,14 +1761,31 @@ static struct sk_buff *xgbe_create_skb(struct napi_struct *napi, if (!skb) return NULL; + /* Start with the header buffer which may contain just the header + * or the header plus data + */ + dma_sync_single_for_cpu(pdata->dev, rdata->rx.hdr.dma, + rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); + packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; - copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : *len; + copy_len = (rdata->rx.hdr_len) ? 
rdata->rx.hdr_len : len; copy_len = min(rdata->rx.hdr.dma_len, copy_len); skb_copy_to_linear_data(skb, packet, copy_len); skb_put(skb, copy_len); - *len -= copy_len; + len -= copy_len; + if (len) { + /* Add the remaining data as a frag */ + dma_sync_single_for_cpu(pdata->dev, rdata->rx.buf.dma, + rdata->rx.buf.dma_len, DMA_FROM_DEVICE); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rdata->rx.buf.pa.pages, + rdata->rx.buf.pa.pages_offset, + len, rdata->rx.buf.dma_len); + rdata->rx.buf.pa.pages = NULL; + } return skb; } @@ -1877,9 +1823,8 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) * bit */ dma_rmb(); -#ifdef XGMAC_ENABLE_TX_DESC_DUMP - xgbe_dump_tx_desc(ring, ring->dirty, 1, 0); -#endif + if (netif_msg_tx_done(pdata)) + xgbe_dump_tx_desc(pdata, ring, ring->dirty, 1, 0); if (hw_if->is_last_desc(rdesc)) { tx_packets += rdata->tx.packets; @@ -1922,7 +1867,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct sk_buff *skb; struct skb_shared_hwtstamps *hwtstamps; unsigned int incomplete, error, context_next, context; - unsigned int len, put_len, max_len; + unsigned int len, rdesc_len, max_len; unsigned int received = 0; int packet_count = 0; @@ -1932,6 +1877,9 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) if (!ring) return 0; + incomplete = 0; + context_next = 0; + napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi; rdata = XGBE_GET_DESC_DATA(ring, ring->cur); @@ -1941,15 +1889,11 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) /* First time in loop see if we need to restore state */ if (!received && rdata->state_saved) { - incomplete = rdata->state.incomplete; - context_next = rdata->state.context_next; skb = rdata->state.skb; error = rdata->state.error; len = rdata->state.len; } else { memset(packet, 0, sizeof(*packet)); - incomplete = 0; - context_next = 0; skb = NULL; error = 0; len = 0; @@ -1983,29 +1927,23 @@ read_again: if (error || packet->errors) { if (packet->errors) - DBGPR("Error in received packet\n"); + netif_err(pdata, rx_err, netdev, + "error in received packet\n"); dev_kfree_skb(skb); goto next_packet; } if (!context) { - put_len = rdata->rx.len - len; - len += put_len; - - if (!skb) { - dma_sync_single_for_cpu(pdata->dev, - rdata->rx.hdr.dma, - rdata->rx.hdr.dma_len, - DMA_FROM_DEVICE); - - skb = xgbe_create_skb(napi, rdata, &put_len); - if (!skb) { + /* Length is cumulative, get this descriptor's length */ + rdesc_len = rdata->rx.len - len; + len += rdesc_len; + + if (rdesc_len && !skb) { + skb = xgbe_create_skb(pdata, napi, rdata, + rdesc_len); + if (!skb) error = 1; - goto skip_data; - } - } - - if (put_len) { + } else if (rdesc_len) { dma_sync_single_for_cpu(pdata->dev, rdata->rx.buf.dma, rdata->rx.buf.dma_len, @@ -2014,12 +1952,12 @@ read_again: skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, rdata->rx.buf.pa.pages_offset, - put_len, rdata->rx.buf.dma_len); + rdesc_len, + rdata->rx.buf.dma_len); rdata->rx.buf.pa.pages = NULL; } } -skip_data: if (incomplete || context_next) goto read_again; @@ -2033,14 +1971,14 @@ skip_data: max_len += VLAN_HLEN; if (skb->len > max_len) { - DBGPR("packet length exceeds configured MTU\n"); + netif_err(pdata, rx_err, netdev, + "packet length exceeds configured MTU\n"); dev_kfree_skb(skb); goto next_packet; } -#ifdef XGMAC_ENABLE_RX_PKT_DUMP - xgbe_print_pkt(netdev, skb, false); -#endif + if (netif_msg_pktdata(pdata)) + xgbe_print_pkt(netdev, skb, false); skb_checksum_none_assert(skb); if 
(XGMAC_GET_BITS(packet->attributes, @@ -2072,7 +2010,6 @@ skip_data: skb_record_rx_queue(skb, channel->queue_index); skb_mark_napi_id(skb, napi); - netdev->last_rx = jiffies; napi_gro_receive(napi, skb); next_packet: @@ -2083,8 +2020,6 @@ next_packet: if (received && (incomplete || context_next)) { rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdata->state_saved = 1; - rdata->state.incomplete = incomplete; - rdata->state.context_next = context_next; rdata->state.skb = skb; rdata->state.len = len; rdata->state.error = error; @@ -2165,8 +2100,8 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget) return processed; } -void xgbe_dump_tx_desc(struct xgbe_ring *ring, unsigned int idx, - unsigned int count, unsigned int flag) +void xgbe_dump_tx_desc(struct xgbe_prv_data *pdata, struct xgbe_ring *ring, + unsigned int idx, unsigned int count, unsigned int flag) { struct xgbe_ring_data *rdata; struct xgbe_ring_desc *rdesc; @@ -2174,20 +2109,29 @@ void xgbe_dump_tx_desc(struct xgbe_ring *ring, unsigned int idx, while (count--) { rdata = XGBE_GET_DESC_DATA(ring, idx); rdesc = rdata->rdesc; - pr_alert("TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx, - (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE", - le32_to_cpu(rdesc->desc0), le32_to_cpu(rdesc->desc1), - le32_to_cpu(rdesc->desc2), le32_to_cpu(rdesc->desc3)); + netdev_dbg(pdata->netdev, + "TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx, + (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE", + le32_to_cpu(rdesc->desc0), + le32_to_cpu(rdesc->desc1), + le32_to_cpu(rdesc->desc2), + le32_to_cpu(rdesc->desc3)); idx++; } } -void xgbe_dump_rx_desc(struct xgbe_ring *ring, struct xgbe_ring_desc *desc, +void xgbe_dump_rx_desc(struct xgbe_prv_data *pdata, struct xgbe_ring *ring, unsigned int idx) { - pr_alert("RX_NORMAL_DESC[%d RX BY DEVICE] = %08x:%08x:%08x:%08x\n", idx, - le32_to_cpu(desc->desc0), le32_to_cpu(desc->desc1), - le32_to_cpu(desc->desc2), le32_to_cpu(desc->desc3)); + struct xgbe_ring_data *rdata; + struct xgbe_ring_desc *rdesc; + + rdata = XGBE_GET_DESC_DATA(ring, idx); + rdesc = rdata->rdesc; + netdev_dbg(pdata->netdev, + "RX_NORMAL_DESC[%d RX BY DEVICE] = %08x:%08x:%08x:%08x\n", + idx, le32_to_cpu(rdesc->desc0), le32_to_cpu(rdesc->desc1), + le32_to_cpu(rdesc->desc2), le32_to_cpu(rdesc->desc3)); } void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx) @@ -2197,21 +2141,21 @@ void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx) unsigned char buffer[128]; unsigned int i, j; - netdev_alert(netdev, "\n************** SKB dump ****************\n"); + netdev_dbg(netdev, "\n************** SKB dump ****************\n"); - netdev_alert(netdev, "%s packet of %d bytes\n", - (tx_rx ? "TX" : "RX"), skb->len); + netdev_dbg(netdev, "%s packet of %d bytes\n", + (tx_rx ? 
"TX" : "RX"), skb->len); - netdev_alert(netdev, "Dst MAC addr: %pM\n", eth->h_dest); - netdev_alert(netdev, "Src MAC addr: %pM\n", eth->h_source); - netdev_alert(netdev, "Protocol: 0x%04hx\n", ntohs(eth->h_proto)); + netdev_dbg(netdev, "Dst MAC addr: %pM\n", eth->h_dest); + netdev_dbg(netdev, "Src MAC addr: %pM\n", eth->h_source); + netdev_dbg(netdev, "Protocol: %#06hx\n", ntohs(eth->h_proto)); for (i = 0, j = 0; i < skb->len;) { j += snprintf(buffer + j, sizeof(buffer) - j, "%02hhx", buf[i++]); if ((i % 32) == 0) { - netdev_alert(netdev, " 0x%04x: %s\n", i - 32, buffer); + netdev_dbg(netdev, " %#06x: %s\n", i - 32, buffer); j = 0; } else if ((i % 16) == 0) { buffer[j++] = ' '; @@ -2221,7 +2165,7 @@ void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx) } } if (i % 32) - netdev_alert(netdev, " 0x%04x: %s\n", i - (i % 32), buffer); + netdev_dbg(netdev, " %#06x: %s\n", i - (i % 32), buffer); - netdev_alert(netdev, "\n************** SKB dump ****************\n"); + netdev_dbg(netdev, "\n************** SKB dump ****************\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 5f149e8ee20f..59e090e95c0e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -133,6 +133,12 @@ struct xgbe_stats { offsetof(struct xgbe_prv_data, mmc_stats._var), \ } +#define XGMAC_EXT_STAT(_string, _var) \ + { _string, \ + FIELD_SIZEOF(struct xgbe_ext_stats, _var), \ + offsetof(struct xgbe_prv_data, ext_stats._var), \ + } + static const struct xgbe_stats xgbe_gstring_stats[] = { XGMAC_MMC_STAT("tx_bytes", txoctetcount_gb), XGMAC_MMC_STAT("tx_packets", txframecount_gb), @@ -140,6 +146,7 @@ static const struct xgbe_stats xgbe_gstring_stats[] = { XGMAC_MMC_STAT("tx_broadcast_packets", txbroadcastframes_gb), XGMAC_MMC_STAT("tx_multicast_packets", txmulticastframes_gb), XGMAC_MMC_STAT("tx_vlan_packets", txvlanframes_g), + XGMAC_EXT_STAT("tx_tso_packets", tx_tso_packets), XGMAC_MMC_STAT("tx_64_byte_packets", tx64octets_gb), XGMAC_MMC_STAT("tx_65_to_127_byte_packets", tx65to127octets_gb), XGMAC_MMC_STAT("tx_128_to_255_byte_packets", tx128to255octets_gb), @@ -171,6 +178,7 @@ static const struct xgbe_stats xgbe_gstring_stats[] = { XGMAC_MMC_STAT("rx_fifo_overflow_errors", rxfifooverflow), XGMAC_MMC_STAT("rx_watchdog_errors", rxwatchdogerror), XGMAC_MMC_STAT("rx_pause_frames", rxpauseframes), + XGMAC_EXT_STAT("rx_split_header_packets", rx_split_header_packets), }; #define XGBE_STATS_COUNT ARRAY_SIZE(xgbe_gstring_stats) @@ -239,9 +247,9 @@ static void xgbe_get_pauseparam(struct net_device *netdev, DBGPR("-->xgbe_get_pauseparam\n"); - pause->autoneg = pdata->pause_autoneg; - pause->tx_pause = pdata->tx_pause; - pause->rx_pause = pdata->rx_pause; + pause->autoneg = pdata->phy.pause_autoneg; + pause->tx_pause = pdata->phy.tx_pause; + pause->rx_pause = pdata->phy.rx_pause; DBGPR("<--xgbe_get_pauseparam\n"); } @@ -250,7 +258,6 @@ static int xgbe_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct phy_device *phydev = pdata->phydev; int ret = 0; DBGPR("-->xgbe_set_pauseparam\n"); @@ -258,21 +265,26 @@ static int xgbe_set_pauseparam(struct net_device *netdev, DBGPR(" autoneg = %d, tx_pause = %d, rx_pause = %d\n", pause->autoneg, pause->tx_pause, pause->rx_pause); - pdata->pause_autoneg = pause->autoneg; - if (pause->autoneg) { - phydev->advertising |= ADVERTISED_Pause; - phydev->advertising |= 
ADVERTISED_Asym_Pause; + if (pause->autoneg && (pdata->phy.autoneg != AUTONEG_ENABLE)) + return -EINVAL; + + pdata->phy.pause_autoneg = pause->autoneg; + pdata->phy.tx_pause = pause->tx_pause; + pdata->phy.rx_pause = pause->rx_pause; - } else { - phydev->advertising &= ~ADVERTISED_Pause; - phydev->advertising &= ~ADVERTISED_Asym_Pause; + pdata->phy.advertising &= ~ADVERTISED_Pause; + pdata->phy.advertising &= ~ADVERTISED_Asym_Pause; - pdata->tx_pause = pause->tx_pause; - pdata->rx_pause = pause->rx_pause; + if (pause->rx_pause) { + pdata->phy.advertising |= ADVERTISED_Pause; + pdata->phy.advertising |= ADVERTISED_Asym_Pause; } + if (pause->tx_pause) + pdata->phy.advertising ^= ADVERTISED_Asym_Pause; + if (netif_running(netdev)) - ret = phy_start_aneg(phydev); + ret = pdata->phy_if.phy_config_aneg(pdata); DBGPR("<--xgbe_set_pauseparam\n"); @@ -283,36 +295,39 @@ static int xgbe_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - int ret; DBGPR("-->xgbe_get_settings\n"); - if (!pdata->phydev) - return -ENODEV; + cmd->phy_address = pdata->phy.address; + + cmd->supported = pdata->phy.supported; + cmd->advertising = pdata->phy.advertising; + cmd->lp_advertising = pdata->phy.lp_advertising; + + cmd->autoneg = pdata->phy.autoneg; + ethtool_cmd_speed_set(cmd, pdata->phy.speed); + cmd->duplex = pdata->phy.duplex; - ret = phy_ethtool_gset(pdata->phydev, cmd); + cmd->port = PORT_NONE; + cmd->transceiver = XCVR_INTERNAL; DBGPR("<--xgbe_get_settings\n"); - return ret; + return 0; } static int xgbe_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct phy_device *phydev = pdata->phydev; u32 speed; int ret; DBGPR("-->xgbe_set_settings\n"); - if (!pdata->phydev) - return -ENODEV; - speed = ethtool_cmd_speed(cmd); - if (cmd->phy_address != phydev->addr) + if (cmd->phy_address != pdata->phy.address) return -EINVAL; if ((cmd->autoneg != AUTONEG_ENABLE) && @@ -333,23 +348,23 @@ static int xgbe_set_settings(struct net_device *netdev, return -EINVAL; } - cmd->advertising &= phydev->supported; + cmd->advertising &= pdata->phy.supported; if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising) return -EINVAL; ret = 0; - phydev->autoneg = cmd->autoneg; - phydev->speed = speed; - phydev->duplex = cmd->duplex; - phydev->advertising = cmd->advertising; + pdata->phy.autoneg = cmd->autoneg; + pdata->phy.speed = speed; + pdata->phy.duplex = cmd->duplex; + pdata->phy.advertising = cmd->advertising; if (cmd->autoneg == AUTONEG_ENABLE) - phydev->advertising |= ADVERTISED_Autoneg; + pdata->phy.advertising |= ADVERTISED_Autoneg; else - phydev->advertising &= ~ADVERTISED_Autoneg; + pdata->phy.advertising &= ~ADVERTISED_Autoneg; if (netif_running(netdev)) - ret = phy_start_aneg(phydev); + ret = pdata->phy_if.phy_config_aneg(pdata); DBGPR("<--xgbe_set_settings\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 714905384900..fb7c961da49f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -124,9 +124,11 @@ #include <linux/of.h> #include <linux/of_net.h> #include <linux/of_address.h> +#include <linux/of_platform.h> #include <linux/clk.h> #include <linux/property.h> #include <linux/acpi.h> +#include <linux/mdio.h> #include "xgbe.h" #include "xgbe-common.h" @@ -136,6 +138,49 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(XGBE_DRV_VERSION); MODULE_DESCRIPTION(XGBE_DRV_DESC); 
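[Editor's note, not part of the patch] The xgbe_set_pauseparam() hunk in xgbe-ethtool.c above maps the requested rx/tx pause settings onto the IEEE 802.3 Pause/Asym_Pause advertisement bits, and the OR-then-XOR sequence is easy to misread. Worked out, the combinations it produces are:

	/*
	 * rx_pause  tx_pause  ->  ADVERTISED_Pause  ADVERTISED_Asym_Pause
	 *     0         0               0                    0
	 *     0         1               0                    1
	 *     1         0               1                    1
	 *     1         1               1                    0
	 */

That is, tx-only advertises asymmetric pause toward the partner, rx+tx advertises symmetric pause, and rx-only advertises both bits, matching the clause 28B pause-resolution encoding.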
+static int debug = -1; +module_param(debug, int, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, " Network interface message level setting"); + +static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN | + NETIF_MSG_IFUP); + +static const u32 xgbe_serdes_blwc[] = { + XGBE_SPEED_1000_BLWC, + XGBE_SPEED_2500_BLWC, + XGBE_SPEED_10000_BLWC, +}; + +static const u32 xgbe_serdes_cdr_rate[] = { + XGBE_SPEED_1000_CDR, + XGBE_SPEED_2500_CDR, + XGBE_SPEED_10000_CDR, +}; + +static const u32 xgbe_serdes_pq_skew[] = { + XGBE_SPEED_1000_PQ, + XGBE_SPEED_2500_PQ, + XGBE_SPEED_10000_PQ, +}; + +static const u32 xgbe_serdes_tx_amp[] = { + XGBE_SPEED_1000_TXAMP, + XGBE_SPEED_2500_TXAMP, + XGBE_SPEED_10000_TXAMP, +}; + +static const u32 xgbe_serdes_dfe_tap_cfg[] = { + XGBE_SPEED_1000_DFE_TAP_CONFIG, + XGBE_SPEED_2500_DFE_TAP_CONFIG, + XGBE_SPEED_10000_DFE_TAP_CONFIG, +}; + +static const u32 xgbe_serdes_dfe_tap_ena[] = { + XGBE_SPEED_1000_DFE_TAP_ENABLE, + XGBE_SPEED_2500_DFE_TAP_ENABLE, + XGBE_SPEED_10000_DFE_TAP_ENABLE, +}; + static void xgbe_default_config(struct xgbe_prv_data *pdata) { DBGPR("-->xgbe_default_config\n"); @@ -153,8 +198,6 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata) pdata->rx_pause = 1; pdata->phy_speed = SPEED_UNKNOWN; pdata->power_down = 0; - pdata->default_autoneg = AUTONEG_ENABLE; - pdata->default_speed = SPEED_10000; DBGPR("<--xgbe_default_config\n"); } @@ -162,6 +205,7 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata) static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata) { xgbe_init_function_ptrs_dev(&pdata->hw_if); + xgbe_init_function_ptrs_phy(&pdata->phy_if); xgbe_init_function_ptrs_desc(&pdata->desc_if); } @@ -248,23 +292,82 @@ static int xgbe_of_support(struct xgbe_prv_data *pdata) return 0; } + +static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + struct device_node *phy_node; + struct platform_device *phy_pdev; + + phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0); + if (phy_node) { + /* Old style device tree: + * The XGBE and PHY resources are separate + */ + phy_pdev = of_find_device_by_node(phy_node); + of_node_put(phy_node); + } else { + /* New style device tree: + * The XGBE and PHY resources are grouped together with + * the PHY resources listed last + */ + get_device(dev); + phy_pdev = pdata->pdev; + } + + return phy_pdev; +} #else /* CONFIG_OF */ static int xgbe_of_support(struct xgbe_prv_data *pdata) { return -EINVAL; } -#endif /*CONFIG_OF */ + +static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + return NULL; +} +#endif /* CONFIG_OF */ + +static unsigned int xgbe_resource_count(struct platform_device *pdev, + unsigned int type) +{ + unsigned int count; + int i; + + for (i = 0, count = 0; i < pdev->num_resources; i++) { + struct resource *res = &pdev->resource[i]; + + if (type == resource_type(res)) + count++; + } + + return count; +} + +static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + struct platform_device *phy_pdev; + + if (pdata->use_acpi) { + get_device(pdata->dev); + phy_pdev = pdata->pdev; + } else { + phy_pdev = xgbe_of_get_phy_pdev(pdata); + } + + return phy_pdev; +} static int xgbe_probe(struct platform_device *pdev) { struct xgbe_prv_data *pdata; - struct xgbe_hw_if *hw_if; - struct xgbe_desc_if *desc_if; struct net_device *netdev; - struct device *dev = &pdev->dev; + struct device *dev = &pdev->dev, *phy_dev; + struct platform_device *phy_pdev; struct resource *res; 
const char *phy_mode; - unsigned int i; + unsigned int i, phy_memnum, phy_irqnum; int ret; DBGPR("--> xgbe_probe\n"); @@ -289,9 +392,36 @@ static int xgbe_probe(struct platform_device *pdev) mutex_init(&pdata->rss_mutex); spin_lock_init(&pdata->tstamp_lock); + pdata->msg_enable = netif_msg_init(debug, default_msg_level); + + set_bit(XGBE_DOWN, &pdata->dev_state); + /* Check if we should use ACPI or DT */ pdata->use_acpi = (!pdata->adev || acpi_disabled) ? 0 : 1; + phy_pdev = xgbe_get_phy_pdev(pdata); + if (!phy_pdev) { + dev_err(dev, "unable to obtain phy device\n"); + ret = -EINVAL; + goto err_phydev; + } + phy_dev = &phy_pdev->dev; + + if (pdev == phy_pdev) { + /* New style device tree or ACPI: + * The XGBE and PHY resources are grouped together with + * the PHY resources listed last + */ + phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3; + phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1; + } else { + /* Old style device tree: + * The XGBE and PHY resources are separate + */ + phy_memnum = 0; + phy_irqnum = 0; + } + /* Set and validate the number of descriptors for a ring */ BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); pdata->tx_desc_count = XGBE_TX_DESC_CNT; @@ -318,7 +448,8 @@ static int xgbe_probe(struct platform_device *pdev) ret = PTR_ERR(pdata->xgmac_regs); goto err_io; } - DBGPR(" xgmac_regs = %p\n", pdata->xgmac_regs); + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); pdata->xpcs_regs = devm_ioremap_resource(dev, res); @@ -327,7 +458,38 @@ static int xgbe_probe(struct platform_device *pdev) ret = PTR_ERR(pdata->xpcs_regs); goto err_io; } - DBGPR(" xpcs_regs = %p\n", pdata->xpcs_regs); + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->rxtx_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->rxtx_regs)) { + dev_err(dev, "rxtx ioremap failed\n"); + ret = PTR_ERR(pdata->rxtx_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "rxtx_regs = %p\n", pdata->rxtx_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->sir0_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->sir0_regs)) { + dev_err(dev, "sir0 ioremap failed\n"); + ret = PTR_ERR(pdata->sir0_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "sir0_regs = %p\n", pdata->sir0_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->sir1_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->sir1_regs)) { + dev_err(dev, "sir1 ioremap failed\n"); + ret = PTR_ERR(pdata->sir1_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "sir1_regs = %p\n", pdata->sir1_regs); /* Retrieve the MAC address */ ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY, @@ -355,6 +517,115 @@ static int xgbe_probe(struct platform_device *pdev) if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) pdata->per_channel_irq = 1; + /* Retrieve the PHY speedset */ + ret = device_property_read_u32(phy_dev, XGBE_SPEEDSET_PROPERTY, + &pdata->speed_set); + if (ret) { + dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY); + goto err_io; + } + + switch (pdata->speed_set) { + case XGBE_SPEEDSET_1000_10000: + case XGBE_SPEEDSET_2500_10000: + break; + default: + dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY); + ret = -EINVAL; + goto err_io; + } + + /* Retrieve the 
PHY configuration properties */ + if (device_property_present(phy_dev, XGBE_BLWC_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_BLWC_PROPERTY, + pdata->serdes_blwc, + XGBE_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_BLWC_PROPERTY); + goto err_io; + } + } else { + memcpy(pdata->serdes_blwc, xgbe_serdes_blwc, + sizeof(pdata->serdes_blwc)); + } + + if (device_property_present(phy_dev, XGBE_CDR_RATE_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_CDR_RATE_PROPERTY, + pdata->serdes_cdr_rate, + XGBE_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_CDR_RATE_PROPERTY); + goto err_io; + } + } else { + memcpy(pdata->serdes_cdr_rate, xgbe_serdes_cdr_rate, + sizeof(pdata->serdes_cdr_rate)); + } + + if (device_property_present(phy_dev, XGBE_PQ_SKEW_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_PQ_SKEW_PROPERTY, + pdata->serdes_pq_skew, + XGBE_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_PQ_SKEW_PROPERTY); + goto err_io; + } + } else { + memcpy(pdata->serdes_pq_skew, xgbe_serdes_pq_skew, + sizeof(pdata->serdes_pq_skew)); + } + + if (device_property_present(phy_dev, XGBE_TX_AMP_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_TX_AMP_PROPERTY, + pdata->serdes_tx_amp, + XGBE_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_TX_AMP_PROPERTY); + goto err_io; + } + } else { + memcpy(pdata->serdes_tx_amp, xgbe_serdes_tx_amp, + sizeof(pdata->serdes_tx_amp)); + } + + if (device_property_present(phy_dev, XGBE_DFE_CFG_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_DFE_CFG_PROPERTY, + pdata->serdes_dfe_tap_cfg, + XGBE_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_DFE_CFG_PROPERTY); + goto err_io; + } + } else { + memcpy(pdata->serdes_dfe_tap_cfg, xgbe_serdes_dfe_tap_cfg, + sizeof(pdata->serdes_dfe_tap_cfg)); + } + + if (device_property_present(phy_dev, XGBE_DFE_ENA_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_DFE_ENA_PROPERTY, + pdata->serdes_dfe_tap_ena, + XGBE_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_DFE_ENA_PROPERTY); + goto err_io; + } + } else { + memcpy(pdata->serdes_dfe_tap_ena, xgbe_serdes_dfe_tap_ena, + sizeof(pdata->serdes_dfe_tap_ena)); + } + /* Obtain device settings unique to ACPI/OF */ if (pdata->use_acpi) ret = xgbe_acpi_support(pdata); @@ -382,17 +653,23 @@ static int xgbe_probe(struct platform_device *pdev) } pdata->dev_irq = ret; + /* Get the auto-negotiation interrupt */ + ret = platform_get_irq(phy_pdev, phy_irqnum++); + if (ret < 0) { + dev_err(dev, "platform_get_irq phy 0 failed\n"); + goto err_io; + } + pdata->an_irq = ret; + netdev->irq = pdata->dev_irq; netdev->base_addr = (unsigned long)pdata->xgmac_regs; memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len); /* Set all the function pointers */ xgbe_init_all_fptrs(pdata); - hw_if = &pdata->hw_if; - desc_if = &pdata->desc_if; /* Issue software reset to device */ - hw_if->exit(pdata); + pdata->hw_if.exit(pdata); /* Populate the hardware features */ xgbe_get_all_hw_features(pdata); @@ -401,8 +678,6 @@ static int xgbe_probe(struct platform_device *pdev) xgbe_default_config(pdata); /* Set the DMA mask */ - if (!dev->dma_mask) - dev->dma_mask = &dev->coherent_dma_mask; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(pdata->hw_feat.dma_width)); if (ret) { @@ -447,16 +722,8 @@ static int xgbe_probe(struct platform_device *pdev) XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, 
TCP4TE, 1); XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); - /* Prepare to regsiter with MDIO */ - pdata->mii_bus_id = kasprintf(GFP_KERNEL, "%s", pdev->name); - if (!pdata->mii_bus_id) { - dev_err(dev, "failed to allocate mii bus id\n"); - ret = -ENOMEM; - goto err_io; - } - ret = xgbe_mdio_register(pdata); - if (ret) - goto err_bus_id; + /* Call MDIO/PHY initialization routine */ + pdata->phy_if.phy_init(pdata); /* Set device operations */ netdev->netdev_ops = xgbe_get_netdev_ops(); @@ -501,26 +768,52 @@ static int xgbe_probe(struct platform_device *pdev) ret = register_netdev(netdev); if (ret) { dev_err(dev, "net device registration failed\n"); - goto err_reg_netdev; + goto err_io; + } + + /* Create the PHY/ANEG name based on netdev name */ + snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs", + netdev_name(netdev)); + + /* Create workqueues */ + pdata->dev_workqueue = + create_singlethread_workqueue(netdev_name(netdev)); + if (!pdata->dev_workqueue) { + netdev_err(netdev, "device workqueue creation failed\n"); + ret = -ENOMEM; + goto err_netdev; + } + + pdata->an_workqueue = + create_singlethread_workqueue(pdata->an_name); + if (!pdata->an_workqueue) { + netdev_err(netdev, "phy workqueue creation failed\n"); + ret = -ENOMEM; + goto err_wq; } xgbe_ptp_register(pdata); xgbe_debugfs_init(pdata); + platform_device_put(phy_pdev); + netdev_notice(netdev, "net device enabled\n"); DBGPR("<-- xgbe_probe\n"); return 0; -err_reg_netdev: - xgbe_mdio_unregister(pdata); +err_wq: + destroy_workqueue(pdata->dev_workqueue); -err_bus_id: - kfree(pdata->mii_bus_id); +err_netdev: + unregister_netdev(netdev); err_io: + platform_device_put(phy_pdev); + +err_phydev: free_netdev(netdev); err_alloc: @@ -540,11 +833,13 @@ static int xgbe_remove(struct platform_device *pdev) xgbe_ptp_unregister(pdata); - unregister_netdev(netdev); + flush_workqueue(pdata->an_workqueue); + destroy_workqueue(pdata->an_workqueue); - xgbe_mdio_unregister(pdata); + flush_workqueue(pdata->dev_workqueue); + destroy_workqueue(pdata->dev_workqueue); - kfree(pdata->mii_bus_id); + unregister_netdev(netdev); free_netdev(netdev); @@ -557,16 +852,17 @@ static int xgbe_remove(struct platform_device *pdev) static int xgbe_suspend(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); - int ret; + struct xgbe_prv_data *pdata = netdev_priv(netdev); + int ret = 0; DBGPR("-->xgbe_suspend\n"); - if (!netif_running(netdev)) { - DBGPR("<--xgbe_dev_suspend\n"); - return -EINVAL; - } + if (netif_running(netdev)) + ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); - ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); DBGPR("<--xgbe_suspend\n"); @@ -576,16 +872,16 @@ static int xgbe_suspend(struct device *dev) static int xgbe_resume(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); - int ret; + struct xgbe_prv_data *pdata = netdev_priv(netdev); + int ret = 0; DBGPR("-->xgbe_resume\n"); - if (!netif_running(netdev)) { - DBGPR("<--xgbe_dev_resume\n"); - return -EINVAL; - } + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); - ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + if (netif_running(netdev)) + ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); DBGPR("<--xgbe_resume\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 59e267f3f1b7..cea19a37806e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -119,194 +119,1151 @@ #include <linux/mdio.h> #include <linux/phy.h> #include <linux/of.h> +#include <linux/bitops.h> +#include <linux/jiffies.h> #include "xgbe.h" #include "xgbe-common.h" -static int xgbe_mdio_read(struct mii_bus *mii, int prtad, int mmd_reg) +static void xgbe_an_enable_kr_training(struct xgbe_prv_data *pdata) { - struct xgbe_prv_data *pdata = mii->priv; - struct xgbe_hw_if *hw_if = &pdata->hw_if; - int mmd_data; + unsigned int reg; - DBGPR_MDIO("-->xgbe_mdio_read: prtad=%#x mmd_reg=%#x\n", - prtad, mmd_reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - mmd_data = hw_if->read_mmd_regs(pdata, prtad, mmd_reg); + reg |= XGBE_KR_TRAINING_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); +} + +static void xgbe_an_disable_kr_training(struct xgbe_prv_data *pdata) +{ + unsigned int reg; - DBGPR_MDIO("<--xgbe_mdio_read: mmd_data=%#x\n", mmd_data); + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - return mmd_data; + reg &= ~XGBE_KR_TRAINING_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); } -static int xgbe_mdio_write(struct mii_bus *mii, int prtad, int mmd_reg, - u16 mmd_val) +static void xgbe_pcs_power_cycle(struct xgbe_prv_data *pdata) { - struct xgbe_prv_data *pdata = mii->priv; - struct xgbe_hw_if *hw_if = &pdata->hw_if; - int mmd_data = mmd_val; + unsigned int reg; - DBGPR_MDIO("-->xgbe_mdio_write: prtad=%#x mmd_reg=%#x mmd_data=%#x\n", - prtad, mmd_reg, mmd_data); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - hw_if->write_mmd_regs(pdata, prtad, mmd_reg, mmd_data); + reg |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); - DBGPR_MDIO("<--xgbe_mdio_write\n"); + usleep_range(75, 100); - return 0; + reg &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); } -void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata) +static void xgbe_serdes_start_ratechange(struct xgbe_prv_data *pdata) { - struct device *dev = pdata->dev; - struct phy_device *phydev = pdata->mii->phy_map[XGBE_PRTAD]; - int i; - - dev_alert(dev, "\n************* PHY Reg dump **********************\n"); - - dev_alert(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, - XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1)); - dev_alert(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1, - XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1)); - dev_alert(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1, - XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1)); - dev_alert(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2, - XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2)); - dev_alert(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1, - XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1)); - dev_alert(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2, - XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2)); - - dev_alert(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, - XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1)); - dev_alert(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1, - XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1)); - dev_alert(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n", - MDIO_AN_ADVERTISE, - XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE)); - dev_alert(dev, "Auto-Neg Ad Reg 2 (%#04x) = %#04x\n", - MDIO_AN_ADVERTISE + 1, - XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1)); 
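[Editor's note, not part of the patch] The PCS and auto-negotiation helpers added to xgbe-mdio.c here all share one clause-45 idiom: XMDIO_READ()/XMDIO_WRITE() address a register through its MMD device, and flag changes are applied read-modify-write. Condensed from the xgbe_pcs_power_cycle() hunk above:

	unsigned int reg;

	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);	/* read MMD register */
	reg |= MDIO_CTRL1_LPOWER;				/* enter low power */
	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);

	usleep_range(75, 100);					/* let it settle */

	reg &= ~MDIO_CTRL1_LPOWER;				/* power back up */
	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);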
- dev_alert(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n", - MDIO_AN_ADVERTISE + 2, - XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2)); - dev_alert(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n", - MDIO_AN_COMP_STAT, - XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT)); - - dev_alert(dev, "MMD Device Mask = %#x\n", - phydev->c45_ids.devices_in_package); - for (i = 0; i < ARRAY_SIZE(phydev->c45_ids.device_ids); i++) - dev_alert(dev, " MMD %d: ID = %#08x\n", i, - phydev->c45_ids.device_ids[i]); - - dev_alert(dev, "\n*************************************************\n"); -} - -int xgbe_mdio_register(struct xgbe_prv_data *pdata) -{ - struct mii_bus *mii; - struct phy_device *phydev; - int ret = 0; - - DBGPR("-->xgbe_mdio_register\n"); - - mii = mdiobus_alloc(); - if (!mii) { - dev_err(pdata->dev, "mdiobus_alloc failed\n"); - return -ENOMEM; + /* Assert Rx and Tx ratechange */ + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1); +} + +static void xgbe_serdes_complete_ratechange(struct xgbe_prv_data *pdata) +{ + unsigned int wait; + u16 status; + + /* Release Rx and Tx ratechange */ + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0); + + /* Wait for Rx and Tx ready */ + wait = XGBE_RATECHANGE_COUNT; + while (wait--) { + usleep_range(50, 75); + + status = XSIR0_IOREAD(pdata, SIR0_STATUS); + if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && + XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) + goto rx_reset; } - /* Register on the MDIO bus (don't probe any PHYs) */ - mii->name = XGBE_PHY_NAME; - mii->read = xgbe_mdio_read; - mii->write = xgbe_mdio_write; - snprintf(mii->id, sizeof(mii->id), "%s", pdata->mii_bus_id); - mii->priv = pdata; - mii->phy_mask = ~0; - mii->parent = pdata->dev; - ret = mdiobus_register(mii); - if (ret) { - dev_err(pdata->dev, "mdiobus_register failed\n"); - goto err_mdiobus_alloc; + netdev_dbg(pdata->netdev, "SerDes rx/tx not ready (%#hx)\n", + status); + +rx_reset: + /* Perform Rx reset for the DFE changes */ + XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1); +} + +static void xgbe_xgmii_mode(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + /* Enable KR training */ + xgbe_an_enable_kr_training(pdata); + + /* Set MAC to 10G speed */ + pdata->hw_if.set_xgmii_speed(pdata); + + /* Set PCS to KR/10G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBR; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED10G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_pcs_power_cycle(pdata); + + /* Set SerDes to 10G speed */ + xgbe_serdes_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + pdata->serdes_cdr_rate[XGBE_SPEED_10000]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + pdata->serdes_tx_amp[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + pdata->serdes_blwc[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + pdata->serdes_pq_skew[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + pdata->serdes_dfe_tap_cfg[XGBE_SPEED_10000]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + 
pdata->serdes_dfe_tap_ena[XGBE_SPEED_10000]); + + xgbe_serdes_complete_ratechange(pdata); +} + +static void xgbe_gmii_2500_mode(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + /* Disable KR training */ + xgbe_an_disable_kr_training(pdata); + + /* Set MAC to 2.5G speed */ + pdata->hw_if.set_gmii_2500_speed(pdata); + + /* Set PCS to KX/1G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBX; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED1G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_pcs_power_cycle(pdata); + + /* Set SerDes to 2.5G speed */ + xgbe_serdes_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + pdata->serdes_cdr_rate[XGBE_SPEED_2500]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + pdata->serdes_tx_amp[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + pdata->serdes_blwc[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + pdata->serdes_pq_skew[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + pdata->serdes_dfe_tap_cfg[XGBE_SPEED_2500]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + pdata->serdes_dfe_tap_ena[XGBE_SPEED_2500]); + + xgbe_serdes_complete_ratechange(pdata); +} + +static void xgbe_gmii_mode(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + /* Disable KR training */ + xgbe_an_disable_kr_training(pdata); + + /* Set MAC to 1G speed */ + pdata->hw_if.set_gmii_speed(pdata); + + /* Set PCS to KX/1G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBX; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED1G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_pcs_power_cycle(pdata); + + /* Set SerDes to 1G speed */ + xgbe_serdes_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + pdata->serdes_cdr_rate[XGBE_SPEED_1000]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + pdata->serdes_tx_amp[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + pdata->serdes_blwc[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + pdata->serdes_pq_skew[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + pdata->serdes_dfe_tap_cfg[XGBE_SPEED_1000]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + pdata->serdes_dfe_tap_ena[XGBE_SPEED_1000]); + + xgbe_serdes_complete_ratechange(pdata); +} + +static void xgbe_cur_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode *mode) +{ + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + if ((reg & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR) + *mode = XGBE_MODE_KR; + else + *mode = XGBE_MODE_KX; +} + +static bool xgbe_in_kr_mode(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + + xgbe_cur_mode(pdata, &mode); + + return (mode == XGBE_MODE_KR); 
+} + +static void xgbe_switch_mode(struct xgbe_prv_data *pdata) +{ + /* If we are in KR switch to KX, and vice-versa */ + if (xgbe_in_kr_mode(pdata)) { + if (pdata->speed_set == XGBE_SPEEDSET_1000_10000) + xgbe_gmii_mode(pdata); + else + xgbe_gmii_2500_mode(pdata); + } else { + xgbe_xgmii_mode(pdata); } - DBGPR(" mdiobus_register succeeded for %s\n", pdata->mii_bus_id); - - /* Probe the PCS using Clause 45 */ - phydev = get_phy_device(mii, XGBE_PRTAD, true); - if (IS_ERR(phydev) || !phydev || - !phydev->c45_ids.device_ids[MDIO_MMD_PCS]) { - dev_err(pdata->dev, "get_phy_device failed\n"); - ret = phydev ? PTR_ERR(phydev) : -ENOLINK; - goto err_mdiobus_register; +} + +static void xgbe_set_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + enum xgbe_mode cur_mode; + + xgbe_cur_mode(pdata, &cur_mode); + if (mode != cur_mode) + xgbe_switch_mode(pdata); +} + +static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart) +{ + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1); + reg &= ~MDIO_AN_CTRL1_ENABLE; + + if (enable) + reg |= MDIO_AN_CTRL1_ENABLE; + + if (restart) + reg |= MDIO_AN_CTRL1_RESTART; + + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); +} + +static void xgbe_restart_an(struct xgbe_prv_data *pdata) +{ + xgbe_set_an(pdata, true, true); +} + +static void xgbe_disable_an(struct xgbe_prv_data *pdata) +{ + xgbe_set_an(pdata, false, false); +} + +static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) +{ + unsigned int ad_reg, lp_reg, reg; + + *state = XGBE_RX_COMPLETE; + + /* If we're not in KR mode then we're done */ + if (!xgbe_in_kr_mode(pdata)) + return XGBE_AN_PAGE_RECEIVED; + + /* Enable/Disable FEC */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FECCTRL); + reg &= ~(MDIO_PMA_10GBR_FECABLE_ABLE | MDIO_PMA_10GBR_FECABLE_ERRABLE); + if ((ad_reg & 0xc000) && (lp_reg & 0xc000)) + reg |= pdata->fec_ability; + + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FECCTRL, reg); + + /* Start KR training */ + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); + if (reg & XGBE_KR_TRAINING_ENABLE) { + XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1); + + reg |= XGBE_KR_TRAINING_START; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, + reg); + + XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0); } - request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, - MDIO_ID_ARGS(phydev->c45_ids.device_ids[MDIO_MMD_PCS])); - ret = phy_device_register(phydev); - if (ret) { - dev_err(pdata->dev, "phy_device_register failed\n"); - goto err_phy_device; + return XGBE_AN_PAGE_RECEIVED; +} + +static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) +{ + u16 msg; + + *state = XGBE_RX_XNP; + + msg = XGBE_XNP_MCF_NULL_MESSAGE; + msg |= XGBE_XNP_MP_FORMATTED; + + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_XNP + 2, 0); + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_XNP + 1, 0); + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_XNP, msg); + + return XGBE_AN_PAGE_RECEIVED; +} + +static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) +{ + unsigned int link_support; + unsigned int reg, ad_reg, lp_reg; + + /* Read Base Ability register 2 first */ + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + + /* Check for a supported mode, otherwise restart in a different one */ + link_support = xgbe_in_kr_mode(pdata) ? 
0x80 : 0x20; + if (!(reg & link_support)) + return XGBE_AN_INCOMPAT_LINK; + + /* Check Extended Next Page support */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); + + return ((ad_reg & XGBE_XNP_NP_EXCHANGE) || + (lp_reg & XGBE_XNP_NP_EXCHANGE)) + ? xgbe_an_tx_xnp(pdata, state) + : xgbe_an_tx_training(pdata, state); +} + +static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) +{ + unsigned int ad_reg, lp_reg; + + /* Check Extended Next Page support */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_XNP); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPX); + + return ((ad_reg & XGBE_XNP_NP_EXCHANGE) || + (lp_reg & XGBE_XNP_NP_EXCHANGE)) + ? xgbe_an_tx_xnp(pdata, state) + : xgbe_an_tx_training(pdata, state); +} + +static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) +{ + enum xgbe_rx *state; + unsigned long an_timeout; + enum xgbe_an ret; + + if (!pdata->an_start) { + pdata->an_start = jiffies; + } else { + an_timeout = pdata->an_start + + msecs_to_jiffies(XGBE_AN_MS_TIMEOUT); + if (time_after(jiffies, an_timeout)) { + /* Auto-negotiation timed out, reset state */ + pdata->kr_state = XGBE_RX_BPA; + pdata->kx_state = XGBE_RX_BPA; + + pdata->an_start = jiffies; + } } - if (!phydev->dev.driver) { - dev_err(pdata->dev, "phy driver probe failed\n"); - ret = -EIO; - goto err_phy_device; + + state = xgbe_in_kr_mode(pdata) ? &pdata->kr_state + : &pdata->kx_state; + + switch (*state) { + case XGBE_RX_BPA: + ret = xgbe_an_rx_bpa(pdata, state); + break; + + case XGBE_RX_XNP: + ret = xgbe_an_rx_xnp(pdata, state); + break; + + default: + ret = XGBE_AN_ERROR; } - /* Add a reference to the PHY driver so it can't be unloaded */ - pdata->phy_module = phydev->dev.driver->owner; - if (!try_module_get(pdata->phy_module)) { - dev_err(pdata->dev, "try_module_get failed\n"); - ret = -EIO; - goto err_phy_device; + return ret; +} + +static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata) +{ + /* Be sure we aren't looping trying to negotiate */ + if (xgbe_in_kr_mode(pdata)) { + pdata->kr_state = XGBE_RX_ERROR; + + if (!(pdata->phy.advertising & ADVERTISED_1000baseKX_Full) && + !(pdata->phy.advertising & ADVERTISED_2500baseX_Full)) + return XGBE_AN_NO_LINK; + + if (pdata->kx_state != XGBE_RX_BPA) + return XGBE_AN_NO_LINK; + } else { + pdata->kx_state = XGBE_RX_ERROR; + + if (!(pdata->phy.advertising & ADVERTISED_10000baseKR_Full)) + return XGBE_AN_NO_LINK; + + if (pdata->kr_state != XGBE_RX_BPA) + return XGBE_AN_NO_LINK; } - pdata->mii = mii; - pdata->mdio_mmd = MDIO_MMD_PCS; + xgbe_disable_an(pdata); + + xgbe_switch_mode(pdata); + + xgbe_restart_an(pdata); + + return XGBE_AN_INCOMPAT_LINK; +} + +static irqreturn_t xgbe_an_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + + /* Interrupt reason must be read and cleared outside of IRQ context */ + disable_irq_nosync(pdata->an_irq); + + queue_work(pdata->an_workqueue, &pdata->an_irq_work); + + return IRQ_HANDLED; +} + +static void xgbe_an_irq_work(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + an_irq_work); + + /* Avoid a race between enabling the IRQ and exiting the work by + * waiting for the work to finish and then queueing it + */ + flush_work(&pdata->an_work); + queue_work(pdata->an_workqueue, &pdata->an_work); +} + +static void xgbe_an_state_machine(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = 
container_of(work, + struct xgbe_prv_data, + an_work); + enum xgbe_an cur_state = pdata->an_state; + unsigned int int_reg, int_mask; + + mutex_lock(&pdata->an_mutex); + + /* Read the interrupt */ + int_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT); + if (!int_reg) + goto out; + +next_int: + if (int_reg & XGBE_AN_PG_RCV) { + pdata->an_state = XGBE_AN_PAGE_RECEIVED; + int_mask = XGBE_AN_PG_RCV; + } else if (int_reg & XGBE_AN_INC_LINK) { + pdata->an_state = XGBE_AN_INCOMPAT_LINK; + int_mask = XGBE_AN_INC_LINK; + } else if (int_reg & XGBE_AN_INT_CMPLT) { + pdata->an_state = XGBE_AN_COMPLETE; + int_mask = XGBE_AN_INT_CMPLT; + } else { + pdata->an_state = XGBE_AN_ERROR; + int_mask = 0; + } + + /* Clear the interrupt to be processed */ + int_reg &= ~int_mask; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, int_reg); + + pdata->an_result = pdata->an_state; + +again: + cur_state = pdata->an_state; + + switch (pdata->an_state) { + case XGBE_AN_READY: + pdata->an_supported = 0; + break; + + case XGBE_AN_PAGE_RECEIVED: + pdata->an_state = xgbe_an_page_received(pdata); + pdata->an_supported++; + break; + + case XGBE_AN_INCOMPAT_LINK: + pdata->an_supported = 0; + pdata->parallel_detect = 0; + pdata->an_state = xgbe_an_incompat_link(pdata); + break; + + case XGBE_AN_COMPLETE: + pdata->parallel_detect = pdata->an_supported ? 0 : 1; + netdev_dbg(pdata->netdev, "%s successful\n", + pdata->an_supported ? "Auto negotiation" + : "Parallel detection"); + break; + + case XGBE_AN_NO_LINK: + break; + + default: + pdata->an_state = XGBE_AN_ERROR; + } + + if (pdata->an_state == XGBE_AN_NO_LINK) { + int_reg = 0; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + } else if (pdata->an_state == XGBE_AN_ERROR) { + netdev_err(pdata->netdev, + "error during auto-negotiation, state=%u\n", + cur_state); + + int_reg = 0; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + } + + if (pdata->an_state >= XGBE_AN_COMPLETE) { + pdata->an_result = pdata->an_state; + pdata->an_state = XGBE_AN_READY; + pdata->kr_state = XGBE_RX_BPA; + pdata->kx_state = XGBE_RX_BPA; + pdata->an_start = 0; + } + + if (cur_state != pdata->an_state) + goto again; + + if (int_reg) + goto next_int; + +out: + enable_irq(pdata->an_irq); + + mutex_unlock(&pdata->an_mutex); +} + +static void xgbe_an_init(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + /* Set up Advertisement register 3 first */ + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + if (pdata->phy.advertising & ADVERTISED_10000baseR_FEC) + reg |= 0xc000; + else + reg &= ~0xc000; + + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2, reg); + + /* Set up Advertisement register 2 next */ + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) + reg |= 0x80; + else + reg &= ~0x80; + + if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) || + (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) + reg |= 0x20; + else + reg &= ~0x20; + + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1, reg); + + /* Set up Advertisement register 1 last */ + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + if (pdata->phy.advertising & ADVERTISED_Pause) + reg |= 0x400; + else + reg &= ~0x400; + + if (pdata->phy.advertising & ADVERTISED_Asym_Pause) + reg |= 0x800; + else + reg &= ~0x800; + + /* We don't intend to perform XNP */ + reg &= ~XGBE_XNP_NP_EXCHANGE; + + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg); +} + +static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata) +{ + if 
(pdata->tx_pause && pdata->rx_pause) + return "rx/tx"; + else if (pdata->rx_pause) + return "rx"; + else if (pdata->tx_pause) + return "tx"; + else + return "off"; +} + +static const char *xgbe_phy_speed_string(int speed) +{ + switch (speed) { + case SPEED_1000: + return "1Gbps"; + case SPEED_2500: + return "2.5Gbps"; + case SPEED_10000: + return "10Gbps"; + case SPEED_UNKNOWN: + return "Unknown"; + default: + return "Unsupported"; + } +} + +static void xgbe_phy_print_status(struct xgbe_prv_data *pdata) +{ + if (pdata->phy.link) + netdev_info(pdata->netdev, + "Link is Up - %s/%s - flow control %s\n", + xgbe_phy_speed_string(pdata->phy.speed), + pdata->phy.duplex == DUPLEX_FULL ? "Full" : "Half", + xgbe_phy_fc_string(pdata)); + else + netdev_info(pdata->netdev, "Link is Down\n"); +} + +static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) +{ + int new_state = 0; + + if (pdata->phy.link) { + /* Flow control support */ + pdata->pause_autoneg = pdata->phy.pause_autoneg; + + if (pdata->tx_pause != pdata->phy.tx_pause) { + new_state = 1; + pdata->hw_if.config_tx_flow_control(pdata); + pdata->tx_pause = pdata->phy.tx_pause; + } + + if (pdata->rx_pause != pdata->phy.rx_pause) { + new_state = 1; + pdata->hw_if.config_rx_flow_control(pdata); + pdata->rx_pause = pdata->phy.rx_pause; + } + + /* Speed support */ + if (pdata->phy_speed != pdata->phy.speed) { + new_state = 1; + pdata->phy_speed = pdata->phy.speed; + } + + if (pdata->phy_link != pdata->phy.link) { + new_state = 1; + pdata->phy_link = pdata->phy.link; + } + } else if (pdata->phy_link) { + new_state = 1; + pdata->phy_link = 0; + pdata->phy_speed = SPEED_UNKNOWN; + } + + if (new_state && netif_msg_link(pdata)) + xgbe_phy_print_status(pdata); +} - phydev->autoneg = pdata->default_autoneg; - if (phydev->autoneg == AUTONEG_DISABLE) { - phydev->speed = pdata->default_speed; - phydev->duplex = DUPLEX_FULL; +static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) +{ + /* Disable auto-negotiation */ + xgbe_disable_an(pdata); + + /* Validate/Set specified speed */ + switch (pdata->phy.speed) { + case SPEED_10000: + xgbe_set_mode(pdata, XGBE_MODE_KR); + break; + + case SPEED_2500: + case SPEED_1000: + xgbe_set_mode(pdata, XGBE_MODE_KX); + break; + + default: + return -EINVAL; + } + + /* Validate duplex mode */ + if (pdata->phy.duplex != DUPLEX_FULL) + return -EINVAL; + + return 0; +} + +static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata) +{ + set_bit(XGBE_LINK_INIT, &pdata->dev_state); + pdata->link_check = jiffies; + + if (pdata->phy.autoneg != AUTONEG_ENABLE) + return xgbe_phy_config_fixed(pdata); + + /* Disable auto-negotiation interrupt */ + disable_irq(pdata->an_irq); - phydev->advertising &= ~ADVERTISED_Autoneg; + /* Start auto-negotiation in a supported mode */ + if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) { + xgbe_set_mode(pdata, XGBE_MODE_KR); + } else if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) || + (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) { + xgbe_set_mode(pdata, XGBE_MODE_KX); + } else { + enable_irq(pdata->an_irq); + return -EINVAL; } - pdata->phydev = phydev; + /* Disable and stop any in-progress auto-negotiation */ + xgbe_disable_an(pdata); + + /* Clear any auto-negotiation interrupts */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + + pdata->an_result = XGBE_AN_READY; + pdata->an_state = XGBE_AN_READY; + pdata->kr_state = XGBE_RX_BPA; + pdata->kx_state = XGBE_RX_BPA; - DBGPHY_REGS(pdata); + /* Re-enable auto-negotiation interrupt */ + enable_irq(pdata->an_irq); - 
DBGPR("<--xgbe_mdio_register\n"); + /* Set up advertisement registers based on current settings */ + xgbe_an_init(pdata); + + /* Enable and start auto-negotiation */ + xgbe_restart_an(pdata); return 0; +} -err_phy_device: - phy_device_free(phydev); +static int xgbe_phy_config_aneg(struct xgbe_prv_data *pdata) +{ + int ret; -err_mdiobus_register: - mdiobus_unregister(mii); + mutex_lock(&pdata->an_mutex); -err_mdiobus_alloc: - mdiobus_free(mii); + ret = __xgbe_phy_config_aneg(pdata); + if (ret) + set_bit(XGBE_LINK_ERR, &pdata->dev_state); + else + clear_bit(XGBE_LINK_ERR, &pdata->dev_state); + + mutex_unlock(&pdata->an_mutex); return ret; } -void xgbe_mdio_unregister(struct xgbe_prv_data *pdata) +static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata) +{ + return (pdata->an_result == XGBE_AN_COMPLETE); +} + +static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) +{ + unsigned long link_timeout; + + link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ); + if (time_after(jiffies, link_timeout)) + xgbe_phy_config_aneg(pdata); +} + +static void xgbe_phy_status_force(struct xgbe_prv_data *pdata) +{ + if (xgbe_in_kr_mode(pdata)) { + pdata->phy.speed = SPEED_10000; + } else { + switch (pdata->speed_set) { + case XGBE_SPEEDSET_1000_10000: + pdata->phy.speed = SPEED_1000; + break; + + case XGBE_SPEEDSET_2500_10000: + pdata->phy.speed = SPEED_2500; + break; + } + } + pdata->phy.duplex = DUPLEX_FULL; +} + +static void xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) { - DBGPR("-->xgbe_mdio_unregister\n"); + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising = 0; + + if ((pdata->phy.autoneg != AUTONEG_ENABLE) || pdata->parallel_detect) + return xgbe_phy_status_force(pdata); + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Compare Advertisement and Link Partner register 1 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); + if (lp_reg & 0x400) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x800) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x400) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x800) { + if (ad_reg & 0x400) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x400) + pdata->phy.tx_pause = 1; + } + } + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) { + switch (pdata->speed_set) { + case XGBE_SPEEDSET_1000_10000: + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_SPEEDSET_2500_10000: + pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full; + break; + } + } - pdata->phydev = NULL; + ad_reg &= lp_reg; + if (ad_reg & 0x80) { + pdata->phy.speed = SPEED_10000; + xgbe_set_mode(pdata, XGBE_MODE_KR); + } else if (ad_reg & 0x20) { + switch (pdata->speed_set) { + case XGBE_SPEEDSET_1000_10000: + pdata->phy.speed = SPEED_1000; + break; - module_put(pdata->phy_module); - pdata->phy_module = NULL; + case XGBE_SPEEDSET_2500_10000: + pdata->phy.speed = SPEED_2500; + break; + } - mdiobus_unregister(pdata->mii); - pdata->mii->priv = NULL; + 
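A note on xgbe_check_link_timeout() above: it compares jiffies against pdata->link_check + XGBE_LINK_TIMEOUT * HZ with time_after(), which stays correct across counter wraparound because the comparison reduces to a signed difference. A self-contained sketch of that idiom:

	#include <stdbool.h>

	/* Equivalent of the kernel's time_after(now, deadline): true once the
	 * deadline has passed, correct even if the tick counter has wrapped.
	 */
	static bool tick_after(unsigned long now, unsigned long deadline)
	{
		return (long)(now - deadline) > 0;
	}
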
xgbe_set_mode(pdata, XGBE_MODE_KX); + } else { + pdata->phy.speed = SPEED_UNKNOWN; + } + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + pdata->phy.duplex = DUPLEX_FULL; +} + +static void xgbe_phy_status(struct xgbe_prv_data *pdata) +{ + unsigned int reg, link_aneg; + + if (test_bit(XGBE_LINK_ERR, &pdata->dev_state)) { + if (test_and_clear_bit(XGBE_LINK, &pdata->dev_state)) + netif_carrier_off(pdata->netdev); + + pdata->phy.link = 0; + goto adjust_link; + } + + link_aneg = (pdata->phy.autoneg == AUTONEG_ENABLE); + + /* Get the link status. Link status is latched low, so read + * once to clear and then read again to get current state + */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + pdata->phy.link = (reg & MDIO_STAT1_LSTATUS) ? 1 : 0; + + if (pdata->phy.link) { + if (link_aneg && !xgbe_phy_aneg_done(pdata)) { + xgbe_check_link_timeout(pdata); + return; + } + + xgbe_phy_status_aneg(pdata); + + if (test_bit(XGBE_LINK_INIT, &pdata->dev_state)) + clear_bit(XGBE_LINK_INIT, &pdata->dev_state); + + if (!test_bit(XGBE_LINK, &pdata->dev_state)) { + set_bit(XGBE_LINK, &pdata->dev_state); + netif_carrier_on(pdata->netdev); + } + } else { + if (test_bit(XGBE_LINK_INIT, &pdata->dev_state)) { + xgbe_check_link_timeout(pdata); + + if (link_aneg) + return; + } + + xgbe_phy_status_aneg(pdata); + + if (test_bit(XGBE_LINK, &pdata->dev_state)) { + clear_bit(XGBE_LINK, &pdata->dev_state); + netif_carrier_off(pdata->netdev); + } + } + +adjust_link: + xgbe_phy_adjust_link(pdata); +} + +static void xgbe_phy_stop(struct xgbe_prv_data *pdata) +{ + /* Disable auto-negotiation */ + xgbe_disable_an(pdata); + + /* Disable auto-negotiation interrupts */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + + devm_free_irq(pdata->dev, pdata->an_irq, pdata); + + pdata->phy.link = 0; + if (test_and_clear_bit(XGBE_LINK, &pdata->dev_state)) + netif_carrier_off(pdata->netdev); + + xgbe_phy_adjust_link(pdata); +} + +static int xgbe_phy_start(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; + int ret; + + ret = devm_request_irq(pdata->dev, pdata->an_irq, + xgbe_an_isr, 0, pdata->an_name, + pdata); + if (ret) { + netdev_err(netdev, "phy irq request failed\n"); + return ret; + } + + /* Set initial mode - call the mode setting routines + * directly to ensure we are properly configured + */ + if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) { + xgbe_xgmii_mode(pdata); + } else if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full) { + xgbe_gmii_mode(pdata); + } else if (pdata->phy.advertising & ADVERTISED_2500baseX_Full) { + xgbe_gmii_2500_mode(pdata); + } else { + ret = -EINVAL; + goto err_irq; + } + + /* Set up advertisement registers based on current settings */ + xgbe_an_init(pdata); + + /* Enable auto-negotiation interrupts */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0x07); + + return xgbe_phy_config_aneg(pdata); + +err_irq: + devm_free_irq(pdata->dev, pdata->an_irq, pdata); + + return ret; +} + +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) +{ + unsigned int count, reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg |= MDIO_CTRL1_RESET; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + count = 50; + do { + msleep(20); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, 
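The back-to-back reads of MDIO_STAT1 in xgbe_phy_status() above are intentional: the link-status bit is latched low, so the first read returns (and clears) any link drop recorded since the previous poll, and only the second read reports the live state. The shape of the pattern, with mdio_read() as a hypothetical stand-in for XMDIO_READ() and the mask value an assumption:

	extern unsigned int mdio_read(int mmd, int reg);	/* hypothetical accessor */

	#define STAT1_LSTATUS 0x0004	/* assumed: MDIO_STAT1_LSTATUS */

	static int link_is_up(int pcs_mmd, int stat1)
	{
		unsigned int reg;

		reg = mdio_read(pcs_mmd, stat1);	/* consumes the latched value */
		reg = mdio_read(pcs_mmd, stat1);	/* reflects the current state */

		return !!(reg & STAT1_LSTATUS);
	}
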
MDIO_CTRL1); + } while ((reg & MDIO_CTRL1_RESET) && --count); + + if (reg & MDIO_CTRL1_RESET) + return -ETIMEDOUT; + + /* Disable auto-negotiation for now */ + xgbe_disable_an(pdata); + + /* Clear auto-negotiation interrupts */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + + return 0; +} + +static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + + dev_dbg(dev, "\n************* PHY Reg dump **********************\n"); + + dev_dbg(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, + XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1)); + dev_dbg(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1, + XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1)); + dev_dbg(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1, + XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1)); + dev_dbg(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2, + XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2)); + dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1, + XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1)); + dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2, + XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2)); + + dev_dbg(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, + XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1)); + dev_dbg(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1, + XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1)); + dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n", + MDIO_AN_ADVERTISE, + XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE)); + dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#04x) = %#04x\n", + MDIO_AN_ADVERTISE + 1, + XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1)); + dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n", + MDIO_AN_ADVERTISE + 2, + XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2)); + dev_dbg(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n", + MDIO_AN_COMP_STAT, + XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT)); + + dev_dbg(dev, "\n*************************************************\n"); +} + +static void xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + mutex_init(&pdata->an_mutex); + INIT_WORK(&pdata->an_irq_work, xgbe_an_irq_work); + INIT_WORK(&pdata->an_work, xgbe_an_state_machine); + pdata->mdio_mmd = MDIO_MMD_PCS; + + /* Initialize supported features */ + pdata->phy.supported = SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + pdata->phy.supported |= SUPPORTED_10000baseKR_Full; + switch (pdata->speed_set) { + case XGBE_SPEEDSET_1000_10000: + pdata->phy.supported |= SUPPORTED_1000baseKX_Full; + break; + case XGBE_SPEEDSET_2500_10000: + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + break; + } + + pdata->fec_ability = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, + MDIO_PMA_10GBR_FECABLE); + pdata->fec_ability &= (MDIO_PMA_10GBR_FECABLE_ABLE | + MDIO_PMA_10GBR_FECABLE_ERRABLE); + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + + pdata->phy.advertising = pdata->phy.supported; + + pdata->phy.address = 0; + + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + + pdata->phy.link = 0; + + pdata->phy.pause_autoneg = pdata->pause_autoneg; + pdata->phy.tx_pause = pdata->tx_pause; + pdata->phy.rx_pause = pdata->rx_pause; + + /* Fix up Flow Control advertising */ + pdata->phy.advertising &= ~ADVERTISED_Pause; + pdata->phy.advertising &= ~ADVERTISED_Asym_Pause; + + if (pdata->rx_pause) { + pdata->phy.advertising |= 
ADVERTISED_Pause; + pdata->phy.advertising |= ADVERTISED_Asym_Pause; + } + + if (pdata->tx_pause) + pdata->phy.advertising ^= ADVERTISED_Asym_Pause; + + if (netif_msg_drv(pdata)) + xgbe_dump_phy_registers(pdata); +} + +void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if) +{ + phy_if->phy_init = xgbe_phy_init; - mdiobus_free(pdata->mii); - pdata->mii = NULL; + phy_if->phy_reset = xgbe_phy_reset; + phy_if->phy_start = xgbe_phy_start; + phy_if->phy_stop = xgbe_phy_stop; - DBGPR("<--xgbe_mdio_unregister\n"); + phy_if->phy_status = xgbe_phy_status; + phy_if->phy_config_aneg = xgbe_phy_config_aneg; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index e62dfa2deab6..63d72a140053 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -129,7 +129,7 @@ #include <net/dcbnl.h> #define XGBE_DRV_NAME "amd-xgbe" -#define XGBE_DRV_VERSION "1.0.0-a" +#define XGBE_DRV_VERSION "1.0.2" #define XGBE_DRV_DESC "AMD 10 Gigabit Ethernet Driver" /* Descriptor related defines */ @@ -178,14 +178,17 @@ #define XGMAC_JUMBO_PACKET_MTU 9000 #define XGMAC_MAX_JUMBO_PACKET 9018 -/* MDIO bus phy name */ -#define XGBE_PHY_NAME "amd_xgbe_phy" -#define XGBE_PRTAD 0 - /* Common property names */ #define XGBE_MAC_ADDR_PROPERTY "mac-address" #define XGBE_PHY_MODE_PROPERTY "phy-mode" #define XGBE_DMA_IRQS_PROPERTY "amd,per-channel-interrupt" +#define XGBE_SPEEDSET_PROPERTY "amd,speed-set" +#define XGBE_BLWC_PROPERTY "amd,serdes-blwc" +#define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" +#define XGBE_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" +#define XGBE_TX_AMP_PROPERTY "amd,serdes-tx-amp" +#define XGBE_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" +#define XGBE_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" /* Device-tree clock names */ #define XGBE_DMA_CLOCK "dma_clk" @@ -241,6 +244,49 @@ #define XGBE_RSS_LOOKUP_TABLE_TYPE 0 #define XGBE_RSS_HASH_KEY_TYPE 1 +/* Auto-negotiation */ +#define XGBE_AN_MS_TIMEOUT 500 +#define XGBE_LINK_TIMEOUT 10 + +#define XGBE_AN_INT_CMPLT 0x01 +#define XGBE_AN_INC_LINK 0x02 +#define XGBE_AN_PG_RCV 0x04 +#define XGBE_AN_INT_MASK 0x07 + +/* Rate-change complete wait/retry count */ +#define XGBE_RATECHANGE_COUNT 500 + +/* Default SerDes settings */ +#define XGBE_SPEED_10000_BLWC 0 +#define XGBE_SPEED_10000_CDR 0x7 +#define XGBE_SPEED_10000_PLL 0x1 +#define XGBE_SPEED_10000_PQ 0x12 +#define XGBE_SPEED_10000_RATE 0x0 +#define XGBE_SPEED_10000_TXAMP 0xa +#define XGBE_SPEED_10000_WORD 0x7 +#define XGBE_SPEED_10000_DFE_TAP_CONFIG 0x1 +#define XGBE_SPEED_10000_DFE_TAP_ENABLE 0x7f + +#define XGBE_SPEED_2500_BLWC 1 +#define XGBE_SPEED_2500_CDR 0x2 +#define XGBE_SPEED_2500_PLL 0x0 +#define XGBE_SPEED_2500_PQ 0xa +#define XGBE_SPEED_2500_RATE 0x1 +#define XGBE_SPEED_2500_TXAMP 0xf +#define XGBE_SPEED_2500_WORD 0x1 +#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3 +#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0 + +#define XGBE_SPEED_1000_BLWC 1 +#define XGBE_SPEED_1000_CDR 0x2 +#define XGBE_SPEED_1000_PLL 0x0 +#define XGBE_SPEED_1000_PQ 0xa +#define XGBE_SPEED_1000_RATE 0x3 +#define XGBE_SPEED_1000_TXAMP 0xf +#define XGBE_SPEED_1000_WORD 0x1 +#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3 +#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0 + struct xgbe_prv_data; struct xgbe_packet_data { @@ -334,8 +380,6 @@ struct xgbe_ring_data { */ unsigned int state_saved; struct { - unsigned int incomplete; - unsigned int context_next; struct sk_buff *skb; unsigned int len; unsigned int error; @@ -414,6 +458,13 @@ struct xgbe_channel { struct xgbe_ring 
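The flow-control fixup just above is a compact encoding of the 802.3 pause advertisement rules: rx_pause advertises Pause plus Asym_Pause, after which tx_pause toggles Asym_Pause. A sketch of the resulting truth table, with PAUSE/ASYM_PAUSE standing in for the ADVERTISED_* bits:

	#define PAUSE		0x1	/* stands in for ADVERTISED_Pause */
	#define ASYM_PAUSE	0x2	/* stands in for ADVERTISED_Asym_Pause */

	/* rx tx -> Pause Asym
	 *  0  0 ->   0    0	(no pause advertised)
	 *  0  1 ->   0    1	(tx-only: asymmetric toward the peer)
	 *  1  0 ->   1    1	(rx-capable, with or without tx)
	 *  1  1 ->   1    0	(symmetric pause only)
	 */
	static unsigned int pause_adv(int rx_pause, int tx_pause)
	{
		unsigned int adv = 0;

		if (rx_pause)
			adv |= PAUSE | ASYM_PAUSE;
		if (tx_pause)
			adv ^= ASYM_PAUSE;	/* toggle, exactly as the driver does */
		return adv;
	}
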
*rx_ring; } ____cacheline_aligned; +enum xgbe_state { + XGBE_DOWN, + XGBE_LINK, + XGBE_LINK_INIT, + XGBE_LINK_ERR, +}; + enum xgbe_int { XGMAC_INT_DMA_CH_SR_TI, XGMAC_INT_DMA_CH_SR_TPS, @@ -445,6 +496,57 @@ enum xgbe_mtl_fifo_size { XGMAC_MTL_FIFO_SIZE_256K = 0x3ff, }; +enum xgbe_speed { + XGBE_SPEED_1000 = 0, + XGBE_SPEED_2500, + XGBE_SPEED_10000, + XGBE_SPEEDS, +}; + +enum xgbe_an { + XGBE_AN_READY = 0, + XGBE_AN_PAGE_RECEIVED, + XGBE_AN_INCOMPAT_LINK, + XGBE_AN_COMPLETE, + XGBE_AN_NO_LINK, + XGBE_AN_ERROR, +}; + +enum xgbe_rx { + XGBE_RX_BPA = 0, + XGBE_RX_XNP, + XGBE_RX_COMPLETE, + XGBE_RX_ERROR, +}; + +enum xgbe_mode { + XGBE_MODE_KR = 0, + XGBE_MODE_KX, +}; + +enum xgbe_speedset { + XGBE_SPEEDSET_1000_10000 = 0, + XGBE_SPEEDSET_2500_10000, +}; + +struct xgbe_phy { + u32 supported; + u32 advertising; + u32 lp_advertising; + + int address; + + int autoneg; + int speed; + int duplex; + + int link; + + int pause_autoneg; + int tx_pause; + int rx_pause; +}; + struct xgbe_mmc_stats { /* Tx Stats */ u64 txoctetcount_gb; @@ -492,6 +594,11 @@ struct xgbe_mmc_stats { u64 rxwatchdogerror; }; +struct xgbe_ext_stats { + u64 tx_tso_packets; + u64 rx_split_header_packets; +}; + struct xgbe_hw_if { int (*tx_complete)(struct xgbe_ring_desc *); @@ -591,6 +698,20 @@ struct xgbe_hw_if { int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *); }; +struct xgbe_phy_if { + /* For initial PHY setup */ + void (*phy_init)(struct xgbe_prv_data *); + + /* For PHY support when setting device up/down */ + int (*phy_reset)(struct xgbe_prv_data *); + int (*phy_start)(struct xgbe_prv_data *); + void (*phy_stop)(struct xgbe_prv_data *); + + /* For PHY support while device is up */ + void (*phy_status)(struct xgbe_prv_data *); + int (*phy_config_aneg)(struct xgbe_prv_data *); +}; + struct xgbe_desc_if { int (*alloc_ring_resources)(struct xgbe_prv_data *); void (*free_ring_resources)(struct xgbe_prv_data *); @@ -660,6 +781,9 @@ struct xgbe_prv_data { /* XGMAC/XPCS related mmio registers */ void __iomem *xgmac_regs; /* XGMAC CSRs */ void __iomem *xpcs_regs; /* XPCS MMD registers */ + void __iomem *rxtx_regs; /* SerDes Rx/Tx CSRs */ + void __iomem *sir0_regs; /* SerDes integration registers (1/2) */ + void __iomem *sir1_regs; /* SerDes integration registers (2/2) */ /* Overall device lock */ spinlock_t lock; @@ -670,10 +794,14 @@ struct xgbe_prv_data { /* RSS addressing mutex */ struct mutex rss_mutex; + /* Flags representing xgbe_state */ + unsigned long dev_state; + int dev_irq; unsigned int per_channel_irq; struct xgbe_hw_if hw_if; + struct xgbe_phy_if phy_if; struct xgbe_desc_if desc_if; /* AXI DMA settings */ @@ -682,6 +810,11 @@ struct xgbe_prv_data { unsigned int arcache; unsigned int awcache; + /* Service routine support */ + struct workqueue_struct *dev_workqueue; + struct work_struct service_work; + struct timer_list service_timer; + /* Rings for Tx/Rx on a DMA channel */ struct xgbe_channel *channel; unsigned int channel_count; @@ -729,27 +862,12 @@ struct xgbe_prv_data { u32 rss_table[XGBE_RSS_MAX_TABLE_SIZE]; u32 rss_options; - /* MDIO settings */ - struct module *phy_module; - char *mii_bus_id; - struct mii_bus *mii; - int mdio_mmd; - struct phy_device *phydev; - int default_autoneg; - int default_speed; - - /* Current PHY settings */ - phy_interface_t phy_mode; - int phy_link; - int phy_speed; - unsigned int phy_tx_pause; - unsigned int phy_rx_pause; - /* Netdev related settings */ unsigned char mac_addr[ETH_ALEN]; netdev_features_t netdev_features; struct napi_struct napi; struct xgbe_mmc_stats 
mmc_stats; + struct xgbe_ext_stats ext_stats; /* Filtering support */ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -787,6 +905,54 @@ struct xgbe_prv_data { /* Keeps track of power mode */ unsigned int power_down; + /* Network interface message level setting */ + u32 msg_enable; + + /* Current PHY settings */ + phy_interface_t phy_mode; + int phy_link; + int phy_speed; + + /* MDIO/PHY related settings */ + struct xgbe_phy phy; + int mdio_mmd; + unsigned long link_check; + + char an_name[IFNAMSIZ + 32]; + struct workqueue_struct *an_workqueue; + + int an_irq; + struct work_struct an_irq_work; + + unsigned int speed_set; + + /* SerDes UEFI configurable settings. + * Switching between modes/speeds requires new values for some + * SerDes settings. The values can be supplied as device + * properties in array format. The first array entry is for + * 1GbE, second for 2.5GbE and third for 10GbE + */ + u32 serdes_blwc[XGBE_SPEEDS]; + u32 serdes_cdr_rate[XGBE_SPEEDS]; + u32 serdes_pq_skew[XGBE_SPEEDS]; + u32 serdes_tx_amp[XGBE_SPEEDS]; + u32 serdes_dfe_tap_cfg[XGBE_SPEEDS]; + u32 serdes_dfe_tap_ena[XGBE_SPEEDS]; + + /* Auto-negotiation state machine support */ + struct mutex an_mutex; + enum xgbe_an an_result; + enum xgbe_an an_state; + enum xgbe_rx kr_state; + enum xgbe_rx kx_state; + struct work_struct an_work; + unsigned int an_supported; + unsigned int parallel_detect; + unsigned int fec_ability; + unsigned long an_start; + + unsigned int lpm_ctrl; /* CTRL1 for resume */ + #ifdef CONFIG_DEBUG_FS struct dentry *xgbe_debugfs; @@ -800,6 +966,7 @@ struct xgbe_prv_data { /* Function prototypes*/ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *); +void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *); void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *); struct net_device_ops *xgbe_get_netdev_ops(void); struct ethtool_ops *xgbe_get_ethtool_ops(void); @@ -807,14 +974,11 @@ struct ethtool_ops *xgbe_get_ethtool_ops(void); const struct dcbnl_rtnl_ops *xgbe_get_dcbnl_ops(void); #endif -int xgbe_mdio_register(struct xgbe_prv_data *); -void xgbe_mdio_unregister(struct xgbe_prv_data *); -void xgbe_dump_phy_registers(struct xgbe_prv_data *); void xgbe_ptp_register(struct xgbe_prv_data *); void xgbe_ptp_unregister(struct xgbe_prv_data *); -void xgbe_dump_tx_desc(struct xgbe_ring *, unsigned int, unsigned int, - unsigned int); -void xgbe_dump_rx_desc(struct xgbe_ring *, struct xgbe_ring_desc *, +void xgbe_dump_tx_desc(struct xgbe_prv_data *, struct xgbe_ring *, + unsigned int, unsigned int, unsigned int); +void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *, unsigned int); void xgbe_print_pkt(struct net_device *, struct sk_buff *, bool); void xgbe_get_all_hw_features(struct xgbe_prv_data *); @@ -831,18 +995,6 @@ static inline void xgbe_debugfs_init(struct xgbe_prv_data *pdata) {} static inline void xgbe_debugfs_exit(struct xgbe_prv_data *pdata) {} #endif /* CONFIG_DEBUG_FS */ -/* NOTE: Uncomment for TX and RX DESCRIPTOR DUMP in KERNEL LOG */ -#if 0 -#define XGMAC_ENABLE_TX_DESC_DUMP -#define XGMAC_ENABLE_RX_DESC_DUMP -#endif - -/* NOTE: Uncomment for TX and RX PACKET DUMP in KERNEL LOG */ -#if 0 -#define XGMAC_ENABLE_TX_PKT_DUMP -#define XGMAC_ENABLE_RX_PKT_DUMP -#endif - /* NOTE: Uncomment for function trace log messages in KERNEL LOG */ #if 0 #define YDEBUG @@ -852,10 +1004,8 @@ static inline void xgbe_debugfs_exit(struct xgbe_prv_data *pdata) {} /* For debug prints */ #ifdef YDEBUG #define DBGPR(x...) pr_alert(x) -#define DBGPHY_REGS(x...) 
xgbe_dump_phy_registers(x) #else #define DBGPR(x...) do { } while (0) -#define DBGPHY_REGS(x...) do { } while (0) #endif #ifdef YDEBUG_MDIO diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile index 68be565548c0..700b5abe5de5 100644 --- a/drivers/net/ethernet/apm/xgene/Makefile +++ b/drivers/net/ethernet/apm/xgene/Makefile @@ -3,5 +3,5 @@ # xgene-enet-objs := xgene_enet_hw.o xgene_enet_sgmac.o xgene_enet_xgmac.o \ - xgene_enet_main.o xgene_enet_ethtool.o + xgene_enet_main.o xgene_enet_ring2.o xgene_enet_ethtool.o obj-$(CONFIG_NET_XGENE) += xgene-enet.o diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index b927021c6c40..25873d142b95 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -87,10 +87,11 @@ static void xgene_enet_ring_rd32(struct xgene_enet_desc_ring *ring, static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) { + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); int i; xgene_enet_ring_wr32(ring, CSR_RING_CONFIG, ring->num); - for (i = 0; i < NUM_RING_CONFIG; i++) { + for (i = 0; i < pdata->ring_ops->num_ring_config; i++) { xgene_enet_ring_wr32(ring, CSR_RING_WR_BASE + (i * 4), ring->state[i]); } @@ -98,7 +99,7 @@ static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) static void xgene_enet_clr_ring_state(struct xgene_enet_desc_ring *ring) { - memset(ring->state, 0, sizeof(u32) * NUM_RING_CONFIG); + memset(ring->state, 0, sizeof(ring->state)); xgene_enet_write_ring_state(ring); } @@ -141,8 +142,8 @@ static void xgene_enet_clr_desc_ring_id(struct xgene_enet_desc_ring *ring) xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, 0); } -struct xgene_enet_desc_ring *xgene_enet_setup_ring( - struct xgene_enet_desc_ring *ring) +static struct xgene_enet_desc_ring *xgene_enet_setup_ring( + struct xgene_enet_desc_ring *ring) { u32 size = ring->size; u32 i, data; @@ -168,7 +169,7 @@ struct xgene_enet_desc_ring *xgene_enet_setup_ring( return ring; } -void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) +static void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) { u32 data; bool is_bufpool; @@ -186,6 +187,22 @@ out: xgene_enet_clr_ring_state(ring); } +static void xgene_enet_wr_cmd(struct xgene_enet_desc_ring *ring, int count) +{ + iowrite32(count, ring->cmd); +} + +static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) +{ + u32 __iomem *cmd_base = ring->cmd_base; + u32 ring_state, num_msgs; + + ring_state = ioread32(&cmd_base[1]); + num_msgs = GET_VAL(NUMMSGSINQ, ring_state); + + return num_msgs; +} + void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status) @@ -803,3 +820,12 @@ struct xgene_port_ops xgene_gport_ops = { .cle_bypass = xgene_enet_cle_bypass, .shutdown = xgene_gport_shutdown, }; + +struct xgene_ring_ops xgene_ring1_ops = { + .num_ring_config = NUM_RING_CONFIG, + .num_ring_id_shift = 6, + .setup = xgene_enet_setup_ring, + .clear = xgene_enet_clear_ring, + .wr_cmd = xgene_enet_wr_cmd, + .len = xgene_enet_ring_len, +}; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index d9bc89d69266..541bed056012 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -26,6 +26,7 @@ struct xgene_enet_pdata; struct xgene_enet_stats; +struct xgene_enet_desc_ring; /* clears and 
then sets bits */ static inline void xgene_set_bits(u32 *dst, u32 val, u32 start, u32 len) @@ -101,8 +102,8 @@ enum xgene_enet_rm { #define BLOCK_ETH_CSR_OFFSET 0x2000 #define BLOCK_ETH_RING_IF_OFFSET 0x9000 +#define BLOCK_ETH_CLKRST_CSR_OFFSET 0xc000 #define BLOCK_ETH_DIAG_CSR_OFFSET 0xD000 - #define BLOCK_ETH_MAC_OFFSET 0x0000 #define BLOCK_ETH_MAC_CSR_OFFSET 0x2800 @@ -261,6 +262,7 @@ enum xgene_enet_ring_type { enum xgene_ring_owner { RING_OWNER_ETH0, + RING_OWNER_ETH1, RING_OWNER_CPU = 15, RING_OWNER_INVALID }; @@ -314,9 +316,6 @@ static inline u16 xgene_enet_get_numslots(u16 id, u32 size) size / WORK_DESC_SIZE; } -struct xgene_enet_desc_ring *xgene_enet_setup_ring( - struct xgene_enet_desc_ring *ring); -void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring); void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status); @@ -327,5 +326,6 @@ bool xgene_ring_mgr_init(struct xgene_enet_pdata *p); extern struct xgene_mac_ops xgene_gmac_ops; extern struct xgene_port_ops xgene_gport_ops; +extern struct xgene_ring_ops xgene_ring1_ops; #endif /* __XGENE_ENET_HW_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 40d3530d7f30..1bb317532f75 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -28,6 +28,8 @@ #define RES_RING_CSR 1 #define RES_RING_CMD 2 +static const struct of_device_id xgene_enet_of_match[]; + static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool) { struct xgene_enet_raw_desc16 *raw_desc; @@ -48,6 +50,7 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, { struct sk_buff *skb; struct xgene_enet_raw_desc16 *raw_desc; + struct xgene_enet_pdata *pdata; struct net_device *ndev; struct device *dev; dma_addr_t dma_addr; @@ -58,6 +61,7 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, ndev = buf_pool->ndev; dev = ndev_to_dev(buf_pool->ndev); + pdata = netdev_priv(ndev); bufdatalen = BUF_LEN_CODE_2K | (SKB_BUFFER_SIZE & GENMASK(11, 0)); len = XGENE_ENET_MAX_MTU; @@ -82,7 +86,7 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, tail = (tail + 1) & slots; } - iowrite32(nbuf, buf_pool->cmd); + pdata->ring_ops->wr_cmd(buf_pool, nbuf); buf_pool->tail = tail; return 0; @@ -102,26 +106,16 @@ static u8 xgene_enet_hdr_len(const void *data) return (eth->h_proto == htons(ETH_P_8021Q)) ? 
VLAN_ETH_HLEN : ETH_HLEN; } -static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) -{ - u32 __iomem *cmd_base = ring->cmd_base; - u32 ring_state, num_msgs; - - ring_state = ioread32(&cmd_base[1]); - num_msgs = ring_state & CREATE_MASK(NUMMSGSINQ_POS, NUMMSGSINQ_LEN); - - return num_msgs >> NUMMSGSINQ_POS; -} - static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) { + struct xgene_enet_pdata *pdata = netdev_priv(buf_pool->ndev); struct xgene_enet_raw_desc16 *raw_desc; u32 slots = buf_pool->slots - 1; u32 tail = buf_pool->tail; u32 userinfo; int i, len; - len = xgene_enet_ring_len(buf_pool); + len = pdata->ring_ops->len(buf_pool); for (i = 0; i < len; i++) { tail = (tail - 1) & slots; raw_desc = &buf_pool->raw_desc16[tail]; @@ -131,7 +125,7 @@ static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) dev_kfree_skb_any(buf_pool->rx_skb[userinfo]); } - iowrite32(-len, buf_pool->cmd); + pdata->ring_ops->wr_cmd(buf_pool, -len); buf_pool->tail = tail; } @@ -263,8 +257,8 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, struct xgene_enet_desc_ring *cp_ring = tx_ring->cp_ring; u32 tx_level, cq_level; - tx_level = xgene_enet_ring_len(tx_ring); - cq_level = xgene_enet_ring_len(cp_ring); + tx_level = pdata->ring_ops->len(tx_ring); + cq_level = pdata->ring_ops->len(cp_ring); if (unlikely(tx_level > pdata->tx_qcnt_hi || cq_level > pdata->cp_qcnt_hi)) { netif_stop_queue(ndev); @@ -276,7 +270,7 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - iowrite32(1, tx_ring->cmd); + pdata->ring_ops->wr_cmd(tx_ring, 1); skb_tx_timestamp(skb); tx_ring->tail = (tx_ring->tail + 1) & (tx_ring->slots - 1); @@ -389,11 +383,11 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, } while (--budget); if (likely(count)) { - iowrite32(-count, ring->cmd); + pdata->ring_ops->wr_cmd(ring, -count); ring->head = head; if (netif_queue_stopped(ring->ndev)) { - if (xgene_enet_ring_len(ring) < pdata->cp_qcnt_low) + if (pdata->ring_ops->len(ring) < pdata->cp_qcnt_low) netif_wake_queue(ring->ndev); } } @@ -510,6 +504,7 @@ static int xgene_enet_open(struct net_device *ndev) else schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF); + netif_carrier_off(ndev); netif_start_queue(ndev); return ret; @@ -545,7 +540,7 @@ static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring) pdata = netdev_priv(ring->ndev); dev = ndev_to_dev(ring->ndev); - xgene_enet_clear_ring(ring); + pdata->ring_ops->clear(ring); dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma); } @@ -598,15 +593,17 @@ static int xgene_enet_get_ring_size(struct device *dev, static void xgene_enet_free_desc_ring(struct xgene_enet_desc_ring *ring) { + struct xgene_enet_pdata *pdata; struct device *dev; if (!ring) return; dev = ndev_to_dev(ring->ndev); + pdata = netdev_priv(ring->ndev); if (ring->desc_addr) { - xgene_enet_clear_ring(ring); + pdata->ring_ops->clear(ring); dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma); } devm_kfree(dev, ring); @@ -637,6 +634,25 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata) } } +static bool is_irq_mbox_required(struct xgene_enet_pdata *pdata, + struct xgene_enet_desc_ring *ring) +{ + if ((pdata->enet_id == XGENE_ENET2) && + (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU)) { + return true; + } + + return false; +} + +static void __iomem *xgene_enet_ring_cmd_base(struct xgene_enet_pdata *pdata, + struct xgene_enet_desc_ring *ring) +{ + u8 num_ring_id_shift 
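The recurring change in this hunk replaces direct ring command-area accesses with calls through pdata->ring_ops, so the first- and second-generation ring managers (xgene_ring1_ops and, later in this patch, xgene_ring2_ops) can share every call site. A toy sketch of that ops-table shape; the names here are illustrative, not the driver's:

	#include <stdint.h>

	struct ring;				/* opaque descriptor-ring handle */

	struct ring_ops {
		uint8_t num_ring_config;
		uint8_t num_ring_id_shift;
		void (*wr_cmd)(struct ring *ring, int count);
		uint32_t (*len)(struct ring *ring);
	};

	/* Call sites stay generation-agnostic: */
	static void ring_publish_one(const struct ring_ops *ops, struct ring *ring)
	{
		ops->wr_cmd(ring, 1);	/* e.g. hand one new descriptor to hardware */
	}
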
= pdata->ring_ops->num_ring_id_shift; + + return pdata->ring_cmd_addr + (ring->num << num_ring_id_shift); +} + static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring( struct net_device *ndev, u32 ring_num, enum xgene_enet_ring_cfgsize cfgsize, u32 ring_id) @@ -668,9 +684,20 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring( } ring->size = size; - ring->cmd_base = pdata->ring_cmd_addr + (ring->num << 6); + if (is_irq_mbox_required(pdata, ring)) { + ring->irq_mbox_addr = dma_zalloc_coherent(dev, INTR_MBOX_SIZE, + &ring->irq_mbox_dma, GFP_KERNEL); + if (!ring->irq_mbox_addr) { + dma_free_coherent(dev, size, ring->desc_addr, + ring->dma); + devm_kfree(dev, ring); + return NULL; + } + } + + ring->cmd_base = xgene_enet_ring_cmd_base(pdata, ring); ring->cmd = ring->cmd_base + INC_DEC_CMD_ADDR; - ring = xgene_enet_setup_ring(ring); + ring = pdata->ring_ops->setup(ring); netdev_dbg(ndev, "ring info: num=%d size=%d id=%d slots=%d\n", ring->num, ring->size, ring->id, ring->slots); @@ -682,12 +709,34 @@ static u16 xgene_enet_get_ring_id(enum xgene_ring_owner owner, u8 bufnum) return (owner << 6) | (bufnum & GENMASK(5, 0)); } +static enum xgene_ring_owner xgene_derive_ring_owner(struct xgene_enet_pdata *p) +{ + enum xgene_ring_owner owner; + + if (p->enet_id == XGENE_ENET1) { + switch (p->phy_mode) { + case PHY_INTERFACE_MODE_SGMII: + owner = RING_OWNER_ETH0; + break; + default: + owner = (!p->port_id) ? RING_OWNER_ETH0 : + RING_OWNER_ETH1; + break; + } + } else { + owner = (!p->port_id) ? RING_OWNER_ETH0 : RING_OWNER_ETH1; + } + + return owner; +} + static int xgene_enet_create_desc_rings(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct device *dev = ndev_to_dev(ndev); struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring; struct xgene_enet_desc_ring *buf_pool = NULL; + enum xgene_ring_owner owner; u8 cpu_bufnum = pdata->cpu_bufnum; u8 eth_bufnum = pdata->eth_bufnum; u8 bp_bufnum = pdata->bp_bufnum; @@ -696,6 +745,7 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) int ret; /* allocate rx descriptor ring */ + owner = xgene_derive_ring_owner(pdata); ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); rx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_16KB, ring_id); @@ -705,7 +755,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) } /* allocate buffer pool for receiving packets */ - ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, bp_bufnum++); + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(owner, bp_bufnum++); buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_2KB, ring_id); if (!buf_pool) { @@ -734,7 +785,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) pdata->rx_ring = rx_ring; /* allocate tx descriptor ring */ - ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, eth_bufnum++); + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(owner, eth_bufnum++); tx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_16KB, ring_id); if (!tx_ring) { @@ -824,14 +876,21 @@ static int xgene_get_port_id(struct device *dev, struct xgene_enet_pdata *pdata) int ret; ret = device_property_read_u32(dev, "port-id", &id); - if (!ret && id > 1) { - dev_err(dev, "Incorrect port-id specified\n"); - return -ENODEV; - } - pdata->port_id = id; + switch (ret) { + case -EINVAL: + pdata->port_id = 0; + ret = 0; + break; + case 0: + pdata->port_id = id & BIT(0); + break; + default: + dev_err(dev, 
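The reworked xgene_get_port_id() above treats -EINVAL from device_property_read_u32() as "property absent" and quietly defaults to port 0, while any other error is reported and propagated. The same read-with-default shape, sketched with prop_read_u32() as a hypothetical stand-in for the accessor:

	#include <errno.h>

	extern int prop_read_u32(const char *name, unsigned int *val);

	static int read_port_id(unsigned int *port_id)
	{
		unsigned int id;
		int ret = prop_read_u32("port-id", &id);

		if (ret == -EINVAL) {		/* absent: default to port 0 */
			*port_id = 0;
			return 0;
		}
		if (ret)			/* present but unreadable: fail */
			return ret;

		*port_id = id & 1;		/* only ports 0 and 1 exist */
		return 0;
	}
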
"Incorrect port-id specified: errno: %d\n", ret); + break; + } - return 0; + return ret; } static int xgene_get_mac_address(struct device *dev, @@ -876,6 +935,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) struct device *dev; struct resource *res; void __iomem *base_addr; + u32 offset; int ret; pdev = pdata->pdev; @@ -962,14 +1022,20 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) pdata->clk = NULL; } - base_addr = pdata->base_addr - (pdata->port_id * MAC_OFFSET); + if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) + base_addr = pdata->base_addr - (pdata->port_id * MAC_OFFSET); + else + base_addr = pdata->base_addr; pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET; pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET; pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII || pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { pdata->mcx_mac_addr = pdata->base_addr + BLOCK_ETH_MAC_OFFSET; - pdata->mcx_mac_csr_addr = base_addr + BLOCK_ETH_MAC_CSR_OFFSET; + offset = (pdata->enet_id == XGENE_ENET1) ? + BLOCK_ETH_MAC_CSR_OFFSET : + X2_BLOCK_ETH_MAC_CSR_OFFSET; + pdata->mcx_mac_csr_addr = base_addr + offset; } else { pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET; pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET; @@ -1034,23 +1100,44 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) break; } - switch (pdata->port_id) { - case 0: - pdata->cpu_bufnum = START_CPU_BUFNUM_0; - pdata->eth_bufnum = START_ETH_BUFNUM_0; - pdata->bp_bufnum = START_BP_BUFNUM_0; - pdata->ring_num = START_RING_NUM_0; - break; - case 1: - pdata->cpu_bufnum = START_CPU_BUFNUM_1; - pdata->eth_bufnum = START_ETH_BUFNUM_1; - pdata->bp_bufnum = START_BP_BUFNUM_1; - pdata->ring_num = START_RING_NUM_1; - break; - default: - break; + if (pdata->enet_id == XGENE_ENET1) { + switch (pdata->port_id) { + case 0: + pdata->cpu_bufnum = START_CPU_BUFNUM_0; + pdata->eth_bufnum = START_ETH_BUFNUM_0; + pdata->bp_bufnum = START_BP_BUFNUM_0; + pdata->ring_num = START_RING_NUM_0; + break; + case 1: + pdata->cpu_bufnum = START_CPU_BUFNUM_1; + pdata->eth_bufnum = START_ETH_BUFNUM_1; + pdata->bp_bufnum = START_BP_BUFNUM_1; + pdata->ring_num = START_RING_NUM_1; + break; + default: + break; + } + pdata->ring_ops = &xgene_ring1_ops; + } else { + switch (pdata->port_id) { + case 0: + pdata->cpu_bufnum = X2_START_CPU_BUFNUM_0; + pdata->eth_bufnum = X2_START_ETH_BUFNUM_0; + pdata->bp_bufnum = X2_START_BP_BUFNUM_0; + pdata->ring_num = X2_START_RING_NUM_0; + break; + case 1: + pdata->cpu_bufnum = X2_START_CPU_BUFNUM_1; + pdata->eth_bufnum = X2_START_ETH_BUFNUM_1; + pdata->bp_bufnum = X2_START_BP_BUFNUM_1; + pdata->ring_num = X2_START_RING_NUM_1; + break; + default: + break; + } + pdata->rm = RM0; + pdata->ring_ops = &xgene_ring2_ops; } - } static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata) @@ -1086,6 +1173,9 @@ static int xgene_enet_probe(struct platform_device *pdev) struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; struct xgene_mac_ops *mac_ops; +#ifdef CONFIG_OF + const struct of_device_id *of_id; +#endif int ret; ndev = alloc_etherdev(sizeof(struct xgene_enet_pdata)); @@ -1104,6 +1194,17 @@ static int xgene_enet_probe(struct platform_device *pdev) NETIF_F_GSO | NETIF_F_GRO; +#ifdef CONFIG_OF + of_id = of_match_device(xgene_enet_of_match, &pdev->dev); + if (of_id) { + pdata->enet_id = (enum xgene_enet_id)of_id->data; + if (!pdata->enet_id) { + free_netdev(ndev); + return 
-ENODEV; + } + } +#endif + ret = xgene_enet_get_resources(pdata); if (ret) goto err; @@ -1175,9 +1276,11 @@ MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match); #ifdef CONFIG_OF static const struct of_device_id xgene_enet_of_match[] = { - {.compatible = "apm,xgene-enet",}, - {.compatible = "apm,xgene1-sgenet",}, - {.compatible = "apm,xgene1-xgenet",}, + {.compatible = "apm,xgene-enet", .data = (void *)XGENE_ENET1}, + {.compatible = "apm,xgene1-sgenet", .data = (void *)XGENE_ENET1}, + {.compatible = "apm,xgene1-xgenet", .data = (void *)XGENE_ENET1}, + {.compatible = "apm,xgene2-sgenet", .data = (void *)XGENE_ENET2}, + {.compatible = "apm,xgene2-xgenet", .data = (void *)XGENE_ENET2}, {}, }; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 8f3d232b09bc..1c85fc87703a 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -35,6 +35,7 @@ #include <linux/if_vlan.h> #include <linux/phy.h> #include "xgene_enet_hw.h" +#include "xgene_enet_ring2.h" #define XGENE_DRV_VERSION "v1.0" #define XGENE_ENET_MAX_MTU 1536 @@ -51,12 +52,26 @@ #define START_BP_BUFNUM_1 0x2A #define START_RING_NUM_1 264 +#define X2_START_CPU_BUFNUM_0 0 +#define X2_START_ETH_BUFNUM_0 0 +#define X2_START_BP_BUFNUM_0 0x20 +#define X2_START_RING_NUM_0 0 +#define X2_START_CPU_BUFNUM_1 0xc +#define X2_START_ETH_BUFNUM_1 0 +#define X2_START_BP_BUFNUM_1 0x20 +#define X2_START_RING_NUM_1 256 + #define IRQ_ID_SIZE 16 #define XGENE_MAX_TXC_RINGS 1 #define PHY_POLL_LINK_ON (10 * HZ) #define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) +enum xgene_enet_id { + XGENE_ENET1 = 1, + XGENE_ENET2 +}; + /* software context of a descriptor ring */ struct xgene_enet_desc_ring { struct net_device *ndev; @@ -68,10 +83,12 @@ struct xgene_enet_desc_ring { u16 irq; char irq_name[IRQ_ID_SIZE]; u32 size; - u32 state[NUM_RING_CONFIG]; + u32 state[X2_NUM_RING_CONFIG]; void __iomem *cmd_base; void __iomem *cmd; dma_addr_t dma; + dma_addr_t irq_mbox_dma; + void *irq_mbox_addr; u16 dst_ring_num; u8 nbufpool; struct sk_buff *(*rx_skb); @@ -105,6 +122,15 @@ struct xgene_port_ops { void (*shutdown)(struct xgene_enet_pdata *pdata); }; +struct xgene_ring_ops { + u8 num_ring_config; + u8 num_ring_id_shift; + struct xgene_enet_desc_ring * (*setup)(struct xgene_enet_desc_ring *); + void (*clear)(struct xgene_enet_desc_ring *); + void (*wr_cmd)(struct xgene_enet_desc_ring *, int); + u32 (*len)(struct xgene_enet_desc_ring *); +}; + /* ethernet private data */ struct xgene_enet_pdata { struct net_device *ndev; @@ -113,6 +139,7 @@ struct xgene_enet_pdata { int phy_speed; struct clk *clk; struct platform_device *pdev; + enum xgene_enet_id enet_id; struct xgene_enet_desc_ring *tx_ring; struct xgene_enet_desc_ring *rx_ring; char *dev_name; @@ -136,6 +163,7 @@ struct xgene_enet_pdata { struct rtnl_link_stats64 stats; struct xgene_mac_ops *mac_ops; struct xgene_port_ops *port_ops; + struct xgene_ring_ops *ring_ops; struct delayed_work link_work; u32 port_id; u8 cpu_bufnum; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c new file mode 100644 index 000000000000..0b6896bb351e --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c @@ -0,0 +1,200 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Author: Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or 
modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "xgene_enet_main.h" +#include "xgene_enet_hw.h" +#include "xgene_enet_ring2.h" + +static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + u64 addr = ring->dma; + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU) { + ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK); + ring_cfg[3] |= SET_BIT(X2_DEQINTEN); + } + ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1); + + addr >>= 8; + ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr); + + addr >>= 27; + ring_cfg[3] |= SET_VAL(RINGSIZE, ring->cfgsize) + | ACCEPTLERR + | SET_VAL(RINGADDRH, addr); + ring_cfg[4] |= SET_VAL(X2_SELTHRSH, 1); + ring_cfg[5] |= SET_BIT(X2_QBASE_AM) | SET_BIT(X2_MSG_AM); +} + +static void xgene_enet_ring_set_type(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + bool is_bufpool; + u32 val; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + val = (is_bufpool) ? RING_BUFPOOL : RING_REGULAR; + ring_cfg[4] |= SET_VAL(X2_RINGTYPE, val); + if (is_bufpool) + ring_cfg[3] |= SET_VAL(RINGMODE, BUFPOOL_MODE); +} + +static void xgene_enet_ring_set_recombbuf(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + + ring_cfg[3] |= RECOMBBUF; + ring_cfg[4] |= SET_VAL(X2_RECOMTIMEOUT, 0x7); +} + +static void xgene_enet_ring_wr32(struct xgene_enet_desc_ring *ring, + u32 offset, u32 data) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + + iowrite32(data, pdata->ring_csr_addr + offset); +} + +static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + int i; + + xgene_enet_ring_wr32(ring, CSR_RING_CONFIG, ring->num); + for (i = 0; i < pdata->ring_ops->num_ring_config; i++) { + xgene_enet_ring_wr32(ring, CSR_RING_WR_BASE + (i * 4), + ring->state[i]); + } +} + +static void xgene_enet_clr_ring_state(struct xgene_enet_desc_ring *ring) +{ + memset(ring->state, 0, sizeof(ring->state)); + xgene_enet_write_ring_state(ring); +} + +static void xgene_enet_set_ring_state(struct xgene_enet_desc_ring *ring) +{ + enum xgene_ring_owner owner; + + xgene_enet_ring_set_type(ring); + + owner = xgene_enet_ring_owner(ring->id); + if (owner == RING_OWNER_ETH0 || owner == RING_OWNER_ETH1) + xgene_enet_ring_set_recombbuf(ring); + + xgene_enet_ring_init(ring); + xgene_enet_write_ring_state(ring); +} + +static void xgene_enet_set_ring_id(struct xgene_enet_desc_ring *ring) +{ + u32 ring_id_val, ring_id_buf; + bool is_bufpool; + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU) + return; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + + ring_id_val = ring->id & GENMASK(9, 0); + ring_id_val |= OVERWRITE; + + ring_id_buf = (ring->num << 9) & GENMASK(18, 9); + ring_id_buf |= PREFETCH_BUF_EN; + if (is_bufpool) + ring_id_buf |= IS_BUFFER_POOL; + + xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id_val); + xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, ring_id_buf); +} + +static void 
xgene_enet_clr_desc_ring_id(struct xgene_enet_desc_ring *ring) +{ + u32 ring_id; + + ring_id = ring->id | OVERWRITE; + xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id); + xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, 0); +} + +static struct xgene_enet_desc_ring *xgene_enet_setup_ring( + struct xgene_enet_desc_ring *ring) +{ + bool is_bufpool; + u32 addr, i; + + xgene_enet_clr_ring_state(ring); + xgene_enet_set_ring_state(ring); + xgene_enet_set_ring_id(ring); + + ring->slots = xgene_enet_get_numslots(ring->id, ring->size); + + is_bufpool = xgene_enet_is_bufpool(ring->id); + if (is_bufpool || xgene_enet_ring_owner(ring->id) != RING_OWNER_CPU) + return ring; + + addr = CSR_VMID0_INTR_MBOX + (4 * (ring->id & RING_BUFNUM_MASK)); + xgene_enet_ring_wr32(ring, addr, ring->irq_mbox_dma >> 10); + + for (i = 0; i < ring->slots; i++) + xgene_enet_mark_desc_slot_empty(&ring->raw_desc[i]); + + return ring; +} + +static void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) +{ + xgene_enet_clr_desc_ring_id(ring); + xgene_enet_clr_ring_state(ring); +} + +static void xgene_enet_wr_cmd(struct xgene_enet_desc_ring *ring, int count) +{ + u32 data = 0; + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU) { + data = SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK) | + INTR_CLEAR; + } + data |= (count & GENMASK(16, 0)); + + iowrite32(data, ring->cmd); +} + +static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) +{ + u32 __iomem *cmd_base = ring->cmd_base; + u32 ring_state, num_msgs; + + ring_state = ioread32(&cmd_base[1]); + num_msgs = GET_VAL(X2_NUMMSGSINQ, ring_state); + + return num_msgs; +} + +struct xgene_ring_ops xgene_ring2_ops = { + .num_ring_config = X2_NUM_RING_CONFIG, + .num_ring_id_shift = 13, + .setup = xgene_enet_setup_ring, + .clear = xgene_enet_clear_ring, + .wr_cmd = xgene_enet_wr_cmd, + .len = xgene_enet_ring_len, +}; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h new file mode 100644 index 000000000000..8b235db23c42 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h @@ -0,0 +1,49 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Author: Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __XGENE_ENET_RING2_H__ +#define __XGENE_ENET_RING2_H__ + +#include "xgene_enet_main.h" + +#define X2_NUM_RING_CONFIG 6 + +#define INTR_MBOX_SIZE 1024 +#define CSR_VMID0_INTR_MBOX 0x0270 +#define INTR_CLEAR BIT(23) + +#define X2_MSG_AM_POS 10 +#define X2_QBASE_AM_POS 11 +#define X2_INTLINE_POS 24 +#define X2_INTLINE_LEN 5 +#define X2_CFGCRID_POS 29 +#define X2_CFGCRID_LEN 3 +#define X2_SELTHRSH_POS 7 +#define X2_SELTHRSH_LEN 3 +#define X2_RINGTYPE_POS 23 +#define X2_RINGTYPE_LEN 2 +#define X2_DEQINTEN_POS 29 +#define X2_RECOMTIMEOUT_POS 0 +#define X2_RECOMTIMEOUT_LEN 7 +#define X2_NUMMSGSINQ_POS 0 +#define X2_NUMMSGSINQ_LEN 17 + +extern struct xgene_ring_ops xgene_ring2_ops; + +#endif /* __XGENE_ENET_RING2_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index f27fb6f2a93b..ff240b3cb2b8 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -21,6 +21,7 @@ #include "xgene_enet_main.h" #include "xgene_enet_hw.h" #include "xgene_enet_sgmac.h" +#include "xgene_enet_xgmac.h" static void xgene_enet_wr_csr(struct xgene_enet_pdata *p, u32 offset, u32 val) { @@ -39,6 +40,14 @@ static void xgene_enet_wr_diag_csr(struct xgene_enet_pdata *p, iowrite32(val, p->eth_diag_csr_addr + offset); } +static void xgene_enet_wr_mcx_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 val) +{ + void __iomem *addr = pdata->mcx_mac_csr_addr + offset; + + iowrite32(val, addr); +} + static bool xgene_enet_wr_indirect(struct xgene_indirect_ctl *ctl, u32 wr_addr, u32 wr_data) { @@ -140,8 +149,9 @@ static int xgene_enet_ecc_init(struct xgene_enet_pdata *p) static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *p) { - u32 val = 0xffffffff; + u32 val; + val = (p->enet_id == XGENE_ENET1) ? 
0xffffffff : 0; xgene_enet_wr_ring_if(p, ENET_CFGSSQMIWQASSOC_ADDR, val); xgene_enet_wr_ring_if(p, ENET_CFGSSQMIFPQASSOC_ADDR, val); } @@ -227,6 +237,8 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p) { u32 data, loop = 10; u32 offset = p->port_id * 4; + u32 enet_spare_cfg_reg, rsif_config_reg; + u32 cfg_bypass_reg, rx_dv_gate_reg; xgene_sgmac_reset(p); @@ -239,7 +251,7 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p) SGMII_STATUS_ADDR >> 2); if ((data & AUTO_NEG_COMPLETE) && (data & LINK_STATUS)) break; - usleep_range(10, 20); + usleep_range(1000, 2000); } if (!(data & AUTO_NEG_COMPLETE) || !(data & LINK_STATUS)) netdev_err(p->ndev, "Auto-negotiation failed\n"); @@ -249,33 +261,38 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p) xgene_enet_wr_mac(p, MAC_CONFIG_2_ADDR, data | FULL_DUPLEX2); xgene_enet_wr_mac(p, INTERFACE_CONTROL_ADDR, ENET_GHD_MODE); - data = xgene_enet_rd_csr(p, ENET_SPARE_CFG_REG_ADDR); + if (p->enet_id == XGENE_ENET1) { + enet_spare_cfg_reg = ENET_SPARE_CFG_REG_ADDR; + rsif_config_reg = RSIF_CONFIG_REG_ADDR; + cfg_bypass_reg = CFG_BYPASS_ADDR; + rx_dv_gate_reg = SG_RX_DV_GATE_REG_0_ADDR; + } else { + enet_spare_cfg_reg = XG_ENET_SPARE_CFG_REG_ADDR; + rsif_config_reg = XG_RSIF_CONFIG_REG_ADDR; + cfg_bypass_reg = XG_CFG_BYPASS_ADDR; + rx_dv_gate_reg = XG_MCX_RX_DV_GATE_REG_0_ADDR; + } + + data = xgene_enet_rd_csr(p, enet_spare_cfg_reg); data |= MPA_IDLE_WITH_QMI_EMPTY; - xgene_enet_wr_csr(p, ENET_SPARE_CFG_REG_ADDR, data); + xgene_enet_wr_csr(p, enet_spare_cfg_reg, data); xgene_sgmac_set_mac_addr(p); - data = xgene_enet_rd_csr(p, DEBUG_REG_ADDR); - data |= CFG_BYPASS_UNISEC_TX | CFG_BYPASS_UNISEC_RX; - xgene_enet_wr_csr(p, DEBUG_REG_ADDR, data); - /* Adjust MDC clock frequency */ data = xgene_enet_rd_mac(p, MII_MGMT_CONFIG_ADDR); MGMT_CLOCK_SEL_SET(&data, 7); xgene_enet_wr_mac(p, MII_MGMT_CONFIG_ADDR, data); /* Enable drop if bufpool not available */ - data = xgene_enet_rd_csr(p, RSIF_CONFIG_REG_ADDR); + data = xgene_enet_rd_csr(p, rsif_config_reg); data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; - xgene_enet_wr_csr(p, RSIF_CONFIG_REG_ADDR, data); - - /* Rtype should be copied from FP */ - xgene_enet_wr_csr(p, RSIF_RAM_DBG_REG0_ADDR, 0); + xgene_enet_wr_csr(p, rsif_config_reg, data); /* Bypass traffic gating */ - xgene_enet_wr_csr(p, CFG_LINK_AGGR_RESUME_0_ADDR + offset, TX_PORT0); - xgene_enet_wr_csr(p, CFG_BYPASS_ADDR, RESUME_TX); - xgene_enet_wr_csr(p, SG_RX_DV_GATE_REG_0_ADDR + offset, RESUME_RX0); + xgene_enet_wr_csr(p, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x84); + xgene_enet_wr_csr(p, cfg_bypass_reg, RESUME_TX); + xgene_enet_wr_mcx_csr(p, rx_dv_gate_reg + offset, RESUME_RX0); } static void xgene_sgmac_rxtx(struct xgene_enet_pdata *p, u32 bits, bool set) @@ -331,14 +348,23 @@ static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p, u32 dst_ring_num, u16 bufpool_id) { u32 data, fpsel; + u32 cle_bypass_reg0, cle_bypass_reg1; u32 offset = p->port_id * MAC_OFFSET; + if (p->enet_id == XGENE_ENET1) { + cle_bypass_reg0 = CLE_BYPASS_REG0_0_ADDR; + cle_bypass_reg1 = CLE_BYPASS_REG1_0_ADDR; + } else { + cle_bypass_reg0 = XCLE_BYPASS_REG0_ADDR; + cle_bypass_reg1 = XCLE_BYPASS_REG1_ADDR; + } + data = CFG_CLE_BYPASS_EN0; - xgene_enet_wr_csr(p, CLE_BYPASS_REG0_0_ADDR + offset, data); + xgene_enet_wr_csr(p, cle_bypass_reg0 + offset, data); fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20; data = CFG_CLE_DSTQID0(dst_ring_num) | CFG_CLE_FPSEL0(fpsel); - xgene_enet_wr_csr(p, CLE_BYPASS_REG1_0_ADDR + offset, data); + xgene_enet_wr_csr(p, cle_bypass_reg1 + offset, 
data); } static void xgene_enet_shutdown(struct xgene_enet_pdata *p) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index a18a9d1f1143..27ba2fe3fca6 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -122,7 +122,6 @@ static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd, return true; } - static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, u32 rd_addr, u32 *rd_data) { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index 5a5296a6d1df..bf0a99435737 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -21,9 +21,28 @@ #ifndef __XGENE_ENET_XGMAC_H__ #define __XGENE_ENET_XGMAC_H__ +#define X2_BLOCK_ETH_MAC_CSR_OFFSET 0x3000 #define BLOCK_AXG_MAC_OFFSET 0x0800 #define BLOCK_AXG_MAC_CSR_OFFSET 0x2000 +#define XGENET_CONFIG_REG_ADDR 0x20 +#define XGENET_SRST_ADDR 0x00 +#define XGENET_CLKEN_ADDR 0x08 + +#define CSR_CLK BIT(0) +#define XGENET_CLK BIT(1) +#define PCS_CLK BIT(3) +#define AN_REF_CLK BIT(4) +#define AN_CLK BIT(5) +#define AD_CLK BIT(6) + +#define CSR_RST BIT(0) +#define XGENET_RST BIT(1) +#define PCS_RST BIT(3) +#define AN_REF_RST BIT(4) +#define AN_RST BIT(5) +#define AD_RST BIT(6) + #define AXGMAC_CONFIG_0 0x0000 #define AXGMAC_CONFIG_1 0x0004 #define HSTMACRST BIT(31) @@ -38,6 +57,7 @@ #define HSTMACADR_MSW_ADDR 0x0014 #define HSTMAXFRAME_LENGTH_ADDR 0x0020 +#define XG_MCX_RX_DV_GATE_REG_0_ADDR 0x0004 #define XG_RSIF_CONFIG_REG_ADDR 0x00a0 #define XCLE_BYPASS_REG0_ADDR 0x0160 #define XCLE_BYPASS_REG1_ADDR 0x0164 diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 783543ad1fcf..084a50a555de 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -456,6 +456,67 @@ static int bcm_sysport_set_wol(struct net_device *dev, return 0; } +static int bcm_sysport_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + u32 reg; + + reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0)); + + ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000; + ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK; + + reg = rdma_readl(priv, RDMA_MBDONE_INTR); + + ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / 1000; + ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK; + + return 0; +} + +static int bcm_sysport_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + unsigned int i; + u32 reg; + + /* Base system clock is 125 MHz, DMA timeout is this reference clock + * divided by 1024, which yields roughly 8.192 us, our maximum value has + * to fit in the RING_TIMEOUT_MASK (16 bits).
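To make the tick arithmetic in these two coalescing handlers concrete: one timeout tick is 1024 / 125 MHz = 8.192 us (8192 ns), so set_coalesce rounds a microsecond request up into ticks and get_coalesce converts ticks back into microseconds. A standalone sketch of just that conversion (the function names here are illustrative, not the driver's):

#include <stdio.h>

/* One DMA timeout tick: 125 MHz reference clock / 1024 = 8192 ns. */
#define TICK_NS	8192

/* usecs -> ticks, rounding up, as bcm_sysport_set_coalesce does */
static unsigned int usecs_to_ticks(unsigned int usecs)
{
	return (usecs * 1000 + TICK_NS - 1) / TICK_NS;
}

/* ticks -> usecs (truncating), as bcm_sysport_get_coalesce does */
static unsigned int ticks_to_usecs(unsigned int ticks)
{
	return ticks * TICK_NS / 1000;
}

int main(void)
{
	/* e.g. a 100 us request becomes 13 ticks, read back as 106 us */
	unsigned int ticks = usecs_to_ticks(100);

	printf("100 us -> %u ticks -> %u us\n", ticks, ticks_to_usecs(ticks));
	return 0;
}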
+ */ + if (ec->tx_max_coalesced_frames > RING_INTR_THRESH_MASK || + ec->tx_coalesce_usecs > (RING_TIMEOUT_MASK * 8) + 1 || + ec->rx_max_coalesced_frames > RDMA_INTR_THRESH_MASK || + ec->rx_coalesce_usecs > (RDMA_TIMEOUT_MASK * 8) + 1) + return -EINVAL; + + if ((ec->tx_coalesce_usecs == 0 && ec->tx_max_coalesced_frames == 0) || + (ec->rx_coalesce_usecs == 0 && ec->rx_max_coalesced_frames == 0)) + return -EINVAL; + + for (i = 0; i < dev->num_tx_queues; i++) { + reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i)); + reg &= ~(RING_INTR_THRESH_MASK | + RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT); + reg |= ec->tx_max_coalesced_frames; + reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) << + RING_TIMEOUT_SHIFT; + tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i)); + } + + reg = rdma_readl(priv, RDMA_MBDONE_INTR); + reg &= ~(RDMA_INTR_THRESH_MASK | + RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT); + reg |= ec->rx_max_coalesced_frames; + reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) << + RDMA_TIMEOUT_SHIFT; + rdma_writel(priv, reg, RDMA_MBDONE_INTR); + + return 0; +} + static void bcm_sysport_free_cb(struct bcm_sysport_cb *cb) { dev_kfree_skb_any(cb->skb); @@ -1641,6 +1702,8 @@ static struct ethtool_ops bcm_sysport_ethtool_ops = { .get_sset_count = bcm_sysport_get_sset_count, .get_wol = bcm_sysport_get_wol, .set_wol = bcm_sysport_set_wol, + .get_coalesce = bcm_sysport_get_coalesce, + .set_coalesce = bcm_sysport_set_coalesce, }; static const struct net_device_ops bcm_sysport_netdev_ops = { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index e2c043eabbf3..42a4b4a0bc14 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -292,7 +292,7 @@ struct bcm_rsb { #define RDMA_END_ADDR_LO 0x102c #define RDMA_MBDONE_INTR 0x1030 -#define RDMA_INTR_THRESH_MASK 0xff +#define RDMA_INTR_THRESH_MASK 0x1ff #define RDMA_TIMEOUT_SHIFT 16 #define RDMA_TIMEOUT_MASK 0xffff diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ec56a9b65dc3..2ef202d10948 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -662,7 +662,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data) { if (fp->rx_frag_size) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 069952fa5d64..73c934cf6c61 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6618,7 +6618,7 @@ static void tg3_tx(struct tg3_napi *tnapi) static void tg3_frag_free(bool is_frag, void *data) { if (is_frag) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index fc646a41d548..740d04fd2223 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -54,6 +54,8 @@ #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1)) #define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1)) +#define GEM_MTU_MIN_SIZE 68 + /* * Graceful stop timeouts in us. 
We should allow up to * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) @@ -785,7 +787,7 @@ static int gem_rx(struct macb *bp, int budget) } /* now everything is ready for receiving packet */ bp->rx_skbuff[entry] = NULL; - len = MACB_BFEXT(RX_FRMLEN, ctrl); + len = ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); @@ -831,7 +833,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, struct macb_dma_desc *desc; desc = macb_rx_desc(bp, last_frag); - len = MACB_BFEXT(RX_FRMLEN, desc->ctrl); + len = desc->ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", macb_rx_ring_wrap(first_frag), @@ -1651,7 +1653,10 @@ static void macb_init_hw(struct macb *bp) config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ config |= MACB_BIT(PAE); /* PAuse Enable */ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ - config |= MACB_BIT(BIG); /* Receive oversized frames */ + if (bp->caps & MACB_CAPS_JUMBO) + config |= MACB_BIT(JFRAME); /* Enable jumbo frames */ + else + config |= MACB_BIT(BIG); /* Receive oversized frames */ if (bp->dev->flags & IFF_PROMISC) config |= MACB_BIT(CAF); /* Copy All Frames */ else if (macb_is_gem(bp) && bp->dev->features & NETIF_F_RXCSUM) @@ -1660,8 +1665,13 @@ static void macb_init_hw(struct macb *bp) config |= MACB_BIT(NBC); /* No BroadCast */ config |= macb_dbw(bp); macb_writel(bp, NCFGR, config); + if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len) + gem_writel(bp, JML, bp->jumbo_max_len); bp->speed = SPEED_10; bp->duplex = DUPLEX_HALF; + bp->rx_frm_len_mask = MACB_RX_FRMLEN_MASK; + if (bp->caps & MACB_CAPS_JUMBO) + bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK; macb_configure_dma(bp); @@ -1865,6 +1875,26 @@ static int macb_close(struct net_device *dev) return 0; } +static int macb_change_mtu(struct net_device *dev, int new_mtu) +{ + struct macb *bp = netdev_priv(dev); + u32 max_mtu; + + if (netif_running(dev)) + return -EBUSY; + + max_mtu = ETH_DATA_LEN; + if (bp->caps & MACB_CAPS_JUMBO) + max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN; + + if ((new_mtu > max_mtu) || (new_mtu < GEM_MTU_MIN_SIZE)) + return -EINVAL; + + dev->mtu = new_mtu; + + return 0; +} + static void gem_update_stats(struct macb *bp) { int i; @@ -2141,7 +2171,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_get_stats = macb_get_stats, .ndo_do_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, + .ndo_change_mtu = macb_change_mtu, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = macb_poll_controller, @@ -2702,6 +2732,16 @@ static const struct macb_config emac_config = { .init = at91ether_init, }; + +static const struct macb_config zynqmp_config = { + .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | + MACB_CAPS_JUMBO, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = macb_init, + .jumbo_max_len = 10240, +}; + static const struct macb_config zynq_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF, @@ -2720,6 +2760,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, { .compatible = "cdns,at91rm9200-emac", .data = &emac_config }, { .compatible = "cdns,emac", .data = &emac_config }, + { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, { .compatible = "cdns,zynq-gem", .data = &zynq_config }, { /* sentinel */ } }; @@ -2789,6 +2830,10 @@ 
static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; + if (macb_config->jumbo_max_len) { + bp->jumbo_max_len = macb_config->jumbo_max_len; + } + spin_lock_init(&bp->lock); /* setup capabilities */ diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 24b1d9bcd865..d74655993d4b 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -71,6 +71,7 @@ #define GEM_NCFGR 0x0004 /* Network Config */ #define GEM_USRIO 0x000c /* User IO */ #define GEM_DMACFG 0x0010 /* DMA Configuration */ +#define GEM_JML 0x0048 /* Jumbo Max Length */ #define GEM_HRB 0x0080 /* Hash Bottom */ #define GEM_HRT 0x0084 /* Hash Top */ #define GEM_SA1B 0x0088 /* Specific1 Bottom */ @@ -398,6 +399,7 @@ #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 #define MACB_CAPS_MACB_IS_GEM 0x80000000 +#define MACB_CAPS_JUMBO 0x00000008 /* Bit manipulation macros */ #define MACB_BIT(name) \ @@ -515,6 +517,9 @@ struct macb_dma_desc { #define MACB_RX_BROADCAST_OFFSET 31 #define MACB_RX_BROADCAST_SIZE 1 +#define MACB_RX_FRMLEN_MASK 0xFFF +#define MACB_RX_JFRMLEN_MASK 0x3FFF + /* RX checksum offload disabled: bit 24 clear in NCFGR */ #define GEM_RX_TYPEID_MATCH_OFFSET 22 #define GEM_RX_TYPEID_MATCH_SIZE 2 @@ -758,6 +763,7 @@ struct macb_config { int (*clk_init)(struct platform_device *pdev, struct clk **pclk, struct clk **hclk, struct clk **tx_clk); int (*init)(struct platform_device *pdev); + int jumbo_max_len; }; struct macb_queue { @@ -827,6 +833,9 @@ struct macb { unsigned int max_tx_length; u64 ethtool_stats[GEM_STATS_LEN]; + + unsigned int rx_frm_len_mask; + unsigned int jumbo_max_len; }; static inline bool macb_is_gem(struct macb *bp) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 524d11098c56..6e884d1efcda 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -328,6 +328,17 @@ struct adapter_params { unsigned int max_ird_adapter; /* Max read depth per adapter */ }; +/* State needed to monitor the forward progress of SGE Ingress DMA activities + * and possible hangs. 
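The MTU bound computed in macb_change_mtu above follows directly from the frame layout: the JML register holds a maximum frame length, so the usable MTU is that value minus the Ethernet header and FCS. A quick worked example using the jumbo_max_len of 10240 from the zynqmp config table:

#include <stdio.h>

#define ETH_HLEN		14	/* Ethernet header */
#define ETH_FCS_LEN		4	/* frame check sequence */
#define GEM_MTU_MIN_SIZE	68

int main(void)
{
	unsigned int jml = 10240;	/* zynqmp_config.jumbo_max_len */
	unsigned int max_mtu = jml - ETH_HLEN - ETH_FCS_LEN;

	/* 10240 - 14 - 4 = 10222: any MTU in [68, 10222] is accepted */
	printf("valid MTU range: %u..%u\n", GEM_MTU_MIN_SIZE, max_mtu);
	return 0;
}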
+ */ +struct sge_idma_monitor_state { + unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */ + unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */ + unsigned int idma_state[2]; /* IDMA Hang detect state */ + unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */ + unsigned int idma_warn[2]; /* time to warning in HZ */ +}; + #include "t4fw_api.h" #define FW_VERSION(chip) ( \ @@ -630,12 +641,7 @@ struct sge { u32 fl_align; /* response queue message alignment */ u32 fl_starve_thres; /* Free List starvation threshold */ - /* State variables for detecting an SGE Ingress DMA hang */ - unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */ - unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */ - unsigned int idma_state[2]; /* SGE IDMA Hang detect state */ - unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */ - + struct sge_idma_monitor_state idma_monitor; unsigned int egr_start; unsigned int egr_sz; unsigned int ingr_start; @@ -1055,7 +1061,7 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb); int t4_ofld_send(struct adapter *adap, struct sk_buff *skb); int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd); + struct sge_fl *fl, rspq_handler_t hnd, int cong); int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, unsigned int iqid); @@ -1151,6 +1157,10 @@ int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc); int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); +u32 t4_read_pcie_cfg4(struct adapter *adap, int reg); +u32 t4_get_util_window(struct adapter *adap); +void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); + #define T4_MEMORY_WRITE 0 #define T4_MEMORY_READ 1 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, @@ -1195,12 +1205,15 @@ int t4_init_devlog_params(struct adapter *adapter); int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); +int t4_init_rss_mode(struct adapter *adap, int mbox); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); void t4_fatal_err(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, unsigned int flags); +int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, + unsigned int flags, unsigned int defq); int t4_read_rss(struct adapter *adapter, u16 *entries); void t4_read_rss_key(struct adapter *adapter, u32 *key); void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx); @@ -1211,10 +1224,7 @@ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, u32 t4_read_rss_pf_map(struct adapter *adapter); u32 t4_read_rss_pf_mask(struct adapter *adapter); -int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, - u64 *parity); -int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, - u64 *parity); +unsigned int t4_get_mps_bg_map(struct adapter *adapter, int idx); void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); int 
t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data, @@ -1310,4 +1320,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma); +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 10d82b51d7ef..401272a2691e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -578,7 +578,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq; c->rx_coalesce_usecs = qtimer_val(adap, rq); - c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ? + c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ? adap->sge.counter_val[rq->pktcnt_idx] : 0; c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 803d91beec6f..5b8f80fbe80e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -856,23 +856,39 @@ static void free_msix_queue_irqs(struct adapter *adap) * * Sets up the portion of the HW RSS table for the port's VI to distribute * packets to the Rx queues in @queues. + * Should never be called before setting up sge eth rx queues */ int cxgb4_write_rss(const struct port_info *pi, const u16 *queues) { u16 *rss; int i, err; - const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset]; + struct adapter *adapter = pi->adapter; + const struct sge_eth_rxq *rxq; + rxq = &adapter->sge.ethrxq[pi->first_qset]; rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL); if (!rss) return -ENOMEM; /* map the queue indices to queue ids */ for (i = 0; i < pi->rss_size; i++, queues++) - rss[i] = q[*queues].rspq.abs_id; + rss[i] = rxq[*queues].rspq.abs_id; - err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0, + err = t4_config_rss_range(adapter, adapter->fn, pi->viid, 0, pi->rss_size, rss, pi->rss_size); + /* If Tunnel All Lookup isn't specified in the global RSS + * Configuration, then we need to specify a default Ingress + * Queue for any ingress packets which aren't hashed. We'll + * use our first ingress queue ... 
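The setup_rss change above fills each port's indirection table with a simple round-robin spread before cxgb4_write_rss translates table slots into absolute response-queue IDs (rss[i] = rxq[*queues].rspq.abs_id). A minimal sketch of that two-step mapping, with made-up queue IDs:

#include <stdio.h>

int main(void)
{
	unsigned int rss_size = 8, nqsets = 3;
	/* hypothetical absolute response-queue IDs for this port */
	unsigned short abs_id[3] = { 64, 65, 66 };
	unsigned short rss[8];
	unsigned int j;

	/* Equal distribution, as in setup_rss: slot j -> qset j % nqsets,
	 * then each slot is written out as that qset's absolute qid.
	 */
	for (j = 0; j < rss_size; j++)
		rss[j] = abs_id[j % nqsets];

	for (j = 0; j < rss_size; j++)
		printf("slot %u -> qid %u\n", j, rss[j]);
	return 0;
}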
+ */ + if (!err) + err = t4_config_vi_rss(adapter, adapter->mbox, pi->viid, + FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F | + FW_RSS_VI_CONFIG_CMD_UDPEN_F, + rss[0]); kfree(rss); return err; } @@ -885,11 +901,15 @@ int cxgb4_write_rss(const struct port_info *pi, const u16 *queues) */ static int setup_rss(struct adapter *adap) { - int i, err; + int i, j, err; for_each_port(adap, i) { const struct port_info *pi = adap2pinfo(adap, i); + /* Fill default values with equal distribution */ + for (j = 0; j < pi->rss_size; j++) + pi->rss[j] = j % pi->nqsets; + err = cxgb4_write_rss(pi, pi->rss); if (err) return err; @@ -977,7 +997,7 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler); + uldrx_handler, 0); if (err) return err; memset(&q->stats, 0, sizeof(q->stats)); @@ -1007,7 +1027,7 @@ static int setup_sge_queues(struct adapter *adap) msi_idx = 1; /* vector 0 is for non-queue interrupts */ else { err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, - NULL, NULL); + NULL, NULL, -1); if (err) return err; msi_idx = -((int)s->intrq.abs_id + 1); @@ -1027,7 +1047,7 @@ static int setup_sge_queues(struct adapter *adap) * new/deleted queues. */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler); + msi_idx, NULL, fwevtq_handler, -1); if (err) { freeout: t4_free_sge_resources(adap); return err; @@ -1044,7 +1064,9 @@ freeout: t4_free_sge_resources(adap); msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx, &q->fl, - t4_ethrx_handler); + t4_ethrx_handler, + t4_get_mps_bg_map(adap, + pi->tx_chan)); if (err) goto freeout; q->rspq.idx = j; @@ -1398,7 +1420,7 @@ int cxgb4_set_rspq_intr_params(struct sge_rspq *q, } us = us == 0 ? 6 : closest_timer(&adap->sge, us); - q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0); + q->intr_params = QINTR_TIMER_IDX_V(us) | QINTR_CNT_EN_V(cnt > 0); return 0; } @@ -2432,6 +2454,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.max_ordird_qp = adap->params.max_ordird_qp; lli.max_ird_adapter = adap->params.max_ird_adapter; lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; + lli.nodeid = dev_to_node(adap->pdev_dev); handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -3034,86 +3057,11 @@ void t4_fatal_err(struct adapter *adap) dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n"); } -/* Return the specified PCI-E Configuration Space register from our Physical - * Function. We try first via a Firmware LDST Command since we prefer to let - * the firmware own all of these registers, but if that fails we go for it - * directly ourselves. - */ -static u32 t4_read_pcie_cfg4(struct adapter *adap, int reg) -{ - struct fw_ldst_cmd ldst_cmd; - u32 val; - int ret; - - /* Construct and send the Firmware LDST Command to retrieve the - * specified PCI-E Configuration Space register. 
- */ - memset(&ldst_cmd, 0, sizeof(ldst_cmd)); - ldst_cmd.op_to_addrspace = - htonl(FW_CMD_OP_V(FW_LDST_CMD) | - FW_CMD_REQUEST_F | - FW_CMD_READ_F | - FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_FUNC_PCIE)); - ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd)); - ldst_cmd.u.pcie.select_naccess = FW_LDST_CMD_NACCESS_V(1); - ldst_cmd.u.pcie.ctrl_to_fn = - (FW_LDST_CMD_LC_F | FW_LDST_CMD_FN_V(adap->fn)); - ldst_cmd.u.pcie.r = reg; - ret = t4_wr_mbox(adap, adap->mbox, &ldst_cmd, sizeof(ldst_cmd), - &ldst_cmd); - - /* If the LDST Command suucceeded, exctract the returned register - * value. Otherwise read it directly ourself. - */ - if (ret == 0) - val = ntohl(ldst_cmd.u.pcie.data[0]); - else - t4_hw_pci_read_cfg4(adap, reg, &val); - - return val; -} - static void setup_memwin(struct adapter *adap) { - u32 mem_win0_base, mem_win1_base, mem_win2_base, mem_win2_aperture; + u32 nic_win_base = t4_get_util_window(adap); - if (is_t4(adap->params.chip)) { - u32 bar0; - - /* Truncation intentional: we only read the bottom 32-bits of - * the 64-bit BAR0/BAR1 ... We use the hardware backdoor - * mechanism to read BAR0 instead of using - * pci_resource_start() because we could be operating from - * within a Virtual Machine which is trapping our accesses to - * our Configuration Space and we need to set up the PCI-E - * Memory Window decoders with the actual addresses which will - * be coming across the PCI-E link. - */ - bar0 = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_0); - bar0 &= PCI_BASE_ADDRESS_MEM_MASK; - adap->t4_bar0 = bar0; - - mem_win0_base = bar0 + MEMWIN0_BASE; - mem_win1_base = bar0 + MEMWIN1_BASE; - mem_win2_base = bar0 + MEMWIN2_BASE; - mem_win2_aperture = MEMWIN2_APERTURE; - } else { - /* For T5, only relative offset inside the PCIe BAR is passed */ - mem_win0_base = MEMWIN0_BASE; - mem_win1_base = MEMWIN1_BASE; - mem_win2_base = MEMWIN2_BASE_T5; - mem_win2_aperture = MEMWIN2_APERTURE_T5; - } - t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 0), - mem_win0_base | BIR_V(0) | - WINDOW_V(ilog2(MEMWIN0_APERTURE) - 10)); - t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 1), - mem_win1_base | BIR_V(0) | - WINDOW_V(ilog2(MEMWIN1_APERTURE) - 10)); - t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 2), - mem_win2_base | BIR_V(0) | - WINDOW_V(ilog2(mem_win2_aperture) - 10)); - t4_read_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 2)); + t4_setup_memwin(adap, nic_win_base, MEMWIN_NIC); } static void setup_memwin_rdma(struct adapter *adap) @@ -4340,7 +4288,12 @@ static int enable_msix(struct adapter *adap) static int init_rss(struct adapter *adap) { - unsigned int i, j; + unsigned int i; + int err; + + err = t4_init_rss_mode(adap, adap->mbox); + if (err) + return err; for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); @@ -4348,8 +4301,6 @@ static int init_rss(struct adapter *adap) pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL); if (!pi->rss) return -ENOMEM; - for (j = 0; j < pi->rss_size; j++) - pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets); } return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 78ab4d406ce2..df34293f35e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -264,6 +264,7 @@ struct cxgb4_lld_info { unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */ unsigned int max_ird_adapter; /* Max IRD memory per adapter */ bool ulptx_memwrite_dsgl; 
/* use of T5 DSGL allowed */ + int nodeid; /* device numa node id */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 0d2eddab04ef..dd18fcb644f9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -100,16 +100,6 @@ */ #define TX_QCHECK_PERIOD (HZ / 2) -/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate - * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA - * State Machines in the same state for this amount of time (in HZ) then we'll - * issue a warning about a potential hang. We'll repeat the warning as the - * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till - * the situation clears. If the situation clears, we'll note that as well. - */ -#define SGE_IDMA_WARN_THRESH (1 * HZ) -#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) - /* * Max number of Tx descriptors to be reclaimed by the Tx timer. */ @@ -540,6 +530,10 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) val = PIDX_T5_V(q->pend_cred / 8) | DBTYPE_F; val |= DBPRIO_F; + + /* Make sure all memory writes to the Free List queue are + * committed before we tell the hardware about them. + */ wmb(); /* If we don't have access to the new User Doorbell (T5+), use @@ -930,7 +924,10 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) */ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { - wmb(); /* write descriptors before telling HW */ + /* Make sure that all writes to the TX Descriptors are committed + * before we tell the hardware about them. + */ + wmb(); /* If we don't have access to the new User Doorbell (T5+), use the old * doorbell mechanism; otherwise use the new BAR2 mechanism. 
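Both ring_fl_db and ring_tx_db follow the ordering discipline the expanded comments spell out: publish the descriptors to memory, issue a write barrier, then write the doorbell. A hedged skeleton of that pattern; the struct layout is a stand-in, and the C11 release fence only approximates the kernel's wmb(), which additionally orders the MMIO doorbell write:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct fake_ring {
	uint64_t desc[8];	/* "DMA" descriptor ring */
	uint32_t doorbell;	/* stand-in for the MMIO doorbell register */
};

static void ring_db(struct fake_ring *q, unsigned int pidx, uint64_t d)
{
	q->desc[pidx] = d;	/* 1. publish the descriptor */

	/* 2. order the descriptor write before the doorbell write;
	 * the kernel uses wmb() here.
	 */
	atomic_thread_fence(memory_order_release);

	q->doorbell = pidx + 1;	/* 3. tell the hardware about it */
}

int main(void)
{
	struct fake_ring q = { { 0 }, 0 };

	ring_db(&q, 0, 0xdeadbeefULL);
	printf("doorbell=%u\n", q.doorbell);
	return 0;
}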
@@ -1047,7 +1044,7 @@ nocsum: /* * unknown protocol, disable HW csum * and hope a bad packet is detected */ - return TXPKT_L4CSUM_DIS; + return TXPKT_L4CSUM_DIS_F; } } else { /* @@ -1064,14 +1061,15 @@ nocsum: /* } if (likely(csum_type >= TX_CSUM_TCPIP)) - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_IPHDR_LEN(skb_network_header_len(skb)) | - TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_IPHDR_LEN_V(skb_network_header_len(skb)) | + TXPKT_ETHHDR_LEN_V(skb_network_offset(skb) - ETH_HLEN); else { int start = skb_transport_offset(skb); - return TXPKT_CSUM_TYPE(csum_type) | TXPKT_CSUM_START(start) | - TXPKT_CSUM_LOC(start + skb->csum_offset); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_CSUM_START_V(start) | + TXPKT_CSUM_LOC_V(start + skb->csum_offset); } } @@ -1112,11 +1110,11 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, return -ENOTSUPP; /* FC CRC offload */ - *cntrl = TXPKT_CSUM_TYPE(TX_CSUM_FCOE) | - TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS | - TXPKT_CSUM_START(CXGB_FCOE_TXPKT_CSUM_START) | - TXPKT_CSUM_END(CXGB_FCOE_TXPKT_CSUM_END) | - TXPKT_CSUM_LOC(CXGB_FCOE_TXPKT_CSUM_END); + *cntrl = TXPKT_CSUM_TYPE_V(TX_CSUM_FCOE) | + TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F | + TXPKT_CSUM_START_V(CXGB_FCOE_TXPKT_CSUM_START) | + TXPKT_CSUM_END_V(CXGB_FCOE_TXPKT_CSUM_END) | + TXPKT_CSUM_LOC_V(CXGB_FCOE_TXPKT_CSUM_END); return 0; } #endif /* CONFIG_CHELSIO_T4_FCOE */ @@ -1130,7 +1128,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, */ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - int len; u32 wr_mid; u64 cntrl, *end; int qidx, credits; @@ -1143,6 +1140,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; bool immediate = false; + int len, max_pkt_len; #ifdef CONFIG_CHELSIO_T4_FCOE int err; #endif /* CONFIG_CHELSIO_T4_FCOE */ @@ -1156,13 +1154,20 @@ out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + pi = netdev_priv(dev); adap = pi->adapter; qidx = skb_get_queue_mapping(skb); q = &adap->sge.ethtxq[qidx + pi->first_qset]; reclaim_completed_tx(adap, &q->q, true); - cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; #ifdef CONFIG_CHELSIO_T4_FCOE err = cxgb_fcoe_offload(skb, adap, pi, &cntrl); @@ -1213,23 +1218,23 @@ out_free: dev_kfree_skb_any(skb); len += sizeof(*lso); wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | FW_WR_IMMDLEN_V(len)); - lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) | - LSO_FIRST_SLICE | LSO_LAST_SLICE | - LSO_IPV6(v6) | - LSO_ETHHDR_LEN(eth_xtra_len / 4) | - LSO_IPHDR_LEN(l3hdr_len / 4) | - LSO_TCPHDR_LEN(tcp_hdr(skb)->doff)); + lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) | + LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F | + LSO_IPV6_V(v6) | + LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | + LSO_IPHDR_LEN_V(l3hdr_len / 4) | + LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); lso->c.ipid_ofst = htons(0); lso->c.mss = htons(ssi->gso_size); lso->c.seqno_offset = htonl(0); if (is_t4(adap->params.chip)) lso->c.len = htonl(skb->len); else - lso->c.len = htonl(LSO_T5_XFER_SIZE(skb->len)); + lso->c.len = htonl(LSO_T5_XFER_SIZE_V(skb->len)); cpl = (void *)(lso + 1); - cntrl = TXPKT_CSUM_TYPE(v6 ? 
TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | - TXPKT_IPHDR_LEN(l3hdr_len) | - TXPKT_ETHHDR_LEN(eth_xtra_len); + cntrl = TXPKT_CSUM_TYPE_V(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | + TXPKT_IPHDR_LEN_V(l3hdr_len) | + TXPKT_ETHHDR_LEN_V(eth_xtra_len); q->tso++; q->tx_cso += ssi->gso_segs; } else { @@ -1238,23 +1243,24 @@ out_free: dev_kfree_skb_any(skb); FW_WR_IMMDLEN_V(len)); cpl = (void *)(wr + 1); if (skb->ip_summed == CHECKSUM_PARTIAL) { - cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; + cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS_F; q->tx_cso++; } } if (skb_vlan_tag_present(skb)) { q->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); #ifdef CONFIG_CHELSIO_T4_FCOE if (skb->protocol == htons(ETH_P_FCOE)) - cntrl |= TXPKT_VLAN( + cntrl |= TXPKT_VLAN_V( ((skb->priority & 0x7) << VLAN_PRIO_SHIFT)); #endif /* CONFIG_CHELSIO_T4_FCOE */ } - cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) | - TXPKT_INTF(pi->tx_chan) | TXPKT_PF(adap->fn)); + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT_XT) | + TXPKT_INTF_V(pi->tx_chan) | + TXPKT_PF_V(adap->fn)); cpl->pack = htons(0); cpl->len = htons(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); @@ -1964,7 +1970,7 @@ static void restore_rx_bufs(const struct pkt_gl *si, struct sge_fl *q, static inline bool is_new_response(const struct rsp_ctrl *r, const struct sge_rspq *q) { - return RSPD_GEN(r->type_gen) == q->gen; + return (r->type_gen >> RSPD_GEN_S) == q->gen; } /** @@ -2011,19 +2017,19 @@ static int process_responses(struct sge_rspq *q, int budget) break; dma_rmb(); - rsp_type = RSPD_TYPE(rc->type_gen); - if (likely(rsp_type == RSP_TYPE_FLBUF)) { + rsp_type = RSPD_TYPE_G(rc->type_gen); + if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) { struct page_frag *fp; struct pkt_gl si; const struct rx_sw_desc *rsd; u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags; - if (len & RSPD_NEWBUF) { + if (len & RSPD_NEWBUF_F) { if (likely(q->offset > 0)) { free_rx_bufs(q->adap, &rxq->fl, 1); q->offset = 0; } - len = RSPD_LEN(len); + len = RSPD_LEN_G(len); } si.tot_len = len; @@ -2058,7 +2064,7 @@ static int process_responses(struct sge_rspq *q, int budget) q->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&si, &rxq->fl, frags); - } else if (likely(rsp_type == RSP_TYPE_CPL)) { + } else if (likely(rsp_type == RSPD_TYPE_CPL_X)) { ret = q->handler(q, q->cur_desc, NULL); } else { ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN); @@ -2066,7 +2072,7 @@ static int process_responses(struct sge_rspq *q, int budget) if (unlikely(ret)) { /* couldn't process descriptor, back off for recovery */ - q->next_intr_params = QINTR_TIMER_IDX(NOMEM_TMR_IDX); + q->next_intr_params = QINTR_TIMER_IDX_V(NOMEM_TMR_IDX); break; } @@ -2090,7 +2096,7 @@ int cxgb_busy_poll(struct napi_struct *napi) return LL_FLUSH_BUSY; work_done = process_responses(q, 4); - params = QINTR_TIMER_IDX(TIMERREG_COUNTER0_X) | QINTR_CNT_EN; + params = QINTR_TIMER_IDX_V(TIMERREG_COUNTER0_X) | QINTR_CNT_EN_V(1); q->next_intr_params = params; val = CIDXINC_V(work_done) | SEINTARM_V(params); @@ -2137,7 +2143,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) int timer_index; napi_complete(napi); - timer_index = QINTR_TIMER_IDX_GET(q->next_intr_params); + timer_index = QINTR_TIMER_IDX_G(q->next_intr_params); if (q->adaptive_rx) { if (work_done > max(timer_pkt_quota[timer_index], @@ -2147,15 +2153,16 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) timer_index = timer_index - 1; timer_index = clamp(timer_index, 0, 
SGE_TIMERREGS - 1); - q->next_intr_params = QINTR_TIMER_IDX(timer_index) | - V_QINTR_CNT_EN; + q->next_intr_params = + QINTR_TIMER_IDX_V(timer_index) | + QINTR_CNT_EN_V(0); params = q->next_intr_params; } else { params = q->next_intr_params; q->next_intr_params = q->intr_params; } } else - params = QINTR_TIMER_IDX(7); + params = QINTR_TIMER_IDX_V(7); val = CIDXINC_V(work_done) | SEINTARM_V(params); @@ -2203,7 +2210,7 @@ static unsigned int process_intrq(struct adapter *adap) break; dma_rmb(); - if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) { + if (RSPD_TYPE_G(rc->type_gen) == RSPD_TYPE_INTR_X) { unsigned int qid = ntohl(rc->pldbuflen_qid); qid -= adap->sge.ingr_start; @@ -2279,7 +2286,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap) static void sge_rx_timer_cb(unsigned long data) { unsigned long m; - unsigned int i, idma_same_state_cnt[2]; + unsigned int i; struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; @@ -2300,67 +2307,16 @@ static void sge_rx_timer_cb(unsigned long data) set_bit(id, s->starving_fl); } } + /* The remainder of the SGE RX Timer Callback routine is dedicated to + * global Master PF activities like checking for chip ingress stalls, + * etc. + */ + if (!(adap->flags & MASTER_PF)) + goto done; - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13); - idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A); - idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - - for (i = 0; i < 2; i++) { - u32 debug0, debug11; - - /* If the Ingress DMA Same State Counter ("timer") is less - * than 1s, then we can reset our synthesized Stall Timer and - * continue. If we have previously emitted warnings about a - * potential stalled Ingress Queue, issue a note indicating - * that the Ingress Queue has resumed forward progress. - */ - if (idma_same_state_cnt[i] < s->idma_1s_thresh) { - if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH) - CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n", - i, s->idma_qid[i], - s->idma_stalled[i]/HZ); - s->idma_stalled[i] = 0; - continue; - } - - /* Synthesize an SGE Ingress DMA Same State Timer in the Hz - * domain. The first time we get here it'll be because we - * passed the 1s Threshold; each additional time it'll be - * because the RX Timer Callback is being fired on its regular - * schedule. - * - * If the stall is below our Potential Hung Ingress Queue - * Warning Threshold, continue. - */ - if (s->idma_stalled[i] == 0) - s->idma_stalled[i] = HZ; - else - s->idma_stalled[i] += RX_QCHECK_PERIOD; - - if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH) - continue; - - /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */ - if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0) - continue; - - /* Read and save the SGE IDMA State and Queue ID information. - * We do this every time in case it changes across time ... 
- */ - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0); - debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; - - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11); - debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; - - CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n", - i, s->idma_qid[i], s->idma_state[i], - s->idma_stalled[i]/HZ, debug0, debug11); - t4_sge_decode_idma_state(adap, s->idma_state[i]); - } + t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD); +done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } @@ -2437,9 +2393,12 @@ static void __iomem *bar2_address(struct adapter *adapter, return adapter->bar2 + bar2_qoffset; } +/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0 + * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map + */ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd) + struct sge_fl *fl, rspq_handler_t hnd, int cong) { int ret, flsz = 0; struct fw_iq_cmd c; @@ -2462,7 +2421,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, FW_LEN16(c)); c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) | FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) | - FW_IQ_CMD_IQANDST_V(intr_idx < 0) | FW_IQ_CMD_IQANUD_V(1) | + FW_IQ_CMD_IQANDST_V(intr_idx < 0) | + FW_IQ_CMD_IQANUD_V(UPDATEDELIVERY_INTERRUPT_X) | FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? intr_idx : -intr_idx - 1)); c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) | @@ -2471,8 +2431,19 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4)); c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); + if (cong >= 0) + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F); if (fl) { + /* Allocate the ring for the hardware free list (with space + * for its status page) along with the associated software + * descriptor ring. The free list size needs to be a multiple + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congestion Threshold + * (fl_starve_thres - 1).
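The clamp that follows this comment enforces exactly that rule: grow the free list to at least fl_starve_thres - 1 plus two 8-descriptor Egress Queue Units, then round up to a multiple of 8. A small sketch of the same arithmetic with an assumed starvation threshold:

#include <stdio.h>

static unsigned int roundup8(unsigned int x)
{
	return (x + 7) & ~7u;
}

int main(void)
{
	unsigned int fl_starve_thres = 68;	/* assumed; chip-dependent */
	unsigned int fl_size = 72;		/* requested ring size */
	unsigned int min_size = fl_starve_thres - 1 + 2 * 8;

	if (fl_size < min_size)
		fl_size = min_size;
	fl_size = roundup8(fl_size);

	/* 68 - 1 + 16 = 83, rounded up to 88 entries */
	printf("fl_size=%u\n", fl_size);
	return 0;
}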
+ */ + if (fl->size < s->fl_starve_thres - 1 + 2 * 8) + fl->size = s->fl_starve_thres - 1 + 2 * 8; fl->size = roundup(fl->size, 8); fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), &fl->addr, @@ -2481,12 +2452,18 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto fl_nomem; flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F | - FW_IQ_CMD_FL0FETCHRO_F | - FW_IQ_CMD_FL0DATARO_F | - FW_IQ_CMD_FL0PADEN_F); - c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) | - FW_IQ_CMD_FL0FBMAX_V(3)); + c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_F | + FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0PADEN_F); + if (cong >= 0) + c.iqns_to_fl0congen |= + htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) | + FW_IQ_CMD_FL0CONGCIF_F | + FW_IQ_CMD_FL0CONGEN_F); + c.fl0dcaen_to_fl0cidxfthresh = + htons(FW_IQ_CMD_FL0FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_IQ_CMD_FL0FBMAX_V(FETCHBURSTMAX_512B_X)); c.fl0size = htons(flsz); c.fl0addr = cpu_to_be64(fl->addr); } @@ -2532,6 +2509,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } + + /* For T5 and later we attempt to set up the Congestion Manager values + * of the new RX Ethernet Queue. This should really be handled by + * firmware because it's more complex than any host driver wants to + * get involved with and it's different per chip and this is almost + * certainly wrong. Firmware would be wrong as well, but it would be + * a lot easier to fix in one place ... For now we do something very + * simple (and hopefully less wrong). + */ + if (!is_t4(adap->params.chip) && cong >= 0) { + u32 param, val; + int i; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + FW_PARAMS_PARAM_YZ_V(iq->cntxt_id)); + if (cong == 0) { + val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X); + } else { + val = + CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X); + for (i = 0; i < 4; i++) { + if (cong & (1 << i)) + val |= + CONMCTXT_CNGCHMAP_V(1 << (i << 2)); + } + } + ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1, + ¶m, &val); + if (ret) + dev_warn(adap->pdev_dev, "Failed to set Congestion" + " Manager Context for Ingress Queue %d: %d\n", + iq->cntxt_id, -ret); + } + return 0; fl_nomem: @@ -2595,14 +2607,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c)); c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | FW_EQ_ETH_CMD_VIID_V(pi->viid)); - c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(2) | - FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | - FW_EQ_ETH_CMD_FETCHRO_V(1) | - FW_EQ_ETH_CMD_IQID_V(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN_V(2) | - FW_EQ_ETH_CMD_FBMAX_V(3) | - FW_EQ_ETH_CMD_CIDXFTHRESH_V(5) | - FW_EQ_ETH_CMD_EQSIZE_V(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_ETH_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2637,7 +2650,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq 
*txq, txq->q.desc = alloc_ring(adap->pdev_dev, nentries, sizeof(struct tx_desc), 0, &txq->q.phys_addr, - NULL, 0, NUMA_NO_NODE); + NULL, 0, dev_to_node(adap->pdev_dev)); if (!txq->q.desc) return -ENOMEM; @@ -2649,14 +2662,15 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c)); c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid)); c.physeqid_pkd = htonl(0); - c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(2) | - FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | - FW_EQ_CTRL_CMD_FETCHRO_F | - FW_EQ_CTRL_CMD_IQID_V(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN_V(2) | - FW_EQ_CTRL_CMD_FBMAX_V(3) | - FW_EQ_CTRL_CMD_CIDXFTHRESH_V(5) | - FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2701,14 +2715,15 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, FW_EQ_OFLD_CMD_VFN_V(0)); c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F | FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c)); - c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(2) | - FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | - FW_EQ_OFLD_CMD_FETCHRO_F | - FW_EQ_OFLD_CMD_IQID_V(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN_V(2) | - FW_EQ_OFLD_CMD_FBMAX_V(3) | - FW_EQ_OFLD_CMD_CIDXFTHRESH_V(5) | - FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -3023,7 +3038,11 @@ int t4_sge_init(struct adapter *adap) * Packing Boundary. T5 introduced the ability to specify these * separately. The actual Ingress Packet Data alignment boundary * within Packed Buffer Mode is the maximum of these two - * specifications. + * specifications. (Note that it makes no real practical sense to + * have the Padding Boundary be larger than the Packing Boundary but you + * could set the chip up that way and, in fact, legacy T4 code would + * end up doing this because it would initialize the Padding Boundary and + * leave the Packing Boundary initialized to 0 (16 bytes).) */ ingpadboundary = 1 << (INGPADBOUNDARY_G(sge_control) + INGPADBOUNDARY_SHIFT_X); @@ -3067,11 +3086,14 @@ int t4_sge_init(struct adapter *adap) egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl); s->fl_starve_thres = 2*egress_threshold + 1; + t4_idma_monitor_init(adap, &s->idma_monitor); + + /* Set up timers used for recurring callbacks to process RX and TX + * administrative tasks.
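The parenthetical note in t4_sge_init above is about how the driver derives its ingress alignment: the padding boundary is decoded from SGE_CONTROL as a shifted power of two, and in Packed Buffer Mode the effective alignment is the larger of the padding and packing boundaries. A rough standalone sketch with assumed values (INGPADBOUNDARY_SHIFT_X is taken to be 5 here, i.e. a minimum 32-byte boundary, and the field and packing values are hypothetical):

#include <stdio.h>

#define INGPADBOUNDARY_SHIFT_X	5	/* assumed: boundary = 2^(field + 5) */

int main(void)
{
	unsigned int ingpad_field = 2;		/* assumed INGPADBOUNDARY field */
	unsigned int ingpadboundary = 1u << (ingpad_field +
					     INGPADBOUNDARY_SHIFT_X);
	unsigned int ingpackboundary = 64;	/* assumed T5 packing boundary */

	/* Effective alignment is the max of the two specifications */
	unsigned int fl_align = ingpadboundary > ingpackboundary ?
				ingpadboundary : ingpackboundary;

	printf("pad=%u pack=%u fl_align=%u\n",
	       ingpadboundary, ingpackboundary, fl_align);
	return 0;
}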
+ */ setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); - s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */ - s->idma_stalled[0] = 0; - s->idma_stalled[1] = 0; + spin_lock_init(&s->intrq_lock); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8578a742f2a..be34eaac41bf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -214,8 +214,8 @@ static void fw_asrt(struct adapter *adap, u32 mbox_addr) get_mbox_rpl(adap, (__be64 *)&asrt, sizeof(asrt) / 8, mbox_addr); dev_alert(adap->pdev_dev, "FW assertion at %.16s:%u, val0 %#x, val1 %#x\n", - asrt.u.assert.filename_0_7, ntohl(asrt.u.assert.line), - ntohl(asrt.u.assert.x), ntohl(asrt.u.assert.y)); + asrt.u.assert.filename_0_7, be32_to_cpu(asrt.u.assert.line), + be32_to_cpu(asrt.u.assert.x), be32_to_cpu(asrt.u.assert.y)); } static void dump_mbox(struct adapter *adap, int mbox, u32 data_reg) @@ -333,116 +333,6 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, } /** - * t4_mc_read - read from MC through backdoor accesses - * @adap: the adapter - * @addr: address of first byte requested - * @idx: which MC to access - * @data: 64 bytes of data containing the requested address - * @ecc: where to store the corresponding 64-bit ECC word - * - * Read 64 bytes of data from MC starting at a 64-byte-aligned address - * that covers the requested address @addr. If @parity is not %NULL it - * is assigned the 64-bit ECC word for the read data. - */ -int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) -{ - int i; - u32 mc_bist_cmd, mc_bist_cmd_addr, mc_bist_cmd_len; - u32 mc_bist_status_rdata, mc_bist_data_pattern; - - if (is_t4(adap->params.chip)) { - mc_bist_cmd = MC_BIST_CMD_A; - mc_bist_cmd_addr = MC_BIST_CMD_ADDR_A; - mc_bist_cmd_len = MC_BIST_CMD_LEN_A; - mc_bist_status_rdata = MC_BIST_STATUS_RDATA_A; - mc_bist_data_pattern = MC_BIST_DATA_PATTERN_A; - } else { - mc_bist_cmd = MC_REG(MC_P_BIST_CMD_A, idx); - mc_bist_cmd_addr = MC_REG(MC_P_BIST_CMD_ADDR_A, idx); - mc_bist_cmd_len = MC_REG(MC_P_BIST_CMD_LEN_A, idx); - mc_bist_status_rdata = MC_REG(MC_P_BIST_STATUS_RDATA_A, idx); - mc_bist_data_pattern = MC_REG(MC_P_BIST_DATA_PATTERN_A, idx); - } - - if (t4_read_reg(adap, mc_bist_cmd) & START_BIST_F) - return -EBUSY; - t4_write_reg(adap, mc_bist_cmd_addr, addr & ~0x3fU); - t4_write_reg(adap, mc_bist_cmd_len, 64); - t4_write_reg(adap, mc_bist_data_pattern, 0xc); - t4_write_reg(adap, mc_bist_cmd, BIST_OPCODE_V(1) | START_BIST_F | - BIST_CMD_GAP_V(1)); - i = t4_wait_op_done(adap, mc_bist_cmd, START_BIST_F, 0, 10, 1); - if (i) - return i; - -#define MC_DATA(i) MC_BIST_STATUS_REG(mc_bist_status_rdata, i) - - for (i = 15; i >= 0; i--) - *data++ = htonl(t4_read_reg(adap, MC_DATA(i))); - if (ecc) - *ecc = t4_read_reg64(adap, MC_DATA(16)); -#undef MC_DATA - return 0; -} - -/** - * t4_edc_read - read from EDC through backdoor accesses - * @adap: the adapter - * @idx: which EDC to access - * @addr: address of first byte requested - * @data: 64 bytes of data containing the requested address - * @ecc: where to store the corresponding 64-bit ECC word - * - * Read 64 bytes of data from EDC starting at a 64-byte-aligned address - * that covers the requested address @addr. If @parity is not %NULL it - * is assigned the 64-bit ECC word for the read data. 
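The fw_asrt() cleanup above swaps ntohl() for be32_to_cpu(): both perform the same byte swap on little-endian hosts, but the latter states the intent, since mailbox assert fields are big-endian firmware data rather than network byte order. A minimal portable sketch of that decode (be32_to_host() is a stand-in, not a kernel API):

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for be32_to_cpu(): reassemble a 32-bit value
 * from big-endian bytes regardless of host byte order.
 */
static uint32_t be32_to_host(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

int main(void)
{
	uint8_t line[4] = { 0x00, 0x00, 0x01, 0x2a };	/* e.g. assert.line */

	printf("line=%u\n", be32_to_host(line));	/* prints 298 */
	return 0;
}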
- */ -int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) -{ - int i; - u32 edc_bist_cmd, edc_bist_cmd_addr, edc_bist_cmd_len; - u32 edc_bist_cmd_data_pattern, edc_bist_status_rdata; - - if (is_t4(adap->params.chip)) { - edc_bist_cmd = EDC_REG(EDC_BIST_CMD_A, idx); - edc_bist_cmd_addr = EDC_REG(EDC_BIST_CMD_ADDR_A, idx); - edc_bist_cmd_len = EDC_REG(EDC_BIST_CMD_LEN_A, idx); - edc_bist_cmd_data_pattern = EDC_REG(EDC_BIST_DATA_PATTERN_A, - idx); - edc_bist_status_rdata = EDC_REG(EDC_BIST_STATUS_RDATA_A, - idx); - } else { - edc_bist_cmd = EDC_REG_T5(EDC_H_BIST_CMD_A, idx); - edc_bist_cmd_addr = EDC_REG_T5(EDC_H_BIST_CMD_ADDR_A, idx); - edc_bist_cmd_len = EDC_REG_T5(EDC_H_BIST_CMD_LEN_A, idx); - edc_bist_cmd_data_pattern = - EDC_REG_T5(EDC_H_BIST_DATA_PATTERN_A, idx); - edc_bist_status_rdata = - EDC_REG_T5(EDC_H_BIST_STATUS_RDATA_A, idx); - } - - if (t4_read_reg(adap, edc_bist_cmd) & START_BIST_F) - return -EBUSY; - t4_write_reg(adap, edc_bist_cmd_addr, addr & ~0x3fU); - t4_write_reg(adap, edc_bist_cmd_len, 64); - t4_write_reg(adap, edc_bist_cmd_data_pattern, 0xc); - t4_write_reg(adap, edc_bist_cmd, - BIST_OPCODE_V(1) | BIST_CMD_GAP_V(1) | START_BIST_F); - i = t4_wait_op_done(adap, edc_bist_cmd, START_BIST_F, 0, 10, 1); - if (i) - return i; - -#define EDC_DATA(i) (EDC_BIST_STATUS_REG(edc_bist_status_rdata, i)) - - for (i = 15; i >= 0; i--) - *data++ = htonl(t4_read_reg(adap, EDC_DATA(i))); - if (ecc) - *ecc = t4_read_reg64(adap, EDC_DATA(16)); -#undef EDC_DATA - return 0; -} - -/** * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window * @adap: the adapter * @win: PCI-E Memory Window to use @@ -625,6 +515,102 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, return 0; } +/* Return the specified PCI-E Configuration Space register from our Physical + * Function. We try first via a Firmware LDST Command since we prefer to let + * the firmware own all of these registers, but if that fails we go for it + * directly ourselves. + */ +u32 t4_read_pcie_cfg4(struct adapter *adap, int reg) +{ + u32 val, ldst_addrspace; + + /* If fw_attach != 0, construct and send the Firmware LDST Command to + * retrieve the specified PCI-E Configuration Space register. + */ + struct fw_ldst_cmd ldst_cmd; + int ret; + + memset(&ldst_cmd, 0, sizeof(ldst_cmd)); + ldst_addrspace = FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_FUNC_PCIE); + ldst_cmd.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_READ_F | + ldst_addrspace); + ldst_cmd.cycles_to_len16 = cpu_to_be32(FW_LEN16(ldst_cmd)); + ldst_cmd.u.pcie.select_naccess = FW_LDST_CMD_NACCESS_V(1); + ldst_cmd.u.pcie.ctrl_to_fn = + (FW_LDST_CMD_LC_F | FW_LDST_CMD_FN_V(adap->fn)); + ldst_cmd.u.pcie.r = reg; + + /* If the LDST Command succeeds, return the result, otherwise + * fall through to reading it directly ourselves ... + */ + ret = t4_wr_mbox(adap, adap->mbox, &ldst_cmd, sizeof(ldst_cmd), + &ldst_cmd); + if (ret == 0) + val = be32_to_cpu(ldst_cmd.u.pcie.data[0]); + else + /* Read the desired Configuration Space register via the PCI-E + * Backdoor mechanism. + */ + t4_hw_pci_read_cfg4(adap, reg, &val); + return val; +} + +/* Get the window based on base passed to it. 
+ * Window aperture is currently unhandled, but there is no use case for it + * right now + */ +static u32 t4_get_window(struct adapter *adap, u32 pci_base, u64 pci_mask, + u32 memwin_base) +{ + u32 ret; + + if (is_t4(adap->params.chip)) { + u32 bar0; + + /* Truncation intentional: we only read the bottom 32-bits of + * the 64-bit BAR0/BAR1 ... We use the hardware backdoor + * mechanism to read BAR0 instead of using + * pci_resource_start() because we could be operating from + * within a Virtual Machine which is trapping our accesses to + * our Configuration Space and we need to set up the PCI-E + * Memory Window decoders with the actual addresses which will + * be coming across the PCI-E link. + */ + bar0 = t4_read_pcie_cfg4(adap, pci_base); + bar0 &= pci_mask; + adap->t4_bar0 = bar0; + + ret = bar0 + memwin_base; + } else { + /* For T5, only relative offset inside the PCIe BAR is passed */ + ret = memwin_base; + } + return ret; +} + +/* Get the default utility window (win0) used by everyone */ +u32 t4_get_util_window(struct adapter *adap) +{ + return t4_get_window(adap, PCI_BASE_ADDRESS_0, + PCI_BASE_ADDRESS_MEM_MASK, MEMWIN0_BASE); +} + +/* Set up memory window for accessing adapter memory ranges. (Read + * back MA register to ensure that changes propagate before we attempt + * to use the new values.) + */ +void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window) +{ + t4_write_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, window), + memwin_base | BIR_V(0) | + WINDOW_V(ilog2(MEMWIN0_APERTURE) - WINDOW_SHIFT_X)); + t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, window)); +} + /** * t4_get_regs_len - return the size of the chips register set * @adapter: the adapter @@ -666,7 +652,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x11fc, 0x123c, 0x1300, 0x173c, 0x1800, 0x18fc, - 0x3000, 0x30d8, + 0x3000, 0x305c, + 0x3068, 0x30d8, 0x30e0, 0x5924, 0x5960, 0x59d4, 0x5a00, 0x5af8, @@ -729,7 +716,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x19238, 0x1924c, 0x193f8, 0x19474, 0x19490, 0x194f8, - 0x19800, 0x19f30, + 0x19800, 0x19f4c, 0x1a000, 0x1a06c, 0x1a0b0, 0x1a120, 0x1a128, 0x1a138, @@ -878,7 +865,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x27780, 0x2778c, 0x27800, 0x27c38, 0x27c80, 0x27d7c, - 0x27e00, 0x27e04 + 0x27e00, 0x27e04, }; static const unsigned int t5_reg_ranges[] = { @@ -888,7 +875,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1280, 0x173c, 0x1800, 0x18fc, 0x3000, 0x3028, - 0x3060, 0x30d8, + 0x3068, 0x30d8, 0x30e0, 0x30fc, 0x3140, 0x357c, 0x35a8, 0x35cc, @@ -900,7 +887,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x5940, 0x59dc, 0x59fc, 0x5a18, 0x5a60, 0x5a9c, - 0x5b9c, 0x5bfc, + 0x5b94, 0x5bfc, 0x6000, 0x6040, 0x6058, 0x614c, 0x7700, 0x7798, @@ -1014,27 +1001,30 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x30800, 0x30834, 0x308c0, 0x30908, 0x30910, 0x309ac, - 0x30a00, 0x30a04, - 0x30a0c, 0x30a2c, + 0x30a00, 0x30a2c, 0x30a44, 0x30a50, 0x30a74, 0x30c24, + 0x30d00, 0x30d00, 0x30d08, 0x30d14, 0x30d1c, 0x30d20, 0x30d3c, 0x30d50, 0x31200, 0x3120c, 0x31220, 0x31220, 0x31240, 0x31240, - 0x31600, 0x31600, - 0x31608, 0x3160c, + 0x31600, 0x3160c, 0x31a00, 0x31a1c, - 0x31e04, 0x31e20, + 0x31e00, 0x31e20, 0x31e38, 0x31e3c, 0x31e80, 0x31e80, 0x31e88, 0x31ea8, 0x31eb0, 0x31eb4, 0x31ec8, 0x31ed4, 0x31fb8, 0x32004, - 0x32208, 0x3223c, + 0x32200, 0x32200, + 0x32208, 0x32240, + 0x32248, 
0x32280, + 0x32288, 0x322c0, + 0x322c8, 0x322fc, 0x32600, 0x32630, 0x32a00, 0x32abc, 0x32b00, 0x32b70, @@ -1074,27 +1064,30 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x34800, 0x34834, 0x348c0, 0x34908, 0x34910, 0x349ac, - 0x34a00, 0x34a04, - 0x34a0c, 0x34a2c, + 0x34a00, 0x34a2c, 0x34a44, 0x34a50, 0x34a74, 0x34c24, + 0x34d00, 0x34d00, 0x34d08, 0x34d14, 0x34d1c, 0x34d20, 0x34d3c, 0x34d50, 0x35200, 0x3520c, 0x35220, 0x35220, 0x35240, 0x35240, - 0x35600, 0x35600, - 0x35608, 0x3560c, + 0x35600, 0x3560c, 0x35a00, 0x35a1c, - 0x35e04, 0x35e20, + 0x35e00, 0x35e20, 0x35e38, 0x35e3c, 0x35e80, 0x35e80, 0x35e88, 0x35ea8, 0x35eb0, 0x35eb4, 0x35ec8, 0x35ed4, 0x35fb8, 0x36004, - 0x36208, 0x3623c, + 0x36200, 0x36200, + 0x36208, 0x36240, + 0x36248, 0x36280, + 0x36288, 0x362c0, + 0x362c8, 0x362fc, 0x36600, 0x36630, 0x36a00, 0x36abc, 0x36b00, 0x36b70, @@ -1134,27 +1127,30 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x38800, 0x38834, 0x388c0, 0x38908, 0x38910, 0x389ac, - 0x38a00, 0x38a04, - 0x38a0c, 0x38a2c, + 0x38a00, 0x38a2c, 0x38a44, 0x38a50, 0x38a74, 0x38c24, + 0x38d00, 0x38d00, 0x38d08, 0x38d14, 0x38d1c, 0x38d20, 0x38d3c, 0x38d50, 0x39200, 0x3920c, 0x39220, 0x39220, 0x39240, 0x39240, - 0x39600, 0x39600, - 0x39608, 0x3960c, + 0x39600, 0x3960c, 0x39a00, 0x39a1c, - 0x39e04, 0x39e20, + 0x39e00, 0x39e20, 0x39e38, 0x39e3c, 0x39e80, 0x39e80, 0x39e88, 0x39ea8, 0x39eb0, 0x39eb4, 0x39ec8, 0x39ed4, 0x39fb8, 0x3a004, - 0x3a208, 0x3a23c, + 0x3a200, 0x3a200, + 0x3a208, 0x3a240, + 0x3a248, 0x3a280, + 0x3a288, 0x3a2c0, + 0x3a2c8, 0x3a2fc, 0x3a600, 0x3a630, 0x3aa00, 0x3aabc, 0x3ab00, 0x3ab70, @@ -1194,27 +1190,30 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x3c800, 0x3c834, 0x3c8c0, 0x3c908, 0x3c910, 0x3c9ac, - 0x3ca00, 0x3ca04, - 0x3ca0c, 0x3ca2c, + 0x3ca00, 0x3ca2c, 0x3ca44, 0x3ca50, 0x3ca74, 0x3cc24, + 0x3cd00, 0x3cd00, 0x3cd08, 0x3cd14, 0x3cd1c, 0x3cd20, 0x3cd3c, 0x3cd50, 0x3d200, 0x3d20c, 0x3d220, 0x3d220, 0x3d240, 0x3d240, - 0x3d600, 0x3d600, - 0x3d608, 0x3d60c, + 0x3d600, 0x3d60c, 0x3da00, 0x3da1c, - 0x3de04, 0x3de20, + 0x3de00, 0x3de20, 0x3de38, 0x3de3c, 0x3de80, 0x3de80, 0x3de88, 0x3dea8, 0x3deb0, 0x3deb4, 0x3dec8, 0x3ded4, 0x3dfb8, 0x3e004, - 0x3e208, 0x3e23c, + 0x3e200, 0x3e200, + 0x3e208, 0x3e240, + 0x3e248, 0x3e280, + 0x3e288, 0x3e2c0, + 0x3e2c8, 0x3e2fc, 0x3e600, 0x3e630, 0x3ea00, 0x3eabc, 0x3eb00, 0x3eb70, @@ -1247,7 +1246,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x3fcf0, 0x3fcfc, 0x40000, 0x4000c, 0x40040, 0x40068, - 0x40080, 0x40144, + 0x4007c, 0x40144, 0x40180, 0x4018c, 0x40200, 0x40298, 0x402ac, 0x4033c, @@ -1275,7 +1274,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x47800, 0x47814, 0x48000, 0x4800c, 0x48040, 0x48068, - 0x48080, 0x48144, + 0x4807c, 0x48144, 0x48180, 0x4818c, 0x48200, 0x48298, 0x482ac, 0x4833c, @@ -1618,7 +1617,7 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr, if (ret) return ret; if (byte_oriented) - *data = (__force __u32) (htonl(*data)); + *data = (__force __u32)(cpu_to_be32(*data)); } return 0; } @@ -1979,7 +1978,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) "FW image size not multiple of 512 bytes\n"); return -EINVAL; } - if (ntohs(hdr->len512) * 512 != size) { + if ((unsigned int)be16_to_cpu(hdr->len512) * 512 != size) { dev_err(adap->pdev_dev, "FW image size differs from size in FW header\n"); return -EINVAL; @@ -1993,7 +1992,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned 
int size) return -EINVAL; for (csum = 0, i = 0; i < size / sizeof(csum); i++) - csum += ntohl(p[i]); + csum += be32_to_cpu(p[i]); if (csum != 0xffffffff) { dev_err(adap->pdev_dev, @@ -2012,7 +2011,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) * first page with a bad version. */ memcpy(first_page, fw_data, SF_PAGE_SIZE); - ((struct fw_hdr *)first_page)->fw_ver = htonl(0xffffffff); + ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page); if (ret) goto out; @@ -2107,19 +2106,22 @@ int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port, fc |= FW_PORT_CAP_FC_TX; memset(&c, 0, sizeof(c)); - c.op_to_portid = htonl(FW_CMD_OP_V(FW_PORT_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_PORT_CMD_PORTID_V(port)); - c.action_to_len16 = htonl(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) | - FW_LEN16(c)); + c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_PORT_CMD_PORTID_V(port)); + c.action_to_len16 = + cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) | + FW_LEN16(c)); if (!(lc->supported & FW_PORT_CAP_ANEG)) { - c.u.l1cfg.rcap = htonl((lc->supported & ADVERT_MASK) | fc); + c.u.l1cfg.rcap = cpu_to_be32((lc->supported & ADVERT_MASK) | + fc); lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); } else if (lc->autoneg == AUTONEG_DISABLE) { - c.u.l1cfg.rcap = htonl(lc->requested_speed | fc | mdi); + c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc | mdi); lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); } else - c.u.l1cfg.rcap = htonl(lc->advertising | fc | mdi); + c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc | mdi); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -2137,11 +2139,13 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port) struct fw_port_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_portid = htonl(FW_CMD_OP_V(FW_PORT_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_PORT_CMD_PORTID_V(port)); - c.action_to_len16 = htonl(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) | - FW_LEN16(c)); - c.u.l1cfg.rcap = htonl(FW_PORT_CAP_ANEG); + c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_PORT_CMD_PORTID_V(port)); + c.action_to_len16 = + cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) | + FW_LEN16(c)); + c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP_ANEG); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -2945,18 +2949,18 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, struct fw_rss_ind_tbl_cmd cmd; memset(&cmd, 0, sizeof(cmd)); - cmd.op_to_viid = htonl(FW_CMD_OP_V(FW_RSS_IND_TBL_CMD) | + cmd.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_RSS_IND_TBL_CMD) | FW_CMD_REQUEST_F | FW_CMD_WRITE_F | FW_RSS_IND_TBL_CMD_VIID_V(viid)); - cmd.retval_len16 = htonl(FW_LEN16(cmd)); + cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd)); /* each fw_rss_ind_tbl_cmd takes up to 32 entries */ while (n > 0) { int nq = min(n, 32); __be32 *qp = &cmd.iq0_to_iq2; - cmd.niqid = htons(nq); - cmd.startidx = htons(start); + cmd.niqid = cpu_to_be16(nq); + cmd.startidx = cpu_to_be16(start); start += nq; n -= nq; @@ -2974,7 +2978,7 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, if (++rsp >= rsp_end) rsp = rspq; - *qp++ = htonl(v); + *qp++ = cpu_to_be32(v); nq -= 3; } @@ -3000,20 +3004,46 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, struct fw_rss_glb_config_cmd c; 
memset(&c, 0, sizeof(c)); - c.op_to_write = htonl(FW_CMD_OP_V(FW_RSS_GLB_CONFIG_CMD) | - FW_CMD_REQUEST_F | FW_CMD_WRITE_F); - c.retval_len16 = htonl(FW_LEN16(c)); + c.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_RSS_GLB_CONFIG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); if (mode == FW_RSS_GLB_CONFIG_CMD_MODE_MANUAL) { - c.u.manual.mode_pkd = htonl(FW_RSS_GLB_CONFIG_CMD_MODE_V(mode)); + c.u.manual.mode_pkd = + cpu_to_be32(FW_RSS_GLB_CONFIG_CMD_MODE_V(mode)); } else if (mode == FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) { c.u.basicvirtual.mode_pkd = - htonl(FW_RSS_GLB_CONFIG_CMD_MODE_V(mode)); - c.u.basicvirtual.synmapen_to_hashtoeplitz = htonl(flags); + cpu_to_be32(FW_RSS_GLB_CONFIG_CMD_MODE_V(mode)); + c.u.basicvirtual.synmapen_to_hashtoeplitz = cpu_to_be32(flags); } else return -EINVAL; return t4_wr_mbox(adapter, mbox, &c, sizeof(c), NULL); } +/** + * t4_config_vi_rss - configure per VI RSS settings + * @adapter: the adapter + * @mbox: mbox to use for the FW command + * @viid: the VI id + * @flags: RSS flags + * @defq: id of the default RSS queue for the VI. + * + * Configures VI-specific RSS properties. + */ +int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, + unsigned int flags, unsigned int defq) +{ + struct fw_rss_vi_config_cmd c; + + memset(&c, 0, sizeof(c)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_RSS_VI_CONFIG_CMD_VIID_V(viid)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.basicvirtual.defaultq_to_udpen = cpu_to_be32(flags | + FW_RSS_VI_CONFIG_CMD_DEFAULTQ_V(defq)); + return t4_wr_mbox(adapter, mbox, &c, sizeof(c), NULL); +} + /* Read an RSS table row */ static int rd_rss_row(struct adapter *adap, int row, u32 *val) { @@ -3401,7 +3431,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) } /** - * get_mps_bg_map - return the buffer groups associated with a port + * t4_get_mps_bg_map - return the buffer groups associated with a port * @adap: the adapter * @idx: the port index * @@ -3409,7 +3439,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) * with the given port. Bit i is set if buffer group i is used by the * port. 
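 * For example, with the MPS configured for two ports (two buffer
 * groups per port) this returns 0x3 for idx 0 and 0xc for idx 1; the
 * exact mapping is derived from the NUMPORTS field of MPS_CMN_CTL,
 * read at the top of the function.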
*/ -static unsigned int get_mps_bg_map(struct adapter *adap, int idx) +unsigned int t4_get_mps_bg_map(struct adapter *adap, int idx) { u32 n = NUMPORTS_G(t4_read_reg(adap, MPS_CMN_CTL_A)); @@ -3460,7 +3490,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type) */ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) { - u32 bgmap = get_mps_bg_map(adap, idx); + u32 bgmap = t4_get_mps_bg_map(adap, idx); #define GET_STAT(name) \ t4_read_reg64(adap, \ @@ -3644,33 +3674,38 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid) { memset(wr, 0, sizeof(*wr)); - wr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - wr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*wr) / 16)); - wr->tid_to_iq = htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_NOREPLY_V(qid < 0)); - wr->del_filter_to_l2tix = htonl(FW_FILTER_WR_DEL_FILTER_F); + wr->op_pkd = cpu_to_be32(FW_WR_OP_V(FW_FILTER_WR)); + wr->len16_pkd = cpu_to_be32(FW_WR_LEN16_V(sizeof(*wr) / 16)); + wr->tid_to_iq = cpu_to_be32(FW_FILTER_WR_TID_V(ftid) | + FW_FILTER_WR_NOREPLY_V(qid < 0)); + wr->del_filter_to_l2tix = cpu_to_be32(FW_FILTER_WR_DEL_FILTER_F); if (qid >= 0) - wr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_RPL_IQ_V(qid)); + wr->rx_chan_rx_rpl_iq = + cpu_to_be16(FW_FILTER_WR_RX_RPL_IQ_V(qid)); } #define INIT_CMD(var, cmd, rd_wr) do { \ - (var).op_to_write = htonl(FW_CMD_OP_V(FW_##cmd##_CMD) | \ - FW_CMD_REQUEST_F | FW_CMD_##rd_wr##_F); \ - (var).retval_len16 = htonl(FW_LEN16(var)); \ + (var).op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_##cmd##_CMD) | \ + FW_CMD_REQUEST_F | \ + FW_CMD_##rd_wr##_F); \ + (var).retval_len16 = cpu_to_be32(FW_LEN16(var)); \ } while (0) int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val) { + u32 ldst_addrspace; struct fw_ldst_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_addrspace = htonl(FW_CMD_OP_V(FW_LDST_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | - FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_FIRMWARE)); - c.cycles_to_len16 = htonl(FW_LEN16(c)); - c.u.addrval.addr = htonl(addr); - c.u.addrval.val = htonl(val); + ldst_addrspace = FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_FIRMWARE); + c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + ldst_addrspace); + c.cycles_to_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.addrval.addr = cpu_to_be32(addr); + c.u.addrval.val = cpu_to_be32(val); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -3690,19 +3725,22 @@ int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, u16 *valp) { int ret; + u32 ldst_addrspace; struct fw_ldst_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_addrspace = htonl(FW_CMD_OP_V(FW_LDST_CMD) | FW_CMD_REQUEST_F | - FW_CMD_READ_F | FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MDIO)); - c.cycles_to_len16 = htonl(FW_LEN16(c)); - c.u.mdio.paddr_mmd = htons(FW_LDST_CMD_PADDR_V(phy_addr) | - FW_LDST_CMD_MMD_V(mmd)); - c.u.mdio.raddr = htons(reg); + ldst_addrspace = FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MDIO); + c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + ldst_addrspace); + c.cycles_to_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.mdio.paddr_mmd = cpu_to_be16(FW_LDST_CMD_PADDR_V(phy_addr) | + FW_LDST_CMD_MMD_V(mmd)); + c.u.mdio.raddr = cpu_to_be16(reg); ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); if (ret == 0) - *valp = ntohs(c.u.mdio.rval); + *valp = 
be16_to_cpu(c.u.mdio.rval); return ret; } @@ -3720,16 +3758,19 @@ int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, u16 val) { + u32 ldst_addrspace; struct fw_ldst_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_addrspace = htonl(FW_CMD_OP_V(FW_LDST_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MDIO)); - c.cycles_to_len16 = htonl(FW_LEN16(c)); - c.u.mdio.paddr_mmd = htons(FW_LDST_CMD_PADDR_V(phy_addr) | - FW_LDST_CMD_MMD_V(mmd)); - c.u.mdio.raddr = htons(reg); - c.u.mdio.rval = htons(val); + ldst_addrspace = FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MDIO); + c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + ldst_addrspace); + c.cycles_to_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.mdio.paddr_mmd = cpu_to_be16(FW_LDST_CMD_PADDR_V(phy_addr) | + FW_LDST_CMD_MMD_V(mmd)); + c.u.mdio.raddr = cpu_to_be16(reg); + c.u.mdio.rval = cpu_to_be16(val); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -3863,11 +3904,11 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, retry: memset(&c, 0, sizeof(c)); INIT_CMD(c, HELLO, WRITE); - c.err_to_clearinit = htonl( + c.err_to_clearinit = cpu_to_be32( FW_HELLO_CMD_MASTERDIS_V(master == MASTER_CANT) | FW_HELLO_CMD_MASTERFORCE_V(master == MASTER_MUST) | - FW_HELLO_CMD_MBMASTER_V(master == MASTER_MUST ? mbox : - FW_HELLO_CMD_MBMASTER_M) | + FW_HELLO_CMD_MBMASTER_V(master == MASTER_MUST ? + mbox : FW_HELLO_CMD_MBMASTER_M) | FW_HELLO_CMD_MBASYNCNOT_V(evt_mbox) | FW_HELLO_CMD_STAGE_V(fw_hello_cmd_stage_os) | FW_HELLO_CMD_CLEARINIT_F); @@ -3888,7 +3929,7 @@ retry: return ret; } - v = ntohl(c.err_to_clearinit); + v = be32_to_cpu(c.err_to_clearinit); master_mbox = FW_HELLO_CMD_MBMASTER_G(v); if (state) { if (v & FW_HELLO_CMD_ERR_F) @@ -4017,7 +4058,7 @@ int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset) memset(&c, 0, sizeof(c)); INIT_CMD(c, RESET, WRITE); - c.val = htonl(reset); + c.val = cpu_to_be32(reset); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4050,8 +4091,8 @@ static int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force) memset(&c, 0, sizeof(c)); INIT_CMD(c, RESET, WRITE); - c.val = htonl(PIORST_F | PIORSTMODE_F); - c.halt_pkd = htonl(FW_RESET_CMD_HALT_F); + c.val = cpu_to_be32(PIORST_F | PIORSTMODE_F); + c.halt_pkd = cpu_to_be32(FW_RESET_CMD_HALT_F); ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4190,7 +4231,7 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, * the newly loaded firmware will handle this right by checking * its header flags to see if it advertises the capability. 
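 * (Concretely: reset below ends up 0 exactly when the new image's
 * header sets FW_HDR_FLAGS_RESET_HALT, in which case the firmware can
 * be restarted without a full chip reset.)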
*/ - reset = ((ntohl(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0); + reset = ((be32_to_cpu(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0); return t4_fw_restart(adap, mbox, reset); } @@ -4345,17 +4386,19 @@ int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf, return -EINVAL; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_PARAMS_CMD) | FW_CMD_REQUEST_F | - FW_CMD_READ_F | FW_PARAMS_CMD_PFN_V(pf) | - FW_PARAMS_CMD_VFN_V(vf)); - c.retval_len16 = htonl(FW_LEN16(c)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_PARAMS_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_PARAMS_CMD_PFN_V(pf) | + FW_PARAMS_CMD_VFN_V(vf)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); + for (i = 0; i < nparams; i++, p += 2) - *p = htonl(*params++); + *p = cpu_to_be32(*params++); ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); if (ret == 0) for (i = 0, p = &c.param[0].val; i < nparams; i++, p += 2) - *val++ = ntohl(*p); + *val++ = be32_to_cpu(*p); return ret; } @@ -4423,13 +4466,14 @@ int t4_set_params(struct adapter *adap, unsigned int mbox, unsigned int pf, return -EINVAL; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_PARAMS_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_PARAMS_CMD_PFN_V(pf) | - FW_PARAMS_CMD_VFN_V(vf)); - c.retval_len16 = htonl(FW_LEN16(c)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_PARAMS_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_PARAMS_CMD_PFN_V(pf) | + FW_PARAMS_CMD_VFN_V(vf)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); while (nparams--) { - *p++ = htonl(*params++); - *p++ = htonl(*val++); + *p++ = cpu_to_be32(*params++); + *p++ = cpu_to_be32(*val++); } return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); @@ -4465,20 +4509,21 @@ int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf, struct fw_pfvf_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_PFVF_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_PFVF_CMD_PFN_V(pf) | - FW_PFVF_CMD_VFN_V(vf)); - c.retval_len16 = htonl(FW_LEN16(c)); - c.niqflint_niq = htonl(FW_PFVF_CMD_NIQFLINT_V(rxqi) | - FW_PFVF_CMD_NIQ_V(rxq)); - c.type_to_neq = htonl(FW_PFVF_CMD_CMASK_V(cmask) | - FW_PFVF_CMD_PMASK_V(pmask) | - FW_PFVF_CMD_NEQ_V(txq)); - c.tc_to_nexactf = htonl(FW_PFVF_CMD_TC_V(tc) | FW_PFVF_CMD_NVI_V(vi) | - FW_PFVF_CMD_NEXACTF_V(nexact)); - c.r_caps_to_nethctrl = htonl(FW_PFVF_CMD_R_CAPS_V(rcaps) | - FW_PFVF_CMD_WX_CAPS_V(wxcaps) | - FW_PFVF_CMD_NETHCTRL_V(txq_eth_ctrl)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_PFVF_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_PFVF_CMD_PFN_V(pf) | + FW_PFVF_CMD_VFN_V(vf)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); + c.niqflint_niq = cpu_to_be32(FW_PFVF_CMD_NIQFLINT_V(rxqi) | + FW_PFVF_CMD_NIQ_V(rxq)); + c.type_to_neq = cpu_to_be32(FW_PFVF_CMD_CMASK_V(cmask) | + FW_PFVF_CMD_PMASK_V(pmask) | + FW_PFVF_CMD_NEQ_V(txq)); + c.tc_to_nexactf = cpu_to_be32(FW_PFVF_CMD_TC_V(tc) | + FW_PFVF_CMD_NVI_V(vi) | + FW_PFVF_CMD_NEXACTF_V(nexact)); + c.r_caps_to_nethctrl = cpu_to_be32(FW_PFVF_CMD_R_CAPS_V(rcaps) | + FW_PFVF_CMD_WX_CAPS_V(wxcaps) | + FW_PFVF_CMD_NETHCTRL_V(txq_eth_ctrl)); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4507,10 +4552,10 @@ int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, struct fw_vi_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_VI_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_CMD_EXEC_F | - FW_VI_CMD_PFN_V(pf) | FW_VI_CMD_VFN_V(vf)); - c.alloc_to_len16 = htonl(FW_VI_CMD_ALLOC_F | FW_LEN16(c)); + c.op_to_vfn = 
cpu_to_be32(FW_CMD_OP_V(FW_VI_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_VI_CMD_PFN_V(pf) | FW_VI_CMD_VFN_V(vf)); + c.alloc_to_len16 = cpu_to_be32(FW_VI_CMD_ALLOC_F | FW_LEN16(c)); c.portid_pkd = FW_VI_CMD_PORTID_V(port); c.nmac = nmac - 1; @@ -4532,8 +4577,8 @@ int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, } } if (rss_size) - *rss_size = FW_VI_CMD_RSSSIZE_G(ntohs(c.rsssize_pkd)); - return FW_VI_CMD_VIID_G(ntohs(c.type_viid)); + *rss_size = FW_VI_CMD_RSSSIZE_G(be16_to_cpu(c.rsssize_pkd)); + return FW_VI_CMD_VIID_G(be16_to_cpu(c.type_viid)); } /** @@ -4569,14 +4614,16 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, vlanex = FW_VI_RXMODE_CMD_VLANEXEN_M; memset(&c, 0, sizeof(c)); - c.op_to_viid = htonl(FW_CMD_OP_V(FW_VI_RXMODE_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_VI_RXMODE_CMD_VIID_V(viid)); - c.retval_len16 = htonl(FW_LEN16(c)); - c.mtu_to_vlanexen = htonl(FW_VI_RXMODE_CMD_MTU_V(mtu) | - FW_VI_RXMODE_CMD_PROMISCEN_V(promisc) | - FW_VI_RXMODE_CMD_ALLMULTIEN_V(all_multi) | - FW_VI_RXMODE_CMD_BROADCASTEN_V(bcast) | - FW_VI_RXMODE_CMD_VLANEXEN_V(vlanex)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_RXMODE_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_VI_RXMODE_CMD_VIID_V(viid)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); + c.mtu_to_vlanexen = + cpu_to_be32(FW_VI_RXMODE_CMD_MTU_V(mtu) | + FW_VI_RXMODE_CMD_PROMISCEN_V(promisc) | + FW_VI_RXMODE_CMD_ALLMULTIEN_V(all_multi) | + FW_VI_RXMODE_CMD_BROADCASTEN_V(bcast) | + FW_VI_RXMODE_CMD_VLANEXEN_V(vlanex)); return t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), NULL, sleep_ok); } @@ -4617,15 +4664,17 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, return -EINVAL; memset(&c, 0, sizeof(c)); - c.op_to_viid = htonl(FW_CMD_OP_V(FW_VI_MAC_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | (free ? FW_CMD_EXEC_F : 0) | - FW_VI_MAC_CMD_VIID_V(viid)); - c.freemacs_to_len16 = htonl(FW_VI_MAC_CMD_FREEMACS_V(free) | - FW_CMD_LEN16_V((naddr + 2) / 2)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + (free ? FW_CMD_EXEC_F : 0) | + FW_VI_MAC_CMD_VIID_V(viid)); + c.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(free) | + FW_CMD_LEN16_V((naddr + 2) / 2)); for (i = 0, p = c.u.exact; i < naddr; i++, p++) { - p->valid_to_idx = htons(FW_VI_MAC_CMD_VALID_F | - FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_ADD_MAC)); + p->valid_to_idx = + cpu_to_be16(FW_VI_MAC_CMD_VALID_F | + FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_ADD_MAC)); memcpy(p->macaddr, addr[i], sizeof(p->macaddr)); } @@ -4634,7 +4683,7 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, return ret; for (i = 0, p = c.u.exact; i < naddr; i++, p++) { - u16 index = FW_VI_MAC_CMD_IDX_G(ntohs(p->valid_to_idx)); + u16 index = FW_VI_MAC_CMD_IDX_G(be16_to_cpu(p->valid_to_idx)); if (idx) idx[i] = index >= max_naddr ? 0xffff : index; @@ -4680,17 +4729,18 @@ int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, mode = add_smt ? 
FW_VI_MAC_SMT_AND_MPSTCAM : FW_VI_MAC_MPS_TCAM_ENTRY; memset(&c, 0, sizeof(c)); - c.op_to_viid = htonl(FW_CMD_OP_V(FW_VI_MAC_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_VI_MAC_CMD_VIID_V(viid)); - c.freemacs_to_len16 = htonl(FW_CMD_LEN16_V(1)); - p->valid_to_idx = htons(FW_VI_MAC_CMD_VALID_F | - FW_VI_MAC_CMD_SMAC_RESULT_V(mode) | - FW_VI_MAC_CMD_IDX_V(idx)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_VI_MAC_CMD_VIID_V(viid)); + c.freemacs_to_len16 = cpu_to_be32(FW_CMD_LEN16_V(1)); + p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F | + FW_VI_MAC_CMD_SMAC_RESULT_V(mode) | + FW_VI_MAC_CMD_IDX_V(idx)); memcpy(p->macaddr, addr, sizeof(p->macaddr)); ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); if (ret == 0) { - ret = FW_VI_MAC_CMD_IDX_G(ntohs(p->valid_to_idx)); + ret = FW_VI_MAC_CMD_IDX_G(be16_to_cpu(p->valid_to_idx)); if (ret >= max_mac_addr) ret = -ENOMEM; } @@ -4714,11 +4764,12 @@ int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid, struct fw_vi_mac_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_viid = htonl(FW_CMD_OP_V(FW_VI_MAC_CMD) | FW_CMD_REQUEST_F | - FW_CMD_WRITE_F | FW_VI_ENABLE_CMD_VIID_V(viid)); - c.freemacs_to_len16 = htonl(FW_VI_MAC_CMD_HASHVECEN_F | - FW_VI_MAC_CMD_HASHUNIEN_V(ucast) | - FW_CMD_LEN16_V(1)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_VI_ENABLE_CMD_VIID_V(viid)); + c.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_HASHVECEN_F | + FW_VI_MAC_CMD_HASHUNIEN_V(ucast) | + FW_CMD_LEN16_V(1)); c.u.hash.hashvec = cpu_to_be64(vec); return t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), NULL, sleep_ok); } @@ -4741,12 +4792,13 @@ int t4_enable_vi_params(struct adapter *adap, unsigned int mbox, struct fw_vi_enable_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_viid = htonl(FW_CMD_OP_V(FW_VI_ENABLE_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_VI_ENABLE_CMD_VIID_V(viid)); - - c.ien_to_len16 = htonl(FW_VI_ENABLE_CMD_IEN_V(rx_en) | - FW_VI_ENABLE_CMD_EEN_V(tx_en) | FW_LEN16(c) | - FW_VI_ENABLE_CMD_DCB_INFO_V(dcb_en)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_ENABLE_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_VI_ENABLE_CMD_VIID_V(viid)); + c.ien_to_len16 = cpu_to_be32(FW_VI_ENABLE_CMD_IEN_V(rx_en) | + FW_VI_ENABLE_CMD_EEN_V(tx_en) | + FW_VI_ENABLE_CMD_DCB_INFO_V(dcb_en) | + FW_LEN16(c)); return t4_wr_mbox_ns(adap, mbox, &c, sizeof(c), NULL); } @@ -4781,10 +4833,11 @@ int t4_identify_port(struct adapter *adap, unsigned int mbox, unsigned int viid, struct fw_vi_enable_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_viid = htonl(FW_CMD_OP_V(FW_VI_ENABLE_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_VI_ENABLE_CMD_VIID_V(viid)); - c.ien_to_len16 = htonl(FW_VI_ENABLE_CMD_LED_F | FW_LEN16(c)); - c.blinkdur = htons(nblinks); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_ENABLE_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_VI_ENABLE_CMD_VIID_V(viid)); + c.ien_to_len16 = cpu_to_be32(FW_VI_ENABLE_CMD_LED_F | FW_LEN16(c)); + c.blinkdur = cpu_to_be16(nblinks); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4808,14 +4861,14 @@ int t4_iq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, struct fw_iq_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_IQ_CMD_PFN_V(pf) | - FW_IQ_CMD_VFN_V(vf)); - c.alloc_to_len16 = htonl(FW_IQ_CMD_FREE_F | FW_LEN16(c)); - c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(iqtype)); - c.iqid = htons(iqid); - c.fl0id = 
htons(fl0id); - c.fl1id = htons(fl1id); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F | + FW_CMD_EXEC_F | FW_IQ_CMD_PFN_V(pf) | + FW_IQ_CMD_VFN_V(vf)); + c.alloc_to_len16 = cpu_to_be32(FW_IQ_CMD_FREE_F | FW_LEN16(c)); + c.type_to_iqandstindex = cpu_to_be32(FW_IQ_CMD_TYPE_V(iqtype)); + c.iqid = cpu_to_be16(iqid); + c.fl0id = cpu_to_be16(fl0id); + c.fl1id = cpu_to_be16(fl1id); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4835,11 +4888,12 @@ int t4_eth_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, struct fw_eq_eth_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_ETH_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_EQ_ETH_CMD_PFN_V(pf) | - FW_EQ_ETH_CMD_VFN_V(vf)); - c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_FREE_F | FW_LEN16(c)); - c.eqid_pkd = htonl(FW_EQ_ETH_CMD_EQID_V(eqid)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_EQ_ETH_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_EQ_ETH_CMD_PFN_V(pf) | + FW_EQ_ETH_CMD_VFN_V(vf)); + c.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_FREE_F | FW_LEN16(c)); + c.eqid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_EQID_V(eqid)); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4859,11 +4913,12 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, struct fw_eq_ctrl_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_EQ_CTRL_CMD_PFN_V(pf) | - FW_EQ_CTRL_CMD_VFN_V(vf)); - c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_FREE_F | FW_LEN16(c)); - c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_EQID_V(eqid)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_EQ_CTRL_CMD_PFN_V(pf) | + FW_EQ_CTRL_CMD_VFN_V(vf)); + c.alloc_to_len16 = cpu_to_be32(FW_EQ_CTRL_CMD_FREE_F | FW_LEN16(c)); + c.cmpliqid_eqid = cpu_to_be32(FW_EQ_CTRL_CMD_EQID_V(eqid)); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4883,11 +4938,12 @@ int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, struct fw_eq_ofld_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST_F | - FW_CMD_EXEC_F | FW_EQ_OFLD_CMD_PFN_V(pf) | - FW_EQ_OFLD_CMD_VFN_V(vf)); - c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_FREE_F | FW_LEN16(c)); - c.eqid_pkd = htonl(FW_EQ_OFLD_CMD_EQID_V(eqid)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | + FW_CMD_REQUEST_F | FW_CMD_EXEC_F | + FW_EQ_OFLD_CMD_PFN_V(pf) | + FW_EQ_OFLD_CMD_VFN_V(vf)); + c.alloc_to_len16 = cpu_to_be32(FW_EQ_OFLD_CMD_FREE_F | FW_LEN16(c)); + c.eqid_pkd = cpu_to_be32(FW_EQ_OFLD_CMD_EQID_V(eqid)); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4905,11 +4961,11 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl) if (opcode == FW_PORT_CMD) { /* link/module state change message */ int speed = 0, fc = 0; const struct fw_port_cmd *p = (void *)rpl; - int chan = FW_PORT_CMD_PORTID_G(ntohl(p->op_to_portid)); + int chan = FW_PORT_CMD_PORTID_G(be32_to_cpu(p->op_to_portid)); int port = adap->chan_map[chan]; struct port_info *pi = adap2pinfo(adap, port); struct link_config *lc = &pi->link_cfg; - u32 stat = ntohl(p->u.info.lstatus_to_modtype); + u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype); int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0; u32 mod = FW_PORT_CMD_MODTYPE_G(stat); @@ -5223,18 +5279,19 @@ int t4_init_devlog_params(struct adapter *adap) /* Otherwise, ask the firmware for its Device Log Parameters. 
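 * (A read of FW_DEVLOG_CMD returns the log's memory type, its start
 * address in 16-byte units and its size in bytes, which the code
 * below unpacks with the corresponding _G() accessors.)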
*/ memset(&devlog_cmd, 0, sizeof(devlog_cmd)); - devlog_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_DEVLOG_CMD) | - FW_CMD_REQUEST_F | FW_CMD_READ_F); - devlog_cmd.retval_len16 = htonl(FW_LEN16(devlog_cmd)); + devlog_cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_DEVLOG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F); + devlog_cmd.retval_len16 = cpu_to_be32(FW_LEN16(devlog_cmd)); ret = t4_wr_mbox(adap, adap->mbox, &devlog_cmd, sizeof(devlog_cmd), &devlog_cmd); if (ret) return ret; - devlog_meminfo = ntohl(devlog_cmd.memtype_devlog_memaddr16_devlog); + devlog_meminfo = + be32_to_cpu(devlog_cmd.memtype_devlog_memaddr16_devlog); dparams->memtype = FW_DEVLOG_CMD_MEMTYPE_DEVLOG_G(devlog_meminfo); dparams->start = FW_DEVLOG_CMD_MEMADDR16_DEVLOG_G(devlog_meminfo) << 4; - dparams->size = ntohl(devlog_cmd.memsize_devlog); + dparams->size = be32_to_cpu(devlog_cmd.memsize_devlog); return 0; } @@ -5373,6 +5430,29 @@ int t4_filter_field_shift(const struct adapter *adap, int filter_sel) return field_shift; } +int t4_init_rss_mode(struct adapter *adap, int mbox) +{ + int i, ret; + struct fw_rss_vi_config_cmd rvc; + + memset(&rvc, 0, sizeof(rvc)); + + for_each_port(adap, i) { + struct port_info *p = adap2pinfo(adap, i); + + rvc.op_to_viid = + cpu_to_be32(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_RSS_VI_CONFIG_CMD_VIID_V(p->viid)); + rvc.retval_len16 = cpu_to_be32(FW_LEN16(rvc)); + ret = t4_wr_mbox(adap, mbox, &rvc, sizeof(rvc), &rvc); + if (ret) + return ret; + p->rss_mode = be32_to_cpu(rvc.u.basicvirtual.defaultq_to_udpen); + } + return 0; +} + int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) { u8 addr[6]; @@ -5390,10 +5470,10 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) while ((adap->params.portvec & (1 << j)) == 0) j++; - c.op_to_portid = htonl(FW_CMD_OP_V(FW_PORT_CMD) | - FW_CMD_REQUEST_F | FW_CMD_READ_F | - FW_PORT_CMD_PORTID_V(j)); - c.action_to_len16 = htonl( + c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_PORT_CMD_PORTID_V(j)); + c.action_to_len16 = cpu_to_be32( FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) | FW_LEN16(c)); ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); @@ -5411,22 +5491,23 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) memcpy(adap->port[i]->dev_addr, addr, ETH_ALEN); adap->port[i]->dev_port = j; - ret = ntohl(c.u.info.lstatus_to_modtype); + ret = be32_to_cpu(c.u.info.lstatus_to_modtype); p->mdio_addr = (ret & FW_PORT_CMD_MDIOCAP_F) ? 
FW_PORT_CMD_MDIOADDR_G(ret) : -1; p->port_type = FW_PORT_CMD_PTYPE_G(ret); p->mod_type = FW_PORT_MOD_TYPE_NA; - rvc.op_to_viid = htonl(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | - FW_CMD_REQUEST_F | FW_CMD_READ_F | - FW_RSS_VI_CONFIG_CMD_VIID(p->viid)); - rvc.retval_len16 = htonl(FW_LEN16(rvc)); + rvc.op_to_viid = + cpu_to_be32(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_RSS_VI_CONFIG_CMD_VIID(p->viid)); + rvc.retval_len16 = cpu_to_be32(FW_LEN16(rvc)); ret = t4_wr_mbox(adap, mbox, &rvc, sizeof(rvc), &rvc); if (ret) return ret; - p->rss_mode = ntohl(rvc.u.basicvirtual.defaultq_to_udpen); + p->rss_mode = be32_to_cpu(rvc.u.basicvirtual.defaultq_to_udpen); - init_link_config(&p->link_cfg, ntohs(c.u.info.pcap)); + init_link_config(&p->link_cfg, be16_to_cpu(c.u.info.pcap)); j++; } return 0; @@ -5717,3 +5798,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr) t4_write_reg(adap, TP_DBG_LA_CONFIG_A, cfg | adap->params.tp.la_mask); } + +/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in + * seconds). If we find one of the SGE Ingress DMA State Machines in the same + * state for more than the Warning Threshold then we'll issue a warning about + * a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel + * appears to be hung every Warning Repeat second till the situation clears. + * If the situation clears, we'll note that as well. + */ +#define SGE_IDMA_WARN_THRESH 1 +#define SGE_IDMA_WARN_REPEAT 300 + +/** + * t4_idma_monitor_init - initialize SGE Ingress DMA Monitor + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * + * Initialize the state of an SGE Ingress DMA Monitor. + */ +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma) +{ + /* Initialize the state variables for detecting an SGE Ingress DMA + * hang. The SGE has internal counters which count up on each clock + * tick whenever the SGE finds its Ingress DMA State Engines in the + * same state they were on the previous clock tick. The clock used is + * the Core Clock so we have a limit on the maximum "time" they can + * record; typically a very small number of seconds. For instance, + * with a 600MHz Core Clock, we can only count up to a bit more than + * 7s. So we'll synthesize a larger counter in order to not run the + * risk of having the "timers" overflow and give us the flexibility to + * maintain a Hung SGE State Machine of our own which operates across + * a longer time frame. + */ + idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */ + idma->idma_stalled[0] = 0; + idma->idma_stalled[1] = 0; +} + +/** + * t4_idma_monitor - monitor SGE Ingress DMA state + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * @hz: number of ticks/second + * @ticks: number of ticks since the last IDMA Monitor call + */ +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks) +{ + int i, idma_same_state_cnt[2]; + + /* Read the SGE Debug Ingress DMA Same State Count registers. These + * are counters inside the SGE which count up on each clock when the + * SGE finds its Ingress DMA State Engines in the same states they + * were in the previous clock. The counters will peg out at + * 0xffffffff without wrapping around so once they pass the 1s + * threshold they'll stay above that till the IDMA state changes. 
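+ * As a worked example using the 600MHz Core Clock mentioned above:
+ * idma_1s_thresh computed in t4_idma_monitor_init() comes out to
+ * 600 ticks/usec * 1000000 usec = 600,000,000 ticks, so any same
+ * state count at or above that value means an engine has been stuck
+ * for at least one full second.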
+ */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13); + idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A); + idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + + for (i = 0; i < 2; i++) { + u32 debug0, debug11; + + /* If the Ingress DMA Same State Counter ("timer") is less + * than 1s, then we can reset our synthesized Stall Timer and + * continue. If we have previously emitted warnings about a + * potential stalled Ingress Queue, issue a note indicating + * that the Ingress Queue has resumed forward progress. + */ + if (idma_same_state_cnt[i] < idma->idma_1s_thresh) { + if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz) + dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, " + "resumed after %d seconds\n", + i, idma->idma_qid[i], + idma->idma_stalled[i] / hz); + idma->idma_stalled[i] = 0; + continue; + } + + /* Synthesize an SGE Ingress DMA Same State Timer in the Hz + * domain. The first time we get here it'll be because we + * passed the 1s Threshold; each additional time it'll be + * because the RX Timer Callback is being fired on its regular + * schedule. + * + * If the stall is below our Potential Hung Ingress Queue + * Warning Threshold, continue. + */ + if (idma->idma_stalled[i] == 0) { + idma->idma_stalled[i] = hz; + idma->idma_warn[i] = 0; + } else { + idma->idma_stalled[i] += ticks; + idma->idma_warn[i] -= ticks; + } + + if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz) + continue; + + /* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds. + */ + if (idma->idma_warn[i] > 0) + continue; + idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz; + + /* Read and save the SGE IDMA State and Queue ID information. + * We do this every time in case it changes across time ... + * can't be too careful ... 
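+ * (Per the decoding below: debug register 0 packs a 6-bit IDMA state
+ * per channel at a 9-bit stride, and debug register 11 packs a
+ * 16-bit queue ID per channel.)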
+ */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0); + debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; + + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11); + debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; + + dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in " + "state %u for %d seconds (debug0=%#x, debug11=%#x)\n", + i, idma->idma_qid[i], idma->idma_state[i], + idma->idma_stalled[i] / hz, + debug0, debug11); + t4_sge_decode_idma_state(adapter, idma->idma_state[i]); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 380b15c0417a..88067d90121c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -152,17 +152,33 @@ struct rsp_ctrl { }; }; -#define RSPD_NEWBUF 0x80000000U -#define RSPD_LEN(x) (((x) >> 0) & 0x7fffffffU) -#define RSPD_QID(x) RSPD_LEN(x) +#define RSPD_NEWBUF_S 31 +#define RSPD_NEWBUF_V(x) ((x) << RSPD_NEWBUF_S) +#define RSPD_NEWBUF_F RSPD_NEWBUF_V(1U) -#define RSPD_GEN(x) ((x) >> 7) -#define RSPD_TYPE(x) (((x) >> 4) & 3) +#define RSPD_LEN_S 0 +#define RSPD_LEN_M 0x7fffffff +#define RSPD_LEN_G(x) (((x) >> RSPD_LEN_S) & RSPD_LEN_M) -#define V_QINTR_CNT_EN 0x0 -#define QINTR_CNT_EN 0x1 -#define QINTR_TIMER_IDX(x) ((x) << 1) -#define QINTR_TIMER_IDX_GET(x) (((x) >> 1) & 0x7) +#define RSPD_QID_S RSPD_LEN_S +#define RSPD_QID_M RSPD_LEN_M +#define RSPD_QID_G(x) RSPD_LEN_G(x) + +#define RSPD_GEN_S 7 + +#define RSPD_TYPE_S 4 +#define RSPD_TYPE_M 0x3 +#define RSPD_TYPE_G(x) (((x) >> RSPD_TYPE_S) & RSPD_TYPE_M) + +/* Rx queue interrupt deferral fields: counter enable and timer index */ +#define QINTR_CNT_EN_S 0 +#define QINTR_CNT_EN_V(x) ((x) << QINTR_CNT_EN_S) +#define QINTR_CNT_EN_F QINTR_CNT_EN_V(1U) + +#define QINTR_TIMER_IDX_S 1 +#define QINTR_TIMER_IDX_M 0x7 +#define QINTR_TIMER_IDX_V(x) ((x) << QINTR_TIMER_IDX_S) +#define QINTR_TIMER_IDX_G(x) (((x) >> QINTR_TIMER_IDX_S) & QINTR_TIMER_IDX_M) /* * Flash layout. 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 30a2f56e99c2..d90f8a03e378 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -634,26 +634,9 @@ struct cpl_tid_release { struct cpl_tx_pkt_core { __be32 ctrl0; -#define TXPKT_VF(x) ((x) << 0) -#define TXPKT_PF(x) ((x) << 8) -#define TXPKT_VF_VLD (1 << 11) -#define TXPKT_OVLAN_IDX(x) ((x) << 12) -#define TXPKT_INTF(x) ((x) << 16) -#define TXPKT_INS_OVLAN (1 << 21) -#define TXPKT_OPCODE(x) ((x) << 24) __be16 pack; __be16 len; __be64 ctrl1; -#define TXPKT_CSUM_END(x) ((x) << 12) -#define TXPKT_CSUM_START(x) ((x) << 20) -#define TXPKT_IPHDR_LEN(x) ((u64)(x) << 20) -#define TXPKT_CSUM_LOC(x) ((u64)(x) << 30) -#define TXPKT_ETHHDR_LEN(x) ((u64)(x) << 34) -#define TXPKT_CSUM_TYPE(x) ((u64)(x) << 40) -#define TXPKT_VLAN(x) ((u64)(x) << 44) -#define TXPKT_VLAN_VLD (1ULL << 60) -#define TXPKT_IPCSUM_DIS (1ULL << 62) -#define TXPKT_L4CSUM_DIS (1ULL << 63) }; struct cpl_tx_pkt { @@ -663,16 +646,66 @@ struct cpl_tx_pkt { #define cpl_tx_pkt_xt cpl_tx_pkt +/* cpl_tx_pkt_core.ctrl0 fields */ +#define TXPKT_VF_S 0 +#define TXPKT_VF_V(x) ((x) << TXPKT_VF_S) + +#define TXPKT_PF_S 8 +#define TXPKT_PF_V(x) ((x) << TXPKT_PF_S) + +#define TXPKT_VF_VLD_S 11 +#define TXPKT_VF_VLD_V(x) ((x) << TXPKT_VF_VLD_S) +#define TXPKT_VF_VLD_F TXPKT_VF_VLD_V(1U) + +#define TXPKT_OVLAN_IDX_S 12 +#define TXPKT_OVLAN_IDX_V(x) ((x) << TXPKT_OVLAN_IDX_S) + +#define TXPKT_INTF_S 16 +#define TXPKT_INTF_V(x) ((x) << TXPKT_INTF_S) + +#define TXPKT_INS_OVLAN_S 21 +#define TXPKT_INS_OVLAN_V(x) ((x) << TXPKT_INS_OVLAN_S) +#define TXPKT_INS_OVLAN_F TXPKT_INS_OVLAN_V(1U) + +#define TXPKT_OPCODE_S 24 +#define TXPKT_OPCODE_V(x) ((x) << TXPKT_OPCODE_S) + +/* cpl_tx_pkt_core.ctrl1 fields */ +#define TXPKT_CSUM_END_S 12 +#define TXPKT_CSUM_END_V(x) ((x) << TXPKT_CSUM_END_S) + +#define TXPKT_CSUM_START_S 20 +#define TXPKT_CSUM_START_V(x) ((x) << TXPKT_CSUM_START_S) + +#define TXPKT_IPHDR_LEN_S 20 +#define TXPKT_IPHDR_LEN_V(x) ((__u64)(x) << TXPKT_IPHDR_LEN_S) + +#define TXPKT_CSUM_LOC_S 30 +#define TXPKT_CSUM_LOC_V(x) ((__u64)(x) << TXPKT_CSUM_LOC_S) + +#define TXPKT_ETHHDR_LEN_S 34 +#define TXPKT_ETHHDR_LEN_V(x) ((__u64)(x) << TXPKT_ETHHDR_LEN_S) + +#define TXPKT_CSUM_TYPE_S 40 +#define TXPKT_CSUM_TYPE_V(x) ((__u64)(x) << TXPKT_CSUM_TYPE_S) + +#define TXPKT_VLAN_S 44 +#define TXPKT_VLAN_V(x) ((__u64)(x) << TXPKT_VLAN_S) + +#define TXPKT_VLAN_VLD_S 60 +#define TXPKT_VLAN_VLD_V(x) ((__u64)(x) << TXPKT_VLAN_VLD_S) +#define TXPKT_VLAN_VLD_F TXPKT_VLAN_VLD_V(1ULL) + +#define TXPKT_IPCSUM_DIS_S 62 +#define TXPKT_IPCSUM_DIS_V(x) ((__u64)(x) << TXPKT_IPCSUM_DIS_S) +#define TXPKT_IPCSUM_DIS_F TXPKT_IPCSUM_DIS_V(1ULL) + +#define TXPKT_L4CSUM_DIS_S 63 +#define TXPKT_L4CSUM_DIS_V(x) ((__u64)(x) << TXPKT_L4CSUM_DIS_S) +#define TXPKT_L4CSUM_DIS_F TXPKT_L4CSUM_DIS_V(1ULL) + struct cpl_tx_pkt_lso_core { __be32 lso_ctrl; -#define LSO_TCPHDR_LEN(x) ((x) << 0) -#define LSO_IPHDR_LEN(x) ((x) << 4) -#define LSO_ETHHDR_LEN(x) ((x) << 16) -#define LSO_IPV6(x) ((x) << 20) -#define LSO_LAST_SLICE (1 << 22) -#define LSO_FIRST_SLICE (1 << 23) -#define LSO_OPCODE(x) ((x) << 24) -#define LSO_T5_XFER_SIZE(x) ((x) << 0) __be16 ipid_ofst; __be16 mss; __be32 seqno_offset; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index 19b2dcf6acde..72ec1f91d29f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ 
b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -61,6 +61,29 @@ #define SGE_TIMERREGS 6 #define TIMERREG_COUNTER0_X 0 +#define FETCHBURSTMIN_64B_X 2 + +#define FETCHBURSTMAX_512B_X 3 + +#define HOSTFCMODE_STATUS_PAGE_X 2 + +#define CIDXFLUSHTHRESH_32_X 5 + +#define UPDATEDELIVERY_INTERRUPT_X 1 + +#define RSPD_TYPE_FLBUF_X 0 +#define RSPD_TYPE_CPL_X 1 +#define RSPD_TYPE_INTR_X 2 + +/* Congestion Manager Definitions. + */ +#define CONMCTXT_CNGTPMODE_S 19 +#define CONMCTXT_CNGTPMODE_V(x) ((x) << CONMCTXT_CNGTPMODE_S) +#define CONMCTXT_CNGCHMAP_S 0 +#define CONMCTXT_CNGCHMAP_V(x) ((x) << CONMCTXT_CNGCHMAP_S) +#define CONMCTXT_CNGTPMODE_CHANNEL_X 2 +#define CONMCTXT_CNGTPMODE_QUEUE_X 1 + /* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. * The User Doorbells are each 128 bytes in length with a Simple Doorbell at * offsets 8x and a Write Combining single 64-byte Egress Queue Unit diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 03fbfd1fb3df..16c6d67370ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1123,6 +1123,7 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11, FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, + FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; enum fw_params_param_dev_diag { @@ -1377,6 +1378,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_IQFLINTCONGEN_S 27 #define FW_IQ_CMD_IQFLINTCONGEN_V(x) ((x) << FW_IQ_CMD_IQFLINTCONGEN_S) +#define FW_IQ_CMD_IQFLINTCONGEN_F FW_IQ_CMD_IQFLINTCONGEN_V(1U) #define FW_IQ_CMD_IQFLINTISCSIC_S 26 #define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S) @@ -1399,6 +1401,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_FL0CONGCIF_S 11 #define FW_IQ_CMD_FL0CONGCIF_V(x) ((x) << FW_IQ_CMD_FL0CONGCIF_S) +#define FW_IQ_CMD_FL0CONGCIF_F FW_IQ_CMD_FL0CONGCIF_V(1U) #define FW_IQ_CMD_FL0ONCHIP_S 10 #define FW_IQ_CMD_FL0ONCHIP_V(x) ((x) << FW_IQ_CMD_FL0ONCHIP_S) @@ -1589,6 +1592,7 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_FETCHRO_S 22 #define FW_EQ_ETH_CMD_FETCHRO_V(x) ((x) << FW_EQ_ETH_CMD_FETCHRO_S) +#define FW_EQ_ETH_CMD_FETCHRO_F FW_EQ_ETH_CMD_FETCHRO_V(1U) #define FW_EQ_ETH_CMD_HOSTFCMODE_S 20 #define FW_EQ_ETH_CMD_HOSTFCMODE_V(x) ((x) << FW_EQ_ETH_CMD_HOSTFCMODE_S) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 1d893b0b7ddf..b2b5e5bbe04c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1021,7 +1021,7 @@ static int closest_thres(const struct sge *s, int thres) static unsigned int qtimer_val(const struct adapter *adapter, const struct sge_rspq *rspq) { - unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params); + unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params); return timer_idx < SGE_NTIMERS ? adapter->sge.timer_val[timer_idx] @@ -1086,8 +1086,8 @@ static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq, * Update the response queue's interrupt coalescing parameters and * return success. */ - rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | - (cnt > 0 ? 
QINTR_CNT_EN : 0)); + rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) | + QINTR_CNT_EN_V(cnt > 0)); return 0; } @@ -1439,7 +1439,7 @@ static int cxgb4vf_get_coalesce(struct net_device *dev, coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq); coalesce->rx_max_coalesced_frames = - ((rspq->intr_params & QINTR_CNT_EN) + ((rspq->intr_params & QINTR_CNT_EN_F) ? adapter->sge.counter_val[rspq->pktcnt_idx] : 0); return 0; @@ -2393,8 +2393,9 @@ static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx, u8 pkt_cnt_idx, unsigned int size, unsigned int iqe_size) { - rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | - (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0)); + rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) | + (pkt_cnt_idx < SGE_NCOUNTERS ? + QINTR_CNT_EN_F : 0)); rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 482f6de6817d..2e41d1541d73 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1100,7 +1100,7 @@ nocsum: * unknown protocol, disable HW csum * and hope a bad packet is detected */ - return TXPKT_L4CSUM_DIS; + return TXPKT_L4CSUM_DIS_F; } } else { /* @@ -1117,15 +1117,15 @@ nocsum: } if (likely(csum_type >= TX_CSUM_TCPIP)) - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_IPHDR_LEN(skb_network_header_len(skb)) | - TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_IPHDR_LEN_V(skb_network_header_len(skb)) | + TXPKT_ETHHDR_LEN_V(skb_network_offset(skb) - ETH_HLEN); else { int start = skb_transport_offset(skb); - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_CSUM_START(start) | - TXPKT_CSUM_LOC(start + skb->csum_offset); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_CSUM_START_V(start) | + TXPKT_CSUM_LOC_V(start + skb->csum_offset); } } @@ -1160,7 +1160,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) { u32 wr_mid; u64 cntrl, *end; - int qidx, credits; + int qidx, credits, max_pkt_len; unsigned int flits, ndesc; struct adapter *adapter; struct sge_eth_txq *txq; @@ -1183,6 +1183,13 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->len < fw_hdr_copy_len)) goto out_free; + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + /* * Figure out which TX Queue we're going to use. */ @@ -1281,29 +1288,30 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) * Fill in the LSO CPL message. */ lso->lso_ctrl = - cpu_to_be32(LSO_OPCODE(CPL_TX_PKT_LSO) | - LSO_FIRST_SLICE | - LSO_LAST_SLICE | - LSO_IPV6(v6) | - LSO_ETHHDR_LEN(eth_xtra_len/4) | - LSO_IPHDR_LEN(l3hdr_len/4) | - LSO_TCPHDR_LEN(tcp_hdr(skb)->doff)); + cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) | + LSO_FIRST_SLICE_F | + LSO_LAST_SLICE_F | + LSO_IPV6_V(v6) | + LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | + LSO_IPHDR_LEN_V(l3hdr_len / 4) | + LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); lso->ipid_ofst = cpu_to_be16(0); lso->mss = cpu_to_be16(ssi->gso_size); lso->seqno_offset = cpu_to_be32(0); if (is_t4(adapter->params.chip)) lso->len = cpu_to_be32(skb->len); else - lso->len = cpu_to_be32(LSO_T5_XFER_SIZE(skb->len)); + lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len)); /* * Set up TX Packet CPL pointer, control word and perform * accounting. 
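 * (In this LSO path the CPL_TX_PKT_XT message sits immediately after
 * the LSO CPL in the descriptor, hence the (lso + 1) pointer
 * arithmetic below.)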
*/ cpl = (void *)(lso + 1); - cntrl = (TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | - TXPKT_IPHDR_LEN(l3hdr_len) | - TXPKT_ETHHDR_LEN(eth_xtra_len)); + cntrl = (TXPKT_CSUM_TYPE_V(v6 ? + TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | + TXPKT_IPHDR_LEN_V(l3hdr_len) | + TXPKT_ETHHDR_LEN_V(eth_xtra_len)); txq->tso++; txq->tx_cso += ssi->gso_segs; } else { @@ -1320,10 +1328,10 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) */ cpl = (void *)(wr + 1); if (skb->ip_summed == CHECKSUM_PARTIAL) { - cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; + cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS_F; txq->tx_cso++; } else - cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; } /* @@ -1332,15 +1340,15 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) */ if (skb_vlan_tag_present(skb)) { txq->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); } /* * Fill in the TX Packet CPL message header. */ - cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE(CPL_TX_PKT_XT) | - TXPKT_INTF(pi->port_id) | - TXPKT_PF(0)); + cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) | + TXPKT_INTF_V(pi->port_id) | + TXPKT_PF_V(0)); cpl->pack = cpu_to_be16(0); cpl->len = cpu_to_be16(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); @@ -1663,7 +1671,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, static inline bool is_new_response(const struct rsp_ctrl *rc, const struct sge_rspq *rspq) { - return RSPD_GEN(rc->type_gen) == rspq->gen; + return ((rc->type_gen >> RSPD_GEN_S) & 0x1) == rspq->gen; } /** @@ -1752,8 +1760,8 @@ static int process_responses(struct sge_rspq *rspq, int budget) * SGE. */ dma_rmb(); - rsp_type = RSPD_TYPE(rc->type_gen); - if (likely(rsp_type == RSP_TYPE_FLBUF)) { + rsp_type = RSPD_TYPE_G(rc->type_gen); + if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) { struct page_frag *fp; struct pkt_gl gl; const struct rx_sw_desc *sdesc; @@ -1764,7 +1772,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) * If we get a "new buffer" message from the SGE we * need to move on to the next Free List buffer. 
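 * (RSPD_NEWBUF is bit 31 of pldbuflen_qid; when it is set, the low
 * 31 bits, extracted below via RSPD_LEN_G(), carry the length of the
 * data placed in the new buffer.)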
*/ - if (len & RSPD_NEWBUF) { /* * We get one "new buffer" message when we * first start up a queue so we need to ignore @@ -1775,7 +1783,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) 1); rspq->offset = 0; } - len = RSPD_LEN(len); + len = RSPD_LEN_G(len); } gl.tot_len = len; @@ -1818,10 +1826,10 @@ static int process_responses(struct sge_rspq *rspq, int budget) rspq->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&gl, &rxq->fl, frag); - } else if (likely(rsp_type == RSP_TYPE_CPL)) { + } else if (likely(rsp_type == RSPD_TYPE_CPL_X)) { ret = rspq->handler(rspq, rspq->cur_desc, NULL); } else { - WARN_ON(rsp_type > RSP_TYPE_CPL); + WARN_ON(rsp_type > RSPD_TYPE_CPL_X); ret = 0; } @@ -1833,7 +1841,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) */ const int NOMEM_TIMER_IDX = SGE_NTIMERS-1; rspq->next_intr_params = - QINTR_TIMER_IDX(NOMEM_TIMER_IDX); + QINTR_TIMER_IDX_V(NOMEM_TIMER_IDX); break; } @@ -1875,7 +1883,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) intr_params = rspq->next_intr_params; rspq->next_intr_params = rspq->intr_params; } else - intr_params = QINTR_TIMER_IDX(SGE_TIMER_UPD_CIDX); + intr_params = QINTR_TIMER_IDX_V(SGE_TIMER_UPD_CIDX); if (unlikely(work_done == 0)) rspq->unhandled_irqs++; @@ -1936,10 +1944,10 @@ static unsigned int process_intrq(struct adapter *adapter) * never happen ... */ dma_rmb(); - if (unlikely(RSPD_TYPE(rc->type_gen) != RSP_TYPE_INTR)) { + if (unlikely(RSPD_TYPE_G(rc->type_gen) != RSPD_TYPE_INTR_X)) { dev_err(adapter->pdev_dev, "Unexpected INTRQ response type %d\n", - RSPD_TYPE(rc->type_gen)); + RSPD_TYPE_G(rc->type_gen)); continue; } @@ -1951,7 +1959,7 @@ static unsigned int process_intrq(struct adapter *adapter) * want to either make them fatal and/or conditionalized under * DEBUG. */ - qid = RSPD_QID(be32_to_cpu(rc->pldbuflen_qid)); + qid = RSPD_QID_G(be32_to_cpu(rc->pldbuflen_qid)); iq_idx = IQ_IDX(s, qid); if (unlikely(iq_idx >= MAX_INGQ)) { dev_err(adapter->pdev_dev, @@ -2243,8 +2251,12 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, * Allocate the ring for the hardware free list (with space * for its status page) along with the associated software * descriptor ring. The free list size needs to be a multiple - * of the Egress Queue Unit. + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congestion Threshold + * (fl_starve_thres - 1). 
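+ * For example, if fl_starve_thres were 65 and FL_PER_EQ_UNIT were 8
+ * (illustrative values, not necessarily the driver's defaults), any
+ * requested free list smaller than 64 + 16 = 80 entries would be
+ * bumped up to 80 before the roundup to a whole number of Egress
+ * Queue Units.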
*/ + if (fl->size < s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT) + fl->size = s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT; fl->size = roundup(fl->size, FL_PER_EQ_UNIT); fl->desc = alloc_ring(adapter->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 0be6850be8a2..6739ebc08c47 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -5,7 +5,7 @@ #include <linux/in.h> #include <linux/types.h> #include <linux/skbuff.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #include "enic_res.h" #include "enic_clsf.h" @@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) int res; struct filter data; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: data.u.ipv4.protocol = PROTO_TCP; break; @@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) return -EPROTONOSUPPORT; }; data.type = FILTER_IPV4_5TUPLE; - data.u.ipv4.src_addr = ntohl(keys->src); - data.u.ipv4.dst_addr = ntohl(keys->dst); - data.u.ipv4.src_port = ntohs(keys->port16[0]); - data.u.ipv4.dst_port = ntohs(keys->port16[1]); + data.u.ipv4.src_addr = ntohl(keys->addrs.src); + data.u.ipv4.dst_addr = ntohl(keys->addrs.dst); + data.u.ipv4.src_port = ntohs(keys->ports.src); + data.u.ipv4.dst_port = ntohs(keys->ports.dst); data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; spin_lock_bh(&enic->devcmd_lock); @@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, struct enic_rfs_fltr_node *tpos; hlist_for_each_entry(tpos, h, node) - if (tpos->keys.src == k->src && - tpos->keys.dst == k->dst && - tpos->keys.ports == k->ports && - tpos->keys.ip_proto == k->ip_proto && - tpos->keys.n_proto == k->n_proto) + if (tpos->keys.addrs.src == k->addrs.src && + tpos->keys.addrs.dst == k->addrs.dst && + tpos->keys.ports.ports == k->ports.ports && + tpos->keys.basic.ip_proto == k->basic.ip_proto && + tpos->keys.basic.n_proto == k->basic.n_proto) return tpos; return NULL; } @@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, int res, i; enic = netdev_priv(dev); - res = skb_flow_dissect(skb, &keys); - if (!res || keys.n_proto != htons(ETH_P_IP) || - (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP)) + res = skb_flow_dissect_flow_keys(skb, &keys); + if (!res || keys.basic.n_proto != htons(ETH_P_IP) || + (keys.basic.ip_proto != IPPROTO_TCP && + keys.basic.ip_proto != IPPROTO_UDP)) return -EPROTONOSUPPORT; tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca675a27..117c0968dd0b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) n = htbl_fltr_search(enic, (u16)fsp->location); if (!n) return -EINVAL; - switch (n->keys.ip_proto) { + switch (n->keys.basic.ip_proto) { case IPPROTO_TCP: fsp->flow_type = TCP_V4_FLOW; break; @@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) break; } - fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src; + fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src; fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0; - fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst; + 
fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst; fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; - fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0]; + fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src; fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0; - fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1]; + fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.dst; fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0; fsp->ring_cookie = n->rq_id; diff --git a/drivers/net/ethernet/emulex/benet/Kconfig b/drivers/net/ethernet/emulex/benet/Kconfig index ea94a8eb6b35..7108563260ae 100644 --- a/drivers/net/ethernet/emulex/benet/Kconfig +++ b/drivers/net/ethernet/emulex/benet/Kconfig @@ -5,6 +5,15 @@ config BE2NET This driver implements the NIC functionality for ServerEngines' 10Gbps network adapter - BladeEngine. +config BE2NET_HWMON + bool "HWMON support for be2net driver" + depends on BE2NET && HWMON + depends on !(BE2NET=y && HWMON=m) + default y + ---help--- + Say Y here if you want to expose thermal sensor data on + be2net network adapter. + config BE2NET_VXLAN bool "VXLAN offload support on be2net driver" default y diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 1bf1cdce74ac..8d12b41b3b19 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -31,11 +31,13 @@ #include <linux/slab.h> #include <linux/u64_stats_sync.h> #include <linux/cpumask.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.6.0.1" +#define DRV_VER "10.6.0.2" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -314,7 +316,6 @@ struct be_rx_obj { } ____cacheline_aligned_in_smp; struct be_drv_stats { - u32 be_on_die_temperature; u32 eth_red_drops; u32 dma_map_errors; u32 rx_drops_no_pbuf; @@ -366,6 +367,7 @@ struct be_vf_cfg { u32 tx_rate; u32 plink_tracking; u32 privileges; + bool spoofchk; }; enum vf_state { @@ -382,6 +384,7 @@ enum vf_state { #define BE_FLAGS_SETUP_DONE BIT(9) #define BE_FLAGS_EVT_INCOMPATIBLE_SFP BIT(10) #define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) +#define BE_FLAGS_OS2BMC BIT(12) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 @@ -426,6 +429,8 @@ struct be_resources { u32 vf_if_cap_flags; /* VF if capability flags */ }; +#define be_is_os2bmc_enabled(adapter) (adapter->flags & BE_FLAGS_OS2BMC) + struct rss_info { u64 rss_flags; u8 rsstable[RSS_INDIR_TABLE_LEN]; @@ -433,6 +438,12 @@ struct rss_info { u8 rss_hkey[RSS_HASH_KEY_LEN]; }; +#define BE_INVALID_DIE_TEMP 0xFF +struct be_hwmon { + struct device *hwmon_dev; + u8 be_on_die_temp; /* Unit: millidegree Celsius */ +}; + /* Macros to read/write the 'features' word of be_wrb_params structure. 
*/ #define BE_WRB_F_BIT(name) BE_WRB_F_##name##_BIT @@ -453,7 +464,8 @@ enum { BE_WRB_F_LSO_BIT, /* LSO */ BE_WRB_F_LSO6_BIT, /* LSO6 */ BE_WRB_F_VLAN_BIT, /* VLAN */ - BE_WRB_F_VLAN_SKIP_HW_BIT /* Skip VLAN tag (workaround) */ + BE_WRB_F_VLAN_SKIP_HW_BIT, /* Skip VLAN tag (workaround) */ + BE_WRB_F_OS2BMC_BIT /* Send packet to the management ring */ }; /* The structure below provides a HW-agnostic abstraction of WRB params @@ -514,6 +526,7 @@ struct be_adapter { u16 work_counter; struct delayed_work be_err_detection_work; + u8 err_flags; u32 flags; u32 cmd_privileges; /* Ethtool knobs and info */ @@ -572,8 +585,11 @@ struct be_adapter { u16 qnq_vid; u32 msg_enable; int be_get_temp_freq; + struct be_hwmon hwmon_info; u8 pf_number; struct rss_info rss_info; + /* Filters for packets that need to be sent to BMC */ + u32 bmc_filt_mask; }; #define be_physfn(adapter) (!adapter->virtfn) @@ -772,26 +788,36 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb) return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; } -static inline bool be_multi_rxq(const struct be_adapter *adapter) +#define BE_ERROR_EEH 1 +#define BE_ERROR_UE BIT(1) +#define BE_ERROR_FW BIT(2) +#define BE_ERROR_HW (BE_ERROR_EEH | BE_ERROR_UE) +#define BE_ERROR_ANY (BE_ERROR_EEH | BE_ERROR_UE | BE_ERROR_FW) +#define BE_CLEAR_ALL 0xFF + +static inline u8 be_check_error(struct be_adapter *adapter, u32 err_type) { - return adapter->num_rx_qs > 1; + return (adapter->err_flags & err_type); } -static inline bool be_error(struct be_adapter *adapter) +static inline void be_set_error(struct be_adapter *adapter, int err_type) { - return adapter->eeh_error || adapter->hw_error || adapter->fw_timeout; + struct net_device *netdev = adapter->netdev; + + adapter->err_flags |= err_type; + netif_carrier_off(netdev); + + dev_info(&adapter->pdev->dev, "%s: Link down\n", netdev->name); } -static inline bool be_hw_error(struct be_adapter *adapter) +static inline void be_clear_error(struct be_adapter *adapter, int err_type) { - return adapter->eeh_error || adapter->hw_error; + adapter->err_flags &= ~err_type; } -static inline void be_clear_all_error(struct be_adapter *adapter) +static inline bool be_multi_rxq(const struct be_adapter *adapter) { - adapter->eeh_error = false; - adapter->hw_error = false; - adapter->fw_timeout = false; + return adapter->num_rx_qs > 1; } void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, @@ -804,6 +830,7 @@ bool be_pause_supported(struct be_adapter *adapter); u32 be_get_fw_log_level(struct be_adapter *adapter); int be_update_queues(struct be_adapter *adapter); int be_poll(struct napi_struct *napi, int budget); +void be_eqd_update(struct be_adapter *adapter, bool force_update); /* * internal function to initialize-cleanup roce device. diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index fb140faeafb1..41150543906a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. 
* * This program is free software; you can redistribute it and/or @@ -93,7 +93,7 @@ static void be_mcc_notify(struct be_adapter *adapter) struct be_queue_info *mccq = &adapter->mcc_obj.q; u32 val = 0; - if (be_error(adapter)) + if (be_check_error(adapter, BE_ERROR_ANY)) return; val |= mccq->id & DB_MCCQ_RING_ID_MASK; @@ -140,6 +140,7 @@ static bool be_skip_err_log(u8 opcode, u16 base_status, u16 addl_status) if (base_status == MCC_STATUS_NOT_SUPPORTED || base_status == MCC_STATUS_ILLEGAL_REQUEST || addl_status == MCC_ADDL_STATUS_TOO_MANY_INTERFACES || + addl_status == MCC_ADDL_STATUS_INSUFFICIENT_VLANS || (opcode == OPCODE_COMMON_WRITE_FLASHROM && (base_status == MCC_STATUS_ILLEGAL_FIELD || addl_status == MCC_ADDL_STATUS_FLASH_IMAGE_CRC_MISMATCH))) @@ -191,10 +192,12 @@ static void be_async_cmd_process(struct be_adapter *adapter, if (base_status == MCC_STATUS_SUCCESS) { struct be_cmd_resp_get_cntl_addnl_attribs *resp = (void *)resp_hdr; - adapter->drv_stats.be_on_die_temperature = + adapter->hwmon_info.be_on_die_temp = resp->on_die_temperature; } else { adapter->be_get_temp_freq = 0; + adapter->hwmon_info.be_on_die_temp = + BE_INVALID_DIE_TEMP; } return; } @@ -330,6 +333,21 @@ static void be_async_grp5_pvid_state_process(struct be_adapter *adapter, } } +#define MGMT_ENABLE_MASK 0x4 +static void be_async_grp5_fw_control_process(struct be_adapter *adapter, + struct be_mcc_compl *compl) +{ + struct be_async_fw_control *evt = (struct be_async_fw_control *)compl; + u32 evt_dw1 = le32_to_cpu(evt->event_data_word1); + + if (evt_dw1 & MGMT_ENABLE_MASK) { + adapter->flags |= BE_FLAGS_OS2BMC; + adapter->bmc_filt_mask = le32_to_cpu(evt->event_data_word2); + } else { + adapter->flags &= ~BE_FLAGS_OS2BMC; + } +} + static void be_async_grp5_evt_process(struct be_adapter *adapter, struct be_mcc_compl *compl) { @@ -346,6 +364,10 @@ static void be_async_grp5_evt_process(struct be_adapter *adapter, case ASYNC_EVENT_PVID_STATE: be_async_grp5_pvid_state_process(adapter, compl); break; + /* Async event to disable/enable os2bmc and/or mac-learning */ + case ASYNC_EVENT_FW_CONTROL: + be_async_grp5_fw_control_process(adapter, compl); + break; default: break; } @@ -486,7 +508,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; for (i = 0; i < mcc_timeout; i++) { - if (be_error(adapter)) + if (be_check_error(adapter, BE_ERROR_ANY)) return -EIO; local_bh_disable(); @@ -499,7 +521,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) } if (i == mcc_timeout) { dev_err(&adapter->pdev->dev, "FW not responding\n"); - adapter->fw_timeout = true; + be_set_error(adapter, BE_ERROR_FW); return -EIO; } return status; @@ -538,7 +560,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db) u32 ready; do { - if (be_error(adapter)) + if (be_check_error(adapter, BE_ERROR_ANY)) return -EIO; ready = ioread32(db); @@ -551,7 +573,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db) if (msecs > 4000) { dev_err(&adapter->pdev->dev, "FW not responding\n"); - adapter->fw_timeout = true; + be_set_error(adapter, BE_ERROR_FW); be_detect_error(adapter); return -1; } @@ -1457,7 +1479,7 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags, *if_handle = le32_to_cpu(resp->interface_id); /* Hack to retrieve VF's pmac-id on BE3 */ - if (BE3_chip(adapter) && !be_physfn(adapter)) + if (BE3_chip(adapter) && be_virtfn(adapter)) adapter->pmac_id[0] = le32_to_cpu(resp->pmac_id); } return status; @@ -3153,7 
+3175,7 @@ int be_cmd_set_mac(struct be_adapter *adapter, u8 *mac, int if_id, u32 dom) } int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, - u32 domain, u16 intf_id, u16 hsw_mode) + u32 domain, u16 intf_id, u16 hsw_mode, u8 spoofchk) { struct be_mcc_wrb *wrb; struct be_cmd_req_set_hsw_config *req; @@ -3189,6 +3211,14 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, ctxt, hsw_mode); } + /* Enable/disable both mac and vlan spoof checking */ + if (!BEx_chip(adapter) && spoofchk) { + AMAP_SET_BITS(struct amap_set_hsw_context, mac_spoofchk, + ctxt, spoofchk); + AMAP_SET_BITS(struct amap_set_hsw_context, vlan_spoofchk, + ctxt, spoofchk); + } + be_dws_cpu_to_le(req->context, sizeof(req->context)); status = be_mcc_notify_wait(adapter); @@ -3199,7 +3229,7 @@ err: /* Get Hyper switch config */ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, - u32 domain, u16 intf_id, u8 *mode) + u32 domain, u16 intf_id, u8 *mode, bool *spoofchk) { struct be_mcc_wrb *wrb; struct be_cmd_req_get_hsw_config *req; @@ -3247,6 +3277,10 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, if (mode) *mode = AMAP_GET_BITS(struct amap_get_hsw_resp_context, port_fwd_type, &resp->context); + if (spoofchk) + *spoofchk = + AMAP_GET_BITS(struct amap_get_hsw_resp_context, + spoofchk, &resp->context); } err: @@ -3258,7 +3292,7 @@ static bool be_is_wol_excluded(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - if (!be_physfn(adapter)) + if (be_virtfn(adapter)) return true; switch (pdev->subsystem_device) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 1ec22300e254..2716e6f30d9a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. 
* * This program is free software; you can redistribute it and/or @@ -65,7 +65,8 @@ enum mcc_base_status { enum mcc_addl_status { MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES = 0x16, MCC_ADDL_STATUS_FLASH_IMAGE_CRC_MISMATCH = 0x4d, - MCC_ADDL_STATUS_TOO_MANY_INTERFACES = 0x4a + MCC_ADDL_STATUS_TOO_MANY_INTERFACES = 0x4a, + MCC_ADDL_STATUS_INSUFFICIENT_VLANS = 0xab }; #define CQE_BASE_STATUS_MASK 0xFFFF @@ -104,6 +105,7 @@ struct be_mcc_compl { #define ASYNC_DEBUG_EVENT_TYPE_QNQ 1 #define ASYNC_EVENT_CODE_SLIPORT 0x11 #define ASYNC_EVENT_PORT_MISCONFIG 0x9 +#define ASYNC_EVENT_FW_CONTROL 0x5 enum { LINK_DOWN = 0x0, @@ -180,6 +182,22 @@ struct be_async_event_misconfig_port { u32 flags; } __packed; +#define BMC_FILT_BROADCAST_ARP BIT(0) +#define BMC_FILT_BROADCAST_DHCP_CLIENT BIT(1) +#define BMC_FILT_BROADCAST_DHCP_SERVER BIT(2) +#define BMC_FILT_BROADCAST_NET_BIOS BIT(3) +#define BMC_FILT_BROADCAST BIT(7) +#define BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER BIT(8) +#define BMC_FILT_MULTICAST_IPV6_RA BIT(9) +#define BMC_FILT_MULTICAST_IPV6_RAS BIT(10) +#define BMC_FILT_MULTICAST BIT(15) +struct be_async_fw_control { + u32 event_data_word1; + u32 event_data_word2; + u32 evt_tag; + u32 event_data_word4; +} __packed; + struct be_mcc_mailbox { struct be_mcc_wrb wrb; struct be_mcc_compl compl; @@ -1109,10 +1127,6 @@ struct be_cmd_req_query_fw_cfg { u32 rsvd[31]; }; -/* ASIC revisions */ -#define ASIC_REV_B0 0x10 -#define ASIC_REV_P2 0x11 - struct be_cmd_resp_query_fw_cfg { struct be_cmd_resp_hdr hdr; u32 be_config_number; @@ -1745,18 +1759,24 @@ struct be_cmd_req_set_mac_list { #define PORT_FWD_TYPE_VEPA 0x3 #define PORT_FWD_TYPE_VEB 0x2 +#define ENABLE_MAC_SPOOFCHK 0x2 +#define DISABLE_MAC_SPOOFCHK 0x3 + struct amap_set_hsw_context { u8 interface_id[16]; - u8 rsvd0[14]; + u8 rsvd0[8]; + u8 mac_spoofchk[2]; + u8 rsvd1[4]; u8 pvid_valid; u8 pport; - u8 rsvd1[6]; + u8 rsvd2[6]; u8 port_fwd_type[3]; - u8 rsvd2[7]; + u8 rsvd3[5]; + u8 vlan_spoofchk[2]; u8 pvid[16]; - u8 rsvd3[32]; u8 rsvd4[32]; u8 rsvd5[32]; + u8 rsvd6[32]; } __packed; struct be_cmd_req_set_hsw_config { @@ -1774,11 +1794,13 @@ struct amap_get_hsw_req_context { struct amap_get_hsw_resp_context { u8 rsvd0[6]; u8 port_fwd_type[3]; - u8 rsvd1[7]; + u8 rsvd1[5]; + u8 spoofchk; + u8 rsvd2; u8 pvid[16]; - u8 rsvd2[32]; u8 rsvd3[32]; u8 rsvd4[32]; + u8 rsvd5[32]; } __packed; struct be_cmd_req_get_hsw_config { @@ -2334,9 +2356,9 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, u8 mac_count, u32 domain); int be_cmd_set_mac(struct be_adapter *adapter, u8 *mac, int if_id, u32 dom); int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, u32 domain, - u16 intf_id, u16 hsw_mode); + u16 intf_id, u16 hsw_mode, u8 spoofchk); int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, u32 domain, - u16 intf_id, u8 *mode); + u16 intf_id, u8 *mode, bool *spoofchk); int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter); int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level); int be_cmd_get_fw_log_level(struct be_adapter *adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index b765c24625bf..675cbacef772 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. 
* * This program is free software; you can redistribute it and/or @@ -123,7 +123,6 @@ static const struct be_ethtool_stat et_stats[] = { {DRVSTAT_INFO(dma_map_errors)}, /* Number of packets dropped due to random early drop function */ {DRVSTAT_INFO(eth_red_drops)}, - {DRVSTAT_INFO(be_on_die_temperature)}, {DRVSTAT_INFO(rx_roce_bytes_lsd)}, {DRVSTAT_INFO(rx_roce_bytes_msd)}, {DRVSTAT_INFO(rx_roce_frames)}, @@ -368,6 +367,14 @@ static int be_set_coalesce(struct net_device *netdev, aic++; } + /* For Skyhawk, the EQD setting happens via EQ_DB when AIC is enabled. + * When AIC is disabled, persistently force-set the EQD value via the + * FW cmd, so that we don't have to calculate the delay multiplier + * encoding value each time EQ_DB is rung + */ + if (!et->use_adaptive_rx_coalesce && skyhawk_chip(adapter)) + be_eqd_update(adapter, true); + return 0; } diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 48840889db62..c684bb32b487 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -132,6 +132,18 @@ #define DB_EQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */ /* Rearm bit */ #define DB_EQ_REARM_SHIFT (29) /* bit 29 */ +/* Rearm to interrupt delay encoding */ +#define DB_EQ_R2I_DLY_SHIFT (30) /* bits 30 - 31 */ + +/* Rearm to interrupt (R2I) delay multiplier encoding represents 3 different + * values configured in CEV_REARM2IRPT_DLY_MULT_CSR register. This value is + * programmed by host driver while ringing an EQ doorbell (EQ_DB) if a delay + * between rearming the EQ and next interrupt on this EQ is desired. + */ +#define R2I_DLY_ENC_0 0 /* No delay */ +#define R2I_DLY_ENC_1 1 /* maps to 160us EQ delay */ +#define R2I_DLY_ENC_2 2 /* maps to 96us EQ delay */ +#define R2I_DLY_ENC_3 3 /* maps to 48us EQ delay */ /********* Compl Q door bell *************/ #define DB_CQ_OFFSET 0x120 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..75696d4f9ed0 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved.
* * This program is free software; you can redistribute it and/or @@ -179,7 +179,7 @@ static void be_intr_set(struct be_adapter *adapter, bool enable) if (lancer_chip(adapter)) return; - if (adapter->eeh_error) + if (be_check_error(adapter, BE_ERROR_EEH)) return; status = be_cmd_intr_set(adapter, enable); @@ -191,6 +191,9 @@ static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted) { u32 val = 0; + if (be_check_error(adapter, BE_ERROR_HW)) + return; + val |= qid & DB_RQ_RING_ID_MASK; val |= posted << DB_RQ_NUM_POSTED_SHIFT; @@ -203,6 +206,9 @@ static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo, { u32 val = 0; + if (be_check_error(adapter, BE_ERROR_HW)) + return; + val |= txo->q.id & DB_TXULP_RING_ID_MASK; val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT; @@ -211,14 +217,15 @@ static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo, } static void be_eq_notify(struct be_adapter *adapter, u16 qid, - bool arm, bool clear_int, u16 num_popped) + bool arm, bool clear_int, u16 num_popped, + u32 eq_delay_mult_enc) { u32 val = 0; val |= qid & DB_EQ_RING_ID_MASK; val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT); - if (adapter->eeh_error) + if (be_check_error(adapter, BE_ERROR_HW)) return; if (arm) @@ -227,6 +234,7 @@ static void be_eq_notify(struct be_adapter *adapter, u16 qid, val |= 1 << DB_EQ_CLR_SHIFT; val |= 1 << DB_EQ_EVNT_SHIFT; val |= num_popped << DB_EQ_NUM_POPPED_SHIFT; + val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT; iowrite32(val, adapter->db + DB_EQ_OFFSET); } @@ -238,7 +246,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped) val |= ((qid & DB_CQ_RING_ID_EXT_MASK) << DB_CQ_RING_ID_EXT_MASK_SHIFT); - if (adapter->eeh_error) + if (be_check_error(adapter, BE_ERROR_HW)) return; if (arm) @@ -662,6 +670,8 @@ void be_link_status_update(struct be_adapter *adapter, u8 link_status) netif_carrier_on(netdev); else netif_carrier_off(netdev); + + netdev_info(netdev, "Link is %s\n", link_status ? 
"Up" : "Down"); } static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb) @@ -810,6 +820,8 @@ static void wrb_fill_hdr(struct be_adapter *adapter, SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb)); SET_TX_WRB_HDR_BITS(len, hdr, skb->len); + SET_TX_WRB_HDR_BITS(mgmt, hdr, + BE_WRB_F_GET(wrb_params->features, OS2BMC)); } static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, @@ -1146,6 +1158,130 @@ static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo) txo->pend_wrb_cnt = 0; } +/* OS2BMC related */ + +#define DHCP_CLIENT_PORT 68 +#define DHCP_SERVER_PORT 67 +#define NET_BIOS_PORT1 137 +#define NET_BIOS_PORT2 138 +#define DHCPV6_RAS_PORT 547 + +#define is_mc_allowed_on_bmc(adapter, eh) \ + (!is_multicast_filt_enabled(adapter) && \ + is_multicast_ether_addr(eh->h_dest) && \ + !is_broadcast_ether_addr(eh->h_dest)) + +#define is_bc_allowed_on_bmc(adapter, eh) \ + (!is_broadcast_filt_enabled(adapter) && \ + is_broadcast_ether_addr(eh->h_dest)) + +#define is_arp_allowed_on_bmc(adapter, skb) \ + (is_arp(skb) && is_arp_filt_enabled(adapter)) + +#define is_broadcast_packet(eh, adapter) \ + (is_multicast_ether_addr(eh->h_dest) && \ + !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast)) + +#define is_arp(skb) (skb->protocol == htons(ETH_P_ARP)) + +#define is_arp_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP)) + +#define is_dhcp_client_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT) + +#define is_dhcp_srvr_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER) + +#define is_nbios_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS) + +#define is_ipv6_na_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & \ + BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER) + +#define is_ipv6_ra_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA) + +#define is_ipv6_ras_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS) + +#define is_broadcast_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_BROADCAST) + +#define is_multicast_filt_enabled(adapter) \ + (adapter->bmc_filt_mask & BMC_FILT_MULTICAST) + +static bool be_send_pkt_to_bmc(struct be_adapter *adapter, + struct sk_buff **skb) +{ + struct ethhdr *eh = (struct ethhdr *)(*skb)->data; + bool os2bmc = false; + + if (!be_is_os2bmc_enabled(adapter)) + goto done; + + if (!is_multicast_ether_addr(eh->h_dest)) + goto done; + + if (is_mc_allowed_on_bmc(adapter, eh) || + is_bc_allowed_on_bmc(adapter, eh) || + is_arp_allowed_on_bmc(adapter, (*skb))) { + os2bmc = true; + goto done; + } + + if ((*skb)->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *hdr = ipv6_hdr((*skb)); + u8 nexthdr = hdr->nexthdr; + + if (nexthdr == IPPROTO_ICMPV6) { + struct icmp6hdr *icmp6 = icmp6_hdr((*skb)); + + switch (icmp6->icmp6_type) { + case NDISC_ROUTER_ADVERTISEMENT: + os2bmc = is_ipv6_ra_filt_enabled(adapter); + goto done; + case NDISC_NEIGHBOUR_ADVERTISEMENT: + os2bmc = is_ipv6_na_filt_enabled(adapter); + goto done; + default: + break; + } + } + } + + if (is_udp_pkt((*skb))) { + struct udphdr *udp = udp_hdr((*skb)); + + switch (udp->dest) { + case DHCP_CLIENT_PORT: + os2bmc = is_dhcp_client_filt_enabled(adapter); + goto done; + case DHCP_SERVER_PORT: + os2bmc = is_dhcp_srvr_filt_enabled(adapter); + goto done; + case NET_BIOS_PORT1: + case NET_BIOS_PORT2: + os2bmc = is_nbios_filt_enabled(adapter); + goto done; + case DHCPV6_RAS_PORT: + os2bmc = 
is_ipv6_ras_filt_enabled(adapter); + goto done; + default: + break; + } + } +done: + /* For packets over a vlan, which are destined + * to BMC, asic expects the vlan to be inline in the packet. + */ + if (os2bmc) + *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL); + + return os2bmc; +} + static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); @@ -1167,6 +1303,18 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) goto drop; } + /* if os2bmc is enabled and if the pkt is destined to bmc, + * enqueue the pkt a 2nd time with mgmt bit set. + */ + if (be_send_pkt_to_bmc(adapter, &skb)) { + BE_WRB_F_SET(wrb_params.features, OS2BMC, 1); + wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); + if (unlikely(!wrb_cnt)) + goto drop; + else + skb_get(skb); + } + if (be_is_txq_full(txo)) { netif_stop_subqueue(netdev, q_idx); tx_stats(txo)->tx_stops++; @@ -1265,7 +1413,8 @@ static int be_vid_config(struct be_adapter *adapter) if (status) { dev_err(dev, "Setting HW VLAN filtering failed\n"); /* Set to VLAN promisc mode as setting VLAN filter failed */ - if (addl_status(status) == + if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS || + addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES) return be_set_vlan_promisc(adapter); } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { @@ -1466,6 +1615,7 @@ static int be_get_vf_config(struct net_device *netdev, int vf, vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT; memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN); vi->linkstate = adapter->vf_cfg[vf].plink_tracking; + vi->spoofchk = adapter->vf_cfg[vf].spoofchk; return 0; } @@ -1478,7 +1628,7 @@ static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan) int status; /* Enable Transparent VLAN Tagging */ - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0); + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0); if (status) return status; @@ -1507,7 +1657,7 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) /* Reset Transparent VLAN Tagging. */ status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1, - vf_cfg->if_handle, 0); + vf_cfg->if_handle, 0, 0); if (status) return status; @@ -1642,6 +1792,39 @@ static int be_set_vf_link_state(struct net_device *netdev, int vf, return 0; } +static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) +{ + struct be_adapter *adapter = netdev_priv(netdev); + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + u8 spoofchk; + int status; + + if (!sriov_enabled(adapter)) + return -EPERM; + + if (vf >= adapter->num_vfs) + return -EINVAL; + + if (BEx_chip(adapter)) + return -EOPNOTSUPP; + + if (enable == vf_cfg->spoofchk) + return 0; + + spoofchk = enable ? 
ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK; + + status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle, + 0, spoofchk); + if (status) { + dev_err(&adapter->pdev->dev, + "Spoofchk change on VF %d failed: %#x\n", vf, status); + return be_cmd_status(status); + } + + vf_cfg->spoofchk = enable; + return 0; +} + static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts, ulong now) { @@ -1650,61 +1833,110 @@ static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts, aic->jiffies = now; } -static void be_eqd_update(struct be_adapter *adapter) +static int be_get_new_eqd(struct be_eq_obj *eqo) { - struct be_set_eqd set_eqd[MAX_EVT_QS]; - int eqd, i, num = 0, start; + struct be_adapter *adapter = eqo->adapter; + int eqd, start; struct be_aic_obj *aic; - struct be_eq_obj *eqo; struct be_rx_obj *rxo; struct be_tx_obj *txo; - u64 rx_pkts, tx_pkts; + u64 rx_pkts = 0, tx_pkts = 0; ulong now; u32 pps, delta; + int i; - for_all_evt_queues(adapter, eqo, i) { - aic = &adapter->aic_obj[eqo->idx]; - if (!aic->enable) { - if (aic->jiffies) - aic->jiffies = 0; - eqd = aic->et_eqd; - goto modify_eqd; - } + aic = &adapter->aic_obj[eqo->idx]; + if (!aic->enable) { + if (aic->jiffies) + aic->jiffies = 0; + eqd = aic->et_eqd; + return eqd; + } - rxo = &adapter->rx_obj[eqo->idx]; + for_all_rx_queues_on_eq(adapter, eqo, rxo, i) { do { start = u64_stats_fetch_begin_irq(&rxo->stats.sync); - rx_pkts = rxo->stats.rx_pkts; + rx_pkts += rxo->stats.rx_pkts; } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start)); + } - txo = &adapter->tx_obj[eqo->idx]; + for_all_tx_queues_on_eq(adapter, eqo, txo, i) { do { start = u64_stats_fetch_begin_irq(&txo->stats.sync); - tx_pkts = txo->stats.tx_reqs; + tx_pkts += txo->stats.tx_reqs; } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start)); + } - /* Skip, if wrapped around or first calculation */ - now = jiffies; - if (!aic->jiffies || time_before(now, aic->jiffies) || - rx_pkts < aic->rx_pkts_prev || - tx_pkts < aic->tx_reqs_prev) { - be_aic_update(aic, rx_pkts, tx_pkts, now); - continue; - } + /* Skip, if wrapped around or first calculation */ + now = jiffies; + if (!aic->jiffies || time_before(now, aic->jiffies) || + rx_pkts < aic->rx_pkts_prev || + tx_pkts < aic->tx_reqs_prev) { + be_aic_update(aic, rx_pkts, tx_pkts, now); + return aic->prev_eqd; + } - delta = jiffies_to_msecs(now - aic->jiffies); - pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) + - (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta); - eqd = (pps / 15000) << 2; + delta = jiffies_to_msecs(now - aic->jiffies); + if (delta == 0) + return aic->prev_eqd; - if (eqd < 8) - eqd = 0; - eqd = min_t(u32, eqd, aic->max_eqd); - eqd = max_t(u32, eqd, aic->min_eqd); + pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) + + (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta); + eqd = (pps / 15000) << 2; - be_aic_update(aic, rx_pkts, tx_pkts, now); -modify_eqd: - if (eqd != aic->prev_eqd) { + if (eqd < 8) + eqd = 0; + eqd = min_t(u32, eqd, aic->max_eqd); + eqd = max_t(u32, eqd, aic->min_eqd); + + be_aic_update(aic, rx_pkts, tx_pkts, now); + + return eqd; +} + +/* For Skyhawk-R only */ +static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo) +{ + struct be_adapter *adapter = eqo->adapter; + struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx]; + ulong now = jiffies; + int eqd; + u32 mult_enc; + + if (!aic->enable) + return 0; + + if (time_before_eq(now, aic->jiffies) || + jiffies_to_msecs(now - aic->jiffies) < 1) + eqd = aic->prev_eqd; + else + eqd = 
be_get_new_eqd(eqo); + + if (eqd > 100) + mult_enc = R2I_DLY_ENC_1; + else if (eqd > 60) + mult_enc = R2I_DLY_ENC_2; + else if (eqd > 20) + mult_enc = R2I_DLY_ENC_3; + else + mult_enc = R2I_DLY_ENC_0; + + aic->prev_eqd = eqd; + + return mult_enc; +} + +void be_eqd_update(struct be_adapter *adapter, bool force_update) +{ + struct be_set_eqd set_eqd[MAX_EVT_QS]; + struct be_aic_obj *aic; + struct be_eq_obj *eqo; + int i, num = 0, eqd; + + for_all_evt_queues(adapter, eqo, i) { + aic = &adapter->aic_obj[eqo->idx]; + eqd = be_get_new_eqd(eqo); + if (force_update || eqd != aic->prev_eqd) { set_eqd[num].delay_multiplier = (eqd * 65)/100; set_eqd[num].eq_id = eqo->q.id; aic->prev_eqd = eqd; @@ -2212,7 +2444,7 @@ static void be_eq_clean(struct be_eq_obj *eqo) { int num = events_get(eqo); - be_eq_notify(eqo->adapter, eqo->q.id, false, true, num); + be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0); } static void be_rx_cq_clean(struct be_rx_obj *rxo) @@ -2236,7 +2468,9 @@ static void be_rx_cq_clean(struct be_rx_obj *rxo) if (lancer_chip(adapter)) break; - if (flush_wait++ > 10 || be_hw_error(adapter)) { + if (flush_wait++ > 50 || + be_check_error(adapter, + BE_ERROR_HW)) { dev_warn(&adapter->pdev->dev, "did not receive flush compl\n"); break; @@ -2297,7 +2531,8 @@ static void be_tx_compl_clean(struct be_adapter *adapter) pending_txqs--; } - if (pending_txqs == 0 || ++timeo > 10 || be_hw_error(adapter)) + if (pending_txqs == 0 || ++timeo > 10 || + be_check_error(adapter, BE_ERROR_HW)) break; mdelay(1); @@ -2573,7 +2808,7 @@ static irqreturn_t be_intx(int irq, void *dev) if (num_evts) eqo->spurious_intr = 0; } - be_eq_notify(adapter, eqo->q.id, false, true, num_evts); + be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0); /* Return IRQ_HANDLED only for the first spurious intr * after a valid intr to stop the kernel from branding @@ -2589,7 +2824,7 @@ static irqreturn_t be_msix(int irq, void *dev) { struct be_eq_obj *eqo = dev; - be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0); + be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0); napi_schedule(&eqo->napi); return IRQ_HANDLED; } @@ -2838,6 +3073,7 @@ int be_poll(struct napi_struct *napi, int budget) int max_work = 0, work, i, num_evts; struct be_rx_obj *rxo; struct be_tx_obj *txo; + u32 mult_enc = 0; num_evts = events_get(eqo); @@ -2863,10 +3099,18 @@ int be_poll(struct napi_struct *napi, int budget) if (max_work < budget) { napi_complete(napi); - be_eq_notify(adapter, eqo->q.id, true, false, num_evts); + + /* Skyhawk EQ_DB has a provision to set the rearm to interrupt + * delay via a delay multiplier encoding value + */ + if (skyhawk_chip(adapter)) + mult_enc = be_get_eq_delay_mult_enc(eqo); + + be_eq_notify(adapter, eqo->q.id, true, false, num_evts, + mult_enc); } else { /* As we'll continue in polling mode, count and clear events */ - be_eq_notify(adapter, eqo->q.id, false, false, num_evts); + be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0); } return max_work; } @@ -2898,22 +3142,19 @@ void be_detect_error(struct be_adapter *adapter) u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0; u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; u32 i; - bool error_detected = false; struct device *dev = &adapter->pdev->dev; - struct net_device *netdev = adapter->netdev; - if (be_hw_error(adapter)) + if (be_check_error(adapter, BE_ERROR_HW)) return; if (lancer_chip(adapter)) { sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); if (sliport_status & SLIPORT_STATUS_ERR_MASK) { +
be_set_error(adapter, BE_ERROR_UE); sliport_err1 = ioread32(adapter->db + SLIPORT_ERROR1_OFFSET); sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET); - adapter->hw_error = true; - error_detected = true; /* Do not log error messages if its a FW reset */ if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && sliport_err2 == SLIPORT_ERROR_FW_RESET2) { @@ -2945,12 +3186,12 @@ void be_detect_error(struct be_adapter *adapter) */ if (ue_lo || ue_hi) { - error_detected = true; dev_err(dev, "Unrecoverable Error detected in the adapter"); dev_err(dev, "Please reboot server to recover"); if (skyhawk_chip(adapter)) - adapter->hw_error = true; + be_set_error(adapter, BE_ERROR_UE); + for (i = 0; ue_lo; ue_lo >>= 1, i++) { if (ue_lo & 1) dev_err(dev, "UE: %s bit set\n", @@ -2963,8 +3204,6 @@ void be_detect_error(struct be_adapter *adapter) } } } - if (error_detected) - netif_carrier_off(netdev); } static void be_msix_disable(struct be_adapter *adapter) @@ -3015,7 +3254,7 @@ fail: dev_warn(dev, "MSIx enable failed\n"); /* INTx is not supported in VFs, so fail probe if enable_msix fails */ - if (!be_physfn(adapter)) + if (be_virtfn(adapter)) return num_vec; return 0; } @@ -3062,7 +3301,7 @@ static int be_irq_register(struct be_adapter *adapter) if (status == 0) goto done; /* INTx is not supported for VF */ - if (!be_physfn(adapter)) + if (be_virtfn(adapter)) return status; } @@ -3229,9 +3468,12 @@ static int be_rx_qs_create(struct be_adapter *adapter) memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN); - /* First time posting */ + /* Post 1 less than RXQ-len to avoid head being equal to tail, + * which is a queue empty condition + */ for_all_rx_queues(adapter, rxo, i) - be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); + be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1); + return 0; } @@ -3263,7 +3505,7 @@ static int be_open(struct net_device *netdev) for_all_evt_queues(adapter, eqo, i) { napi_enable(&eqo->napi); be_enable_busy_poll(eqo); - be_eq_notify(adapter, eqo->q.id, true, true, 0); + be_eq_notify(adapter, eqo->q.id, true, true, 0, 0); } adapter->flags |= BE_FLAGS_NAPI_ENABLED; @@ -3563,7 +3805,7 @@ static int be_vfs_if_create(struct be_adapter *adapter) /* If a FW profile exists, then cap_flags are updated */ cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST; + BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS; for_all_vfs(adapter, vf_cfg, vf) { if (!BE3_chip(adapter)) { @@ -3610,6 +3852,7 @@ static int be_vf_setup(struct be_adapter *adapter) struct device *dev = &adapter->pdev->dev; struct be_vf_cfg *vf_cfg; int status, old_vfs, vf; + bool spoofchk; old_vfs = pci_num_vf(adapter->pdev); @@ -3657,6 +3900,12 @@ static int be_vf_setup(struct be_adapter *adapter) if (!old_vfs) be_cmd_config_qos(adapter, 0, 0, vf + 1); + status = be_cmd_get_hsw_config(adapter, NULL, vf + 1, + vf_cfg->if_handle, NULL, + &spoofchk); + if (!status) + vf_cfg->spoofchk = spoofchk; + if (!old_vfs) { be_cmd_enable_vf(adapter, vf + 1); be_cmd_set_logical_link_config(adapter, @@ -3733,8 +3982,9 @@ static void BEx_get_resources(struct be_adapter *adapter, * *only* if it is RSS-capable. 
*/ if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) || - !be_physfn(adapter) || (be_is_mc(adapter) && - !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) { + be_virtfn(adapter) || + (be_is_mc(adapter) && + !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) { res->max_tx_qs = 1; } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) { struct be_resources super_nic_res = {0}; @@ -4075,7 +4325,7 @@ static int be_func_init(struct be_adapter *adapter) msleep(100); /* We can clear all errors when function reset succeeds */ - be_clear_all_error(adapter); + be_clear_error(adapter, BE_CLEAR_ALL); } /* Tell FW we're ready to fire cmds */ @@ -4182,7 +4432,7 @@ static void be_netpoll(struct net_device *netdev) int i; for_all_evt_queues(adapter, eqo, i) { - be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0); + be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0); napi_schedule(&eqo->napi); } } @@ -4666,14 +4916,11 @@ static int lancer_fw_download(struct be_adapter *adapter, return 0; } -#define BE2_UFI 2 -#define BE3_UFI 3 -#define BE3R_UFI 10 -#define SH_UFI 4 -#define SH_P2_UFI 11 - -static int be_get_ufi_type(struct be_adapter *adapter, - struct flash_file_hdr_g3 *fhdr) +/* Check if the flash image file is compatible with the adapter that + * is being flashed. + */ +static bool be_check_ufi_compatibility(struct be_adapter *adapter, + struct flash_file_hdr_g3 *fhdr) { if (!fhdr) { dev_err(&adapter->pdev->dev, "Invalid FW UFI file"); @@ -4685,43 +4932,22 @@ static int be_get_ufi_type(struct be_adapter *adapter, */ switch (fhdr->build[0]) { case BLD_STR_UFI_TYPE_SH: - return (fhdr->asic_type_rev == ASIC_REV_P2) ? SH_P2_UFI : - SH_UFI; + if (!skyhawk_chip(adapter)) + return false; + break; case BLD_STR_UFI_TYPE_BE3: - return (fhdr->asic_type_rev == ASIC_REV_B0) ? BE3R_UFI : - BE3_UFI; + if (!BE3_chip(adapter)) + return false; + break; case BLD_STR_UFI_TYPE_BE2: - return BE2_UFI; - default: - return -1; - } -} - -/* Check if the flash image file is compatible with the adapter that - * is being flashed. - * BE3 chips with asic-rev B0 must be flashed only with BE3R_UFI type. - * Skyhawk chips with asic-rev P2 must be flashed only with SH_P2_UFI type. - */ -static bool be_check_ufi_compatibility(struct be_adapter *adapter, - struct flash_file_hdr_g3 *fhdr) -{ - int ufi_type = be_get_ufi_type(adapter, fhdr); - - switch (ufi_type) { - case SH_P2_UFI: - return skyhawk_chip(adapter); - case SH_UFI: - return (skyhawk_chip(adapter) && - adapter->asic_rev < ASIC_REV_P2); - case BE3R_UFI: - return BE3_chip(adapter); - case BE3_UFI: - return (BE3_chip(adapter) && adapter->asic_rev < ASIC_REV_B0); - case BE2_UFI: - return BE2_chip(adapter); + if (!BE2_chip(adapter)) + return false; + break; default: return false; } + + return (fhdr->asic_type_rev >= adapter->asic_rev); } static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) @@ -4829,7 +5055,7 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, adapter->if_handle, mode == BRIDGE_MODE_VEPA ? 
PORT_FWD_TYPE_VEPA : - PORT_FWD_TYPE_VEB); + PORT_FWD_TYPE_VEB, 0); if (status) goto err; @@ -4861,7 +5087,8 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, hsw_mode = PORT_FWD_TYPE_VEB; } else { status = be_cmd_get_hsw_config(adapter, NULL, 0, - adapter->if_handle, &hsw_mode); + adapter->if_handle, &hsw_mode, + NULL); if (status) return 0; } @@ -5014,6 +5241,7 @@ static const struct net_device_ops be_netdev_ops = { .ndo_set_vf_rate = be_set_vf_tx_rate, .ndo_get_vf_config = be_get_vf_config, .ndo_set_vf_link_state = be_set_vf_link_state, + .ndo_set_vf_spoofchk = be_set_vf_spoofchk, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = be_netpoll, #endif @@ -5118,7 +5346,7 @@ static void be_err_detection_task(struct work_struct *work) be_detect_error(adapter); - if (adapter->hw_error) { + if (be_check_error(adapter, BE_ERROR_HW)) { be_cleanup(adapter); /* As of now error recovery support is in Lancer only */ @@ -5182,7 +5410,9 @@ static void be_worker(struct work_struct *work) be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); } - be_eqd_update(adapter); + /* EQ-delay update for Skyhawk is done while notifying EQ */ + if (!skyhawk_chip(adapter)) + be_eqd_update(adapter, false); if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) be_log_sfp_info(adapter); @@ -5202,7 +5432,7 @@ static void be_unmap_pci_bars(struct be_adapter *adapter) static int db_bar(struct be_adapter *adapter) { - if (lancer_chip(adapter) || !be_physfn(adapter)) + if (lancer_chip(adapter) || be_virtfn(adapter)) return 0; else return 4; @@ -5382,6 +5612,30 @@ static void be_remove(struct pci_dev *pdev) free_netdev(adapter->netdev); } +static ssize_t be_hwmon_show_temp(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct be_adapter *adapter = dev_get_drvdata(dev); + + /* Unit: millidegree Celsius */ + if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP) + return -EIO; + else + return sprintf(buf, "%u\n", + adapter->hwmon_info.be_on_die_temp * 1000); +} + +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, + be_hwmon_show_temp, NULL, 1); + +static struct attribute *be_hwmon_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + NULL +}; + +ATTRIBUTE_GROUPS(be_hwmon); + static char *mc_name(struct be_adapter *adapter) { char *str = ""; /* default */ @@ -5501,6 +5755,16 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_schedule_err_detection(adapter); + /* On Die temperature not supported for VF. 
*/ + if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) { + adapter->hwmon_info.hwmon_dev = + devm_hwmon_device_register_with_groups(&pdev->dev, + DRV_NAME, + adapter, + be_hwmon_groups); + adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP; + } + dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev), func_name(adapter), mc_name(adapter), adapter->port_name); @@ -5593,8 +5857,8 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, dev_err(&adapter->pdev->dev, "EEH error detected\n"); - if (!adapter->eeh_error) { - adapter->eeh_error = true; + if (!be_check_error(adapter, BE_ERROR_EEH)) { + be_set_error(adapter, BE_ERROR_EEH); be_cancel_err_detection(adapter); @@ -5641,7 +5905,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev) return PCI_ERS_RESULT_DISCONNECT; pci_cleanup_aer_uncorrect_error_status(pdev); - be_clear_all_error(adapter); + be_clear_error(adapter, BE_CLEAR_ALL); return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 132866433a25..60368207bf58 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index e6f7eb1a7d87..cde6ef905ec4 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 66d47e448e4d..bf4cf3fbb5f2 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2118,6 +2118,82 @@ static void fec_enet_get_drvinfo(struct net_device *ndev, strlcpy(info->bus_info, dev_name(&ndev->dev), sizeof(info->bus_info)); } +static int fec_enet_get_regs_len(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct resource *r; + int s = 0; + + r = platform_get_resource(fep->pdev, IORESOURCE_MEM, 0); + if (r) + s = resource_size(r); + + return s; +} + +/* List of registers that can be safely read to dump them with ethtool */ +#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ + defined(CONFIG_M520x) || defined(CONFIG_M532x) || \ + defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) +static u32 fec_enet_register_offset[] = { + FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, + FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, + FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, FEC_OPD, FEC_TXIC0, FEC_TXIC1, + FEC_TXIC2, FEC_RXIC0, FEC_RXIC1, FEC_RXIC2, FEC_HASH_TABLE_HIGH, + FEC_HASH_TABLE_LOW, FEC_GRP_HASH_TABLE_HIGH, FEC_GRP_HASH_TABLE_LOW, + FEC_X_WMRK, FEC_R_BOUND, FEC_R_FSTART, FEC_R_DES_START_1, + FEC_X_DES_START_1, FEC_R_BUFF_SIZE_1, FEC_R_DES_START_2, + FEC_X_DES_START_2, FEC_R_BUFF_SIZE_2, FEC_R_DES_START_0, + FEC_X_DES_START_0, FEC_R_BUFF_SIZE_0, FEC_R_FIFO_RSFL, FEC_R_FIFO_RSEM, + FEC_R_FIFO_RAEM, FEC_R_FIFO_RAFL, FEC_RACC, FEC_RCMR_1, FEC_RCMR_2, + FEC_DMA_CFG_1, FEC_DMA_CFG_2, FEC_R_DES_ACTIVE_1, FEC_X_DES_ACTIVE_1, + FEC_R_DES_ACTIVE_2,
FEC_X_DES_ACTIVE_2, FEC_QOS_SCHEME, + RMON_T_DROP, RMON_T_PACKETS, RMON_T_BC_PKT, RMON_T_MC_PKT, + RMON_T_CRC_ALIGN, RMON_T_UNDERSIZE, RMON_T_OVERSIZE, RMON_T_FRAG, + RMON_T_JAB, RMON_T_COL, RMON_T_P64, RMON_T_P65TO127, RMON_T_P128TO255, + RMON_T_P256TO511, RMON_T_P512TO1023, RMON_T_P1024TO2047, + RMON_T_P_GTE2048, RMON_T_OCTETS, + IEEE_T_DROP, IEEE_T_FRAME_OK, IEEE_T_1COL, IEEE_T_MCOL, IEEE_T_DEF, + IEEE_T_LCOL, IEEE_T_EXCOL, IEEE_T_MACERR, IEEE_T_CSERR, IEEE_T_SQE, + IEEE_T_FDXFC, IEEE_T_OCTETS_OK, + RMON_R_PACKETS, RMON_R_BC_PKT, RMON_R_MC_PKT, RMON_R_CRC_ALIGN, + RMON_R_UNDERSIZE, RMON_R_OVERSIZE, RMON_R_FRAG, RMON_R_JAB, + RMON_R_RESVD_O, RMON_R_P64, RMON_R_P65TO127, RMON_R_P128TO255, + RMON_R_P256TO511, RMON_R_P512TO1023, RMON_R_P1024TO2047, + RMON_R_P_GTE2048, RMON_R_OCTETS, + IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR, + IEEE_R_FDXFC, IEEE_R_OCTETS_OK +}; +#else +static u32 fec_enet_register_offset[] = { + FEC_ECNTRL, FEC_IEVENT, FEC_IMASK, FEC_IVEC, FEC_R_DES_ACTIVE_0, + FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_0, + FEC_X_DES_ACTIVE_1, FEC_X_DES_ACTIVE_2, FEC_MII_DATA, FEC_MII_SPEED, + FEC_R_BOUND, FEC_R_FSTART, FEC_X_WMRK, FEC_X_FSTART, FEC_R_CNTRL, + FEC_MAX_FRM_LEN, FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, + FEC_GRP_HASH_TABLE_HIGH, FEC_GRP_HASH_TABLE_LOW, FEC_R_DES_START_0, + FEC_R_DES_START_1, FEC_R_DES_START_2, FEC_X_DES_START_0, + FEC_X_DES_START_1, FEC_X_DES_START_2, FEC_R_BUFF_SIZE_0, + FEC_R_BUFF_SIZE_1, FEC_R_BUFF_SIZE_2 +}; +#endif + +static void fec_enet_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *regbuf) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + u32 __iomem *theregs = (u32 __iomem *)fep->hwp; + u32 *buf = (u32 *)regbuf; + u32 i, off; + + memset(buf, 0, regs->len); + + for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { + off = fec_enet_register_offset[i] / 4; + buf[off] = readl(&theregs[off]); + } +} + static int fec_enet_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { @@ -2515,6 +2591,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .get_settings = fec_enet_get_settings, .set_settings = fec_enet_set_settings, .get_drvinfo = fec_enet_get_drvinfo, + .get_regs_len = fec_enet_get_regs_len, + .get_regs = fec_enet_get_regs, .nway_reset = fec_enet_nway_reset, .get_link = ethtool_op_get_link, .get_coalesce = fec_enet_get_coalesce, diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4ee080d49bc0..ff875028fdff 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -516,6 +516,15 @@ static struct net_device_stats *gfar_get_stats(struct net_device *dev) return &dev->stats; } +static int gfar_set_mac_addr(struct net_device *dev, void *p) +{ + eth_mac_addr(dev, p); + + gfar_set_mac_for_addr(dev, 0, dev->dev_addr); + + return 0; +} + static const struct net_device_ops gfar_netdev_ops = { .ndo_open = gfar_enet_open, .ndo_start_xmit = gfar_start_xmit, @@ -526,7 +535,7 @@ static const struct net_device_ops gfar_netdev_ops = { .ndo_tx_timeout = gfar_timeout, .ndo_do_ioctl = gfar_ioctl, .ndo_get_stats = gfar_get_stats, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = gfar_set_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = gfar_netpoll, @@ -1411,6 +1420,8 @@ static int gfar_probe(struct platform_device *ofdev) dev->features |= NETIF_F_HW_VLAN_CTAG_RX; } + dev->priv_flags |= 
IFF_LIVE_ADDR_CHANGE; + gfar_init_addr_hash_table(priv); /* Insert receive time stamps into padding alignment bytes */ @@ -2254,7 +2265,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) int i, rq = 0; int do_tstamp, do_csum, do_vlan; u32 bufaddr; - unsigned long flags; unsigned int nr_frags, nr_txbds, bytes_sent, fcb_len = 0; rq = skb->queue_mapping; @@ -2434,19 +2444,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(txq, bytes_sent); - /* We can work in parallel with gfar_clean_tx_ring(), except - * when modifying num_txbdfree. Note that we didn't grab the lock - * when we were reading the num_txbdfree and checking for available - * space, that's because outside of this function it can only grow, - * and once we've got needed space, it cannot suddenly disappear. - * - * The lock also protects us from gfar_error(), which can modify - * regs->tstat and thus retrigger the transfers, which is why we - * also must grab the lock before setting ready bit for the first - * to be transmitted BD. - */ - spin_lock_irqsave(&tx_queue->txlock, flags); - gfar_wmb(); txbdp_start->lstatus = cpu_to_be32(lstatus); @@ -2463,8 +2460,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_queue->cur_tx = next_txbd(txbdp, base, tx_queue->tx_ring_size); + /* We can work in parallel with gfar_clean_tx_ring(), except + * when modifying num_txbdfree. Note that we didn't grab the lock + * when we were reading the num_txbdfree and checking for available + * space, that's because outside of this function it can only grow. + */ + spin_lock_bh(&tx_queue->txlock); /* reduce TxBD free count */ tx_queue->num_txbdfree -= (nr_txbds); + spin_unlock_bh(&tx_queue->txlock); /* If the next BD still needs to be cleaned up, then the bds * are full. We need to tell the kernel to stop sending us stuff. 
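The txlock rework here (continued in the gfar_clean_tx_ring hunk just below) shrinks the critical sections to the one field both paths share, num_txbdfree: the xmit path takes the lock with the BH-disabling variant only around the counter decrement, and the NAPI cleanup path, already in softirq context, takes the plain lock around the increment. A minimal sketch of that pattern, with hypothetical names rather than the driver's real structures:

#include <linux/spinlock.h>

/* Illustrative only: stands in for gianfar's per-queue state. */
struct tx_ring {
	spinlock_t lock;	/* protects num_free and nothing else */
	int num_free;		/* free descriptors, touched by both paths */
};

/* xmit path (process/BH context): the _bh variant keeps the softirq
 * cleanup path from running on this CPU while we update the counter.
 */
static void tx_ring_consume(struct tx_ring *ring, int nr_bds)
{
	spin_lock_bh(&ring->lock);
	ring->num_free -= nr_bds;
	spin_unlock_bh(&ring->lock);
}

/* NAPI cleanup path (already softirq context): a plain lock suffices. */
static void tx_ring_release(struct tx_ring *ring, int nr_bds)
{
	spin_lock(&ring->lock);
	ring->num_free += nr_bds;
	spin_unlock(&ring->lock);
}

Unlocked reads of the counter in the xmit path stay safe for the reason the commit comment gives: once the available space has been checked, the count can only grow from the other side.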
@@ -2478,9 +2482,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Tell the DMA to go go go */ gfar_write(®s->tstat, TSTAT_CLEAR_THALT >> tx_queue->qindex); - /* Unlock priv */ - spin_unlock_irqrestore(&tx_queue->txlock, flags); - return NETDEV_TX_OK; dma_map_err: @@ -2622,7 +2623,6 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { - unsigned long flags; frags = skb_shinfo(skb)->nr_frags; @@ -2686,9 +2686,9 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) TX_RING_MOD_MASK(tx_ring_size); howmany++; - spin_lock_irqsave(&tx_queue->txlock, flags); + spin_lock(&tx_queue->txlock); tx_queue->num_txbdfree += nr_txbds; - spin_unlock_irqrestore(&tx_queue->txlock, flags); + spin_unlock(&tx_queue->txlock); } /* If we freed a buffer, we can restart transmission, if necessary */ @@ -3411,21 +3411,12 @@ static irqreturn_t gfar_error(int irq, void *grp_id) if (events & IEVENT_CRL) dev->stats.tx_aborted_errors++; if (events & IEVENT_XFUN) { - unsigned long flags; - netif_dbg(priv, tx_err, dev, "TX FIFO underrun, packet dropped\n"); dev->stats.tx_dropped++; atomic64_inc(&priv->extra_stats.tx_underrun); - local_irq_save(flags); - lock_tx_qs(priv); - - /* Reactivate the Tx Queues */ - gfar_write(®s->tstat, gfargrp->tstat); - - unlock_tx_qs(priv); - local_irq_restore(flags); + schedule_work(&priv->reset_task); } netif_dbg(priv, tx_err, dev, "Transmit Error\n"); } diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 3b39fdddeb57..d49bee38cd31 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -798,7 +798,7 @@ static void hip04_free_ring(struct net_device *ndev, struct device *d) for (i = 0; i < RX_DESC_NUM; i++) if (priv->rx_buf[i]) - put_page(virt_to_head_page(priv->rx_buf[i])); + skb_free_frag(priv->rx_buf[i]); for (i = 0; i < TX_DESC_NUM; i++) if (priv->tx_skb[i]) diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 0ffdcd381fdd..a5e077eac99a 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -500,7 +500,6 @@ static int hix5hd2_rx(struct net_device *dev, int limit) napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; - dev->last_rx = jiffies; next: pos = dma_ring_incr(pos, RX_DESC_NUM); } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 18134766a114..29bbb628d712 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -58,7 +58,7 @@ static struct kobj_type ktype_veth_pool; static const char ibmveth_driver_name[] = "ibmveth"; static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; -#define ibmveth_driver_version "1.04" +#define ibmveth_driver_version "1.05" MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>"); MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); @@ -100,6 +100,8 @@ struct ibmveth_stat ibmveth_stats[] = { { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) }, { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) }, { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) }, + { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) }, + { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) } }; /* simple methods of getting data from the 
current rxq entry */ @@ -852,6 +854,10 @@ static int ibmveth_set_features(struct net_device *dev, struct ibmveth_adapter *adapter = netdev_priv(dev); int rx_csum = !!(features & NETIF_F_RXCSUM); int rc; + netdev_features_t changed = features ^ dev->features; + + if (features & NETIF_F_TSO & changed) + netdev_info(dev, "TSO feature requires all partitions to have updated driver"); if (rx_csum == adapter->rx_csum) return 0; @@ -1035,6 +1041,15 @@ retry_bounce: descs[i+1].fields.address = dma_addr; } + if (skb_is_gso(skb) && !skb_is_gso_v6(skb)) { + /* Put -1 in the IP checksum to tell phyp it + * is a largesend packet and put the mss in the TCP checksum. + */ + ip_hdr(skb)->check = 0xffff; + tcp_hdr(skb)->check = cpu_to_be16(skb_shinfo(skb)->gso_size); + adapter->tx_large_packets++; + } + if (ibmveth_send(adapter, descs)) { adapter->tx_send_failed++; netdev->stats.tx_dropped++; @@ -1080,6 +1095,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) struct net_device *netdev = adapter->netdev; int frames_processed = 0; unsigned long lpar_rc; + struct iphdr *iph; restart_poll: while (frames_processed < budget) { @@ -1122,10 +1138,23 @@ restart_poll: skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if (csum_good) + if (csum_good) { skb->ip_summed = CHECKSUM_UNNECESSARY; + if (be16_to_cpu(skb->protocol) == ETH_P_IP) { + iph = (struct iphdr *)skb->data; + + /* If the IP checksum is not offloaded and if the packet + * is large send, the checksum must be rebuilt. + */ + if (iph->check == 0xffff) { + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + adapter->rx_large_packets++; + } + } + } - netif_receive_skb(skb); /* send it up */ + napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; netdev->stats.rx_bytes += length; @@ -1422,8 +1451,14 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; netdev->features |= netdev->hw_features; + /* TSO is disabled by default */ + netdev->hw_features |= NETIF_F_TSO; + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); + if (firmware_has_feature(FW_FEATURE_CMO)) + memcpy(pool_count, pool_count_cmo, sizeof(pool_count)); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; int error; diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 1f37499d4398..41dedb1fb2ae 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -104,7 +104,8 @@ static inline long h_illan_attributes(unsigned long unit_address, static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 512, 256, 256, 256 }; -static int pool_active[] = { 1, 1, 0, 0, 0}; +static int pool_count_cmo[] = { 256, 512, 256, 256, 64 }; +static int pool_active[] = { 1, 1, 0, 0, 1}; #define IBM_VETH_INVALID_MAP ((u16)0xffff) @@ -160,6 +161,8 @@ struct ibmveth_adapter { u64 rx_no_buffer; u64 tx_map_failed; u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; }; /* diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 1a450f4b6b12..35357ae2fe75 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -874,7 +874,7 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb, { struct cb *cb; unsigned long flags; - int err = 0; + int err; spin_lock_irqsave(&nic->cb_lock, flags); diff --git 
a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 983eb4e6f7aa..74dc15055971 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2079,11 +2079,6 @@ static void *e1000_alloc_frag(const struct e1000_adapter *a) return data; } -static void e1000_free_frag(const void *data) -{ - put_page(virt_to_head_page(data)); -} - /** * e1000_clean_rx_ring - Free Rx Buffers per Queue * @adapter: board private structure @@ -2107,7 +2102,7 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter, adapter->rx_buffer_len, DMA_FROM_DEVICE); if (buffer_info->rxbuf.data) { - e1000_free_frag(buffer_info->rxbuf.data); + skb_free_frag(buffer_info->rxbuf.data); buffer_info->rxbuf.data = NULL; } } else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) { @@ -4594,28 +4589,28 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, data = e1000_alloc_frag(adapter); /* Failed allocation, critical failure */ if (!data) { - e1000_free_frag(olddata); + skb_free_frag(olddata); adapter->alloc_rx_buff_failed++; break; } if (!e1000_check_64k_bound(adapter, data, bufsz)) { /* give up */ - e1000_free_frag(data); - e1000_free_frag(olddata); + skb_free_frag(data); + skb_free_frag(olddata); adapter->alloc_rx_buff_failed++; break; } /* Use new allocation */ - e1000_free_frag(olddata); + skb_free_frag(olddata); } buffer_info->dma = dma_map_single(&pdev->dev, data, adapter->rx_buffer_len, DMA_FROM_DEVICE); if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { - e1000_free_frag(data); + skb_free_frag(data); buffer_info->dma = 0; adapter->alloc_rx_buff_failed++; break; @@ -4637,7 +4632,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, adapter->rx_buffer_len, DMA_FROM_DEVICE); - e1000_free_frag(data); + skb_free_frag(data); buffer_info->rxbuf.data = NULL; buffer_info->dma = 0; diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index dc79ed85030b..32e77755a9c6 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2010,7 +2010,7 @@ const struct e1000_info e1000_82573_info = { .flags2 = FLAG2_DISABLE_ASPM_L1 | FLAG2_DISABLE_ASPM_L0S, .pba = 20, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_82571, .mac_ops = &e82571_mac_ops, .phy_ops = &e82_phy_ops_m88, diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9d81c0317433..e18443a00bdb 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1015,7 +1015,7 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) u16 max_snoop, max_nosnoop; u16 max_ltr_enc; /* max LTR latency encoded */ s64 lat_ns; /* latency (ns) */ - s64 value; + u64 value; u32 rxa; if (!hw->adapter->max_frame_size) { @@ -1042,12 +1042,13 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) */ lat_ns = ((s64)rxa * 1024 - (2 * (s64)hw->adapter->max_frame_size)) * 8 * 1000; - if (lat_ns < 0) - lat_ns = 0; - else - do_div(lat_ns, speed); + if (lat_ns < 0) { + value = 0; + } else { + value = lat_ns; + do_div(value, speed); + } - value = lat_ns; while (value > PCI_LTR_VALUE_MASK) { scale++; value = DIV_ROUND_UP(value, (1 << 5)); @@ -1563,7 +1564,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) ((adapter->hw.mac.type 
>= e1000_pch2lan) && (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) { adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES; - adapter->max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN; + adapter->max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; hw->mac.ops.blink_led = NULL; } @@ -5681,7 +5682,7 @@ const struct e1000_info e1000_ich8_info = { | FLAG_HAS_FLASH | FLAG_APME_IN_WUC, .pba = 8, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5754,7 +5755,7 @@ const struct e1000_info e1000_pch2_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5774,7 +5775,7 @@ const struct e1000_info e1000_pch_lpt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5794,7 +5795,7 @@ const struct e1000_info e1000_pch_spt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c509a5c900f5..7dd2c11c3f61 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3807,7 +3807,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /* reset Packet Buffer Allocation to default */ ew32(PBA, pba); - if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { + if (adapter->max_frame_size > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) { /* To maintain wire speed transmits, the Tx FIFO should be * large enough to accommodate two full transmit packets, * rounded up to the next 1KB and expressed in KB. 
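The e1000e hunks above and below all make the same substitution: frame-size arithmetic that used ETH_FRAME_LEN now uses VLAN_ETH_FRAME_LEN, so one 802.1Q tag is always budgeted for. That is also why the jumbo limits in ich8lan.c move from 9018 to 9022 bytes (9000 + 14-byte Ethernet header + 4-byte FCS gains the 4-byte VLAN header). A minimal sketch of the accounting, using only the stock kernel constants; the helper name is illustrative, not from the driver:

#include <linux/if_ether.h>	/* ETH_HLEN = 14, ETH_FCS_LEN = 4 */
#include <linux/if_vlan.h>	/* VLAN_HLEN = 4, VLAN_ETH_HLEN = ETH_HLEN + VLAN_HLEN */

/* Hypothetical helper mirroring the new e1000_sw_init()/e1000_change_mtu()
 * math below: reserve the VLAN tag unconditionally instead of only when
 * a tagged frame happens to arrive.
 */
static int e1000e_max_frame(int mtu)
{
	/* mtu 1500 -> 1500 + 18 + 4 = 1522 (was 1518 without the tag);
	 * mtu 9000 -> 9000 + 18 + 4 = 9022 (the new max_hw_frame_size).
	 */
	return mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
}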
Likewise, @@ -4196,9 +4196,9 @@ static int e1000_sw_init(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; adapter->rx_ps_bsize0 = 128; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + adapter->max_frame_size = netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; adapter->tx_ring_count = E1000_DEFAULT_TXD; adapter->rx_ring_count = E1000_DEFAULT_RXD; @@ -5781,17 +5781,17 @@ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, static int e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + VLAN_HLEN + ETH_HLEN + ETH_FCS_LEN; + int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; /* Jumbo frame support */ - if ((max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) && + if ((max_frame > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) && !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { e_err("Jumbo Frames not supported.\n"); return -EINVAL; } /* Supported frame sizes */ - if ((new_mtu < ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN) || + if ((new_mtu < (VLAN_ETH_ZLEN + ETH_FCS_LEN)) || (max_frame > adapter->max_hw_frame_size)) { e_err("Unsupported MTU setting\n"); return -EINVAL; @@ -5831,10 +5831,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) adapter->rx_buffer_len = 4096; /* adjust allocation if LPE protects us, and we aren't using SBP */ - if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || - (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN - + ETH_FCS_LEN; + if (max_frame <= (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; if (netif_running(netdev)) e1000e_up(adapter); @@ -6678,6 +6676,19 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter) } } +static netdev_features_t e1000_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + /* Jumbo frame workaround on 82579 and newer requires CRC be stripped */ + if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN)) + features &= ~NETIF_F_RXFCS; + + return features; +} + static int e1000_set_features(struct net_device *netdev, netdev_features_t features) { @@ -6734,6 +6745,7 @@ static const struct net_device_ops e1000e_netdev_ops = { .ndo_poll_controller = e1000_netpoll, #endif .ndo_set_features = e1000_set_features, + .ndo_fix_features = e1000_fix_features, }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 4bd3a80aba82..0b4a7be2c7d2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1688,7 +1688,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); - rx_ring->netdev->last_rx = jiffies; rx_desc->wb.qword1.status_error_len = 0; } while (likely(total_rx_packets < budget)); @@ -1821,7 +1820,6 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) #endif i40e_receive_skb(rx_ring, skb, vlan_tag); - rx_ring->netdev->last_rx = jiffies; rx_desc->wb.qword1.status_error_len = 0; } while (likely(total_rx_packets < budget)); diff --git 
a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index b077e02a0cc7..3ef23091439f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1156,7 +1156,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); - rx_ring->netdev->last_rx = jiffies; rx_desc->wb.qword1.status_error_len = 0; } while (likely(total_rx_packets < budget)); @@ -1271,7 +1270,6 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); - rx_ring->netdev->last_rx = jiffies; rx_desc->wb.qword1.status_error_len = 0; } while (likely(total_rx_packets < budget)); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a0a9b1fcb5e8..f287186192bb 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1836,31 +1836,19 @@ void igb_reinit_locked(struct igb_adapter *adapter) * * @adapter: adapter struct **/ -static s32 igb_enable_mas(struct igb_adapter *adapter) +static void igb_enable_mas(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 connsw; - s32 ret_val = 0; - - connsw = rd32(E1000_CONNSW); - if (!(hw->phy.media_type == e1000_media_type_copper)) - return ret_val; + u32 connsw = rd32(E1000_CONNSW); /* configure for SerDes media detect */ - if (!(connsw & E1000_CONNSW_SERDESD)) { + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_SERDESD))) { connsw |= E1000_CONNSW_ENRGSRC; connsw |= E1000_CONNSW_AUTOSENSE_EN; wr32(E1000_CONNSW, connsw); wrfl(); - } else if (connsw & E1000_CONNSW_SERDESD) { - /* already SerDes, no need to enable anything */ - return ret_val; - } else { - netdev_info(adapter->netdev, - "MAS: Unable to configure feature, disabling..\n"); - adapter->flags &= ~IGB_FLAG_MAS_ENABLE; } - return ret_val; } void igb_reset(struct igb_adapter *adapter) @@ -1980,10 +1968,9 @@ void igb_reset(struct igb_adapter *adapter) adapter->ei.get_invariants(hw); adapter->flags &= ~IGB_FLAG_MEDIA_RESET; } - if (adapter->flags & IGB_FLAG_MAS_ENABLE) { - if (igb_enable_mas(adapter)) - dev_err(&pdev->dev, - "Error enabling Media Auto Sense\n"); + if ((mac->type == e1000_82575) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + igb_enable_mas(adapter); } if (hw->mac.ops.init_hw(hw)) dev_err(&pdev->dev, "Hardware Error\n"); @@ -4989,6 +4976,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; + unsigned short f; u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; @@ -4999,14 +4987,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, * + 1 desc for context descriptor, * otherwise try next time */ - if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) { - unsigned short f; - - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - } else { - count += skb_shinfo(skb)->nr_frags; - } + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); if (igb_maybe_stop_tx(tx_ring, count + 3)) { /* this is a hard error */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index eafa9ec802ba..9f6fb19062a0 100644 --- 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3053,7 +3053,7 @@ static int ixgbe_get_module_info(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u32 status; + s32 status; u8 sff8472_rev, addr_mode; bool page_swap = false; @@ -3061,14 +3061,14 @@ static int ixgbe_get_module_info(struct net_device *dev, status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_SFF_8472_COMP, &sff8472_rev); - if (status != 0) + if (status) return -EIO; /* addressing mode is not supported */ status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_SFF_8472_SWAP, &addr_mode); - if (status != 0) + if (status) return -EIO; if (addr_mode & IXGBE_SFF_ADDRESSING_MODE) { @@ -3095,7 +3095,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u32 status = IXGBE_ERR_PHY_ADDR_INVALID; + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; u8 databyte = 0xFF; int i = 0; @@ -3112,7 +3112,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, else status = hw->phy.ops.read_i2c_sff8472(hw, i, &databyte); - if (status != 0) + if (status) return -EIO; data[i - ee->offset] = databyte; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5be12a00e1f4..23d82b34314e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4757,7 +4757,7 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { u32 speed; bool autoneg, link_up = false; - u32 ret = IXGBE_ERR_LINK_SETUP; + int ret = IXGBE_ERR_LINK_SETUP; if (hw->mac.ops.check_link) ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); @@ -8022,7 +8022,7 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { - u32 status; + int status; __u16 mode; if (nla_type(attr) != IFLA_BRIDGE_MODE) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 8a2be444113d..af828f89419f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -317,14 +317,14 @@ bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw) **/ static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) { - u32 status; + s32 status; u16 phy_id_high = 0; u16 phy_id_low = 0; status = hw->phy.ops.read_reg(hw, MDIO_DEVID1, MDIO_MMD_PMAPMD, &phy_id_high); - if (status == 0) { + if (!status) { hw->phy.id = (u32)(phy_id_high << 16); status = hw->phy.ops.read_reg(hw, MDIO_DEVID2, MDIO_MMD_PMAPMD, &phy_id_low); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index f5f948d08b43..0a8b5e42e1a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -696,14 +696,14 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */ - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - swsm &= ~IXGBE_SWSM_SMBI; - IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); - swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); swsm &= ~IXGBE_SWFW_REGSMP; IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm); + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + swsm &= ~IXGBE_SWSM_SMBI; + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + IXGBE_WRITE_FLUSH(hw); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cf5cf819a6b8..b0236985e915 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -103,6 +103,39 @@ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) return 0; } +/** + * ixgbe_iosf_wait - Wait for IOSF command completion + * @hw: pointer to hardware structure + * @ctrl: pointer to location to receive final IOSF control value + * + * Return: failing status on timeout + * + * Note: ctrl can be NULL if the IOSF control register value is not needed + */ +static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) +{ + u32 i, command; + + /* Check every 10 usec to see if the address cycle completed. + * The SB IOSF BUSY bit will clear when the operation is + * complete. + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); + if (!(command & IXGBE_SB_IOSF_CTRL_BUSY)) + break; + usleep_range(10, 20); + } + if (ctrl) + *ctrl = command; + if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { + hw_dbg(hw, "IOSF wait timed out\n"); + return IXGBE_ERR_PHY; + } + + return 0; +} + /** ixgbe_read_iosf_sb_reg_x550 - Writes a value to specified register of the * IOSF device * @hw: pointer to hardware structure @@ -113,7 +146,17 @@ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 *data) { - u32 i, command, error; + u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM; + u32 command, error; + s32 ret; + + ret = hw->mac.ops.acquire_swfw_sync(hw, gssr); + if (ret) + return ret; + + ret = ixgbe_iosf_wait(hw, NULL); + if (ret) + goto out; command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) | (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT)); @@ -121,17 +164,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, /* Write IOSF control register */ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command); - /* Check every 10 usec to see if the address cycle completed. 
- * The SB IOSF BUSY bit will clear when the operation is - * complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - usleep_range(10, 20); - - command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); - if ((command & IXGBE_SB_IOSF_CTRL_BUSY) == 0) - break; - } + ret = ixgbe_iosf_wait(hw, &command); if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> @@ -140,14 +173,12 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_ERR_PHY; } - if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { - hw_dbg(hw, "Read timed out\n"); - return IXGBE_ERR_PHY; - } - - *data = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA); + if (!ret) + *data = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA); - return 0; +out: + hw->mac.ops.release_swfw_sync(hw, gssr); + return ret; } /** ixgbe_read_ee_hostif_data_X550 - Read EEPROM word using a host interface @@ -789,7 +820,17 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 data) { - u32 i, command, error; + u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM; + u32 command, error; + s32 ret; + + ret = hw->mac.ops.acquire_swfw_sync(hw, gssr); + if (ret) + return ret; + + ret = ixgbe_iosf_wait(hw, NULL); + if (ret) + goto out; command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) | (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT)); @@ -800,17 +841,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, /* Write IOSF data register */ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA, data); - /* Check every 10 usec to see if the address cycle completed. - * The SB IOSF BUSY bit will clear when the operation is - * complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - usleep_range(10, 20); - - command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); - if ((command & IXGBE_SB_IOSF_CTRL_BUSY) == 0) - break; - } + ret = ixgbe_iosf_wait(hw, &command); if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> @@ -819,12 +850,9 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_ERR_PHY; } - if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { - hw_dbg(hw, "Write timed out\n"); - return IXGBE_ERR_PHY; - } - - return 0; +out: + hw->mac.ops.release_swfw_sync(hw, gssr); + return ret; } /** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. 
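Taken together, the two x550 hunks above factor the duplicated BUSY-bit polling out of ixgbe_read_iosf_sb_reg_x550() and ixgbe_write_iosf_sb_reg_x550() and bracket each indirect access with the PHY software/firmware semaphores. The resulting shape of both functions, condensed into one hypothetical wrapper (the register names and mac ops are the ones used in the hunks; decoding of the RESP_STAT error field is elided):

static s32 x550_iosf_access(struct ixgbe_hw *hw, u32 command)
{
	u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
	s32 ret;

	ret = hw->mac.ops.acquire_swfw_sync(hw, gssr);	/* serialize vs firmware */
	if (ret)
		return ret;

	ret = ixgbe_iosf_wait(hw, NULL);	/* bus must be idle before issuing */
	if (ret)
		goto out;

	IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command);
	ret = ixgbe_iosf_wait(hw, &command);	/* poll BUSY; keep final ctrl word */
out:
	hw->mac.ops.release_swfw_sync(hw, gssr);	/* dropped on every exit path */
	return ret;
}

Releasing in the out: path keeps the semaphore balanced even on a timeout, a concern the old open-coded versions never had because they did not take it at all.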
@@ -1035,7 +1063,7 @@ static s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw) **/ static s32 ixgbe_setup_internal_phy_x550em(struct ixgbe_hw *hw) { - u32 status; + s32 status; u16 lasi, autoneg_status, speed; ixgbe_link_speed force_speed; @@ -1177,7 +1205,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) **/ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) { - u32 status; + s32 status; u16 reg; u32 retries = 2; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ce5f7f9cff06..ecce8261ce3b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1359,7 +1359,7 @@ static void *mvneta_frag_alloc(const struct mvneta_port *pp) static void mvneta_frag_free(const struct mvneta_port *pp, void *data) { if (likely(pp->frag_size <= PAGE_SIZE)) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 81d0f1c86d6d..becbb5f1f5a7 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -244,7 +244,6 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget) napi_gro_receive(&priv->napi, skb); rx++; - ndev->last_rx = jiffies; priv->stats.rx_packets++; priv->stats.rx_bytes += len; if (desc0 & RX_DESC0_MULTICAST) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 25800a1dedcb..02b7115b6aaa 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -3871,9 +3871,6 @@ static int ql_adapter_reset(struct ql_adapter *qdev) return status; } - end_jiffies = jiffies + - max((unsigned long)1, usecs_to_jiffies(30)); - /* Check if bit is set then skip the mailbox command and * clear the bit, else we are in normal reset process. 
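The ql_adapter_reset() hunk that follows is a timing fix: end_jiffies used to be armed before the optional mailbox handling, so part of the 30-microsecond budget could already be gone by the time the function-reset bit was written. Moving the computation to just after the write also makes the max(1, ...) clamp unnecessary, since usecs_to_jiffies() already rounds a non-zero interval up to at least one jiffy. The resulting pattern, sketched (the poll loop's tail is not visible in this hunk, so the time_before() exit shown here is an assumption):

	ql_write32(qdev, RST_FO, (RST_FO_FR << 16) | RST_FO_FR);	/* kick the reset first */

	end_jiffies = jiffies + usecs_to_jiffies(30);	/* ...then arm the budget */
	do {
		value = ql_read32(qdev, RST_FO);
		if ((value & RST_FO_FR) == 0)	/* hardware cleared the bit: done */
			break;
	} while (time_before(jiffies, end_jiffies));	/* assumed loop tail */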
*/ @@ -3888,6 +3885,7 @@ static int ql_adapter_reset(struct ql_adapter *qdev) ql_write32(qdev, RST_FO, (RST_FO_FR << 16) | RST_FO_FR); + end_jiffies = jiffies + usecs_to_jiffies(30); do { value = ql_read32(qdev, RST_FO); if ((value & RST_FO_FR) == 0) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 6af028d5f9bc..2f87909f5186 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -839,7 +839,7 @@ static const struct of_device_id qca_spi_of_match[] = { MODULE_DEVICE_TABLE(of, qca_spi_of_match); static int -qca_spi_probe(struct spi_device *spi_device) +qca_spi_probe(struct spi_device *spi) { struct qcaspi *qca = NULL; struct net_device *qcaspi_devs = NULL; @@ -847,52 +847,52 @@ qca_spi_probe(struct spi_device *spi_device) u16 signature; const char *mac; - if (!spi_device->dev.of_node) { - dev_err(&spi_device->dev, "Missing device tree\n"); + if (!spi->dev.of_node) { + dev_err(&spi->dev, "Missing device tree\n"); return -EINVAL; } - legacy_mode = of_property_read_bool(spi_device->dev.of_node, + legacy_mode = of_property_read_bool(spi->dev.of_node, "qca,legacy-mode"); if (qcaspi_clkspeed == 0) { - if (spi_device->max_speed_hz) - qcaspi_clkspeed = spi_device->max_speed_hz; + if (spi->max_speed_hz) + qcaspi_clkspeed = spi->max_speed_hz; else qcaspi_clkspeed = QCASPI_CLK_SPEED; } if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) || (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) { - dev_info(&spi_device->dev, "Invalid clkspeed: %d\n", + dev_info(&spi->dev, "Invalid clkspeed: %d\n", qcaspi_clkspeed); return -EINVAL; } if ((qcaspi_burst_len < QCASPI_BURST_LEN_MIN) || (qcaspi_burst_len > QCASPI_BURST_LEN_MAX)) { - dev_info(&spi_device->dev, "Invalid burst len: %d\n", + dev_info(&spi->dev, "Invalid burst len: %d\n", qcaspi_burst_len); return -EINVAL; } if ((qcaspi_pluggable < QCASPI_PLUGGABLE_MIN) || (qcaspi_pluggable > QCASPI_PLUGGABLE_MAX)) { - dev_info(&spi_device->dev, "Invalid pluggable: %d\n", + dev_info(&spi->dev, "Invalid pluggable: %d\n", qcaspi_pluggable); return -EINVAL; } - dev_info(&spi_device->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n", + dev_info(&spi->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n", QCASPI_DRV_VERSION, qcaspi_clkspeed, qcaspi_burst_len, qcaspi_pluggable); - spi_device->mode = SPI_MODE_3; - spi_device->max_speed_hz = qcaspi_clkspeed; - if (spi_setup(spi_device) < 0) { - dev_err(&spi_device->dev, "Unable to setup SPI device\n"); + spi->mode = SPI_MODE_3; + spi->max_speed_hz = qcaspi_clkspeed; + if (spi_setup(spi) < 0) { + dev_err(&spi->dev, "Unable to setup SPI device\n"); return -EFAULT; } @@ -905,23 +905,23 @@ qca_spi_probe(struct spi_device *spi_device) qca = netdev_priv(qcaspi_devs); if (!qca) { free_netdev(qcaspi_devs); - dev_err(&spi_device->dev, "Fail to retrieve private structure\n"); + dev_err(&spi->dev, "Fail to retrieve private structure\n"); return -ENOMEM; } qca->net_dev = qcaspi_devs; - qca->spi_dev = spi_device; + qca->spi_dev = spi; qca->legacy_mode = legacy_mode; - spi_set_drvdata(spi_device, qcaspi_devs); + spi_set_drvdata(spi, qcaspi_devs); - mac = of_get_mac_address(spi_device->dev.of_node); + mac = of_get_mac_address(spi->dev.of_node); if (mac) ether_addr_copy(qca->net_dev->dev_addr, mac); if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { eth_hw_addr_random(qca->net_dev); - dev_info(&spi_device->dev, "Using random MAC address: %pM\n", + dev_info(&spi->dev, "Using random MAC address: %pM\n", qca->net_dev->dev_addr); } @@ -932,7 +932,7 @@ 
qca_spi_probe(struct spi_device *spi_device) qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature); if (signature != QCASPI_GOOD_SIGNATURE) { - dev_err(&spi_device->dev, "Invalid signature (0x%04X)\n", + dev_err(&spi->dev, "Invalid signature (0x%04X)\n", signature); free_netdev(qcaspi_devs); return -EFAULT; @@ -940,7 +940,7 @@ qca_spi_probe(struct spi_device *spi_device) } if (register_netdev(qcaspi_devs)) { - dev_info(&spi_device->dev, "Unable to register net device %s\n", + dev_info(&spi->dev, "Unable to register net device %s\n", qcaspi_devs->name); free_netdev(qcaspi_devs); return -EFAULT; @@ -952,9 +952,9 @@ qca_spi_probe(struct spi_device *spi_device) } static int -qca_spi_remove(struct spi_device *spi_device) +qca_spi_remove(struct spi_device *spi) { - struct net_device *qcaspi_devs = spi_get_drvdata(spi_device); + struct net_device *qcaspi_devs = spi_get_drvdata(spi); struct qcaspi *qca = netdev_priv(qcaspi_devs); qcaspi_remove_device_debugfs(qca); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cf98cc9bbc8d..701ffc200917 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -181,7 +181,7 @@ struct rocker_desc_info { size_t data_size; size_t tlv_size; struct rocker_desc *desc; - DEFINE_DMA_UNMAP_ADDR(mapaddr); + dma_addr_t mapaddr; }; struct rocker_dma_ring_info { @@ -225,6 +225,7 @@ struct rocker_port { struct napi_struct napi_rx; struct rocker_dma_ring_info tx_ring; struct rocker_dma_ring_info rx_ring; + struct list_head trans_mem; }; struct rocker { @@ -236,21 +237,21 @@ struct rocker { struct { u64 id; } hw; - spinlock_t cmd_ring_lock; + spinlock_t cmd_ring_lock; /* for cmd ring accesses */ struct rocker_dma_ring_info cmd_ring; struct rocker_dma_ring_info event_ring; DECLARE_HASHTABLE(flow_tbl, 16); - spinlock_t flow_tbl_lock; + spinlock_t flow_tbl_lock; /* for flow tbl accesses */ u64 flow_tbl_next_cookie; DECLARE_HASHTABLE(group_tbl, 16); - spinlock_t group_tbl_lock; + spinlock_t group_tbl_lock; /* for group tbl accesses */ DECLARE_HASHTABLE(fdb_tbl, 16); - spinlock_t fdb_tbl_lock; + spinlock_t fdb_tbl_lock; /* for fdb tbl accesses */ unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; DECLARE_HASHTABLE(internal_vlan_tbl, 8); - spinlock_t internal_vlan_tbl_lock; + spinlock_t internal_vlan_tbl_lock; /* for vlan tbl accesses */ DECLARE_HASHTABLE(neigh_tbl, 16); - spinlock_t neigh_tbl_lock; + spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ u32 neigh_tbl_next_index; }; @@ -325,16 +326,83 @@ static bool rocker_port_is_bridged(struct rocker_port *rocker_port) return !!rocker_port->bridge_dev; } +static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, + enum switchdev_trans trans, size_t size) +{ + struct list_head *elem = NULL; + + /* If in transaction prepare phase, allocate the memory + * and enqueue it on a per-port list. If in transaction + * commit phase, dequeue the memory from the per-port list + * rather than re-allocating the memory. The idea is the + * driver code paths for prepare and commit are identical + * so the memory allocated in the prepare phase is the + * memory used in the commit phase. 
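This allocator is the core of the rocker switchdev transaction support added below: in the SWITCHDEV_TRANS_PREPARE phase every allocation really happens and is parked on the per-port trans_mem list, and in SWITCHDEV_TRANS_COMMIT the identical code path runs again but dequeues the parked element instead of allocating, so the commit phase can no longer fail with -ENOMEM. A sketch of how a caller is expected to use the helpers; the entry type and function are hypothetical:

struct my_entry {
	u32 cookie;	/* hypothetical per-op bookkeeping */
};

static int rocker_port_do_op(struct rocker_port *rocker_port,
			     enum switchdev_trans trans)
{
	struct my_entry *entry;

	entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry));
	if (!entry)
		return -ENOMEM;	/* only reachable in PREPARE or TRANS_NONE */

	/* ... fill in *entry and, outside of PREPARE, touch hardware ... */

	rocker_port_kfree(rocker_port, trans, entry);	/* no-op in PREPARE */
	return 0;
}

Because frees are deliberately ignored during PREPARE, the element stays parked until the same path re-runs at COMMIT, dequeues it, and frees it for real.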
+ */ + + switch (trans) { + case SWITCHDEV_TRANS_PREPARE: + elem = kzalloc(size + sizeof(*elem), GFP_KERNEL); + if (!elem) + return NULL; + list_add_tail(elem, &rocker_port->trans_mem); + break; + case SWITCHDEV_TRANS_COMMIT: + BUG_ON(list_empty(&rocker_port->trans_mem)); + elem = rocker_port->trans_mem.next; + list_del_init(elem); + break; + case SWITCHDEV_TRANS_NONE: + elem = kzalloc(size + sizeof(*elem), GFP_KERNEL); + if (elem) + INIT_LIST_HEAD(elem); + break; + default: + break; + } + + return elem ? elem + 1 : NULL; +} + +static void *rocker_port_kzalloc(struct rocker_port *rocker_port, + enum switchdev_trans trans, size_t size) +{ + return __rocker_port_mem_alloc(rocker_port, trans, size); +} + +static void *rocker_port_kcalloc(struct rocker_port *rocker_port, + enum switchdev_trans trans, size_t n, + size_t size) +{ + return __rocker_port_mem_alloc(rocker_port, trans, n * size); +} + +static void rocker_port_kfree(struct rocker_port *rocker_port, + enum switchdev_trans trans, const void *mem) +{ + struct list_head *elem; + + /* Frees are ignored if in transaction prepare phase. The + * memory remains on the per-port list until freed in the + * commit phase. + */ + + if (trans == SWITCHDEV_TRANS_PREPARE) + return; + + elem = (struct list_head *)mem - 1; + BUG_ON(!list_empty(elem)); + kfree(elem); +} + struct rocker_wait { wait_queue_head_t wait; bool done; - bool nowait; }; static void rocker_wait_reset(struct rocker_wait *wait) { wait->done = false; - wait->nowait = false; } static void rocker_wait_init(struct rocker_wait *wait) @@ -343,20 +411,23 @@ static void rocker_wait_init(struct rocker_wait *wait) rocker_wait_reset(wait); } -static struct rocker_wait *rocker_wait_create(gfp_t gfp) +static struct rocker_wait *rocker_wait_create(struct rocker_port *rocker_port, + enum switchdev_trans trans) { struct rocker_wait *wait; - wait = kmalloc(sizeof(*wait), gfp); + wait = rocker_port_kzalloc(rocker_port, trans, sizeof(*wait)); if (!wait) return NULL; rocker_wait_init(wait); return wait; } -static void rocker_wait_destroy(struct rocker_wait *work) +static void rocker_wait_destroy(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct rocker_wait *wait) { - kfree(work); + rocker_port_kfree(rocker_port, trans, wait); } static bool rocker_wait_event_timeout(struct rocker_wait *wait, @@ -1317,12 +1388,7 @@ static irqreturn_t rocker_cmd_irq_handler(int irq, void *dev_id) spin_lock(&rocker->cmd_ring_lock); while ((desc_info = rocker_desc_tail_get(&rocker->cmd_ring))) { wait = rocker_desc_cookie_ptr_get(desc_info); - if (wait->nowait) { - rocker_desc_gen_clear(desc_info); - rocker_wait_destroy(wait); - } else { - rocker_wait_wake_up(wait); - } + rocker_wait_wake_up(wait); credits++; } spin_unlock(&rocker->cmd_ring_lock); @@ -1374,22 +1440,44 @@ static int rocker_event_link_change(struct rocker *rocker, } #define ROCKER_OP_FLAG_REMOVE BIT(0) -#define ROCKER_OP_FLAG_NOWAIT BIT(1) -#define ROCKER_OP_FLAG_LEARNED BIT(2) -#define ROCKER_OP_FLAG_REFRESH BIT(3) +#define ROCKER_OP_FLAG_LEARNED BIT(1) +#define ROCKER_OP_FLAG_REFRESH BIT(2) static int rocker_port_fdb(struct rocker_port *rocker_port, + enum switchdev_trans trans, const unsigned char *addr, __be16 vlan_id, int flags); +struct rocker_mac_vlan_seen_work { + struct work_struct work; + struct rocker_port *rocker_port; + int flags; + unsigned char addr[ETH_ALEN]; + __be16 vlan_id; +}; + +static void rocker_event_mac_vlan_seen_work(struct work_struct *work) +{ + struct rocker_mac_vlan_seen_work *sw = + container_of(work, 
struct rocker_mac_vlan_seen_work, work); + + rtnl_lock(); + rocker_port_fdb(sw->rocker_port, SWITCHDEV_TRANS_NONE, + sw->addr, sw->vlan_id, sw->flags); + rtnl_unlock(); + + kfree(work); +} + static int rocker_event_mac_vlan_seen(struct rocker *rocker, const struct rocker_tlv *info) { + struct rocker_mac_vlan_seen_work *sw; struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAX + 1]; unsigned int port_number; struct rocker_port *rocker_port; unsigned char *addr; - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_LEARNED; + int flags = ROCKER_OP_FLAG_LEARNED; __be16 vlan_id; rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_MAC_VLAN_MAX, info); @@ -1411,7 +1499,20 @@ static int rocker_event_mac_vlan_seen(struct rocker *rocker, rocker_port->stp_state != BR_STATE_FORWARDING) return 0; - return rocker_port_fdb(rocker_port, addr, vlan_id, flags); + sw = kmalloc(sizeof(*sw), GFP_ATOMIC); + if (!sw) + return -ENOMEM; + + INIT_WORK(&sw->work, rocker_event_mac_vlan_seen_work); + + sw->rocker_port = rocker_port; + sw->flags = flags; + ether_addr_copy(sw->addr, addr); + sw->vlan_id = vlan_id; + + schedule_work(&sw->work); + + return 0; } static int rocker_event_process(struct rocker *rocker, @@ -1494,41 +1595,44 @@ typedef int (*rocker_cmd_cb_t)(struct rocker *rocker, static int rocker_cmd_exec(struct rocker *rocker, struct rocker_port *rocker_port, + enum switchdev_trans trans, rocker_cmd_cb_t prepare, void *prepare_priv, - rocker_cmd_cb_t process, void *process_priv, - bool nowait) + rocker_cmd_cb_t process, void *process_priv) { struct rocker_desc_info *desc_info; struct rocker_wait *wait; unsigned long flags; int err; - wait = rocker_wait_create(nowait ? GFP_ATOMIC : GFP_KERNEL); + wait = rocker_wait_create(rocker_port, trans); if (!wait) return -ENOMEM; - wait->nowait = nowait; spin_lock_irqsave(&rocker->cmd_ring_lock, flags); + desc_info = rocker_desc_head_get(&rocker->cmd_ring); if (!desc_info) { spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); err = -EAGAIN; goto out; } + err = prepare(rocker, rocker_port, desc_info, prepare_priv); if (err) { spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); goto out; } + rocker_desc_cookie_ptr_set(desc_info, wait); - rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info); - spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); - if (nowait) - return 0; + if (trans != SWITCHDEV_TRANS_PREPARE) + rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info); - if (!rocker_wait_event_timeout(wait, HZ / 10)) - return -EIO; + spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); + + if (trans != SWITCHDEV_TRANS_PREPARE) + if (!rocker_wait_event_timeout(wait, HZ / 10)) + return -EIO; err = rocker_desc_err(desc_info); if (err) @@ -1539,7 +1643,7 @@ static int rocker_cmd_exec(struct rocker *rocker, rocker_desc_gen_clear(desc_info); out: - rocker_wait_destroy(wait); + rocker_wait_destroy(rocker_port, trans, wait); return err; } @@ -1762,41 +1866,46 @@ static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port, struct ethtool_cmd *ecmd) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_settings_prep, NULL, rocker_cmd_get_port_settings_ethtool_proc, - ecmd, false); + ecmd); } static int rocker_cmd_get_port_settings_macaddr(struct rocker_port *rocker_port, unsigned char *macaddr) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_settings_prep, NULL, rocker_cmd_get_port_settings_macaddr_proc, - macaddr, false); + macaddr); } static 
int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port, struct ethtool_cmd *ecmd) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_set_port_settings_ethtool_prep, - ecmd, NULL, NULL, false); + ecmd, NULL, NULL); } static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port, unsigned char *macaddr) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_set_port_settings_macaddr_prep, - macaddr, NULL, NULL, false); + macaddr, NULL, NULL); } -static int rocker_port_set_learning(struct rocker_port *rocker_port) +static int rocker_port_set_learning(struct rocker_port *rocker_port, + enum switchdev_trans trans) { - return rocker_cmd_exec(rocker_port->rocker, rocker_port, + return rocker_cmd_exec(rocker_port->rocker, rocker_port, trans, rocker_cmd_set_port_learning_prep, - NULL, NULL, NULL, false); + NULL, NULL, NULL); } static int rocker_cmd_flow_tbl_add_ig_port(struct rocker_desc_info *desc_info, @@ -2308,8 +2417,8 @@ rocker_flow_tbl_find(struct rocker *rocker, struct rocker_flow_tbl_entry *match) } static int rocker_flow_tbl_add(struct rocker_port *rocker_port, - struct rocker_flow_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_flow_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_flow_tbl_entry *found; @@ -2324,8 +2433,9 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port, if (found) { match->cookie = found->cookie; - hash_del(&found->entry); - kfree(found); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + rocker_port_kfree(rocker_port, trans, found); found = match; found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD; } else { @@ -2334,18 +2444,19 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port, found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD; } - hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - return rocker_cmd_exec(rocker, rocker_port, + return rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_flow_tbl_add, - found, NULL, NULL, nowait); + found, NULL, NULL); } static int rocker_flow_tbl_del(struct rocker_port *rocker_port, - struct rocker_flow_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_flow_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_flow_tbl_entry *found; @@ -2360,47 +2471,43 @@ static int rocker_flow_tbl_del(struct rocker_port *rocker_port, found = rocker_flow_tbl_find(rocker, match); if (found) { - hash_del(&found->entry); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL; } spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - kfree(match); + rocker_port_kfree(rocker_port, trans, match); if (found) { - err = rocker_cmd_exec(rocker, rocker_port, + err = rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_flow_tbl_del, - found, NULL, NULL, nowait); - kfree(found); + found, NULL, NULL); + rocker_port_kfree(rocker_port, trans, found); } return err; } -static gfp_t rocker_op_flags_gfp(int flags) -{ - return flags & ROCKER_OP_FLAG_NOWAIT ? 
GFP_ATOMIC : GFP_KERNEL; -} - static int rocker_flow_tbl_do(struct rocker_port *rocker_port, - int flags, struct rocker_flow_tbl_entry *entry) + enum switchdev_trans trans, int flags, + struct rocker_flow_tbl_entry *entry) { - bool nowait = flags & ROCKER_OP_FLAG_NOWAIT; - if (flags & ROCKER_OP_FLAG_REMOVE) - return rocker_flow_tbl_del(rocker_port, entry, nowait); + return rocker_flow_tbl_del(rocker_port, trans, entry); else - return rocker_flow_tbl_add(rocker_port, entry, nowait); + return rocker_flow_tbl_add(rocker_port, trans, entry); } static int rocker_flow_tbl_ig_port(struct rocker_port *rocker_port, - int flags, u32 in_pport, u32 in_pport_mask, + enum switchdev_trans trans, int flags, + u32 in_pport, u32 in_pport_mask, enum rocker_of_dpa_table_id goto_tbl) { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2410,18 +2517,19 @@ static int rocker_flow_tbl_ig_port(struct rocker_port *rocker_port, entry->key.ig_port.in_pport_mask = in_pport_mask; entry->key.ig_port.goto_tbl = goto_tbl; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_vlan(struct rocker_port *rocker_port, - int flags, u32 in_pport, - __be16 vlan_id, __be16 vlan_id_mask, + enum switchdev_trans trans, int flags, + u32 in_pport, __be16 vlan_id, + __be16 vlan_id_mask, enum rocker_of_dpa_table_id goto_tbl, bool untagged, __be16 new_vlan_id) { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2435,10 +2543,11 @@ static int rocker_flow_tbl_vlan(struct rocker_port *rocker_port, entry->key.vlan.untagged = untagged; entry->key.vlan.new_vlan_id = new_vlan_id; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, + enum switchdev_trans trans, u32 in_pport, u32 in_pport_mask, __be16 eth_type, const u8 *eth_dst, const u8 *eth_dst_mask, __be16 vlan_id, @@ -2447,7 +2556,7 @@ static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2471,11 +2580,11 @@ static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, entry->key.term_mac.vlan_id_mask = vlan_id_mask; entry->key.term_mac.copy_to_cpu = copy_to_cpu; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, - int flags, + enum switchdev_trans trans, int flags, const u8 *eth_dst, const u8 *eth_dst_mask, __be16 vlan_id, u32 tunnel_id, enum rocker_of_dpa_table_id goto_tbl, @@ -2487,7 +2596,7 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, bool dflt = !eth_dst || (eth_dst && eth_dst_mask); bool wild = false; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2500,7 +2609,7 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, if (eth_dst_mask) { 
entry->key.bridge.has_eth_dst_mask = 1; ether_addr_copy(entry->key.bridge.eth_dst_mask, eth_dst_mask); - if (memcmp(eth_dst_mask, ff_mac, ETH_ALEN)) + if (!ether_addr_equal(eth_dst_mask, ff_mac)) wild = true; } @@ -2525,10 +2634,11 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, entry->key.bridge.group_id = group_id; entry->key.bridge.copy_to_cpu = copy_to_cpu; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, + enum switchdev_trans trans, __be16 eth_type, __be32 dst, __be32 dst_mask, u32 priority, enum rocker_of_dpa_table_id goto_tbl, @@ -2536,7 +2646,7 @@ static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2550,30 +2660,29 @@ static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, entry->key_len = offsetof(struct rocker_flow_tbl_key, ucast_routing.group_id); - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_acl(struct rocker_port *rocker_port, - int flags, u32 in_pport, - u32 in_pport_mask, + enum switchdev_trans trans, int flags, + u32 in_pport, u32 in_pport_mask, const u8 *eth_src, const u8 *eth_src_mask, const u8 *eth_dst, const u8 *eth_dst_mask, - __be16 eth_type, - __be16 vlan_id, __be16 vlan_id_mask, - u8 ip_proto, u8 ip_proto_mask, - u8 ip_tos, u8 ip_tos_mask, + __be16 eth_type, __be16 vlan_id, + __be16 vlan_id_mask, u8 ip_proto, + u8 ip_proto_mask, u8 ip_tos, u8 ip_tos_mask, u32 group_id) { u32 priority; struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; priority = ROCKER_PRIORITY_ACL_NORMAL; if (eth_dst && eth_dst_mask) { - if (memcmp(eth_dst_mask, mcast_mac, ETH_ALEN) == 0) + if (ether_addr_equal(eth_dst_mask, mcast_mac)) priority = ROCKER_PRIORITY_ACL_DFLT; else if (is_link_local_ether_addr(eth_dst)) priority = ROCKER_PRIORITY_ACL_CTRL; @@ -2602,7 +2711,7 @@ static int rocker_flow_tbl_acl(struct rocker_port *rocker_port, entry->key.acl.ip_tos_mask = ip_tos_mask; entry->key.acl.group_id = group_id; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static struct rocker_group_tbl_entry * @@ -2620,22 +2729,24 @@ rocker_group_tbl_find(struct rocker *rocker, return NULL; } -static void rocker_group_tbl_entry_free(struct rocker_group_tbl_entry *entry) +static void rocker_group_tbl_entry_free(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct rocker_group_tbl_entry *entry) { switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: - kfree(entry->group_ids); + rocker_port_kfree(rocker_port, trans, entry->group_ids); break; default: break; } - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); } static int rocker_group_tbl_add(struct rocker_port *rocker_port, - struct rocker_group_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_group_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_group_tbl_entry *found; @@ 
-2646,8 +2757,9 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port, found = rocker_group_tbl_find(rocker, match); if (found) { - hash_del(&found->entry); - rocker_group_tbl_entry_free(found); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + rocker_group_tbl_entry_free(rocker_port, trans, found); found = match; found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD; } else { @@ -2655,18 +2767,19 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port, found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD; } - hash_add(rocker->group_tbl, &found->entry, found->group_id); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_add(rocker->group_tbl, &found->entry, found->group_id); spin_unlock_irqrestore(&rocker->group_tbl_lock, flags); - return rocker_cmd_exec(rocker, rocker_port, + return rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_group_tbl_add, - found, NULL, NULL, nowait); + found, NULL, NULL); } static int rocker_group_tbl_del(struct rocker_port *rocker_port, - struct rocker_group_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_group_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_group_tbl_entry *found; @@ -2678,93 +2791,95 @@ static int rocker_group_tbl_del(struct rocker_port *rocker_port, found = rocker_group_tbl_find(rocker, match); if (found) { - hash_del(&found->entry); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL; } spin_unlock_irqrestore(&rocker->group_tbl_lock, flags); - rocker_group_tbl_entry_free(match); + rocker_group_tbl_entry_free(rocker_port, trans, match); if (found) { - err = rocker_cmd_exec(rocker, rocker_port, + err = rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_group_tbl_del, - found, NULL, NULL, nowait); - rocker_group_tbl_entry_free(found); + found, NULL, NULL); + rocker_group_tbl_entry_free(rocker_port, trans, found); } return err; } static int rocker_group_tbl_do(struct rocker_port *rocker_port, - int flags, struct rocker_group_tbl_entry *entry) + enum switchdev_trans trans, int flags, + struct rocker_group_tbl_entry *entry) { - bool nowait = flags & ROCKER_OP_FLAG_NOWAIT; - if (flags & ROCKER_OP_FLAG_REMOVE) - return rocker_group_tbl_del(rocker_port, entry, nowait); + return rocker_group_tbl_del(rocker_port, trans, entry); else - return rocker_group_tbl_add(rocker_port, entry, nowait); + return rocker_group_tbl_add(rocker_port, trans, entry); } static int rocker_group_l2_interface(struct rocker_port *rocker_port, - int flags, __be16 vlan_id, - u32 out_pport, int pop_vlan) + enum switchdev_trans trans, int flags, + __be16 vlan_id, u32 out_pport, + int pop_vlan) { struct rocker_group_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; entry->group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); entry->l2_interface.pop_vlan = pop_vlan; - return rocker_group_tbl_do(rocker_port, flags, entry); + return rocker_group_tbl_do(rocker_port, trans, flags, entry); } static int rocker_group_l2_fan_out(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, u8 group_count, u32 *group_ids, u32 group_id) { struct rocker_group_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; entry->group_id = group_id; entry->group_count = 
group_count; - entry->group_ids = kcalloc(group_count, sizeof(u32), - rocker_op_flags_gfp(flags)); + entry->group_ids = rocker_port_kcalloc(rocker_port, trans, group_count, + sizeof(u32)); if (!entry->group_ids) { - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); return -ENOMEM; } memcpy(entry->group_ids, group_ids, group_count * sizeof(u32)); - return rocker_group_tbl_do(rocker_port, flags, entry); + return rocker_group_tbl_do(rocker_port, trans, flags, entry); } static int rocker_group_l2_flood(struct rocker_port *rocker_port, - int flags, __be16 vlan_id, - u8 group_count, u32 *group_ids, - u32 group_id) + enum switchdev_trans trans, int flags, + __be16 vlan_id, u8 group_count, + u32 *group_ids, u32 group_id) { - return rocker_group_l2_fan_out(rocker_port, flags, + return rocker_group_l2_fan_out(rocker_port, trans, flags, group_count, group_ids, group_id); } static int rocker_group_l3_unicast(struct rocker_port *rocker_port, - int flags, u32 index, u8 *src_mac, - u8 *dst_mac, __be16 vlan_id, - bool ttl_check, u32 pport) + enum switchdev_trans trans, int flags, + u32 index, u8 *src_mac, u8 *dst_mac, + __be16 vlan_id, bool ttl_check, u32 pport) { struct rocker_group_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2777,7 +2892,7 @@ static int rocker_group_l3_unicast(struct rocker_port *rocker_port, entry->l3_unicast.ttl_check = ttl_check; entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport); - return rocker_group_tbl_do(rocker_port, flags, entry); + return rocker_group_tbl_do(rocker_port, trans, flags, entry); } static struct rocker_neigh_tbl_entry * @@ -2794,36 +2909,44 @@ static struct rocker_neigh_tbl_entry * } static void _rocker_neigh_add(struct rocker *rocker, + enum switchdev_trans trans, struct rocker_neigh_tbl_entry *entry) { - entry->index = rocker->neigh_tbl_next_index++; + entry->index = rocker->neigh_tbl_next_index; + if (trans == SWITCHDEV_TRANS_PREPARE) + return; + rocker->neigh_tbl_next_index++; entry->ref_count++; hash_add(rocker->neigh_tbl, &entry->entry, be32_to_cpu(entry->ip_addr)); } -static void _rocker_neigh_del(struct rocker *rocker, +static void _rocker_neigh_del(struct rocker_port *rocker_port, + enum switchdev_trans trans, struct rocker_neigh_tbl_entry *entry) { + if (trans == SWITCHDEV_TRANS_PREPARE) + return; if (--entry->ref_count == 0) { hash_del(&entry->entry); - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); } } -static void _rocker_neigh_update(struct rocker *rocker, - struct rocker_neigh_tbl_entry *entry, +static void _rocker_neigh_update(struct rocker_neigh_tbl_entry *entry, + enum switchdev_trans trans, u8 *eth_dst, bool ttl_check) { if (eth_dst) { ether_addr_copy(entry->eth_dst, eth_dst); entry->ttl_check = ttl_check; - } else { + } else if (trans != SWITCHDEV_TRANS_PREPARE) { entry->ref_count++; } } static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, __be32 ip_addr, u8 *eth_dst) { struct rocker *rocker = rocker_port->rocker; @@ -2840,7 +2963,7 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, bool removing; int err = 0; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2857,12 +2980,12 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, entry->dev = rocker_port->dev; 
ether_addr_copy(entry->eth_dst, eth_dst); entry->ttl_check = true; - _rocker_neigh_add(rocker, entry); + _rocker_neigh_add(rocker, trans, entry); } else if (removing) { memcpy(entry, found, sizeof(*entry)); - _rocker_neigh_del(rocker, found); + _rocker_neigh_del(rocker_port, trans, found); } else if (updating) { - _rocker_neigh_update(rocker, found, eth_dst, true); + _rocker_neigh_update(found, trans, eth_dst, true); memcpy(entry, found, sizeof(*entry)); } else { err = -ENOENT; @@ -2879,7 +3002,7 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, * other routes' nexthops. */ - err = rocker_group_l3_unicast(rocker_port, flags, + err = rocker_group_l3_unicast(rocker_port, trans, flags, entry->index, rocker_port->dev->dev_addr, entry->eth_dst, @@ -2895,7 +3018,7 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, if (adding || removing) { group_id = ROCKER_GROUP_L3_UNICAST(entry->index); - err = rocker_flow_tbl_ucast4_routing(rocker_port, + err = rocker_flow_tbl_ucast4_routing(rocker_port, trans, eth_type, ip_addr, inet_make_mask(32), priority, goto_tbl, @@ -2909,13 +3032,13 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, err_out: if (!adding) - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); return err; } static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, - __be32 ip_addr) + enum switchdev_trans trans, __be32 ip_addr) { struct net_device *dev = rocker_port->dev; struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip_addr); @@ -2933,7 +3056,8 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, */ if (n->nud_state & NUD_VALID) - err = rocker_port_ipv4_neigh(rocker_port, 0, ip_addr, n->ha); + err = rocker_port_ipv4_neigh(rocker_port, trans, 0, + ip_addr, n->ha); else neigh_event_send(n, NULL); @@ -2941,7 +3065,8 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, return err; } -static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, +static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, __be32 ip_addr, u32 *index) { struct rocker *rocker = rocker_port->rocker; @@ -2954,7 +3079,7 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, bool resolved = true; int err = 0; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2971,13 +3096,13 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, if (adding) { entry->ip_addr = ip_addr; entry->dev = rocker_port->dev; - _rocker_neigh_add(rocker, entry); + _rocker_neigh_add(rocker, trans, entry); *index = entry->index; resolved = false; } else if (removing) { - _rocker_neigh_del(rocker, found); + _rocker_neigh_del(rocker_port, trans, found); } else if (updating) { - _rocker_neigh_update(rocker, found, NULL, false); + _rocker_neigh_update(found, trans, NULL, false); resolved = !is_zero_ether_addr(found->eth_dst); } else { err = -ENOENT; @@ -2986,7 +3111,7 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags); if (!adding) - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); if (err) return err; @@ -2994,12 +3119,13 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, /* Resolved means neigh ip_addr is resolved to neigh mac. 
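The neigh-table helpers above follow the same discipline as the transaction allocator: a PREPARE-phase call computes whatever the caller needs but skips every write to shared state (hash insertion/removal, refcounts, the next-index bump). That is why _rocker_neigh_add() assigns entry->index before its early return; rocker_port_ipv4_nh() in this hunk relies on that value in both phases. Annotated sketch of that path, mirroring the lines above:

	if (adding) {
		entry->ip_addr = ip_addr;
		entry->dev = rocker_port->dev;
		_rocker_neigh_add(rocker, trans, entry);
		/* In PREPARE, only entry->index was set; the table insert
		 * and refcount bump were skipped.  The caller can still
		 * build the dependent L3 group and route flow from it.
		 */
		*index = entry->index;
		resolved = false;
	}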
*/ if (!resolved) - err = rocker_port_ipv4_resolve(rocker_port, ip_addr); + err = rocker_port_ipv4_resolve(rocker_port, trans, ip_addr); return err; } static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, __be16 vlan_id) { struct rocker_port *p; @@ -3010,8 +3136,8 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, int err = 0; int i; - group_ids = kcalloc(rocker->port_count, sizeof(u32), - rocker_op_flags_gfp(flags)); + group_ids = rocker_port_kcalloc(rocker_port, trans, rocker->port_count, + sizeof(u32)); if (!group_ids) return -ENOMEM; @@ -3034,21 +3160,20 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, if (group_count == 0) goto no_ports_in_vlan; - err = rocker_group_l2_flood(rocker_port, flags, vlan_id, - group_count, group_ids, - group_id); + err = rocker_group_l2_flood(rocker_port, trans, flags, vlan_id, + group_count, group_ids, group_id); if (err) netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 flood group\n", err); no_ports_in_vlan: - kfree(group_ids); + rocker_port_kfree(rocker_port, trans, group_ids); return err; } static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, - int flags, __be16 vlan_id, - bool pop_vlan) + enum switchdev_trans trans, int flags, + __be16 vlan_id, bool pop_vlan) { struct rocker *rocker = rocker_port->rocker; struct rocker_port *p; @@ -3065,9 +3190,8 @@ static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, if (rocker_port->stp_state == BR_STATE_LEARNING || rocker_port->stp_state == BR_STATE_FORWARDING) { out_pport = rocker_port->pport; - err = rocker_group_l2_interface(rocker_port, flags, - vlan_id, out_pport, - pop_vlan); + err = rocker_group_l2_interface(rocker_port, trans, flags, + vlan_id, out_pport, pop_vlan); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 group for pport %d\n", @@ -3091,9 +3215,8 @@ static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, return 0; out_pport = 0; - err = rocker_group_l2_interface(rocker_port, flags, - vlan_id, out_pport, - pop_vlan); + err = rocker_group_l2_interface(rocker_port, trans, flags, + vlan_id, out_pport, pop_vlan); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 group for CPU port\n", err); @@ -3149,8 +3272,8 @@ static struct rocker_ctrl { }; static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, - int flags, struct rocker_ctrl *ctrl, - __be16 vlan_id) + enum switchdev_trans trans, int flags, + struct rocker_ctrl *ctrl, __be16 vlan_id) { u32 in_pport = rocker_port->pport; u32 in_pport_mask = 0xffffffff; @@ -3165,7 +3288,7 @@ static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, u32 group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); int err; - err = rocker_flow_tbl_acl(rocker_port, flags, + err = rocker_flow_tbl_acl(rocker_port, trans, flags, in_pport, in_pport_mask, eth_src, eth_src_mask, ctrl->eth_dst, ctrl->eth_dst_mask, @@ -3182,7 +3305,8 @@ static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, } static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, - int flags, struct rocker_ctrl *ctrl, + enum switchdev_trans trans, int flags, + struct rocker_ctrl *ctrl, __be16 vlan_id) { enum rocker_of_dpa_table_id goto_tbl = @@ -3194,7 +3318,7 @@ static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, if (!rocker_port_is_bridged(rocker_port)) return 0; - err = rocker_flow_tbl_bridge(rocker_port, flags, + err = 
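[Annotation] The body of rocker_port_vlan_flood_group() between the two hunks is unchanged and therefore elided from the diff; for orientation, it plausibly collects one L2-interface group per VLAN member port along these lines (a reconstruction, not the verbatim source):

	group_count = 0;
	for (i = 0; i < rocker->port_count; i++) {
		p = rocker->ports[i];
		if (!p || !test_bit(ntohs(vlan_id), p->vlan_bitmap))
			continue;
		group_ids[group_count++] =
			ROCKER_GROUP_L2_INTERFACE(vlan_id, p->pport);
	}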
rocker_flow_tbl_bridge(rocker_port, trans, flags, ctrl->eth_dst, ctrl->eth_dst_mask, vlan_id, tunnel_id, goto_tbl, group_id, ctrl->copy_to_cpu); @@ -3206,8 +3330,8 @@ static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, } static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, - int flags, struct rocker_ctrl *ctrl, - __be16 vlan_id) + enum switchdev_trans trans, int flags, + struct rocker_ctrl *ctrl, __be16 vlan_id) { u32 in_pport_mask = 0xffffffff; __be16 vlan_id_mask = htons(0xffff); @@ -3216,7 +3340,7 @@ static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, if (ntohs(vlan_id) == 0) vlan_id = rocker_port->internal_vlan_id; - err = rocker_flow_tbl_term_mac(rocker_port, + err = rocker_flow_tbl_term_mac(rocker_port, trans, rocker_port->pport, in_pport_mask, ctrl->eth_type, ctrl->eth_dst, ctrl->eth_dst_mask, vlan_id, @@ -3229,32 +3353,34 @@ static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, return err; } -static int rocker_port_ctrl_vlan(struct rocker_port *rocker_port, int flags, +static int rocker_port_ctrl_vlan(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, struct rocker_ctrl *ctrl, __be16 vlan_id) { if (ctrl->acl) - return rocker_port_ctrl_vlan_acl(rocker_port, flags, + return rocker_port_ctrl_vlan_acl(rocker_port, trans, flags, ctrl, vlan_id); if (ctrl->bridge) - return rocker_port_ctrl_vlan_bridge(rocker_port, flags, + return rocker_port_ctrl_vlan_bridge(rocker_port, trans, flags, ctrl, vlan_id); if (ctrl->term) - return rocker_port_ctrl_vlan_term(rocker_port, flags, + return rocker_port_ctrl_vlan_term(rocker_port, trans, flags, ctrl, vlan_id); return -EOPNOTSUPP; } static int rocker_port_ctrl_vlan_add(struct rocker_port *rocker_port, - int flags, __be16 vlan_id) + enum switchdev_trans trans, int flags, + __be16 vlan_id) { int err = 0; int i; for (i = 0; i < ROCKER_CTRL_MAX; i++) { if (rocker_port->ctrls[i]) { - err = rocker_port_ctrl_vlan(rocker_port, flags, + err = rocker_port_ctrl_vlan(rocker_port, trans, flags, &rocker_ctrls[i], vlan_id); if (err) return err; @@ -3264,7 +3390,8 @@ static int rocker_port_ctrl_vlan_add(struct rocker_port *rocker_port, return err; } -static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags, +static int rocker_port_ctrl(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, struct rocker_ctrl *ctrl) { u16 vid; @@ -3273,7 +3400,7 @@ static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags, for (vid = 1; vid < VLAN_N_VID; vid++) { if (!test_bit(vid, rocker_port->vlan_bitmap)) continue; - err = rocker_port_ctrl_vlan(rocker_port, flags, + err = rocker_port_ctrl_vlan(rocker_port, trans, flags, ctrl, htons(vid)); if (err) break; @@ -3282,8 +3409,8 @@ static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags, return err; } -static int rocker_port_vlan(struct rocker_port *rocker_port, int flags, - u16 vid) +static int rocker_port_vlan(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, u16 vid) { enum rocker_of_dpa_table_id goto_tbl = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; @@ -3297,50 +3424,57 @@ static int rocker_port_vlan(struct rocker_port *rocker_port, int flags, internal_vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, &untagged); - if (adding && test_and_set_bit(ntohs(internal_vlan_id), - rocker_port->vlan_bitmap)) + if (adding && test_bit(ntohs(internal_vlan_id), + rocker_port->vlan_bitmap)) return 0; /* already added */ - else if (!adding && 
!test_and_clear_bit(ntohs(internal_vlan_id), - rocker_port->vlan_bitmap)) + else if (!adding && !test_bit(ntohs(internal_vlan_id), + rocker_port->vlan_bitmap)) return 0; /* already removed */ + change_bit(ntohs(internal_vlan_id), rocker_port->vlan_bitmap); + if (adding) { - err = rocker_port_ctrl_vlan_add(rocker_port, flags, + err = rocker_port_ctrl_vlan_add(rocker_port, trans, flags, internal_vlan_id); if (err) { netdev_err(rocker_port->dev, "Error (%d) port ctrl vlan add\n", err); - return err; + goto err_out; } } - err = rocker_port_vlan_l2_groups(rocker_port, flags, + err = rocker_port_vlan_l2_groups(rocker_port, trans, flags, internal_vlan_id, untagged); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 groups\n", err); - return err; + goto err_out; } - err = rocker_port_vlan_flood_group(rocker_port, flags, + err = rocker_port_vlan_flood_group(rocker_port, trans, flags, internal_vlan_id); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 flood group\n", err); - return err; + goto err_out; } - err = rocker_flow_tbl_vlan(rocker_port, flags, + err = rocker_flow_tbl_vlan(rocker_port, trans, flags, in_pport, vlan_id, vlan_id_mask, goto_tbl, untagged, internal_vlan_id); if (err) netdev_err(rocker_port->dev, "Error (%d) port VLAN table\n", err); +err_out: + if (trans == SWITCHDEV_TRANS_PREPARE) + change_bit(ntohs(internal_vlan_id), rocker_port->vlan_bitmap); + return err; } -static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags) +static int rocker_port_ig_tbl(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags) { enum rocker_of_dpa_table_id goto_tbl; u32 in_pport; @@ -3355,7 +3489,7 @@ static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags) in_pport_mask = 0xffff0000; goto_tbl = ROCKER_OF_DPA_TABLE_ID_VLAN; - err = rocker_flow_tbl_ig_port(rocker_port, flags, + err = rocker_flow_tbl_ig_port(rocker_port, trans, flags, in_pport, in_pport_mask, goto_tbl); if (err) @@ -3367,7 +3501,8 @@ static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags) struct rocker_fdb_learn_work { struct work_struct work; - struct net_device *dev; + struct rocker_port *rocker_port; + enum switchdev_trans trans; int flags; u8 addr[ETH_ALEN]; u16 vid; @@ -3379,23 +3514,24 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) container_of(work, struct rocker_fdb_learn_work, work); bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE); bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED); - struct netdev_switch_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info; info.addr = lw->addr; info.vid = lw->vid; if (learned && removing) - call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, - lw->dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL, + lw->rocker_port->dev, &info.info); else if (learned && !removing) - call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, - lw->dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_ADD, + lw->rocker_port->dev, &info.info); - kfree(work); + rocker_port_kfree(lw->rocker_port, lw->trans, work); } static int rocker_port_fdb_learn(struct rocker_port *rocker_port, - int flags, const u8 *addr, __be16 vlan_id) + enum switchdev_trans trans, int flags, + const u8 *addr, __be16 vlan_id) { struct rocker_fdb_learn_work *lw; enum rocker_of_dpa_table_id goto_tbl = @@ -3411,8 +3547,8 @@ static int rocker_port_fdb_learn(struct rocker_port *rocker_port, group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); if (!(flags & ROCKER_OP_FLAG_REFRESH)) { - err = 
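[Annotation] The test_and_set_bit()/test_and_clear_bit() pair had to be split into test_bit() + change_bit() precisely because a PREPARE pass must leave vlan_bitmap untouched on exit, as the err_out revert above shows. The same logic in a self-contained plain-C model (stand-ins for the kernel bitmap helpers):

	#include <stdbool.h>

	static unsigned long vlan_bitmap[64];	/* 4096 bits, one per VID */

	static bool test_vid(unsigned int vid)
	{
		return vlan_bitmap[vid / 64] & (1UL << (vid % 64));
	}

	static void flip_vid(unsigned int vid)	/* change_bit() equivalent */
	{
		vlan_bitmap[vid / 64] ^= 1UL << (vid % 64);
	}

	static int vlan_update(bool adding, bool prepare, unsigned int vid)
	{
		int err;

		if (adding == test_vid(vid))
			return 0;	/* already added / already removed */

		flip_vid(vid);
		err = 0;		/* ... program hardware tables here ... */
		if (prepare)
			flip_vid(vid);	/* undo the dry-run bookkeeping */
		return err;
	}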
rocker_flow_tbl_bridge(rocker_port, flags, addr, NULL, - vlan_id, tunnel_id, goto_tbl, + err = rocker_flow_tbl_bridge(rocker_port, trans, flags, addr, + NULL, vlan_id, tunnel_id, goto_tbl, group_id, copy_to_cpu); if (err) return err; @@ -3424,18 +3560,22 @@ static int rocker_port_fdb_learn(struct rocker_port *rocker_port, if (!rocker_port_is_bridged(rocker_port)) return 0; - lw = kmalloc(sizeof(*lw), rocker_op_flags_gfp(flags)); + lw = rocker_port_kzalloc(rocker_port, trans, sizeof(*lw)); if (!lw) return -ENOMEM; INIT_WORK(&lw->work, rocker_port_fdb_learn_work); - lw->dev = rocker_port->dev; + lw->rocker_port = rocker_port; + lw->trans = trans; lw->flags = flags; ether_addr_copy(lw->addr, addr); lw->vid = rocker_port_vlan_to_vid(rocker_port, vlan_id); - schedule_work(&lw->work); + if (trans == SWITCHDEV_TRANS_PREPARE) + rocker_port_kfree(rocker_port, trans, lw); + else + schedule_work(&lw->work); return 0; } @@ -3453,6 +3593,7 @@ rocker_fdb_tbl_find(struct rocker *rocker, struct rocker_fdb_tbl_entry *match) } static int rocker_port_fdb(struct rocker_port *rocker_port, + enum switchdev_trans trans, const unsigned char *addr, __be16 vlan_id, int flags) { @@ -3462,7 +3603,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, bool removing = (flags & ROCKER_OP_FLAG_REMOVE); unsigned long lock_flags; - fdb = kzalloc(sizeof(*fdb), rocker_op_flags_gfp(flags)); + fdb = rocker_port_kzalloc(rocker_port, trans, sizeof(*fdb)); if (!fdb) return -ENOMEM; @@ -3477,32 +3618,35 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, found = rocker_fdb_tbl_find(rocker, fdb); if (removing && found) { - kfree(fdb); - hash_del(&found->entry); + rocker_port_kfree(rocker_port, trans, fdb); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); } else if (!removing && !found) { - hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32); } spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); /* Check if adding and already exists, or removing and can't find */ if (!found != !removing) { - kfree(fdb); + rocker_port_kfree(rocker_port, trans, fdb); if (!found && removing) return 0; /* Refreshing existing to update aging timers */ flags |= ROCKER_OP_FLAG_REFRESH; } - return rocker_port_fdb_learn(rocker_port, flags, addr, vlan_id); + return rocker_port_fdb_learn(rocker_port, trans, flags, addr, vlan_id); } -static int rocker_port_fdb_flush(struct rocker_port *rocker_port) +static int rocker_port_fdb_flush(struct rocker_port *rocker_port, + enum switchdev_trans trans) { struct rocker *rocker = rocker_port->rocker; struct rocker_fdb_tbl_entry *found; unsigned long lock_flags; - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE; + int flags = ROCKER_OP_FLAG_REMOVE; struct hlist_node *tmp; int bkt; int err = 0; @@ -3518,12 +3662,13 @@ static int rocker_port_fdb_flush(struct rocker_port *rocker_port) continue; if (!found->learned) continue; - err = rocker_port_fdb_learn(rocker_port, flags, + err = rocker_port_fdb_learn(rocker_port, trans, flags, found->key.addr, found->key.vlan_id); if (err) goto err_out; - hash_del(&found->entry); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); } err_out: @@ -3533,7 +3678,8 @@ err_out: } static int rocker_port_router_mac(struct rocker_port *rocker_port, - int flags, __be16 vlan_id) + enum switchdev_trans trans, int flags, + __be16 vlan_id) { u32 in_pport_mask = 0xffffffff; __be16 eth_type; @@ -3546,7 +3692,7 @@ static int 
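[Annotation] The work-queue indirection in rocker_port_fdb_learn() exists because learn events arrive from NAPI (softirq) context while the switchdev FDB notifiers want process context; the new twist is that PREPARE allocates and immediately frees the work item, proving the allocation can succeed without emitting anything. The pattern in isolation:

	lw = rocker_port_kzalloc(rocker_port, trans, sizeof(*lw));
	if (!lw)
		return -ENOMEM;	/* prepare reports this; commit never runs */
	/* ... fill in lw ... */
	if (trans == SWITCHDEV_TRANS_PREPARE)
		rocker_port_kfree(rocker_port, trans, lw); /* nothing to emit yet */
	else
		schedule_work(&lw->work);	/* commit: really notify */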
rocker_port_router_mac(struct rocker_port *rocker_port, vlan_id = rocker_port->internal_vlan_id; eth_type = htons(ETH_P_IP); - err = rocker_flow_tbl_term_mac(rocker_port, + err = rocker_flow_tbl_term_mac(rocker_port, trans, rocker_port->pport, in_pport_mask, eth_type, rocker_port->dev->dev_addr, dst_mac_mask, vlan_id, vlan_id_mask, @@ -3555,7 +3701,7 @@ static int rocker_port_router_mac(struct rocker_port *rocker_port, return err; eth_type = htons(ETH_P_IPV6); - err = rocker_flow_tbl_term_mac(rocker_port, + err = rocker_flow_tbl_term_mac(rocker_port, trans, rocker_port->pport, in_pport_mask, eth_type, rocker_port->dev->dev_addr, dst_mac_mask, vlan_id, vlan_id_mask, @@ -3564,13 +3710,14 @@ static int rocker_port_router_mac(struct rocker_port *rocker_port, return err; } -static int rocker_port_fwding(struct rocker_port *rocker_port) +static int rocker_port_fwding(struct rocker_port *rocker_port, + enum switchdev_trans trans) { bool pop_vlan; u32 out_pport; __be16 vlan_id; u16 vid; - int flags = ROCKER_OP_FLAG_NOWAIT; + int flags = 0; int err; /* Port will be forwarding-enabled if its STP state is LEARNING @@ -3590,9 +3737,8 @@ static int rocker_port_fwding(struct rocker_port *rocker_port) continue; vlan_id = htons(vid); pop_vlan = rocker_vlan_id_is_internal(vlan_id); - err = rocker_group_l2_interface(rocker_port, flags, - vlan_id, out_pport, - pop_vlan); + err = rocker_group_l2_interface(rocker_port, trans, flags, + vlan_id, out_pport, pop_vlan); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 group for pport %d\n", @@ -3604,13 +3750,21 @@ static int rocker_port_fwding(struct rocker_port *rocker_port) return 0; } -static int rocker_port_stp_update(struct rocker_port *rocker_port, u8 state) +static int rocker_port_stp_update(struct rocker_port *rocker_port, + enum switchdev_trans trans, u8 state) { bool want[ROCKER_CTRL_MAX] = { 0, }; + bool prev_ctrls[ROCKER_CTRL_MAX]; + u8 prev_state; int flags; int err; int i; + if (trans == SWITCHDEV_TRANS_PREPARE) { + memcpy(prev_ctrls, rocker_port->ctrls, sizeof(prev_ctrls)); + prev_state = rocker_port->stp_state; + } + if (rocker_port->stp_state == state) return 0; @@ -3638,41 +3792,50 @@ static int rocker_port_stp_update(struct rocker_port *rocker_port, u8 state) for (i = 0; i < ROCKER_CTRL_MAX; i++) { if (want[i] != rocker_port->ctrls[i]) { - flags = ROCKER_OP_FLAG_NOWAIT | - (want[i] ? 0 : ROCKER_OP_FLAG_REMOVE); - err = rocker_port_ctrl(rocker_port, flags, + flags = (want[i] ? 
0 : ROCKER_OP_FLAG_REMOVE); + err = rocker_port_ctrl(rocker_port, trans, flags, &rocker_ctrls[i]); if (err) - return err; + goto err_out; rocker_port->ctrls[i] = want[i]; } } - err = rocker_port_fdb_flush(rocker_port); + err = rocker_port_fdb_flush(rocker_port, trans); if (err) - return err; + goto err_out; + + err = rocker_port_fwding(rocker_port, trans); - return rocker_port_fwding(rocker_port); +err_out: + if (trans == SWITCHDEV_TRANS_PREPARE) { + memcpy(rocker_port->ctrls, prev_ctrls, sizeof(prev_ctrls)); + rocker_port->stp_state = prev_state; + } + + return err; } -static int rocker_port_fwd_enable(struct rocker_port *rocker_port) +static int rocker_port_fwd_enable(struct rocker_port *rocker_port, + enum switchdev_trans trans) { if (rocker_port_is_bridged(rocker_port)) /* bridge STP will enable port */ return 0; /* port is not bridged, so simulate going to FORWARDING state */ - return rocker_port_stp_update(rocker_port, BR_STATE_FORWARDING); + return rocker_port_stp_update(rocker_port, trans, BR_STATE_FORWARDING); } -static int rocker_port_fwd_disable(struct rocker_port *rocker_port) +static int rocker_port_fwd_disable(struct rocker_port *rocker_port, + enum switchdev_trans trans) { if (rocker_port_is_bridged(rocker_port)) /* bridge STP will disable port */ return 0; /* port is not bridged, so simulate going to DISABLED state */ - return rocker_port_stp_update(rocker_port, BR_STATE_DISABLED); + return rocker_port_stp_update(rocker_port, trans, BR_STATE_DISABLED); } static struct rocker_internal_vlan_tbl_entry * @@ -3760,7 +3923,8 @@ not_found: spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags); } -static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, +static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, + enum switchdev_trans trans, __be32 dst, int dst_len, struct fib_info *fi, u32 tb_id, int flags) { @@ -3784,7 +3948,7 @@ static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, has_gw = !!nh->nh_gw; if (has_gw && nh_on_port) { - err = rocker_port_ipv4_nh(rocker_port, flags, + err = rocker_port_ipv4_nh(rocker_port, trans, flags, nh->nh_gw, &index); if (err) return err; @@ -3795,7 +3959,7 @@ static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0); } - err = rocker_flow_tbl_ucast4_routing(rocker_port, eth_type, dst, + err = rocker_flow_tbl_ucast4_routing(rocker_port, trans, eth_type, dst, dst_mask, priority, goto_tbl, group_id, flags); if (err) @@ -3834,7 +3998,7 @@ static int rocker_port_open(struct net_device *dev) goto err_request_rx_irq; } - err = rocker_port_fwd_enable(rocker_port); + err = rocker_port_fwd_enable(rocker_port, SWITCHDEV_TRANS_NONE); if (err) goto err_fwd_enable; @@ -3861,7 +4025,7 @@ static int rocker_port_stop(struct net_device *dev) rocker_port_set_enable(rocker_port, false); napi_disable(&rocker_port->napi_rx); napi_disable(&rocker_port->napi_tx); - rocker_port_fwd_disable(rocker_port); + rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE); free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); rocker_port_dma_rings_fini(rocker_port); @@ -4014,11 +4178,12 @@ static int rocker_port_vlan_rx_add_vid(struct net_device *dev, struct rocker_port *rocker_port = netdev_priv(dev); int err; - err = rocker_port_vlan(rocker_port, 0, vid); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, 0, vid); if (err) return err; - return 
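[Annotation] rocker_port_stp_update() shows the third ingredient of the transaction model: snapshot the pre-image (prev_ctrls, prev_state), run the full path for validation, and restore the snapshot when this was only a PREPARE. Reduced to a compilable sketch with hypothetical types:

	struct port_state { unsigned char stp_state; };

	static int apply_state(struct port_state *p)
	{
		return 0;	/* stands in for the hardware table calls */
	}

	static int stp_update(struct port_state *p, int prepare,
			      unsigned char state)
	{
		unsigned char prev = p->stp_state;
		int err;

		if (p->stp_state == state)
			return 0;
		p->stp_state = state;
		err = apply_state(p);		/* may fail; may allocate */
		if (prepare)
			p->stp_state = prev;	/* roll the dry run back */
		return err;
	}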
rocker_port_router_mac(rocker_port, 0, htons(vid)); + return rocker_port_router_mac(rocker_port, SWITCHDEV_TRANS_NONE, + 0, htons(vid)); } static int rocker_port_vlan_rx_kill_vid(struct net_device *dev, @@ -4027,250 +4192,340 @@ static int rocker_port_vlan_rx_kill_vid(struct net_device *dev, struct rocker_port *rocker_port = netdev_priv(dev); int err; - err = rocker_port_router_mac(rocker_port, ROCKER_OP_FLAG_REMOVE, - htons(vid)); + err = rocker_port_router_mac(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, htons(vid)); if (err) return err; - return rocker_port_vlan(rocker_port, ROCKER_OP_FLAG_REMOVE, vid); + return rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, vid); } -static int rocker_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 nlm_flags) +static int rocker_port_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) { struct rocker_port *rocker_port = netdev_priv(dev); - __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, NULL); - int flags = 0; + struct port_name name = { .buf = buf, .len = len }; + int err; - if (!rocker_port_is_bridged(rocker_port)) - return -EINVAL; + err = rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, + rocker_cmd_get_port_settings_prep, NULL, + rocker_cmd_get_port_settings_phys_name_proc, + &name); - return rocker_port_fdb(rocker_port, addr, vlan_id, flags); + return err ? -EOPNOTSUPP : 0; } -static int rocker_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) +static const struct net_device_ops rocker_port_netdev_ops = { + .ndo_open = rocker_port_open, + .ndo_stop = rocker_port_stop, + .ndo_start_xmit = rocker_port_xmit, + .ndo_set_mac_address = rocker_port_set_mac_address, + .ndo_vlan_rx_add_vid = rocker_port_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = rocker_port_vlan_rx_kill_vid, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_get_phys_port_name = rocker_port_get_phys_port_name, +}; + +/******************** + * swdev interface + ********************/ + +static int rocker_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { struct rocker_port *rocker_port = netdev_priv(dev); - __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, NULL); - int flags = ROCKER_OP_FLAG_REMOVE; + struct rocker *rocker = rocker_port->rocker; - if (!rocker_port_is_bridged(rocker_port)) - return -EINVAL; + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(rocker->hw.id); + memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len); + break; + case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: + attr->u.brport_flags = rocker_port->brport_flags; + break; + default: + return -EOPNOTSUPP; + } - return rocker_port_fdb(rocker_port, addr, vlan_id, flags); + return 0; } -static int rocker_fdb_fill_info(struct sk_buff *skb, - struct rocker_port *rocker_port, - const unsigned char *addr, u16 vid, - u32 portid, u32 seq, int type, - unsigned int flags) +static void rocker_port_trans_abort(struct rocker_port *rocker_port) { - struct nlmsghdr *nlh; - struct ndmsg *ndm; + struct list_head *mem, *tmp; - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); - if 
(!nlh) - return -EMSGSIZE; + list_for_each_safe(mem, tmp, &rocker_port->trans_mem) { + list_del(mem); + kfree(mem); + } +} - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1 = 0; - ndm->ndm_pad2 = 0; - ndm->ndm_flags = NTF_SELF; - ndm->ndm_type = 0; - ndm->ndm_ifindex = rocker_port->dev->ifindex; - ndm->ndm_state = NUD_REACHABLE; +static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, + enum switchdev_trans trans, + unsigned long brport_flags) +{ + unsigned long orig_flags; + int err = 0; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) - goto nla_put_failure; + orig_flags = rocker_port->brport_flags; + rocker_port->brport_flags = brport_flags; + if ((orig_flags ^ rocker_port->brport_flags) & BR_LEARNING) + err = rocker_port_set_learning(rocker_port, trans); - if (vid && nla_put_u16(skb, NDA_VLAN, vid)) - goto nla_put_failure; + if (trans == SWITCHDEV_TRANS_PREPARE) + rocker_port->brport_flags = orig_flags; - nlmsg_end(skb, nlh); - return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } -static int rocker_port_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, - int idx) +static int rocker_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) { struct rocker_port *rocker_port = netdev_priv(dev); - struct rocker *rocker = rocker_port->rocker; - struct rocker_fdb_tbl_entry *found; - struct hlist_node *tmp; - int bkt; - unsigned long lock_flags; - const unsigned char *addr; - u16 vid; - int err; + int err = 0; - spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); - hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) - continue; - if (idx < cb->args[0]) - goto skip; - addr = found->key.addr; - vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); - err = rocker_fdb_fill_info(skb, rocker_port, addr, vid, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, NLM_F_MULTI); - if (err < 0) - break; -skip: - ++idx; + switch (attr->trans) { + case SWITCHDEV_TRANS_PREPARE: + BUG_ON(!list_empty(&rocker_port->trans_mem)); + break; + case SWITCHDEV_TRANS_ABORT: + rocker_port_trans_abort(rocker_port); + return 0; + default: + break; } - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); - return idx; + + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: + err = rocker_port_stp_update(rocker_port, attr->trans, + attr->u.stp_state); + break; + case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: + err = rocker_port_brport_flags_set(rocker_port, attr->trans, + attr->u.brport_flags); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; } -static int rocker_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +static int rocker_port_vlan_add(struct rocker_port *rocker_port, + enum switchdev_trans trans, u16 vid, u16 flags) { - struct rocker_port *rocker_port = netdev_priv(dev); - struct nlattr *protinfo; - struct nlattr *attr; int err; - protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_PROTINFO); - if (protinfo) { - attr = nla_find_nested(protinfo, IFLA_BRPORT_LEARNING); - if (attr) { - if (nla_len(attr) < sizeof(u8)) - return -EINVAL; - - if (nla_get_u8(attr)) - rocker_port->brport_flags |= BR_LEARNING; - else - rocker_port->brport_flags &= ~BR_LEARNING; - err = rocker_port_set_learning(rocker_port); - if (err) - return err; - } - attr = nla_find_nested(protinfo, IFLA_BRPORT_LEARNING_SYNC); - if (attr) { - if 
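[Annotation] The BUG_ON()/abort dispatch at the top of rocker_port_attr_set() mirrors what the switchdev core does with these hooks; roughly the following, though this is a sketch of the caller and not quoted from net/switchdev/switchdev.c:

	attr->trans = SWITCHDEV_TRANS_PREPARE;
	err = ops->switchdev_port_attr_set(dev, attr);
	if (err) {
		attr->trans = SWITCHDEV_TRANS_ABORT;
		ops->switchdev_port_attr_set(dev, attr); /* best-effort cleanup */
		return err;
	}
	attr->trans = SWITCHDEV_TRANS_COMMIT;
	return ops->switchdev_port_attr_set(dev, attr);	/* expected to succeed */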
(nla_len(attr) < sizeof(u8)) - return -EINVAL; - - if (nla_get_u8(attr)) - rocker_port->brport_flags |= BR_LEARNING_SYNC; - else - rocker_port->brport_flags &= ~BR_LEARNING_SYNC; - } + /* XXX deal with flags for PVID and untagged */ + + err = rocker_port_vlan(rocker_port, trans, 0, vid); + if (err) + return err; + + return rocker_port_router_mac(rocker_port, trans, 0, htons(vid)); +} + +static int rocker_port_vlans_add(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_obj_vlan *vlan) +{ + u16 vid; + int err; + + for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + err = rocker_port_vlan_add(rocker_port, trans, + vid, vlan->flags); + if (err) + return err; } return 0; } -static int rocker_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, - u32 filter_mask, int nlflags) +static int rocker_port_fdb_add(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_obj_fdb *fdb) { - struct rocker_port *rocker_port = netdev_priv(dev); - u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); + int flags = 0; + + if (!rocker_port_is_bridged(rocker_port)) + return -EINVAL; - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - rocker_port->brport_flags, mask, - nlflags); + return rocker_port_fdb(rocker_port, trans, fdb->addr, vlan_id, flags); } -static int rocker_port_get_phys_port_name(struct net_device *dev, - char *buf, size_t len) +static int rocker_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); - struct port_name name = { .buf = buf, .len = len }; - int err; + struct switchdev_obj_ipv4_fib *fib4; + int err = 0; - err = rocker_cmd_exec(rocker_port->rocker, rocker_port, - rocker_cmd_get_port_settings_prep, NULL, - rocker_cmd_get_port_settings_phys_name_proc, - &name, false); + switch (obj->trans) { + case SWITCHDEV_TRANS_PREPARE: + BUG_ON(!list_empty(&rocker_port->trans_mem)); + break; + case SWITCHDEV_TRANS_ABORT: + rocker_port_trans_abort(rocker_port); + return 0; + default: + break; + } - return err ? 
-EOPNOTSUPP : 0; + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_VLAN: + err = rocker_port_vlans_add(rocker_port, obj->trans, + &obj->u.vlan); + break; + case SWITCHDEV_OBJ_IPV4_FIB: + fib4 = &obj->u.ipv4_fib; + err = rocker_port_fib_ipv4(rocker_port, obj->trans, + htonl(fib4->dst), fib4->dst_len, + fib4->fi, fib4->tb_id, 0); + break; + case SWITCHDEV_OBJ_PORT_FDB: + err = rocker_port_fdb_add(rocker_port, obj->trans, &obj->u.fdb); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; } -static const struct net_device_ops rocker_port_netdev_ops = { - .ndo_open = rocker_port_open, - .ndo_stop = rocker_port_stop, - .ndo_start_xmit = rocker_port_xmit, - .ndo_set_mac_address = rocker_port_set_mac_address, - .ndo_vlan_rx_add_vid = rocker_port_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = rocker_port_vlan_rx_kill_vid, - .ndo_fdb_add = rocker_port_fdb_add, - .ndo_fdb_del = rocker_port_fdb_del, - .ndo_fdb_dump = rocker_port_fdb_dump, - .ndo_bridge_setlink = rocker_port_bridge_setlink, - .ndo_bridge_getlink = rocker_port_bridge_getlink, - .ndo_get_phys_port_name = rocker_port_get_phys_port_name, -}; +static int rocker_port_vlan_del(struct rocker_port *rocker_port, + u16 vid, u16 flags) +{ + int err; -/******************** - * swdev interface - ********************/ + err = rocker_port_router_mac(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, htons(vid)); + if (err) + return err; + + return rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, vid); +} -static int rocker_port_swdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int rocker_port_vlans_del(struct rocker_port *rocker_port, + struct switchdev_obj_vlan *vlan) { - struct rocker_port *rocker_port = netdev_priv(dev); - struct rocker *rocker = rocker_port->rocker; + u16 vid; + int err; + + for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + err = rocker_port_vlan_del(rocker_port, vid, vlan->flags); + if (err) + return err; + } - psid->id_len = sizeof(rocker->hw.id); - memcpy(&psid->id, &rocker->hw.id, psid->id_len); return 0; } -static int rocker_port_swdev_port_stp_update(struct net_device *dev, u8 state) +static int rocker_port_fdb_del(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_obj_fdb *fdb) { - struct rocker_port *rocker_port = netdev_priv(dev); + __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); + int flags = ROCKER_OP_FLAG_REMOVE; - return rocker_port_stp_update(rocker_port, state); + if (!rocker_port_is_bridged(rocker_port)) + return -EINVAL; + + return rocker_port_fdb(rocker_port, trans, fdb->addr, vlan_id, flags); } -static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static int rocker_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); - int flags = 0; + struct switchdev_obj_ipv4_fib *fib4; + int err = 0; - return rocker_port_fib_ipv4(rocker_port, dst, dst_len, - fi, tb_id, flags); + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_VLAN: + err = rocker_port_vlans_del(rocker_port, &obj->u.vlan); + break; + case SWITCHDEV_OBJ_IPV4_FIB: + fib4 = &obj->u.ipv4_fib; + err = rocker_port_fib_ipv4(rocker_port, SWITCHDEV_TRANS_NONE, + htonl(fib4->dst), fib4->dst_len, + fib4->fi, fib4->tb_id, + ROCKER_OP_FLAG_REMOVE); + break; + case SWITCHDEV_OBJ_PORT_FDB: + err = rocker_port_fdb_del(rocker_port, obj->trans, &obj->u.fdb); + 
break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int rocker_port_fdb_dump(struct rocker_port *rocker_port, + struct switchdev_obj *obj) +{ + struct rocker *rocker = rocker_port->rocker; + struct switchdev_obj_fdb *fdb = &obj->u.fdb; + struct rocker_fdb_tbl_entry *found; + struct hlist_node *tmp; + unsigned long lock_flags; + int bkt; + int err = 0; + + spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); + hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { + if (found->key.pport != rocker_port->pport) + continue; + fdb->addr = found->key.addr; + fdb->vid = rocker_port_vlan_to_vid(rocker_port, + found->key.vlan_id); + err = obj->cb(rocker_port->dev, obj); + if (err) + break; + } + spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); + + return err; } -static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static int rocker_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); - int flags = ROCKER_OP_FLAG_REMOVE; + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_FDB: + err = rocker_port_fdb_dump(rocker_port, obj); + break; + default: + err = -EOPNOTSUPP; + break; + } - return rocker_port_fib_ipv4(rocker_port, dst, dst_len, - fi, tb_id, flags); + return err; } -static const struct swdev_ops rocker_port_swdev_ops = { - .swdev_parent_id_get = rocker_port_swdev_parent_id_get, - .swdev_port_stp_update = rocker_port_swdev_port_stp_update, - .swdev_fib_ipv4_add = rocker_port_swdev_fib_ipv4_add, - .swdev_fib_ipv4_del = rocker_port_swdev_fib_ipv4_del, +static const struct switchdev_ops rocker_port_switchdev_ops = { + .switchdev_port_attr_get = rocker_port_attr_get, + .switchdev_port_attr_set = rocker_port_attr_set, + .switchdev_port_obj_add = rocker_port_obj_add, + .switchdev_port_obj_del = rocker_port_obj_del, + .switchdev_port_obj_dump = rocker_port_obj_dump, }; /******************** @@ -4401,9 +4656,10 @@ static int rocker_cmd_get_port_stats_ethtool(struct rocker_port *rocker_port, void *priv) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_stats_prep, NULL, rocker_cmd_get_port_stats_ethtool_proc, - priv, false); + priv); } static void rocker_port_get_stats(struct net_device *dev, @@ -4417,8 +4673,6 @@ static void rocker_port_get_stats(struct net_device *dev, for (i = 0; i < ARRAY_SIZE(rocker_port_stats); ++i) data[i] = 0; } - - return; } static int rocker_port_get_sset_count(struct net_device *netdev, int sset) @@ -4472,8 +4726,9 @@ static int rocker_port_poll_tx(struct napi_struct *napi, int budget) if (err == 0) { rocker_port->dev->stats.tx_packets++; rocker_port->dev->stats.tx_bytes += skb->len; - } else + } else { rocker_port->dev->stats.tx_errors++; + } dev_kfree_skb_any(skb); credits++; @@ -4585,7 +4840,8 @@ static void rocker_remove_ports(struct rocker *rocker) for (i = 0; i < rocker->port_count; i++) { rocker_port = rocker->ports[i]; - rocker_port_ig_tbl(rocker_port, ROCKER_OP_FLAG_REMOVE); + rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE); unregister_netdev(rocker_port->dev); } kfree(rocker->ports); @@ -4621,11 +4877,12 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port->port_number = port_number; rocker_port->pport = port_number + 1; rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; + INIT_LIST_HEAD(&rocker_port->trans_mem); 
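[Annotation] For reference, a caller-side view of the object API the new switchdev_port_obj_add hook consumes. Values here are illustrative; note that one object can span a whole VID range, which rocker_port_vlans_add() then unrolls per VID:

	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_VLAN,
		.trans = SWITCHDEV_TRANS_PREPARE,
		.u.vlan = {
			.vid_start = 100,
			.vid_end = 110,
			.flags = 0,	/* PVID/untagged still TODO, see XXX above */
		},
	};
	err = rocker_port_obj_add(dev, &obj);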
rocker_port_dev_addr_init(rocker, rocker_port); dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = &rocker_port_ethtool_ops; - dev->swdev_ops = &rocker_port_swdev_ops; + dev->switchdev_ops = &rocker_port_switchdev_ops; netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, @@ -4633,8 +4890,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_carrier_init(rocker_port); dev->features |= NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_SWITCH_OFFLOAD; + NETIF_F_HW_VLAN_CTAG_FILTER; err = register_netdev(dev); if (err) { @@ -4643,11 +4899,11 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) } rocker->ports[port_number] = rocker_port; - rocker_port_set_learning(rocker_port); + rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); rocker_port->internal_vlan_id = rocker_port_internal_vlan_id_get(rocker_port, dev->ifindex); - err = rocker_port_ig_tbl(rocker_port, 0); + err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); if (err) { dev_err(&pdev->dev, "install ig port table failed\n"); goto err_port_ig_tbl; @@ -4900,13 +5156,13 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, rocker_port->bridge_dev = bridge; /* Use bridge internal VLAN ID for untagged pkts */ - err = rocker_port_vlan(rocker_port, ROCKER_OP_FLAG_REMOVE, 0); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, 0); if (err) return err; rocker_port->internal_vlan_id = - rocker_port_internal_vlan_id_get(rocker_port, - bridge->ifindex); - return rocker_port_vlan(rocker_port, 0, 0); + rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); + return rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, 0, 0); } static int rocker_port_bridge_leave(struct rocker_port *rocker_port) @@ -4919,18 +5175,19 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port) rocker_port->bridge_dev = NULL; /* Use port internal VLAN ID for untagged pkts */ - err = rocker_port_vlan(rocker_port, ROCKER_OP_FLAG_REMOVE, 0); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, 0); if (err) return err; rocker_port->internal_vlan_id = rocker_port_internal_vlan_id_get(rocker_port, rocker_port->dev->ifindex); - err = rocker_port_vlan(rocker_port, 0, 0); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, 0, 0); if (err) return err; if (rocker_port->dev->flags & IFF_UP) - err = rocker_port_fwd_enable(rocker_port); + err = rocker_port_fwd_enable(rocker_port, SWITCHDEV_TRANS_NONE); return err; } @@ -4992,7 +5249,8 @@ static int rocker_neigh_update(struct net_device *dev, struct neighbour *n) int flags = (n->nud_state & NUD_VALID) ? 
0 : ROCKER_OP_FLAG_REMOVE; __be32 ip_addr = *(__be32 *)n->primary_key; - return rocker_port_ipv4_neigh(rocker_port, flags, ip_addr, n->ha); + return rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE, + flags, ip_addr, n->ha); } static int rocker_netevent_event(struct notifier_block *unused, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index a4e9591d7457..c61fbf968036 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -65,9 +65,9 @@ enum { #define ROCKER_TEST_DMA_CTRL 0x0034 /* Rocker test register ctrl */ -#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0) -#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1) -#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2) +#define ROCKER_TEST_DMA_CTRL_CLEAR BIT(0) +#define ROCKER_TEST_DMA_CTRL_FILL BIT(1) +#define ROCKER_TEST_DMA_CTRL_INVERT BIT(2) /* Rocker DMA ring register offsets */ #define ROCKER_DMA_DESC_ADDR(x) (0x1000 + (x) * 32) /* 8-byte */ @@ -79,7 +79,7 @@ enum { #define ROCKER_DMA_DESC_RES1(x) (0x101c + (x) * 32) /* Rocker dma ctrl register bits */ -#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0) +#define ROCKER_DMA_DESC_CTRL_RESET BIT(0) /* Rocker DMA ring types */ enum rocker_dma_type { @@ -111,7 +111,7 @@ struct rocker_desc { u16 comp_err; }; -#define ROCKER_DMA_DESC_COMP_ERR_GEN (1 << 15) +#define ROCKER_DMA_DESC_COMP_ERR_GEN BIT(15) /* Rocker DMA TLV struct */ struct rocker_tlv { @@ -237,14 +237,14 @@ enum { ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, }; -#define ROCKER_RX_FLAGS_IPV4 (1 << 0) -#define ROCKER_RX_FLAGS_IPV6 (1 << 1) -#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2) -#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3) -#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4) -#define ROCKER_RX_FLAGS_TCP (1 << 5) -#define ROCKER_RX_FLAGS_UDP (1 << 6) -#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_IPV4 BIT(0) +#define ROCKER_RX_FLAGS_IPV6 BIT(1) +#define ROCKER_RX_FLAGS_CSUM_CALC BIT(2) +#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD BIT(3) +#define ROCKER_RX_FLAGS_IP_FRAG BIT(4) +#define ROCKER_RX_FLAGS_TCP BIT(5) +#define ROCKER_RX_FLAGS_UDP BIT(6) +#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) enum { ROCKER_TLV_TX_UNSPEC, @@ -460,6 +460,6 @@ enum rocker_of_dpa_overlay_type { #define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ /* Rocker control bits */ -#define ROCKER_CONTROL_RESET (1 << 0) +#define ROCKER_CONTROL_RESET BIT(0) #endif diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 3a83c0dca8e6..ce8470fe79d5 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -3,6 +3,6 @@ sfc-y += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \ tenxpress.o txc43128_phy.o falcon_boards.o \ mcdi.o mcdi_port.o mcdi_mon.o ptp.o sfc-$(CONFIG_SFC_MTD) += mtd.o -sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o +sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fbb6cfa0f5f1..a547cebff4e2 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -15,6 +15,7 @@ #include "nic.h" #include "workarounds.h" #include "selftest.h" +#include "ef10_sriov.h" #include <linux/in.h> #include <linux/jhash.h> #include <linux/wait.h> @@ -30,6 +31,9 @@ enum { /* The reserved RSS context value */ #define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff +/* The maximum size of a shared RSS context */ +/* TODO: this should really be from the mcdi protocol export */ +#define 
EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL /* The filter table(s) are managed by firmware and we have write-only * access. When removing filters we must identify them to the @@ -77,7 +81,6 @@ struct efx_ef10_filter_table { /* An arbitrary search limit for the software hash table */ #define EFX_EF10_FILTER_SEARCH_LIMIT 200 -static void efx_ef10_rx_push_rss_config(struct efx_nic *efx); static void efx_ef10_rx_free_indir_table(struct efx_nic *efx); static void efx_ef10_filter_table_remove(struct efx_nic *efx); @@ -92,9 +95,50 @@ static int efx_ef10_get_warm_boot_count(struct efx_nic *efx) static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) { - return resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]); + int bar; + + bar = efx->type->mem_bar; + return resource_size(&efx->pci_dev->resource[bar]); +} + +static int efx_ef10_get_pf_index(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, + sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + + nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); + return 0; } +#ifdef CONFIG_SFC_SRIOV +static int efx_ef10_get_vf_index(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, + sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + + nic_data->vf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_VF); + return 0; +} +#endif + static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN); @@ -117,6 +161,13 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) nic_data->datapath_caps = MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); + /* record the DPCPU firmware IDs to determine VEB vswitching support. + */ + nic_data->rx_dpcpu_fw_id = + MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID); + nic_data->tx_dpcpu_fw_id = + MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID); + if (!(nic_data->datapath_caps & (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) { netif_err(efx, drv, efx->net_dev, @@ -147,7 +198,7 @@ static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) return rc > 0 ? 
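[Annotation] Both efx_ef10_get_pf_index() and efx_ef10_get_vf_index() below follow the standard MCDI request/response shape; for readers new to the macros, the skeleton is the same command with only the extracted field differing:

	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
	size_t outlen;
	int rc;

	rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, /* no inbuf */
			  outbuf, sizeof(outbuf), &outlen);
	if (rc)
		return rc;		/* transport or MC-reported error */
	if (outlen < sizeof(outbuf))
		return -EIO;		/* short response: treat as error */
	/* then MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF) or ..._OUT_VF */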
rc : -ERANGE; } -static int efx_ef10_get_mac_address(struct efx_nic *efx, u8 *mac_address) +static int efx_ef10_get_mac_address_pf(struct efx_nic *efx, u8 *mac_address) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN); size_t outlen; @@ -167,6 +218,34 @@ static int efx_ef10_get_mac_address(struct efx_nic *efx, u8 *mac_address) return 0; } +static int efx_ef10_get_mac_address_vf(struct efx_nic *efx, u8 *mac_address) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_GET_MAC_ADDRESSES_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_VPORT_GET_MAC_ADDRESSES_OUT_LENMAX); + size_t outlen; + int num_addrs, rc; + + MCDI_SET_DWORD(inbuf, VPORT_GET_MAC_ADDRESSES_IN_VPORT_ID, + EVB_PORT_ID_ASSIGNED); + rc = efx_mcdi_rpc(efx, MC_CMD_VPORT_GET_MAC_ADDRESSES, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + + if (rc) + return rc; + if (outlen < MC_CMD_VPORT_GET_MAC_ADDRESSES_OUT_LENMIN) + return -EIO; + + num_addrs = MCDI_DWORD(outbuf, + VPORT_GET_MAC_ADDRESSES_OUT_MACADDR_COUNT); + + WARN_ON(num_addrs != 1); + + ether_addr_copy(mac_address, + MCDI_PTR(outbuf, VPORT_GET_MAC_ADDRESSES_OUT_MACADDR)); + + return 0; +} + static int efx_ef10_probe(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data; @@ -178,7 +257,7 @@ static int efx_ef10_probe(struct efx_nic *efx) efx->max_channels = min_t(unsigned int, EFX_MAX_CHANNELS, - resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) / + efx_ef10_mem_map_size(efx) / (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES)); if (WARN_ON(efx->max_channels == 0)) return -EIO; @@ -209,6 +288,8 @@ static int efx_ef10_probe(struct efx_nic *efx) nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; + /* In case we're recovering from a crash (kexec), we want to * cancel any outstanding request by the previous user of this * function. We send a special message using the least @@ -230,6 +311,10 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail3; + rc = efx_ef10_get_pf_index(efx); + if (rc) + goto fail3; + rc = efx_ef10_init_datapath_caps(efx); if (rc < 0) goto fail3; @@ -242,7 +327,7 @@ static int efx_ef10_probe(struct efx_nic *efx) goto fail3; efx->port_num = rc; - rc = efx_ef10_get_mac_address(efx, efx->net_dev->perm_addr); + rc = efx->type->get_mac_address(efx, efx->net_dev->perm_addr); if (rc) goto fail3; @@ -251,10 +336,22 @@ static int efx_ef10_probe(struct efx_nic *efx) goto fail3; efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ - /* Check whether firmware supports bug 35388 workaround */ + /* Check whether firmware supports bug 35388 workaround. + * First try to enable it, then if we get EPERM, just + * ask if it's already enabled + */ rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true); if (rc == 0) nic_data->workaround_35388 = true; + else if (rc == -EPERM) { + unsigned int enabled; + + rc = efx_mcdi_get_workarounds(efx, NULL, &enabled); + if (rc) + goto fail3; + nic_data->workaround_35388 = enabled & + MC_CMD_GET_WORKAROUNDS_OUT_BUG35388; + } else if (rc != -ENOSYS && rc != -ENOENT) goto fail3; netif_dbg(efx, probe, efx->net_dev, @@ -262,7 +359,7 @@ static int efx_ef10_probe(struct efx_nic *efx) nic_data->workaround_35388 ? 
"en" : "dis"); rc = efx_mcdi_mon_probe(efx); - if (rc) + if (rc && rc != -EPERM) goto fail3; efx_ptp_probe(efx, NULL); @@ -281,7 +378,7 @@ fail1: static int efx_ef10_free_vis(struct efx_nic *efx) { - MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, 0); + MCDI_DECLARE_BUF_ERR(outbuf); size_t outlen; int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0, outbuf, sizeof(outbuf), &outlen); @@ -352,9 +449,9 @@ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) static int efx_ef10_link_piobufs(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - MCDI_DECLARE_BUF(inbuf, - max(MC_CMD_LINK_PIOBUF_IN_LEN, - MC_CMD_UNLINK_PIOBUF_IN_LEN)); + _MCDI_DECLARE_BUF(inbuf, + max(MC_CMD_LINK_PIOBUF_IN_LEN, + MC_CMD_UNLINK_PIOBUF_IN_LEN)); struct efx_channel *channel; struct efx_tx_queue *tx_queue; unsigned int offset, index; @@ -363,6 +460,8 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_OUT_LEN != 0); BUILD_BUG_ON(MC_CMD_UNLINK_PIOBUF_OUT_LEN != 0); + memset(inbuf, 0, sizeof(inbuf)); + /* Link a buffer to each VI in the write-combining mapping */ for (index = 0; index < nic_data->n_piobufs; ++index) { MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_PIOBUF_HANDLE, @@ -475,6 +574,25 @@ static void efx_ef10_remove(struct efx_nic *efx) struct efx_ef10_nic_data *nic_data = efx->nic_data; int rc; +#ifdef CONFIG_SFC_SRIOV + struct efx_ef10_nic_data *nic_data_pf; + struct pci_dev *pci_dev_pf; + struct efx_nic *efx_pf; + struct ef10_vf *vf; + + if (efx->pci_dev->is_virtfn) { + pci_dev_pf = efx->pci_dev->physfn; + if (pci_dev_pf) { + efx_pf = pci_get_drvdata(pci_dev_pf); + nic_data_pf = efx_pf->nic_data; + vf = nic_data_pf->vf + nic_data->vf_index; + vf->efx = NULL; + } else + netif_info(efx, drv, efx->net_dev, + "Could not get the PF id from VF\n"); + } +#endif + efx_ptp_remove(efx); efx_mcdi_mon_remove(efx); @@ -495,6 +613,50 @@ static void efx_ef10_remove(struct efx_nic *efx) kfree(nic_data); } +static int efx_ef10_probe_pf(struct efx_nic *efx) +{ + return efx_ef10_probe(efx); +} + +#ifdef CONFIG_SFC_SRIOV +static int efx_ef10_probe_vf(struct efx_nic *efx) +{ + int rc; + + rc = efx_ef10_probe(efx); + if (rc) + return rc; + + rc = efx_ef10_get_vf_index(efx); + if (rc) + goto fail; + + if (efx->pci_dev->is_virtfn) { + if (efx->pci_dev->physfn) { + struct efx_nic *efx_pf = + pci_get_drvdata(efx->pci_dev->physfn); + struct efx_ef10_nic_data *nic_data_p = efx_pf->nic_data; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + nic_data_p->vf[nic_data->vf_index].efx = efx; + } else + netif_info(efx, drv, efx->net_dev, + "Could not get the PF id from VF\n"); + } + + return 0; + +fail: + efx_ef10_remove(efx); + return rc; +} +#else +static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused))) +{ + return 0; +} +#endif + static int efx_ef10_alloc_vis(struct efx_nic *efx, unsigned int min_vis, unsigned int max_vis) { @@ -687,7 +849,9 @@ static int efx_ef10_init_nic(struct efx_nic *efx) nic_data->must_restore_piobufs = false; } - efx_ef10_rx_push_rss_config(efx); + /* don't fail init if RSS setup doesn't work */ + efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + return 0; } @@ -702,6 +866,14 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx) nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; } +static enum reset_type efx_ef10_map_reset_reason(enum reset_type reason) +{ + if (reason == RESET_TYPE_MC_FAILURE) + return RESET_TYPE_DATAPATH; + + return efx_mcdi_map_reset_reason(reason); +} + static int 
efx_ef10_map_reset_flags(u32 *flags) { enum { @@ -1044,6 +1216,14 @@ static void efx_ef10_push_irq_moderation(struct efx_channel *channel) } } +static void efx_ef10_get_wol_vf(struct efx_nic *efx, + struct ethtool_wolinfo *wol) {} + +static int efx_ef10_set_wol_vf(struct efx_nic *efx, u32 type) +{ + return -EOPNOTSUPP; +} + static void efx_ef10_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol) { wol->supported = 0; @@ -1123,6 +1303,10 @@ static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx) /* All our allocations have been reset */ efx_ef10_reset_mc_allocations(efx); + /* Driver-created vswitches and vports must be re-created */ + nic_data->must_probe_vswitching = true; + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; + /* The datapath firmware might have been changed */ nic_data->must_check_datapath_caps = true; @@ -1232,16 +1416,17 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) { MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / EFX_BUF_SIZE)); - MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_TXQ_OUT_LEN); bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; - size_t inlen, outlen; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t inlen; dma_addr_t dma_addr; efx_qword_t *txd; int rc; int i; + BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); @@ -1251,7 +1436,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); dma_addr = tx_queue->txd.buf.dma_addr; @@ -1266,7 +1451,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen, - outbuf, sizeof(outbuf), &outlen); + NULL, 0, NULL); if (rc) goto fail; @@ -1299,7 +1484,7 @@ fail: static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue) { MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_TXQ_OUT_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); struct efx_nic *efx = tx_queue->efx; size_t outlen; int rc; @@ -1378,19 +1563,33 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) } } -static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context) +static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, + bool exclusive, unsigned *context_size) { MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; + u32 alloc_type = exclusive ? + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE : + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED; + unsigned rss_spread = exclusive ? 
+ efx->rss_spread : + min(rounddown_pow_of_two(efx->rss_spread), + EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE); + + if (!exclusive && rss_spread == 1) { + *context = EFX_EF10_RSS_CONTEXT_INVALID; + if (context_size) + *context_size = 1; + return 0; + } MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, - EVB_PORT_ID_ASSIGNED); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, - MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, - EFX_MAX_CHANNELS); + nic_data->vport_id); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread); rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); @@ -1402,6 +1601,9 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context) *context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID); + if (context_size) + *context_size = rss_spread; + return 0; } @@ -1418,7 +1620,8 @@ static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context) WARN_ON(rc != 0); } -static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context) +static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context, + const u32 *rx_indir_table) { MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN); MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN); @@ -1432,7 +1635,7 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context) for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i) MCDI_PTR(tablebuf, RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] = - (u8) efx->rx_indir_table[i]; + (u8) rx_indir_table[i]; rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf, sizeof(tablebuf), NULL, 0, NULL); @@ -1460,27 +1663,119 @@ static void efx_ef10_rx_free_indir_table(struct efx_nic *efx) nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; } -static void efx_ef10_rx_push_rss_config(struct efx_nic *efx) +static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx, + unsigned *context_size) { + u32 new_rx_rss_context; struct efx_ef10_nic_data *nic_data = efx->nic_data; - int rc; + int rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context, + false, context_size); - netif_dbg(efx, drv, efx->net_dev, "pushing RSS config\n"); + if (rc != 0) + return rc; - if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID) { - rc = efx_ef10_alloc_rss_context(efx, &nic_data->rx_rss_context); - if (rc != 0) - goto fail; + nic_data->rx_rss_context = new_rx_rss_context; + nic_data->rx_rss_context_exclusive = false; + efx_set_default_rx_indir_table(efx); + return 0; +} + +static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx, + const u32 *rx_indir_table) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + u32 new_rx_rss_context; + + if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID || + !nic_data->rx_rss_context_exclusive) { + rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context, + true, NULL); + if (rc == -EOPNOTSUPP) + return rc; + else if (rc != 0) + goto fail1; + } else { + new_rx_rss_context = nic_data->rx_rss_context; } - rc = efx_ef10_populate_rss_table(efx, nic_data->rx_rss_context); + rc = efx_ef10_populate_rss_table(efx, new_rx_rss_context, + rx_indir_table); if (rc != 0) - goto fail; + goto fail2; - return; + if (nic_data->rx_rss_context != new_rx_rss_context) + efx_ef10_rx_free_indir_table(efx); + nic_data->rx_rss_context = new_rx_rss_context; + nic_data->rx_rss_context_exclusive 
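[Annotation] For a shared RSS context the spread is clamped to the largest power of two not exceeding the requested spread, capped at EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE. A worked example with an illustrative request:

	unsigned rss_spread = 48;	/* hypothetical requested spread */
	unsigned shared = min(rounddown_pow_of_two(rss_spread),
			      EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
	/* rounddown_pow_of_two(48) == 32; min(32, 64UL) -> 32 entries */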
= true; + if (rx_indir_table != efx->rx_indir_table) + memcpy(efx->rx_indir_table, rx_indir_table, + sizeof(efx->rx_indir_table)); + return 0; -fail: +fail2: + if (new_rx_rss_context != nic_data->rx_rss_context) + efx_ef10_free_rss_context(efx, new_rx_rss_context); +fail1: netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) +{ + int rc; + + if (efx->rss_spread == 1) + return 0; + + rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table); + + if (rc == -ENOBUFS && !user) { + unsigned context_size; + bool mismatch = false; + size_t i; + + for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table) && !mismatch; + i++) + mismatch = rx_indir_table[i] != + ethtool_rxfh_indir_default(i, efx->rss_spread); + + rc = efx_ef10_rx_push_shared_rss_config(efx, &context_size); + if (rc == 0) { + if (context_size != efx->rss_spread) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one of" + " different size." + " Wanted %u, got %u.\n", + efx->rss_spread, context_size); + else if (mismatch) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one but" + " could not apply custom" + " indirection.\n"); + else + netif_info(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one.\n"); + } + } + return rc; +} + +static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table + __attribute__ ((unused))) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (user) + return -EOPNOTSUPP; + if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID) + return 0; + return efx_ef10_rx_push_shared_rss_config(efx, NULL); } static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue) @@ -1496,14 +1791,15 @@ static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / EFX_BUF_SIZE)); - MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_RXQ_OUT_LEN); struct efx_channel *channel = efx_rx_queue_channel(rx_queue); size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; struct efx_nic *efx = rx_queue->efx; - size_t inlen, outlen; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t inlen; dma_addr_t dma_addr; int rc; int i; + BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0); rx_queue->scatter_n = 0; rx_queue->scatter_len = 0; @@ -1517,7 +1813,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) INIT_RXQ_IN_FLAG_PREFIX, 1, INIT_RXQ_IN_FLAG_TIMESTAMP, 1); MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); dma_addr = rx_queue->rxd.buf.dma_addr; @@ -1532,7 +1828,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, - outbuf, sizeof(outbuf), &outlen); + NULL, 0, NULL); if (rc) netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n", efx_rx_queue_index(rx_queue)); @@ -1541,7 +1837,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue) { MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_RXQ_OUT_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); struct efx_nic *efx = rx_queue->efx; size_t 
outlen; int rc; @@ -1703,7 +1999,7 @@ static int efx_ef10_ev_init(struct efx_channel *channel) static void efx_ef10_ev_fini(struct efx_channel *channel) { MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_EVQ_OUT_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); struct efx_nic *efx = channel->efx; size_t outlen; int rc; @@ -2286,11 +2582,12 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx, match_fields); } - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? MC_CMD_FILTER_OP_IN_RX_DEST_DROP : MC_CMD_FILTER_OP_IN_RX_DEST_HOST); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST, MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, @@ -3055,6 +3352,9 @@ fail: return rc; } +/* Caller must hold efx->filter_sem for read if race against + * efx_ef10_filter_table_remove() is possible + */ static void efx_ef10_filter_table_restore(struct efx_nic *efx) { struct efx_ef10_filter_table *table = efx->filter_state; @@ -3064,9 +3364,14 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx) bool failed = false; int rc; + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + if (!nic_data->must_restore_filters) return; + if (!table) + return; + spin_lock_bh(&efx->filter_lock); for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { @@ -3102,6 +3407,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx) nic_data->must_restore_filters = false; } +/* Caller must hold efx->filter_sem for write */ static void efx_ef10_filter_table_remove(struct efx_nic *efx) { struct efx_ef10_filter_table *table = efx->filter_state; @@ -3110,6 +3416,10 @@ static void efx_ef10_filter_table_remove(struct efx_nic *efx) unsigned int filter_idx; int rc; + efx->filter_state = NULL; + if (!table) + return; + for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { spec = efx_ef10_filter_entry_spec(table, filter_idx); if (!spec) @@ -3135,6 +3445,9 @@ static void efx_ef10_filter_table_remove(struct efx_nic *efx) kfree(table); } +/* Caller must hold efx->filter_sem for read if race against + * efx_ef10_filter_table_remove() is possible + */ static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) { struct efx_ef10_filter_table *table = efx->filter_state; @@ -3149,6 +3462,9 @@ static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) if (!efx_dev_registered(efx)) return; + if (!table) + return; + /* Mark old filters that may need to be removed */ spin_lock_bh(&efx->filter_lock); n = table->dev_uc_count < 0 ? 
1 : table->dev_uc_count; @@ -3280,6 +3596,78 @@ static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) WARN_ON(remove_failed); } +static int efx_ef10_set_mac_address(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + bool was_enabled = efx->port_enabled; + int rc; + + efx_device_detach_sync(efx); + efx_net_stop(efx->net_dev); + down_write(&efx->filter_sem); + efx_ef10_filter_table_remove(efx); + + ether_addr_copy(MCDI_PTR(inbuf, VADAPTOR_SET_MAC_IN_MACADDR), + efx->net_dev->dev_addr); + MCDI_SET_DWORD(inbuf, VADAPTOR_SET_MAC_IN_UPSTREAM_PORT_ID, + nic_data->vport_id); + rc = efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, + sizeof(inbuf), NULL, 0, NULL); + + efx_ef10_filter_table_probe(efx); + up_write(&efx->filter_sem); + if (was_enabled) + efx_net_open(efx->net_dev); + netif_device_attach(efx->net_dev); + +#if !defined(CONFIG_SFC_SRIOV) + if (rc == -EPERM) + netif_err(efx, drv, efx->net_dev, + "Cannot change MAC address; use sfboot to enable mac-spoofing" + " on this interface\n"); +#else + if (rc == -EPERM) { + struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; + + /* Switch to PF and change MAC address on vport */ + if (efx->pci_dev->is_virtfn && pci_dev_pf) { + struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); + + if (!efx_ef10_sriov_set_vf_mac(efx_pf, + nic_data->vf_index, + efx->net_dev->dev_addr)) + return 0; + } + netif_err(efx, drv, efx->net_dev, + "Cannot change MAC address; use sfboot to enable mac-spoofing" + " on this interface\n"); + } else if (efx->pci_dev->is_virtfn) { + /* Successfully changed by VF (with MAC spoofing), so update the + * parent PF if possible. + */ + struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; + + if (pci_dev_pf) { + struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); + struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; + unsigned int i; + + for (i = 0; i < efx_pf->vf_count; ++i) { + struct ef10_vf *vf = nic_data->vf + i; + + if (vf->efx == efx) { + ether_addr_copy(vf->mac, + efx->net_dev->dev_addr); + return 0; + } + } + } + } +#endif + return rc; +} + static int efx_ef10_mac_reconfigure(struct efx_nic *efx) { efx_ef10_filter_sync_rx_mode(efx); @@ -3287,6 +3675,13 @@ static int efx_ef10_mac_reconfigure(struct efx_nic *efx) return efx_mcdi_set_mac(efx); } +static int efx_ef10_mac_reconfigure_vf(struct efx_nic *efx) +{ + efx_ef10_filter_sync_rx_mode(efx); + + return 0; +} + static int efx_ef10_start_bist(struct efx_nic *efx, u32 bist_type) { MCDI_DECLARE_BUF(inbuf, MC_CMD_START_BIST_IN_LEN); @@ -3494,6 +3889,9 @@ static void efx_ef10_ptp_write_host_time(struct efx_nic *efx, u32 host_time) _efx_writed(efx, cpu_to_le32(host_time), ER_DZ_MC_DB_LWRD); } +static void efx_ef10_ptp_write_host_time_vf(struct efx_nic *efx, + u32 host_time) {} + static int efx_ef10_rx_enable_timestamping(struct efx_channel *channel, bool temp) { @@ -3571,6 +3969,12 @@ static int efx_ef10_ptp_set_ts_sync_events(struct efx_nic *efx, bool en, return 0; } +static int efx_ef10_ptp_set_ts_config_vf(struct efx_nic *efx, + struct hwtstamp_config *init) +{ + return -EOPNOTSUPP; +} + static int efx_ef10_ptp_set_ts_config(struct efx_nic *efx, struct hwtstamp_config *init) { @@ -3607,14 +4011,117 @@ static int efx_ef10_ptp_set_ts_config(struct efx_nic *efx, } } +const struct efx_nic_type efx_hunt_a0_vf_nic_type = { + .is_vf = true, + .mem_bar = EFX_MEM_VF_BAR, + .mem_map_size = efx_ef10_mem_map_size, + .probe = efx_ef10_probe_vf, + .remove = efx_ef10_remove, + .dimension_resources = 
efx_ef10_dimension_resources, + .init = efx_ef10_init_nic, + .fini = efx_port_dummy_op_void, + .map_reset_reason = efx_ef10_map_reset_reason, + .map_reset_flags = efx_ef10_map_reset_flags, + .reset = efx_ef10_reset, + .probe_port = efx_mcdi_port_probe, + .remove_port = efx_mcdi_port_remove, + .fini_dmaq = efx_ef10_fini_dmaq, + .prepare_flr = efx_ef10_prepare_flr, + .finish_flr = efx_port_dummy_op_void, + .describe_stats = efx_ef10_describe_stats, + .update_stats = efx_ef10_update_stats, + .start_stats = efx_port_dummy_op_void, + .pull_stats = efx_port_dummy_op_void, + .stop_stats = efx_port_dummy_op_void, + .set_id_led = efx_mcdi_set_id_led, + .push_irq_moderation = efx_ef10_push_irq_moderation, + .reconfigure_mac = efx_ef10_mac_reconfigure_vf, + .check_mac_fault = efx_mcdi_mac_check_fault, + .reconfigure_port = efx_mcdi_port_reconfigure, + .get_wol = efx_ef10_get_wol_vf, + .set_wol = efx_ef10_set_wol_vf, + .resume_wol = efx_port_dummy_op_void, + .mcdi_request = efx_ef10_mcdi_request, + .mcdi_poll_response = efx_ef10_mcdi_poll_response, + .mcdi_read_response = efx_ef10_mcdi_read_response, + .mcdi_poll_reboot = efx_ef10_mcdi_poll_reboot, + .irq_enable_master = efx_port_dummy_op_void, + .irq_test_generate = efx_ef10_irq_test_generate, + .irq_disable_non_ev = efx_port_dummy_op_void, + .irq_handle_msi = efx_ef10_msi_interrupt, + .irq_handle_legacy = efx_ef10_legacy_interrupt, + .tx_probe = efx_ef10_tx_probe, + .tx_init = efx_ef10_tx_init, + .tx_remove = efx_ef10_tx_remove, + .tx_write = efx_ef10_tx_write, + .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config, + .rx_probe = efx_ef10_rx_probe, + .rx_init = efx_ef10_rx_init, + .rx_remove = efx_ef10_rx_remove, + .rx_write = efx_ef10_rx_write, + .rx_defer_refill = efx_ef10_rx_defer_refill, + .ev_probe = efx_ef10_ev_probe, + .ev_init = efx_ef10_ev_init, + .ev_fini = efx_ef10_ev_fini, + .ev_remove = efx_ef10_ev_remove, + .ev_process = efx_ef10_ev_process, + .ev_read_ack = efx_ef10_ev_read_ack, + .ev_test_generate = efx_ef10_ev_test_generate, + .filter_table_probe = efx_ef10_filter_table_probe, + .filter_table_restore = efx_ef10_filter_table_restore, + .filter_table_remove = efx_ef10_filter_table_remove, + .filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter, + .filter_insert = efx_ef10_filter_insert, + .filter_remove_safe = efx_ef10_filter_remove_safe, + .filter_get_safe = efx_ef10_filter_get_safe, + .filter_clear_rx = efx_ef10_filter_clear_rx, + .filter_count_rx_used = efx_ef10_filter_count_rx_used, + .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, +#ifdef CONFIG_RFS_ACCEL + .filter_rfs_insert = efx_ef10_filter_rfs_insert, + .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, +#endif +#ifdef CONFIG_SFC_MTD + .mtd_probe = efx_port_dummy_op_int, +#endif + .ptp_write_host_time = efx_ef10_ptp_write_host_time_vf, + .ptp_set_ts_config = efx_ef10_ptp_set_ts_config_vf, +#ifdef CONFIG_SFC_SRIOV + .vswitching_probe = efx_ef10_vswitching_probe_vf, + .vswitching_restore = efx_ef10_vswitching_restore_vf, + .vswitching_remove = efx_ef10_vswitching_remove_vf, +#endif + .get_mac_address = efx_ef10_get_mac_address_vf, + .set_mac_address = efx_ef10_set_mac_address, + + .revision = EFX_REV_HUNT_A0, + .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), + .rx_prefix_size = ES_DZ_RX_PREFIX_SIZE, + .rx_hash_offset = ES_DZ_RX_PREFIX_HASH_OFST, + .rx_ts_offset = ES_DZ_RX_PREFIX_TSTAMP_OFST, + .can_rx_scatter = true, + .always_rx_scatter = true, + .max_interrupt_mode = EFX_INT_MODE_MSIX, + 
.timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, + .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXHASH | NETIF_F_NTUPLE), + .mcdi_max_ver = 2, + .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS, + .hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE | + 1 << HWTSTAMP_FILTER_ALL, +}; + const struct efx_nic_type efx_hunt_a0_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = efx_ef10_mem_map_size, - .probe = efx_ef10_probe, + .probe = efx_ef10_probe_pf, .remove = efx_ef10_remove, .dimension_resources = efx_ef10_dimension_resources, .init = efx_ef10_init_nic, .fini = efx_port_dummy_op_void, - .map_reset_reason = efx_mcdi_map_reset_reason, + .map_reset_reason = efx_ef10_map_reset_reason, .map_reset_flags = efx_ef10_map_reset_flags, .reset = efx_ef10_reset, .probe_port = efx_mcdi_port_probe, @@ -3650,7 +4157,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .tx_init = efx_ef10_tx_init, .tx_remove = efx_ef10_tx_remove, .tx_write = efx_ef10_tx_write, - .rx_push_rss_config = efx_ef10_rx_push_rss_config, + .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config, .rx_probe = efx_ef10_rx_probe, .rx_init = efx_ef10_rx_init, .rx_remove = efx_ef10_rx_remove, @@ -3689,11 +4196,24 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .ptp_write_host_time = efx_ef10_ptp_write_host_time, .ptp_set_ts_sync_events = efx_ef10_ptp_set_ts_sync_events, .ptp_set_ts_config = efx_ef10_ptp_set_ts_config, +#ifdef CONFIG_SFC_SRIOV + .sriov_configure = efx_ef10_sriov_configure, .sriov_init = efx_ef10_sriov_init, .sriov_fini = efx_ef10_sriov_fini, - .sriov_mac_address_changed = efx_ef10_sriov_mac_address_changed, .sriov_wanted = efx_ef10_sriov_wanted, .sriov_reset = efx_ef10_sriov_reset, + .sriov_flr = efx_ef10_sriov_flr, + .sriov_set_vf_mac = efx_ef10_sriov_set_vf_mac, + .sriov_set_vf_vlan = efx_ef10_sriov_set_vf_vlan, + .sriov_set_vf_spoofchk = efx_ef10_sriov_set_vf_spoofchk, + .sriov_get_vf_config = efx_ef10_sriov_get_vf_config, + .sriov_set_vf_link_state = efx_ef10_sriov_set_vf_link_state, + .vswitching_probe = efx_ef10_vswitching_probe_pf, + .vswitching_restore = efx_ef10_vswitching_restore_pf, + .vswitching_remove = efx_ef10_vswitching_remove_pf, +#endif + .get_mac_address = efx_ef10_get_mac_address_pf, + .set_mac_address = efx_ef10_set_mac_address, .revision = EFX_REV_HUNT_A0, .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c new file mode 100644 index 000000000000..3969b1bf7ef3 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -0,0 +1,738 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+#include "net_driver.h"
+#include "ef10_sriov.h"
+#include "efx.h"
+#include "nic.h"
+#include "mcdi_pcol.h"
+
+static int efx_ef10_evb_port_assign(struct efx_nic *efx, unsigned int port_id,
+				    unsigned int vf_fn)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_EVB_PORT_ASSIGN_IN_LEN);
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+	MCDI_SET_DWORD(inbuf, EVB_PORT_ASSIGN_IN_PORT_ID, port_id);
+	MCDI_POPULATE_DWORD_2(inbuf, EVB_PORT_ASSIGN_IN_FUNCTION,
+			      EVB_PORT_ASSIGN_IN_PF, nic_data->pf_index,
+			      EVB_PORT_ASSIGN_IN_VF, vf_fn);
+
+	return efx_mcdi_rpc(efx, MC_CMD_EVB_PORT_ASSIGN, inbuf, sizeof(inbuf),
+			    NULL, 0, NULL);
+}
+
+static int efx_ef10_vport_add_mac(struct efx_nic *efx,
+				  unsigned int port_id, u8 *mac)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN);
+
+	MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id);
+	ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac);
+
+	return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf,
+			    sizeof(inbuf), NULL, 0, NULL);
+}
+
+static int efx_ef10_vport_del_mac(struct efx_nic *efx,
+				  unsigned int port_id, u8 *mac)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN);
+
+	MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id);
+	ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac);
+
+	return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf,
+			    sizeof(inbuf), NULL, 0, NULL);
+}
+
+static int efx_ef10_vswitch_alloc(struct efx_nic *efx, unsigned int port_id,
+				  unsigned int vswitch_type)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_VSWITCH_ALLOC_IN_LEN);
+	int rc;
+
+	MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_UPSTREAM_PORT_ID, port_id);
+	MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_TYPE, vswitch_type);
+	MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_NUM_VLAN_TAGS, 2);
+	MCDI_POPULATE_DWORD_1(inbuf, VSWITCH_ALLOC_IN_FLAGS,
+			      VSWITCH_ALLOC_IN_FLAG_AUTO_PORT, 0);
+
+	/* Quietly try to allocate 2 VLAN tags */
+	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VSWITCH_ALLOC, inbuf, sizeof(inbuf),
+				NULL, 0, NULL);
+
+	/* If 2 VLAN tags are too many, revert to trying with 1 VLAN tag */
+	if (rc == -EPROTO) {
+		MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_NUM_VLAN_TAGS, 1);
+		rc = efx_mcdi_rpc(efx, MC_CMD_VSWITCH_ALLOC, inbuf,
+				  sizeof(inbuf), NULL, 0, NULL);
+	} else if (rc) {
+		efx_mcdi_display_error(efx, MC_CMD_VSWITCH_ALLOC,
+				       MC_CMD_VSWITCH_ALLOC_IN_LEN,
+				       NULL, 0, rc);
+	}
+	return rc;
+}
+
+static int efx_ef10_vswitch_free(struct efx_nic *efx, unsigned int port_id)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_VSWITCH_FREE_IN_LEN);
+
+	MCDI_SET_DWORD(inbuf, VSWITCH_FREE_IN_UPSTREAM_PORT_ID, port_id);
+
+	return efx_mcdi_rpc(efx, MC_CMD_VSWITCH_FREE, inbuf, sizeof(inbuf),
+			    NULL, 0, NULL);
+}
+
+static int efx_ef10_vport_alloc(struct efx_nic *efx,
+				unsigned int port_id_in,
+				unsigned int vport_type,
+				u16 vlan,
+				unsigned int *port_id_out)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ALLOC_IN_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_VPORT_ALLOC_OUT_LEN);
+	size_t outlen;
+	int rc;
+
+	EFX_WARN_ON_PARANOID(!port_id_out);
+
+	MCDI_SET_DWORD(inbuf, VPORT_ALLOC_IN_UPSTREAM_PORT_ID, port_id_in);
+	MCDI_SET_DWORD(inbuf, VPORT_ALLOC_IN_TYPE, vport_type);
+	MCDI_SET_DWORD(inbuf, VPORT_ALLOC_IN_NUM_VLAN_TAGS,
+		       (vlan != EFX_EF10_NO_VLAN));
+	MCDI_POPULATE_DWORD_1(inbuf, VPORT_ALLOC_IN_FLAGS,
+			      VPORT_ALLOC_IN_FLAG_AUTO_PORT, 0);
+	if (vlan != EFX_EF10_NO_VLAN)
+		MCDI_POPULATE_DWORD_1(inbuf, VPORT_ALLOC_IN_VLAN_TAGS,
+				      VPORT_ALLOC_IN_VLAN_TAG_0, vlan);
+
+	rc = 
efx_mcdi_rpc(efx, MC_CMD_VPORT_ALLOC, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_VPORT_ALLOC_OUT_LEN) + return -EIO; + + *port_id_out = MCDI_DWORD(outbuf, VPORT_ALLOC_OUT_VPORT_ID); + return 0; +} + +static int efx_ef10_vport_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_FREE_IN_VPORT_ID, port_id); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int i; + + if (!nic_data->vf) + return; + + for (i = 0; i < efx->vf_count; i++) { + struct ef10_vf *vf = nic_data->vf + i; + + if (vf->vport_assigned) { + efx_ef10_evb_port_assign(efx, EVB_PORT_ID_NULL, i); + vf->vport_assigned = 0; + } + + if (!is_zero_ether_addr(vf->mac)) { + efx_ef10_vport_del_mac(efx, vf->vport_id, vf->mac); + eth_zero_addr(vf->mac); + } + + if (vf->vport_id) { + efx_ef10_vport_free(efx, vf->vport_id); + vf->vport_id = 0; + } + + vf->efx = NULL; + } +} + +static void efx_ef10_sriov_free_vf_vswitching(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + efx_ef10_sriov_free_vf_vports(efx); + kfree(nic_data->vf); + nic_data->vf = NULL; +} + +static int efx_ef10_sriov_assign_vf_vport(struct efx_nic *efx, + unsigned int vf_i) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct ef10_vf *vf = nic_data->vf + vf_i; + int rc; + + if (WARN_ON_ONCE(!nic_data->vf)) + return -EOPNOTSUPP; + + rc = efx_ef10_vport_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, + vf->vlan, &vf->vport_id); + if (rc) + return rc; + + rc = efx_ef10_vport_add_mac(efx, vf->vport_id, vf->mac); + if (rc) { + eth_zero_addr(vf->mac); + return rc; + } + + rc = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); + if (rc) + return rc; + + vf->vport_assigned = 1; + return 0; +} + +static int efx_ef10_sriov_alloc_vf_vswitching(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int i; + int rc; + + nic_data->vf = kcalloc(efx->vf_count, sizeof(struct ef10_vf), + GFP_KERNEL); + if (!nic_data->vf) + return -ENOMEM; + + for (i = 0; i < efx->vf_count; i++) { + random_ether_addr(nic_data->vf[i].mac); + nic_data->vf[i].efx = NULL; + nic_data->vf[i].vlan = EFX_EF10_NO_VLAN; + + rc = efx_ef10_sriov_assign_vf_vport(efx, i); + if (rc) + goto fail; + } + + return 0; +fail: + efx_ef10_sriov_free_vf_vports(efx); + kfree(nic_data->vf); + nic_data->vf = NULL; + return rc; +} + +static int efx_ef10_sriov_restore_vf_vswitching(struct efx_nic *efx) +{ + unsigned int i; + int rc; + + for (i = 0; i < efx->vf_count; i++) { + rc = efx_ef10_sriov_assign_vf_vport(efx, i); + if (rc) + goto fail; + } + + return 0; +fail: + efx_ef10_sriov_free_vf_vswitching(efx); + return rc; +} + +/* On 
top of the default firmware vswitch setup, create a VEB vswitch and + * expansion vport for use by this function. + */ +int efx_ef10_vswitching_probe_pf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct net_device *net_dev = efx->net_dev; + int rc; + + if (pci_sriov_get_totalvfs(efx->pci_dev) <= 0) { + /* vswitch not needed as we have no VFs */ + efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + return 0; + } + + rc = efx_ef10_vswitch_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB); + if (rc) + goto fail1; + + rc = efx_ef10_vport_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, + EFX_EF10_NO_VLAN, &nic_data->vport_id); + if (rc) + goto fail2; + + rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id, net_dev->dev_addr); + if (rc) + goto fail3; + ether_addr_copy(nic_data->vport_mac, net_dev->dev_addr); + + rc = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + if (rc) + goto fail4; + + return 0; +fail4: + efx_ef10_vport_del_mac(efx, nic_data->vport_id, nic_data->vport_mac); + eth_zero_addr(nic_data->vport_mac); +fail3: + efx_ef10_vport_free(efx, nic_data->vport_id); + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; +fail2: + efx_ef10_vswitch_free(efx, EVB_PORT_ID_ASSIGNED); +fail1: + return rc; +} + +int efx_ef10_vswitching_probe_vf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + return efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); +} + +int efx_ef10_vswitching_restore_pf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (!nic_data->must_probe_vswitching) + return 0; + + rc = efx_ef10_vswitching_probe_pf(efx); + if (rc) + goto fail; + + rc = efx_ef10_sriov_restore_vf_vswitching(efx); + if (rc) + goto fail; + + nic_data->must_probe_vswitching = false; +fail: + return rc; +} + +int efx_ef10_vswitching_restore_vf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (!nic_data->must_probe_vswitching) + return 0; + + rc = efx_ef10_vadaptor_free(efx, EVB_PORT_ID_ASSIGNED); + if (rc) + return rc; + + nic_data->must_probe_vswitching = false; + return 0; +} + +void efx_ef10_vswitching_remove_pf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + efx_ef10_sriov_free_vf_vswitching(efx); + + efx_ef10_vadaptor_free(efx, nic_data->vport_id); + + if (nic_data->vport_id == EVB_PORT_ID_ASSIGNED) + return; /* No vswitch was ever created */ + + if (!is_zero_ether_addr(nic_data->vport_mac)) { + efx_ef10_vport_del_mac(efx, nic_data->vport_id, + efx->net_dev->dev_addr); + eth_zero_addr(nic_data->vport_mac); + } + efx_ef10_vport_free(efx, nic_data->vport_id); + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; + + efx_ef10_vswitch_free(efx, nic_data->vport_id); +} + +void efx_ef10_vswitching_remove_vf(struct efx_nic *efx) +{ + efx_ef10_vadaptor_free(efx, EVB_PORT_ID_ASSIGNED); +} + +static int efx_ef10_pci_sriov_enable(struct efx_nic *efx, int num_vfs) +{ + int rc = 0; + struct pci_dev *dev = efx->pci_dev; + + efx->vf_count = num_vfs; + + rc = efx_ef10_sriov_alloc_vf_vswitching(efx); + if (rc) + goto fail1; + + rc = pci_enable_sriov(dev, num_vfs); + if (rc) + goto fail2; + + return 0; +fail2: + efx_ef10_sriov_free_vf_vswitching(efx); +fail1: + efx->vf_count = 0; + netif_err(efx, probe, efx->net_dev, + "Failed to enable SRIOV VFs\n"); + return rc; +} + +static int efx_ef10_pci_sriov_disable(struct efx_nic *efx) +{ + struct pci_dev *dev = efx->pci_dev; + + pci_disable_sriov(dev); + 
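/* pci_disable_sriov() has unbound the VF devices at this point, so
+	 * the vports and EVB port assignments created for them can be
+	 * freed safely.  This path backs the standard sriov_numvfs sysfs
+	 * interface via efx_ef10_sriov_configure(); for example (device
+	 * address illustrative):
+	 *   echo 0 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs
+	 */
+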
efx_ef10_sriov_free_vf_vswitching(efx); + efx->vf_count = 0; + return 0; +} + +int efx_ef10_sriov_configure(struct efx_nic *efx, int num_vfs) +{ + if (num_vfs == 0) + return efx_ef10_pci_sriov_disable(efx); + else + return efx_ef10_pci_sriov_enable(efx, num_vfs); +} + +int efx_ef10_sriov_init(struct efx_nic *efx) +{ + return 0; +} + +void efx_ef10_sriov_fini(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (!nic_data->vf) + return; + + rc = efx_ef10_pci_sriov_disable(efx); + if (rc) + netif_dbg(efx, drv, efx->net_dev, + "Disabling SRIOV was not successful rc=%d\n", rc); + else + netif_dbg(efx, drv, efx->net_dev, "SRIOV disabled\n"); +} + +static int efx_ef10_vport_del_vf_mac(struct efx_nic *efx, unsigned int port_id, + u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); + + rc = efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + + return rc; +} + +int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct ef10_vf *vf; + int rc; + + if (!nic_data->vf) + return -EOPNOTSUPP; + + if (vf_i >= efx->vf_count) + return -EINVAL; + vf = nic_data->vf + vf_i; + + if (vf->efx) { + efx_device_detach_sync(vf->efx); + efx_net_stop(vf->efx->net_dev); + + down_write(&vf->efx->filter_sem); + vf->efx->type->filter_table_remove(vf->efx); + + rc = efx_ef10_vadaptor_free(vf->efx, EVB_PORT_ID_ASSIGNED); + if (rc) { + up_write(&vf->efx->filter_sem); + return rc; + } + } + + rc = efx_ef10_evb_port_assign(efx, EVB_PORT_ID_NULL, vf_i); + if (rc) + return rc; + + if (!is_zero_ether_addr(vf->mac)) { + rc = efx_ef10_vport_del_vf_mac(efx, vf->vport_id, vf->mac); + if (rc) + return rc; + } + + if (!is_zero_ether_addr(mac)) { + rc = efx_ef10_vport_add_mac(efx, vf->vport_id, mac); + if (rc) { + eth_zero_addr(vf->mac); + goto fail; + } + if (vf->efx) + ether_addr_copy(vf->efx->net_dev->dev_addr, mac); + } + + ether_addr_copy(vf->mac, mac); + + rc = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); + if (rc) + goto fail; + + if (vf->efx) { + /* VF cannot use the vport_id that the PF created */ + rc = efx_ef10_vadaptor_alloc(vf->efx, EVB_PORT_ID_ASSIGNED); + if (rc) { + up_write(&vf->efx->filter_sem); + return rc; + } + vf->efx->type->filter_table_probe(vf->efx); + up_write(&vf->efx->filter_sem); + efx_net_open(vf->efx->net_dev); + netif_device_attach(vf->efx->net_dev); + } + + return 0; + +fail: + memset(vf->mac, 0, ETH_ALEN); + return rc; +} + +int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, + u8 qos) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct ef10_vf *vf; + u16 old_vlan, new_vlan; + int rc = 0, rc2 = 0; + + if (vf_i >= efx->vf_count) + return -EINVAL; + if (qos != 0) + return -EINVAL; + + vf = nic_data->vf + vf_i; + + new_vlan = (vlan == 0) ? 
EFX_EF10_NO_VLAN : vlan; + if (new_vlan == vf->vlan) + return 0; + + if (vf->efx) { + efx_device_detach_sync(vf->efx); + efx_net_stop(vf->efx->net_dev); + + down_write(&vf->efx->filter_sem); + vf->efx->type->filter_table_remove(vf->efx); + + rc = efx_ef10_vadaptor_free(vf->efx, EVB_PORT_ID_ASSIGNED); + if (rc) + goto restore_filters; + } + + if (vf->vport_assigned) { + rc = efx_ef10_evb_port_assign(efx, EVB_PORT_ID_NULL, vf_i); + if (rc) { + netif_warn(efx, drv, efx->net_dev, + "Failed to change vlan on VF %d.\n", vf_i); + netif_warn(efx, drv, efx->net_dev, + "This is likely because the VF is bound to a driver in a VM.\n"); + netif_warn(efx, drv, efx->net_dev, + "Please unload the driver in the VM.\n"); + goto restore_vadaptor; + } + vf->vport_assigned = 0; + } + + if (!is_zero_ether_addr(vf->mac)) { + rc = efx_ef10_vport_del_mac(efx, vf->vport_id, vf->mac); + if (rc) + goto restore_evb_port; + } + + if (vf->vport_id) { + rc = efx_ef10_vport_free(efx, vf->vport_id); + if (rc) + goto restore_mac; + vf->vport_id = 0; + } + + /* Do the actual vlan change */ + old_vlan = vf->vlan; + vf->vlan = new_vlan; + + /* Restore everything in reverse order */ + rc = efx_ef10_vport_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, + vf->vlan, &vf->vport_id); + if (rc) + goto reset_nic; + +restore_mac: + if (!is_zero_ether_addr(vf->mac)) { + rc2 = efx_ef10_vport_add_mac(efx, vf->vport_id, vf->mac); + if (rc2) { + eth_zero_addr(vf->mac); + goto reset_nic; + } + } + +restore_evb_port: + rc2 = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); + if (rc2) + goto reset_nic; + else + vf->vport_assigned = 1; + +restore_vadaptor: + if (vf->efx) { + rc2 = efx_ef10_vadaptor_alloc(vf->efx, EVB_PORT_ID_ASSIGNED); + if (rc2) + goto reset_nic; + } + +restore_filters: + if (vf->efx) { + rc2 = vf->efx->type->filter_table_probe(vf->efx); + if (rc2) + goto reset_nic; + + up_write(&vf->efx->filter_sem); + + rc2 = efx_net_open(vf->efx->net_dev); + if (rc2) + goto reset_nic; + + netif_device_attach(vf->efx->net_dev); + } + return rc; + +reset_nic: + if (vf->efx) { + up_write(&vf->efx->filter_sem); + netif_err(efx, drv, efx->net_dev, + "Failed to restore VF - scheduling reset.\n"); + efx_schedule_reset(vf->efx, RESET_TYPE_DATAPATH); + } else { + netif_err(efx, drv, efx->net_dev, + "Failed to restore the VF and cannot reset the VF " + "- VF is not functional.\n"); + netif_err(efx, drv, efx->net_dev, + "Please reload the driver attached to the VF.\n"); + } + + return rc ? rc : rc2; +} + +int efx_ef10_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i, + bool spoofchk) +{ + return spoofchk ? 
-EOPNOTSUPP : 0; +} + +int efx_ef10_sriov_set_vf_link_state(struct efx_nic *efx, int vf_i, + int link_state) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_LINK_STATE_MODE_IN_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + BUILD_BUG_ON(IFLA_VF_LINK_STATE_AUTO != + MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO); + BUILD_BUG_ON(IFLA_VF_LINK_STATE_ENABLE != + MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP); + BUILD_BUG_ON(IFLA_VF_LINK_STATE_DISABLE != + MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN); + MCDI_POPULATE_DWORD_2(inbuf, LINK_STATE_MODE_IN_FUNCTION, + LINK_STATE_MODE_IN_FUNCTION_PF, + nic_data->pf_index, + LINK_STATE_MODE_IN_FUNCTION_VF, vf_i); + MCDI_SET_DWORD(inbuf, LINK_STATE_MODE_IN_NEW_MODE, link_state); + return efx_mcdi_rpc(efx, MC_CMD_LINK_STATE_MODE, inbuf, sizeof(inbuf), + NULL, 0, NULL); /* don't care what old mode was */ +} + +int efx_ef10_sriov_get_vf_config(struct efx_nic *efx, int vf_i, + struct ifla_vf_info *ivf) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_LINK_STATE_MODE_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_LINK_STATE_MODE_OUT_LEN); + + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct ef10_vf *vf; + size_t outlen; + int rc; + + if (vf_i >= efx->vf_count) + return -EINVAL; + + if (!nic_data->vf) + return -EOPNOTSUPP; + + vf = nic_data->vf + vf_i; + + ivf->vf = vf_i; + ivf->min_tx_rate = 0; + ivf->max_tx_rate = 0; + ether_addr_copy(ivf->mac, vf->mac); + ivf->vlan = (vf->vlan == EFX_EF10_NO_VLAN) ? 0 : vf->vlan; + ivf->qos = 0; + + MCDI_POPULATE_DWORD_2(inbuf, LINK_STATE_MODE_IN_FUNCTION, + LINK_STATE_MODE_IN_FUNCTION_PF, + nic_data->pf_index, + LINK_STATE_MODE_IN_FUNCTION_VF, vf_i); + MCDI_SET_DWORD(inbuf, LINK_STATE_MODE_IN_NEW_MODE, + MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE); + rc = efx_mcdi_rpc(efx, MC_CMD_LINK_STATE_MODE, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_LINK_STATE_MODE_OUT_LEN) + return -EIO; + ivf->linkstate = MCDI_DWORD(outbuf, LINK_STATE_MODE_OUT_OLD_MODE); + + return 0; +} diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h new file mode 100644 index 000000000000..b98557670f73 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10_sriov.h @@ -0,0 +1,64 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EF10_SRIOV_H +#define EF10_SRIOV_H + +#include "net_driver.h" + +/** + * struct ef10_vf - PF's store of VF data + * @efx: efx_nic struct for the current VF + * @vport_id: vport ID for the VF + * @vport_assigned: record whether the vport is currently assigned to the VF + * @mac: MAC address for the VF, zero when address is removed from the vport + * @vlan: Default VLAN for the VF or #EFX_EF10_NO_VLAN + */ +struct ef10_vf { + struct efx_nic *efx; + unsigned int vport_id; + unsigned int vport_assigned; + u8 mac[ETH_ALEN]; + u16 vlan; +#define EFX_EF10_NO_VLAN 0 +}; + +static inline bool efx_ef10_sriov_wanted(struct efx_nic *efx) +{ + return false; +} + +int efx_ef10_sriov_configure(struct efx_nic *efx, int num_vfs); +int efx_ef10_sriov_init(struct efx_nic *efx); +static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} +void efx_ef10_sriov_fini(struct efx_nic *efx); +static inline void efx_ef10_sriov_flr(struct efx_nic *efx, unsigned vf_i) {} + +int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac); + +int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, + u16 vlan, u8 qos); + +int efx_ef10_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf, + bool spoofchk); + +int efx_ef10_sriov_get_vf_config(struct efx_nic *efx, int vf_i, + struct ifla_vf_info *ivf); + +int efx_ef10_sriov_set_vf_link_state(struct efx_nic *efx, int vf_i, + int link_state); + +int efx_ef10_vswitching_probe_pf(struct efx_nic *efx); +int efx_ef10_vswitching_probe_vf(struct efx_nic *efx); +int efx_ef10_vswitching_restore_pf(struct efx_nic *efx); +int efx_ef10_vswitching_restore_vf(struct efx_nic *efx); +void efx_ef10_vswitching_remove_pf(struct efx_nic *efx); +void efx_ef10_vswitching_remove_vf(struct efx_nic *efx); + +#endif /* EF10_SRIOV_H */ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 4b00545a3ace..9eafa39d0e7f 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -26,6 +26,7 @@ #include "efx.h" #include "nic.h" #include "selftest.h" +#include "sriov.h" #include "mcdi.h" #include "workarounds.h" @@ -76,6 +77,7 @@ const char *const efx_reset_type_names[] = { [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", [RESET_TYPE_WORLD] = "WORLD", [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", + [RESET_TYPE_DATAPATH] = "DATAPATH", [RESET_TYPE_MC_BIST] = "MC_BIST", [RESET_TYPE_DISABLE] = "DISABLE", [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", @@ -948,6 +950,16 @@ void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) static void efx_fini_port(struct efx_nic *efx); +/* We assume that efx->type->reconfigure_mac will always try to sync RX + * filters and therefore needs to read-lock the filter table against freeing + */ +void efx_mac_reconfigure(struct efx_nic *efx) +{ + down_read(&efx->filter_sem); + efx->type->reconfigure_mac(efx); + up_read(&efx->filter_sem); +} + /* Push loopback/power/transmit disable settings to the PHY, and reconfigure * the MAC appropriately. 
All other PHY configuration changes are pushed * through phy_op->set_settings(), and pushed asynchronously to the MAC @@ -1001,7 +1013,7 @@ static void efx_mac_work(struct work_struct *data) mutex_lock(&efx->mac_lock); if (efx->port_enabled) - efx->type->reconfigure_mac(efx); + efx_mac_reconfigure(efx); mutex_unlock(&efx->mac_lock); } @@ -1041,11 +1053,11 @@ static int efx_init_port(struct efx_nic *efx) /* Reconfigure the MAC before creating dma queues (required for * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */ - efx->type->reconfigure_mac(efx); + efx_mac_reconfigure(efx); /* Ensure the PHY advertises the correct flow control settings */ rc = efx->phy_op->reconfigure(efx); - if (rc) + if (rc && rc != -EPERM) goto fail2; mutex_unlock(&efx->mac_lock); @@ -1067,7 +1079,7 @@ static void efx_start_port(struct efx_nic *efx) efx->port_enabled = true; /* Ensure MAC ingress/egress is enabled */ - efx->type->reconfigure_mac(efx); + efx_mac_reconfigure(efx); mutex_unlock(&efx->mac_lock); } @@ -1200,10 +1212,12 @@ static int efx_init_io(struct efx_nic *efx) struct pci_dev *pci_dev = efx->pci_dev; dma_addr_t dma_mask = efx->type->max_dma_mask; unsigned int mem_map_size = efx->type->mem_map_size(efx); - int rc; + int rc, bar; netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); + bar = efx->type->mem_bar; + rc = pci_enable_device(pci_dev); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -1234,8 +1248,8 @@ static int efx_init_io(struct efx_nic *efx) netif_dbg(efx, probe, efx->net_dev, "using DMA mask %llx\n", (unsigned long long) dma_mask); - efx->membase_phys = pci_resource_start(efx->pci_dev, EFX_MEM_BAR); - rc = pci_request_region(pci_dev, EFX_MEM_BAR, "sfc"); + efx->membase_phys = pci_resource_start(efx->pci_dev, bar); + rc = pci_request_region(pci_dev, bar, "sfc"); if (rc) { netif_err(efx, probe, efx->net_dev, "request for memory BAR failed\n"); @@ -1258,7 +1272,7 @@ static int efx_init_io(struct efx_nic *efx) return 0; fail4: - pci_release_region(efx->pci_dev, EFX_MEM_BAR); + pci_release_region(efx->pci_dev, bar); fail3: efx->membase_phys = 0; fail2: @@ -1269,6 +1283,8 @@ static int efx_init_io(struct efx_nic *efx) static void efx_fini_io(struct efx_nic *efx) { + int bar; + netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); if (efx->membase) { @@ -1277,13 +1293,23 @@ static void efx_fini_io(struct efx_nic *efx) } if (efx->membase_phys) { - pci_release_region(efx->pci_dev, EFX_MEM_BAR); + bar = efx->type->mem_bar; + pci_release_region(efx->pci_dev, bar); efx->membase_phys = 0; } pci_disable_device(efx->pci_dev); } +void efx_set_default_rx_indir_table(struct efx_nic *efx) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++) + efx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); +} + static unsigned int efx_wanted_parallelism(struct efx_nic *efx) { cpumask_var_t thread_mask; @@ -1314,15 +1340,19 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) /* If RSS is requested for the PF *and* VFs then we can't write RSS * table entries that are inaccessible to VFs */ - if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && - count > efx_vf_size(efx)) { - netif_warn(efx, probe, efx->net_dev, - "Reducing number of RSS channels from %u to %u for " - "VF support. 
Increase vf-msix-limit to use more " - "channels on the PF.\n", - count, efx_vf_size(efx)); - count = efx_vf_size(efx); +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && + count > efx_vf_size(efx)) { + netif_warn(efx, probe, efx->net_dev, + "Reducing number of RSS channels from %u to %u for " + "VF support. Increase vf-msix-limit to use more " + "channels on the PF.\n", + count, efx_vf_size(efx)); + count = efx_vf_size(efx); + } } +#endif return count; } @@ -1426,10 +1456,15 @@ static int efx_probe_interrupts(struct efx_nic *efx) } /* RSS might be usable on VFs even if it is disabled on the PF */ - - efx->rss_spread = ((efx->n_rx_channels > 1 || - !efx->type->sriov_wanted(efx)) ? - efx->n_rx_channels : efx_vf_size(efx)); +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + efx->rss_spread = ((efx->n_rx_channels > 1 || + !efx->type->sriov_wanted(efx)) ? + efx->n_rx_channels : efx_vf_size(efx)); + return 0; + } +#endif + efx->rss_spread = efx->n_rx_channels; return 0; } @@ -1593,7 +1628,6 @@ static void efx_set_channels(struct efx_nic *efx) static int efx_probe_nic(struct efx_nic *efx) { - size_t i; int rc; netif_dbg(efx, probe, efx->net_dev, "creating NIC\n"); @@ -1616,10 +1650,9 @@ static int efx_probe_nic(struct efx_nic *efx) goto fail2; if (efx->n_channels > 1) - netdev_rss_key_fill(&efx->rx_hash_key, sizeof(efx->rx_hash_key)); - for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++) - efx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); + netdev_rss_key_fill(&efx->rx_hash_key, + sizeof(efx->rx_hash_key)); + efx_set_default_rx_indir_table(efx); netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); @@ -1650,10 +1683,11 @@ static int efx_probe_filters(struct efx_nic *efx) int rc; spin_lock_init(&efx->filter_lock); - + init_rwsem(&efx->filter_sem); + down_write(&efx->filter_sem); rc = efx->type->filter_table_probe(efx); if (rc) - return rc; + goto out_unlock; #ifdef CONFIG_RFS_ACCEL if (efx->type->offload_features & NETIF_F_NTUPLE) { @@ -1662,12 +1696,14 @@ static int efx_probe_filters(struct efx_nic *efx) GFP_KERNEL); if (!efx->rps_flow_id) { efx->type->filter_table_remove(efx); - return -ENOMEM; + rc = -ENOMEM; + goto out_unlock; } } #endif - - return 0; +out_unlock: + up_write(&efx->filter_sem); + return rc; } static void efx_remove_filters(struct efx_nic *efx) @@ -1675,12 +1711,16 @@ static void efx_remove_filters(struct efx_nic *efx) #ifdef CONFIG_RFS_ACCEL kfree(efx->rps_flow_id); #endif + down_write(&efx->filter_sem); efx->type->filter_table_remove(efx); + up_write(&efx->filter_sem); } static void efx_restore_filters(struct efx_nic *efx) { + down_read(&efx->filter_sem); efx->type->filter_table_restore(efx); + up_read(&efx->filter_sem); } /************************************************************************** @@ -1712,21 +1752,33 @@ static int efx_probe_all(struct efx_nic *efx) } efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_probe(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to setup vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + rc = efx_probe_filters(efx); if (rc) { netif_err(efx, probe, efx->net_dev, "failed to create filter tables\n"); - goto fail3; + goto fail4; } rc = efx_probe_channels(efx); if (rc) - goto fail4; + goto fail5; return 0; - fail4: + fail5: 
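+	/* Error labels unwind in reverse order of probe: filters here,
+	 * then vswitching, the port, and finally the NIC below.
+	 */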
efx_remove_filters(efx); + fail4: +#ifdef CONFIG_SFC_SRIOV + efx->type->vswitching_remove(efx); +#endif fail3: efx_remove_port(efx); fail2: @@ -1816,6 +1868,9 @@ static void efx_remove_all(struct efx_nic *efx) { efx_remove_channels(efx); efx_remove_filters(efx); +#ifdef CONFIG_SFC_SRIOV + efx->type->vswitching_remove(efx); +#endif efx_remove_port(efx); efx_remove_nic(efx); } @@ -2059,7 +2114,7 @@ static int efx_busy_poll(struct napi_struct *napi) *************************************************************************/ /* Context: process, rtnl_lock() held. */ -static int efx_net_open(struct net_device *net_dev) +int efx_net_open(struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); int rc; @@ -2088,7 +2143,7 @@ static int efx_net_open(struct net_device *net_dev) * Note that the kernel will ignore our return code; this method * should really be a void. */ -static int efx_net_stop(struct net_device *net_dev) +int efx_net_stop(struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2146,7 +2201,7 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) mutex_lock(&efx->mac_lock); net_dev->mtu = new_mtu; - efx->type->reconfigure_mac(efx); + efx_mac_reconfigure(efx); mutex_unlock(&efx->mac_lock); efx_start_all(efx); @@ -2159,6 +2214,8 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) struct efx_nic *efx = netdev_priv(net_dev); struct sockaddr *addr = data; u8 *new_addr = addr->sa_data; + u8 old_addr[6]; + int rc; if (!is_valid_ether_addr(new_addr)) { netif_err(efx, drv, efx->net_dev, @@ -2167,12 +2224,20 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) return -EADDRNOTAVAIL; } + /* save old address */ + ether_addr_copy(old_addr, net_dev->dev_addr); ether_addr_copy(net_dev->dev_addr, new_addr); - efx->type->sriov_mac_address_changed(efx); + if (efx->type->set_mac_address) { + rc = efx->type->set_mac_address(efx); + if (rc) { + ether_addr_copy(net_dev->dev_addr, old_addr); + return rc; + } + } /* Reconfigure the MAC */ mutex_lock(&efx->mac_lock); - efx->type->reconfigure_mac(efx); + efx_mac_reconfigure(efx); mutex_unlock(&efx->mac_lock); return 0; @@ -2199,7 +2264,7 @@ static int efx_set_features(struct net_device *net_dev, netdev_features_t data) return 0; } -static const struct net_device_ops efx_farch_netdev_ops = { +static const struct net_device_ops efx_netdev_ops = { .ndo_open = efx_net_open, .ndo_stop = efx_net_stop, .ndo_get_stats64 = efx_net_stats, @@ -2212,10 +2277,11 @@ static const struct net_device_ops efx_farch_netdev_ops = { .ndo_set_rx_mode = efx_set_rx_mode, .ndo_set_features = efx_set_features, #ifdef CONFIG_SFC_SRIOV - .ndo_set_vf_mac = efx_siena_sriov_set_vf_mac, - .ndo_set_vf_vlan = efx_siena_sriov_set_vf_vlan, - .ndo_set_vf_spoofchk = efx_siena_sriov_set_vf_spoofchk, - .ndo_get_vf_config = efx_siena_sriov_get_vf_config, + .ndo_set_vf_mac = efx_sriov_set_vf_mac, + .ndo_set_vf_vlan = efx_sriov_set_vf_vlan, + .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk, + .ndo_get_vf_config = efx_sriov_get_vf_config, + .ndo_set_vf_link_state = efx_sriov_set_vf_link_state, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = efx_netpoll, @@ -2229,29 +2295,6 @@ static const struct net_device_ops efx_farch_netdev_ops = { #endif }; -static const struct net_device_ops efx_ef10_netdev_ops = { - .ndo_open = efx_net_open, - .ndo_stop = efx_net_stop, - .ndo_get_stats64 = efx_net_stats, - .ndo_tx_timeout = efx_watchdog, - .ndo_start_xmit = efx_hard_start_xmit, - .ndo_validate_addr = 
eth_validate_addr, - .ndo_do_ioctl = efx_ioctl, - .ndo_change_mtu = efx_change_mtu, - .ndo_set_mac_address = efx_set_mac_address, - .ndo_set_rx_mode = efx_set_rx_mode, - .ndo_set_features = efx_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = efx_netpoll, -#endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = efx_busy_poll, -#endif -#ifdef CONFIG_RFS_ACCEL - .ndo_rx_flow_steer = efx_filter_rfs, -#endif -}; - static void efx_update_name(struct efx_nic *efx) { strcpy(efx->name, efx->net_dev->name); @@ -2264,8 +2307,7 @@ static int efx_netdev_event(struct notifier_block *this, { struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); - if ((net_dev->netdev_ops == &efx_farch_netdev_ops || - net_dev->netdev_ops == &efx_ef10_netdev_ops) && + if ((net_dev->netdev_ops == &efx_netdev_ops) && event == NETDEV_CHANGENAME) efx_update_name(netdev_priv(net_dev)); @@ -2292,12 +2334,9 @@ static int efx_register_netdev(struct efx_nic *efx) net_dev->watchdog_timeo = 5 * HZ; net_dev->irq = efx->pci_dev->irq; - if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { - net_dev->netdev_ops = &efx_ef10_netdev_ops; + net_dev->netdev_ops = &efx_netdev_ops; + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) net_dev->priv_flags |= IFF_UNICAST_FLT; - } else { - net_dev->netdev_ops = &efx_farch_netdev_ops; - } net_dev->ethtool_ops = &efx_ethtool_ops; net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; @@ -2393,7 +2432,8 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) efx_disable_interrupts(efx); mutex_lock(&efx->mac_lock); - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) efx->phy_op->fini(efx); efx->type->fini(efx); } @@ -2422,11 +2462,13 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) if (!ok) goto fail; - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) { + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) { rc = efx->phy_op->init(efx); if (rc) goto fail; - if (efx->phy_op->reconfigure(efx)) + rc = efx->phy_op->reconfigure(efx); + if (rc && rc != -EPERM) netif_err(efx, drv, efx->net_dev, "could not restore PHY settings\n"); } @@ -2434,8 +2476,20 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) rc = efx_enable_interrupts(efx); if (rc) goto fail; + +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_restore(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to restore vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + + down_read(&efx->filter_sem); efx_restore_filters(efx); - efx->type->sriov_reset(efx); + up_read(&efx->filter_sem); + if (efx->type->sriov_reset) + efx->type->sriov_reset(efx); mutex_unlock(&efx->mac_lock); @@ -2605,6 +2659,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) case RESET_TYPE_WORLD: case RESET_TYPE_DISABLE: case RESET_TYPE_RECOVER_OR_DISABLE: + case RESET_TYPE_DATAPATH: case RESET_TYPE_MC_BIST: case RESET_TYPE_MCDI_TIMEOUT: method = type; @@ -2655,6 +2710,8 @@ static const struct pci_device_id efx_pci_table[] = { .driver_data = (unsigned long) &siena_a0_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */ .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */ + .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* 
SFC9140 PF */ .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, {0} /* end of list */ @@ -2826,7 +2883,9 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_disable_interrupts(efx); rtnl_unlock(); - efx->type->sriov_fini(efx); + if (efx->type->sriov_fini) + efx->type->sriov_fini(efx); + efx_unregister_netdev(efx); efx_mtd_remove(efx); @@ -3008,7 +3067,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_info(efx, probe, efx->net_dev, "Solarflare NIC detected\n"); - efx_probe_vpd_strings(efx); + if (!efx->type->is_vf) + efx_probe_vpd_strings(efx); /* Set up basic I/O (BAR mappings etc) */ rc = efx_init_io(efx); @@ -3023,10 +3083,12 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail4; - rc = efx->type->sriov_init(efx); - if (rc) - netif_err(efx, probe, efx->net_dev, - "SR-IOV can't be enabled rc %d\n", rc); + if (efx->type->sriov_init) { + rc = efx->type->sriov_init(efx); + if (rc) + netif_err(efx, probe, efx->net_dev, + "SR-IOV can't be enabled rc %d\n", rc); + } netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); @@ -3058,6 +3120,26 @@ static int efx_pci_probe(struct pci_dev *pci_dev, return rc; } +/* efx_pci_sriov_configure returns the actual number of Virtual Functions + * enabled on success + */ +#ifdef CONFIG_SFC_SRIOV +static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs) +{ + int rc; + struct efx_nic *efx = pci_get_drvdata(dev); + + if (efx->type->sriov_configure) { + rc = efx->type->sriov_configure(efx, num_vfs); + if (rc) + return rc; + else + return num_vfs; + } else + return -EOPNOTSUPP; +} +#endif + static int efx_pm_freeze(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); @@ -3280,6 +3362,9 @@ static struct pci_driver efx_pci_driver = { .remove = efx_pci_remove, .driver.pm = &efx_pm_ops, .err_handler = &efx_err_handlers, +#ifdef CONFIG_SFC_SRIOV + .sriov_configure = efx_pci_sriov_configure, +#endif }; /************************************************************************** @@ -3302,9 +3387,11 @@ static int __init efx_init_module(void) if (rc) goto err_notifier; +#ifdef CONFIG_SFC_SRIOV rc = efx_init_sriov(); if (rc) goto err_sriov; +#endif reset_workqueue = create_singlethread_workqueue("sfc_reset"); if (!reset_workqueue) { @@ -3321,8 +3408,10 @@ static int __init efx_init_module(void) err_pci: destroy_workqueue(reset_workqueue); err_reset: +#ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); err_sriov: +#endif unregister_netdevice_notifier(&efx_netdev_notifier); err_notifier: return rc; @@ -3334,7 +3423,9 @@ static void __exit efx_exit_module(void) pci_unregister_driver(&efx_pci_driver); destroy_workqueue(reset_workqueue); +#ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); +#endif unregister_netdevice_notifier(&efx_netdev_notifier); } diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 2587c582a821..acb1e0718485 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -15,7 +15,12 @@ #include "filter.h" /* All controllers use BAR 0 for I/O space and BAR 2(&3) for memory */ +/* All VFs use BAR 0/1 for memory */ #define EFX_MEM_BAR 2 +#define EFX_MEM_VF_BAR 0 + +int efx_net_open(struct net_device *net_dev); +int efx_net_stop(struct net_device *net_dev); /* TX */ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); @@ -32,6 +37,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; /* RX */ +void efx_set_default_rx_indir_table(struct efx_nic *efx); void efx_rx_config_page_split(struct efx_nic 
*efx); int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); @@ -71,6 +77,8 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); /* Filters */ +void efx_mac_reconfigure(struct efx_nic *efx); + /** * efx_filter_insert_filter - add or replace a filter * @efx: NIC in which to insert the filter @@ -220,6 +228,13 @@ static inline void efx_mtd_rename(struct efx_nic *efx) {} static inline void efx_mtd_remove(struct efx_nic *efx) {} #endif +#ifdef CONFIG_SFC_SRIOV +static inline unsigned int efx_vf_size(struct efx_nic *efx) +{ + return 1 << efx->vi_scale; +} +#endif + static inline void efx_schedule_channel(struct efx_channel *channel) { netif_vdbg(channel->efx, intr, channel->efx->net_dev, diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h index d1dbb5fb31bb..c94f56271dd4 100644 --- a/drivers/net/ethernet/sfc/enum.h +++ b/drivers/net/ethernet/sfc/enum.h @@ -143,6 +143,7 @@ enum efx_loopback_mode { * @RESET_TYPE_WORLD: Reset as much as possible * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if * unsuccessful. + * @RESET_TYPE_DATAPATH: Reset datapath only. * @RESET_TYPE_MC_BIST: MC entering BIST mode. * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog @@ -159,6 +160,7 @@ enum reset_type { RESET_TYPE_ALL, RESET_TYPE_WORLD, RESET_TYPE_RECOVER_OR_DISABLE, + RESET_TYPE_DATAPATH, RESET_TYPE_MC_BIST, RESET_TYPE_DISABLE, RESET_TYPE_MAX_METHOD, diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 4835bc0d0de8..034797661f96 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -734,7 +734,7 @@ static int efx_ethtool_set_pauseparam(struct net_device *net_dev, /* Reconfigure the MAC. 
The PHY *may* generate a link state change event * if the user just changed the advertised capabilities, but there's no * harm doing this twice */ - efx->type->reconfigure_mac(efx); + efx_mac_reconfigure(efx); out: mutex_unlock(&efx->mac_lock); @@ -1109,9 +1109,8 @@ static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir, return -EOPNOTSUPP; if (!indir) return 0; - memcpy(efx->rx_indir_table, indir, sizeof(efx->rx_indir_table)); - efx->type->rx_push_rss_config(efx); - return 0; + + return efx->type->rx_push_rss_config(efx, true, indir); } static int efx_ethtool_get_ts_info(struct net_device *net_dev, diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index f166c8ef38a3..80e69af21642 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -477,16 +477,29 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) * ************************************************************************** */ +static int dummy_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) +{ + (void) efx; + (void) user; + (void) rx_indir_table; + return -ENOSYS; +} -static void falcon_b0_rx_push_rss_config(struct efx_nic *efx) +static int falcon_b0_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) { efx_oword_t temp; + (void) user; /* Set hash key for IPv4 */ memcpy(&temp, efx->rx_hash_key, sizeof(temp)); efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY); + memcpy(efx->rx_indir_table, rx_indir_table, + sizeof(efx->rx_indir_table)); efx_farch_rx_push_indir_table(efx); + return 0; } /************************************************************************** @@ -2507,7 +2520,7 @@ static int falcon_init_nic(struct efx_nic *efx) falcon_init_rx_cfg(efx); if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { - falcon_b0_rx_push_rss_config(efx); + falcon_b0_rx_push_rss_config(efx, false, efx->rx_indir_table); /* Set destination of both TX and RX Flush events */ EFX_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0); @@ -2687,6 +2700,8 @@ static int falcon_set_wol(struct efx_nic *efx, u32 type) */ const struct efx_nic_type falcon_a1_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = falcon_a1_mem_map_size, .probe = falcon_probe_nic, .remove = falcon_remove_nic, @@ -2729,7 +2744,7 @@ const struct efx_nic_type falcon_a1_nic_type = { .tx_init = efx_farch_tx_init, .tx_remove = efx_farch_tx_remove, .tx_write = efx_farch_tx_write, - .rx_push_rss_config = efx_port_dummy_op_void, + .rx_push_rss_config = dummy_rx_push_rss_config, .rx_probe = efx_farch_rx_probe, .rx_init = efx_farch_rx_init, .rx_remove = efx_farch_rx_remove, @@ -2766,11 +2781,6 @@ const struct efx_nic_type falcon_a1_nic_type = { .mtd_write = falcon_mtd_write, .mtd_sync = falcon_mtd_sync, #endif - .sriov_init = efx_falcon_sriov_init, - .sriov_fini = efx_falcon_sriov_fini, - .sriov_mac_address_changed = efx_falcon_sriov_mac_address_changed, - .sriov_wanted = efx_falcon_sriov_wanted, - .sriov_reset = efx_falcon_sriov_reset, .revision = EFX_REV_FALCON_A1, .txd_ptr_tbl_base = FR_AA_TX_DESC_PTR_TBL_KER, @@ -2788,6 +2798,8 @@ const struct efx_nic_type falcon_a1_nic_type = { }; const struct efx_nic_type falcon_b0_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = falcon_b0_mem_map_size, .probe = falcon_probe_nic, .remove = falcon_remove_nic, @@ -2867,11 +2879,6 @@ const struct efx_nic_type falcon_b0_nic_type = { .mtd_write = falcon_mtd_write, .mtd_sync = falcon_mtd_sync, #endif - .sriov_init = 
efx_falcon_sriov_init, - .sriov_fini = efx_falcon_sriov_fini, - .sriov_mac_address_changed = efx_falcon_sriov_mac_address_changed, - .sriov_wanted = efx_falcon_sriov_wanted, - .sriov_reset = efx_falcon_sriov_reset, .revision = EFX_REV_FALCON_B0, .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index bb89e96a125e..f08266f0eca2 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -20,6 +20,8 @@ #include "efx.h" #include "nic.h" #include "farch_regs.h" +#include "sriov.h" +#include "siena_sriov.h" #include "io.h" #include "workarounds.h" @@ -1198,13 +1200,17 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n", channel->channel, ev_sub_data); efx_farch_handle_tx_flush_done(efx, event); +#ifdef CONFIG_SFC_SRIOV efx_siena_sriov_tx_flush_done(efx, event); +#endif break; case FSE_AZ_RX_DESCQ_FLS_DONE_EV: netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n", channel->channel, ev_sub_data); efx_farch_handle_rx_flush_done(efx, event); +#ifdef CONFIG_SFC_SRIOV efx_siena_sriov_rx_flush_done(efx, event); +#endif break; case FSE_AZ_EVQ_INIT_DONE_EV: netif_dbg(efx, hw, efx->net_dev, @@ -1242,8 +1248,11 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data); efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR); - } else + } +#ifdef CONFIG_SFC_SRIOV + else efx_siena_sriov_desc_fetch_err(efx, ev_sub_data); +#endif break; case FSE_BZ_TX_DSC_ERROR_EV: if (ev_sub_data < EFX_VI_BASE) { @@ -1252,8 +1261,11 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data); efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR); - } else + } +#ifdef CONFIG_SFC_SRIOV + else efx_siena_sriov_desc_fetch_err(efx, ev_sub_data); +#endif break; default: netif_vdbg(efx, hw, efx->net_dev, @@ -1317,9 +1329,11 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget) case FSE_AZ_EV_CODE_DRIVER_EV: efx_farch_handle_driver_event(channel, &event); break; +#ifdef CONFIG_SFC_SRIOV case FSE_CZ_EV_CODE_USER_EV: efx_siena_sriov_event(channel, &event); break; +#endif case FSE_CZ_EV_CODE_MCDI_EV: efx_mcdi_process_event(channel, &event); break; @@ -1685,28 +1699,32 @@ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); #ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted(efx)) { - unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit; - - nic_data->vf_buftbl_base = buftbl_min; - - vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES; - vi_count = max(vi_count, EFX_VI_BASE); - buftbl_free = (sram_lim_qw - buftbl_min - - vi_count * vi_dc_entries); - - entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) * - efx_vf_size(efx)); - vf_limit = min(buftbl_free / entries_per_vf, - (1024U - EFX_VI_BASE) >> efx->vi_scale); - - if (efx->vf_count > vf_limit) { - netif_err(efx, probe, efx->net_dev, - "Reducing VF count from from %d to %d\n", - efx->vf_count, vf_limit); - efx->vf_count = vf_limit; + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx)) { + unsigned vi_dc_entries, buftbl_free; + unsigned entries_per_vf, vf_limit; + + nic_data->vf_buftbl_base = buftbl_min; + + vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES; + vi_count = max(vi_count, EFX_VI_BASE); + buftbl_free = 
(sram_lim_qw - buftbl_min - + vi_count * vi_dc_entries); + + entries_per_vf = ((vi_dc_entries + + EFX_VF_BUFTBL_PER_VI) * + efx_vf_size(efx)); + vf_limit = min(buftbl_free / entries_per_vf, + (1024U - EFX_VI_BASE) >> efx->vi_scale); + + if (efx->vf_count > vf_limit) { + netif_err(efx, probe, efx->net_dev, + "Reducing VF count from %d to %d\n", + efx->vf_count, vf_limit); + efx->vf_count = vf_limit; + } + vi_count += efx->vf_count * efx_vf_size(efx); } - vi_count += efx->vf_count * efx_vf_size(efx); } #endif diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d37928f01949..8267a1c75771 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -406,7 +406,7 @@ static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) struct efx_mcdi_async_param *async; size_t hdr_len, data_len, err_len; efx_dword_t *outbuf; - MCDI_DECLARE_BUF_OUT_OR_ERR(errbuf, 0); + MCDI_DECLARE_BUF_ERR(errbuf); int rc; if (cmpxchg(&mcdi->state, @@ -534,7 +534,7 @@ static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, size_t *outlen_actual, bool quiet) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - MCDI_DECLARE_BUF_OUT_OR_ERR(errbuf, 0); + MCDI_DECLARE_BUF_ERR(errbuf); int rc; if (mcdi->mode == MCDI_MODE_POLL) @@ -1035,7 +1039,9 @@ void efx_mcdi_process_event(struct efx_channel *channel, /* MAC stats are gathered lazily. We can ignore this. */ break; case MCDI_EVENT_CODE_FLR: - efx_siena_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF)); + if (efx->type->sriov_flr) + efx->type->sriov_flr(efx, + MCDI_EVENT_FIELD(*event, FLR_VF)); break; case MCDI_EVENT_CODE_PTP_RX: case MCDI_EVENT_CODE_PTP_FAULT:
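The errbuf conversions above rely on MCDI_DECLARE_BUF_ERR(), which this patch introduces in the mcdi.h hunk further down. As a rough illustration (not part of the patch itself), the declaration

        MCDI_DECLARE_BUF_ERR(errbuf);

expands via MCDI_DECLARE_BUF(errbuf, 8) and _MCDI_DECLARE_BUF() to a zero-initialised two-dword array, just large enough for an MCDI error response:

        efx_dword_t errbuf[DIV_ROUND_UP(8, 4)] = {{{0}}};  /* i.e. efx_dword_t errbuf[2] */

The initialiser is the notable part of the rework: every MCDI_DECLARE_BUF() buffer now starts zeroed rather than holding stack garbage when a command fails before filling it.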
@@ -1081,9 +1083,7 @@ void efx_mcdi_process_event(struct efx_channel *channel, void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) { - MCDI_DECLARE_BUF(outbuf, - max(MC_CMD_GET_VERSION_OUT_LEN, - MC_CMD_GET_CAPABILITIES_OUT_LEN)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_VERSION_OUT_LEN); size_t outlength; const __le16 *ver_words; size_t offset; @@ -1108,19 +1108,11 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) * single version. Report which variants are running. */ if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { - BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0, - outbuf, sizeof(outbuf), &outlength); - if (rc || outlength < MC_CMD_GET_CAPABILITIES_OUT_LEN) - offset += snprintf( - buf + offset, len - offset, " rx? tx?"); - else - offset += snprintf( - buf + offset, len - offset, " rx%x tx%x", - MCDI_WORD(outbuf, - GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID), - MCDI_WORD(outbuf, - GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID)); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + offset += snprintf(buf + offset, len - offset, " rx%x tx%x", + nic_data->rx_dpcpu_fw_id, + nic_data->tx_dpcpu_fw_id); /* It's theoretically possible for the string to exceed 31 * characters, though in practice the first three version @@ -1150,10 +1142,26 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_UPDATE, 1); MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, MC_CMD_FW_LOW_LATENCY); - rc = efx_mcdi_rpc(efx, MC_CMD_DRV_ATTACH, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - if (rc) + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + /* If we're not the primary PF, trying to ATTACH with a FIRMWARE_ID + * specified will fail with EPERM, and we have to tell the MC we don't + * care what firmware we get. + */ + if (rc == -EPERM) { + netif_dbg(efx, probe, efx->net_dev, + "efx_mcdi_drv_attach with fw-variant setting failed EPERM, trying without it\n"); + MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, + MC_CMD_FW_DONT_CARE); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), + &outlen); + } + if (rc) { + efx_mcdi_display_error(efx, MC_CMD_DRV_ATTACH, sizeof(inbuf), + outbuf, outlen, rc); goto fail; + } if (outlen < MC_CMD_DRV_ATTACH_OUT_LEN) { rc = -EIO; goto fail; @@ -1178,16 +1186,6 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, * and are completely trusted by firmware. Abort probing * if that's not true for this function. */ - if (driver_operating && - (efx->mcdi->fn_flags & - (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | - 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) != - (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | - 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) { - netif_err(efx, probe, efx->net_dev, - "This driver version only supports one function per port\n"); - return -ENODEV; - } if (was_attached != NULL) *was_attached = MCDI_DWORD(outbuf, DRV_ATTACH_OUT_OLD_STATE); @@ -1385,10 +1383,13 @@ fail1: return rc; } +/* Returns 1 if an assertion was read, 0 if no assertion had fired, + * negative on error. + */ static int efx_mcdi_read_assertion(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_ASSERTS_IN_LEN); - MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, MC_CMD_GET_ASSERTS_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_ASSERTS_OUT_LEN); unsigned int flags, index; const char *reason; size_t outlen; @@ -1406,6 +1407,8 @@ static int efx_mcdi_read_assertion(struct efx_nic *efx) rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_ASSERTS, inbuf, MC_CMD_GET_ASSERTS_IN_LEN, outbuf, sizeof(outbuf), &outlen); + if (rc == -EPERM) + return 0; } while ((rc == -EINTR || rc == -EIO) && retry-- > 0); if (rc) { @@ -1443,24 +1446,31 @@ static int efx_mcdi_read_assertion(struct efx_nic *efx) MCDI_ARRAY_DWORD(outbuf, GET_ASSERTS_OUT_GP_REGS_OFFS, index)); - return 0; + return 1; } -static void efx_mcdi_exit_assertion(struct efx_nic *efx) +static int efx_mcdi_exit_assertion(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_REBOOT_IN_LEN); + int rc; /* If the MC is running debug firmware, it might now be * waiting for a debugger to attach, but we just want it to * reboot.
We set a flag that makes the command a no-op if it - * has already done so. We don't know what return code to - * expect (0 or -EIO), so ignore it. + * has already done so. + * The MCDI will thus return either 0 or -EIO. */ BUILD_BUG_ON(MC_CMD_REBOOT_OUT_LEN != 0); MCDI_SET_DWORD(inbuf, REBOOT_IN_FLAGS, MC_CMD_REBOOT_FLAGS_AFTER_ASSERTION); - (void) efx_mcdi_rpc(efx, MC_CMD_REBOOT, inbuf, MC_CMD_REBOOT_IN_LEN, - NULL, 0, NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_REBOOT, inbuf, MC_CMD_REBOOT_IN_LEN, + NULL, 0, NULL); + if (rc == -EIO) + rc = 0; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_REBOOT, MC_CMD_REBOOT_IN_LEN, + NULL, 0, rc); + return rc; } int efx_mcdi_handle_assertion(struct efx_nic *efx) @@ -1468,12 +1478,10 @@ int efx_mcdi_handle_assertion(struct efx_nic *efx) int rc; rc = efx_mcdi_read_assertion(efx); - if (rc) + if (rc <= 0) return rc; - efx_mcdi_exit_assertion(efx); - - return 0; + return efx_mcdi_exit_assertion(efx); } void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) @@ -1550,7 +1558,9 @@ int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method) if (rc) return rc; - if (method == RESET_TYPE_WORLD) + if (method == RESET_TYPE_DATAPATH) + return 0; + else if (method == RESET_TYPE_WORLD) return efx_mcdi_reset_mc(efx); else return efx_mcdi_reset_func(efx); @@ -1688,6 +1698,36 @@ int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled) NULL, 0, NULL); } +int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, + unsigned int *enabled_out) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_WORKAROUNDS_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_WORKAROUNDS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_WORKAROUNDS_OUT_LEN) { + rc = -EIO; + goto fail; + } + + if (impl_out) + *impl_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_IMPLEMENTED); + + if (enabled_out) + *enabled_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_ENABLED); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + #ifdef CONFIG_SFC_MTD #define EFX_MCDI_NVRAM_LEN_MAX 128 diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 56465f7465a2..7afab2fff4fe 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -176,10 +176,12 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); * 32-bit-aligned. Also, on Siena we must copy to the MC shared * memory strictly 32 bits at a time, so add any necessary padding. 
*/ -#define MCDI_DECLARE_BUF(_name, _len) \ +#define _MCDI_DECLARE_BUF(_name, _len) \ efx_dword_t _name[DIV_ROUND_UP(_len, 4)] -#define MCDI_DECLARE_BUF_OUT_OR_ERR(_name, _len) \ - MCDI_DECLARE_BUF(_name, max_t(size_t, _len, 8)) +#define MCDI_DECLARE_BUF(_name, _len) \ + _MCDI_DECLARE_BUF(_name, _len) = {{{0}}} +#define MCDI_DECLARE_BUF_ERR(_name) \ + MCDI_DECLARE_BUF(_name, 8) #define _MCDI_PTR(_buf, _offset) \ ((u8 *)(_buf) + (_offset)) #define MCDI_PTR(_buf, _field) \ @@ -339,6 +341,8 @@ bool efx_mcdi_mac_check_fault(struct efx_nic *efx); enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason); int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method); int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled); +int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, + unsigned int *enabled_out); #ifdef CONFIG_SFC_MCDI_MON int efx_mcdi_mon_probe(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index e028de10e1b7..9efdf0a5df64 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -638,6 +638,8 @@ */ #define MC_CMD_READ32 0x1 +#define MC_CMD_0x1_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_READ32_IN msgrequest */ #define MC_CMD_READ32_IN_LEN 8 #define MC_CMD_READ32_IN_ADDR_OFST 0 @@ -659,6 +661,8 @@ */ #define MC_CMD_WRITE32 0x2 +#define MC_CMD_0x2_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_WRITE32_IN msgrequest */ #define MC_CMD_WRITE32_IN_LENMIN 8 #define MC_CMD_WRITE32_IN_LENMAX 252 @@ -679,6 +683,8 @@ */ #define MC_CMD_COPYCODE 0x3 +#define MC_CMD_0x3_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_COPYCODE_IN msgrequest */ #define MC_CMD_COPYCODE_IN_LEN 16 /* Source address */ @@ -717,6 +723,8 @@ */ #define MC_CMD_SET_FUNC 0x4 +#define MC_CMD_0x4_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_FUNC_IN msgrequest */ #define MC_CMD_SET_FUNC_IN_LEN 4 /* Set function */ @@ -732,6 +740,8 @@ */ #define MC_CMD_GET_BOOT_STATUS 0x5 +#define MC_CMD_0x5_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_GET_BOOT_STATUS_IN msgrequest */ #define MC_CMD_GET_BOOT_STATUS_IN_LEN 0 @@ -758,6 +768,8 @@ */ #define MC_CMD_GET_ASSERTS 0x6 +#define MC_CMD_0x6_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_GET_ASSERTS_IN msgrequest */ #define MC_CMD_GET_ASSERTS_IN_LEN 4 /* Set to clear assertion */ @@ -794,6 +806,8 @@ */ #define MC_CMD_LOG_CTRL 0x7 +#define MC_CMD_0x7_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_LOG_CTRL_IN msgrequest */ #define MC_CMD_LOG_CTRL_IN_LEN 8 /* Log destination */ @@ -814,6 +828,8 @@ */ #define MC_CMD_GET_VERSION 0x8 +#define MC_CMD_0x8_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_VERSION_IN msgrequest */ #define MC_CMD_GET_VERSION_IN_LEN 0 @@ -870,6 +886,8 @@ */ #define MC_CMD_PTP 0xb +#define MC_CMD_0xb_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_PTP_IN msgrequest */ #define MC_CMD_PTP_IN_LEN 1 /* PTP operation code */ @@ -1404,6 +1422,8 @@ */ #define MC_CMD_CSR_READ32 0xc +#define MC_CMD_0xc_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_CSR_READ32_IN msgrequest */ #define MC_CMD_CSR_READ32_IN_LEN 12 /* Address */ @@ -1428,6 +1448,8 @@ */ #define MC_CMD_CSR_WRITE32 0xd +#define MC_CMD_0xd_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_CSR_WRITE32_IN msgrequest */ #define MC_CMD_CSR_WRITE32_IN_LENMIN 12 #define MC_CMD_CSR_WRITE32_IN_LENMAX 252 @@ -1452,6 +1474,8 @@ */ #define MC_CMD_HP 0x54 +#define MC_CMD_0x54_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_HP_IN msgrequest */ #define MC_CMD_HP_IN_LEN 16 /* HP OCSD sub-command. 
When address is not NULL, request activation of OCSD at @@ -1493,6 +1517,8 @@ */ #define MC_CMD_STACKINFO 0xf +#define MC_CMD_0xf_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_STACKINFO_IN msgrequest */ #define MC_CMD_STACKINFO_IN_LEN 0 @@ -1513,6 +1539,8 @@ */ #define MC_CMD_MDIO_READ 0x10 +#define MC_CMD_0x10_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_MDIO_READ_IN msgrequest */ #define MC_CMD_MDIO_READ_IN_LEN 16 /* Bus number; there are two MDIO buses: one for the internal PHY, and one for @@ -1552,6 +1580,8 @@ */ #define MC_CMD_MDIO_WRITE 0x11 +#define MC_CMD_0x11_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_MDIO_WRITE_IN msgrequest */ #define MC_CMD_MDIO_WRITE_IN_LEN 20 /* Bus number; there are two MDIO buses: one for the internal PHY, and one for @@ -1591,6 +1621,8 @@ */ #define MC_CMD_DBI_WRITE 0x12 +#define MC_CMD_0x12_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DBI_WRITE_IN msgrequest */ #define MC_CMD_DBI_WRITE_IN_LENMIN 12 #define MC_CMD_DBI_WRITE_IN_LENMAX 252 @@ -1739,6 +1771,8 @@ */ #define MC_CMD_GET_BOARD_CFG 0x18 +#define MC_CMD_0x18_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_BOARD_CFG_IN msgrequest */ #define MC_CMD_GET_BOARD_CFG_IN_LEN 0 @@ -1778,6 +1812,8 @@ */ #define MC_CMD_DBI_READX 0x19 +#define MC_CMD_0x19_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DBI_READX_IN msgrequest */ #define MC_CMD_DBI_READX_IN_LENMIN 8 #define MC_CMD_DBI_READX_IN_LENMAX 248 @@ -1822,6 +1858,8 @@ */ #define MC_CMD_SET_RAND_SEED 0x1a +#define MC_CMD_0x1a_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_RAND_SEED_IN msgrequest */ #define MC_CMD_SET_RAND_SEED_IN_LEN 16 /* Seed value. */ @@ -1863,6 +1901,8 @@ */ #define MC_CMD_DRV_ATTACH 0x1c +#define MC_CMD_0x1c_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_DRV_ATTACH_IN msgrequest */ #define MC_CMD_DRV_ATTACH_IN_LEN 12 /* new state (0=detached, 1=attached) to set if UPDATE=1 */ @@ -1875,6 +1915,8 @@ #define MC_CMD_FW_FULL_FEATURED 0x0 /* enum: Prefer to use firmware with fewer features but lower latency */ #define MC_CMD_FW_LOW_LATENCY 0x1 +/* enum: Only this option is allowed for non-admin functions */ +#define MC_CMD_FW_DONT_CARE 0xffffffff /* MC_CMD_DRV_ATTACH_OUT msgresponse */ #define MC_CMD_DRV_ATTACH_OUT_LEN 4 @@ -1920,6 +1962,8 @@ */ #define MC_CMD_PORT_RESET 0x20 +#define MC_CMD_0x20_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_PORT_RESET_IN msgrequest */ #define MC_CMD_PORT_RESET_IN_LEN 0 @@ -1934,6 +1978,7 @@ * extended version of the deprecated MC_CMD_PORT_RESET with added fields. */ #define MC_CMD_ENTITY_RESET 0x20 +/* MC_CMD_0x20_PRIVILEGE_CTG SRIOV_CTG_GENERAL */ /* MC_CMD_ENTITY_RESET_IN msgrequest */ #define MC_CMD_ENTITY_RESET_IN_LEN 4 @@ -2023,6 +2068,8 @@ */ #define MC_CMD_PUTS 0x23 +#define MC_CMD_0x23_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_PUTS_IN msgrequest */ #define MC_CMD_PUTS_IN_LENMIN 13 #define MC_CMD_PUTS_IN_LENMAX 252 @@ -2050,6 +2097,8 @@ */ #define MC_CMD_GET_PHY_CFG 0x24 +#define MC_CMD_0x24_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_PHY_CFG_IN msgrequest */ #define MC_CMD_GET_PHY_CFG_IN_LEN 0 @@ -2149,6 +2198,8 @@ */ #define MC_CMD_START_BIST 0x25 +#define MC_CMD_0x25_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_START_BIST_IN msgrequest */ #define MC_CMD_START_BIST_IN_LEN 4 /* Type of test. 
*/ @@ -2185,6 +2236,8 @@ */ #define MC_CMD_POLL_BIST 0x26 +#define MC_CMD_0x26_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_POLL_BIST_IN msgrequest */ #define MC_CMD_POLL_BIST_IN_LEN 0 @@ -2344,6 +2397,8 @@ */ #define MC_CMD_GET_LOOPBACK_MODES 0x28 +#define MC_CMD_0x28_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_LOOPBACK_MODES_IN msgrequest */ #define MC_CMD_GET_LOOPBACK_MODES_IN_LEN 0 @@ -2463,6 +2518,8 @@ */ #define MC_CMD_GET_LINK 0x29 +#define MC_CMD_0x29_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_LINK_IN msgrequest */ #define MC_CMD_GET_LINK_IN_LEN 0 @@ -2519,6 +2576,8 @@ */ #define MC_CMD_SET_LINK 0x2a +#define MC_CMD_0x2a_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_SET_LINK_IN msgrequest */ #define MC_CMD_SET_LINK_IN_LEN 16 /* ??? */ @@ -2550,6 +2609,8 @@ */ #define MC_CMD_SET_ID_LED 0x2b +#define MC_CMD_0x2b_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_SET_ID_LED_IN msgrequest */ #define MC_CMD_SET_ID_LED_IN_LEN 4 /* Set LED state. */ @@ -2568,6 +2629,8 @@ */ #define MC_CMD_SET_MAC 0x2c +#define MC_CMD_0x2c_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_SET_MAC_IN msgrequest */ #define MC_CMD_SET_MAC_IN_LEN 24 /* The MTU is the MTU programmed directly into the XMAC/GMAC (inclusive of @@ -2609,6 +2672,8 @@ */ #define MC_CMD_PHY_STATS 0x2d +#define MC_CMD_0x2d_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_PHY_STATS_IN msgrequest */ #define MC_CMD_PHY_STATS_IN_LEN 8 /* ??? */ @@ -2687,6 +2752,8 @@ */ #define MC_CMD_MAC_STATS 0x2e +#define MC_CMD_0x2e_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_MAC_STATS_IN msgrequest */ #define MC_CMD_MAC_STATS_IN_LEN 16 /* ??? */ @@ -2926,6 +2993,8 @@ */ #define MC_CMD_WOL_FILTER_SET 0x32 +#define MC_CMD_0x32_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_WOL_FILTER_SET_IN msgrequest */ #define MC_CMD_WOL_FILTER_SET_IN_LEN 192 #define MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_OFST 0 @@ -3020,6 +3089,8 @@ */ #define MC_CMD_WOL_FILTER_REMOVE 0x33 +#define MC_CMD_0x33_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_WOL_FILTER_REMOVE_IN msgrequest */ #define MC_CMD_WOL_FILTER_REMOVE_IN_LEN 4 #define MC_CMD_WOL_FILTER_REMOVE_IN_FILTER_ID_OFST 0 @@ -3035,6 +3106,8 @@ */ #define MC_CMD_WOL_FILTER_RESET 0x34 +#define MC_CMD_0x34_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_WOL_FILTER_RESET_IN msgrequest */ #define MC_CMD_WOL_FILTER_RESET_IN_LEN 4 #define MC_CMD_WOL_FILTER_RESET_IN_MASK_OFST 0 @@ -3069,6 +3142,8 @@ */ #define MC_CMD_NVRAM_TYPES 0x36 +#define MC_CMD_0x36_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_TYPES_IN msgrequest */ #define MC_CMD_NVRAM_TYPES_IN_LEN 0 @@ -3125,6 +3200,8 @@ */ #define MC_CMD_NVRAM_INFO 0x37 +#define MC_CMD_0x37_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_INFO_IN msgrequest */ #define MC_CMD_NVRAM_INFO_IN_LEN 4 #define MC_CMD_NVRAM_INFO_IN_TYPE_OFST 0 @@ -3157,6 +3234,8 @@ */ #define MC_CMD_NVRAM_UPDATE_START 0x38 +#define MC_CMD_0x38_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_UPDATE_START_IN msgrequest */ #define MC_CMD_NVRAM_UPDATE_START_IN_LEN 4 #define MC_CMD_NVRAM_UPDATE_START_IN_TYPE_OFST 0 @@ -3175,6 +3254,8 @@ */ #define MC_CMD_NVRAM_READ 0x39 +#define MC_CMD_0x39_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_READ_IN msgrequest */ #define MC_CMD_NVRAM_READ_IN_LEN 12 #define MC_CMD_NVRAM_READ_IN_TYPE_OFST 0 @@ -3202,6 +3283,8 @@ */ #define MC_CMD_NVRAM_WRITE 0x3a +#define MC_CMD_0x3a_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_WRITE_IN msgrequest */ #define MC_CMD_NVRAM_WRITE_IN_LENMIN 13 #define MC_CMD_NVRAM_WRITE_IN_LENMAX 252 @@ -3228,6 +3311,8 @@ */ #define MC_CMD_NVRAM_ERASE 0x3b +#define MC_CMD_0x3b_PRIVILEGE_CTG 
SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_ERASE_IN msgrequest */ #define MC_CMD_NVRAM_ERASE_IN_LEN 12 #define MC_CMD_NVRAM_ERASE_IN_TYPE_OFST 0 @@ -3248,6 +3333,8 @@ */ #define MC_CMD_NVRAM_UPDATE_FINISH 0x3c +#define MC_CMD_0x3c_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest */ #define MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN 8 #define MC_CMD_NVRAM_UPDATE_FINISH_IN_TYPE_OFST 0 @@ -3279,6 +3366,8 @@ */ #define MC_CMD_REBOOT 0x3d +#define MC_CMD_0x3d_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_REBOOT_IN msgrequest */ #define MC_CMD_REBOOT_IN_LEN 4 #define MC_CMD_REBOOT_IN_FLAGS_OFST 0 @@ -3316,6 +3405,8 @@ */ #define MC_CMD_REBOOT_MODE 0x3f +#define MC_CMD_0x3f_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_REBOOT_MODE_IN msgrequest */ #define MC_CMD_REBOOT_MODE_IN_LEN 4 #define MC_CMD_REBOOT_MODE_IN_VALUE_OFST 0 @@ -3368,6 +3459,8 @@ */ #define MC_CMD_SENSOR_INFO 0x41 +#define MC_CMD_0x41_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SENSOR_INFO_IN msgrequest */ #define MC_CMD_SENSOR_INFO_IN_LEN 0 @@ -3542,6 +3635,8 @@ */ #define MC_CMD_READ_SENSORS 0x42 +#define MC_CMD_0x42_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_READ_SENSORS_IN msgrequest */ #define MC_CMD_READ_SENSORS_IN_LEN 8 /* DMA address of host buffer for sensor readings (must be 4Kbyte aligned). */ @@ -3602,6 +3697,8 @@ */ #define MC_CMD_GET_PHY_STATE 0x43 +#define MC_CMD_0x43_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_PHY_STATE_IN msgrequest */ #define MC_CMD_GET_PHY_STATE_IN_LEN 0 @@ -3636,6 +3733,8 @@ */ #define MC_CMD_WOL_FILTER_GET 0x45 +#define MC_CMD_0x45_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_WOL_FILTER_GET_IN msgrequest */ #define MC_CMD_WOL_FILTER_GET_IN_LEN 0 @@ -3651,6 +3750,8 @@ */ #define MC_CMD_ADD_LIGHTSOUT_OFFLOAD 0x46 +#define MC_CMD_0x46_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN msgrequest */ #define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_LENMIN 8 #define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_LENMAX 252 @@ -3692,6 +3793,8 @@ */ #define MC_CMD_REMOVE_LIGHTSOUT_OFFLOAD 0x47 +#define MC_CMD_0x47_PRIVILEGE_CTG SRIOV_CTG_LINK + /* MC_CMD_REMOVE_LIGHTSOUT_OFFLOAD_IN msgrequest */ #define MC_CMD_REMOVE_LIGHTSOUT_OFFLOAD_IN_LEN 8 #define MC_CMD_REMOVE_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_OFST 0 @@ -3722,6 +3825,8 @@ */ #define MC_CMD_TESTASSERT 0x49 +#define MC_CMD_0x49_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_TESTASSERT_IN msgrequest */ #define MC_CMD_TESTASSERT_IN_LEN 0 @@ -3739,6 +3844,8 @@ */ #define MC_CMD_WORKAROUND 0x4a +#define MC_CMD_0x4a_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_WORKAROUND_IN msgrequest */ #define MC_CMD_WORKAROUND_IN_LEN 8 #define MC_CMD_WORKAROUND_IN_TYPE_OFST 0 @@ -3765,6 +3872,8 @@ */ #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b +#define MC_CMD_0x4b_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_GET_PHY_MEDIA_INFO_IN msgrequest */ #define MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN 4 #define MC_CMD_GET_PHY_MEDIA_INFO_IN_PAGE_OFST 0 @@ -3788,6 +3897,8 @@ */ #define MC_CMD_NVRAM_TEST 0x4c +#define MC_CMD_0x4c_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_TEST_IN msgrequest */ #define MC_CMD_NVRAM_TEST_IN_LEN 4 #define MC_CMD_NVRAM_TEST_IN_TYPE_OFST 0 @@ -3849,6 +3960,8 @@ */ #define MC_CMD_SENSOR_SET_LIMS 0x4e +#define MC_CMD_0x4e_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SENSOR_SET_LIMS_IN msgrequest */ #define MC_CMD_SENSOR_SET_LIMS_IN_LEN 20 #define MC_CMD_SENSOR_SET_LIMS_IN_SENSOR_OFST 0 @@ -3890,6 +4003,8 @@ */ #define MC_CMD_NVRAM_PARTITIONS 0x51 +#define MC_CMD_0x51_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_PARTITIONS_IN msgrequest */ #define MC_CMD_NVRAM_PARTITIONS_IN_LEN 0 @@ 
-3913,6 +4028,8 @@ */ #define MC_CMD_NVRAM_METADATA 0x52 +#define MC_CMD_0x52_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_NVRAM_METADATA_IN msgrequest */ #define MC_CMD_NVRAM_METADATA_IN_LEN 4 /* Partition type ID code */ @@ -3958,6 +4075,8 @@ */ #define MC_CMD_GET_MAC_ADDRESSES 0x55 +#define MC_CMD_0x55_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_MAC_ADDRESSES_IN msgrequest */ #define MC_CMD_GET_MAC_ADDRESSES_IN_LEN 0 @@ -4087,11 +4206,66 @@ /***********************************/ +/* MC_CMD_GET_WORKAROUNDS + * Read the list of all implemented and all currently enabled workarounds. The + * enums here must correspond with those in MC_CMD_WORKAROUND. + */ +#define MC_CMD_GET_WORKAROUNDS 0x59 + +/* MC_CMD_GET_WORKAROUNDS_OUT msgresponse */ +#define MC_CMD_GET_WORKAROUNDS_OUT_LEN 8 +/* Each workaround is represented by a single bit according to the enums below. + */ +#define MC_CMD_GET_WORKAROUNDS_OUT_IMPLEMENTED_OFST 0 +#define MC_CMD_GET_WORKAROUNDS_OUT_ENABLED_OFST 4 +/* enum: Bug 17230 work around. */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG17230 0x2 +/* enum: Bug 35388 work around (unsafe EVQ writes). */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG35388 0x4 +/* enum: Bug35017 workaround (A64 tables must be identity map) */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG35017 0x8 + + +/***********************************/ +/* MC_CMD_LINK_STATE_MODE + * Read/set link state mode of a VF + */ +#define MC_CMD_LINK_STATE_MODE 0x5c + +#define MC_CMD_0x5c_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_LINK_STATE_MODE_IN msgrequest */ +#define MC_CMD_LINK_STATE_MODE_IN_LEN 8 +/* The target function to have its link state mode read or set, must be a VF + * e.g. VF 1,3 = 0x00030001 + */ +#define MC_CMD_LINK_STATE_MODE_IN_FUNCTION_OFST 0 +#define MC_CMD_LINK_STATE_MODE_IN_FUNCTION_PF_LBN 0 +#define MC_CMD_LINK_STATE_MODE_IN_FUNCTION_PF_WIDTH 16 +#define MC_CMD_LINK_STATE_MODE_IN_FUNCTION_VF_LBN 16 +#define MC_CMD_LINK_STATE_MODE_IN_FUNCTION_VF_WIDTH 16 +/* New link state mode to be set */ +#define MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_OFST 4 +#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */ +#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */ +#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */ +/* enum: Use this value to just read the existing setting without modifying it. 
+ */ +#define MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff + +/* MC_CMD_LINK_STATE_MODE_OUT msgresponse */ +#define MC_CMD_LINK_STATE_MODE_OUT_LEN 4 +#define MC_CMD_LINK_STATE_MODE_OUT_OLD_MODE_OFST 0 + + +/***********************************/ /* MC_CMD_READ_REGS * Get a dump of the MCPU registers */ #define MC_CMD_READ_REGS 0x50 +#define MC_CMD_0x50_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_READ_REGS_IN msgrequest */ #define MC_CMD_READ_REGS_IN_LEN 0 @@ -4115,6 +4289,8 @@ */ #define MC_CMD_INIT_EVQ 0x80 +#define MC_CMD_0x80_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_INIT_EVQ_IN msgrequest */ #define MC_CMD_INIT_EVQ_IN_LENMIN 44 #define MC_CMD_INIT_EVQ_IN_LENMAX 548 @@ -4213,6 +4389,8 @@ */ #define MC_CMD_INIT_RXQ 0x81 +#define MC_CMD_0x81_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_INIT_RXQ_IN msgrequest */ #define MC_CMD_INIT_RXQ_IN_LENMIN 36 #define MC_CMD_INIT_RXQ_IN_LENMAX 252 @@ -4265,6 +4443,8 @@ */ #define MC_CMD_INIT_TXQ 0x82 +#define MC_CMD_0x82_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_INIT_TXQ_IN msgrequest */ #define MC_CMD_INIT_TXQ_IN_LENMIN 36 #define MC_CMD_INIT_TXQ_IN_LENMAX 252 @@ -4322,6 +4502,8 @@ */ #define MC_CMD_FINI_EVQ 0x83 +#define MC_CMD_0x83_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_FINI_EVQ_IN msgrequest */ #define MC_CMD_FINI_EVQ_IN_LEN 4 /* Instance of EVQ to destroy. Should be the same instance as that previously @@ -4339,6 +4521,8 @@ */ #define MC_CMD_FINI_RXQ 0x84 +#define MC_CMD_0x84_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_FINI_RXQ_IN msgrequest */ #define MC_CMD_FINI_RXQ_IN_LEN 4 /* Instance of RXQ to destroy */ @@ -4354,6 +4538,8 @@ */ #define MC_CMD_FINI_TXQ 0x85 +#define MC_CMD_0x85_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_FINI_TXQ_IN msgrequest */ #define MC_CMD_FINI_TXQ_IN_LEN 4 /* Instance of TXQ to destroy */ @@ -4369,6 +4555,8 @@ */ #define MC_CMD_DRIVER_EVENT 0x86 +#define MC_CMD_0x86_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_DRIVER_EVENT_IN msgrequest */ #define MC_CMD_DRIVER_EVENT_IN_LEN 12 /* Handle of target EVQ */ @@ -4392,6 +4580,8 @@ */ #define MC_CMD_PROXY_CMD 0x5b +#define MC_CMD_0x5b_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_PROXY_CMD_IN msgrequest */ #define MC_CMD_PROXY_CMD_IN_LEN 4 /* The handle of the target function. 
*/ @@ -4414,6 +4604,8 @@ */ #define MC_CMD_ALLOC_BUFTBL_CHUNK 0x87 +#define MC_CMD_0x87_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_ALLOC_BUFTBL_CHUNK_IN msgrequest */ #define MC_CMD_ALLOC_BUFTBL_CHUNK_IN_LEN 8 /* Owner ID to use */ @@ -4437,6 +4629,8 @@ */ #define MC_CMD_PROGRAM_BUFTBL_ENTRIES 0x88 +#define MC_CMD_0x88_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_PROGRAM_BUFTBL_ENTRIES_IN msgrequest */ #define MC_CMD_PROGRAM_BUFTBL_ENTRIES_IN_LENMIN 20 #define MC_CMD_PROGRAM_BUFTBL_ENTRIES_IN_LENMAX 268 @@ -4463,6 +4657,8 @@ */ #define MC_CMD_FREE_BUFTBL_CHUNK 0x89 +#define MC_CMD_0x89_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_FREE_BUFTBL_CHUNK_IN msgrequest */ #define MC_CMD_FREE_BUFTBL_CHUNK_IN_LEN 4 #define MC_CMD_FREE_BUFTBL_CHUNK_IN_HANDLE_OFST 0 @@ -4477,6 +4673,8 @@ */ #define MC_CMD_FILTER_OP 0x8a +#define MC_CMD_0x8a_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_FILTER_OP_IN msgrequest */ #define MC_CMD_FILTER_OP_IN_LEN 108 /* identifies the type of operation requested */ @@ -4637,6 +4835,8 @@ */ #define MC_CMD_GET_PARSER_DISP_INFO 0xe4 +#define MC_CMD_0xe4_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_PARSER_DISP_INFO_IN msgrequest */ #define MC_CMD_GET_PARSER_DISP_INFO_IN_LEN 4 /* identifies the type of operation requested */ @@ -4669,6 +4869,8 @@ */ #define MC_CMD_PARSER_DISP_RW 0xe5 +#define MC_CMD_0xe5_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_PARSER_DISP_RW_IN msgrequest */ #define MC_CMD_PARSER_DISP_RW_IN_LEN 32 /* identifies the target of the operation */ @@ -4719,6 +4921,8 @@ */ #define MC_CMD_GET_PF_COUNT 0xb6 +#define MC_CMD_0xb6_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_PF_COUNT_IN msgrequest */ #define MC_CMD_GET_PF_COUNT_IN_LEN 0 @@ -4750,6 +4954,8 @@ */ #define MC_CMD_GET_PORT_ASSIGNMENT 0xb8 +#define MC_CMD_0xb8_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_PORT_ASSIGNMENT_IN msgrequest */ #define MC_CMD_GET_PORT_ASSIGNMENT_IN_LEN 0 @@ -4765,6 +4971,8 @@ */ #define MC_CMD_SET_PORT_ASSIGNMENT 0xb9 +#define MC_CMD_0xb9_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_PORT_ASSIGNMENT_IN msgrequest */ #define MC_CMD_SET_PORT_ASSIGNMENT_IN_LEN 4 /* Identifies the port assignment for this function. */ @@ -4780,6 +4988,8 @@ */ #define MC_CMD_ALLOC_VIS 0x8b +#define MC_CMD_0x8b_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_ALLOC_VIS_IN msgrequest */ #define MC_CMD_ALLOC_VIS_IN_LEN 8 /* The minimum number of VIs that is acceptable */ @@ -4804,6 +5014,8 @@ */ #define MC_CMD_FREE_VIS 0x8c +#define MC_CMD_0x8c_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_FREE_VIS_IN msgrequest */ #define MC_CMD_FREE_VIS_IN_LEN 0 @@ -4817,6 +5029,8 @@ */ #define MC_CMD_GET_SRIOV_CFG 0xba +#define MC_CMD_0xba_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_SRIOV_CFG_IN msgrequest */ #define MC_CMD_GET_SRIOV_CFG_IN_LEN 0 @@ -4841,6 +5055,8 @@ */ #define MC_CMD_SET_SRIOV_CFG 0xbb +#define MC_CMD_0xbb_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_SRIOV_CFG_IN msgrequest */ #define MC_CMD_SET_SRIOV_CFG_IN_LEN 20 /* Number of VFs currently enabled. */ @@ -4870,6 +5086,8 @@ */ #define MC_CMD_GET_VI_ALLOC_INFO 0x8d +#define MC_CMD_0x8d_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_VI_ALLOC_INFO_IN msgrequest */ #define MC_CMD_GET_VI_ALLOC_INFO_IN_LEN 0 @@ -4889,6 +5107,8 @@ */ #define MC_CMD_DUMP_VI_STATE 0x8e +#define MC_CMD_0x8e_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_DUMP_VI_STATE_IN msgrequest */ #define MC_CMD_DUMP_VI_STATE_IN_LEN 4 /* The VI number to query. 
*/ @@ -4998,6 +5218,8 @@ */ #define MC_CMD_ALLOC_PIOBUF 0x8f +#define MC_CMD_0x8f_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_ALLOC_PIOBUF_IN msgrequest */ #define MC_CMD_ALLOC_PIOBUF_IN_LEN 0 @@ -5013,6 +5235,8 @@ */ #define MC_CMD_FREE_PIOBUF 0x90 +#define MC_CMD_0x90_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_FREE_PIOBUF_IN msgrequest */ #define MC_CMD_FREE_PIOBUF_IN_LEN 4 /* Handle for allocated push I/O buffer. */ @@ -5028,6 +5252,8 @@ */ #define MC_CMD_GET_VI_TLP_PROCESSING 0xb0 +#define MC_CMD_0xb0_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_VI_TLP_PROCESSING_IN msgrequest */ #define MC_CMD_GET_VI_TLP_PROCESSING_IN_LEN 4 /* VI number to get information for. */ @@ -5062,6 +5288,8 @@ */ #define MC_CMD_SET_VI_TLP_PROCESSING 0xb1 +#define MC_CMD_0xb1_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_SET_VI_TLP_PROCESSING_IN msgrequest */ #define MC_CMD_SET_VI_TLP_PROCESSING_IN_LEN 8 /* VI number to set information for. */ @@ -5096,6 +5324,8 @@ */ #define MC_CMD_GET_TLP_PROCESSING_GLOBALS 0xbc +#define MC_CMD_0xbc_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN msgrequest */ #define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_LEN 4 #define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_OFST 0 @@ -5157,6 +5387,8 @@ */ #define MC_CMD_SET_TLP_PROCESSING_GLOBALS 0xbd +#define MC_CMD_0xbd_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_TLP_PROCESSING_GLOBALS_IN msgrequest */ #define MC_CMD_SET_TLP_PROCESSING_GLOBALS_IN_LEN 8 #define MC_CMD_SET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_OFST 0 @@ -5203,6 +5435,8 @@ */ #define MC_CMD_SATELLITE_DOWNLOAD 0x91 +#define MC_CMD_0x91_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SATELLITE_DOWNLOAD_IN msgrequest: The reset requirements for the CPUs * are subtle, and so downloads must proceed in a number of phases. * @@ -5318,6 +5552,7 @@ */ #define MC_CMD_GET_CAPABILITIES 0xbe +#define MC_CMD_0xbe_PRIVILEGE_CTG SRIOV_CTG_GENERAL /* MC_CMD_GET_CAPABILITIES_IN msgrequest */ #define MC_CMD_GET_CAPABILITIES_IN_LEN 0 @@ -5433,6 +5668,8 @@ */ #define MC_CMD_TCM_BUCKET_ALLOC 0xb2 +#define MC_CMD_0xb2_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_TCM_BUCKET_ALLOC_IN msgrequest */ #define MC_CMD_TCM_BUCKET_ALLOC_IN_LEN 0 @@ -5448,6 +5685,8 @@ */ #define MC_CMD_TCM_BUCKET_FREE 0xb3 +#define MC_CMD_0xb3_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_TCM_BUCKET_FREE_IN msgrequest */ #define MC_CMD_TCM_BUCKET_FREE_IN_LEN 4 /* the bucket id */ @@ -5463,6 +5702,8 @@ */ #define MC_CMD_TCM_BUCKET_INIT 0xb4 +#define MC_CMD_0xb4_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_TCM_BUCKET_INIT_IN msgrequest */ #define MC_CMD_TCM_BUCKET_INIT_IN_LEN 8 /* the bucket id */ @@ -5480,6 +5721,8 @@ */ #define MC_CMD_TCM_TXQ_INIT 0xb5 +#define MC_CMD_0xb5_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_TCM_TXQ_INIT_IN msgrequest */ #define MC_CMD_TCM_TXQ_INIT_IN_LEN 28 /* the txq id */ @@ -5511,6 +5754,8 @@ */ #define MC_CMD_LINK_PIOBUF 0x92 +#define MC_CMD_0x92_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_LINK_PIOBUF_IN msgrequest */ #define MC_CMD_LINK_PIOBUF_IN_LEN 8 /* Handle for allocated push I/O buffer. */ @@ -5528,6 +5773,8 @@ */ #define MC_CMD_UNLINK_PIOBUF 0x93 +#define MC_CMD_0x93_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_UNLINK_PIOBUF_IN msgrequest */ #define MC_CMD_UNLINK_PIOBUF_IN_LEN 4 /* Function Local Instance (VI) number. 
*/ @@ -5543,6 +5790,8 @@ */ #define MC_CMD_VSWITCH_ALLOC 0x94 +#define MC_CMD_0x94_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VSWITCH_ALLOC_IN msgrequest */ #define MC_CMD_VSWITCH_ALLOC_IN_LEN 16 /* The port to connect to the v-switch's upstream port. */ @@ -5572,6 +5821,8 @@ */ #define MC_CMD_VSWITCH_FREE 0x95 +#define MC_CMD_0x95_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VSWITCH_FREE_IN msgrequest */ #define MC_CMD_VSWITCH_FREE_IN_LEN 4 /* The port to which the v-switch is connected. */ @@ -5587,6 +5838,8 @@ */ #define MC_CMD_VPORT_ALLOC 0x96 +#define MC_CMD_0x96_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VPORT_ALLOC_IN msgrequest */ #define MC_CMD_VPORT_ALLOC_IN_LEN 20 /* The port to which the v-switch is connected. */ @@ -5636,6 +5889,8 @@ */ #define MC_CMD_VPORT_FREE 0x97 +#define MC_CMD_0x97_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VPORT_FREE_IN msgrequest */ #define MC_CMD_VPORT_FREE_IN_LEN 4 /* The handle of the v-port */ @@ -5651,8 +5906,10 @@ */ #define MC_CMD_VADAPTOR_ALLOC 0x98 +#define MC_CMD_0x98_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VADAPTOR_ALLOC_IN msgrequest */ -#define MC_CMD_VADAPTOR_ALLOC_IN_LEN 16 +#define MC_CMD_VADAPTOR_ALLOC_IN_LEN 30 /* The port to connect to the v-adaptor's port. */ #define MC_CMD_VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID_OFST 0 /* Flags controlling v-adaptor creation */ @@ -5661,6 +5918,19 @@ #define MC_CMD_VADAPTOR_ALLOC_IN_FLAG_AUTO_VADAPTOR_WIDTH 1 /* The number of VLAN tags to strip on receive */ #define MC_CMD_VADAPTOR_ALLOC_IN_NUM_VLANS_OFST 12 +/* The number of VLAN tags to transparently insert/remove. */ +#define MC_CMD_VADAPTOR_ALLOC_IN_NUM_VLAN_TAGS_OFST 16 +/* The actual VLAN tags to insert/remove */ +#define MC_CMD_VADAPTOR_ALLOC_IN_VLAN_TAGS_OFST 20 +#define MC_CMD_VADAPTOR_ALLOC_IN_VLAN_TAG_0_LBN 0 +#define MC_CMD_VADAPTOR_ALLOC_IN_VLAN_TAG_0_WIDTH 16 +#define MC_CMD_VADAPTOR_ALLOC_IN_VLAN_TAG_1_LBN 16 +#define MC_CMD_VADAPTOR_ALLOC_IN_VLAN_TAG_1_WIDTH 16 +/* The MAC address to assign to this v-adaptor */ +#define MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_OFST 24 +#define MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_LEN 6 +/* enum: Derive the MAC address from the upstream port */ +#define MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0 /* MC_CMD_VADAPTOR_ALLOC_OUT msgresponse */ #define MC_CMD_VADAPTOR_ALLOC_OUT_LEN 0 @@ -5672,6 +5942,8 @@ */ #define MC_CMD_VADAPTOR_FREE 0x99 +#define MC_CMD_0x99_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VADAPTOR_FREE_IN msgrequest */ #define MC_CMD_VADAPTOR_FREE_IN_LEN 4 /* The port to which the v-adaptor is connected. */ @@ -5682,11 +5954,53 @@ /***********************************/ +/* MC_CMD_VADAPTOR_SET_MAC + * assign a new MAC address to a v-adaptor. + */ +#define MC_CMD_VADAPTOR_SET_MAC 0x5d + +#define MC_CMD_0x5d_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_VADAPTOR_SET_MAC_IN msgrequest */ +#define MC_CMD_VADAPTOR_SET_MAC_IN_LEN 10 +/* The port to which the v-adaptor is connected. */ +#define MC_CMD_VADAPTOR_SET_MAC_IN_UPSTREAM_PORT_ID_OFST 0 +/* The new MAC address to assign to this v-adaptor */ +#define MC_CMD_VADAPTOR_SET_MAC_IN_MACADDR_OFST 4 +#define MC_CMD_VADAPTOR_SET_MAC_IN_MACADDR_LEN 6 + +/* MC_CMD_VADAPTOR_SET_MAC_OUT msgresponse */ +#define MC_CMD_VADAPTOR_SET_MAC_OUT_LEN 0 + + +/***********************************/ +/* MC_CMD_VADAPTOR_GET_MAC + * read the MAC address assigned to a v-adaptor. 
+ */ +#define MC_CMD_VADAPTOR_GET_MAC 0x5e + +#define MC_CMD_0x5e_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_VADAPTOR_GET_MAC_IN msgrequest */ +#define MC_CMD_VADAPTOR_GET_MAC_IN_LEN 4 +/* The port to which the v-adaptor is connected. */ +#define MC_CMD_VADAPTOR_GET_MAC_IN_UPSTREAM_PORT_ID_OFST 0 + +/* MC_CMD_VADAPTOR_GET_MAC_OUT msgresponse */ +#define MC_CMD_VADAPTOR_GET_MAC_OUT_LEN 6 +/* The MAC address assigned to this v-adaptor */ +#define MC_CMD_VADAPTOR_GET_MAC_OUT_MACADDR_OFST 0 +#define MC_CMD_VADAPTOR_GET_MAC_OUT_MACADDR_LEN 6 + + +/***********************************/ /* MC_CMD_EVB_PORT_ASSIGN * assign a port to a PCI function. */ #define MC_CMD_EVB_PORT_ASSIGN 0x9a +#define MC_CMD_0x9a_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_EVB_PORT_ASSIGN_IN msgrequest */ #define MC_CMD_EVB_PORT_ASSIGN_IN_LEN 8 /* The port to assign. */ @@ -5708,6 +6022,8 @@ */ #define MC_CMD_RDWR_A64_REGIONS 0x9b +#define MC_CMD_0x9b_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_RDWR_A64_REGIONS_IN msgrequest */ #define MC_CMD_RDWR_A64_REGIONS_IN_LEN 17 #define MC_CMD_RDWR_A64_REGIONS_IN_REGION0_OFST 0 @@ -5736,6 +6052,8 @@ */ #define MC_CMD_ONLOAD_STACK_ALLOC 0x9c +#define MC_CMD_0x9c_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_ONLOAD_STACK_ALLOC_IN msgrequest */ #define MC_CMD_ONLOAD_STACK_ALLOC_IN_LEN 4 /* The handle of the owning upstream port */ @@ -5753,6 +6071,8 @@ */ #define MC_CMD_ONLOAD_STACK_FREE 0x9d +#define MC_CMD_0x9d_PRIVILEGE_CTG SRIOV_CTG_ONLOAD + /* MC_CMD_ONLOAD_STACK_FREE_IN msgrequest */ #define MC_CMD_ONLOAD_STACK_FREE_IN_LEN 4 /* The handle of the Onload stack */ @@ -5768,6 +6088,8 @@ */ #define MC_CMD_RSS_CONTEXT_ALLOC 0x9e +#define MC_CMD_0x9e_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_ALLOC_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN 12 /* The handle of the owning upstream port */ @@ -5800,6 +6122,8 @@ */ #define MC_CMD_RSS_CONTEXT_FREE 0x9f +#define MC_CMD_0x9f_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_FREE_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_FREE_IN_LEN 4 /* The handle of the RSS context */ @@ -5815,6 +6139,8 @@ */ #define MC_CMD_RSS_CONTEXT_SET_KEY 0xa0 +#define MC_CMD_0xa0_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_SET_KEY_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN 44 /* The handle of the RSS context */ @@ -5833,6 +6159,8 @@ */ #define MC_CMD_RSS_CONTEXT_GET_KEY 0xa1 +#define MC_CMD_0xa1_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_GET_KEY_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN 4 /* The handle of the RSS context */ @@ -5851,6 +6179,8 @@ */ #define MC_CMD_RSS_CONTEXT_SET_TABLE 0xa2 +#define MC_CMD_0xa2_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_SET_TABLE_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN 132 /* The handle of the RSS context */ @@ -5869,6 +6199,8 @@ */ #define MC_CMD_RSS_CONTEXT_GET_TABLE 0xa3 +#define MC_CMD_0xa3_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_GET_TABLE_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN 4 /* The handle of the RSS context */ @@ -5887,6 +6219,8 @@ */ #define MC_CMD_RSS_CONTEXT_SET_FLAGS 0xe1 +#define MC_CMD_0xe1_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_SET_FLAGS_IN msgrequest */ #define MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN 8 /* The handle of the RSS context */ @@ -5912,6 +6246,8 @@ */ #define MC_CMD_RSS_CONTEXT_GET_FLAGS 0xe2 +#define MC_CMD_0xe2_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_RSS_CONTEXT_GET_FLAGS_IN msgrequest */ #define 
MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN 4 /* The handle of the RSS context */ @@ -5937,6 +6273,8 @@ */ #define MC_CMD_DOT1P_MAPPING_ALLOC 0xa4 +#define MC_CMD_0xa4_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DOT1P_MAPPING_ALLOC_IN msgrequest */ #define MC_CMD_DOT1P_MAPPING_ALLOC_IN_LEN 8 /* The handle of the owning upstream port */ @@ -5959,6 +6297,8 @@ */ #define MC_CMD_DOT1P_MAPPING_FREE 0xa5 +#define MC_CMD_0xa5_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DOT1P_MAPPING_FREE_IN msgrequest */ #define MC_CMD_DOT1P_MAPPING_FREE_IN_LEN 4 /* The handle of the .1p mapping */ @@ -5974,6 +6314,8 @@ */ #define MC_CMD_DOT1P_MAPPING_SET_TABLE 0xa6 +#define MC_CMD_0xa6_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DOT1P_MAPPING_SET_TABLE_IN msgrequest */ #define MC_CMD_DOT1P_MAPPING_SET_TABLE_IN_LEN 36 /* The handle of the .1p mapping */ @@ -5994,6 +6336,8 @@ */ #define MC_CMD_DOT1P_MAPPING_GET_TABLE 0xa7 +#define MC_CMD_0xa7_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DOT1P_MAPPING_GET_TABLE_IN msgrequest */ #define MC_CMD_DOT1P_MAPPING_GET_TABLE_IN_LEN 4 /* The handle of the .1p mapping */ @@ -6014,6 +6358,8 @@ */ #define MC_CMD_GET_VECTOR_CFG 0xbf +#define MC_CMD_0xbf_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_VECTOR_CFG_IN msgrequest */ #define MC_CMD_GET_VECTOR_CFG_IN_LEN 0 @@ -6033,6 +6379,8 @@ */ #define MC_CMD_SET_VECTOR_CFG 0xc0 +#define MC_CMD_0xc0_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_SET_VECTOR_CFG_IN msgrequest */ #define MC_CMD_SET_VECTOR_CFG_IN_LEN 12 /* Base absolute interrupt vector number, or MC_CMD_RESOURCE_INSTANCE_ANY to @@ -6423,6 +6771,8 @@ */ #define MC_CMD_VPORT_ADD_MAC_ADDRESS 0xa8 +#define MC_CMD_0xa8_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VPORT_ADD_MAC_ADDRESS_IN msgrequest */ #define MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN 10 /* The handle of the v-port */ @@ -6441,6 +6791,8 @@ */ #define MC_CMD_VPORT_DEL_MAC_ADDRESS 0xa9 +#define MC_CMD_0xa9_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VPORT_DEL_MAC_ADDRESS_IN msgrequest */ #define MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN 10 /* The handle of the v-port */ @@ -6459,6 +6811,8 @@ */ #define MC_CMD_VPORT_GET_MAC_ADDRESSES 0xaa +#define MC_CMD_0xaa_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_VPORT_GET_MAC_ADDRESSES_IN msgrequest */ #define MC_CMD_VPORT_GET_MAC_ADDRESSES_IN_LEN 4 /* The handle of the v-port */ @@ -6486,6 +6840,8 @@ */ #define MC_CMD_DUMP_BUFTBL_ENTRIES 0xab +#define MC_CMD_0xab_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DUMP_BUFTBL_ENTRIES_IN msgrequest */ #define MC_CMD_DUMP_BUFTBL_ENTRIES_IN_LEN 8 /* Index of the first buffer table entry. */ @@ -6510,6 +6866,8 @@ */ #define MC_CMD_SET_RXDP_CONFIG 0xc1 +#define MC_CMD_0xc1_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_RXDP_CONFIG_IN msgrequest */ #define MC_CMD_SET_RXDP_CONFIG_IN_LEN 4 #define MC_CMD_SET_RXDP_CONFIG_IN_DATA_OFST 0 @@ -6526,6 +6884,8 @@ */ #define MC_CMD_GET_RXDP_CONFIG 0xc2 +#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_GET_RXDP_CONFIG_IN msgrequest */ #define MC_CMD_GET_RXDP_CONFIG_IN_LEN 0 @@ -6890,6 +7250,8 @@ */ #define MC_CMD_GET_CLOCK 0xac +#define MC_CMD_0xac_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_CLOCK_IN msgrequest */ #define MC_CMD_GET_CLOCK_IN_LEN 0 @@ -6907,6 +7269,8 @@ */ #define MC_CMD_SET_CLOCK 0xad +#define MC_CMD_0xad_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_CLOCK_IN msgrequest */ #define MC_CMD_SET_CLOCK_IN_LEN 12 /* Requested system frequency in MHz; 0 leaves unchanged. 
*/ @@ -6932,6 +7296,8 @@ */ #define MC_CMD_DPCPU_RPC 0xae +#define MC_CMD_0xae_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DPCPU_RPC_IN msgrequest */ #define MC_CMD_DPCPU_RPC_IN_LEN 36 #define MC_CMD_DPCPU_RPC_IN_CPU_OFST 0 @@ -7016,6 +7382,8 @@ */ #define MC_CMD_TRIGGER_INTERRUPT 0xe3 +#define MC_CMD_0xe3_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_TRIGGER_INTERRUPT_IN msgrequest */ #define MC_CMD_TRIGGER_INTERRUPT_IN_LEN 4 /* Interrupt level relative to base for function. */ @@ -7031,6 +7399,8 @@ */ #define MC_CMD_CAP_BLK_READ 0xe7 +#define MC_CMD_0xe7_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_CAP_BLK_READ_IN msgrequest */ #define MC_CMD_CAP_BLK_READ_IN_LEN 12 #define MC_CMD_CAP_BLK_READ_IN_CAP_REG_OFST 0 @@ -7055,6 +7425,8 @@ */ #define MC_CMD_DUMP_DO 0xe8 +#define MC_CMD_0xe8_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DUMP_DO_IN msgrequest */ #define MC_CMD_DUMP_DO_IN_LEN 52 #define MC_CMD_DUMP_DO_IN_PADDING_OFST 0 @@ -7108,6 +7480,8 @@ */ #define MC_CMD_DUMP_CONFIGURE_UNSOLICITED 0xe9 +#define MC_CMD_0xe9_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_DUMP_CONFIGURE_UNSOLICITED_IN msgrequest */ #define MC_CMD_DUMP_CONFIGURE_UNSOLICITED_IN_LEN 52 #define MC_CMD_DUMP_CONFIGURE_UNSOLICITED_IN_ENABLE_OFST 0 @@ -7151,6 +7525,8 @@ */ #define MC_CMD_SET_PSU 0xea +#define MC_CMD_0xea_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_PSU_IN msgrequest */ #define MC_CMD_SET_PSU_IN_LEN 12 #define MC_CMD_SET_PSU_IN_PARAM_OFST 0 @@ -7171,6 +7547,8 @@ */ #define MC_CMD_GET_FUNCTION_INFO 0xec +#define MC_CMD_0xec_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_FUNCTION_INFO_IN msgrequest */ #define MC_CMD_GET_FUNCTION_INFO_IN_LEN 0 @@ -7188,6 +7566,8 @@ */ #define MC_CMD_ENABLE_OFFLINE_BIST 0xed +#define MC_CMD_0xed_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_ENABLE_OFFLINE_BIST_IN msgrequest */ #define MC_CMD_ENABLE_OFFLINE_BIST_IN_LEN 0 @@ -7203,6 +7583,8 @@ */ #define MC_CMD_UART_SEND_DATA 0xee +#define MC_CMD_0xee_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_UART_SEND_DATA_OUT msgrequest */ #define MC_CMD_UART_SEND_DATA_OUT_LENMIN 16 #define MC_CMD_UART_SEND_DATA_OUT_LENMAX 252 @@ -7231,6 +7613,8 @@ */ #define MC_CMD_UART_RECV_DATA 0xef +#define MC_CMD_0xef_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_UART_RECV_DATA_OUT msgrequest */ #define MC_CMD_UART_RECV_DATA_OUT_LEN 16 /* CRC32 over OFFSET, LENGTH, RESERVED */ @@ -7266,6 +7650,8 @@ */ #define MC_CMD_READ_FUSES 0xf0 +#define MC_CMD_0xf0_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_READ_FUSES_IN msgrequest */ #define MC_CMD_READ_FUSES_IN_LEN 8 /* Offset in OTP to read */ @@ -7292,6 +7678,8 @@ */ #define MC_CMD_KR_TUNE 0xf1 +#define MC_CMD_0xf1_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_KR_TUNE_IN msgrequest */ #define MC_CMD_KR_TUNE_IN_LENMIN 4 #define MC_CMD_KR_TUNE_IN_LENMAX 252 @@ -7550,6 +7938,8 @@ */ #define MC_CMD_PCIE_TUNE 0xf2 +#define MC_CMD_0xf2_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_PCIE_TUNE_IN msgrequest */ #define MC_CMD_PCIE_TUNE_IN_LENMIN 4 #define MC_CMD_PCIE_TUNE_IN_LENMAX 252 @@ -7711,6 +8101,8 @@ */ #define MC_CMD_LICENSING 0xf3 +#define MC_CMD_0xf3_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_LICENSING_IN msgrequest */ #define MC_CMD_LICENSING_IN_LEN 4 /* identifies the type of operation requested */ @@ -7756,6 +8148,8 @@ */ #define MC_CMD_MC2MC_PROXY 0xf4 +#define MC_CMD_0xf4_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_MC2MC_PROXY_IN msgrequest */ #define MC_CMD_MC2MC_PROXY_IN_LEN 0 @@ -7771,6 +8165,8 @@ */ #define MC_CMD_GET_LICENSED_APP_STATE 0xf5 +#define MC_CMD_0xf5_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_GET_LICENSED_APP_STATE_IN 
msgrequest */ #define MC_CMD_GET_LICENSED_APP_STATE_IN_LEN 4 /* application ID to query (LICENSED_APP_ID_xxx) */ @@ -7792,6 +8188,8 @@ */ #define MC_CMD_LICENSED_APP_OP 0xf6 +#define MC_CMD_0xf6_PRIVILEGE_CTG SRIOV_CTG_GENERAL + /* MC_CMD_LICENSED_APP_OP_IN msgrequest */ #define MC_CMD_LICENSED_APP_OP_IN_LENMIN 8 #define MC_CMD_LICENSED_APP_OP_IN_LENMAX 252 @@ -7847,6 +8245,8 @@ */ #define MC_CMD_SET_PORT_SNIFF_CONFIG 0xf7 +#define MC_CMD_0xf7_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_SET_PORT_SNIFF_CONFIG_IN msgrequest */ #define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_LEN 16 /* configuration flags */ @@ -7881,6 +8281,8 @@ */ #define MC_CMD_GET_PORT_SNIFF_CONFIG 0xf8 +#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_ADMIN + /* MC_CMD_GET_PORT_SNIFF_CONFIG_IN msgrequest */ #define MC_CMD_GET_PORT_SNIFF_CONFIG_IN_LEN 0 diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index fb19b70eac01..9bf04cbce20a 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -865,6 +865,7 @@ int efx_mcdi_set_mac(struct efx_nic *efx) BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); + /* This has no effect on EF10 */ ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), efx->net_dev->dev_addr); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 325dd94bca46..a468a22e7a88 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -25,6 +25,7 @@ #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/rwsem.h> #include <linux/vmalloc.h> #include <linux/i2c.h> #include <linux/mtd/mtd.h> @@ -793,7 +794,6 @@ union efx_multicast_hash { efx_oword_t oword[EFX_MCAST_HASH_ENTRIES / sizeof(efx_oword_t) / 8]; }; -struct efx_vf; struct vfdi_status; /** @@ -897,7 +897,8 @@ struct vfdi_status; * @loopback_mode: Loopback status * @loopback_modes: Supported loopback mode bitmask * @loopback_selftest: Offline self-test private state - * @filter_lock: Filter table lock + * @filter_sem: Filter table rw_semaphore, for freeing the table + * @filter_lock: Filter table lock, for mere content changes * @filter_state: Architecture-dependent filter table state * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -909,7 +910,6 @@ struct vfdi_status; * completed (either success or failure). Not used when MCDI is used to * flush receive queues. * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions. - * @vf: Array of &struct efx_vf objects. * @vf_count: Number of VFs intended to be enabled. * @vf_init_count: Number of VFs that have been fully initialised. * @vi_scale: log2 number of vnics per VF. @@ -1040,6 +1040,7 @@ struct efx_nic { void *loopback_selftest; + struct rw_semaphore filter_sem; spinlock_t filter_lock; void *filter_state; #ifdef CONFIG_RFS_ACCEL @@ -1053,7 +1054,6 @@ struct efx_nic { wait_queue_head_t flush_wq; #ifdef CONFIG_SFC_SRIOV - struct efx_vf *vf; unsigned vf_count; unsigned vf_init_count; unsigned vi_scale; @@ -1092,6 +1092,7 @@ struct efx_mtd_partition { /** * struct efx_nic_type - Efx device type definition + * @mem_bar: Get the memory BAR * @mem_map_size: Get memory BAR mapped size * @probe: Probe the controller * @remove: Free resources allocated by probe() @@ -1204,6 +1205,7 @@ struct efx_mtd_partition { * @ptp_set_ts_config: Set hardware timestamp configuration. 
The flags * and tx_type will already have been validated but this operation * must validate and update rx_filter. + * @set_mac_address: Set the MAC address of the device * @revision: Hardware architecture revision * @txd_ptr_tbl_base: TX descriptor ring base address * @rxd_ptr_tbl_base: RX descriptor ring base address @@ -1226,6 +1228,8 @@ struct efx_mtd_partition { * @hwtstamp_filters: Mask of hardware timestamp filter types supported */ struct efx_nic_type { + bool is_vf; + unsigned int mem_bar; unsigned int (*mem_map_size)(struct efx_nic *efx); int (*probe)(struct efx_nic *efx); void (*remove)(struct efx_nic *efx); @@ -1277,7 +1281,8 @@ struct efx_nic_type { void (*tx_init)(struct efx_tx_queue *tx_queue); void (*tx_remove)(struct efx_tx_queue *tx_queue); void (*tx_write)(struct efx_tx_queue *tx_queue); - void (*rx_push_rss_config)(struct efx_nic *efx); + int (*rx_push_rss_config)(struct efx_nic *efx, bool user, + const u32 *rx_indir_table); int (*rx_probe)(struct efx_rx_queue *rx_queue); void (*rx_init)(struct efx_rx_queue *rx_queue); void (*rx_remove)(struct efx_rx_queue *rx_queue); @@ -1330,11 +1335,26 @@ struct efx_nic_type { int (*ptp_set_ts_sync_events)(struct efx_nic *efx, bool en, bool temp); int (*ptp_set_ts_config)(struct efx_nic *efx, struct hwtstamp_config *init); + int (*sriov_configure)(struct efx_nic *efx, int num_vfs); int (*sriov_init)(struct efx_nic *efx); void (*sriov_fini)(struct efx_nic *efx); - void (*sriov_mac_address_changed)(struct efx_nic *efx); bool (*sriov_wanted)(struct efx_nic *efx); void (*sriov_reset)(struct efx_nic *efx); + void (*sriov_flr)(struct efx_nic *efx, unsigned vf_i); + int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, u8 *mac); + int (*sriov_set_vf_vlan)(struct efx_nic *efx, int vf_i, u16 vlan, + u8 qos); + int (*sriov_set_vf_spoofchk)(struct efx_nic *efx, int vf_i, + bool spoofchk); + int (*sriov_get_vf_config)(struct efx_nic *efx, int vf_i, + struct ifla_vf_info *ivi); + int (*sriov_set_vf_link_state)(struct efx_nic *efx, int vf_i, + int link_state); + int (*vswitching_probe)(struct efx_nic *efx); + int (*vswitching_restore)(struct efx_nic *efx); + void (*vswitching_remove)(struct efx_nic *efx); + int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr); + int (*set_mac_address)(struct efx_nic *efx); int revision; unsigned int txd_ptr_tbl_base; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 93d10cbbd1cf..db8562ec586d 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -381,6 +381,7 @@ enum { * @efx: Pointer back to main interface structure * @wol_filter_id: Wake-on-LAN packet filter id * @stats: Hardware statistics + * @vf: Array of &struct siena_vf objects * @vf_buftbl_base: The zeroth buffer table index used to back VF queues. * @vfdi_status: Common VFDI status page to be dmad to VF address space. * @local_addr_list: List of local addresses. Protected by %local_lock. 
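/*
 * [Editor's note] Illustrative sketch only; not part of this patch. It shows
 * the locking discipline implied by the new @filter_sem/@filter_lock
 * documentation in net_driver.h above: content changes take filter_sem
 * shared together with the existing filter_lock spinlock, while freeing or
 * replacing the filter table takes filter_sem exclusively. The
 * efx_example_*() helpers are hypothetical; only the filter_sem,
 * filter_lock and filter_state fields come from this patch.
 */
static int efx_example_filter_modify(struct efx_nic *efx)
{
	down_read(&efx->filter_sem);		/* table cannot be freed under us */
	spin_lock_bh(&efx->filter_lock);	/* serialise content changes */
	/* ... update entries in efx->filter_state ... */
	spin_unlock_bh(&efx->filter_lock);
	up_read(&efx->filter_sem);
	return 0;
}

static void efx_example_filter_table_replace(struct efx_nic *efx)
{
	down_write(&efx->filter_sem);		/* exclude all users while freeing */
	/* ... free the old efx->filter_state and install a new one ... */
	up_write(&efx->filter_sem);
}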
@@ -394,6 +395,7 @@ struct siena_nic_data { int wol_filter_id; u64 stats[SIENA_STAT_COUNT]; #ifdef CONFIG_SFC_SRIOV + struct siena_vf *vf; struct efx_channel *vfdi_channel; unsigned vf_buftbl_base; struct efx_buffer vfdi_status; @@ -483,12 +485,21 @@ enum { * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC * reboot * @rx_rss_context: Firmware handle for our RSS context + * @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared * @stats: Hardware statistics * @workaround_35388: Flag: firmware supports workaround for bug 35388 * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated * after MC reboot * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of * %MC_CMD_GET_CAPABILITIES response) + * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU + * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU + * @vport_id: The function's vport ID, only relevant for PFs + * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot + * @pf_index: The number for this PF, or the parent PF if this is a VF +#ifdef CONFIG_SFC_SRIOV + * @vf: Pointer to VF data structure +#endif */ struct efx_ef10_nic_data { struct efx_buffer mcdi_buf; @@ -503,126 +514,26 @@ struct efx_ef10_nic_data { unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT]; bool must_restore_piobufs; u32 rx_rss_context; + bool rx_rss_context_exclusive; u64 stats[EF10_STAT_COUNT]; bool workaround_35388; bool must_check_datapath_caps; u32 datapath_caps; -}; - -/* - * On the SFC9000 family each port is associated with 1 PCI physical - * function (PF) handled by sfc and a configurable number of virtual - * functions (VFs) that may be handled by some other driver, often in - * a VM guest. The queue pointer registers are mapped in both PF and - * VF BARs such that an 8K region provides access to a single RX, TX - * and event queue (collectively a Virtual Interface, VI or VNIC). - * - * The PF has access to all 1024 VIs while VFs are mapped to VIs - * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered - * in range [VI_BASE + i << VI_SCALE, VI_BASE + i + 1 << VI_SCALE). - * The number of VIs and the VI_SCALE value are configurable but must - * be established at boot time by firmware. - */ - -/* Maximum VI_SCALE parameter supported by Siena */ -#define EFX_VI_SCALE_MAX 6 -/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX), - * so this is the smallest allowed value. 
*/ -#define EFX_VI_BASE 128U -/* Maximum number of VFs allowed */ -#define EFX_VF_COUNT_MAX 127 -/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */ -#define EFX_MAX_VF_EVQ_SIZE 8192UL -/* The number of buffer table entries reserved for each VI on a VF */ -#define EFX_VF_BUFTBL_PER_VI \ - ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \ - sizeof(efx_qword_t) / EFX_BUF_SIZE) - + unsigned int rx_dpcpu_fw_id; + unsigned int tx_dpcpu_fw_id; + unsigned int vport_id; + bool must_probe_vswitching; + unsigned int pf_index; #ifdef CONFIG_SFC_SRIOV - -/* SIENA */ -static inline bool efx_siena_sriov_wanted(struct efx_nic *efx) -{ - return efx->vf_count != 0; -} - -static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) -{ - return efx->vf_init_count != 0; -} - -static inline unsigned int efx_vf_size(struct efx_nic *efx) -{ - return 1 << efx->vi_scale; -} + unsigned int vf_index; + struct ef10_vf *vf; +#endif + u8 vport_mac[ETH_ALEN]; +}; int efx_init_sriov(void); -void efx_siena_sriov_probe(struct efx_nic *efx); -int efx_siena_sriov_init(struct efx_nic *efx); -void efx_siena_sriov_mac_address_changed(struct efx_nic *efx); -void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event); -void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event); -void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event); -void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq); -void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr); -void efx_siena_sriov_reset(struct efx_nic *efx); -void efx_siena_sriov_fini(struct efx_nic *efx); void efx_fini_sriov(void); -/* EF10 */ -static inline bool efx_ef10_sriov_wanted(struct efx_nic *efx) { return false; } -static inline int efx_ef10_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_ef10_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_fini(struct efx_nic *efx) {} - -#else - -/* SIENA */ -static inline bool efx_siena_sriov_wanted(struct efx_nic *efx) { return false; } -static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) { return false; } -static inline unsigned int efx_vf_size(struct efx_nic *efx) { return 0; } -static inline int efx_init_sriov(void) { return 0; } -static inline void efx_siena_sriov_probe(struct efx_nic *efx) {} -static inline int efx_siena_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_siena_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, - efx_qword_t *event) {} -static inline void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, - efx_qword_t *event) {} -static inline void efx_siena_sriov_event(struct efx_channel *channel, - efx_qword_t *event) {} -static inline void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, - unsigned dmaq) {} -static inline void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr) {} -static inline void efx_siena_sriov_reset(struct efx_nic *efx) {} -static inline void efx_siena_sriov_fini(struct efx_nic *efx) {} -static inline void efx_fini_sriov(void) {} - -/* EF10 */ -static inline bool efx_ef10_sriov_wanted(struct efx_nic *efx) { return false; } -static inline int efx_ef10_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_ef10_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} -static 
inline void efx_ef10_sriov_fini(struct efx_nic *efx) {} - -#endif - -/* FALCON */ -static inline bool efx_falcon_sriov_wanted(struct efx_nic *efx) { return false; } -static inline int efx_falcon_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_falcon_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_falcon_sriov_reset(struct efx_nic *efx) {} -static inline void efx_falcon_sriov_fini(struct efx_nic *efx) {} - -int efx_siena_sriov_set_vf_mac(struct net_device *dev, int vf, u8 *mac); -int efx_siena_sriov_set_vf_vlan(struct net_device *dev, int vf, - u16 vlan, u8 qos); -int efx_siena_sriov_get_vf_config(struct net_device *dev, int vf, - struct ifla_vf_info *ivf); -int efx_siena_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf, - bool spoofchk); - struct ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); @@ -654,6 +565,7 @@ extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; extern const struct efx_nic_type siena_a0_nic_type; extern const struct efx_nic_type efx_hunt_a0_nic_type; +extern const struct efx_nic_type efx_hunt_a0_vf_nic_type; /************************************************************************** * diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index a2e9aee05cdd..ad62615a93dc 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -306,7 +306,7 @@ struct efx_ptp_data { struct work_struct pps_work; struct workqueue_struct *pps_workwq; bool nic_ts_enabled; - MCDI_DECLARE_BUF(txbuf, MC_CMD_PTP_IN_TRANSMIT_LENMAX); + _MCDI_DECLARE_BUF(txbuf, MC_CMD_PTP_IN_TRANSMIT_LENMAX); unsigned int good_syncs; unsigned int fast_syncs; @@ -389,11 +389,8 @@ size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats) MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), NULL); - if (rc) { - netif_err(efx, hw, efx->net_dev, - "MC_CMD_PTP_OP_STATUS failed (%d)\n", rc); + if (rc) memset(outbuf, 0, sizeof(outbuf)); - } efx_nic_update_stats(efx_ptp_stat_desc, PTP_STAT_COUNT, efx_ptp_stat_mask, stats, _MCDI_PTR(outbuf, 0), false); @@ -490,14 +487,20 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) */ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_GET_ATTRIBUTES); MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); - rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &out_len); - if (rc == 0) + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &out_len); + if (rc == 0) { fmt = MCDI_DWORD(outbuf, PTP_OUT_GET_ATTRIBUTES_TIME_FORMAT); - else if (rc == -EINVAL) + } else if (rc == -EINVAL) { fmt = MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS; - else + } else if (rc == -EPERM) { + netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + return rc; + } else { + efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), + outbuf, sizeof(outbuf), rc); return rc; + } if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION) { ptp->ns_to_nic_time = efx_ptp_ns_to_s27; @@ -541,8 +544,8 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) MC_CMD_PTP_OP_GET_TIMESTAMP_CORRECTIONS); MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); - rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, 
sizeof(outbuf), NULL); if (rc == 0) { efx->ptp_data->ts_corrections.tx = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT); @@ -558,6 +561,8 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) efx->ptp_data->ts_corrections.pps_out = 0; efx->ptp_data->ts_corrections.pps_in = 0; } else { + efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf, + sizeof(outbuf), rc); return rc; } @@ -568,7 +573,7 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) static int efx_ptp_enable(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_ENABLE_LEN); - MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, 0); + MCDI_DECLARE_BUF_ERR(outbuf); int rc; MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ENABLE); @@ -596,7 +601,7 @@ static int efx_ptp_enable(struct efx_nic *efx) static int efx_ptp_disable(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_DISABLE_LEN); - MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, 0); + MCDI_DECLARE_BUF_ERR(outbuf); int rc; MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_DISABLE); @@ -604,7 +609,12 @@ static int efx_ptp_disable(struct efx_nic *efx) rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), NULL); rc = (rc == -EALREADY) ? 0 : rc; - if (rc) + /* If we get ENOSYS, the NIC doesn't support PTP, and thus this function + * should only have been called during probe. + */ + if (rc == -ENOSYS || rc == -EPERM) + netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + else if (rc) efx_mcdi_display_error(efx, MC_CMD_PTP, MC_CMD_PTP_IN_DISABLE_LEN, outbuf, sizeof(outbuf), rc); diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index f12c811938d2..b323b9167526 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -25,6 +25,7 @@ #include "mcdi.h" #include "mcdi_pcol.h" #include "selftest.h" +#include "siena_sriov.h" /* Hardware control for SFC9000 family including SFL9021 (aka Siena). 
*/ @@ -306,7 +307,9 @@ static int siena_probe_nic(struct efx_nic *efx) if (rc) goto fail5; +#ifdef CONFIG_SFC_SRIOV efx_siena_sriov_probe(efx); +#endif efx_ptp_defer_probe_with_channel(efx); return 0; @@ -321,7 +324,8 @@ fail1: return rc; } -static void siena_rx_push_rss_config(struct efx_nic *efx) +static int siena_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) { efx_oword_t temp; @@ -343,7 +347,11 @@ static void siena_rx_push_rss_config(struct efx_nic *efx) FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8); efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3); + memcpy(efx->rx_indir_table, rx_indir_table, + sizeof(efx->rx_indir_table)); efx_farch_rx_push_indir_table(efx); + + return 0; } /* This call performs hardware-specific global initialisation, such as @@ -386,7 +394,7 @@ static int siena_init_nic(struct efx_nic *efx) EFX_RX_USR_BUF_SIZE >> 5); efx_writeo(efx, &temp, FR_AZ_RX_CFG); - siena_rx_push_rss_config(efx); + siena_rx_push_rss_config(efx, false, efx->rx_indir_table); /* Enable event logging */ rc = efx_mcdi_log_ctrl(efx, true, false, 0); @@ -909,6 +917,8 @@ fail: */ const struct efx_nic_type siena_a0_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = siena_mem_map_size, .probe = siena_probe_nic, .remove = siena_remove_nic, @@ -996,11 +1006,22 @@ const struct efx_nic_type siena_a0_nic_type = { #endif .ptp_write_host_time = siena_ptp_write_host_time, .ptp_set_ts_config = siena_ptp_set_ts_config, +#ifdef CONFIG_SFC_SRIOV + .sriov_configure = efx_siena_sriov_configure, .sriov_init = efx_siena_sriov_init, .sriov_fini = efx_siena_sriov_fini, - .sriov_mac_address_changed = efx_siena_sriov_mac_address_changed, .sriov_wanted = efx_siena_sriov_wanted, .sriov_reset = efx_siena_sriov_reset, + .sriov_flr = efx_siena_sriov_flr, + .sriov_set_vf_mac = efx_siena_sriov_set_vf_mac, + .sriov_set_vf_vlan = efx_siena_sriov_set_vf_vlan, + .sriov_set_vf_spoofchk = efx_siena_sriov_set_vf_spoofchk, + .sriov_get_vf_config = efx_siena_sriov_get_vf_config, + .vswitching_probe = efx_port_dummy_op_int, + .vswitching_restore = efx_port_dummy_op_int, + .vswitching_remove = efx_port_dummy_op_void, + .set_mac_address = efx_siena_sriov_mac_address_changed, +#endif .revision = EFX_REV_SIENA_A0, .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index fe83430796fd..da7b94f34604 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -16,6 +16,7 @@ #include "filter.h" #include "mcdi_pcol.h" #include "farch_regs.h" +#include "siena_sriov.h" #include "vfdi.h" /* Number of longs required to track all the VIs in a VF */ @@ -38,7 +39,7 @@ enum efx_vf_tx_filter_mode { }; /** - * struct efx_vf - Back-end resource and protocol state for a PCI VF + * struct siena_vf - Back-end resource and protocol state for a PCI VF * @efx: The Efx NIC owning this VF * @pci_rid: The PCI requester ID for this VF * @pci_name: The PCI name (formatted address) of this VF @@ -83,7 +84,7 @@ enum efx_vf_tx_filter_mode { * @rxq_retry_count: Number of receive queues in @rxq_retry_mask. * @reset_work: Work item to schedule a VF reset. 
*/ -struct efx_vf { +struct siena_vf { struct efx_nic *efx; unsigned int pci_rid; char pci_name[13]; /* dddd:bb:dd.f */ @@ -189,7 +190,7 @@ MODULE_PARM_DESC(max_vfs, */ static struct workqueue_struct *vfdi_workqueue; -static unsigned abs_index(struct efx_vf *vf, unsigned index) +static unsigned abs_index(struct siena_vf *vf, unsigned index) { return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index; } @@ -207,8 +208,8 @@ static int efx_siena_sriov_cmd(struct efx_nic *efx, bool enable, MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE); MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count); - rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN, - outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN, + outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen); if (rc) return rc; if (outlen < MC_CMD_SRIOV_OUT_LEN) @@ -299,7 +300,7 @@ out: /* The TX filter is entirely controlled by this driver, and is modified * underneath the feet of the VF */ -static void efx_siena_sriov_reset_tx_filter(struct efx_vf *vf) +static void efx_siena_sriov_reset_tx_filter(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct efx_filter_spec filter; @@ -343,7 +344,7 @@ static void efx_siena_sriov_reset_tx_filter(struct efx_vf *vf) } /* The RX filter is managed here on behalf of the VF driver */ -static void efx_siena_sriov_reset_rx_filter(struct efx_vf *vf) +static void efx_siena_sriov_reset_rx_filter(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct efx_filter_spec filter; @@ -382,7 +383,7 @@ static void efx_siena_sriov_reset_rx_filter(struct efx_vf *vf) } } -static void __efx_siena_sriov_update_vf_addr(struct efx_vf *vf) +static void __efx_siena_sriov_update_vf_addr(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -397,7 +398,7 @@ static void __efx_siena_sriov_update_vf_addr(struct efx_vf *vf) * local_page_list, either by acquiring local_lock or by running from * efx_siena_sriov_peer_work() */ -static void __efx_siena_sriov_push_vf_status(struct efx_vf *vf) +static void __efx_siena_sriov_push_vf_status(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -509,8 +510,9 @@ static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count) * Optionally set VF index and VI index within the VF. 
*/ static bool map_vi_index(struct efx_nic *efx, unsigned abs_index, - struct efx_vf **vf_out, unsigned *rel_index_out) + struct siena_vf **vf_out, unsigned *rel_index_out) { + struct siena_nic_data *nic_data = efx->nic_data; unsigned vf_i; if (abs_index < EFX_VI_BASE) @@ -520,13 +522,13 @@ static bool map_vi_index(struct efx_nic *efx, unsigned abs_index, return true; if (vf_out) - *vf_out = efx->vf + vf_i; + *vf_out = nic_data->vf + vf_i; if (rel_index_out) *rel_index_out = abs_index % efx_vf_size(efx); return false; } -static int efx_vfdi_init_evq(struct efx_vf *vf) +static int efx_vfdi_init_evq(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; @@ -567,7 +569,7 @@ static int efx_vfdi_init_evq(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_init_rxq(struct efx_vf *vf) +static int efx_vfdi_init_rxq(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; @@ -608,7 +610,7 @@ static int efx_vfdi_init_rxq(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_init_txq(struct efx_vf *vf) +static int efx_vfdi_init_txq(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; @@ -655,7 +657,7 @@ static int efx_vfdi_init_txq(struct efx_vf *vf) } /* Returns true when efx_vfdi_fini_all_queues should wake */ -static bool efx_vfdi_flush_wake(struct efx_vf *vf) +static bool efx_vfdi_flush_wake(struct siena_vf *vf) { /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */ smp_mb(); @@ -664,7 +666,7 @@ static bool efx_vfdi_flush_wake(struct efx_vf *vf) atomic_read(&vf->rxq_retry_count); } -static void efx_vfdi_flush_clear(struct efx_vf *vf) +static void efx_vfdi_flush_clear(struct siena_vf *vf) { memset(vf->txq_mask, 0, sizeof(vf->txq_mask)); vf->txq_count = 0; @@ -674,7 +676,7 @@ static void efx_vfdi_flush_clear(struct efx_vf *vf) atomic_set(&vf->rxq_retry_count, 0); } -static int efx_vfdi_fini_all_queues(struct efx_vf *vf) +static int efx_vfdi_fini_all_queues(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; efx_oword_t reg; @@ -757,7 +759,7 @@ static int efx_vfdi_fini_all_queues(struct efx_vf *vf) return timeout ? 
0 : VFDI_RC_ETIMEDOUT; } -static int efx_vfdi_insert_filter(struct efx_vf *vf) +static int efx_vfdi_insert_filter(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -789,7 +791,7 @@ static int efx_vfdi_insert_filter(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_remove_all_filters(struct efx_vf *vf) +static int efx_vfdi_remove_all_filters(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -801,7 +803,7 @@ static int efx_vfdi_remove_all_filters(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_set_status_page(struct efx_vf *vf) +static int efx_vfdi_set_status_page(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -846,7 +848,7 @@ static int efx_vfdi_set_status_page(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_clear_status_page(struct efx_vf *vf) +static int efx_vfdi_clear_status_page(struct siena_vf *vf) { mutex_lock(&vf->status_lock); vf->status_addr = 0; @@ -855,7 +857,7 @@ static int efx_vfdi_clear_status_page(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -typedef int (*efx_vfdi_op_t)(struct efx_vf *vf); +typedef int (*efx_vfdi_op_t)(struct siena_vf *vf); static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = { [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq, @@ -870,7 +872,7 @@ static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = { static void efx_siena_sriov_vfdi(struct work_struct *work) { - struct efx_vf *vf = container_of(work, struct efx_vf, req); + struct siena_vf *vf = container_of(work, struct siena_vf, req); struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; struct efx_memcpy_req copy[2]; @@ -936,7 +938,8 @@ static void efx_siena_sriov_vfdi(struct work_struct *work) * event ring in guest memory with VFDI reset events, then (re-initialise) the * event queue to raise an interrupt. The guest driver will then recover. 
*/ -static void efx_siena_sriov_reset_vf(struct efx_vf *vf, + +static void efx_siena_sriov_reset_vf(struct siena_vf *vf, struct efx_buffer *buffer) { struct efx_nic *efx = vf->efx; @@ -1006,7 +1009,7 @@ static void efx_siena_sriov_reset_vf(struct efx_vf *vf, static void efx_siena_sriov_reset_vf_work(struct work_struct *work) { - struct efx_vf *vf = container_of(work, struct efx_vf, req); + struct siena_vf *vf = container_of(work, struct siena_vf, req); struct efx_nic *efx = vf->efx; struct efx_buffer buf; @@ -1055,8 +1058,10 @@ void efx_siena_sriov_probe(struct efx_nic *efx) if (!max_vfs) return; - if (efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &count)) + if (efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &count)) { + netif_info(efx, probe, efx->net_dev, "no SR-IOV VFs probed\n"); return; + } if (count > 0 && count > max_vfs) count = max_vfs; @@ -1077,7 +1082,7 @@ static void efx_siena_sriov_peer_work(struct work_struct *data) peer_work); struct efx_nic *efx = nic_data->efx; struct vfdi_status *vfdi_status = nic_data->vfdi_status.addr; - struct efx_vf *vf; + struct siena_vf *vf; struct efx_local_addr *local_addr; struct vfdi_endpoint *peer; struct efx_endpoint_page *epp; @@ -1099,7 +1104,7 @@ static void efx_siena_sriov_peer_work(struct work_struct *data) peer_space = ARRAY_SIZE(vfdi_status->peers) - 1; peer_count = 1; for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; mutex_lock(&vf->status_lock); if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) { @@ -1155,7 +1160,7 @@ static void efx_siena_sriov_peer_work(struct work_struct *data) /* Finally, push the pages */ for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; mutex_lock(&vf->status_lock); if (vf->status_addr) @@ -1190,14 +1195,16 @@ static void efx_siena_sriov_free_local(struct efx_nic *efx) static int efx_siena_sriov_vf_alloc(struct efx_nic *efx) { unsigned index; - struct efx_vf *vf; + struct siena_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; - efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL); - if (!efx->vf) + nic_data->vf = kcalloc(efx->vf_count, sizeof(*nic_data->vf), + GFP_KERNEL); + if (!nic_data->vf) return -ENOMEM; for (index = 0; index < efx->vf_count; ++index) { - vf = efx->vf + index; + vf = nic_data->vf + index; vf->efx = efx; vf->index = index; @@ -1216,11 +1223,12 @@ static int efx_siena_sriov_vf_alloc(struct efx_nic *efx) static void efx_siena_sriov_vfs_fini(struct efx_nic *efx) { - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; unsigned int pos; for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; efx_nic_free_buffer(efx, &vf->buf); kfree(vf->peer_page_addrs); @@ -1237,7 +1245,7 @@ static int efx_siena_sriov_vfs_init(struct efx_nic *efx) struct siena_nic_data *nic_data = efx->nic_data; unsigned index, devfn, sriov, buftbl_base; u16 offset, stride; - struct efx_vf *vf; + struct siena_vf *vf; int rc; sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV); @@ -1250,7 +1258,7 @@ static int efx_siena_sriov_vfs_init(struct efx_nic *efx) buftbl_base = nic_data->vf_buftbl_base; devfn = pci_dev->devfn + offset; for (index = 0; index < efx->vf_count; ++index) { - vf = efx->vf + index; + vf = nic_data->vf + index; /* Reserve buffer entries */ vf->buftbl_base = buftbl_base; @@ -1350,7 +1358,7 @@ fail_pci: fail_vfs: cancel_work_sync(&nic_data->peer_work); efx_siena_sriov_free_local(efx); - 
kfree(efx->vf); + kfree(nic_data->vf); fail_alloc: efx_nic_free_buffer(efx, &nic_data->vfdi_status); fail_status: @@ -1361,7 +1369,7 @@ fail_cmd: void efx_siena_sriov_fini(struct efx_nic *efx) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned int pos; struct siena_nic_data *nic_data = efx->nic_data; @@ -1377,7 +1385,7 @@ void efx_siena_sriov_fini(struct efx_nic *efx) /* Flush all reconfiguration work */ for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; cancel_work_sync(&vf->req); cancel_work_sync(&vf->reset_work); } @@ -1388,7 +1396,7 @@ void efx_siena_sriov_fini(struct efx_nic *efx) /* Tear down back-end state */ efx_siena_sriov_vfs_fini(efx); efx_siena_sriov_free_local(efx); - kfree(efx->vf); + kfree(nic_data->vf); efx_nic_free_buffer(efx, &nic_data->vfdi_status); efx_siena_sriov_cmd(efx, false, NULL, NULL); } @@ -1396,7 +1404,7 @@ void efx_siena_sriov_fini(struct efx_nic *efx) void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event) { struct efx_nic *efx = channel->efx; - struct efx_vf *vf; + struct siena_vf *vf; unsigned qid, seq, type, data; qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID); @@ -1452,11 +1460,12 @@ error: void efx_siena_sriov_flr(struct efx_nic *efx, unsigned vf_i) { - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; if (vf_i > efx->vf_init_count) return; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; netif_info(efx, hw, efx->net_dev, "FLR on VF %s\n", vf->pci_name); @@ -1467,21 +1476,23 @@ void efx_siena_sriov_flr(struct efx_nic *efx, unsigned vf_i) vf->evq0_count = 0; } -void efx_siena_sriov_mac_address_changed(struct efx_nic *efx) +int efx_siena_sriov_mac_address_changed(struct efx_nic *efx) { struct siena_nic_data *nic_data = efx->nic_data; struct vfdi_status *vfdi_status = nic_data->vfdi_status.addr; if (!efx->vf_init_count) - return; + return 0; ether_addr_copy(vfdi_status->peers[0].mac_addr, efx->net_dev->dev_addr); queue_work(vfdi_workqueue, &nic_data->peer_work); + + return 0; } void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned queue, qid; queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); @@ -1500,7 +1511,7 @@ void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned ev_failed, queue, qid; queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); @@ -1525,7 +1536,7 @@ void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) /* Called from napi. 
Schedule the reset work item */ void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned int rel; if (map_vi_index(efx, dmaq, &vf, &rel)) @@ -1541,9 +1552,10 @@ void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) /* Reset all VFs */ void efx_siena_sriov_reset(struct efx_nic *efx) { + struct siena_nic_data *nic_data = efx->nic_data; unsigned int vf_i; struct efx_buffer buf; - struct efx_vf *vf; + struct siena_vf *vf; ASSERT_RTNL(); @@ -1557,7 +1569,7 @@ void efx_siena_sriov_reset(struct efx_nic *efx) return; for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) { - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; efx_siena_sriov_reset_vf(vf, &buf); } @@ -1573,7 +1585,6 @@ int efx_init_sriov(void) vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi"); if (!vfdi_workqueue) return -ENOMEM; - return 0; } @@ -1582,14 +1593,14 @@ void efx_fini_sriov(void) destroy_workqueue(vfdi_workqueue); } -int efx_siena_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) +int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; mutex_lock(&vf->status_lock); ether_addr_copy(vf->addr.mac_addr, mac); @@ -1599,16 +1610,16 @@ int efx_siena_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) return 0; } -int efx_siena_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, +int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, u8 qos) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; u16 tci; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; mutex_lock(&vf->status_lock); tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT); @@ -1619,16 +1630,16 @@ int efx_siena_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, return 0; } -int efx_siena_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, +int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i, bool spoofchk) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; int rc; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; mutex_lock(&vf->txq_lock); if (vf->txq_count == 0) { @@ -1643,16 +1654,16 @@ int efx_siena_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, return rc; } -int efx_siena_sriov_get_vf_config(struct net_device *net_dev, int vf_i, +int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf_i, struct ifla_vf_info *ivi) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; u16 tci; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; ivi->vf = vf_i; ether_addr_copy(ivi->mac, vf->addr.mac_addr); @@ -1666,3 +1677,12 @@ int efx_siena_sriov_get_vf_config(struct net_device *net_dev, int vf_i, return 0; } +bool efx_siena_sriov_wanted(struct efx_nic *efx) +{ + return efx->vf_count != 0; +} + +int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs) +{ + return 0; +} diff --git a/drivers/net/ethernet/sfc/siena_sriov.h 
b/drivers/net/ethernet/sfc/siena_sriov.h new file mode 100644 index 000000000000..d88d4dab170a --- /dev/null +++ b/drivers/net/ethernet/sfc/siena_sriov.h @@ -0,0 +1,79 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef SIENA_SRIOV_H +#define SIENA_SRIOV_H + +#include "net_driver.h" + +/* On the SFC9000 family each port is associated with 1 PCI physical + * function (PF) handled by sfc and a configurable number of virtual + * functions (VFs) that may be handled by some other driver, often in + * a VM guest. The queue pointer registers are mapped in both PF and + * VF BARs such that an 8K region provides access to a single RX, TX + * and event queue (collectively a Virtual Interface, VI or VNIC). + * + * The PF has access to all 1024 VIs while VFs are mapped to VIs + * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered + * in the range [VI_BASE + (i << VI_SCALE), VI_BASE + ((i + 1) << VI_SCALE)). + * The number of VIs and the VI_SCALE value are configurable but must + * be established at boot time by firmware. + */ + +/* Maximum VI_SCALE parameter supported by Siena */ +#define EFX_VI_SCALE_MAX 6 +/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX), + * so this is the smallest allowed value. + */ +#define EFX_VI_BASE 128U +/* Maximum number of VFs allowed */ +#define EFX_VF_COUNT_MAX 127 +/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */ +#define EFX_MAX_VF_EVQ_SIZE 8192UL +/* The number of buffer table entries reserved for each VI on a VF */ +#define EFX_VF_BUFTBL_PER_VI \ + ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \ + sizeof(efx_qword_t) / EFX_BUF_SIZE) + +int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs); +int efx_siena_sriov_init(struct efx_nic *efx); +void efx_siena_sriov_fini(struct efx_nic *efx); +int efx_siena_sriov_mac_address_changed(struct efx_nic *efx); +bool efx_siena_sriov_wanted(struct efx_nic *efx); +void efx_siena_sriov_reset(struct efx_nic *efx); +void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr); + +int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac); +int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf, + u16 vlan, u8 qos); +int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf, + bool spoofchk); +int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf, + struct ifla_vf_info *ivf); + +#ifdef CONFIG_SFC_SRIOV + +static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) +{ + return efx->vf_init_count != 0; +} +#else /* !CONFIG_SFC_SRIOV */ +static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) +{ + return false; +} +#endif /* CONFIG_SFC_SRIOV */ + +void efx_siena_sriov_probe(struct efx_nic *efx); +void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event); +void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event); +void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event); +void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq); + +#endif /* SIENA_SRIOV_H */ diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c new file mode 100644 index 000000000000..6c5edbdbfa27
--- /dev/null +++ b/drivers/net/ethernet/sfc/sriov.c @@ -0,0 +1,72 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2014-2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#include <linux/module.h> +#include "net_driver.h" +#include "nic.h" +#include "sriov.h" + +int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_mac) + return efx->type->sriov_set_vf_mac(efx, vf_i, mac); + else + return -EOPNOTSUPP; +} + +int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, + u8 qos) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_vlan) { + if ((vlan & ~VLAN_VID_MASK) || + (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) + return -EINVAL; + + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); + } else { + return -EOPNOTSUPP; + } +} + +int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, + bool spoofchk) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_spoofchk) + return efx->type->sriov_set_vf_spoofchk(efx, vf_i, spoofchk); + else + return -EOPNOTSUPP; +} + +int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, + struct ifla_vf_info *ivi) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_get_vf_config) + return efx->type->sriov_get_vf_config(efx, vf_i, ivi); + else + return -EOPNOTSUPP; +} + +int efx_sriov_set_vf_link_state(struct net_device *net_dev, int vf_i, + int link_state) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_link_state) + return efx->type->sriov_set_vf_link_state(efx, vf_i, + link_state); + else + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h new file mode 100644 index 000000000000..3be15a54c562 --- /dev/null +++ b/drivers/net/ethernet/sfc/sriov.h @@ -0,0 +1,29 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2014-2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EFX_SRIOV_H +#define EFX_SRIOV_H + +#include "net_driver.h" + +#ifdef CONFIG_SFC_SRIOV + +int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); +int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, + u8 qos); +int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, + bool spoofchk); +int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, + struct ifla_vf_info *ivi); +int efx_sriov_set_vf_link_state(struct net_device *net_dev, int vf_i, + int link_state); + +#endif /* CONFIG_SFC_SRIOV */ + +#endif /* EFX_SRIOV_H */ diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 7d3af190be55..731e0453a7d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -26,6 +26,82 @@ config STMMAC_PLATFORM If unsure, say N. +if STMMAC_PLATFORM + +config DWMAC_GENERIC + tristate "Generic driver for DWMAC" + default STMMAC_PLATFORM + ---help--- + Generic DWMAC driver for platforms that don't require any + platform-specific code to function or are using platform + data for setup. + +config DWMAC_LPC18XX + tristate "NXP LPC18xx/43xx DWMAC support" + default ARCH_LPC18XX + depends on OF + select MFD_SYSCON + ---help--- + Support for NXP LPC18xx/43xx DWMAC Ethernet. + +config DWMAC_MESON + tristate "Amlogic Meson dwmac support" + default ARCH_MESON + depends on OF + help + Support for Ethernet controller on Amlogic Meson SoCs. + + This selects the Amlogic Meson SoC glue layer support for + the stmmac device driver. This driver is used for Meson6 and + Meson8 SoCs. + +config DWMAC_ROCKCHIP + tristate "Rockchip dwmac support" + default ARCH_ROCKCHIP + depends on OF + select MFD_SYSCON + help + Support for Ethernet controller on Rockchip RK3288 SoC. + + This selects the Rockchip RK3288 SoC glue layer support for + the stmmac device driver. + +config DWMAC_SOCFPGA + tristate "SOCFPGA dwmac support" + default ARCH_SOCFPGA + depends on OF + select MFD_SYSCON + help + Support for the Ethernet controller on Altera SOCFPGA. + + This selects the Altera SOCFPGA SoC glue layer support + for the stmmac device driver. This driver is used for + the Arria5 and Cyclone5 FPGA SoCs. + +config DWMAC_STI + tristate "STi GMAC support" + default ARCH_STI + depends on OF + select MFD_SYSCON + ---help--- + Support for the Ethernet controller on STi SoCs. + + This selects STi SoC glue layer support for the stmmac + device driver. This driver is used for the GMAC Ethernet + controller on STi series SoCs. + +config DWMAC_SUNXI + tristate "Allwinner GMAC support" + default ARCH_SUNXI + depends on OF + ---help--- + Support for Allwinner A20/A31 GMAC Ethernet controllers. + + This selects Allwinner SoC glue layer support for the + stmmac device driver. This driver is used for the A20/A31 + GMAC Ethernet controllers.
+endif + config STMMAC_PCI tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 73c2715a27f3..92e714a48367 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -4,9 +4,16 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o $(stmmac-y) -obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o -stmmac-platform-objs:= stmmac_platform.o dwmac-meson.o dwmac-sunxi.o \ - dwmac-sti.o dwmac-socfpga.o dwmac-rk.o +# Ordering matters. Generic driver must be last. +obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o +obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o +obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o +obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o +obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-socfpga.o +obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o +obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o +obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o +stmmac-platform-objs:= stmmac_platform.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o stmmac-pci-objs:= stmmac_pci.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c new file mode 100644 index 000000000000..e817a1a44379 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -0,0 +1,41 @@ +/* + * Generic DWMAC platform driver + * + * Copyright (C) 2007-2011 STMicroelectronics Ltd + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#include "stmmac.h" +#include "stmmac_platform.h" + +static const struct of_device_id dwmac_generic_match[] = { + { .compatible = "st,spear600-gmac"}, + { .compatible = "snps,dwmac-3.610"}, + { .compatible = "snps,dwmac-3.70a"}, + { .compatible = "snps,dwmac-3.710"}, + { .compatible = "snps,dwmac"}, + { } +}; +MODULE_DEVICE_TABLE(of, dwmac_generic_match); + +static struct platform_driver dwmac_generic_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = STMMAC_RESOURCE_NAME, + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = of_match_ptr(dwmac_generic_match), + }, +}; +module_platform_driver(dwmac_generic_driver); + +MODULE_DESCRIPTION("Generic dwmac driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c new file mode 100644 index 000000000000..cb888d3ebbdc --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -0,0 +1,99 @@ +/* + * DWMAC glue for NXP LPC18xx/LPC43xx Ethernet + * + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. 
+ */ + +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_net.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/stmmac.h> + +#include "stmmac_platform.h" + +/* Register defines for CREG syscon */ +#define LPC18XX_CREG_CREG6 0x12c +# define LPC18XX_CREG_CREG6_ETHMODE_MASK 0x7 +# define LPC18XX_CREG_CREG6_ETHMODE_MII 0x0 +# define LPC18XX_CREG_CREG6_ETHMODE_RMII 0x4 + +struct lpc18xx_dwmac_priv_data { + struct regmap *reg; + int interface; +}; + +static void *lpc18xx_dwmac_setup(struct platform_device *pdev) +{ + struct lpc18xx_dwmac_priv_data *dwmac; + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return ERR_PTR(-ENOMEM); + + dwmac->interface = of_get_phy_mode(pdev->dev.of_node); + if (dwmac->interface < 0) + return ERR_PTR(dwmac->interface); + + dwmac->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(dwmac->reg)) { + dev_err(&pdev->dev, "Syscon lookup failed\n"); + return dwmac->reg; + } + + return dwmac; +} + +static int lpc18xx_dwmac_init(struct platform_device *pdev, void *priv) +{ + struct lpc18xx_dwmac_priv_data *dwmac = priv; + u8 ethmode; + + if (dwmac->interface == PHY_INTERFACE_MODE_MII) { + ethmode = LPC18XX_CREG_CREG6_ETHMODE_MII; + } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) { + ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII; + } else { + dev_err(&pdev->dev, "Only MII and RMII mode supported\n"); + return -EINVAL; + } + + regmap_update_bits(dwmac->reg, LPC18XX_CREG_CREG6, + LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode); + + return 0; +} + +static const struct stmmac_of_data lpc18xx_dwmac_data = { + .has_gmac = 1, + .setup = lpc18xx_dwmac_setup, + .init = lpc18xx_dwmac_init, +}; + +static const struct of_device_id lpc18xx_dwmac_match[] = { + { .compatible = "nxp,lpc1850-dwmac", .data = &lpc18xx_dwmac_data }, + { } +}; +MODULE_DEVICE_TABLE(of, lpc18xx_dwmac_match); + +static struct platform_driver lpc18xx_dwmac_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "lpc18xx-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = lpc18xx_dwmac_match, + }, +}; +module_platform_driver(lpc18xx_dwmac_driver); + +MODULE_AUTHOR("Joachim Eastwood <manabian@gmail.com>"); +MODULE_DESCRIPTION("DWMAC glue for LPC18xx/43xx Ethernet"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index cca028d632f6..61a324a87d09 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -15,6 +15,7 @@ #include <linux/ethtool.h> #include <linux/io.h> #include <linux/ioport.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/stmmac.h> @@ -63,7 +64,28 @@ static void *meson6_dwmac_setup(struct platform_device *pdev) return dwmac; } -const struct stmmac_of_data meson6_dwmac_data = { +static const struct stmmac_of_data meson6_dwmac_data = { .setup = meson6_dwmac_setup, .fix_mac_speed = meson6_dwmac_fix_mac_speed, }; + +static const struct of_device_id meson6_dwmac_match[] = { + { .compatible = "amlogic,meson6-dwmac", .data = &meson6_dwmac_data}, + { } +}; +MODULE_DEVICE_TABLE(of, meson6_dwmac_match); + +static struct platform_driver meson6_dwmac_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "meson6-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = 
meson6_dwmac_match, + }, +}; +module_platform_driver(meson6_dwmac_driver); + +MODULE_AUTHOR("Beniamino Galvani <b.galvani@gmail.com>"); +MODULE_DESCRIPTION("Amlogic Meson DWMAC glue layer"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6249a4ec08f0..30e28f0d9a60 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -22,13 +22,17 @@ #include <linux/phy.h> #include <linux/of_net.h> #include <linux/gpio.h> +#include <linux/module.h> #include <linux/of_gpio.h> #include <linux/of_device.h> +#include <linux/platform_device.h> #include <linux/regulator/consumer.h> #include <linux/delay.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> +#include "stmmac_platform.h" + struct rk_priv_data { struct platform_device *pdev; int phy_iface; @@ -428,10 +432,31 @@ static void rk_fix_speed(void *priv, unsigned int speed) dev_err(dev, "unsupported interface %d", bsp_priv->phy_iface); } -const struct stmmac_of_data rk3288_gmac_data = { +static const struct stmmac_of_data rk3288_gmac_data = { .has_gmac = 1, .fix_mac_speed = rk_fix_speed, .setup = rk_gmac_setup, .init = rk_gmac_init, .exit = rk_gmac_exit, }; + +static const struct of_device_id rk_gmac_dwmac_match[] = { + { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_gmac_data}, + { } +}; +MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); + +static struct platform_driver rk_gmac_dwmac_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "rk_gmac-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = rk_gmac_dwmac_match, + }, +}; +module_platform_driver(rk_gmac_dwmac_driver); + +MODULE_AUTHOR("Chen-Zhi (Roger Chen) <roger.chen@rock-chips.com>"); +MODULE_DESCRIPTION("Rockchip RK3288 DWMAC specific glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 5a36bd2c7837..8141c5b844ae 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -257,9 +257,28 @@ static int socfpga_dwmac_init(struct platform_device *pdev, void *priv) return ret; } -const struct stmmac_of_data socfpga_gmac_data = { +static const struct stmmac_of_data socfpga_gmac_data = { .setup = socfpga_dwmac_probe, .init = socfpga_dwmac_init, .exit = socfpga_dwmac_exit, .fix_mac_speed = socfpga_dwmac_fix_mac_speed, }; + +static const struct of_device_id socfpga_dwmac_match[] = { + { .compatible = "altr,socfpga-stmmac", .data = &socfpga_gmac_data }, + { } +}; +MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); + +static struct platform_driver socfpga_dwmac_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "socfpga-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = socfpga_dwmac_match, + }, +}; +module_platform_driver(socfpga_dwmac_driver); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index bb6e2dc61bec..a2e8111c5d14 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -17,6 +17,7 @@ #include <linux/stmmac.h> #include <linux/phy.h> #include <linux/mfd/syscon.h> +#include <linux/module.h> #include <linux/regmap.h> #include <linux/clk.h> #include <linux/of.h> @@ -351,16 +352,40 @@ static void 
*sti_dwmac_setup(struct platform_device *pdev) return dwmac; } -const struct stmmac_of_data stih4xx_dwmac_data = { +static const struct stmmac_of_data stih4xx_dwmac_data = { .fix_mac_speed = stih4xx_fix_retime_src, .setup = sti_dwmac_setup, .init = stix4xx_init, .exit = sti_dwmac_exit, }; -const struct stmmac_of_data stid127_dwmac_data = { +static const struct stmmac_of_data stid127_dwmac_data = { .fix_mac_speed = stid127_fix_retime_src, .setup = sti_dwmac_setup, .init = stid127_init, .exit = sti_dwmac_exit, }; + +static const struct of_device_id sti_dwmac_match[] = { + { .compatible = "st,stih415-dwmac", .data = &stih4xx_dwmac_data}, + { .compatible = "st,stih416-dwmac", .data = &stih4xx_dwmac_data}, + { .compatible = "st,stid127-dwmac", .data = &stid127_dwmac_data}, + { .compatible = "st,stih407-dwmac", .data = &stih4xx_dwmac_data}, + { } +}; +MODULE_DEVICE_TABLE(of, sti_dwmac_match); + +static struct platform_driver sti_dwmac_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "sti-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = sti_dwmac_match, + }, +}; +module_platform_driver(sti_dwmac_driver); + +MODULE_AUTHOR("Srinivas Kandagatla <srinivas.kandagatla@st.com>"); +MODULE_DESCRIPTION("STMicroelectronics DWMAC Specific Glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index c5ea9ab75b03..15048ca39759 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -18,7 +18,9 @@ #include <linux/stmmac.h> #include <linux/clk.h> +#include <linux/module.h> #include <linux/phy.h> +#include <linux/platform_device.h> #include <linux/of_net.h> #include <linux/regulator/consumer.h> @@ -132,7 +134,7 @@ static void sun7i_fix_speed(void *priv, unsigned int speed) /* of_data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. 
*/ -const struct stmmac_of_data sun7i_gmac_data = { +static const struct stmmac_of_data sun7i_gmac_data = { .has_gmac = 1, .tx_coe = 1, .fix_mac_speed = sun7i_fix_speed, @@ -140,3 +142,24 @@ const struct stmmac_of_data sun7i_gmac_data = { .init = sun7i_gmac_init, .exit = sun7i_gmac_exit, }; + +static const struct of_device_id sun7i_dwmac_match[] = { + { .compatible = "allwinner,sun7i-a20-gmac", .data = &sun7i_gmac_data}, + { } +}; +MODULE_DEVICE_TABLE(of, sun7i_dwmac_match); + +static struct platform_driver sun7i_dwmac_driver = { + .probe = stmmac_pltfr_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "sun7i-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = sun7i_dwmac_match, + }, +}; +module_platform_driver(sun7i_dwmac_driver); + +MODULE_AUTHOR("Chen-Yu Tsai <wens@csie.org>"); +MODULE_DESCRIPTION("Allwinner sunxi DWMAC specific glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 2ac9552d1fa3..9cbcae203597 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -34,6 +34,14 @@ #include <linux/ptp_clock_kernel.h> #include <linux/reset.h> +struct stmmac_resources { + void __iomem *addr; + const char *mac; + int wol_irq; + int lpi_irq; + int irq; +}; + struct stmmac_tx_info { dma_addr_t buf; bool map_as_page; @@ -129,9 +137,9 @@ void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_resume(struct net_device *ndev); int stmmac_suspend(struct net_device *ndev); int stmmac_dvr_remove(struct net_device *ndev); -struct stmmac_priv *stmmac_dvr_probe(struct device *device, - struct plat_stmmacenet_data *plat_dat, - void __iomem *addr); +int stmmac_dvr_probe(struct device *device, + struct plat_stmmacenet_data *plat_dat, + struct stmmac_resources *res); void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 05c146f718a3..e4f273976071 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -975,13 +975,11 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, { struct sk_buff *skb; - skb = __netdev_alloc_skb(priv->dev, priv->dma_buf_sz + NET_IP_ALIGN, - flags); + skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags); if (!skb) { pr_err("%s: Rx init fails; skb is NULL\n", __func__); return -ENOMEM; } - skb_reserve(skb, NET_IP_ALIGN); priv->rx_skbuff[i] = skb; priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, priv->dma_buf_sz, @@ -2799,16 +2797,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv) * stmmac_dvr_probe * @device: device pointer * @plat_dat: platform data pointer - * @addr: iobase memory address + * @res: stmmac resource pointer * Description: this is the main probe function used to * call the alloc_etherdev, allocate the priv structure. * Return: - * on success the new private structure is returned, otherwise the error - * pointer. + * returns 0 on success, otherwise errno. 
*/ -struct stmmac_priv *stmmac_dvr_probe(struct device *device, - struct plat_stmmacenet_data *plat_dat, - void __iomem *addr) +int stmmac_dvr_probe(struct device *device, + struct plat_stmmacenet_data *plat_dat, + struct stmmac_resources *res) { int ret = 0; struct net_device *ndev = NULL; @@ -2816,7 +2813,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, ndev = alloc_etherdev(sizeof(struct stmmac_priv)); if (!ndev) - return ERR_PTR(-ENOMEM); + return -ENOMEM; SET_NETDEV_DEV(ndev, device); @@ -2827,8 +2824,17 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, stmmac_set_ethtool_ops(ndev); priv->pause = pause; priv->plat = plat_dat; - priv->ioaddr = addr; - priv->dev->base_addr = (unsigned long)addr; + priv->ioaddr = res->addr; + priv->dev->base_addr = (unsigned long)res->addr; + + priv->dev->irq = res->irq; + priv->wol_irq = res->wol_irq; + priv->lpi_irq = res->lpi_irq; + + if (res->mac) + memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN); + + dev_set_drvdata(device, priv); /* Verify driver arguments */ stmmac_verify_args(); @@ -2943,7 +2949,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, } } - return priv; + return 0; error_mdio_register: unregister_netdev(ndev); @@ -2956,7 +2962,7 @@ error_pclk_get: error_clk_get: free_netdev(ndev); - return ERR_PTR(ret); + return ret; } EXPORT_SYMBOL_GPL(stmmac_dvr_probe); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 3bca908716e2..d71a721ea61c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -163,7 +163,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, { struct stmmac_pci_info *info = (struct stmmac_pci_info *)id->driver_data; struct plat_stmmacenet_data *plat; - struct stmmac_priv *priv; + struct stmmac_resources res; int i; int ret; @@ -214,19 +214,12 @@ static int stmmac_pci_probe(struct pci_dev *pdev, pci_enable_msi(pdev); - priv = stmmac_dvr_probe(&pdev->dev, plat, pcim_iomap_table(pdev)[i]); - if (IS_ERR(priv)) { - dev_err(&pdev->dev, "%s: main driver probe failed\n", __func__); - return PTR_ERR(priv); - } - priv->dev->irq = pdev->irq; - priv->wol_irq = pdev->irq; - - pci_set_drvdata(pdev, priv->dev); + memset(&res, 0, sizeof(res)); + res.addr = pcim_iomap_table(pdev)[i]; + res.wol_irq = pdev->irq; + res.irq = pdev->irq; - dev_dbg(&pdev->dev, "STMMAC PCI driver registration completed\n"); - - return 0; + return stmmac_dvr_probe(&pdev->dev, plat, &res); } /** diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 68aec5c460db..1664c0186f5b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -32,25 +32,6 @@ #include "stmmac.h" #include "stmmac_platform.h" -static const struct of_device_id stmmac_dt_ids[] = { - /* SoC specific glue layers should come before generic bindings */ - { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_gmac_data}, - { .compatible = "amlogic,meson6-dwmac", .data = &meson6_dwmac_data}, - { .compatible = "allwinner,sun7i-a20-gmac", .data = &sun7i_gmac_data}, - { .compatible = "st,stih415-dwmac", .data = &stih4xx_dwmac_data}, - { .compatible = "st,stih416-dwmac", .data = &stih4xx_dwmac_data}, - { .compatible = "st,stid127-dwmac", .data = &stid127_dwmac_data}, - { .compatible = "st,stih407-dwmac", .data = &stih4xx_dwmac_data}, - { .compatible = "altr,socfpga-stmmac", .data = 
&socfpga_gmac_data }, - { .compatible = "st,spear600-gmac"}, - { .compatible = "snps,dwmac-3.610"}, - { .compatible = "snps,dwmac-3.70a"}, - { .compatible = "snps,dwmac-3.710"}, - { .compatible = "snps,dwmac"}, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, stmmac_dt_ids); - #ifdef CONFIG_OF /** @@ -129,14 +110,9 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, struct device_node *np = pdev->dev.of_node; struct stmmac_dma_cfg *dma_cfg; const struct of_device_id *device; + struct device *dev = &pdev->dev; - if (!np) - return -ENODEV; - - device = of_match_device(stmmac_dt_ids, &pdev->dev); - if (!device) - return -ENODEV; - + device = of_match_device(dev->driver->of_match_table, dev); if (device->data) { const struct stmmac_of_data *data = device->data; plat->has_gmac = data->has_gmac; @@ -268,27 +244,26 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, * the necessary platform resources, invoke custom helper (if required) and * invoke the main probe function. */ -static int stmmac_pltfr_probe(struct platform_device *pdev) +int stmmac_pltfr_probe(struct platform_device *pdev) { + struct stmmac_resources stmmac_res; int ret = 0; struct resource *res; struct device *dev = &pdev->dev; - void __iomem *addr = NULL; - struct stmmac_priv *priv = NULL; struct plat_stmmacenet_data *plat_dat = NULL; - const char *mac = NULL; - int irq, wol_irq, lpi_irq; + + memset(&stmmac_res, 0, sizeof(stmmac_res)); /* Get IRQ information early to have an ability to ask for deferred * probe if needed before we went too far with resource allocation. */ - irq = platform_get_irq_byname(pdev, "macirq"); - if (irq < 0) { - if (irq != -EPROBE_DEFER) { + stmmac_res.irq = platform_get_irq_byname(pdev, "macirq"); + if (stmmac_res.irq < 0) { + if (stmmac_res.irq != -EPROBE_DEFER) { dev_err(dev, "MAC IRQ configuration information not found\n"); } - return irq; + return stmmac_res.irq; } /* On some platforms e.g. 
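Fetching the IRQs first matters because platform_get_irq_byname() can return -EPROBE_DEFER; bailing out before any resource allocation keeps the retry cheap, and the defer case must stay quiet since it is not an error. A condensed sketch of the idiom:

	int irq = platform_get_irq_byname(pdev, "macirq");

	if (irq < 0) {
		/* -EPROBE_DEFER is propagated silently; the driver core
		 * simply calls probe() again once dependencies show up.
		 */
		if (irq != -EPROBE_DEFER)
			dev_err(&pdev->dev, "macirq not found\n");
		return irq;
	}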
SPEAr the wake up irq differs from the mac irq @@ -298,21 +273,21 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) * In case the wake up interrupt is not passed from the platform * so the driver will continue to use the mac irq (ndev->irq) */ - wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq"); - if (wol_irq < 0) { - if (wol_irq == -EPROBE_DEFER) + stmmac_res.wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq"); + if (stmmac_res.wol_irq < 0) { + if (stmmac_res.wol_irq == -EPROBE_DEFER) return -EPROBE_DEFER; - wol_irq = irq; + stmmac_res.wol_irq = stmmac_res.irq; } - lpi_irq = platform_get_irq_byname(pdev, "eth_lpi"); - if (lpi_irq == -EPROBE_DEFER) + stmmac_res.lpi_irq = platform_get_irq_byname(pdev, "eth_lpi"); + if (stmmac_res.lpi_irq == -EPROBE_DEFER) return -EPROBE_DEFER; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - addr = devm_ioremap_resource(dev, res); - if (IS_ERR(addr)) - return PTR_ERR(addr); + stmmac_res.addr = devm_ioremap_resource(dev, res); + if (IS_ERR(stmmac_res.addr)) + return PTR_ERR(stmmac_res.addr); plat_dat = dev_get_platdata(&pdev->dev); @@ -332,7 +307,7 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) plat_dat->unicast_filter_entries = 1; if (pdev->dev.of_node) { - ret = stmmac_probe_config_dt(pdev, plat_dat, &mac); + ret = stmmac_probe_config_dt(pdev, plat_dat, &stmmac_res.mac); if (ret) { pr_err("%s: main dt probe failed", __func__); return ret; @@ -353,27 +328,9 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) return ret; } - priv = stmmac_dvr_probe(&(pdev->dev), plat_dat, addr); - if (IS_ERR(priv)) { - pr_err("%s: main driver probe failed", __func__); - return PTR_ERR(priv); - } - - /* Copy IRQ values to priv structure which is now avaialble */ - priv->dev->irq = irq; - priv->wol_irq = wol_irq; - priv->lpi_irq = lpi_irq; - - /* Get MAC address if available (DT) */ - if (mac) - memcpy(priv->dev->dev_addr, mac, ETH_ALEN); - - platform_set_drvdata(pdev, priv->dev); - - pr_debug("STMMAC platform driver registration completed"); - - return 0; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } +EXPORT_SYMBOL_GPL(stmmac_pltfr_probe); /** * stmmac_pltfr_remove @@ -381,7 +338,7 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) * Description: this function calls the main to free the net resources * and calls the platforms hook and release the resources (e.g. mem). 
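devm_ioremap_resource() replaces the open-coded request+ioremap pair and unmaps automatically on driver detach; note that it reports failure through ERR_PTR(), never NULL. The usual call sequence, as used here:

	struct resource *r;
	void __iomem *base;

	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, r);
	if (IS_ERR(base))		/* ERR_PTR(), never NULL */
		return PTR_ERR(base);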
*/ -static int stmmac_pltfr_remove(struct platform_device *pdev) +int stmmac_pltfr_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -395,6 +352,7 @@ static int stmmac_pltfr_remove(struct platform_device *pdev) return ret; } +EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); #ifdef CONFIG_PM_SLEEP /** @@ -438,21 +396,6 @@ static int stmmac_pltfr_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -static SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, - stmmac_pltfr_suspend, stmmac_pltfr_resume); - -static struct platform_driver stmmac_pltfr_driver = { - .probe = stmmac_pltfr_probe, - .remove = stmmac_pltfr_remove, - .driver = { - .name = STMMAC_RESOURCE_NAME, - .pm = &stmmac_pltfr_pm_ops, - .of_match_table = of_match_ptr(stmmac_dt_ids), - }, -}; - -module_platform_driver(stmmac_pltfr_driver); - -MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PLATFORM driver"); -MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); -MODULE_LICENSE("GPL"); +SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend, + stmmac_pltfr_resume); +EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 093eb99e5ffd..71da86d7bd00 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -19,11 +19,8 @@ #ifndef __STMMAC_PLATFORM_H__ #define __STMMAC_PLATFORM_H__ -extern const struct stmmac_of_data meson6_dwmac_data; -extern const struct stmmac_of_data sun7i_gmac_data; -extern const struct stmmac_of_data stih4xx_dwmac_data; -extern const struct stmmac_of_data stid127_dwmac_data; -extern const struct stmmac_of_data socfpga_gmac_data; -extern const struct stmmac_of_data rk3288_gmac_data; +int stmmac_pltfr_probe(struct platform_device *pdev); +int stmmac_pltfr_remove(struct platform_device *pdev); +extern const struct dev_pm_ops stmmac_pltfr_pm_ops; #endif /* __STMMAC_PLATFORM_H__ */ diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 43efc3a0cda5..5ec4ed3f6c8d 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -537,7 +537,7 @@ int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order, static void netcp_frag_free(bool is_frag, void *ptr) { if (is_frag) - put_page(virt_to_head_page(ptr)); + skb_free_frag(ptr); else kfree(ptr); } @@ -698,7 +698,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } } - netcp->ndev->last_rx = jiffies; netcp->ndev->stats.rx_packets++; netcp->ndev->stats.rx_bytes += skb->len; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 3d8f60d9643e..6f0a4495c7f3 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -721,9 +721,6 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) if (!hash_default) __inv_buffer(buf, len); - /* ISSUE: Is this needed? 
*/ - dev->last_rx = jiffies; - #ifdef TILE_NET_DUMP_PACKETS dump_packet(buf, len, "rx"); #endif /* TILE_NET_DUMP_PACKETS */ diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 8e9371a3388a..3c54a2cae5df 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -604,8 +604,7 @@ spider_net_set_multi(struct net_device *netdev) int i; u32 reg; struct spider_net_card *card = netdev_priv(netdev); - unsigned long bitmask[SPIDER_NET_MULTICAST_HASHES / BITS_PER_LONG] = - {0, }; + DECLARE_BITMAP(bitmask, SPIDER_NET_MULTICAST_HASHES) = {}; spider_net_set_promisc(card); diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index de2850497c09..725106f75d42 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -472,8 +472,7 @@ struct rhine_private { /* Frequently used values: keep some adjacent for cache effect. */ u32 quirks; - struct rx_desc *rx_head_desc; - unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ + unsigned int cur_rx; unsigned int cur_tx, dirty_tx; unsigned int rx_buf_sz; /* Based on MTU+slack. */ struct rhine_stats rx_stats; @@ -1213,17 +1212,61 @@ static void free_ring(struct net_device* dev) } -static void alloc_rbufs(struct net_device *dev) +struct rhine_skb_dma { + struct sk_buff *skb; + dma_addr_t dma; +}; + +static inline int rhine_skb_dma_init(struct net_device *dev, + struct rhine_skb_dma *sd) { struct rhine_private *rp = netdev_priv(dev); struct device *hwdev = dev->dev.parent; - dma_addr_t next; + const int size = rp->rx_buf_sz; + + sd->skb = netdev_alloc_skb(dev, size); + if (!sd->skb) + return -ENOMEM; + + sd->dma = dma_map_single(hwdev, sd->skb->data, size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(hwdev, sd->dma))) { + netif_err(rp, drv, dev, "Rx DMA mapping failure\n"); + dev_kfree_skb_any(sd->skb); + return -EIO; + } + + return 0; +} + +static void rhine_reset_rbufs(struct rhine_private *rp) +{ int i; - rp->dirty_rx = rp->cur_rx = 0; + rp->cur_rx = 0; + + for (i = 0; i < RX_RING_SIZE; i++) + rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn); +} + +static inline void rhine_skb_dma_nic_store(struct rhine_private *rp, + struct rhine_skb_dma *sd, int entry) +{ + rp->rx_skbuff_dma[entry] = sd->dma; + rp->rx_skbuff[entry] = sd->skb; + + rp->rx_ring[entry].addr = cpu_to_le32(sd->dma); + dma_wmb(); +} + +static void free_rbufs(struct net_device* dev); + +static int alloc_rbufs(struct net_device *dev) +{ + struct rhine_private *rp = netdev_priv(dev); + dma_addr_t next; + int rc, i; rp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); - rp->rx_head_desc = &rp->rx_ring[0]; next = rp->rx_ring_dma; /* Init the ring entries */ @@ -1239,23 +1282,20 @@ static void alloc_rbufs(struct net_device *dev) /* Fill in the Rx buffers. Handle allocation failure gracefully. 
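The spider_net change swaps a hand-computed unsigned long array for DECLARE_BITMAP(), which sizes the array via BITS_TO_LONGS() and pairs naturally with the bitops helpers. A self-contained sketch of the idiom:

#include <linux/bitmap.h>
#include <linux/bitops.h>

#define N_HASHES 256

static void hash_mask_demo(void)
{
	/* expands to: unsigned long mask[BITS_TO_LONGS(256)] = {}; */
	DECLARE_BITMAP(mask, N_HASHES) = {};

	set_bit(42, mask);		/* mark hash bucket 42 */
	if (test_bit(42, mask))
		bitmap_zero(mask, N_HASHES);
}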
*/ for (i = 0; i < RX_RING_SIZE; i++) { - struct sk_buff *skb = netdev_alloc_skb(dev, rp->rx_buf_sz); - rp->rx_skbuff[i] = skb; - if (skb == NULL) - break; + struct rhine_skb_dma sd; - rp->rx_skbuff_dma[i] = - dma_map_single(hwdev, skb->data, rp->rx_buf_sz, - DMA_FROM_DEVICE); - if (dma_mapping_error(hwdev, rp->rx_skbuff_dma[i])) { - rp->rx_skbuff_dma[i] = 0; - dev_kfree_skb(skb); - break; + rc = rhine_skb_dma_init(dev, &sd); + if (rc < 0) { + free_rbufs(dev); + goto out; } - rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]); - rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn); + + rhine_skb_dma_nic_store(rp, &sd, i); } - rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + + rhine_reset_rbufs(rp); +out: + return rc; } static void free_rbufs(struct net_device* dev) @@ -1659,16 +1699,18 @@ static int rhine_open(struct net_device *dev) rc = request_irq(rp->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev); if (rc) - return rc; + goto out; netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->irq); rc = alloc_ring(dev); - if (rc) { - free_irq(rp->irq, dev); - return rc; - } - alloc_rbufs(dev); + if (rc < 0) + goto out_free_irq; + + rc = alloc_rbufs(dev); + if (rc < 0) + goto out_free_ring; + alloc_tbufs(dev); rhine_chip_reset(dev); rhine_task_enable(rp); @@ -1680,7 +1722,14 @@ static int rhine_open(struct net_device *dev) netif_start_queue(dev); - return 0; +out: + return rc; + +out_free_ring: + free_ring(dev); +out_free_irq: + free_irq(rp->irq, dev); + goto out; } static void rhine_reset_task(struct work_struct *work) @@ -1700,9 +1749,9 @@ static void rhine_reset_task(struct work_struct *work) /* clear all descriptors */ free_tbufs(dev); - free_rbufs(dev); alloc_tbufs(dev); - alloc_rbufs(dev); + + rhine_reset_rbufs(rp); /* Reinitialize the hardware. */ rhine_chip_reset(dev); @@ -1730,6 +1779,11 @@ static void rhine_tx_timeout(struct net_device *dev) schedule_work(&rp->reset_task); } +static inline bool rhine_tx_queue_full(struct rhine_private *rp) +{ + return (rp->cur_tx - rp->dirty_tx) >= TX_QUEUE_LEN; +} + static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev) { @@ -1800,11 +1854,17 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, netdev_sent_queue(dev, skb->len); /* lock eth irq */ - wmb(); + dma_wmb(); rp->tx_ring[entry].tx_status |= cpu_to_le32(DescOwn); wmb(); rp->cur_tx++; + /* + * Nobody wants cur_tx write to rot for ages after the NIC will have + * seen the transmit request, especially as the transmit completion + * handler could miss it. + */ + smp_wmb(); /* Non-x86 Todo: explicitly flush cache lines here. */ @@ -1817,8 +1877,14 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, ioaddr + ChipCmd1); IOSYNC; - if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN) + /* dirty_tx may be pessimistically out-of-sync. See rhine_tx. */ + if (rhine_tx_queue_full(rp)) { netif_stop_queue(dev); + smp_rmb(); + /* Rejuvenate. 
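The new smp_wmb() in rhine_start_tx() pairs with an smp_rmb() in rhine_tx(): the producer publishes cur_tx before testing for a full queue, then re-checks after stopping so a completion that raced in between cannot strand the queue. Stripped to its bones (rhine_tx_queue_full() as in the patch):

	/* producer (ndo_start_xmit) */
	rp->cur_tx++;
	smp_wmb();			/* publish cur_tx to the completion path */

	if (rhine_tx_queue_full(rp)) {
		netif_stop_queue(dev);
		smp_rmb();		/* observe a dirty_tx advanced meanwhile */
		if (!rhine_tx_queue_full(rp))
			netif_wake_queue(dev);	/* completion won the race */
	}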
*/ + if (!rhine_tx_queue_full(rp)) + netif_wake_queue(dev); + } netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n", rp->cur_tx - 1, entry); @@ -1866,13 +1932,24 @@ static void rhine_tx(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); struct device *hwdev = dev->dev.parent; - int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE; unsigned int pkts_compl = 0, bytes_compl = 0; + unsigned int dirty_tx = rp->dirty_tx; + unsigned int cur_tx; struct sk_buff *skb; + /* + * The race with rhine_start_tx does not matter here as long as the + * driver enforces a value of cur_tx that was relevant when the + * packet was scheduled to the network chipset. + * Executive summary: smp_rmb() balances smp_wmb() in rhine_start_tx. + */ + smp_rmb(); + cur_tx = rp->cur_tx; /* find and cleanup dirty tx descriptors */ - while (rp->dirty_tx != rp->cur_tx) { - txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status); + while (dirty_tx != cur_tx) { + unsigned int entry = dirty_tx % TX_RING_SIZE; + u32 txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status); + netif_dbg(rp, tx_done, dev, "Tx scavenge %d status %08x\n", entry, txstatus); if (txstatus & DescOwn) @@ -1921,12 +1998,23 @@ static void rhine_tx(struct net_device *dev) pkts_compl++; dev_consume_skb_any(skb); rp->tx_skbuff[entry] = NULL; - entry = (++rp->dirty_tx) % TX_RING_SIZE; + dirty_tx++; } + rp->dirty_tx = dirty_tx; + /* Pity we can't rely on the nearby BQL completion implicit barrier. */ + smp_wmb(); + netdev_completed_queue(dev, pkts_compl, bytes_compl); - if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4) + + /* cur_tx may be optimistically out-of-sync. See rhine_start_tx. */ + if (!rhine_tx_queue_full(rp) && netif_queue_stopped(dev)) { netif_wake_queue(dev); + smp_rmb(); + /* Rejuvenate. */ + if (rhine_tx_queue_full(rp)) + netif_stop_queue(dev); + } } /** @@ -1944,22 +2032,33 @@ static inline u16 rhine_get_vlan_tci(struct sk_buff *skb, int data_size) return be16_to_cpup((__be16 *)trailer); } +static inline void rhine_rx_vlan_tag(struct sk_buff *skb, struct rx_desc *desc, + int data_size) +{ + dma_rmb(); + if (unlikely(desc->desc_length & cpu_to_le32(DescTag))) { + u16 vlan_tci; + + vlan_tci = rhine_get_vlan_tci(skb, data_size); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } +} + /* Process up to limit frames from receive ring */ static int rhine_rx(struct net_device *dev, int limit) { struct rhine_private *rp = netdev_priv(dev); struct device *hwdev = dev->dev.parent; - int count; int entry = rp->cur_rx % RX_RING_SIZE; + int count; netif_dbg(rp, rx_status, dev, "%s(), entry %d status %08x\n", __func__, - entry, le32_to_cpu(rp->rx_head_desc->rx_status)); + entry, le32_to_cpu(rp->rx_ring[entry].rx_status)); /* If EOP is set on the next entry, it's a new packet. Send it up. */ for (count = 0; count < limit; ++count) { - struct rx_desc *desc = rp->rx_head_desc; + struct rx_desc *desc = rp->rx_ring + entry; u32 desc_status = le32_to_cpu(desc->rx_status); - u32 desc_length = le32_to_cpu(desc->desc_length); int data_size = desc_status >> 16; if (desc_status & DescOwn) @@ -1975,10 +2074,6 @@ static int rhine_rx(struct net_device *dev, int limit) "entry %#x length %d status %08x!\n", entry, data_size, desc_status); - netdev_warn(dev, - "Oversized Ethernet frame %p vs %p\n", - rp->rx_head_desc, - &rp->rx_ring[entry]); dev->stats.rx_length_errors++; } else if (desc_status & RxErr) { /* There was a error. 
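rhine_tx() is the other half of that protocol: it reads cur_tx once behind an smp_rmb(), reclaims descriptors into a local dirty_tx, publishes the result with smp_wmb(), and mirrors the wake-then-re-check dance so it cannot wake a queue the producer is about to stop. In outline:

	/* consumer (tx completion) */
	smp_rmb();			/* pairs with smp_wmb() in start_xmit */
	cur_tx = rp->cur_tx;

	while (dirty_tx != cur_tx) {
		/* ... reclaim descriptor, count bytes/packets ... */
		dirty_tx++;
	}
	rp->dirty_tx = dirty_tx;
	smp_wmb();			/* publish dirty_tx before waking */

	if (!rhine_tx_queue_full(rp) && netif_queue_stopped(dev)) {
		netif_wake_queue(dev);
		smp_rmb();
		if (rhine_tx_queue_full(rp))	/* producer raced us */
			netif_stop_queue(dev);
	}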
*/ @@ -2000,16 +2095,17 @@ static int rhine_rx(struct net_device *dev, int limit) } } } else { - struct sk_buff *skb = NULL; /* Length should omit the CRC */ int pkt_len = data_size - 4; - u16 vlan_tci = 0; + struct sk_buff *skb; /* Check if the packet is long enough to accept without copying to a minimally-sized skbuff. */ - if (pkt_len < rx_copybreak) + if (pkt_len < rx_copybreak) { skb = netdev_alloc_skb_ip_align(dev, pkt_len); - if (skb) { + if (unlikely(!skb)) + goto drop; + dma_sync_single_for_cpu(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, @@ -2018,32 +2114,31 @@ static int rhine_rx(struct net_device *dev, int limit) skb_copy_to_linear_data(skb, rp->rx_skbuff[entry]->data, pkt_len); - skb_put(skb, pkt_len); + dma_sync_single_for_device(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, DMA_FROM_DEVICE); } else { + struct rhine_skb_dma sd; + + if (unlikely(rhine_skb_dma_init(dev, &sd) < 0)) + goto drop; + skb = rp->rx_skbuff[entry]; - if (skb == NULL) { - netdev_err(dev, "Inconsistent Rx descriptor chain\n"); - break; - } - rp->rx_skbuff[entry] = NULL; - skb_put(skb, pkt_len); + dma_unmap_single(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, DMA_FROM_DEVICE); + rhine_skb_dma_nic_store(rp, &sd, entry); } - if (unlikely(desc_length & DescTag)) - vlan_tci = rhine_get_vlan_tci(skb, data_size); - + skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, dev); - if (unlikely(desc_length & DescTag)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + rhine_rx_vlan_tag(skb, desc, data_size); + netif_receive_skb(skb); u64_stats_update_begin(&rp->rx_stats.syncp); @@ -2051,35 +2146,16 @@ static int rhine_rx(struct net_device *dev, int limit) rp->rx_stats.packets++; u64_stats_update_end(&rp->rx_stats.syncp); } +give_descriptor_to_nic: + desc->rx_status = cpu_to_le32(DescOwn); entry = (++rp->cur_rx) % RX_RING_SIZE; - rp->rx_head_desc = &rp->rx_ring[entry]; - } - - /* Refill the Rx ring buffers. */ - for (; rp->cur_rx - rp->dirty_rx > 0; rp->dirty_rx++) { - struct sk_buff *skb; - entry = rp->dirty_rx % RX_RING_SIZE; - if (rp->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb(dev, rp->rx_buf_sz); - rp->rx_skbuff[entry] = skb; - if (skb == NULL) - break; /* Better luck next round. 
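With the separate refill loop gone, every Rx slot is now serviced in place: a replacement buffer is allocated and mapped before the old one is handed upstream, and on allocation failure the old buffer is simply given back to the NIC with the frame counted as dropped. That is what removes dirty_rx entirely. Schematically, per completed descriptor:

	if (rhine_skb_dma_init(dev, &sd) < 0) {
		dev->stats.rx_dropped++;	/* keep old buffer, drop frame */
	} else {
		/* unmap and pass the old skb up, install the new buffer */
		rhine_skb_dma_nic_store(rp, &sd, entry);
	}
	desc->rx_status = cpu_to_le32(DescOwn);	/* slot back to the NIC */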
*/ - rp->rx_skbuff_dma[entry] = - dma_map_single(hwdev, skb->data, - rp->rx_buf_sz, - DMA_FROM_DEVICE); - if (dma_mapping_error(hwdev, - rp->rx_skbuff_dma[entry])) { - dev_kfree_skb(skb); - rp->rx_skbuff_dma[entry] = 0; - break; - } - rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]); - } - rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn); } return count; + +drop: + dev->stats.rx_dropped++; + goto give_descriptor_to_nic; } static void rhine_restart_tx(struct net_device *dev) { @@ -2484,9 +2560,8 @@ static int rhine_resume(struct device *device) enable_mmio(rp->pioaddr, rp->quirks); rhine_power_init(dev); free_tbufs(dev); - free_rbufs(dev); alloc_tbufs(dev); - alloc_rbufs(dev); + rhine_reset_rbufs(rp); rhine_task_enable(rp); spin_lock_bh(&rp->lock); init_registers(dev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index af2694dc6f90..3b99a4df71f8 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -688,10 +688,8 @@ static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (temac_check_tx_bd_space(lp, num_frag)) { - if (!netif_queue_stopped(ndev)) { + if (!netif_queue_stopped(ndev)) netif_stop_queue(ndev); - return NETDEV_TX_BUSY; - } return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 4c9b4fa1d3c1..7cb9abac95c8 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -11,16 +11,16 @@ #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/if_vlan.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ -#define XAE_HDR_VLAN_SIZE 18 /* Size of an Ethernet hdr + VLAN */ #define XAE_TRL_SIZE 4 /* Size of Ethernet trailer (FCS) */ #define XAE_MTU 1500 /* Max MTU of an Ethernet frame */ #define XAE_JUMBO_MTU 9000 /* Max MTU of a jumbo Eth. frame */ #define XAE_MAX_FRAME_SIZE (XAE_MTU + XAE_HDR_SIZE + XAE_TRL_SIZE) -#define XAE_MAX_VLAN_FRAME_SIZE (XAE_MTU + XAE_HDR_VLAN_SIZE + XAE_TRL_SIZE) +#define XAE_MAX_VLAN_FRAME_SIZE (XAE_MTU + VLAN_ETH_HLEN + XAE_TRL_SIZE) #define XAE_MAX_JUMBO_FRAME_SIZE (XAE_JUMBO_MTU + XAE_HDR_SIZE + XAE_TRL_SIZE) /* Configuration options */ @@ -38,18 +38,21 @@ #define XAE_OPTION_FLOW_CONTROL (1 << 4) /* Strip FCS and PAD from incoming frames. Note: PAD from VLAN frames is not - * stripped. Default: disabled (set) */ + * stripped. Default: disabled (set) + */ #define XAE_OPTION_FCS_STRIP (1 << 5) /* Generate FCS field and add PAD automatically for outgoing frames. - * Default: enabled (set) */ + * Default: enabled (set) + */ #define XAE_OPTION_FCS_INSERT (1 << 6) /* Enable Length/Type error checking for incoming frames. When this option is * set, the MAC will filter frames that have a mismatched type/length field * and if XAE_OPTION_REPORT_RXERR is set, the user is notified when these * types of frames are encountered. When this option is cleared, the MAC will - * allow these types of frames to be received. Default: enabled (set) */ + * allow these types of frames to be received. Default: enabled (set) + */ #define XAE_OPTION_LENTYPE_ERR (1 << 7) /* Enable the transmitter. 
Default: enabled (set) */ @@ -159,12 +162,12 @@ #define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ #define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ #define XAE_MDIO_MIS_OFFSET 0x00000600 /* MII Management Interrupt Status */ -#define XAE_MDIO_MIP_OFFSET 0x00000620 /* MII Mgmt Interrupt Pending - * register offset */ -#define XAE_MDIO_MIE_OFFSET 0x00000640 /* MII Management Interrupt Enable - * register offset */ -#define XAE_MDIO_MIC_OFFSET 0x00000660 /* MII Management Interrupt Clear - * register offset. */ +/* MII Mgmt Interrupt Pending register offset */ +#define XAE_MDIO_MIP_OFFSET 0x00000620 +/* MII Management Interrupt Enable register offset */ +#define XAE_MDIO_MIE_OFFSET 0x00000640 +/* MII Management Interrupt Clear register offset. */ +#define XAE_MDIO_MIC_OFFSET 0x00000660 #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ #define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ @@ -176,18 +179,17 @@ #define XAE_MCAST_TABLE_OFFSET 0x00020000 /* Multicast table address */ /* Bit Masks for Axi Ethernet RAF register */ -#define XAE_RAF_MCSTREJ_MASK 0x00000002 /* Reject receive multicast - * destination address */ -#define XAE_RAF_BCSTREJ_MASK 0x00000004 /* Reject receive broadcast - * destination address */ +/* Reject receive multicast destination address */ +#define XAE_RAF_MCSTREJ_MASK 0x00000002 +/* Reject receive broadcast destination address */ +#define XAE_RAF_BCSTREJ_MASK 0x00000004 #define XAE_RAF_TXVTAGMODE_MASK 0x00000018 /* Tx VLAN TAG mode */ #define XAE_RAF_RXVTAGMODE_MASK 0x00000060 /* Rx VLAN TAG mode */ #define XAE_RAF_TXVSTRPMODE_MASK 0x00000180 /* Tx VLAN STRIP mode */ #define XAE_RAF_RXVSTRPMODE_MASK 0x00000600 /* Rx VLAN STRIP mode */ #define XAE_RAF_NEWFNCENBL_MASK 0x00000800 /* New function mode */ -#define XAE_RAF_EMULTIFLTRENBL_MASK 0x00001000 /* Exteneded Multicast - * Filtering mode - */ +/* Extended Multicast Filtering mode */ +#define XAE_RAF_EMULTIFLTRENBL_MASK 0x00001000 #define XAE_RAF_STATSRST_MASK 0x00002000 /* Stats. Counter Reset */ #define XAE_RAF_RXBADFRMEN_MASK 0x00004000 /* Recv Bad Frame Enable */ #define XAE_RAF_TXVTAGMODE_SHIFT 3 /* Tx Tag mode shift bits */ @@ -197,15 +199,16 @@ /* Bit Masks for Axi Ethernet TPF and IFGP registers */ #define XAE_TPF_TPFV_MASK 0x0000FFFF /* Tx pause frame value */ -#define XAE_IFGP0_IFGP_MASK 0x0000007F /* Transmit inter-frame - * gap adjustment value */ +/* Transmit inter-frame gap adjustment value */ +#define XAE_IFGP0_IFGP_MASK 0x0000007F /* Bit Masks for Axi Ethernet IS, IE and IP registers, Same masks apply - * for all 3 registers. */ -#define XAE_INT_HARDACSCMPLT_MASK 0x00000001 /* Hard register access - * complete */ -#define XAE_INT_AUTONEG_MASK 0x00000002 /* Auto negotiation - * complete */ + * for all 3 registers. 
+ */ +/* Hard register access complete */ +#define XAE_INT_HARDACSCMPLT_MASK 0x00000001 +/* Auto negotiation complete */ +#define XAE_INT_AUTONEG_MASK 0x00000002 #define XAE_INT_RXCMPIT_MASK 0x00000004 /* Rx complete */ #define XAE_INT_RXRJECT_MASK 0x00000008 /* Rx frame rejected */ #define XAE_INT_RXFIFOOVR_MASK 0x00000010 /* Rx fifo overrun */ @@ -215,10 +218,9 @@ #define XAE_INT_PHYRSTCMPLT_MASK 0x00000100 /* Phy Reset complete */ #define XAE_INT_ALL_MASK 0x0000003F /* All the ints */ +/* INT bits that indicate receive errors */ #define XAE_INT_RECV_ERROR_MASK \ - (XAE_INT_RXRJECT_MASK | XAE_INT_RXFIFOOVR_MASK) /* INT bits that - * indicate receive - * errors */ + (XAE_INT_RXRJECT_MASK | XAE_INT_RXFIFOOVR_MASK) /* Bit masks for Axi Ethernet VLAN TPID Word 0 register */ #define XAE_TPID_0_MASK 0x0000FFFF /* TPID 0 */ @@ -231,27 +233,28 @@ /* Bit masks for Axi Ethernet RCW1 register */ #define XAE_RCW1_RST_MASK 0x80000000 /* Reset */ #define XAE_RCW1_JUM_MASK 0x40000000 /* Jumbo frame enable */ -#define XAE_RCW1_FCS_MASK 0x20000000 /* In-Band FCS enable - * (FCS not stripped) */ +/* In-Band FCS enable (FCS not stripped) */ +#define XAE_RCW1_FCS_MASK 0x20000000 #define XAE_RCW1_RX_MASK 0x10000000 /* Receiver enable */ #define XAE_RCW1_VLAN_MASK 0x08000000 /* VLAN frame enable */ -#define XAE_RCW1_LT_DIS_MASK 0x02000000 /* Length/type field valid check - * disable */ -#define XAE_RCW1_CL_DIS_MASK 0x01000000 /* Control frame Length check - * disable */ -#define XAE_RCW1_PAUSEADDR_MASK 0x0000FFFF /* Pause frame source address - * bits [47:32]. Bits [31:0] are - * stored in register RCW0 */ +/* Length/type field valid check disable */ +#define XAE_RCW1_LT_DIS_MASK 0x02000000 +/* Control frame Length check disable */ +#define XAE_RCW1_CL_DIS_MASK 0x01000000 +/* Pause frame source address bits [47:32]. Bits [31:0] are + * stored in register RCW0 + */ +#define XAE_RCW1_PAUSEADDR_MASK 0x0000FFFF /* Bit masks for Axi Ethernet TC register */ #define XAE_TC_RST_MASK 0x80000000 /* Reset */ #define XAE_TC_JUM_MASK 0x40000000 /* Jumbo frame enable */ -#define XAE_TC_FCS_MASK 0x20000000 /* In-Band FCS enable - * (FCS not generated) */ +/* In-Band FCS enable (FCS not generated) */ +#define XAE_TC_FCS_MASK 0x20000000 #define XAE_TC_TX_MASK 0x10000000 /* Transmitter enable */ #define XAE_TC_VLAN_MASK 0x08000000 /* VLAN frame enable */ -#define XAE_TC_IFG_MASK 0x02000000 /* Inter-frame gap adjustment - * enable */ +/* Inter-frame gap adjustment enable */ +#define XAE_TC_IFG_MASK 0x02000000 /* Bit masks for Axi Ethernet FCC register */ #define XAE_FCC_FCRX_MASK 0x20000000 /* Rx flow control enable */ @@ -301,10 +304,10 @@ #define XAE_MDIO_INT_MIIM_RDY_MASK 0x00000001 /* MIIM Interrupt */ /* Bit masks for Axi Ethernet UAW1 register */ -#define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF /* Station address bits - * [47:32]; Station address - * bits [31:0] are stored in - * register UAW0 */ +/* Station address bits [47:32]; Station address + * bits [31:0] are stored in register UAW0 + */ +#define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF /* Bit masks for Axi Ethernet FMI register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ @@ -320,8 +323,8 @@ #define XAE_PHY_TYPE_SGMII 4 #define XAE_PHY_TYPE_1000BASE_X 5 -#define XAE_MULTICAST_CAM_TABLE_NUM 4 /* Total number of entries in the - * hardware multicast table. */ + /* Total number of entries in the hardware multicast table. 
*/ +#define XAE_MULTICAST_CAM_TABLE_NUM 4 /* Axi Ethernet Synthesis features */ #define XAE_FEATURE_PARTIAL_RX_CSUM (1 << 0) @@ -407,8 +410,11 @@ struct axidma_bd { * Txed/Rxed in the existing hardware. If jumbo option is * supported, the maximum frame size would be 9k. Else it is * 1522 bytes (assuming support for basic VLAN) - * @jumbo_support: Stores hardware configuration for jumbo support. If hardware - * can handle jumbo packets, this entry will be 1, else 0. + * @rxmem: Stores rx memory size for jumbo frame handling. + * @csum_offload_on_tx_path: Stores the checksum selection on TX side. + * @csum_offload_on_rx_path: Stores the checksum selection on RX side. + * @coalesce_count_rx: Store the irq coalesce on RX side. + * @coalesce_count_tx: Store the irq coalesce on TX side. */ struct axienet_local { struct net_device *ndev; @@ -446,7 +452,7 @@ struct axienet_local { u32 rx_bd_ci; u32 max_frm_size; - u32 jumbo_support; + u32 rxmem; int csum_offload_on_tx_path; int csum_offload_on_rx_path; @@ -472,7 +478,7 @@ struct axienet_option { * @lp: Pointer to axienet local structure * @offset: Address offset from the base address of Axi Ethernet core * - * returns: The contents of the Axi Ethernet register + * Return: The contents of the Axi Ethernet register * * This function returns the contents of the corresponding register. */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 28b7e7d9c272..4208dd7ef101 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -117,7 +117,7 @@ static struct axienet_option axienet_options[] = { * @lp: Pointer to axienet local structure * @reg: Address offset from the base address of the Axi DMA core * - * returns: The contents of the Axi DMA register + * Return: The contents of the Axi DMA register * * This function returns the contents of the corresponding Axi DMA register. */ @@ -179,8 +179,7 @@ static void axienet_dma_bd_release(struct net_device *ndev) * axienet_dma_bd_init - Setup buffer descriptor rings for Axi DMA * @ndev: Pointer to the net_device structure * - * returns: 0, on success - * -ENOMEM, on failure + * Return: 0, on success -ENOMEM, on failure * * This function is called to initialize the Rx and Tx DMA descriptor * rings. This initializes the descriptors with required default values @@ -198,9 +197,7 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; - /* - * Allocate the Tx and Rx buffer descriptors. - */ + /* Allocate the Tx and Rx buffer descriptors. */ lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent, sizeof(*lp->tx_bd_v) * TX_BD_NUM, &lp->tx_bd_p, GFP_KERNEL); @@ -263,7 +260,8 @@ static int axienet_dma_bd_init(struct net_device *ndev) axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception.*/ + * halted state. This will make the Rx side ready for reception. + */ axienet_dma_out32(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, @@ -273,7 +271,8 @@ static int axienet_dma_bd_init(struct net_device *ndev) /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. 
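The descriptor rings themselves come from dma_zalloc_coherent(), which hands back both a CPU virtual address and the bus address the DMA engine needs, pre-zeroed; the ring is then chained through physical next pointers. A sketch of the Tx side, following axienet_dma_bd_init():

	lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
					  sizeof(*lp->tx_bd_v) * TX_BD_NUM,
					  &lp->tx_bd_p, GFP_KERNEL);
	if (!lp->tx_bd_v)
		return -ENOMEM;

	for (i = 0; i < TX_BD_NUM; i++)
		lp->tx_bd_v[i].next = lp->tx_bd_p +
				      sizeof(*lp->tx_bd_v) *
				      ((i + 1) % TX_BD_NUM);	/* circular */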
But only after we write to the - * tail pointer register that the Tx channel will start transmitting */ + * tail pointer register that the Tx channel will start transmitting. + */ axienet_dma_out32(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, @@ -320,7 +319,7 @@ static void axienet_set_mac_address(struct net_device *ndev, void *address) * @ndev: Pointer to the net_device structure * @p: 6 byte Address to be written as MAC address * - * returns: 0 for all conditions. Presently, there is no failure case. + * Return: 0 for all conditions. Presently, there is no failure case. * * This function is called to initialize the MAC address of the Axi Ethernet * core. It calls the core specific axienet_set_mac_address. This is the @@ -354,7 +353,8 @@ static void axienet_set_multicast_list(struct net_device *ndev) netdev_mc_count(ndev) > XAE_MULTICAST_CAM_TABLE_NUM) { /* We must make the kernel realize we had to move into * promiscuous mode. If it was a promiscuous mode request - * the flag is already set. If not we set it. */ + * the flag is already set. If not we set it. + */ ndev->flags |= IFF_PROMISC; reg = axienet_ior(lp, XAE_FMI_OFFSET); reg |= XAE_FMI_PM_MASK; @@ -438,14 +438,15 @@ static void __axienet_device_reset(struct axienet_local *lp, /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending * commands/transfers will be flushed or completed during this - * reset process. */ + * reset process. + */ axienet_dma_out32(lp, offset, XAXIDMA_CR_RESET_MASK); timeout = DELAY_OF_ONE_MILLISEC; while (axienet_dma_in32(lp, offset) & XAXIDMA_CR_RESET_MASK) { udelay(1); if (--timeout == 0) { - dev_err(dev, "axienet_device_reset DMA " - "reset timeout!\n"); + netdev_err(lp->ndev, "%s: DMA reset timeout!\n", + __func__); break; } } @@ -471,19 +472,21 @@ static void axienet_device_reset(struct net_device *ndev) __axienet_device_reset(lp, &ndev->dev, XAXIDMA_RX_CR_OFFSET); lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; + lp->options |= XAE_OPTION_VLAN; lp->options &= (~XAE_OPTION_JUMBO); if ((ndev->mtu > XAE_MTU) && - (ndev->mtu <= XAE_JUMBO_MTU) && - (lp->jumbo_support)) { - lp->max_frm_size = ndev->mtu + XAE_HDR_VLAN_SIZE + - XAE_TRL_SIZE; - lp->options |= XAE_OPTION_JUMBO; + (ndev->mtu <= XAE_JUMBO_MTU)) { + lp->max_frm_size = ndev->mtu + VLAN_ETH_HLEN + + XAE_TRL_SIZE; + + if (lp->max_frm_size <= lp->rxmem) + lp->options |= XAE_OPTION_JUMBO; } if (axienet_dma_bd_init(ndev)) { - dev_err(&ndev->dev, "axienet_device_reset descriptor " - "allocation failed\n"); + netdev_err(ndev, "%s: descriptor allocation failed\n", + __func__); } axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET); @@ -497,7 +500,8 @@ static void axienet_device_reset(struct net_device *ndev) axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK); /* Sync default options with HW but leave receiver and - * transmitter disabled.*/ + * transmitter disabled. 
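Replacing the jumbo_support boolean with the actual xlnx,rxmem size lets the driver decide jumbo eligibility arithmetically: the worst-case frame is mtu + VLAN_ETH_HLEN (18) + XAE_TRL_SIZE (4), so an MTU of 9000 needs rxmem of at least 9022. The same sum guards axienet_change_mtu(). As a check function (sketch):

static bool axienet_mtu_fits(struct axienet_local *lp, int mtu)
{
	/* largest on-wire frame this MTU can produce */
	unsigned int frame = mtu + VLAN_ETH_HLEN + XAE_TRL_SIZE;

	return frame <= lp->rxmem;	/* e.g. 9000 + 18 + 4 = 9022 */
}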
+ */ axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); @@ -558,8 +562,8 @@ static void axienet_adjust_link(struct net_device *ndev) lp->last_link = link_state; phy_print_status(phy); } else { - dev_err(&ndev->dev, "Error setting Axi Ethernet " - "mac speed\n"); + netdev_err(ndev, + "Error setting Axi Ethernet mac speed\n"); } } } @@ -617,7 +621,7 @@ static void axienet_start_xmit_done(struct net_device *ndev) * @lp: Pointer to the axienet_local structure * @num_frag: The number of BDs to check for * - * returns: 0, on success + * Return: 0, on success * NETDEV_TX_BUSY, if any of the descriptors are not free * * This function is invoked before BDs are allocated and transmission starts. @@ -640,7 +644,7 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, * @skb: sk_buff pointer that contains data to be Txed. * @ndev: Pointer to net_device structure. * - * returns: NETDEV_TX_OK, on success + * Return: NETDEV_TX_OK, on success * NETDEV_TX_BUSY, if any of the descriptors are not free * * This function is invoked from upper layers to initiate transmission. The @@ -726,15 +730,15 @@ static void axienet_recv(struct net_device *ndev) u32 csumstatus; u32 size = 0; u32 packets = 0; - dma_addr_t tail_p; + dma_addr_t tail_p = 0; struct axienet_local *lp = netdev_priv(ndev); struct sk_buff *skb, *new_skb; struct axidma_bd *cur_p; - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; skb = (struct sk_buff *) (cur_p->sw_id_offset); length = cur_p->app4 & 0x0000FFFF; @@ -786,7 +790,8 @@ static void axienet_recv(struct net_device *ndev) ndev->stats.rx_packets += packets; ndev->stats.rx_bytes += size; - axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); + if (tail_p) + axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); } /** @@ -794,7 +799,7 @@ static void axienet_recv(struct net_device *ndev) * @irq: irq number * @_ndev: net_device pointer * - * returns: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED for all cases. * * This is the Axi DMA Tx done Isr. It invokes "axienet_start_xmit_done" * to complete the BD processing. @@ -808,6 +813,7 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) status = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); axienet_start_xmit_done(lp->ndev); goto out; } @@ -831,9 +837,9 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); tasklet_schedule(&lp->dma_err_tasklet); + axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); } out: - axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); return IRQ_HANDLED; } @@ -842,7 +848,7 @@ out: * @irq: irq number * @_ndev: net_device pointer * - * returns: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED for all cases. * * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD * processing. 
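Moving the write to the DMA status register before the completion handler closes an event-loss window: an interrupt condition that latches while axienet_start_xmit_done() or axienet_recv() runs now re-asserts instead of being acknowledged away after the fact. The shape of the fix, condensed:

	status = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
	if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) {
		/* ack first: events arriving during processing re-latch
		 * and raise a fresh interrupt instead of being lost
		 */
		axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status);
		axienet_start_xmit_done(lp->ndev);
	}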
@@ -856,6 +862,7 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) status = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); axienet_recv(lp->ndev); goto out; } @@ -879,9 +886,9 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); tasklet_schedule(&lp->dma_err_tasklet); + axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); } out: - axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); return IRQ_HANDLED; } @@ -891,7 +898,7 @@ static void axienet_dma_err_handler(unsigned long data); * axienet_open - Driver open routine. * @ndev: Pointer to net_device structure * - * returns: 0, on success. + * Return: 0, on success. * -ENODEV, if PHY cannot be connected to * non-zero error value on failure * @@ -914,7 +921,8 @@ static int axienet_open(struct net_device *ndev) /* Disable the MDIO interface till Axi Ethernet Reset is completed. * When we do an Axi Ethernet reset, it resets the complete core * including the MDIO. If MDIO is not disabled when the reset - * process is started, MDIO will be broken afterwards. */ + * process is started, MDIO will be broken afterwards. + */ axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg & (~XAE_MDIO_MC_MDIOEN_MASK))); axienet_device_reset(ndev); @@ -925,14 +933,20 @@ static int axienet_open(struct net_device *ndev) return ret; if (lp->phy_node) { - lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, + if (lp->phy_type == XAE_PHY_TYPE_GMII) { + lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, axienet_adjust_link, 0, PHY_INTERFACE_MODE_GMII); - if (!lp->phy_dev) { - dev_err(lp->dev, "of_phy_connect() failed\n"); - return -ENODEV; + } else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) { + lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, + axienet_adjust_link, 0, + PHY_INTERFACE_MODE_RGMII_ID); } - phy_start(lp->phy_dev); + + if (!lp->phy_dev) + dev_err(lp->dev, "of_phy_connect() failed\n"); + else + phy_start(lp->phy_dev); } /* Enable tasklets for Axi DMA error handling */ @@ -965,7 +979,7 @@ err_tx_irq: * axienet_stop - Driver stop routine. * @ndev: Pointer to net_device structure * - * returns: 0, on success. + * Return: 0, on success. * * This is the driver stop routine. It calls phy_disconnect to stop the PHY * device. It also removes the interrupt handlers and disables the interrupts. @@ -1005,7 +1019,7 @@ static int axienet_stop(struct net_device *ndev) * @ndev: Pointer to net_device structure * @new_mtu: New mtu value to be applied * - * returns: Always returns 0 (success). + * Return: Always returns 0 (success). * * This is the change mtu driver routine. It checks if the Axi Ethernet * hardware supports jumbo frames before changing the mtu. This can be @@ -1017,15 +1031,15 @@ static int axienet_change_mtu(struct net_device *ndev, int new_mtu) if (netif_running(ndev)) return -EBUSY; - if (lp->jumbo_support) { - if ((new_mtu > XAE_JUMBO_MTU) || (new_mtu < 64)) - return -EINVAL; - ndev->mtu = new_mtu; - } else { - if ((new_mtu > XAE_MTU) || (new_mtu < 64)) - return -EINVAL; - ndev->mtu = new_mtu; - } + + if ((new_mtu + VLAN_ETH_HLEN + + XAE_TRL_SIZE) > lp->rxmem) + return -EINVAL; + + if ((new_mtu > XAE_JUMBO_MTU) || (new_mtu < 64)) + return -EINVAL; + + ndev->mtu = new_mtu; return 0; } @@ -1072,6 +1086,8 @@ static const struct net_device_ops axienet_netdev_ops = { * not be found, the function returns -ENODEV. 
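axienet_open() now picks the phy_interface_t from the device-tree phy-type rather than hard-coding GMII, and a missing PHY is no longer fatal to open(). An equivalent formulation using a switch (illustrative only):

	phy_interface_t iface;

	switch (lp->phy_type) {
	case XAE_PHY_TYPE_GMII:
		iface = PHY_INTERFACE_MODE_GMII;
		break;
	case XAE_PHY_TYPE_RGMII_2_0:
		iface = PHY_INTERFACE_MODE_RGMII_ID;
		break;
	default:
		iface = PHY_INTERFACE_MODE_NA;	/* no supported mode */
	}

	lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node,
				     axienet_adjust_link, 0, iface);
	if (!lp->phy_dev)
		dev_err(lp->dev, "of_phy_connect() failed\n");
	else
		phy_start(lp->phy_dev);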
This function calls the * relevant PHY ethtool API to get the PHY settings. * Issue "ethtool ethX" under linux prompt to execute this function. + * + * Return: 0 on success, -ENODEV if PHY doesn't exist */ static int axienet_ethtools_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) @@ -1093,6 +1109,8 @@ static int axienet_ethtools_get_settings(struct net_device *ndev, * relevant PHY ethtool API to set the PHY. * Issue e.g. "ethtool -s ethX speed 1000" under linux prompt to execute this * function. + * + * Return: 0 on success, -ENODEV if PHY doesn't exist */ static int axienet_ethtools_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) @@ -1127,6 +1145,8 @@ static void axienet_ethtools_get_drvinfo(struct net_device *ndev, * * This implements ethtool command for getting the total register length * information. + * + * Return: the total regs length */ static int axienet_ethtools_get_regs_len(struct net_device *ndev) { @@ -1213,11 +1233,13 @@ axienet_ethtools_get_pauseparam(struct net_device *ndev, * axienet_ethtools_set_pauseparam - Set device pause parameter(flow control) * settings. * @ndev: Pointer to net_device structure - * @epauseparam:Pointer to ethtool_pauseparam structure + * @epauseparm:Pointer to ethtool_pauseparam structure * * This implements ethtool command for enabling flow control on Rx and Tx * paths. Issue "ethtool -A ethX tx on|off" under linux prompt to execute this * function. + * + * Return: 0 on success, -EFAULT if device is running */ static int axienet_ethtools_set_pauseparam(struct net_device *ndev, @@ -1227,8 +1249,8 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev, struct axienet_local *lp = netdev_priv(ndev); if (netif_running(ndev)) { - printk(KERN_ERR "%s: Please stop netif before applying " - "configruation\n", ndev->name); + netdev_err(ndev, + "Please stop netif before applying configuration\n"); return -EFAULT; } @@ -1254,6 +1276,8 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev, * This implements ethtool command for getting the DMA interrupt coalescing * count on Tx and Rx paths. Issue "ethtool -c ethX" under linux prompt to * execute this function. + * + * Return: 0 always */ static int axienet_ethtools_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ecoalesce) @@ -1277,6 +1301,8 @@ static int axienet_ethtools_get_coalesce(struct net_device *ndev, * This implements ethtool command for setting the DMA interrupt coalescing * count on Tx and Rx paths. Issue "ethtool -C ethX rx-frames 5" under linux * prompt to execute this function. + * + * Return: 0, on success, Non-zero error value on failure. */ static int axienet_ethtools_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ecoalesce) @@ -1284,8 +1310,8 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev, struct axienet_local *lp = netdev_priv(ndev); if (netif_running(ndev)) { - printk(KERN_ERR "%s: Please stop netif before applying " - "configruation\n", ndev->name); + netdev_err(ndev, + "Please stop netif before applying configuration\n"); return -EFAULT; } @@ -1354,7 +1380,8 @@ static void axienet_dma_err_handler(unsigned long data) /* Disable the MDIO interface till Axi Ethernet Reset is completed. * When we do an Axi Ethernet reset, it resets the complete core * including the MDIO. So if MDIO is not disabled when the reset - * process is started, MDIO will be broken afterwards. */ + * process is started, MDIO will be broken afterwards. 
+ */ axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg & ~XAE_MDIO_MC_MDIOEN_MASK)); @@ -1425,7 +1452,8 @@ static void axienet_dma_err_handler(unsigned long data) axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception.*/ + * halted state. This will make the Rx side ready for reception. + */ axienet_dma_out32(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, @@ -1435,7 +1463,8 @@ static void axienet_dma_err_handler(unsigned long data) /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting */ + * tail pointer register that the Tx channel will start transmitting + */ axienet_dma_out32(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, @@ -1451,7 +1480,8 @@ static void axienet_dma_err_handler(unsigned long data) axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK); /* Sync default options with HW but leave receiver and - * transmitter disabled.*/ + * transmitter disabled. + */ axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); @@ -1460,11 +1490,10 @@ static void axienet_dma_err_handler(unsigned long data) } /** - * axienet_of_probe - Axi Ethernet probe function. - * @op: Pointer to platform device structure. - * @match: Pointer to device id structure + * axienet_probe - Axi Ethernet probe function. + * @pdev: Pointer to platform device structure. * - * returns: 0, on success + * Return: 0, on success * Non-zero error value on failure. * * This is the probe routine for Axi Ethernet driver. This is called before @@ -1472,22 +1501,23 @@ static void axienet_dma_err_handler(unsigned long data) * device. Parses through device tree and populates fields of * axienet_local. It registers the Ethernet device. 
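The probe rewrite trades of_get_property() plus manual be32_to_cpup() for of_property_read_u32(), which validates the property length and handles endianness in one call. Side by side (sketch):

	u32 value;

	/* old style: raw big-endian pointer, caller converts */
	const __be32 *p = of_get_property(np, "xlnx,txcsum", NULL);
	if (p)
		value = be32_to_cpup(p);

	/* new style: 0 on success, -EINVAL/-ENODATA/-EOVERFLOW otherwise */
	if (!of_property_read_u32(np, "xlnx,txcsum", &value))
		/* use value */;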
*/ -static int axienet_of_probe(struct platform_device *op) +static int axienet_probe(struct platform_device *pdev) { - __be32 *p; - int size, ret = 0; + int ret; struct device_node *np; struct axienet_local *lp; struct net_device *ndev; - const void *addr; + u8 mac_addr[6]; + struct resource *ethres, dmares; + u32 value; ndev = alloc_etherdev(sizeof(*lp)); if (!ndev) return -ENOMEM; - platform_set_drvdata(op, ndev); + platform_set_drvdata(pdev, ndev); - SET_NETDEV_DEV(ndev, &op->dev); + SET_NETDEV_DEV(ndev, &pdev->dev); ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ ndev->features = NETIF_F_SG; ndev->netdev_ops = &axienet_netdev_ops; @@ -1495,21 +1525,23 @@ static int axienet_of_probe(struct platform_device *op) lp = netdev_priv(ndev); lp->ndev = ndev; - lp->dev = &op->dev; + lp->dev = &pdev->dev; lp->options = XAE_OPTION_DEFAULTS; /* Map device registers */ - lp->regs = of_iomap(op->dev.of_node, 0); + ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + lp->regs = devm_ioremap_resource(&pdev->dev, ethres); if (!lp->regs) { - dev_err(&op->dev, "could not map Axi Ethernet regs.\n"); + dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); ret = -ENOMEM; - goto nodev; + goto free_netdev; } + /* Setup checksum offload, but default to off if not specified */ lp->features = 0; - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,txcsum", NULL); - if (p) { - switch (be32_to_cpup(p)) { + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,txcsum", &value); + if (!ret) { + switch (value) { case 1: lp->csum_offload_on_tx_path = XAE_FEATURE_PARTIAL_TX_CSUM; @@ -1528,9 +1560,9 @@ static int axienet_of_probe(struct platform_device *op) lp->csum_offload_on_tx_path = XAE_NO_CSUM_OFFLOAD; } } - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,rxcsum", NULL); - if (p) { - switch (be32_to_cpup(p)) { + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,rxcsum", &value); + if (!ret) { + switch (value) { case 1: lp->csum_offload_on_rx_path = XAE_FEATURE_PARTIAL_RX_CSUM; @@ -1546,82 +1578,77 @@ static int axienet_of_probe(struct platform_device *op) } } /* For supporting jumbo frames, the Axi Ethernet hardware must have - * a larger Rx/Tx Memory. Typically, the size must be more than or - * equal to 16384 bytes, so that we can enable jumbo option and start - * supporting jumbo frames. Here we check for memory allocated for - * Rx/Tx in the hardware from the device-tree and accordingly set - * flags. */ - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,rxmem", NULL); - if (p) { - if ((be32_to_cpup(p)) >= 0x4000) - lp->jumbo_support = 1; - } - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,phy-type", NULL); - if (p) - lp->phy_type = be32_to_cpup(p); + * a larger Rx/Tx Memory. Typically, the size must be large so that + * we can enable jumbo option and start supporting jumbo frames. + * Here we check for memory allocated for Rx/Tx in the hardware from + * the device-tree and accordingly set flags. 
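One subtlety in this hunk is the mixed return conventions of the OF and devm helpers: of_parse_phandle() returns NULL on failure (so an IS_ERR()/PTR_ERR() test on it is questionable), while devm_ioremap_resource() returns an ERR_PTR() and never NULL. The conventional checks would be (sketch):

	np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
	if (!np) {			/* NULL on failure, never ERR_PTR() */
		dev_err(&pdev->dev, "could not find DMA node\n");
		return -ENODEV;
	}

	lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares);
	if (IS_ERR(lp->dma_regs)) {	/* ERR_PTR() on failure, never NULL */
		ret = PTR_ERR(lp->dma_regs);
		goto free_netdev;
	}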
+ */ + of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem); + of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type); /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ - np = of_parse_phandle(op->dev.of_node, "axistream-connected", 0); - if (!np) { - dev_err(&op->dev, "could not find DMA node\n"); - ret = -ENODEV; - goto err_iounmap; + np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); + if (IS_ERR(np)) { + dev_err(&pdev->dev, "could not find DMA node\n"); + ret = PTR_ERR(np); + goto free_netdev; } - lp->dma_regs = of_iomap(np, 0); - if (lp->dma_regs) { - dev_dbg(&op->dev, "MEM base: %p\n", lp->dma_regs); - } else { - dev_err(&op->dev, "unable to map DMA registers\n"); - of_node_put(np); + ret = of_address_to_resource(np, 0, &dmares); + if (ret) { + dev_err(&pdev->dev, "unable to get DMA resource\n"); + goto free_netdev; + } + lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); + if (!lp->dma_regs) { + dev_err(&pdev->dev, "could not map DMA regs\n"); + ret = -ENOMEM; + goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); lp->tx_irq = irq_of_parse_and_map(np, 0); of_node_put(np); if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) { - dev_err(&op->dev, "could not determine irqs\n"); + dev_err(&pdev->dev, "could not determine irqs\n"); ret = -ENOMEM; - goto err_iounmap_2; + goto free_netdev; } /* Retrieve the MAC address */ - addr = of_get_property(op->dev.of_node, "local-mac-address", &size); - if ((!addr) || (size != 6)) { - dev_err(&op->dev, "could not find MAC address\n"); - ret = -ENODEV; - goto err_iounmap_2; + ret = of_property_read_u8_array(pdev->dev.of_node, + "local-mac-address", mac_addr, 6); + if (ret) { + dev_err(&pdev->dev, "could not find MAC address\n"); + goto free_netdev; } - axienet_set_mac_address(ndev, (void *) addr); + axienet_set_mac_address(ndev, (void *)mac_addr); lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; - lp->phy_node = of_parse_phandle(op->dev.of_node, "phy-handle", 0); - ret = axienet_mdio_setup(lp, op->dev.of_node); - if (ret) - dev_warn(&op->dev, "error registering MDIO bus\n"); + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (lp->phy_node) { + ret = axienet_mdio_setup(lp, pdev->dev.of_node); + if (ret) + dev_warn(&pdev->dev, "error registering MDIO bus\n"); + } ret = register_netdev(lp->ndev); if (ret) { dev_err(lp->dev, "register_netdev() error (%i)\n", ret); - goto err_iounmap_2; + goto free_netdev; } return 0; -err_iounmap_2: - if (lp->dma_regs) - iounmap(lp->dma_regs); -err_iounmap: - iounmap(lp->regs); -nodev: +free_netdev: free_netdev(ndev); - ndev = NULL; + return ret; } -static int axienet_of_remove(struct platform_device *op) +static int axienet_remove(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(op); + struct net_device *ndev = platform_get_drvdata(pdev); struct axienet_local *lp = netdev_priv(ndev); axienet_mdio_teardown(lp); @@ -1630,24 +1657,21 @@ static int axienet_of_remove(struct platform_device *op) of_node_put(lp->phy_node); lp->phy_node = NULL; - iounmap(lp->regs); - if (lp->dma_regs) - iounmap(lp->dma_regs); free_netdev(ndev); return 0; } -static struct platform_driver axienet_of_driver = { - .probe = axienet_of_probe, - .remove = axienet_of_remove, +static struct platform_driver axienet_driver = { + .probe = axienet_probe, + .remove = axienet_remove, .driver = { .name = "xilinx_axienet", .of_match_table = axienet_of_match, }, }; 
-module_platform_driver(axienet_of_driver); +module_platform_driver(axienet_driver); MODULE_DESCRIPTION("Xilinx Axi Ethernet driver"); MODULE_AUTHOR("Xilinx"); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 3b67d60d4378..2a5a16834c01 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -37,7 +37,7 @@ int axienet_mdio_wait_until_ready(struct axienet_local *lp) * @phy_id: Address of the PHY device * @reg: PHY register to read * - * returns: The register contents on success, -ETIMEDOUT on a timeout + * Return: The register contents on success, -ETIMEDOUT on a timeout * * Reads the contents of the requested register from the requested PHY * address by first writing the details into MCR register. After a while @@ -80,7 +80,7 @@ static int axienet_mdio_read(struct mii_bus *bus, int phy_id, int reg) * @reg: PHY register to write to * @val: Value to be written into the register * - * returns: 0 on success, -ETIMEDOUT on a timeout + * Return: 0 on success, -ETIMEDOUT on a timeout * * Writes the value to the requested register by first writing the value * into MWD register. The MCR register is then appropriately set up @@ -119,7 +119,7 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg, * @lp: Pointer to axienet local data structure. * @np: Pointer to device node * - * returns: 0 on success, -ETIMEDOUT on a timeout, -ENOMEM when + * Return: 0 on success, -ETIMEDOUT on a timeout, -ENOMEM when * mdiobus_alloc (to allocate memory for mii bus structure) fails. * * Sets up the MDIO interface by initializing the MDIO clock and enabling the @@ -161,19 +161,19 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) np1 = of_find_node_by_name(NULL, "cpu"); if (!np1) { - printk(KERN_WARNING "%s(): Could not find CPU device node.", - __func__); - printk(KERN_WARNING "Setting MDIO clock divisor to " - "default %d\n", DEFAULT_CLOCK_DIVISOR); + netdev_warn(lp->ndev, "Could not find CPU device node.\n"); + netdev_warn(lp->ndev, + "Setting MDIO clock divisor to default %d\n", + DEFAULT_CLOCK_DIVISOR); clk_div = DEFAULT_CLOCK_DIVISOR; goto issue; } property_p = (u32 *) of_get_property(np1, "clock-frequency", NULL); if (!property_p) { - printk(KERN_WARNING "%s(): Could not find CPU property: " - "clock-frequency.", __func__); - printk(KERN_WARNING "Setting MDIO clock divisor to " - "default %d\n", DEFAULT_CLOCK_DIVISOR); + netdev_warn(lp->ndev, "clock-frequency property not found.\n"); + netdev_warn(lp->ndev, + "Setting MDIO clock divisor to default %d\n", + DEFAULT_CLOCK_DIVISOR); clk_div = DEFAULT_CLOCK_DIVISOR; of_node_put(np1); goto issue; @@ -183,12 +183,14 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1; /* If there is any remainder from the division of * fHOST / (MAX_MDIO_FREQ * 2), then we need to add - * 1 to the clock divisor or we will surely be above 2.5 MHz */ + * 1 to the clock divisor or we will surely be above 2.5 MHz + */ if (host_clock % (MAX_MDIO_FREQ * 2)) clk_div++; - printk(KERN_DEBUG "%s(): Setting MDIO clock divisor to %u based " - "on %u Hz host clock.\n", __func__, clk_div, host_clock); + netdev_dbg(lp->ndev, + "Setting MDIO clock divisor to %u based on %u Hz host clock\n", + clk_div, host_clock); of_node_put(np1); issue: diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c new file mode 100644 index 000000000000..b7eafa4c1a67 --- 
/dev/null +++ b/drivers/net/geneve.c @@ -0,0 +1,503 @@ +/* + * GENEVE: Generic Network Virtualization Encapsulation + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/hash.h> +#include <net/rtnetlink.h> +#include <net/geneve.h> + +#define GENEVE_NETDEV_VER "0.6" + +#define GENEVE_UDP_PORT 6081 + +#define GENEVE_N_VID (1u << 24) +#define GENEVE_VID_MASK (GENEVE_N_VID - 1) + +#define VNI_HASH_BITS 10 +#define VNI_HASH_SIZE (1<<VNI_HASH_BITS) + +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + +/* per-network namespace private data for this module */ +struct geneve_net { + struct list_head geneve_list; + struct hlist_head vni_list[VNI_HASH_SIZE]; +}; + +/* Pseudo network device */ +struct geneve_dev { + struct hlist_node hlist; /* vni hash table */ + struct net *net; /* netns for packet i/o */ + struct net_device *dev; /* netdev for geneve tunnel */ + struct geneve_sock *sock; /* socket used for geneve tunnel */ + u8 vni[3]; /* virtual network ID for tunnel */ + struct sockaddr_in remote; /* IPv4 address for link partner */ + struct list_head next; /* geneve's per namespace list */ +}; + +static int geneve_net_id; + +static inline __u32 geneve_net_vni_hash(u8 vni[3]) +{ + __u32 vnid; + + vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; + return hash_32(vnid, VNI_HASH_BITS); +} + +/* geneve receive/decap routine */ +static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) +{ + struct genevehdr *gnvh = geneve_hdr(skb); + struct geneve_dev *dummy, *geneve = NULL; + struct geneve_net *gn; + struct iphdr *iph = NULL; + struct pcpu_sw_netstats *stats; + struct hlist_head *vni_list_head; + int err = 0; + __u32 hash; + + iph = ip_hdr(skb); /* Still outer IP header... */ + + gn = gs->rcv_data; + + /* Find the device for this VNI */ + hash = geneve_net_vni_hash(gnvh->vni); + vni_list_head = &gn->vni_list[hash]; + hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { + if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && + iph->saddr == dummy->remote.sin_addr.s_addr) { + geneve = dummy; + break; + } + } + if (!geneve) + goto drop; + + /* Drop packets w/ critical options, + * since we don't support any... + */ + if (gnvh->critical) + goto drop; + + skb_reset_mac_header(skb); + skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); + skb->protocol = eth_type_trans(skb, geneve->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + /* Ignore packet loops (and multicast echo) */ + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) + goto drop; + + skb_reset_network_header(skb); + + iph = ip_hdr(skb); /* Now inner IP header... 
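The decapsulation step just below follows the usual tunnel ECN rules. As a condensed sketch of the decision it acts on, assuming the behaviour of __INET_ECN_decapsulate() in include/net/inet_ecn.h (RFC 6040); this is not the real implementation, only the logic it encodes:

	#include <net/inet_ecn.h>

	static int ecn_decap_sketch(__u8 outer, __u8 inner)
	{
		if (INET_ECN_is_not_ect(inner)) {
			if ((outer & INET_ECN_MASK) == INET_ECN_CE)
				return 2;	/* invalid: caller drops the packet */
			if (outer & INET_ECN_MASK)
				return 1;	/* "non-ECT" is logged, packet kept */
			return 0;
		}
		/* ECN-capable inner packet: a CE mark on the outer header
		 * is propagated to the inner header and 0 is returned. */
		return 0;
	}

which is why the error path below logs on any nonzero value but only drops when err > 1.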
*/ + err = IP_ECN_decapsulate(iph, skb); + + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++geneve->dev->stats.rx_frame_errors; + ++geneve->dev->stats.rx_errors; + goto drop; + } + } + + stats = this_cpu_ptr(geneve->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + netif_rx(skb); + + return; +drop: + /* Consume bad packet */ + kfree_skb(skb); +} + +/* Setup stats when device is created */ +static int geneve_init(struct net_device *dev) +{ + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void geneve_uninit(struct net_device *dev) +{ + free_percpu(dev->tstats); +} + +static int geneve_open(struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct net *net = geneve->net; + struct geneve_net *gn = net_generic(geneve->net, geneve_net_id); + struct geneve_sock *gs; + + gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn, + false, false); + if (IS_ERR(gs)) + return PTR_ERR(gs); + + geneve->sock = gs; + + return 0; +} + +static int geneve_stop(struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + + geneve_sock_release(gs); + + return 0; +} + +static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + struct rtable *rt = NULL; + const struct iphdr *iip; /* interior IP header */ + struct flowi4 fl4; + int err; + __be16 sport; + __u8 tos, ttl = 0; + + iip = ip_hdr(skb); + + skb_reset_mac_header(skb); + + /* TODO: port min/max limits should be configurable */ + sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true); + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = geneve->remote.sin_addr.s_addr; + rt = ip_route_output_key(geneve->net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + dev->stats.collisions++; + goto rt_tx_error; + } + + /* TODO: tos and ttl should be configurable */ + + tos = ip_tunnel_ecn_encap(0, iip, skb); + + if (IN_MULTICAST(ntohl(fl4.daddr))) + ttl = 1; + + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + + /* no need to handle local destination and encap bypass...yet... 
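The TTL chosen a few lines up uses the GNU "?:" shorthand; spelled out, it is simply:

	if (!ttl)
		ttl = ip4_dst_hoplimit(&rt->dst);

so multicast destinations keep the forced TTL of 1 and everything else inherits the route's hop limit.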
*/ + + err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr, + tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0, + geneve->vni, 0, NULL, false, + !net_eq(geneve->net, dev_net(geneve->dev))); + if (err < 0) + ip_rt_put(rt); + + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + + return NETDEV_TX_OK; + +rt_tx_error: + ip_rt_put(rt); +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static const struct net_device_ops geneve_netdev_ops = { + .ndo_init = geneve_init, + .ndo_uninit = geneve_uninit, + .ndo_open = geneve_open, + .ndo_stop = geneve_stop, + .ndo_start_xmit = geneve_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, +}; + +static void geneve_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); + strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); +} + +static const struct ethtool_ops geneve_ethtool_ops = { + .get_drvinfo = geneve_get_drvinfo, + .get_link = ethtool_op_get_link, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type geneve_type = { + .name = "geneve", +}; + +/* Initialize the device structure. */ +static void geneve_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &geneve_netdev_ops; + dev->ethtool_ops = &geneve_ethtool_ops; + dev->destructor = free_netdev; + + SET_NETDEV_DEVTYPE(dev, &geneve_type); + + dev->tx_queue_len = 0; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; + + dev->vlan_features = dev->features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + + netif_keep_dst(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; +} + +static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { + [IFLA_GENEVE_ID] = { .type = NLA_U32 }, + [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, +}; + +static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + return -EINVAL; + + if (data[IFLA_GENEVE_ID]) { + __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); + + if (vni >= GENEVE_VID_MASK) + return -ERANGE; + } + + return 0; +} + +static int geneve_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *dummy, *geneve = netdev_priv(dev); + struct hlist_head *vni_list_head; + struct sockaddr_in remote; /* IPv4 address for link partner */ + __u32 vni, hash; + int err; + + if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) + return -EINVAL; + + geneve->net = net; + geneve->dev = dev; + + vni = nla_get_u32(data[IFLA_GENEVE_ID]); + geneve->vni[0] = (vni & 0x00ff0000) >> 16; + geneve->vni[1] = (vni & 0x0000ff00) >> 8; + geneve->vni[2] = vni & 0x000000ff; + + geneve->remote.sin_addr.s_addr = + nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); + if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr))) + 
return -EINVAL; + + remote = geneve->remote; + hash = geneve_net_vni_hash(geneve->vni); + vni_list_head = &gn->vni_list[hash]; + hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { + if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) && + !memcmp(&remote, &dummy->remote, sizeof(dummy->remote))) + return -EBUSY; + } + + if (tb[IFLA_ADDRESS] == NULL) + eth_hw_addr_random(dev); + + err = register_netdevice(dev); + if (err) + return err; + + list_add(&geneve->next, &gn->geneve_list); + + hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); + + return 0; +} + +static void geneve_dellink(struct net_device *dev, struct list_head *head) +{ + struct geneve_dev *geneve = netdev_priv(dev); + + if (!hlist_unhashed(&geneve->hlist)) + hlist_del_rcu(&geneve->hlist); + + list_del(&geneve->next); + unregister_netdevice_queue(dev, head); +} + +static size_t geneve_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ + nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ + 0; +} + +static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + __u32 vni; + + vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; + if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) + goto nla_put_failure; + + if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, + geneve->remote.sin_addr.s_addr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops geneve_link_ops __read_mostly = { + .kind = "geneve", + .maxtype = IFLA_GENEVE_MAX, + .policy = geneve_policy, + .priv_size = sizeof(struct geneve_dev), + .setup = geneve_setup, + .validate = geneve_validate, + .newlink = geneve_newlink, + .dellink = geneve_dellink, + .get_size = geneve_get_size, + .fill_info = geneve_fill_info, +}; + +static __net_init int geneve_init_net(struct net *net) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + unsigned int h; + + INIT_LIST_HEAD(&gn->geneve_list); + + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&gn->vni_list[h]); + + return 0; +} + +static void __net_exit geneve_exit_net(struct net *net) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *geneve, *next; + struct net_device *dev, *aux; + LIST_HEAD(list); + + rtnl_lock(); + + /* gather any geneve devices that were moved into this ns */ + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &geneve_link_ops) + unregister_netdevice_queue(dev, &list); + + /* now gather any other geneve devices that were created in this ns */ + list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { + /* If geneve->dev is in the same netns, it was already added + * to the list by the previous loop. 
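Both gather passes feed a single batched teardown: queueing devices onto a local list with unregister_netdevice_queue() and handing the whole list to unregister_netdevice_many() amortizes the RTNL work and RCU grace periods over all devices instead of paying per device. The idiom in isolation (dev_a and dev_b are hypothetical):

	LIST_HEAD(list);

	rtnl_lock();
	unregister_netdevice_queue(dev_a, &list);	/* queue only, no teardown yet */
	unregister_netdevice_queue(dev_b, &list);
	unregister_netdevice_many(&list);		/* one batched teardown */
	rtnl_unlock();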
+ */ + if (!net_eq(dev_net(geneve->dev), net)) + unregister_netdevice_queue(geneve->dev, &list); + } + + /* unregister the devices gathered above */ + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations geneve_net_ops = { + .init = geneve_init_net, + .exit = geneve_exit_net, + .id = &geneve_net_id, + .size = sizeof(struct geneve_net), +}; + +static int __init geneve_init_module(void) +{ + int rc; + + rc = register_pernet_subsys(&geneve_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&geneve_link_ops); + if (rc) + goto out2; + + return 0; +out2: + unregister_pernet_subsys(&geneve_net_ops); +out1: + return rc; +} +late_initcall(geneve_init_module); + +static void __exit geneve_cleanup_module(void) +{ + rtnl_link_unregister(&geneve_link_ops); + unregister_pernet_subsys(&geneve_net_ops); +} +module_exit(geneve_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(GENEVE_NETDEV_VER); +MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); +MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); +MODULE_ALIAS_RTNL_LINK("geneve"); diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 41071d32bc8e..ddcc7f8d22b4 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -611,6 +611,12 @@ struct multi_send_data { u32 count; /* counter of batched packets */ }; +struct netvsc_stats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ @@ -618,6 +624,9 @@ struct net_device_context { struct delayed_work dwork; struct work_struct work; u32 msg_enable; /* debug level */ + + struct netvsc_stats __percpu *tx_stats; + struct netvsc_stats __percpu *rx_stats; }; /* Per netvsc device */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ea091bc5ff09..b0249685139c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -743,6 +743,7 @@ static inline int netvsc_send_pkt( u64 req_id; int ret; struct hv_page_buffer *pgbuf; + u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound); nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; if (packet->is_data_pkt) { @@ -769,32 +770,42 @@ static inline int netvsc_send_pkt( if (out_channel->rescind) return -ENODEV; + /* + * It is possible that once we successfully place this packet + * on the ringbuffer, we may stop the queue. In that case, we want + * to notify the host independent of the xmit_more flag. We don't + * need to be precise here; in the worst case we may signal the host + * unnecessarily. + */ + if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1)) + packet->xmit_more = false; + if (packet->page_buf_cnt) { pgbuf = packet->cp_partial ? 
packet->page_buf + packet->rmsg_pgcnt : packet->page_buf; - ret = vmbus_sendpacket_pagebuffer(out_channel, - pgbuf, - packet->page_buf_cnt, - &nvmsg, - sizeof(struct nvsp_message), - req_id); + ret = vmbus_sendpacket_pagebuffer_ctl(out_channel, + pgbuf, + packet->page_buf_cnt, + &nvmsg, + sizeof(struct nvsp_message), + req_id, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED, + !packet->xmit_more); } else { - ret = vmbus_sendpacket( - out_channel, &nvmsg, - sizeof(struct nvsp_message), - req_id, - VM_PKT_DATA_INBAND, - VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + ret = vmbus_sendpacket_ctl(out_channel, &nvmsg, + sizeof(struct nvsp_message), + req_id, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED, + !packet->xmit_more); } if (ret == 0) { atomic_inc(&net_device->num_outstanding_sends); atomic_inc(&net_device->queue_sends[q_idx]); - if (hv_ringbuf_avail_percent(&out_channel->outbound) < - RING_AVAIL_PERCENT_LOWATER) { - netif_tx_stop_queue(netdev_get_tx_queue( - ndev, q_idx)); + if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { + netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx)); if (atomic_read(&net_device-> queue_sends[q_idx]) < 1) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5993c7e2d723..d9c88bc09f45 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) struct flow_keys flow; int data_len; - if (!skb_flow_dissect(skb, &flow) || - !(flow.n_proto == htons(ETH_P_IP) || - flow.n_proto == htons(ETH_P_IPV6))) + if (!skb_flow_dissect_flow_keys(skb, &flow) || + !(flow.basic.n_proto == htons(ETH_P_IP) || + flow.basic.n_proto == htons(ETH_P_IPV6))) return false; - if (flow.ip_proto == IPPROTO_TCP) + if (flow.basic.ip_proto == IPPROTO_TCP) data_len = 12; else data_len = 8; @@ -391,7 +391,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 skb_length; u32 pkt_sz; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; - + struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); /* We will at most need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number @@ -580,8 +580,10 @@ do_send: drop: if (ret == 0) { - net->stats.tx_bytes += skb_length; - net->stats.tx_packets++; + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->packets++; + tx_stats->bytes += skb_length; + u64_stats_update_end(&tx_stats->syncp); } else { if (ret != -EAGAIN) { dev_kfree_skb_any(skb); @@ -644,13 +646,17 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct ndis_tcp_ip_checksum_info *csum_info) { struct net_device *net; + struct net_device_context *net_device_ctx; struct sk_buff *skb; + struct netvsc_stats *rx_stats; net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; if (!net || net->reg_state != NETREG_REGISTERED) { packet->status = NVSP_STAT_FAIL; return 0; } + net_device_ctx = netdev_priv(net); + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); /* Allocate a skb - TODO direct I/O to pages? 
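A note on the per-CPU netvsc_stats counters this patch introduces: they are protected by a u64_stats_sync seqcount, so 64-bit packet and byte counts stay consistent for readers even on 32-bit hosts, with no atomics on the datapath. The writer/reader pairing, reduced to a sketch (stats, len and the local variables are stand-ins):

	/* writer, per-CPU, on the datapath */
	u64_stats_update_begin(&stats->syncp);
	stats->packets++;
	stats->bytes += len;
	u64_stats_update_end(&stats->syncp);

	/* reader: retry until a consistent snapshot was copied out */
	do {
		start = u64_stats_fetch_begin_irq(&stats->syncp);
		packets = stats->packets;
		bytes = stats->bytes;
	} while (u64_stats_fetch_retry_irq(&stats->syncp, start));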
*/ skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); @@ -686,8 +692,10 @@ int netvsc_recv_callback(struct hv_device *device_obj, skb_record_rx_queue(skb, packet->channel-> offermsg.offer.sub_channel_index); - net->stats.rx_packets++; - net->stats.rx_bytes += packet->total_data_buflen; + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += packet->total_data_buflen; + u64_stats_update_end(&rx_stats->syncp); /* * Pass the skb back up. Network stack will deallocate the skb when it @@ -753,6 +761,46 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) return 0; } +static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, + struct rtnl_link_stats64 *t) +{ + struct net_device_context *ndev_ctx = netdev_priv(net); + int cpu; + + for_each_possible_cpu(cpu) { + struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats, + cpu); + struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, + cpu); + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + tx_packets = tx_stats->packets; + tx_bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + + do { + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + rx_packets = rx_stats->packets; + rx_bytes = rx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + + t->tx_bytes += tx_bytes; + t->tx_packets += tx_packets; + t->rx_bytes += rx_bytes; + t->rx_packets += rx_packets; + } + + t->tx_dropped = net->stats.tx_dropped; + t->tx_errors = net->stats.tx_errors; + + t->rx_dropped = net->stats.rx_dropped; + t->rx_errors = net->stats.rx_errors; + + return t; +} static int netvsc_set_mac_addr(struct net_device *ndev, void *p) { @@ -804,6 +852,7 @@ static const struct net_device_ops device_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = netvsc_set_mac_addr, .ndo_select_queue = netvsc_select_queue, + .ndo_get_stats64 = netvsc_get_stats64, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = netvsc_poll_controller, #endif @@ -855,6 +904,14 @@ static void netvsc_link_change(struct work_struct *w) netdev_notify_peers(net); } +static void netvsc_free_netdev(struct net_device *netdev) +{ + struct net_device_context *net_device_ctx = netdev_priv(netdev); + + free_percpu(net_device_ctx->tx_stats); + free_percpu(net_device_ctx->rx_stats); + free_netdev(netdev); +} static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) @@ -883,6 +940,18 @@ static int netvsc_probe(struct hv_device *dev, netdev_dbg(net, "netvsc msg_enable: %d\n", net_device_ctx->msg_enable); + net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats); + if (!net_device_ctx->tx_stats) { + free_netdev(net); + return -ENOMEM; + } + net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats); + if (!net_device_ctx->rx_stats) { + free_percpu(net_device_ctx->tx_stats); + free_netdev(net); + return -ENOMEM; + } + hv_set_drvdata(dev, net); INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); @@ -909,7 +978,7 @@ static int netvsc_probe(struct hv_device *dev, ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - free_netdev(net); + netvsc_free_netdev(net); hv_set_drvdata(dev, NULL); return ret; } @@ -923,7 +992,7 @@ static int netvsc_probe(struct hv_device *dev, if 
(ret != 0) { pr_err("Unable to register netdev.\n"); rndis_filter_device_remove(dev); - free_netdev(net); + netvsc_free_netdev(net); } else { schedule_delayed_work(&net_device_ctx->dwork, 0); } @@ -962,7 +1031,7 @@ static int netvsc_remove(struct hv_device *dev) */ rndis_filter_device_remove(dev); - free_netdev(net); + netvsc_free_netdev(net); return 0; } diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 54549a6223dd..953a97492fab 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -39,6 +39,8 @@ #define IPVLAN_MAC_FILTER_SIZE (1 << IPVLAN_MAC_FILTER_BITS) #define IPVLAN_MAC_FILTER_MASK (IPVLAN_MAC_FILTER_SIZE - 1) +#define IPVLAN_QBACKLOG_LIMIT 1000 + typedef enum { IPVL_IPV6 = 0, IPVL_ICMPV6, @@ -93,6 +95,8 @@ struct ipvl_port { struct hlist_head hlhead[IPVLAN_HASH_SIZE]; struct list_head ipvlans; struct rcu_head rcu; + struct work_struct wq; + struct sk_buff_head backlog; int count; u16 mode; }; @@ -112,6 +116,7 @@ void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); void ipvlan_init_secret(void); unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); +void ipvlan_process_multicast(struct work_struct *work); int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index c30b5c300c05..8afbedad620d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -189,62 +189,69 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr) return hash & IPVLAN_MAC_FILTER_MASK; } -static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, - const struct ipvl_dev *in_dev, bool local) +void ipvlan_process_multicast(struct work_struct *work) { - struct ethhdr *eth = eth_hdr(skb); + struct ipvl_port *port = container_of(work, struct ipvl_port, wq); + struct ethhdr *ethh; struct ipvl_dev *ipvlan; - struct sk_buff *nskb; + struct sk_buff *skb, *nskb; + struct sk_buff_head list; unsigned int len; unsigned int mac_hash; int ret; + u8 pkt_type; + bool hlocal, dlocal; - if (skb->protocol == htons(ETH_P_PAUSE)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { - if (local && (ipvlan == in_dev)) - continue; + __skb_queue_head_init(&list); - mac_hash = ipvlan_mac_hash(eth->h_dest); - if (!test_bit(mac_hash, ipvlan->mac_filters)) - continue; + spin_lock_bh(&port->backlog.lock); + skb_queue_splice_tail_init(&port->backlog, &list); + spin_unlock_bh(&port->backlog.lock); - ret = NET_RX_DROP; - len = skb->len + ETH_HLEN; - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - goto mcast_acct; + while ((skb = __skb_dequeue(&list)) != NULL) { + ethh = eth_hdr(skb); + hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); + mac_hash = ipvlan_mac_hash(ethh->h_dest); - if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; + if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) + pkt_type = PACKET_BROADCAST; else - nskb->pkt_type = PACKET_MULTICAST; - - nskb->dev = ipvlan->dev; - if (local) - ret = dev_forward_skb(ipvlan->dev, nskb); - else - ret = netif_rx(nskb); -mcast_acct: - ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); - } - rcu_read_unlock(); - - /* Locally generated? 
...Forward a copy to the main-device as - * well. On the RX side we'll ignore it (wont give it to any - * of the virtual devices. - */ - if (local) { - nskb = skb_clone(skb, GFP_ATOMIC); - if (nskb) { - if (ether_addr_equal(eth->h_dest, port->dev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; + pkt_type = PACKET_MULTICAST; + + dlocal = false; + rcu_read_lock(); + list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { + if (hlocal && (ipvlan->dev == skb->dev)) { + dlocal = true; + continue; + } + if (!test_bit(mac_hash, ipvlan->mac_filters)) + continue; + + ret = NET_RX_DROP; + len = skb->len + ETH_HLEN; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + goto acct; + + nskb->pkt_type = pkt_type; + nskb->dev = ipvlan->dev; + if (hlocal) + ret = dev_forward_skb(ipvlan->dev, nskb); else - nskb->pkt_type = PACKET_MULTICAST; - - dev_forward_skb(port->dev, nskb); + ret = netif_rx(nskb); +acct: + ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + } + rcu_read_unlock(); + + if (dlocal) { + /* If the packet originated here, send it out. */ + skb->dev = port->dev; + skb->pkt_type = pkt_type; + dev_queue_xmit(skb); + } else { + kfree_skb(skb); } } } @@ -446,6 +453,26 @@ out: return ret; } +static void ipvlan_multicast_enqueue(struct ipvl_port *port, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_PAUSE)) { + kfree_skb(skb); + return; + } + + spin_lock(&port->backlog.lock); + if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { + __skb_queue_tail(&port->backlog, skb); + spin_unlock(&port->backlog.lock); + schedule_work(&port->wq); + } else { + spin_unlock(&port->backlog.lock); + atomic_long_inc(&skb->dev->rx_dropped); + kfree_skb(skb); + } +} + static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); @@ -493,11 +520,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) return dev_forward_skb(ipvlan->phy_dev, skb); } else if (is_multicast_ether_addr(eth->h_dest)) { - u8 ip_summed = skb->ip_summed; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true); - skb->ip_summed = ip_summed; + ipvlan_multicast_enqueue(ipvlan->port, skb); + return NET_XMIT_SUCCESS; } skb->dev = ipvlan->phy_dev; @@ -581,8 +605,18 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, int addr_type; if (is_multicast_ether_addr(eth->h_dest)) { - if (ipvlan_external_frame(skb, port)) - ipvlan_multicast_frame(port, skb, NULL, false); + if (ipvlan_external_frame(skb, port)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + /* External frames are queued for device local + * distribution, but a copy is given to master + * straight away to avoid sending duplicates later + * when work-queue processes this frame. This is + * achieved by returning RX_HANDLER_PASS. 
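ipvlan_process_multicast() above is a classic splice-and-drain consumer: the backlog lock is held only long enough to move the entire queue onto a private list, after which the per-device cloning and delivery run without the lock. The skeleton of that pattern, with consume_skb() standing in for the real per-packet work:

	static void drain_backlog(struct sk_buff_head *backlog)
	{
		struct sk_buff_head list;
		struct sk_buff *skb;

		__skb_queue_head_init(&list);

		spin_lock_bh(&backlog->lock);
		skb_queue_splice_tail_init(backlog, &list);
		spin_unlock_bh(&backlog->lock);

		while ((skb = __skb_dequeue(&list)) != NULL)
			consume_skb(skb);
	}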
+ */ + if (nskb) + ipvlan_multicast_enqueue(port, nskb); + } } else { struct ipvl_addr *addr; diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 77b92a0fe557..1acc283160d9 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -54,6 +54,9 @@ static int ipvlan_port_create(struct net_device *dev) for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) INIT_HLIST_HEAD(&port->hlhead[idx]); + skb_queue_head_init(&port->backlog); + INIT_WORK(&port->wq, ipvlan_process_multicast); + err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); if (err) goto err; @@ -72,6 +75,8 @@ static void ipvlan_port_destroy(struct net_device *dev) dev->priv_flags &= ~IFF_IPVLAN_MASTER; netdev_rx_handler_unregister(dev); + cancel_work_sync(&port->wq); + __skb_queue_purge(&port->backlog); kfree_rcu(port, rcu); } @@ -213,17 +218,6 @@ static void ipvlan_change_rx_flags(struct net_device *dev, int change) dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); } -static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set) -{ - struct net_device *dev = ipvlan->dev; - unsigned int hashbit = ipvlan_mac_hash(dev->broadcast); - - if (set && !test_bit(hashbit, ipvlan->mac_filters)) - __set_bit(hashbit, ipvlan->mac_filters); - else if (!set && test_bit(hashbit, ipvlan->mac_filters)) - __clear_bit(hashbit, ipvlan->mac_filters); -} - static void ipvlan_set_multicast_mac_filter(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); @@ -238,6 +232,12 @@ static void ipvlan_set_multicast_mac_filter(struct net_device *dev) netdev_for_each_mc_addr(ha, dev) __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); + /* Turn on the broadcast bit irrespective of address family, + * since broadcast is deferred to a work-queue, hence no + * impact on fast-path processing. 
+ */ + __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters); + bitmap_copy(ipvlan->mac_filters, mc_filters, IPVLAN_MAC_FILTER_SIZE); } @@ -705,7 +705,6 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) */ if (netif_running(ipvlan->dev)) ipvlan_ht_addr_add(ipvlan, addr); - ipvlan_set_broadcast_mac_filter(ipvlan, true); return 0; } @@ -722,8 +721,6 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) list_del(&addr->anode); ipvlan->ipv4cnt--; WARN_ON(ipvlan->ipv4cnt < 0); - if (!ipvlan->ipv4cnt) - ipvlan_set_broadcast_mac_filter(ipvlan, false); kfree_rcu(addr, rcu); return; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ad..483afb19596c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -476,7 +476,7 @@ static int macvtap_open(struct inode *inode, struct file *file) err = -ENOMEM; q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &macvtap_proto); + &macvtap_proto, 0); if (!q) goto out; @@ -1006,6 +1006,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; + struct sockaddr sa; int s; int ret; @@ -1101,6 +1102,37 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return ret; + case SIOCGIFHWADDR: + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + ret = 0; + u = vlan->dev->type; + if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || + copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) || + put_user(u, &ifr->ifr_hwaddr.sa_family)) + ret = -EFAULT; + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + + case SIOCSIFHWADDR: + if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) + return -EFAULT; + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + ret = dev_set_mac_address(vlan->dev, &sa); + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + default: return -EINVAL; } diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 70641d2c0429..7c0cb87d1f2f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -24,13 +24,6 @@ config AMD_PHY ---help--- Currently supports the am79c874 -config AMD_XGBE_PHY - tristate "Driver for the AMD 10GbE (amd-xgbe) PHYs" - depends on (OF || ACPI) && HAS_IOMEM - depends on ARM64 || COMPILE_TEST - ---help--- - Currently supports the AMD 10GbE PHY - config MARVELL_PHY tristate "Drivers for Marvell PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 501ea7699a2d..e97e7f921862 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -33,5 +33,4 @@ obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o -obj-$(CONFIG_AMD_XGBE_PHY) += amd-xgbe-phy.o obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c deleted file mode 100644 index fb276f64cd64..000000000000 --- a/drivers/net/phy/amd-xgbe-phy.c +++ /dev/null @@ -1,1862 +0,0 @@ -/* - * AMD 10Gb Ethernet PHY driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. 
- * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/platform_device.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/workqueue.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/mii.h> -#include <linux/ethtool.h> -#include <linux/phy.h> -#include <linux/mdio.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/of_platform.h> -#include <linux/of_device.h> -#include <linux/uaccess.h> -#include <linux/bitops.h> -#include <linux/property.h> -#include <linux/acpi.h> -#include <linux/jiffies.h> - -MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION("1.0.0-a"); -MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); - -#define XGBE_PHY_ID 0x000162d0 -#define XGBE_PHY_MASK 0xfffffff0 - -#define XGBE_PHY_SPEEDSET_PROPERTY "amd,speed-set" -#define XGBE_PHY_BLWC_PROPERTY "amd,serdes-blwc" -#define XGBE_PHY_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" -#define XGBE_PHY_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" -#define XGBE_PHY_TX_AMP_PROPERTY "amd,serdes-tx-amp" -#define XGBE_PHY_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" -#define XGBE_PHY_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" - -#define XGBE_PHY_SPEEDS 3 -#define XGBE_PHY_SPEED_1000 0 -#define XGBE_PHY_SPEED_2500 1 -#define XGBE_PHY_SPEED_10000 2 - -#define XGBE_AN_MS_TIMEOUT 500 - -#define XGBE_AN_INT_CMPLT 0x01 -#define XGBE_AN_INC_LINK 0x02 -#define XGBE_AN_PG_RCV 0x04 -#define XGBE_AN_INT_MASK 0x07 - -#define XNP_MCF_NULL_MESSAGE 0x001 -#define XNP_ACK_PROCESSED BIT(12) -#define XNP_MP_FORMATTED BIT(13) -#define XNP_NP_EXCHANGE BIT(15) - -#define XGBE_PHY_RATECHANGE_COUNT 500 - -#define XGBE_PHY_KR_TRAINING_START 0x01 -#define XGBE_PHY_KR_TRAINING_ENABLE 0x02 - -#define XGBE_PHY_FEC_ENABLE 0x01 -#define XGBE_PHY_FEC_FORWARD 0x02 -#define XGBE_PHY_FEC_MASK 0x03 - -#ifndef MDIO_PMA_10GBR_PMD_CTRL -#define MDIO_PMA_10GBR_PMD_CTRL 0x0096 -#endif - -#ifndef MDIO_PMA_10GBR_FEC_ABILITY -#define MDIO_PMA_10GBR_FEC_ABILITY 0x00aa -#endif - -#ifndef MDIO_PMA_10GBR_FEC_CTRL -#define MDIO_PMA_10GBR_FEC_CTRL 0x00ab -#endif - -#ifndef MDIO_AN_XNP -#define MDIO_AN_XNP 0x0016 -#endif - -#ifndef MDIO_AN_LPX -#define MDIO_AN_LPX 0x0019 -#endif - -#ifndef MDIO_AN_INTMASK -#define MDIO_AN_INTMASK 0x8001 -#endif - -#ifndef MDIO_AN_INT -#define MDIO_AN_INT 0x8002 -#endif - -#ifndef MDIO_CTRL1_SPEED1G -#define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) -#endif - -/* SerDes integration register offsets */ -#define SIR0_KR_RT_1 0x002c -#define SIR0_STATUS 0x0040 -#define SIR1_SPEED 0x0000 - -/* SerDes integration register entry bit positions and sizes */ -#define SIR0_KR_RT_1_RESET_INDEX 11 -#define SIR0_KR_RT_1_RESET_WIDTH 1 -#define SIR0_STATUS_RX_READY_INDEX 0 -#define SIR0_STATUS_RX_READY_WIDTH 1 -#define SIR0_STATUS_TX_READY_INDEX 8 -#define SIR0_STATUS_TX_READY_WIDTH 1 -#define SIR1_SPEED_CDR_RATE_INDEX 12 -#define SIR1_SPEED_CDR_RATE_WIDTH 4 -#define SIR1_SPEED_DATARATE_INDEX 4 -#define SIR1_SPEED_DATARATE_WIDTH 2 -#define SIR1_SPEED_PLLSEL_INDEX 3 -#define SIR1_SPEED_PLLSEL_WIDTH 1 -#define SIR1_SPEED_RATECHANGE_INDEX 6 -#define SIR1_SPEED_RATECHANGE_WIDTH 1 -#define SIR1_SPEED_TXAMP_INDEX 8 -#define SIR1_SPEED_TXAMP_WIDTH 4 -#define SIR1_SPEED_WORDMODE_INDEX 0 -#define SIR1_SPEED_WORDMODE_WIDTH 
3 - -#define SPEED_10000_BLWC 0 -#define SPEED_10000_CDR 0x7 -#define SPEED_10000_PLL 0x1 -#define SPEED_10000_PQ 0x12 -#define SPEED_10000_RATE 0x0 -#define SPEED_10000_TXAMP 0xa -#define SPEED_10000_WORD 0x7 -#define SPEED_10000_DFE_TAP_CONFIG 0x1 -#define SPEED_10000_DFE_TAP_ENABLE 0x7f - -#define SPEED_2500_BLWC 1 -#define SPEED_2500_CDR 0x2 -#define SPEED_2500_PLL 0x0 -#define SPEED_2500_PQ 0xa -#define SPEED_2500_RATE 0x1 -#define SPEED_2500_TXAMP 0xf -#define SPEED_2500_WORD 0x1 -#define SPEED_2500_DFE_TAP_CONFIG 0x3 -#define SPEED_2500_DFE_TAP_ENABLE 0x0 - -#define SPEED_1000_BLWC 1 -#define SPEED_1000_CDR 0x2 -#define SPEED_1000_PLL 0x0 -#define SPEED_1000_PQ 0xa -#define SPEED_1000_RATE 0x3 -#define SPEED_1000_TXAMP 0xf -#define SPEED_1000_WORD 0x1 -#define SPEED_1000_DFE_TAP_CONFIG 0x3 -#define SPEED_1000_DFE_TAP_ENABLE 0x0 - -/* SerDes RxTx register offsets */ -#define RXTX_REG6 0x0018 -#define RXTX_REG20 0x0050 -#define RXTX_REG22 0x0058 -#define RXTX_REG114 0x01c8 -#define RXTX_REG129 0x0204 - -/* SerDes RxTx register entry bit positions and sizes */ -#define RXTX_REG6_RESETB_RXD_INDEX 8 -#define RXTX_REG6_RESETB_RXD_WIDTH 1 -#define RXTX_REG20_BLWC_ENA_INDEX 2 -#define RXTX_REG20_BLWC_ENA_WIDTH 1 -#define RXTX_REG114_PQ_REG_INDEX 9 -#define RXTX_REG114_PQ_REG_WIDTH 7 -#define RXTX_REG129_RXDFE_CONFIG_INDEX 14 -#define RXTX_REG129_RXDFE_CONFIG_WIDTH 2 - -/* Bit setting and getting macros - * The get macro will extract the current bit field value from within - * the variable - * - * The set macro will clear the current bit field value within the - * variable and then set the bit field of the variable to the - * specified value - */ -#define GET_BITS(_var, _index, _width) \ - (((_var) >> (_index)) & ((0x1 << (_width)) - 1)) - -#define SET_BITS(_var, _index, _width, _val) \ -do { \ - (_var) &= ~(((0x1 << (_width)) - 1) << (_index)); \ - (_var) |= (((_val) & ((0x1 << (_width)) - 1)) << (_index)); \ -} while (0) - -#define XSIR_GET_BITS(_var, _prefix, _field) \ - GET_BITS((_var), \ - _prefix##_##_field##_INDEX, \ - _prefix##_##_field##_WIDTH) - -#define XSIR_SET_BITS(_var, _prefix, _field, _val) \ - SET_BITS((_var), \ - _prefix##_##_field##_INDEX, \ - _prefix##_##_field##_WIDTH, (_val)) - -/* Macros for reading or writing SerDes integration registers - * The ioread macros will get bit fields or full values using the - * register definitions formed using the input names - * - * The iowrite macros will set bit fields or full values using the - * register definitions formed using the input names - */ -#define XSIR0_IOREAD(_priv, _reg) \ - ioread16((_priv)->sir0_regs + _reg) - -#define XSIR0_IOREAD_BITS(_priv, _reg, _field) \ - GET_BITS(XSIR0_IOREAD((_priv), _reg), \ - _reg##_##_field##_INDEX, \ - _reg##_##_field##_WIDTH) - -#define XSIR0_IOWRITE(_priv, _reg, _val) \ - iowrite16((_val), (_priv)->sir0_regs + _reg) - -#define XSIR0_IOWRITE_BITS(_priv, _reg, _field, _val) \ -do { \ - u16 reg_val = XSIR0_IOREAD((_priv), _reg); \ - SET_BITS(reg_val, \ - _reg##_##_field##_INDEX, \ - _reg##_##_field##_WIDTH, (_val)); \ - XSIR0_IOWRITE((_priv), _reg, reg_val); \ -} while (0) - -#define XSIR1_IOREAD(_priv, _reg) \ - ioread16((_priv)->sir1_regs + _reg) - -#define XSIR1_IOREAD_BITS(_priv, _reg, _field) \ - GET_BITS(XSIR1_IOREAD((_priv), _reg), \ - _reg##_##_field##_INDEX, \ - _reg##_##_field##_WIDTH) - -#define XSIR1_IOWRITE(_priv, _reg, _val) \ - iowrite16((_val), (_priv)->sir1_regs + _reg) - -#define XSIR1_IOWRITE_BITS(_priv, _reg, _field, _val) \ -do { \ - u16 reg_val = XSIR1_IOREAD((_priv), 
_reg); \ - SET_BITS(reg_val, \ - _reg##_##_field##_INDEX, \ - _reg##_##_field##_WIDTH, (_val)); \ - XSIR1_IOWRITE((_priv), _reg, reg_val); \ -} while (0) - -/* Macros for reading or writing SerDes RxTx registers - * The ioread macros will get bit fields or full values using the - * register definitions formed using the input names - * - * The iowrite macros will set bit fields or full values using the - * register definitions formed using the input names - */ -#define XRXTX_IOREAD(_priv, _reg) \ - ioread16((_priv)->rxtx_regs + _reg) - -#define XRXTX_IOREAD_BITS(_priv, _reg, _field) \ - GET_BITS(XRXTX_IOREAD((_priv), _reg), \ - _reg##_##_field##_INDEX, \ - _reg##_##_field##_WIDTH) - -#define XRXTX_IOWRITE(_priv, _reg, _val) \ - iowrite16((_val), (_priv)->rxtx_regs + _reg) - -#define XRXTX_IOWRITE_BITS(_priv, _reg, _field, _val) \ -do { \ - u16 reg_val = XRXTX_IOREAD((_priv), _reg); \ - SET_BITS(reg_val, \ - _reg##_##_field##_INDEX, \ - _reg##_##_field##_WIDTH, (_val)); \ - XRXTX_IOWRITE((_priv), _reg, reg_val); \ -} while (0) - -static const u32 amd_xgbe_phy_serdes_blwc[] = { - SPEED_1000_BLWC, - SPEED_2500_BLWC, - SPEED_10000_BLWC, -}; - -static const u32 amd_xgbe_phy_serdes_cdr_rate[] = { - SPEED_1000_CDR, - SPEED_2500_CDR, - SPEED_10000_CDR, -}; - -static const u32 amd_xgbe_phy_serdes_pq_skew[] = { - SPEED_1000_PQ, - SPEED_2500_PQ, - SPEED_10000_PQ, -}; - -static const u32 amd_xgbe_phy_serdes_tx_amp[] = { - SPEED_1000_TXAMP, - SPEED_2500_TXAMP, - SPEED_10000_TXAMP, -}; - -static const u32 amd_xgbe_phy_serdes_dfe_tap_cfg[] = { - SPEED_1000_DFE_TAP_CONFIG, - SPEED_2500_DFE_TAP_CONFIG, - SPEED_10000_DFE_TAP_CONFIG, -}; - -static const u32 amd_xgbe_phy_serdes_dfe_tap_ena[] = { - SPEED_1000_DFE_TAP_ENABLE, - SPEED_2500_DFE_TAP_ENABLE, - SPEED_10000_DFE_TAP_ENABLE, -}; - -enum amd_xgbe_phy_an { - AMD_XGBE_AN_READY = 0, - AMD_XGBE_AN_PAGE_RECEIVED, - AMD_XGBE_AN_INCOMPAT_LINK, - AMD_XGBE_AN_COMPLETE, - AMD_XGBE_AN_NO_LINK, - AMD_XGBE_AN_ERROR, -}; - -enum amd_xgbe_phy_rx { - AMD_XGBE_RX_BPA = 0, - AMD_XGBE_RX_XNP, - AMD_XGBE_RX_COMPLETE, - AMD_XGBE_RX_ERROR, -}; - -enum amd_xgbe_phy_mode { - AMD_XGBE_MODE_KR, - AMD_XGBE_MODE_KX, -}; - -enum amd_xgbe_phy_speedset { - AMD_XGBE_PHY_SPEEDSET_1000_10000 = 0, - AMD_XGBE_PHY_SPEEDSET_2500_10000, -}; - -struct amd_xgbe_phy_priv { - struct platform_device *pdev; - struct acpi_device *adev; - struct device *dev; - - struct phy_device *phydev; - - /* SerDes related mmio resources */ - struct resource *rxtx_res; - struct resource *sir0_res; - struct resource *sir1_res; - - /* SerDes related mmio registers */ - void __iomem *rxtx_regs; /* SerDes Rx/Tx CSRs */ - void __iomem *sir0_regs; /* SerDes integration registers (1/2) */ - void __iomem *sir1_regs; /* SerDes integration registers (2/2) */ - - int an_irq; - char an_irq_name[IFNAMSIZ + 32]; - struct work_struct an_irq_work; - unsigned int an_irq_allocated; - - unsigned int speed_set; - - /* SerDes UEFI configurable settings. - * Switching between modes/speeds requires new values for some - * SerDes settings. The values can be supplied as device - * properties in array format. 
The first array entry is for - * 1GbE, second for 2.5GbE and third for 10GbE - */ - u32 serdes_blwc[XGBE_PHY_SPEEDS]; - u32 serdes_cdr_rate[XGBE_PHY_SPEEDS]; - u32 serdes_pq_skew[XGBE_PHY_SPEEDS]; - u32 serdes_tx_amp[XGBE_PHY_SPEEDS]; - u32 serdes_dfe_tap_cfg[XGBE_PHY_SPEEDS]; - u32 serdes_dfe_tap_ena[XGBE_PHY_SPEEDS]; - - /* Auto-negotiation state machine support */ - struct mutex an_mutex; - enum amd_xgbe_phy_an an_result; - enum amd_xgbe_phy_an an_state; - enum amd_xgbe_phy_rx kr_state; - enum amd_xgbe_phy_rx kx_state; - struct work_struct an_work; - struct workqueue_struct *an_workqueue; - unsigned int an_supported; - unsigned int parallel_detect; - unsigned int fec_ability; - unsigned long an_start; - - unsigned int lpm_ctrl; /* CTRL1 for resume */ -}; - -static int amd_xgbe_an_enable_kr_training(struct phy_device *phydev) -{ - int ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - if (ret < 0) - return ret; - - ret |= XGBE_PHY_KR_TRAINING_ENABLE; - phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, ret); - - return 0; -} - -static int amd_xgbe_an_disable_kr_training(struct phy_device *phydev) -{ - int ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - if (ret < 0) - return ret; - - ret &= ~XGBE_PHY_KR_TRAINING_ENABLE; - phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, ret); - - return 0; -} - -static int amd_xgbe_phy_pcs_power_cycle(struct phy_device *phydev) -{ - int ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - return ret; - - ret |= MDIO_CTRL1_LPOWER; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - usleep_range(75, 100); - - ret &= ~MDIO_CTRL1_LPOWER; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - return 0; -} - -static void amd_xgbe_phy_serdes_start_ratechange(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - - /* Assert Rx and Tx ratechange */ - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, RATECHANGE, 1); -} - -static void amd_xgbe_phy_serdes_complete_ratechange(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - unsigned int wait; - u16 status; - - /* Release Rx and Tx ratechange */ - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, RATECHANGE, 0); - - /* Wait for Rx and Tx ready */ - wait = XGBE_PHY_RATECHANGE_COUNT; - while (wait--) { - usleep_range(50, 75); - - status = XSIR0_IOREAD(priv, SIR0_STATUS); - if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && - XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) - goto rx_reset; - } - - netdev_dbg(phydev->attached_dev, "SerDes rx/tx not ready (%#hx)\n", - status); - -rx_reset: - /* Perform Rx reset for the DFE changes */ - XRXTX_IOWRITE_BITS(priv, RXTX_REG6, RESETB_RXD, 0); - XRXTX_IOWRITE_BITS(priv, RXTX_REG6, RESETB_RXD, 1); -} - -static int amd_xgbe_phy_xgmii_mode(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - /* Enable KR training */ - ret = amd_xgbe_an_enable_kr_training(phydev); - if (ret < 0) - return ret; - - /* Set PCS to KR/10G speed */ - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2); - if (ret < 0) - return ret; - - ret &= ~MDIO_PCS_CTRL2_TYPE; - ret |= MDIO_PCS_CTRL2_10GBR; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2, ret); - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - return ret; - - ret &= ~MDIO_CTRL1_SPEEDSEL; - ret |= MDIO_CTRL1_SPEED10G; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - ret = amd_xgbe_phy_pcs_power_cycle(phydev); - if 
(ret < 0) - return ret; - - /* Set SerDes to 10G speed */ - amd_xgbe_phy_serdes_start_ratechange(phydev); - - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, DATARATE, SPEED_10000_RATE); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, WORDMODE, SPEED_10000_WORD); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PLLSEL, SPEED_10000_PLL); - - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, CDR_RATE, - priv->serdes_cdr_rate[XGBE_PHY_SPEED_10000]); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, TXAMP, - priv->serdes_tx_amp[XGBE_PHY_SPEED_10000]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG20, BLWC_ENA, - priv->serdes_blwc[XGBE_PHY_SPEED_10000]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, - priv->serdes_pq_skew[XGBE_PHY_SPEED_10000]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, - priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_10000]); - XRXTX_IOWRITE(priv, RXTX_REG22, - priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_10000]); - - amd_xgbe_phy_serdes_complete_ratechange(phydev); - - return 0; -} - -static int amd_xgbe_phy_gmii_2500_mode(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - /* Disable KR training */ - ret = amd_xgbe_an_disable_kr_training(phydev); - if (ret < 0) - return ret; - - /* Set PCS to KX/1G speed */ - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2); - if (ret < 0) - return ret; - - ret &= ~MDIO_PCS_CTRL2_TYPE; - ret |= MDIO_PCS_CTRL2_10GBX; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2, ret); - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - return ret; - - ret &= ~MDIO_CTRL1_SPEEDSEL; - ret |= MDIO_CTRL1_SPEED1G; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - ret = amd_xgbe_phy_pcs_power_cycle(phydev); - if (ret < 0) - return ret; - - /* Set SerDes to 2.5G speed */ - amd_xgbe_phy_serdes_start_ratechange(phydev); - - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, DATARATE, SPEED_2500_RATE); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, WORDMODE, SPEED_2500_WORD); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PLLSEL, SPEED_2500_PLL); - - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, CDR_RATE, - priv->serdes_cdr_rate[XGBE_PHY_SPEED_2500]); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, TXAMP, - priv->serdes_tx_amp[XGBE_PHY_SPEED_2500]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG20, BLWC_ENA, - priv->serdes_blwc[XGBE_PHY_SPEED_2500]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, - priv->serdes_pq_skew[XGBE_PHY_SPEED_2500]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, - priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_2500]); - XRXTX_IOWRITE(priv, RXTX_REG22, - priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_2500]); - - amd_xgbe_phy_serdes_complete_ratechange(phydev); - - return 0; -} - -static int amd_xgbe_phy_gmii_mode(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - /* Disable KR training */ - ret = amd_xgbe_an_disable_kr_training(phydev); - if (ret < 0) - return ret; - - /* Set PCS to KX/1G speed */ - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2); - if (ret < 0) - return ret; - - ret &= ~MDIO_PCS_CTRL2_TYPE; - ret |= MDIO_PCS_CTRL2_10GBX; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2, ret); - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - return ret; - - ret &= ~MDIO_CTRL1_SPEEDSEL; - ret |= MDIO_CTRL1_SPEED1G; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - ret = amd_xgbe_phy_pcs_power_cycle(phydev); - if (ret < 0) - return ret; - - /* Set SerDes to 1G speed */ - amd_xgbe_phy_serdes_start_ratechange(phydev); - - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, DATARATE, SPEED_1000_RATE); - 
XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, WORDMODE, SPEED_1000_WORD); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, PLLSEL, SPEED_1000_PLL); - - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, CDR_RATE, - priv->serdes_cdr_rate[XGBE_PHY_SPEED_1000]); - XSIR1_IOWRITE_BITS(priv, SIR1_SPEED, TXAMP, - priv->serdes_tx_amp[XGBE_PHY_SPEED_1000]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG20, BLWC_ENA, - priv->serdes_blwc[XGBE_PHY_SPEED_1000]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, - priv->serdes_pq_skew[XGBE_PHY_SPEED_1000]); - XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, - priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_1000]); - XRXTX_IOWRITE(priv, RXTX_REG22, - priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_1000]); - - amd_xgbe_phy_serdes_complete_ratechange(phydev); - - return 0; -} - -static int amd_xgbe_phy_cur_mode(struct phy_device *phydev, - enum amd_xgbe_phy_mode *mode) -{ - int ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL2); - if (ret < 0) - return ret; - - if ((ret & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR) - *mode = AMD_XGBE_MODE_KR; - else - *mode = AMD_XGBE_MODE_KX; - - return 0; -} - -static bool amd_xgbe_phy_in_kr_mode(struct phy_device *phydev) -{ - enum amd_xgbe_phy_mode mode; - - if (amd_xgbe_phy_cur_mode(phydev, &mode)) - return false; - - return (mode == AMD_XGBE_MODE_KR); -} - -static int amd_xgbe_phy_switch_mode(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - /* If we are in KR switch to KX, and vice-versa */ - if (amd_xgbe_phy_in_kr_mode(phydev)) { - if (priv->speed_set == AMD_XGBE_PHY_SPEEDSET_1000_10000) - ret = amd_xgbe_phy_gmii_mode(phydev); - else - ret = amd_xgbe_phy_gmii_2500_mode(phydev); - } else { - ret = amd_xgbe_phy_xgmii_mode(phydev); - } - - return ret; -} - -static int amd_xgbe_phy_set_mode(struct phy_device *phydev, - enum amd_xgbe_phy_mode mode) -{ - enum amd_xgbe_phy_mode cur_mode; - int ret; - - ret = amd_xgbe_phy_cur_mode(phydev, &cur_mode); - if (ret) - return ret; - - if (mode != cur_mode) - ret = amd_xgbe_phy_switch_mode(phydev); - - return ret; -} - -static int amd_xgbe_phy_set_an(struct phy_device *phydev, bool enable, - bool restart) -{ - int ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); - if (ret < 0) - return ret; - - ret &= ~MDIO_AN_CTRL1_ENABLE; - - if (enable) - ret |= MDIO_AN_CTRL1_ENABLE; - - if (restart) - ret |= MDIO_AN_CTRL1_RESTART; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, ret); - - return 0; -} - -static int amd_xgbe_phy_restart_an(struct phy_device *phydev) -{ - return amd_xgbe_phy_set_an(phydev, true, true); -} - -static int amd_xgbe_phy_disable_an(struct phy_device *phydev) -{ - return amd_xgbe_phy_set_an(phydev, false, false); -} - -static enum amd_xgbe_phy_an amd_xgbe_an_tx_training(struct phy_device *phydev, - enum amd_xgbe_phy_rx *state) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ad_reg, lp_reg, ret; - - *state = AMD_XGBE_RX_COMPLETE; - - /* If we're not in KR mode then we're done */ - if (!amd_xgbe_phy_in_kr_mode(phydev)) - return AMD_XGBE_AN_PAGE_RECEIVED; - - /* Enable/Disable FEC */ - ad_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); - if (ad_reg < 0) - return AMD_XGBE_AN_ERROR; - - lp_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA + 2); - if (lp_reg < 0) - return AMD_XGBE_AN_ERROR; - - ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FEC_CTRL); - if (ret < 0) - return AMD_XGBE_AN_ERROR; - - ret &= ~XGBE_PHY_FEC_MASK; - if ((ad_reg & 0xc000) && (lp_reg & 0xc000)) - ret |= priv->fec_ability; - - phy_write_mmd(phydev, 
MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FEC_CTRL, ret); - - /* Start KR training */ - ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - if (ret < 0) - return AMD_XGBE_AN_ERROR; - - if (ret & XGBE_PHY_KR_TRAINING_ENABLE) { - XSIR0_IOWRITE_BITS(priv, SIR0_KR_RT_1, RESET, 1); - - ret |= XGBE_PHY_KR_TRAINING_START; - phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, - ret); - - XSIR0_IOWRITE_BITS(priv, SIR0_KR_RT_1, RESET, 0); - } - - return AMD_XGBE_AN_PAGE_RECEIVED; -} - -static enum amd_xgbe_phy_an amd_xgbe_an_tx_xnp(struct phy_device *phydev, - enum amd_xgbe_phy_rx *state) -{ - u16 msg; - - *state = AMD_XGBE_RX_XNP; - - msg = XNP_MCF_NULL_MESSAGE; - msg |= XNP_MP_FORMATTED; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP + 2, 0); - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP + 1, 0); - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP, msg); - - return AMD_XGBE_AN_PAGE_RECEIVED; -} - -static enum amd_xgbe_phy_an amd_xgbe_an_rx_bpa(struct phy_device *phydev, - enum amd_xgbe_phy_rx *state) -{ - unsigned int link_support; - int ret, ad_reg, lp_reg; - - /* Read Base Ability register 2 first */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA + 1); - if (ret < 0) - return AMD_XGBE_AN_ERROR; - - /* Check for a supported mode, otherwise restart in a different one */ - link_support = amd_xgbe_phy_in_kr_mode(phydev) ? 0x80 : 0x20; - if (!(ret & link_support)) - return AMD_XGBE_AN_INCOMPAT_LINK; - - /* Check Extended Next Page support */ - ad_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - if (ad_reg < 0) - return AMD_XGBE_AN_ERROR; - - lp_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA); - if (lp_reg < 0) - return AMD_XGBE_AN_ERROR; - - return ((ad_reg & XNP_NP_EXCHANGE) || (lp_reg & XNP_NP_EXCHANGE)) ? - amd_xgbe_an_tx_xnp(phydev, state) : - amd_xgbe_an_tx_training(phydev, state); -} - -static enum amd_xgbe_phy_an amd_xgbe_an_rx_xnp(struct phy_device *phydev, - enum amd_xgbe_phy_rx *state) -{ - int ad_reg, lp_reg; - - /* Check Extended Next Page support */ - ad_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_XNP); - if (ad_reg < 0) - return AMD_XGBE_AN_ERROR; - - lp_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPX); - if (lp_reg < 0) - return AMD_XGBE_AN_ERROR; - - return ((ad_reg & XNP_NP_EXCHANGE) || (lp_reg & XNP_NP_EXCHANGE)) ? - amd_xgbe_an_tx_xnp(phydev, state) : - amd_xgbe_an_tx_training(phydev, state); -} - -static enum amd_xgbe_phy_an amd_xgbe_an_page_received(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - enum amd_xgbe_phy_rx *state; - unsigned long an_timeout; - int ret; - - if (!priv->an_start) { - priv->an_start = jiffies; - } else { - an_timeout = priv->an_start + - msecs_to_jiffies(XGBE_AN_MS_TIMEOUT); - if (time_after(jiffies, an_timeout)) { - /* Auto-negotiation timed out, reset state */ - priv->kr_state = AMD_XGBE_RX_BPA; - priv->kx_state = AMD_XGBE_RX_BPA; - - priv->an_start = jiffies; - } - } - - state = amd_xgbe_phy_in_kr_mode(phydev) ? 
&priv->kr_state - : &priv->kx_state; - - switch (*state) { - case AMD_XGBE_RX_BPA: - ret = amd_xgbe_an_rx_bpa(phydev, state); - break; - - case AMD_XGBE_RX_XNP: - ret = amd_xgbe_an_rx_xnp(phydev, state); - break; - - default: - ret = AMD_XGBE_AN_ERROR; - } - - return ret; -} - -static enum amd_xgbe_phy_an amd_xgbe_an_incompat_link(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - /* Be sure we aren't looping trying to negotiate */ - if (amd_xgbe_phy_in_kr_mode(phydev)) { - priv->kr_state = AMD_XGBE_RX_ERROR; - - if (!(phydev->advertising & SUPPORTED_1000baseKX_Full) && - !(phydev->advertising & SUPPORTED_2500baseX_Full)) - return AMD_XGBE_AN_NO_LINK; - - if (priv->kx_state != AMD_XGBE_RX_BPA) - return AMD_XGBE_AN_NO_LINK; - } else { - priv->kx_state = AMD_XGBE_RX_ERROR; - - if (!(phydev->advertising & SUPPORTED_10000baseKR_Full)) - return AMD_XGBE_AN_NO_LINK; - - if (priv->kr_state != AMD_XGBE_RX_BPA) - return AMD_XGBE_AN_NO_LINK; - } - - ret = amd_xgbe_phy_disable_an(phydev); - if (ret) - return AMD_XGBE_AN_ERROR; - - ret = amd_xgbe_phy_switch_mode(phydev); - if (ret) - return AMD_XGBE_AN_ERROR; - - ret = amd_xgbe_phy_restart_an(phydev); - if (ret) - return AMD_XGBE_AN_ERROR; - - return AMD_XGBE_AN_INCOMPAT_LINK; -} - -static irqreturn_t amd_xgbe_an_isr(int irq, void *data) -{ - struct amd_xgbe_phy_priv *priv = (struct amd_xgbe_phy_priv *)data; - - /* Interrupt reason must be read and cleared outside of IRQ context */ - disable_irq_nosync(priv->an_irq); - - queue_work(priv->an_workqueue, &priv->an_irq_work); - - return IRQ_HANDLED; -} - -static void amd_xgbe_an_irq_work(struct work_struct *work) -{ - struct amd_xgbe_phy_priv *priv = container_of(work, - struct amd_xgbe_phy_priv, - an_irq_work); - - /* Avoid a race between enabling the IRQ and exiting the work by - * waiting for the work to finish and then queueing it - */ - flush_work(&priv->an_work); - queue_work(priv->an_workqueue, &priv->an_work); -} - -static void amd_xgbe_an_state_machine(struct work_struct *work) -{ - struct amd_xgbe_phy_priv *priv = container_of(work, - struct amd_xgbe_phy_priv, - an_work); - struct phy_device *phydev = priv->phydev; - enum amd_xgbe_phy_an cur_state = priv->an_state; - int int_reg, int_mask; - - mutex_lock(&priv->an_mutex); - - /* Read the interrupt */ - int_reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT); - if (!int_reg) - goto out; - -next_int: - if (int_reg < 0) { - priv->an_state = AMD_XGBE_AN_ERROR; - int_mask = XGBE_AN_INT_MASK; - } else if (int_reg & XGBE_AN_PG_RCV) { - priv->an_state = AMD_XGBE_AN_PAGE_RECEIVED; - int_mask = XGBE_AN_PG_RCV; - } else if (int_reg & XGBE_AN_INC_LINK) { - priv->an_state = AMD_XGBE_AN_INCOMPAT_LINK; - int_mask = XGBE_AN_INC_LINK; - } else if (int_reg & XGBE_AN_INT_CMPLT) { - priv->an_state = AMD_XGBE_AN_COMPLETE; - int_mask = XGBE_AN_INT_CMPLT; - } else { - priv->an_state = AMD_XGBE_AN_ERROR; - int_mask = 0; - } - - /* Clear the interrupt to be processed */ - int_reg &= ~int_mask; - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, int_reg); - - priv->an_result = priv->an_state; - -again: - cur_state = priv->an_state; - - switch (priv->an_state) { - case AMD_XGBE_AN_READY: - priv->an_supported = 0; - break; - - case AMD_XGBE_AN_PAGE_RECEIVED: - priv->an_state = amd_xgbe_an_page_received(phydev); - priv->an_supported++; - break; - - case AMD_XGBE_AN_INCOMPAT_LINK: - priv->an_supported = 0; - priv->parallel_detect = 0; - priv->an_state = amd_xgbe_an_incompat_link(phydev); - break; - - case AMD_XGBE_AN_COMPLETE: - 
priv->parallel_detect = priv->an_supported ? 0 : 1; - netdev_dbg(phydev->attached_dev, "%s successful\n", - priv->an_supported ? "Auto negotiation" - : "Parallel detection"); - break; - - case AMD_XGBE_AN_NO_LINK: - break; - - default: - priv->an_state = AMD_XGBE_AN_ERROR; - } - - if (priv->an_state == AMD_XGBE_AN_NO_LINK) { - int_reg = 0; - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0); - } else if (priv->an_state == AMD_XGBE_AN_ERROR) { - netdev_err(phydev->attached_dev, - "error during auto-negotiation, state=%u\n", - cur_state); - - int_reg = 0; - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0); - } - - if (priv->an_state >= AMD_XGBE_AN_COMPLETE) { - priv->an_result = priv->an_state; - priv->an_state = AMD_XGBE_AN_READY; - priv->kr_state = AMD_XGBE_RX_BPA; - priv->kx_state = AMD_XGBE_RX_BPA; - priv->an_start = 0; - } - - if (cur_state != priv->an_state) - goto again; - - if (int_reg) - goto next_int; - -out: - enable_irq(priv->an_irq); - - mutex_unlock(&priv->an_mutex); -} - -static int amd_xgbe_an_init(struct phy_device *phydev) -{ - int ret; - - /* Set up Advertisement register 3 first */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); - if (ret < 0) - return ret; - - if (phydev->advertising & SUPPORTED_10000baseR_FEC) - ret |= 0xc000; - else - ret &= ~0xc000; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2, ret); - - /* Set up Advertisement register 2 next */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); - if (ret < 0) - return ret; - - if (phydev->advertising & SUPPORTED_10000baseKR_Full) - ret |= 0x80; - else - ret &= ~0x80; - - if ((phydev->advertising & SUPPORTED_1000baseKX_Full) || - (phydev->advertising & SUPPORTED_2500baseX_Full)) - ret |= 0x20; - else - ret &= ~0x20; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1, ret); - - /* Set up Advertisement register 1 last */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - if (ret < 0) - return ret; - - if (phydev->advertising & SUPPORTED_Pause) - ret |= 0x400; - else - ret &= ~0x400; - - if (phydev->advertising & SUPPORTED_Asym_Pause) - ret |= 0x800; - else - ret &= ~0x800; - - /* We don't intend to perform XNP */ - ret &= ~XNP_NP_EXCHANGE; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, ret); - - return 0; -} - -static int amd_xgbe_phy_soft_reset(struct phy_device *phydev) -{ - int count, ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - return ret; - - ret |= MDIO_CTRL1_RESET; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - count = 50; - do { - msleep(20); - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - return ret; - } while ((ret & MDIO_CTRL1_RESET) && --count); - - if (ret & MDIO_CTRL1_RESET) - return -ETIMEDOUT; - - /* Disable auto-negotiation for now */ - ret = amd_xgbe_phy_disable_an(phydev); - if (ret < 0) - return ret; - - /* Clear auto-negotiation interrupts */ - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0); - - return 0; -} - -static int amd_xgbe_phy_config_init(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - struct net_device *netdev = phydev->attached_dev; - int ret; - - if (!priv->an_irq_allocated) { - /* Allocate the auto-negotiation workqueue and interrupt */ - snprintf(priv->an_irq_name, sizeof(priv->an_irq_name) - 1, - "%s-pcs", netdev_name(netdev)); - - priv->an_workqueue = - create_singlethread_workqueue(priv->an_irq_name); - if (!priv->an_workqueue) { - netdev_err(netdev, "phy workqueue creation failed\n"); - 
return -ENOMEM; - } - - ret = devm_request_irq(priv->dev, priv->an_irq, - amd_xgbe_an_isr, 0, priv->an_irq_name, - priv); - if (ret) { - netdev_err(netdev, "phy irq request failed\n"); - destroy_workqueue(priv->an_workqueue); - return ret; - } - - priv->an_irq_allocated = 1; - } - - /* Set initial mode - call the mode setting routines - * directly to insure we are properly configured - */ - if (phydev->advertising & SUPPORTED_10000baseKR_Full) - ret = amd_xgbe_phy_xgmii_mode(phydev); - else if (phydev->advertising & SUPPORTED_1000baseKX_Full) - ret = amd_xgbe_phy_gmii_mode(phydev); - else if (phydev->advertising & SUPPORTED_2500baseX_Full) - ret = amd_xgbe_phy_gmii_2500_mode(phydev); - else - ret = -EINVAL; - if (ret < 0) - return ret; - - /* Set up advertisement registers based on current settings */ - ret = amd_xgbe_an_init(phydev); - if (ret) - return ret; - - /* Enable auto-negotiation interrupts */ - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INTMASK, 0x07); - - return 0; -} - -static int amd_xgbe_phy_setup_forced(struct phy_device *phydev) -{ - int ret; - - /* Disable auto-negotiation */ - ret = amd_xgbe_phy_disable_an(phydev); - if (ret < 0) - return ret; - - /* Validate/Set specified speed */ - switch (phydev->speed) { - case SPEED_10000: - ret = amd_xgbe_phy_set_mode(phydev, AMD_XGBE_MODE_KR); - break; - - case SPEED_2500: - case SPEED_1000: - ret = amd_xgbe_phy_set_mode(phydev, AMD_XGBE_MODE_KX); - break; - - default: - ret = -EINVAL; - } - - if (ret < 0) - return ret; - - /* Validate duplex mode */ - if (phydev->duplex != DUPLEX_FULL) - return -EINVAL; - - phydev->pause = 0; - phydev->asym_pause = 0; - - return 0; -} - -static int __amd_xgbe_phy_config_aneg(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - u32 mmd_mask = phydev->c45_ids.devices_in_package; - int ret; - - if (phydev->autoneg != AUTONEG_ENABLE) - return amd_xgbe_phy_setup_forced(phydev); - - /* Make sure we have the AN MMD present */ - if (!(mmd_mask & MDIO_DEVS_AN)) - return -EINVAL; - - /* Disable auto-negotiation interrupt */ - disable_irq(priv->an_irq); - - /* Start auto-negotiation in a supported mode */ - if (phydev->advertising & SUPPORTED_10000baseKR_Full) - ret = amd_xgbe_phy_set_mode(phydev, AMD_XGBE_MODE_KR); - else if ((phydev->advertising & SUPPORTED_1000baseKX_Full) || - (phydev->advertising & SUPPORTED_2500baseX_Full)) - ret = amd_xgbe_phy_set_mode(phydev, AMD_XGBE_MODE_KX); - else - ret = -EINVAL; - if (ret < 0) { - enable_irq(priv->an_irq); - return ret; - } - - /* Disable and stop any in progress auto-negotiation */ - ret = amd_xgbe_phy_disable_an(phydev); - if (ret < 0) - return ret; - - /* Clear any auto-negotitation interrupts */ - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_INT, 0); - - priv->an_result = AMD_XGBE_AN_READY; - priv->an_state = AMD_XGBE_AN_READY; - priv->kr_state = AMD_XGBE_RX_BPA; - priv->kx_state = AMD_XGBE_RX_BPA; - - /* Re-enable auto-negotiation interrupt */ - enable_irq(priv->an_irq); - - /* Set up advertisement registers based on current settings */ - ret = amd_xgbe_an_init(phydev); - if (ret) - return ret; - - /* Enable and start auto-negotiation */ - return amd_xgbe_phy_restart_an(phydev); -} - -static int amd_xgbe_phy_config_aneg(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - mutex_lock(&priv->an_mutex); - - ret = __amd_xgbe_phy_config_aneg(phydev); - - mutex_unlock(&priv->an_mutex); - - return ret; -} - -static int amd_xgbe_phy_aneg_done(struct phy_device *phydev) -{ - struct 
amd_xgbe_phy_priv *priv = phydev->priv; - - return (priv->an_result == AMD_XGBE_AN_COMPLETE); -} - -static int amd_xgbe_phy_update_link(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - /* If we're doing auto-negotiation don't report link down */ - if (priv->an_state != AMD_XGBE_AN_READY) { - phydev->link = 1; - return 0; - } - - /* Link status is latched low, so read once to clear - * and then read again to get current state - */ - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1); - if (ret < 0) - return ret; - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1); - if (ret < 0) - return ret; - - phydev->link = (ret & MDIO_STAT1_LSTATUS) ? 1 : 0; - - return 0; -} - -static int amd_xgbe_phy_read_status(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - u32 mmd_mask = phydev->c45_ids.devices_in_package; - int ret, ad_ret, lp_ret; - - ret = amd_xgbe_phy_update_link(phydev); - if (ret) - return ret; - - if ((phydev->autoneg == AUTONEG_ENABLE) && - !priv->parallel_detect) { - if (!(mmd_mask & MDIO_DEVS_AN)) - return -EINVAL; - - if (!amd_xgbe_phy_aneg_done(phydev)) - return 0; - - /* Compare Advertisement and Link Partner register 1 */ - ad_ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - if (ad_ret < 0) - return ad_ret; - lp_ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA); - if (lp_ret < 0) - return lp_ret; - - ad_ret &= lp_ret; - phydev->pause = (ad_ret & 0x400) ? 1 : 0; - phydev->asym_pause = (ad_ret & 0x800) ? 1 : 0; - - /* Compare Advertisement and Link Partner register 2 */ - ad_ret = phy_read_mmd(phydev, MDIO_MMD_AN, - MDIO_AN_ADVERTISE + 1); - if (ad_ret < 0) - return ad_ret; - lp_ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA + 1); - if (lp_ret < 0) - return lp_ret; - - ad_ret &= lp_ret; - if (ad_ret & 0x80) { - phydev->speed = SPEED_10000; - ret = amd_xgbe_phy_set_mode(phydev, AMD_XGBE_MODE_KR); - if (ret) - return ret; - } else { - switch (priv->speed_set) { - case AMD_XGBE_PHY_SPEEDSET_1000_10000: - phydev->speed = SPEED_1000; - break; - - case AMD_XGBE_PHY_SPEEDSET_2500_10000: - phydev->speed = SPEED_2500; - break; - } - - ret = amd_xgbe_phy_set_mode(phydev, AMD_XGBE_MODE_KX); - if (ret) - return ret; - } - - phydev->duplex = DUPLEX_FULL; - } else { - if (amd_xgbe_phy_in_kr_mode(phydev)) { - phydev->speed = SPEED_10000; - } else { - switch (priv->speed_set) { - case AMD_XGBE_PHY_SPEEDSET_1000_10000: - phydev->speed = SPEED_1000; - break; - - case AMD_XGBE_PHY_SPEEDSET_2500_10000: - phydev->speed = SPEED_2500; - break; - } - } - phydev->duplex = DUPLEX_FULL; - phydev->pause = 0; - phydev->asym_pause = 0; - } - - return 0; -} - -static int amd_xgbe_phy_suspend(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - int ret; - - mutex_lock(&phydev->lock); - - ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (ret < 0) - goto unlock; - - priv->lpm_ctrl = ret; - - ret |= MDIO_CTRL1_LPOWER; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, ret); - - ret = 0; - -unlock: - mutex_unlock(&phydev->lock); - - return ret; -} - -static int amd_xgbe_phy_resume(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - - mutex_lock(&phydev->lock); - - priv->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, priv->lpm_ctrl); - - mutex_unlock(&phydev->lock); - - return 0; -} - -static unsigned int amd_xgbe_phy_resource_count(struct platform_device *pdev, - unsigned int type) -{ - unsigned int count; - int i; - - 
for (i = 0, count = 0; i < pdev->num_resources; i++) { - struct resource *r = &pdev->resource[i]; - - if (type == resource_type(r)) - count++; - } - - return count; -} - -static int amd_xgbe_phy_probe(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv; - struct platform_device *phy_pdev; - struct device *dev, *phy_dev; - unsigned int phy_resnum, phy_irqnum; - int ret; - - if (!phydev->bus || !phydev->bus->parent) - return -EINVAL; - - dev = phydev->bus->parent; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->pdev = to_platform_device(dev); - priv->adev = ACPI_COMPANION(dev); - priv->dev = dev; - priv->phydev = phydev; - mutex_init(&priv->an_mutex); - INIT_WORK(&priv->an_irq_work, amd_xgbe_an_irq_work); - INIT_WORK(&priv->an_work, amd_xgbe_an_state_machine); - - if (!priv->adev || acpi_disabled) { - struct device_node *bus_node; - struct device_node *phy_node; - - bus_node = priv->dev->of_node; - phy_node = of_parse_phandle(bus_node, "phy-handle", 0); - if (!phy_node) { - dev_err(dev, "unable to parse phy-handle\n"); - ret = -EINVAL; - goto err_priv; - } - - phy_pdev = of_find_device_by_node(phy_node); - of_node_put(phy_node); - - if (!phy_pdev) { - dev_err(dev, "unable to obtain phy device\n"); - ret = -EINVAL; - goto err_priv; - } - - phy_resnum = 0; - phy_irqnum = 0; - } else { - /* In ACPI, the XGBE and PHY resources are the grouped - * together with the PHY resources at the end - */ - phy_pdev = priv->pdev; - phy_resnum = amd_xgbe_phy_resource_count(phy_pdev, - IORESOURCE_MEM) - 3; - phy_irqnum = amd_xgbe_phy_resource_count(phy_pdev, - IORESOURCE_IRQ) - 1; - } - phy_dev = &phy_pdev->dev; - - /* Get the device mmio areas */ - priv->rxtx_res = platform_get_resource(phy_pdev, IORESOURCE_MEM, - phy_resnum++); - priv->rxtx_regs = devm_ioremap_resource(dev, priv->rxtx_res); - if (IS_ERR(priv->rxtx_regs)) { - dev_err(dev, "rxtx ioremap failed\n"); - ret = PTR_ERR(priv->rxtx_regs); - goto err_put; - } - - priv->sir0_res = platform_get_resource(phy_pdev, IORESOURCE_MEM, - phy_resnum++); - priv->sir0_regs = devm_ioremap_resource(dev, priv->sir0_res); - if (IS_ERR(priv->sir0_regs)) { - dev_err(dev, "sir0 ioremap failed\n"); - ret = PTR_ERR(priv->sir0_regs); - goto err_rxtx; - } - - priv->sir1_res = platform_get_resource(phy_pdev, IORESOURCE_MEM, - phy_resnum++); - priv->sir1_regs = devm_ioremap_resource(dev, priv->sir1_res); - if (IS_ERR(priv->sir1_regs)) { - dev_err(dev, "sir1 ioremap failed\n"); - ret = PTR_ERR(priv->sir1_regs); - goto err_sir0; - } - - /* Get the auto-negotiation interrupt */ - ret = platform_get_irq(phy_pdev, phy_irqnum); - if (ret < 0) { - dev_err(dev, "platform_get_irq failed\n"); - goto err_sir1; - } - priv->an_irq = ret; - - /* Get the device speed set property */ - ret = device_property_read_u32(phy_dev, XGBE_PHY_SPEEDSET_PROPERTY, - &priv->speed_set); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_SPEEDSET_PROPERTY); - goto err_sir1; - } - - switch (priv->speed_set) { - case AMD_XGBE_PHY_SPEEDSET_1000_10000: - case AMD_XGBE_PHY_SPEEDSET_2500_10000: - break; - default: - dev_err(dev, "invalid %s property\n", - XGBE_PHY_SPEEDSET_PROPERTY); - ret = -EINVAL; - goto err_sir1; - } - - if (device_property_present(phy_dev, XGBE_PHY_BLWC_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PHY_BLWC_PROPERTY, - priv->serdes_blwc, - XGBE_PHY_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_BLWC_PROPERTY); - goto err_sir1; - } - } else { - memcpy(priv->serdes_blwc, 
amd_xgbe_phy_serdes_blwc, - sizeof(priv->serdes_blwc)); - } - - if (device_property_present(phy_dev, XGBE_PHY_CDR_RATE_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PHY_CDR_RATE_PROPERTY, - priv->serdes_cdr_rate, - XGBE_PHY_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_CDR_RATE_PROPERTY); - goto err_sir1; - } - } else { - memcpy(priv->serdes_cdr_rate, amd_xgbe_phy_serdes_cdr_rate, - sizeof(priv->serdes_cdr_rate)); - } - - if (device_property_present(phy_dev, XGBE_PHY_PQ_SKEW_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PHY_PQ_SKEW_PROPERTY, - priv->serdes_pq_skew, - XGBE_PHY_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_PQ_SKEW_PROPERTY); - goto err_sir1; - } - } else { - memcpy(priv->serdes_pq_skew, amd_xgbe_phy_serdes_pq_skew, - sizeof(priv->serdes_pq_skew)); - } - - if (device_property_present(phy_dev, XGBE_PHY_TX_AMP_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PHY_TX_AMP_PROPERTY, - priv->serdes_tx_amp, - XGBE_PHY_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_TX_AMP_PROPERTY); - goto err_sir1; - } - } else { - memcpy(priv->serdes_tx_amp, amd_xgbe_phy_serdes_tx_amp, - sizeof(priv->serdes_tx_amp)); - } - - if (device_property_present(phy_dev, XGBE_PHY_DFE_CFG_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PHY_DFE_CFG_PROPERTY, - priv->serdes_dfe_tap_cfg, - XGBE_PHY_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_DFE_CFG_PROPERTY); - goto err_sir1; - } - } else { - memcpy(priv->serdes_dfe_tap_cfg, - amd_xgbe_phy_serdes_dfe_tap_cfg, - sizeof(priv->serdes_dfe_tap_cfg)); - } - - if (device_property_present(phy_dev, XGBE_PHY_DFE_ENA_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PHY_DFE_ENA_PROPERTY, - priv->serdes_dfe_tap_ena, - XGBE_PHY_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PHY_DFE_ENA_PROPERTY); - goto err_sir1; - } - } else { - memcpy(priv->serdes_dfe_tap_ena, - amd_xgbe_phy_serdes_dfe_tap_ena, - sizeof(priv->serdes_dfe_tap_ena)); - } - - /* Initialize supported features */ - phydev->supported = SUPPORTED_Autoneg; - phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - phydev->supported |= SUPPORTED_Backplane; - phydev->supported |= SUPPORTED_10000baseKR_Full; - switch (priv->speed_set) { - case AMD_XGBE_PHY_SPEEDSET_1000_10000: - phydev->supported |= SUPPORTED_1000baseKX_Full; - break; - case AMD_XGBE_PHY_SPEEDSET_2500_10000: - phydev->supported |= SUPPORTED_2500baseX_Full; - break; - } - - ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FEC_ABILITY); - if (ret < 0) - return ret; - priv->fec_ability = ret & XGBE_PHY_FEC_MASK; - if (priv->fec_ability & XGBE_PHY_FEC_ENABLE) - phydev->supported |= SUPPORTED_10000baseR_FEC; - - phydev->advertising = phydev->supported; - - phydev->priv = priv; - - if (!priv->adev || acpi_disabled) - platform_device_put(phy_pdev); - - return 0; - -err_sir1: - devm_iounmap(dev, priv->sir1_regs); - devm_release_mem_region(dev, priv->sir1_res->start, - resource_size(priv->sir1_res)); - -err_sir0: - devm_iounmap(dev, priv->sir0_regs); - devm_release_mem_region(dev, priv->sir0_res->start, - resource_size(priv->sir0_res)); - -err_rxtx: - devm_iounmap(dev, priv->rxtx_regs); - devm_release_mem_region(dev, priv->rxtx_res->start, - resource_size(priv->rxtx_res)); - -err_put: - if (!priv->adev || acpi_disabled) - platform_device_put(phy_pdev); - -err_priv: - devm_kfree(dev, priv); - - return ret; -} - 
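[Note: the probe path above repeats a single idiom for each SerDes tuning knob: if the firmware node carries the optional per-speed array, read it with the unified device-property API; otherwise fall back to the compiled-in defaults. A minimal sketch of that pattern follows; the helper name and parameters are illustrative only (the driver open-codes this per property), while device_property_present() and device_property_read_u32_array() are the real DT/ACPI accessors from <linux/property.h>.]

static int example_get_serdes_array(struct device *dev, const char *prop,
				    u32 *vals, const u32 *defaults,
				    size_t nvals)
{
	int ret;

	/* device_property_*() works for both DT and ACPI firmware nodes */
	if (!device_property_present(dev, prop)) {
		/* Property absent: fall back to the compiled-in defaults */
		memcpy(vals, defaults, nvals * sizeof(*vals));
		return 0;
	}

	ret = device_property_read_u32_array(dev, prop, vals, nvals);
	if (ret)
		dev_err(dev, "invalid %s property\n", prop);

	return ret;
}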
-static void amd_xgbe_phy_remove(struct phy_device *phydev) -{ - struct amd_xgbe_phy_priv *priv = phydev->priv; - struct device *dev = priv->dev; - - if (priv->an_irq_allocated) { - devm_free_irq(dev, priv->an_irq, priv); - - flush_workqueue(priv->an_workqueue); - destroy_workqueue(priv->an_workqueue); - } - - /* Release resources */ - devm_iounmap(dev, priv->sir1_regs); - devm_release_mem_region(dev, priv->sir1_res->start, - resource_size(priv->sir1_res)); - - devm_iounmap(dev, priv->sir0_regs); - devm_release_mem_region(dev, priv->sir0_res->start, - resource_size(priv->sir0_res)); - - devm_iounmap(dev, priv->rxtx_regs); - devm_release_mem_region(dev, priv->rxtx_res->start, - resource_size(priv->rxtx_res)); - - devm_kfree(dev, priv); -} - -static int amd_xgbe_match_phy_device(struct phy_device *phydev) -{ - return phydev->c45_ids.device_ids[MDIO_MMD_PCS] == XGBE_PHY_ID; -} - -static struct phy_driver amd_xgbe_phy_driver[] = { - { - .phy_id = XGBE_PHY_ID, - .phy_id_mask = XGBE_PHY_MASK, - .name = "AMD XGBE PHY", - .features = 0, - .flags = PHY_IS_INTERNAL, - .probe = amd_xgbe_phy_probe, - .remove = amd_xgbe_phy_remove, - .soft_reset = amd_xgbe_phy_soft_reset, - .config_init = amd_xgbe_phy_config_init, - .suspend = amd_xgbe_phy_suspend, - .resume = amd_xgbe_phy_resume, - .config_aneg = amd_xgbe_phy_config_aneg, - .aneg_done = amd_xgbe_phy_aneg_done, - .read_status = amd_xgbe_phy_read_status, - .match_phy_device = amd_xgbe_match_phy_device, - .driver = { - .owner = THIS_MODULE, - }, - }, -}; - -module_phy_driver(amd_xgbe_phy_driver); - -static struct mdio_device_id __maybe_unused amd_xgbe_phy_ids[] = { - { XGBE_PHY_ID, XGBE_PHY_MASK }, - { } -}; -MODULE_DEVICE_TABLE(mdio, amd_xgbe_phy_ids); diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c index daec9b05d168..61a543c788cc 100644 --- a/drivers/net/phy/mdio-bitbang.c +++ b/drivers/net/phy/mdio-bitbang.c @@ -165,8 +165,11 @@ static int mdiobb_read(struct mii_bus *bus, int phy, int reg) ctrl->ops->set_mdio_dir(ctrl, 0); - /* check the turnaround bit: the PHY should be driving it to zero */ - if (mdiobb_get_bit(ctrl) != 0) { + /* check the turnaround bit: the PHY should be driving it to zero; if this + * PHY is listed in phy_ignore_ta_mask as having broken TA, skip the check + */ + if (mdiobb_get_bit(ctrl) != 0 && + !(bus->phy_ignore_ta_mask & (1 << phy))) { /* PHY didn't drive TA low -- flush any bits it * may be trying to send. 
*/ diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 53d18150f4e2..7dc21e56a7aa 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -158,6 +158,7 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, new_bus->name = "GPIO Bitbanged MDIO", new_bus->phy_mask = pdata->phy_mask; + new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; new_bus->irq = pdata->irqs; new_bus->parent = dev; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 47cd578052fc..377d2db04d33 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -58,6 +58,31 @@ static const char *phy_speed_to_str(int speed) } } +#define PHY_STATE_STR(_state) \ + case PHY_##_state: \ + return __stringify(_state); \ + +static const char *phy_state_to_str(enum phy_state st) +{ + switch (st) { + PHY_STATE_STR(DOWN) + PHY_STATE_STR(STARTING) + PHY_STATE_STR(READY) + PHY_STATE_STR(PENDING) + PHY_STATE_STR(UP) + PHY_STATE_STR(AN) + PHY_STATE_STR(RUNNING) + PHY_STATE_STR(NOLINK) + PHY_STATE_STR(FORCING) + PHY_STATE_STR(CHANGELINK) + PHY_STATE_STR(HALTED) + PHY_STATE_STR(RESUMING) + } + + return NULL; +} + + /** * phy_print_status - Convenience function to print out the current phy status * @phydev: the phy_device struct @@ -784,10 +809,13 @@ void phy_state_machine(struct work_struct *work) struct phy_device *phydev = container_of(dwork, struct phy_device, state_queue); bool needs_aneg = false, do_suspend = false; + enum phy_state old_state; int err = 0; mutex_lock(&phydev->lock); + old_state = phydev->state; + if (phydev->drv->link_change_notify) phydev->drv->link_change_notify(phydev); @@ -952,6 +980,9 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); + dev_dbg(&phydev->dev, "PHY state change %s -> %s\n", + phy_state_to_str(old_state), phy_state_to_str(phydev->state)); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, PHY_STATE_TIME * HZ); } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index b62a5e3a1c65..3837ae344f63 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -550,11 +550,11 @@ static struct proto pppoe_sk_proto __read_mostly = { * Initialize a new struct sock. 
* **********************************************************************/ -static int pppoe_create(struct net *net, struct socket *sock) +static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 2940e9fe351b..0e1b30622477 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -118,7 +118,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol, !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(net, sock); + rc = pppox_protos[protocol]->create(net, sock, kern); module_put(pppox_protos[protocol]->owner); out: diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index e3bfbd4d0136..14839bc0aaf5 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -561,14 +561,14 @@ static void pptp_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static int pptp_create(struct net *net, struct socket *sock) +static int pptp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; struct pppox_sock *po; struct pptp_opt *opt; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto, kern); if (!sk) goto out; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6928448f6b7f..daa054b3ff03 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1924,7 +1924,7 @@ static netdev_features_t team_fix_features(struct net_device *dev, struct team *team = netdev_priv(dev); netdev_features_t mask; - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; @@ -1977,8 +1977,12 @@ static const struct net_device_ops team_netdev_ops = { .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, .ndo_change_carrier = team_change_carrier, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_features_check = passthru_features_check, }; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e470ae59d405..1a1c4f7b3ec5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -146,7 +146,6 @@ struct tun_file { struct socket socket; struct socket_wq wq; struct tun_struct __rcu *tun; - struct net *net; struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; @@ -493,10 +492,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - - BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, - &tfile->socket.flags)); - sk_release_kernel(&tfile->sk); + sock_put(&tfile->sk); } } @@ -1492,18 +1488,10 @@ out: return ret; } -static int tun_release(struct socket *sock) -{ - if (sock->sk) - sock_put(sock->sk); - return 0; -} - /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, - .release = 
tun_release, }; static struct proto tun_proto = { @@ -1865,7 +1853,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (cmd == TUNSETIFF && !tun) { ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(tfile->net, file, &ifr); + ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); if (ret) goto unlock; @@ -2154,16 +2142,16 @@ out: static int tun_chr_open(struct inode *inode, struct file * file) { + struct net *net = current->nsproxy->net_ns; struct tun_file *tfile; DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, - &tun_proto); + tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &tun_proto, 0); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); - tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; tfile->ifindex = 0; @@ -2174,13 +2162,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.ops = &tun_socket_ops; sock_init_data(&tfile->socket, &tfile->sk); - sk_change_net(&tfile->sk, tfile->net); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; file->private_data = tfile; - set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); INIT_LIST_HEAD(&tfile->next); sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); @@ -2191,10 +2177,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; - struct net *net = tfile->net; tun_detach(tfile, true); - put_net(net); return 0; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 21a0fbf1ed94..5eddbc02c6c2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -336,7 +336,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (!net_eq(dev_net(vxlan->dev), vxlan->net) && nla_put_s32(skb, NDA_LINK_NETNSID, - peernet2id(dev_net(vxlan->dev), vxlan->net))) + peernet2id_alloc(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) @@ -1921,6 +1921,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_oif = rdst->remote_ifindex; fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = dst->sin.sin_addr.s_addr; fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr; @@ -1981,6 +1983,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, fl6.flowi6_oif = rdst->remote_ifindex; fl6.daddr = dst->sin6.sin6_addr; fl6.saddr = vxlan->saddr.sin6.sin6_addr; + fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; if (ipv6_stub->ipv6_dst_lookup(sk, &ndst, &fl6)) { diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index f07a61899545..413528295d72 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1353,12 +1353,7 @@ static void adm8211_configure_filter(struct ieee80211_hw *dev, new_flags = 0; - if (*total_flags & FIF_PROMISC_IN_BSS) { - new_flags |= FIF_PROMISC_IN_BSS; - priv->nar |= ADM8211_NAR_PR; - priv->nar &= ~ADM8211_NAR_MM; - mc_filter[1] = mc_filter[0] = ~0; - } else if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { + if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { new_flags |= FIF_ALLMULTI; priv->nar &= ~ADM8211_NAR_PR; priv->nar |= ADM8211_NAR_MM; diff --git a/drivers/net/wireless/at76c50x-usb.h b/drivers/net/wireless/at76c50x-usb.h index 
55090a38ac95..ae03271f878e 100644 --- a/drivers/net/wireless/at76c50x-usb.h +++ b/drivers/net/wireless/at76c50x-usb.h @@ -447,7 +447,7 @@ struct at76_priv { int mac80211_registered; }; -#define AT76_SUPPORTED_FILTERS FIF_PROMISC_IN_BSS +#define AT76_SUPPORTED_FILTERS 0 #define SCAN_POLL_INTERVAL (HZ / 4) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 5147ebe4cd05..14937cbeca56 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1319,8 +1319,7 @@ out_unlock: } -#define AR5523_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define AR5523_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_OTHER_BSS) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 973485bd4121..fcd08b2f8d26 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -773,7 +773,6 @@ static int ath10k_monitor_recalc(struct ath10k *ar) lockdep_assert_held(&ar->conf_mutex); should_start = ar->monitor || - ar->filter_flags & FIF_PROMISC_IN_BSS || test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ath10k_dbg(ar, ATH10K_DBG_MAC, @@ -3493,8 +3492,7 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, * FIXME: Has to be verified. */ #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ @@ -5500,7 +5498,8 @@ int ath10k_mac_register(struct ath10k *ar) IEEE80211_HW_HAS_RATE_CONTROL | IEEE80211_HW_AP_LINK_PS | IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_SW_CRYPTO_CONTROL; + IEEE80211_HW_SW_CRYPTO_CONTROL | + IEEE80211_HW_SUPPORT_FAST_XMIT; ar->hw->wiphy->features |= NL80211_FEATURE_STATIC_SMPS; diff --git a/drivers/net/wireless/ath/ath5k/ath5k.h b/drivers/net/wireless/ath/ath5k/ath5k.h index 7ca0d6f930fd..e22b0e778927 100644 --- a/drivers/net/wireless/ath/ath5k/ath5k.h +++ b/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1280,7 +1280,6 @@ struct ath5k_hw { DECLARE_BITMAP(status, 4); #define ATH_STAT_INVALID 0 /* disable hardware accesses */ -#define ATH_STAT_PROMISC 1 #define ATH_STAT_LEDSOFT 2 /* enable LED gpio status */ #define ATH_STAT_STARTED 3 /* opened & irqs enabled */ #define ATH_STAT_RESET 4 /* hw reset */ diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index ca4b7ccd697f..803030fd17d3 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -124,7 +124,7 @@ ath5k_led_brightness_set(struct led_classdev *led_dev, static int ath5k_register_led(struct ath5k_hw *ah, struct ath5k_led *led, - const char *name, char *trigger) + const char *name, const char *trigger) { int err; diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 3b4a6463d87a..dc44cfef7517 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -369,7 +369,7 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, u64 multicast) { #define SUPPORTED_FIF_FLAGS \ - (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | \ + (FIF_ALLMULTI | FIF_FCSFAIL | \ FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | \ FIF_BCN_PRBRESP_PROMISC) @@ -393,16 +393,6 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, (AR5K_RX_FILTER_UCAST | AR5K_RX_FILTER_BCAST | AR5K_RX_FILTER_MCAST); - if (changed_flags & (FIF_PROMISC_IN_BSS | 
FIF_OTHER_BSS)) { - if (*new_flags & FIF_PROMISC_IN_BSS) - __set_bit(ATH_STAT_PROMISC, ah->status); - else - __clear_bit(ATH_STAT_PROMISC, ah->status); - } - - if (test_bit(ATH_STAT_PROMISC, ah->status)) - rfilt |= AR5K_RX_FILTER_PROM; - /* Note, AR5K_RX_FILTER_MCAST is already enabled */ if (*new_flags & FIF_ALLMULTI) { mfilt[0] = ~0; @@ -418,8 +408,7 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, if ((*new_flags & FIF_BCN_PRBRESP_PROMISC) || (ah->nvifs > 1)) rfilt |= AR5K_RX_FILTER_BEACON; - /* FIF_CONTROL doc says that if FIF_PROMISC_IN_BSS is not - * set we should only pass on control frames for this + /* FIF_CONTROL doc says we should only pass on control frames for this * station. This needs testing. I believe right now this * enables *all* control frames, which is OK.. but * but we should see if we can improve on granularity */ @@ -809,7 +798,6 @@ const struct ieee80211_ops ath5k_hw_ops = { .sw_scan_start = ath5k_sw_scan_start, .sw_scan_complete = ath5k_sw_scan_complete, .get_stats = ath5k_get_stats, - /* .get_tkip_seq = not implemented */ /* .set_frag_threshold = not implemented */ /* .set_rts_threshold = not implemented */ /* .sta_add = not implemented */ diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 564923c0df87..b71f3072fd9a 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1238,8 +1238,7 @@ out: } #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index a0f58e2aa553..cc9648f844ae 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -872,14 +872,7 @@ u32 ath9k_htc_calcrxfilter(struct ath9k_htc_priv *priv) if (priv->rxfilter & FIF_PROBE_REQ) rfilt |= ATH9K_RX_FILTER_PROBEREQ; - /* - * Set promiscuous mode when FIF_PROMISC_IN_BSS is enabled for station - * mode interface or when in monitor mode. AP mode does not need this - * since it receives all in-BSS frames anyway. - */ - if (((ah->opmode != NL80211_IFTYPE_AP) && - (priv->rxfilter & FIF_PROMISC_IN_BSS)) || - ah->is_monitoring) + if (ah->is_monitoring) rfilt |= ATH9K_RX_FILTER_PROM; if (priv->rxfilter & FIF_CONTROL) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b0badef71ce7..d285e3a89853 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1442,8 +1442,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) } #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 6fb40ef86fd6..6c75fb1ab77d 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -392,11 +392,6 @@ u32 ath_calcrxfilter(struct ath_softc *sc) if (sc->cur_chan->rxfilter & FIF_PROBE_REQ) rfilt |= ATH9K_RX_FILTER_PROBEREQ; - /* - * Set promiscuous mode when FIF_PROMISC_IN_BSS is enabled for station - * mode interface or when in monitor mode. AP mode does not need this - * since it receives all in-BSS frames anyway. 
- */ if (sc->sc_ah->is_monitoring) rfilt |= ATH9K_RX_FILTER_PROM; diff --git a/drivers/net/wireless/ath/carl9170/fw.c b/drivers/net/wireless/ath/carl9170/fw.c index 47d5c2e910ad..020cd46471f5 100644 --- a/drivers/net/wireless/ath/carl9170/fw.c +++ b/drivers/net/wireless/ath/carl9170/fw.c @@ -310,8 +310,7 @@ static int carl9170_fw(struct ar9170 *ar, const __u8 *data, size_t len) if (SUPP(CARL9170FW_RX_FILTER)) { ar->fw.rx_filter = true; ar->rx_filter_caps = FIF_FCSFAIL | FIF_PLCPFAIL | - FIF_CONTROL | FIF_PSPOLL | FIF_OTHER_BSS | - FIF_PROMISC_IN_BSS; + FIF_CONTROL | FIF_PSPOLL | FIF_OTHER_BSS; } if (SUPP(CARL9170FW_HW_COUNTERS)) diff --git a/drivers/net/wireless/ath/carl9170/led.c b/drivers/net/wireless/ath/carl9170/led.c index 78dadc797558..2c74425f5059 100644 --- a/drivers/net/wireless/ath/carl9170/led.c +++ b/drivers/net/wireless/ath/carl9170/led.c @@ -122,7 +122,7 @@ static void carl9170_led_set_brightness(struct led_classdev *led, } static int carl9170_led_register_led(struct ar9170 *ar, int i, char *name, - char *trigger) + const char *trigger) { int err; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index f1455a04cb62..59db6732d4e3 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1011,9 +1011,8 @@ static void carl9170_op_configure_filter(struct ieee80211_hw *hw, if (multicast != ar->cur_mc_hash) WARN_ON(carl9170_update_multicast(ar, multicast)); - if (changed_flags & (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS)) { - ar->sniffer_enabled = !!(*new_flags & - (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS)); + if (changed_flags & FIF_OTHER_BSS) { + ar->sniffer_enabled = !!(*new_flags & FIF_OTHER_BSS); WARN_ON(carl9170_set_operating_mode(ar)); } @@ -1033,7 +1032,7 @@ static void carl9170_op_configure_filter(struct ieee80211_hw *hw, if (!(*new_flags & FIF_PSPOLL)) rx_filter |= CARL9170_RX_FILTER_CTL_PSPOLL; - if (!(*new_flags & (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS))) { + if (!(*new_flags & FIF_OTHER_BSS)) { rx_filter |= CARL9170_RX_FILTER_OTHER_RA; rx_filter |= CARL9170_RX_FILTER_DECRY_FAIL; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b2f9521fe551..f40992969b4a 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3131,8 +3131,6 @@ static void b43_adjust_opmode(struct b43_wldev *dev) ctl |= B43_MACCTL_KEEP_BAD; if (wl->filter_flags & FIF_PLCPFAIL) ctl |= B43_MACCTL_KEEP_BADPLCP; - if (wl->filter_flags & FIF_PROMISC_IN_BSS) - ctl |= B43_MACCTL_PROMISC; if (wl->filter_flags & FIF_BCN_PRBRESP_PROMISC) ctl |= B43_MACCTL_BEACPROMISC; @@ -4310,16 +4308,14 @@ static void b43_op_configure_filter(struct ieee80211_hw *hw, goto out_unlock; } - *fflags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + *fflags &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - changed &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + changed &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index c77b7f59505c..39d49d6cd07f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2055,8 +2055,6 @@ static void b43legacy_adjust_opmode(struct b43legacy_wldev *dev) ctl |= B43legacy_MACCTL_KEEP_BAD; if (wl->filter_flags & FIF_PLCPFAIL) ctl |= B43legacy_MACCTL_KEEP_BADPLCP; - if (wl->filter_flags & FIF_PROMISC_IN_BSS) - ctl |= B43legacy_MACCTL_PROMISC; if (wl->filter_flags & 
FIF_BCN_PRBRESP_PROMISC) ctl |= B43legacy_MACCTL_BEACPROMISC; @@ -2922,16 +2920,14 @@ static void b43legacy_op_configure_filter(struct ieee80211_hw *hw, } spin_lock_irqsave(&wl->irq_lock, flags); - *fflags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + *fflags &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - changed &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + changed &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index 48135063347e..b46cab250615 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -41,8 +41,7 @@ #define BRCMS_FLUSH_TIMEOUT 500 /* msec */ /* Flags we support */ -#define MAC_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define MAC_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_CONTROL | \ FIF_OTHER_BSS | \ @@ -743,8 +742,6 @@ brcms_ops_configure_filter(struct ieee80211_hw *hw, changed_flags &= MAC_FILTERS; *total_flags &= MAC_FILTERS; - if (changed_flags & FIF_PROMISC_IN_BSS) - brcms_dbg_info(core, "FIF_PROMISC_IN_BSS\n"); if (changed_flags & FIF_ALLMULTI) brcms_dbg_info(core, "FIF_ALLMULTI\n"); if (changed_flags & FIF_FCSFAIL) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index 369527e27689..9728be0e704b 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -3571,7 +3571,7 @@ void brcms_c_mac_promisc(struct brcms_c_info *wlc, uint filter_flags) wlc->filter_flags = filter_flags; - if (filter_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) + if (filter_flags & FIF_OTHER_BSS) promisc_bits |= MCTL_PROMISC; if (filter_flags & FIF_BCN_PRBRESP_PROMISC) diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index b0f65fa09428..b86500b4418f 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -578,13 +578,11 @@ void cw1200_configure_filter(struct ieee80211_hw *dev, { struct cw1200_common *priv = dev->priv; bool listening = !!(*total_flags & - (FIF_PROMISC_IN_BSS | - FIF_OTHER_BSS | + (FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC | FIF_PROBE_REQ)); - *total_flags &= FIF_PROMISC_IN_BSS | - FIF_OTHER_BSS | + *total_flags &= FIF_OTHER_BSS | FIF_FCSFAIL | FIF_BCN_PRBRESP_PROMISC | FIF_PROBE_REQ; @@ -592,14 +590,12 @@ void cw1200_configure_filter(struct ieee80211_hw *dev, down(&priv->scan.lock); mutex_lock(&priv->conf_mutex); - priv->rx_filter.promiscuous = (*total_flags & FIF_PROMISC_IN_BSS) - ? 1 : 0; + priv->rx_filter.promiscuous = 0; priv->rx_filter.bssid = (*total_flags & (FIF_OTHER_BSS | FIF_PROBE_REQ)) ? 1 : 0; priv->rx_filter.fcs = (*total_flags & FIF_FCSFAIL) ? 
1 : 0; priv->disable_beacon_filter = !(*total_flags & (FIF_BCN_PRBRESP_PROMISC | - FIF_PROMISC_IN_BSS | FIF_PROBE_REQ)); if (priv->listening != listening) { priv->listening = listening; diff --git a/drivers/net/wireless/iwlegacy/3945-mac.c b/drivers/net/wireless/iwlegacy/3945-mac.c index e5665804d986..189cdf58084b 100644 --- a/drivers/net/wireless/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/iwlegacy/3945-mac.c @@ -3048,7 +3048,7 @@ il3945_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, D_MAC80211("Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -3074,7 +3074,7 @@ il3945_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, * filters into the device. */ *total_flags &= - FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 976f65fe9c38..e4b175cbeefd 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -6166,7 +6166,7 @@ il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, D_MAC80211("Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); /* Setting _just_ RXON_FILTER_CTL2HOST_MSK causes FH errors */ CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_PROMISC_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -6192,7 +6192,7 @@ il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, * filters into the device. */ *total_flags &= - FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 5abd62ed8cb4..ba7fc42edf97 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1061,7 +1061,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, IWL_DEBUG_MAC80211(priv, "Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); /* Setting _just_ RXON_FILTER_CTL2HOST_MSK causes FH errors */ CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_PROMISC_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -1088,7 +1088,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, * since we currently do not support programming multicast * filters into the device. 
*/ - *total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + *total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } @@ -1140,7 +1140,6 @@ static void iwlagn_mac_event_callback(struct ieee80211_hw *hw, return; IWL_DEBUG_MAC80211(priv, "enter\n"); - mutex_lock(&priv->mutex); if (priv->lib->bt_params && priv->lib->bt_params->advanced_bt_coexist) { @@ -1149,13 +1148,12 @@ static void iwlagn_mac_event_callback(struct ieee80211_hw *hw, else if (event->u.rssi.data == RSSI_EVENT_HIGH) priv->bt_enable_pspoll = false; - iwlagn_send_advance_bt_config(priv); + queue_work(priv->workqueue, &priv->bt_runtime_config); } else { IWL_DEBUG_MAC80211(priv, "Advanced BT coex disabled," "ignoring RSSI callback\n"); } - mutex_unlock(&priv->mutex); IWL_DEBUG_MAC80211(priv, "leave\n"); } diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 83903a5025c2..0b5a81d52a3e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -248,7 +248,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, */ if ((nvm_flags & NVM_CHANNEL_GO_CONCURRENT) && (flags & IEEE80211_CHAN_NO_IR)) - flags |= IEEE80211_CHAN_GO_CONCURRENT; + flags |= IEEE80211_CHAN_IR_CONCURRENT; return flags; } diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index ed02e4bf2c26..1bdf18674fb8 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -439,7 +439,7 @@ static u64 lbtf_op_prepare_multicast(struct ieee80211_hw *hw, return mc_count; } -#define SUPPORTED_FIF_FLAGS (FIF_PROMISC_IN_BSS | FIF_ALLMULTI) +#define SUPPORTED_FIF_FLAGS FIF_ALLMULTI static void lbtf_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, @@ -458,10 +458,7 @@ static void lbtf_op_configure_filter(struct ieee80211_hw *hw, return; } - if (*new_flags & (FIF_PROMISC_IN_BSS)) - priv->mac_control |= CMD_ACT_MAC_PROMISCUOUS_ENABLE; - else - priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; + priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; if (*new_flags & (FIF_ALLMULTI) || multicast > MRVDRV_MAX_MULTICAST_LIST_SIZE) { priv->mac_control |= CMD_ACT_MAC_ALL_MULTICAST_ENABLE; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d5c0a1af08b9..8d2f6bbf9598 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1554,8 +1554,6 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, wiphy_debug(hw->wiphy, "%s\n", __func__); data->rx_filter = 0; - if (*total_flags & FIF_PROMISC_IN_BSS) - data->rx_filter |= FIF_PROMISC_IN_BSS; if (*total_flags & FIF_ALLMULTI) data->rx_filter |= FIF_ALLMULTI; @@ -2399,7 +2397,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, IEEE80211_HW_WANT_MONITOR_VIF | IEEE80211_HW_QUEUE_CONTROL | IEEE80211_HW_SUPPORTS_HT_CCK_RATES | - IEEE80211_HW_CHANCTX_STA_CSA; + IEEE80211_HW_CHANCTX_STA_CSA | + IEEE80211_HW_SUPPORT_FAST_XMIT; if (rctbl) hw->flags |= IEEE80211_HW_SUPPORTS_RC_TABLE; @@ -2438,6 +2437,31 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, sband->n_channels = ARRAY_SIZE(hwsim_channels_5ghz); sband->bitrates = data->rates + 4; sband->n_bitrates = ARRAY_SIZE(hwsim_rates) - 4; + + sband->vht_cap.vht_supported = true; + sband->vht_cap.cap = + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + 
IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_TXSTBC | + IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_2 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + sband->vht_cap.vht_mcs.rx_mcs_map = + cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 14); + sband->vht_cap.vht_mcs.tx_mcs_map = + sband->vht_cap.vht_mcs.rx_mcs_map; break; default: continue; @@ -2458,31 +2482,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, sband->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; hw->wiphy->bands[band] = sband; - - sband->vht_cap.vht_supported = true; - sband->vht_cap.cap = - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | - IEEE80211_VHT_CAP_RXLDPC | - IEEE80211_VHT_CAP_SHORT_GI_80 | - IEEE80211_VHT_CAP_SHORT_GI_160 | - IEEE80211_VHT_CAP_TXSTBC | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | - IEEE80211_VHT_CAP_RXSTBC_4 | - IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; - sband->vht_cap.vht_mcs.rx_mcs_map = - cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_8 << 0 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 2 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 6 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 8 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 14); - sband->vht_cap.vht_mcs.tx_mcs_map = - sband->vht_cap.vht_mcs.rx_mcs_map; } /* By default all radios belong to the first group */ diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 95921167b53f..b71fc74d14ab 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -5192,7 +5192,7 @@ mwl8k_configure_filter_sniffer(struct ieee80211_hw *hw, priv->sniffer_enabled = true; } - *total_flags &= FIF_PROMISC_IN_BSS | FIF_ALLMULTI | + *total_flags &= FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL | FIF_OTHER_BSS; diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c index 275408eaf95e..257a9eadd595 100644 --- a/drivers/net/wireless/p54/fwio.c +++ b/drivers/net/wireless/p54/fwio.c @@ -351,8 +351,7 @@ int p54_setup_mac(struct p54_common *priv) * "TRANSPARENT and PROMISCUOUS are mutually exclusive" * STSW45X0C LMAC API - page 12 */ - if (((priv->filter_flags & FIF_PROMISC_IN_BSS) || - (priv->filter_flags & FIF_OTHER_BSS)) && + if (priv->filter_flags & FIF_OTHER_BSS && (mode != P54_FILTER_TYPE_PROMISCUOUS)) mode |= P54_FILTER_TYPE_TRANSPARENT; } else { diff --git a/drivers/net/wireless/p54/led.c b/drivers/net/wireless/p54/led.c index 1f6fd5ff5531..9a8fedd3c0f5 100644 --- a/drivers/net/wireless/p54/led.c +++ b/drivers/net/wireless/p54/led.c @@ -83,7 +83,7 @@ static void p54_led_brightness_set(struct led_classdev *led_dev, static int p54_register_led(struct p54_common *priv, unsigned int led_index, - char *name, char *trigger) + char *name, const char *trigger) { struct p54_led_dev *led = &priv->leds[led_index]; int err; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index e79674f73dc5..2947ad21053c 
100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -395,13 +395,11 @@ static void p54_configure_filter(struct ieee80211_hw *dev, { struct p54_common *priv = dev->priv; - *total_flags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | - FIF_OTHER_BSS; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS; priv->filter_flags = *total_flags; - if (changed_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) + if (changed_flags & FIF_OTHER_BSS) p54_setup_mac(priv); if (changed_flags & FIF_ALLMULTI || multicast) diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index bdf5590ba304..7da138892026 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c @@ -273,10 +273,8 @@ static void rt2400pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(&reg, RXCSR0_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1); rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg); diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index 79f4fe65a119..4ea53aa9ede3 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -274,10 +274,8 @@ static void rt2500pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(&reg, RXCSR0_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(&reg, RXCSR0_DROP_MCAST, diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 05c64597838d..237bbb54c7a8 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -434,10 +434,8 @@ static void rt2500usb_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field16(&reg, TXRX_CSR2_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME, 1); rt2x00_set_field16(&reg, TXRX_CSR2_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field16(&reg, TXRX_CSR2_DROP_VERSION_ERROR, 1); rt2x00_set_field16(&reg, TXRX_CSR2_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index be2d54f257b1..dfeca8355b22 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -1513,8 +1513,7 @@ void rt2800_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_FCSFAIL)); rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_PHY_ERROR, !(filter_flags & FIF_PLCPFAIL)); - rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME, 1); rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0); rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_VER_ERROR, 1); rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_MULTICAST,
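The rt2x00 hunks above and below all make the same change: with FIF_PROMISC_IN_BSS removed from mac80211, frames not addressed to the interface are now unconditionally dropped in hardware, so the drivers simply stop consulting the flag. A minimal sketch of the resulting driver-side pattern follows; the configure_filter signature is mac80211's, while the foo_* names and the register-writeback helper are hypothetical:

static void foo_configure_filter(struct ieee80211_hw *hw,
				 unsigned int changed_flags,
				 unsigned int *total_flags,
				 u64 multicast)
{
	struct foo_priv *priv = hw->priv;	/* hypothetical driver state */

	/* Keep only the FIF_* bits this hardware implements;
	 * FIF_PROMISC_IN_BSS is no longer among them anywhere.
	 */
	*total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC;
	priv->filter_flags = *total_flags;

	foo_program_rx_filter(priv);	/* hypothetical register writeback */
}

mac80211 expects the callback to clear any bits it cannot honour from *total_flags, which is why every driver touched by this series masks the set before programming it.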
@@ -7818,21 +7817,25 @@ EXPORT_SYMBOL_GPL(rt2800_probe_hw); /* * IEEE80211 stack callback functions. */ -void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, - u16 *iv16) +void rt2800_get_key_seq(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq) { struct rt2x00_dev *rt2x00dev = hw->priv; struct mac_iveiv_entry iveiv_entry; u32 offset; - offset = MAC_IVEIV_ENTRY(hw_key_idx); + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) + return; + + offset = MAC_IVEIV_ENTRY(key->hw_key_idx); rt2800_register_multiread(rt2x00dev, offset, &iveiv_entry, sizeof(iveiv_entry)); - memcpy(iv16, &iveiv_entry.iv[0], sizeof(*iv16)); - memcpy(iv32, &iveiv_entry.iv[4], sizeof(*iv32)); + memcpy(&seq->tkip.iv16, &iveiv_entry.iv[0], 2); + memcpy(&seq->tkip.iv32, &iveiv_entry.iv[4], 4); } -EXPORT_SYMBOL_GPL(rt2800_get_tkip_seq); +EXPORT_SYMBOL_GPL(rt2800_get_key_seq); int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value) { diff --git a/drivers/net/wireless/rt2x00/rt2800lib.h b/drivers/net/wireless/rt2x00/rt2800lib.h index 3019db637a4b..1609b8a7f7eb 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.h +++ b/drivers/net/wireless/rt2x00/rt2800lib.h @@ -209,8 +209,9 @@ int rt2800_read_eeprom_efuse(struct rt2x00_dev *rt2x00dev); int rt2800_probe_hw(struct rt2x00_dev *rt2x00dev); -void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, - u16 *iv16); +void rt2800_get_key_seq(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value); int rt2800_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue_idx, diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c index cc1b3cc73c5a..0af22573a2eb 100644 --- a/drivers/net/wireless/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/rt2x00/rt2800pci.c @@ -309,7 +309,7 @@ static const struct ieee80211_ops rt2800pci_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2800soc.c b/drivers/net/wireless/rt2x00/rt2800soc.c index aaa7aa4cad9d..a985a5a7945e 100644 --- a/drivers/net/wireless/rt2x00/rt2800soc.c +++ b/drivers/net/wireless/rt2x00/rt2800soc.c @@ -148,7 +148,7 @@ static const struct ieee80211_ops rt2800soc_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 6ec2466b52b6..5932306084fd 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -835,7 +835,7 @@ static const struct ieee80211_ops rt2800usb_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove,
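The rt2800 conversion just above swaps the TKIP-only get_tkip_seq op for mac80211's cipher-agnostic get_key_seq: the driver now receives the key configuration and fills a struct ieee80211_key_seq. A hedged sketch of the callback shape, with foo_* register helpers standing in for real hardware access:

static void foo_get_key_seq(struct ieee80211_hw *hw,
			    struct ieee80211_key_conf *key,
			    struct ieee80211_key_seq *seq)
{
	struct foo_priv *priv = hw->priv;	/* hypothetical */

	/* struct ieee80211_key_seq is a union: fill only the member
	 * matching key->cipher, and bail out for ciphers the hardware
	 * does not track, as rt2800_get_key_seq() does for non-TKIP keys.
	 */
	if (key->cipher != WLAN_CIPHER_SUITE_TKIP)
		return;

	seq->tkip.iv32 = foo_read_iv32(priv, key->hw_key_idx);	/* hypothetical */
	seq->tkip.iv16 = foo_read_iv16(priv, key->hw_key_idx);	/* hypothetical */
}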
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 300876df056f..1b8a459a412b 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -359,8 +359,7 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, FIF_PLCPFAIL | FIF_CONTROL | FIF_PSPOLL | - FIF_OTHER_BSS | - FIF_PROMISC_IN_BSS; + FIF_OTHER_BSS; /* * Apply some rules to the filters: @@ -369,9 +368,6 @@ * - Multicast filter seems to kill broadcast traffic so never use it. */ *total_flags |= FIF_ALLMULTI; - if (*total_flags & FIF_OTHER_BSS || - *total_flags & FIF_PROMISC_IN_BSS) - *total_flags |= FIF_PROMISC_IN_BSS | FIF_OTHER_BSS; /* * If the device has a single filter for all control frames, diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 819455009fe4..c8a967247a9a 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -530,10 +530,8 @@ static void rt61pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL, !(filter_flags & (FIF_CONTROL | FIF_PSPOLL))); - rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index a5458cf01fb2..65ce3afb888a 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -480,10 +480,8 @@ static void rt73usb_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL, !(filter_flags & (FIF_CONTROL | FIF_PSPOLL))); - rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST, diff --git a/drivers/net/wireless/rtlwifi/core.h b/drivers/net/wireless/rtlwifi/core.h index 82733c6b8c46..782ac2fc4b28 100644 --- a/drivers/net/wireless/rtlwifi/core.h +++ b/drivers/net/wireless/rtlwifi/core.h @@ -27,8 +27,7 @@ #define __RTL_CORE_H__ #define RTL_SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | FIF_CONTROL | \ + (FIF_ALLMULTI | FIF_CONTROL | \ FIF_OTHER_BSS | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC) diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 5d54d16a59e7..f238ee54226c 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -763,8 +763,7 @@ static u64 wl1251_op_prepare_multicast(struct ieee80211_hw *hw, return (u64)(unsigned long)fp; } -#define WL1251_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define WL1251_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_CONTROL | \ @@ -795,10 +794,6 @@ static void wl1251_op_configure_filter(struct ieee80211_hw *hw, wl->rx_config = WL1251_DEFAULT_RX_CONFIG;
wl->rx_filter = WL1251_DEFAULT_RX_FILTER; - if (*total & FIF_PROMISC_IN_BSS) { - wl->rx_config |= CFG_BSSID_FILTER_EN; - wl->rx_config |= CFG_RX_ALL_GOOD; - } if (*total & FIF_ALLMULTI) /* * CFG_MC_FILTER_EN in rx_config needs to be 0 to receive @@ -825,7 +820,7 @@ static void wl1251_op_configure_filter(struct ieee80211_hw *hw, if (ret < 0) goto out; - if (*total & FIF_ALLMULTI || *total & FIF_PROMISC_IN_BSS) + if (*total & FIF_ALLMULTI) ret = wl1251_acx_group_address_tbl(wl, false, NULL, 0); else if (fp) ret = wl1251_acx_group_address_tbl(wl, fp->enabled, diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 0be807951afe..7fe50f8182f3 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -3175,8 +3175,7 @@ static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, return (u64)(unsigned long)fp; } -#define WL1271_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define WL1271_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_CONTROL | \ @@ -6077,7 +6076,8 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) IEEE80211_HW_AMPDU_AGGREGATION | IEEE80211_HW_TX_AMPDU_SETUP_IN_HW | IEEE80211_HW_QUEUE_CONTROL | - IEEE80211_HW_CHANCTX_STA_CSA; + IEEE80211_HW_CHANCTX_STA_CSA | + IEEE80211_HW_SUPPORT_FAST_XMIT; wl->hw->wiphy->cipher_suites = cipher_suites; wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index e7af261e9198..89b6f69f09c8 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1230,7 +1230,7 @@ static u64 zd_op_prepare_multicast(struct ieee80211_hw *hw, } #define SUPPORTED_FIF_FLAGS \ - (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ + (FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC) static void zd_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, @@ -1256,7 +1256,7 @@ static void zd_op_configure_filter(struct ieee80211_hw *hw, * we will have some issue with IPv6 which uses multicast for link * layer address resolution. 
*/ - if (*new_flags & (FIF_PROMISC_IN_BSS | FIF_ALLMULTI)) + if (*new_flags & FIF_ALLMULTI) zd_mc_add_all(&hash); spin_lock_irqsave(&mac->lock, flags); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 0c064485d1c2..fdc60db60829 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -68,6 +68,9 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi phy->irq = mdio->irq[addr]; } + if (of_property_read_bool(child, "broken-turn-around")) + mdio->phy_ignore_ta_mask |= 1 << addr; + /* Associate the OF node with the device structure so it * can be looked up later */ of_node_get(child); diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 00b7d9c9fe48..2f5b518b0e78 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -2150,7 +2150,7 @@ lcs_new_device(struct ccwgroup_device *ccwgdev) rc = lcs_detect(card); if (rc) { LCS_DBF_TEXT(2, setup, "dtctfail"); - dev_err(&card->dev->dev, + dev_err(&ccwgdev->dev, "Detecting a network adapter for LCS devices" " failed with rc=%d (0x%x)\n", rc, rc); lcs_stopcard(card); diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 3abac028899f..ba974a2e409f 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -175,6 +175,8 @@ struct qeth_sbp_info { __u32 supported_funcs; enum qeth_sbp_roles role; __u32 hostnotification:1; + __u32 reflect_promisc:1; + __u32 reflect_promisc_primary:1; }; static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 3466d3cb7647..5e20fba37bff 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -645,7 +645,8 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, card->info.hwtrap = 2; qeth_schedule_recovery(card); return NULL; - case IPA_CMD_SETBRIDGEPORT: + case IPA_CMD_SETBRIDGEPORT_IQD: + case IPA_CMD_SETBRIDGEPORT_OSA: case IPA_CMD_ADDRESS_CHANGE_NOTIF: if (card->discipline->control_event_handler (card, cmd)) diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 7b55768a9592..beb4bdc26de5 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -237,6 +237,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_DELGMAC, "delgmac"}, {IPA_CMD_SETVLAN, "setvlan"}, {IPA_CMD_DELVLAN, "delvlan"}, + {IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"}, {IPA_CMD_SETCCID, "setccid"}, {IPA_CMD_DELCCID, "delccid"}, {IPA_CMD_MODCCID, "modccid"}, @@ -249,7 +250,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_DELIP, "delip"}, {IPA_CMD_SETADAPTERPARMS, "setadapterparms"}, {IPA_CMD_SET_DIAG_ASS, "set_diag_ass"}, - {IPA_CMD_SETBRIDGEPORT, "set_bridge_port"}, + {IPA_CMD_SETBRIDGEPORT_IQD, "set_bridge_port(hs)"}, {IPA_CMD_CREATE_ADDR, "create_addr"}, {IPA_CMD_DESTROY_ADDR, "destroy_addr"}, {IPA_CMD_REGISTER_LOCAL_ADDR, "register_local_addr"}, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 1558be1af72d..6cccc9a49ede 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -92,6 +92,7 @@ enum qeth_ipa_cmds { IPA_CMD_DELGMAC = 0x24, IPA_CMD_SETVLAN = 0x25, IPA_CMD_DELVLAN = 0x26, + IPA_CMD_SETBRIDGEPORT_OSA = 0x2b, IPA_CMD_SETCCID = 0x41, IPA_CMD_DELCCID = 0x42, IPA_CMD_MODCCID = 0x43, @@ -104,7 +105,7 @@ enum qeth_ipa_cmds { IPA_CMD_DELIP = 0xb7, IPA_CMD_SETADAPTERPARMS = 0xb8, IPA_CMD_SET_DIAG_ASS = 0xb9, - 
IPA_CMD_SETBRIDGEPORT = 0xbe, + IPA_CMD_SETBRIDGEPORT_IQD = 0xbe, IPA_CMD_CREATE_ADDR = 0xc3, IPA_CMD_DESTROY_ADDR = 0xc4, IPA_CMD_REGISTER_LOCAL_ADDR = 0xd1, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 0ea0869120cf..2e65b989a9ea 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -137,7 +137,7 @@ static int qeth_setdel_makerc(struct qeth_card *card, int retcode) rc = 0; break; case IPA_RC_L2_UNSUPPORTED_CMD: - rc = -ENOSYS; + rc = -EOPNOTSUPP; break; case IPA_RC_L2_ADDR_TABLE_FULL: rc = -ENOSPC; @@ -683,6 +683,39 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) return rc ? -EINVAL : 0; } +static void qeth_promisc_to_bridge(struct qeth_card *card) +{ + struct net_device *dev = card->dev; + enum qeth_ipa_promisc_modes promisc_mode; + int role; + int rc; + + QETH_CARD_TEXT(card, 3, "pmisc2br"); + + if (!card->options.sbp.reflect_promisc) + return; + promisc_mode = (dev->flags & IFF_PROMISC) ? SET_PROMISC_MODE_ON + : SET_PROMISC_MODE_OFF; + if (promisc_mode == card->info.promisc_mode) + return; + + if (promisc_mode == SET_PROMISC_MODE_ON) { + if (card->options.sbp.reflect_promisc_primary) + role = QETH_SBP_ROLE_PRIMARY; + else + role = QETH_SBP_ROLE_SECONDARY; + } else + role = QETH_SBP_ROLE_NONE; + + rc = qeth_bridgeport_setrole(card, role); + QETH_DBF_TEXT_(SETUP, 2, "bpm%c%04x", + (promisc_mode == SET_PROMISC_MODE_ON) ? '+' : '-', rc); + if (!rc) { + card->options.sbp.role = role; + card->info.promisc_mode = promisc_mode; + } +} + static void qeth_l2_set_multicast_list(struct net_device *dev) { struct qeth_card *card = dev->ml_priv; @@ -704,9 +737,10 @@ static void qeth_l2_set_multicast_list(struct net_device *dev) qeth_l2_add_mc(card, ha->addr, 1); spin_unlock_bh(&card->mclock); - if (!qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE)) - return; - qeth_setadp_promisc_mode(card); + if (qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE)) + qeth_setadp_promisc_mode(card); + else + qeth_promisc_to_bridge(card); } static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -994,7 +1028,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) qeth_bridgeport_query_support(card); if (card->options.sbp.supported_funcs) dev_info(&card->gdev->dev, - "The device represents a HiperSockets Bridge Capable Port\n"); + "The device represents a Bridge Capable Port\n"); qeth_trace_features(card); if (!card->dev && qeth_l2_setup_netdev(card)) { @@ -1247,7 +1281,8 @@ static int qeth_l2_control_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd) { switch (cmd->hdr.command) { - case IPA_CMD_SETBRIDGEPORT: + case IPA_CMD_SETBRIDGEPORT_OSA: + case IPA_CMD_SETBRIDGEPORT_IQD: if (cmd->data.sbp.hdr.command_code == IPA_SBP_BRIDGE_PORT_STATE_CHANGE) { qeth_bridge_state_change(card, cmd); @@ -1533,7 +1568,7 @@ static void qeth_bridge_host_event_worker(struct work_struct *work) if (data->hostevs.lost_event_mask) { dev_info(&data->card->gdev->dev, -"Address notification from the HiperSockets Bridge Port stopped %s (%s)\n", +"Address notification from the Bridge Port stopped %s (%s)\n", data->card->dev->name, (data->hostevs.lost_event_mask == 0x01) ? 
"Overflow" @@ -1617,70 +1652,80 @@ static int qeth_bridgeport_makerc(struct qeth_card *card, struct _qeth_sbp_cbctl *cbctl, enum qeth_ipa_sbp_cmd setcmd) { int rc; + int is_iqd = (card->info.type == QETH_CARD_TYPE_IQD); - switch (cbctl->ipa_rc) { - case IPA_RC_SUCCESS: + if ((is_iqd && (cbctl->ipa_rc == IPA_RC_SUCCESS)) || + (!is_iqd && (cbctl->ipa_rc == cbctl->cmd_rc))) switch (cbctl->cmd_rc) { case 0x0000: rc = 0; break; + case 0x2B04: case 0x0004: - rc = -ENOSYS; + rc = -EOPNOTSUPP; break; + case 0x2B0C: case 0x000C: /* Not configured as bridge Port */ rc = -ENODEV; /* maybe not the best code here? */ dev_err(&card->gdev->dev, - "The HiperSockets device is not configured as a Bridge Port\n"); + "The device is not configured as a Bridge Port\n"); break; + case 0x2B14: case 0x0014: /* Another device is Primary */ switch (setcmd) { case IPA_SBP_SET_PRIMARY_BRIDGE_PORT: rc = -EEXIST; dev_err(&card->gdev->dev, - "The HiperSockets LAN already has a primary Bridge Port\n"); + "The LAN already has a primary Bridge Port\n"); break; case IPA_SBP_SET_SECONDARY_BRIDGE_PORT: rc = -EBUSY; dev_err(&card->gdev->dev, - "The HiperSockets device is already a primary Bridge Port\n"); + "The device is already a primary Bridge Port\n"); break; default: rc = -EIO; } break; + case 0x2B18: case 0x0018: /* This device is currently Secondary */ rc = -EBUSY; dev_err(&card->gdev->dev, - "The HiperSockets device is already a secondary Bridge Port\n"); + "The device is already a secondary Bridge Port\n"); break; + case 0x2B1C: case 0x001C: /* Limit for Secondary devices reached */ rc = -EEXIST; dev_err(&card->gdev->dev, - "The HiperSockets LAN cannot have more secondary Bridge Ports\n"); + "The LAN cannot have more secondary Bridge Ports\n"); break; + case 0x2B24: case 0x0024: /* This device is currently Primary */ rc = -EBUSY; dev_err(&card->gdev->dev, - "The HiperSockets device is already a primary Bridge Port\n"); + "The device is already a primary Bridge Port\n"); break; + case 0x2B20: case 0x0020: /* Not authorized by zManager */ rc = -EACCES; dev_err(&card->gdev->dev, - "The HiperSockets device is not authorized to be a Bridge Port\n"); + "The device is not authorized to be a Bridge Port\n"); break; default: rc = -EIO; } - break; - case IPA_RC_NOTSUPP: - rc = -ENOSYS; - break; - case IPA_RC_UNSUPPORTED_COMMAND: - rc = -ENOSYS; - break; - default: - rc = -EIO; - } + else + switch (cbctl->ipa_rc) { + case IPA_RC_NOTSUPP: + rc = -EOPNOTSUPP; + break; + case IPA_RC_UNSUPPORTED_COMMAND: + rc = -EOPNOTSUPP; + break; + default: + rc = -EIO; + } + if (rc) { QETH_CARD_TEXT_(card, 2, "SBPi%04x", cbctl->ipa_rc); QETH_CARD_TEXT_(card, 2, "SBPc%04x", cbctl->cmd_rc); @@ -1688,6 +1733,13 @@ static int qeth_bridgeport_makerc(struct qeth_card *card, return rc; } +static inline int ipa_cmd_sbp(struct qeth_card *card) +{ + return (card->info.type == QETH_CARD_TYPE_IQD) ? 
+ IPA_CMD_SETBRIDGEPORT_IQD : + IPA_CMD_SETBRIDGEPORT_OSA; +} + static int qeth_bridgeport_query_support_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { @@ -1719,7 +1771,7 @@ static void qeth_bridgeport_query_support(struct qeth_card *card) struct _qeth_sbp_cbctl cbctl; QETH_CARD_TEXT(card, 2, "brqsuppo"); - iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETBRIDGEPORT, 0); + iob = qeth_get_ipacmd_buffer(card, ipa_cmd_sbp(card), 0); if (!iob) return; cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); @@ -1796,7 +1848,7 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "brqports"); if (!(card->options.sbp.supported_funcs & IPA_SBP_QUERY_BRIDGE_PORTS)) return -EOPNOTSUPP; - iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETBRIDGEPORT, 0); + iob = qeth_get_ipacmd_buffer(card, ipa_cmd_sbp(card), 0); if (!iob) return -ENOMEM; cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); @@ -1808,10 +1860,9 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, cmd->data.sbp.hdr.seq_no = 1; rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb, (void *)&cbctl); - if (rc) + if (rc < 0) return rc; - rc = qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS); - return rc; + return qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS); } EXPORT_SYMBOL_GPL(qeth_bridgeport_query_ports); @@ -1864,7 +1915,7 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role) } if (!(card->options.sbp.supported_funcs & setcmd)) return -EOPNOTSUPP; - iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETBRIDGEPORT, 0); + iob = qeth_get_ipacmd_buffer(card, ipa_cmd_sbp(card), 0); if (!iob) return -ENOMEM; cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); @@ -1874,10 +1925,9 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role) cmd->data.sbp.hdr.seq_no = 1; rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb, (void *)&cbctl); - if (rc) + if (rc < 0) return rc; - rc = qeth_bridgeport_makerc(card, &cbctl, setcmd); - return rc; + return qeth_bridgeport_makerc(card, &cbctl, setcmd); } /** @@ -1898,7 +1948,7 @@ static int qeth_anset_makerc(struct qeth_card *card, int pnso_rc, u16 response) case 0x0004: case 0x0100: case 0x0106: - rc = -ENOSYS; + rc = -EOPNOTSUPP; dev_err(&card->gdev->dev, "Setting address notification failed\n"); break; diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 59e3aa538b4d..52673cd1db99 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -23,8 +23,6 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev, if (!card) return -EINVAL; - mutex_lock(&card->conf_mutex); - if (qeth_card_hw_is_reachable(card) && card->options.sbp.supported_funcs) rc = qeth_bridgeport_query_ports(card, @@ -59,8 +57,6 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev, rc = sprintf(buf, "%s\n", word); } - mutex_unlock(&card->conf_mutex); - return rc; } @@ -90,7 +86,9 @@ static ssize_t qeth_bridge_port_role_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (qeth_card_hw_is_reachable(card)) { + if (card->options.sbp.reflect_promisc) /* Forbid direct manipulation */ + rc = -EPERM; + else if (qeth_card_hw_is_reachable(card)) { rc = qeth_bridgeport_setrole(card, role); if (!rc) card->options.sbp.role = role; @@ -123,12 +121,8 @@ static ssize_t qeth_bridgeport_hostnotification_show(struct device *dev, if (!card) return -EINVAL; - mutex_lock(&card->conf_mutex); - 
enabled = card->options.sbp.hostnotification; - mutex_unlock(&card->conf_mutex); - return sprintf(buf, "%d\n", enabled); } @@ -167,10 +161,72 @@ static DEVICE_ATTR(bridge_hostnotify, 0644, qeth_bridgeport_hostnotification_show, qeth_bridgeport_hostnotification_store); +static ssize_t qeth_bridgeport_reflect_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct qeth_card *card = dev_get_drvdata(dev); + char *state; + + if (!card) + return -EINVAL; + + if (card->options.sbp.reflect_promisc) { + if (card->options.sbp.reflect_promisc_primary) + state = "primary"; + else + state = "secondary"; + } else + state = "none"; + + return sprintf(buf, "%s\n", state); +} + +static ssize_t qeth_bridgeport_reflect_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct qeth_card *card = dev_get_drvdata(dev); + int enable, primary; + int rc = 0; + + if (!card) + return -EINVAL; + + if (sysfs_streq(buf, "none")) { + enable = 0; + primary = 0; + } else if (sysfs_streq(buf, "primary")) { + enable = 1; + primary = 1; + } else if (sysfs_streq(buf, "secondary")) { + enable = 1; + primary = 0; + } else + return -EINVAL; + + mutex_lock(&card->conf_mutex); + + if (card->options.sbp.role != QETH_SBP_ROLE_NONE) + rc = -EPERM; + else { + card->options.sbp.reflect_promisc = enable; + card->options.sbp.reflect_promisc_primary = primary; + rc = 0; + } + + mutex_unlock(&card->conf_mutex); + + return rc ? rc : count; +} + +static DEVICE_ATTR(bridge_reflect_promisc, 0644, + qeth_bridgeport_reflect_show, + qeth_bridgeport_reflect_store); + static struct attribute *qeth_l2_bridgeport_attrs[] = { &dev_attr_bridge_role.attr, &dev_attr_bridge_state.attr, &dev_attr_bridge_hostnotify.attr, + &dev_attr_bridge_reflect_promisc.attr, NULL, }; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 04e42c649134..70eb2f61bb92 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3198,8 +3198,7 @@ static int qeth_l3_set_features(struct net_device *dev, netdev_features_t features) { struct qeth_card *card = dev->ml_priv; - u32 changed = dev->features ^ features; - int err; + netdev_features_t changed = dev->features ^ features; if (!(changed & NETIF_F_RXCSUM)) return 0; @@ -3208,11 +3207,7 @@ static int qeth_l3_set_features(struct net_device *dev, card->state == CARD_STATE_RECOVER) return 0; - err = qeth_l3_set_rx_csum(card, features & NETIF_F_RXCSUM); - if (err) - dev->features = features ^ NETIF_F_RXCSUM; - - return err; + return qeth_l3_set_rx_csum(card, features & NETIF_F_RXCSUM ? 
1 : 0); } static const struct ethtool_ops qeth_l3_ethtool_ops = { diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 0343ae386f03..ecd7c0f82481 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1524,21 +1524,12 @@ static void vnt_configure(struct ieee80211_hw *hw, struct vnt_private *priv = hw->priv; u8 rx_mode = 0; - *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_PROMISC_IN_BSS | - FIF_BCN_PRBRESP_PROMISC; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; VNSvInPortB(priv->PortOffset + MAC_REG_RCR, &rx_mode); dev_dbg(&priv->pcid->dev, "rx mode in = %x\n", rx_mode); - if (changed_flags & FIF_PROMISC_IN_BSS) { - /* unconditionally log net taps */ - if (*total_flags & FIF_PROMISC_IN_BSS) - rx_mode |= RCR_UNICAST; - else - rx_mode &= ~RCR_UNICAST; - } - if (changed_flags & FIF_ALLMULTI) { if (*total_flags & FIF_ALLMULTI) { unsigned long flags; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index ab3ab84cb0a7..0d97b6457ead 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -785,8 +785,7 @@ static void vnt_configure(struct ieee80211_hw *hw, u8 rx_mode = 0; int rc; - *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_PROMISC_IN_BSS | - FIF_BCN_PRBRESP_PROMISC; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; rc = vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); @@ -796,14 +795,6 @@ static void vnt_configure(struct ieee80211_hw *hw, dev_dbg(&priv->usb->dev, "rx mode in = %x\n", rx_mode); - if (changed_flags & FIF_PROMISC_IN_BSS) { - /* unconditionally log net taps */ - if (*total_flags & FIF_PROMISC_IN_BSS) - rx_mode |= RCR_UNICAST; - else - rx_mode &= ~RCR_UNICAST; - } - if (changed_flags & FIF_ALLMULTI) { if (*total_flags & FIF_ALLMULTI) { if (priv->mc_list_count > 2) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 3a57a1b0fb51..b50642870a43 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -85,7 +85,7 @@ int afs_open_socket(void) return -ENOMEM; } - ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); if (ret < 0) { destroy_workqueue(afs_async_calls); _leave(" = %d [socket]", ret); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d08e079ea5d3..754fd6c0b747 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -921,8 +921,8 @@ static int tcp_accept_from_sock(struct connection *con) mutex_unlock(&connections_lock); memset(&peeraddr, 0, sizeof(peeraddr)); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &newsock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &newsock); if (result < 0) return -ENOMEM; @@ -1173,8 +1173,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) goto out_err; @@ -1258,8 +1258,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, addr_len = sizeof(struct sockaddr_in6); /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = 
sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) { log_print("Can't create listening comms socket"); goto create_out; @@ -1365,8 +1365,8 @@ static int sctp_listen_for_all(void) log_print("Using SCTP for communications"); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET, - IPPROTO_SCTP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_SEQPACKET, IPPROTO_SCTP, &sock); if (result < 0) { log_print("Can't create comms socket, check SCTP is loaded"); goto out; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d5cda067115a..8821b9a8689e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -126,6 +126,27 @@ struct bpf_prog_aux { struct work_struct work; }; +struct bpf_array { + struct bpf_map map; + u32 elem_size; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same + * prog_type and JITed flag + */ + enum bpf_prog_type owner_prog_type; + bool owner_jited; + union { + char value[0] __aligned(8); + struct bpf_prog *prog[0] __aligned(8); + }; +}; +#define MAX_TAIL_CALL_CNT 32 + +u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +void bpf_prog_array_map_clear(struct bpf_map *map); +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); @@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_tail_call_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 606563ef8a72..9012f8775208 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -110,7 +110,29 @@ static inline bool is_zero_ether_addr(const u8 *addr) */ static inline bool is_multicast_ether_addr(const u8 *addr) { - return 0x01 & addr[0]; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + u32 a = *(const u32 *)addr; +#else + u16 a = *(const u16 *)addr; +#endif +#ifdef __BIG_ENDIAN + return 0x01 & (a >> ((sizeof(a) * 8) - 8)); +#else + return 0x01 & a; +#endif +} + +static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 +#ifdef __BIG_ENDIAN + return 0x01 & ((*(const u64 *)addr) >> 56); +#else + return 0x01 & (*(const u64 *)addr); +#endif +#else + return is_multicast_ether_addr(addr); +#endif } /** @@ -169,6 +191,24 @@ static inline bool is_valid_ether_addr(const u8 *addr) } /** + * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol + * @proto: Ethertype/length value to be tested + * + * Check that the value from the Ethertype/length field is a valid Ethertype. + * + * Return true if the valid is an 802.3 supported Ethertype. 
+ */ +static inline bool eth_proto_is_802_3(__be16 proto) +{ +#ifndef __BIG_ENDIAN + /* if CPU is little endian mask off bits representing LSB */ + proto &= htons(0xFF00); +#endif + /* cast both to u16 and compare since LSB can be ignored */ + return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); +} + +/** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address * diff --git a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be..17724f6ea983 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ @@ -267,6 +277,14 @@ struct bpf_prog_aux; .off = 0, \ .imm = 0 }) +/* Internal classic blocks for direct assignment */ + +#define __BPF_STMT(CODE, K) \ + ((struct sock_filter) BPF_STMT(CODE, K)) + +#define __BPF_JUMP(CODE, K, JT, JF) \ + ((struct sock_filter) BPF_JUMP(CODE, K, JT, JF)) + #define bytes_to_bpf_size(bytes) \ ({ \ int bpf_size = -EINVAL; \ @@ -360,12 +378,9 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -void bpf_prog_select_runtime(struct bpf_prog *fp); +int bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len); - struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); @@ -377,14 +392,17 @@ static inline void bpf_prog_unlock_free(struct bpf_prog *fp) __bpf_prog_free(fp); } +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); - -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 15928f0647e4..6ba7cf23748f 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -368,6 +368,11 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, bool cold); extern void free_hot_cold_page_list(struct list_head *list, bool cold); +struct page_frag_cache; +extern void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask); +extern void __free_page_frag(void *addr); + extern void __free_kmem_pages(struct page *page, unsigned int order); extern void free_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 66a7d7600f43..b49cf923becc 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ 
-74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po) struct module; struct pppox_proto { - int (*create)(struct net *net, struct socket *sock); + int (*create)(struct net *net, struct socket *sock, int kern); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 920e4457ce6e..b9ab677c0c0a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -539,7 +539,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= ETH_P_802_3_MIN) { + if (eth_proto_is_802_3(proto)) { skb->protocol = proto; return; } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..193ad488d3e2 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -130,5 +130,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); #endif diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index 66c30a763b10..11f00cdabe3d 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -23,7 +23,8 @@ struct mdio_gpio_platform_data { bool mdio_active_low; bool mdo_active_low; - unsigned int phy_mask; + u32 phy_mask; + u32 phy_ignore_ta_mask; int irqs[PHY_MAX_ADDR]; /* reset callback */ int (*reset)(struct mii_bus *bus); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007..0038ac7466fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,24 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) + +struct page_frag_cache { + void * va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. 
+ */ + unsigned int pagecnt_bias; + bool pfmemalloc; +}; + typedef unsigned long __nocast vm_flags_t; /* diff --git a/include/linux/net.h b/include/linux/net.h index 738ea48be889..04aa06852771 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -38,7 +38,6 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 -#define SOCK_EXTERNALLY_ALLOCATED 5 #ifndef ARCH_HAS_SOCKET_TYPES /** @@ -208,7 +207,7 @@ void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); -int sock_create_kern(int family, int type, int proto, struct socket **res); +int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7d59dc6ab789..9672781c593d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,7 +66,6 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ - NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -125,7 +124,6 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -161,8 +159,7 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_HW_SWITCH_OFFLOAD) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) /* * If one device doesn't support one of these features, then disable it diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05b9a694e213..51f8d2f5dc3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1564,7 +1564,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_SWITCHDEV - const struct swdev_ops *swdev_ops; + const struct switchdev_ops *switchdev_ops; #endif const struct header_ops *header_ops; @@ -1652,7 +1652,14 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; +#ifdef CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *ingress_cl_list; +#endif struct netdev_queue __rcu *ingress_queue; +#ifdef CONFIG_NETFILTER_INGRESS + struct list_head nf_hooks_ingress; +#endif + unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; @@ -2552,10 +2559,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { - if (WARN_ON(!dev_queue)) { - pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); - return; - } set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2571,15 +2574,7 @@ static inline void netif_stop_queue(struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } -static inline void 
netif_tx_stop_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_stop_queue(txq); - } -} +void netif_tx_stop_all_queues(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { @@ -2840,6 +2835,9 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues); + /* * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * as a distribution range limit for the returned value. diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 63560d0a8dfe..f5ff5d156da8 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,10 +54,12 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; + struct list_head *hook_list; int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, + struct list_head *hook_list, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->in = indev; p->out = outdev; p->sk = sk; + p->hook_list = hook_list; p->okfn = okfn; } @@ -79,16 +82,17 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; + struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - void *priv; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct net_device *dev; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { @@ -131,26 +135,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) return static_key_false(&nf_hooks_needed[pf][hook]); - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #else -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #endif +static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +{ + return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook); +} + int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook - * + * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. 
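Carrying the hook list inside nf_hook_state is what lets the new per-device ingress hooks (netfilter_ingress.h below) reuse nf_hook_slow() unchanged: the generic traversal no longer assumes the global nf_hooks table. Under the extended nf_hook_ops, a netdev-ingress hook might be declared as in this sketch; the nf_hookfn signature matches the typedef shown above, while the my_* names and the device assignment are illustrative assumptions:

static unsigned int my_ingress_fn(const struct nf_hook_ops *ops,
				  struct sk_buff *skb,
				  const struct nf_hook_state *state)
{
	/* Inspect skb here; NF_ACCEPT lets the packet continue up the stack. */
	return NF_ACCEPT;
}

static struct nf_hook_ops my_ingress_ops = {
	.hook		= my_ingress_fn,
	.dev		= NULL,	/* assumed: set to the target net_device before registering */
	.pf		= NFPROTO_NETDEV,
	.hooknum	= NF_NETDEV_INGRESS,
	.priority	= 0,
};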
@@ -166,8 +177,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; - nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, sk, okfn); + nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh, + pf, indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34b172301558..ffdfdc24952a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -122,13 +122,13 @@ struct ip_set_skbinfo { struct ip_set; #define ext_timeout(e, s) \ -(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) +((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])) #define ext_counter(e, s) \ -(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) +((struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])) #define ext_comment(e, s) \ -(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) +((struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])) #define ext_skbinfo(e, s) \ -(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO]) +((struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])) typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -533,29 +533,9 @@ bitmap_bytes(u32 a, u32 b) #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_comment.h> -static inline int +int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active) -{ - if (SET_WITH_TIMEOUT(set)) { - unsigned long *timeout = ext_timeout(e, set); - - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(active ? 
ip_set_timeout_get(timeout) - : *timeout))) - return -EMSGSIZE; - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - return -EMSGSIZE; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) - return -EMSGSIZE; - if (SET_WITH_SKBINFO(set) && - ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) - return -EMSGSIZE; - return 0; -} + const void *e, bool active); #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a3e215bb0241..09f38206c18f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -62,6 +62,7 @@ struct xt_mtchk_param { void *matchinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /** @@ -92,6 +93,7 @@ struct xt_tgchk_param { void *targinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /* Target destructor parameters */ diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..cb0727fe2b3d --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,41 @@ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include <linux/netfilter.h> +#include <linux/netdevice.h> + +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return nf_hook_list_active(&skb->dev->nf_hooks_ingress, + NFPROTO_NETDEV, NF_NETDEV_INGRESS); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_state state; + + nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, + skb->dev, NULL, NULL); + return nf_hook_slow(skb, &state); +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->nf_hooks_ingress); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6835c1279df7..9120edb650a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -28,6 +28,8 @@ struct netlink_skb_parms { __u32 dst_group; __u32 flags; struct sock *sk; + bool nsid_is_set; + int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/include/linux/phy.h b/include/linux/phy.h index 685809835b5c..701c7a3946e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -181,6 +181,9 @@ struct mii_bus { /* PHY addresses to be ignored when probing */ u32 phy_mask; + /* PHY addresses to ignore the TA/read failure */ + u32 phy_ignore_ta_mask; + /* * Pointer to an array of interrupts, each PHY's * interrupt at the index matching its address diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7b8e260c4a27..a2324fb45cf4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -79,17 +79,9 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); -#else -static inline void net_inc_ingress_queue(void) -{ -} - -static inline 
void net_dec_ingress_queue(void) -{ -} #endif extern void rtnetlink_init(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f15154a879c7..b617095adb88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,7 +34,7 @@ #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <linux/sched.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> /* A. Checksumming of received packets by device. * @@ -170,12 +170,14 @@ struct nf_bridge_info { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE - } orig_proto; + } orig_proto:8; bool pkt_otherhost; unsigned int mask; struct net_device *physindev; - struct net_device *physoutdev; - char neigh_header[8]; + union { + struct net_device *physoutdev; + char neigh_header[8]; + }; __be32 ipv4_daddr; }; #endif @@ -919,7 +921,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) skb->hash = hash; } -void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) @@ -928,6 +929,8 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); + static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; @@ -1935,8 +1938,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); + else if (skb_flow_dissect_flow_keys(skb, &keys)) + skb_set_transport_header(skb, keys.basic.thoff); else skb_set_transport_header(skb, offset_hint); } @@ -2127,10 +2130,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, @@ -2185,6 +2184,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +static inline void skb_free_frag(void *addr) +{ + __free_page_frag(addr); +} + void *napi_alloc_frag(unsigned int fragsz); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); @@ -3050,7 +3054,7 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, } } else if (skb->csum_bad) { /* ip_summed == CHECKSUM_NONE in this case */ - return 1; + return (__force __sum16)1; } skb->csum = psum; @@ -3298,9 +3302,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM @@ -3355,15 +3356,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __u16 csum; + skb_transport_offset(skb); + __wsum partial; - csum = csum_fold(csum_partial(skb_transport_header(skb), - plen, skb->csum)); + partial = csum_partial(skb_transport_header(skb), plen, skb->csum); skb->csum = res; 
SKB_GSO_CB(skb)->csum_start -= plen; - return csum; + return csum_fold(partial); } static inline bool skb_is_gso(const struct sk_buff *skb) @@ -3418,10 +3418,9 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); - -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, - const struct flow_keys *keys, int hlen); +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)); /** * skb_head_is_locked - Determine if the skb->head is locked down diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e8bbf403618f..48c3696e8645 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -149,11 +149,16 @@ struct tcp_sock { * sum(delta(rcv_nxt)), or how many bytes * were acked. */ + u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ - + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. @@ -201,6 +206,7 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ @@ -328,6 +334,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. 
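[Editor's note: the segs_in/segs_out counters added to struct tcp_sock above implement the RFC 4898 tcpEStatsPerfSegsIn/SegsOut objects, bumped once per received and transmitted segment. Assuming they are also exported through TCP_INFO in this tree (the tcpi_segs_in/tcpi_segs_out field names below are an assumption), a userspace reader would look like:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

static void my_print_seg_counters(int fd)
{
        struct tcp_info info;
        socklen_t len = sizeof(info);

        if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0)
                /* tcpi_segs_in/tcpi_segs_out: assumed TCP_INFO fields */
                printf("segs in %u out %u\n",
                       info.tcpi_segs_in, info.tcpi_segs_out);
}
]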
*/ struct request_sock *fastopen_rsk; + u32 *saved_syn; }; enum tsq_flags { @@ -395,4 +402,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog) return 0; } +static inline void tcp_saved_syn_free(struct tcp_sock *tp) +{ + kfree(tp->saved_syn); + tp->saved_syn = NULL; +} + #endif /* _LINUX_TCP_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 80456f72d70a..def59d3a34d5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -142,6 +142,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); void addrconf_dad_failure(struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 172632dd9930..db639a4c5ab8 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, - gfp_t priority, unsigned short type); + gfp_t priority, unsigned short type, int kern); /**** TRANSPORT ****/ diff --git a/include/net/bond_options.h b/include/net/bond_options.h index ea6546d2c946..c28aca25320e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -63,6 +63,9 @@ enum { BOND_OPT_LP_INTERVAL, BOND_OPT_SLAVES, BOND_OPT_TLB_DYNAMIC_LB, + BOND_OPT_AD_ACTOR_SYS_PRIO, + BOND_OPT_AD_ACTOR_SYSTEM, + BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 78ed135e9dea..20defc0353d1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -136,6 +136,9 @@ struct bond_params { int packets_per_slave; int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; + u16 ad_actor_sys_prio; + u16 ad_user_port_key; + u8 ad_actor_system[ETH_ALEN]; }; struct bond_parm_tbl { diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8d6813cd5b2..d63ecec73090 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -111,7 +111,7 @@ enum ieee80211_band { * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY - * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. 
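[Editor's note: the saved_syn pointer and tcp_saved_syn_free() above are the kernel half of SYN-header capture. A hedged userspace sketch, assuming the companion socket options in this series are named TCP_SAVE_SYN (armed on the listener) and TCP_SAVED_SYN (read from the accepted socket, after which the kernel can drop its copy via tcp_saved_syn_free()):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int my_read_syn_headers(int listen_fd, int conn_fd,
                               char *buf, socklen_t *len)
{
        int one = 1;

        /* on the listener, before the connection of interest arrives */
        if (setsockopt(listen_fd, IPPROTO_TCP, TCP_SAVE_SYN,
                       &one, sizeof(one)) < 0)
                return -1;
        /* later, on a socket returned by accept(): fetch the stored
         * network + TCP headers of that connection's SYN
         */
        return getsockopt(conn_fd, IPPROTO_TCP, TCP_SAVED_SYN, buf, len);
}
]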
* @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted @@ -129,7 +129,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_GO_CONCURRENT = 1<<10, + IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, }; diff --git a/include/net/checksum.h b/include/net/checksum.h index 0a55ac715077..2d1d73cb773e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -122,7 +122,9 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { - *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from), to)); + __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); + + *sum = csum_fold(csum_add(tmp, (__force __wsum)to)); } /* Implements RFC 1624 (Incremental Internet Checksum) diff --git a/include/net/codel.h b/include/net/codel.h index 1e18005f7f65..267e70210061 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -7,7 +7,7 @@ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> - * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -119,12 +119,14 @@ static inline u32 codel_time_to_us(codel_time_t val) /** * struct codel_params - contains codel parameters * @target: target queue size (in time units) + * @ce_threshold: threshold for marking packets with ECN CE * @interval: width of moving time window * @mtu: device mtu, or minimal queue backlog in bytes. 
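[Editor's note: the reworked csum_replace4() above routes the arithmetic through an explicit __wsum temporary, keeping the sparse __force casts localized without changing behavior. Its classic caller is an address-rewriting path; a minimal sketch:

#include <linux/ip.h>
#include <net/checksum.h>

static void my_rewrite_daddr(struct iphdr *iph, __be32 new_daddr)
{
        /* incrementally patch the header checksum (RFC 1624) instead of
         * recomputing it over the whole header
         */
        csum_replace4(&iph->check, iph->daddr, new_daddr);
        iph->daddr = new_daddr;
}
]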
* @ecn: is Explicit Congestion Notification enabled */ struct codel_params { codel_time_t target; + codel_time_t ce_threshold; codel_time_t interval; u32 mtu; bool ecn; @@ -161,19 +163,24 @@ struct codel_vars { * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() * ecn_mark: number of packets we ECN marked instead of dropping + * ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; u32 drop_count; u32 ecn_mark; + u32 ce_mark; }; +#define CODEL_DISABLED_THRESHOLD INT_MAX + static void codel_params_init(struct codel_params *params, const struct Qdisc *sch) { params->interval = MS2TIME(100); params->target = MS2TIME(5); params->mtu = psched_mtu(qdisc_dev(sch)); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ecn = false; } @@ -354,6 +361,9 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->rec_inv_sqrt); } end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; return skb; } #endif diff --git a/include/net/dst.h b/include/net/dst.h index 0fb99a26e973..2bc73f8a00a9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -109,7 +109,6 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL -#define DST_METRICS_FORCE_OVERWRITE 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) @@ -120,11 +119,6 @@ static inline bool dst_metrics_read_only(const struct dst_entry *dst) return dst->_metrics & DST_METRICS_READ_ONLY; } -static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst) -{ - dst->_metrics |= DST_METRICS_FORCE_OVERWRITE; -} - void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); static inline void dst_destroy_metrics_generic(struct dst_entry *dst) @@ -355,18 +349,6 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, __skb_tunnel_rx(skb, dev, net); } -/* Children define the path of the packet through the - * Linux networking. Thus, destinations are stackable. - */ - -static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) -{ - struct dst_entry *child = dst_clone(skb_dst(skb)->child); - - skb_dst_drop(skb); - return child; -} - int dst_discard_sk(struct sock *sk, struct sk_buff *skb); static inline int dst_discard(struct sk_buff *skb) { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h new file mode 100644 index 000000000000..bac9c1421f58 --- /dev/null +++ b/include/net/flow_dissector.h @@ -0,0 +1,167 @@ +#ifndef _NET_FLOW_DISSECTOR_H +#define _NET_FLOW_DISSECTOR_H + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/in6.h> +#include <uapi/linux/if_ether.h> + +/** + * struct flow_dissector_key_basic: + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. 
TCP/UDP) + */ +struct flow_dissector_key_basic { + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +/** + * struct flow_dissector_key_addrs: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + */ +struct flow_dissector_key_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; +}; + +/** + * flow_dissector_key_tp_ports: + * @ports: port numbers of Transport header + * src: source port number + * dst: destination port number + */ +struct flow_dissector_key_ports { + union { + __be32 ports; + struct { + __be16 src; + __be16 dst; + }; + }; +}; + +/** + * struct flow_dissector_key_ipv6_addrs: + * @src: source ip address + * @dst: destination ip address + */ +struct flow_dissector_key_ipv6_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + struct in6_addr src; + struct in6_addr dst; +}; + +/** + * struct flow_dissector_key_eth_addrs: + * @src: source Ethernet address + * @dst: destination Ethernet address + */ +struct flow_dissector_key_eth_addrs { + /* (dst,src) must be grouped, in the same way than in ETH header */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; +}; + +enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ + FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ + + FLOW_DISSECTOR_KEY_MAX, +}; + +struct flow_dissector_key { + enum flow_dissector_key_id key_id; + size_t offset; /* offset of struct flow_dissector_key_* + in target the struct */ +}; + +struct flow_dissector { + unsigned int used_keys; /* each bit repesents presence of one key id */ + unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; +}; + +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count); + +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, + void *data, __be16 proto, int nhoff, int hlen); + +static inline bool skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + return __skb_flow_dissect(skb, flow_dissector, target_container, + NULL, 0, 0, 0); +} + +struct flow_keys { + struct flow_dissector_key_addrs addrs; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_basic basic; +}; + +extern struct flow_dissector flow_keys_dissector; +extern struct flow_dissector flow_keys_buf_dissector; + +static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, + struct flow_keys *flow) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(skb, &flow_keys_dissector, flow, + NULL, 0, 0, 0); +} + +static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, + void *data, __be16 proto, + int nhoff, int hlen) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, + data, proto, nhoff, hlen); +} + +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); + +static inline __be32 
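[Editor's note: skb_flow_dissect_flow_keys() above is the canned form of the new dissector API: a fixed struct flow_keys layout fed through the global flow_keys_dissector, replacing the old monolithic skb_flow_dissect(). A sketch of a caller that only needs the transport protocol:

#include <linux/in.h>           /* IPPROTO_TCP */
#include <net/flow_dissector.h>

static bool my_skb_is_tcp(const struct sk_buff *skb)
{
        struct flow_keys keys;

        /* zeroes keys, then fills keys.basic/.addrs/.ports from the packet */
        if (!skb_flow_dissect_flow_keys(skb, &keys))
                return false;   /* not dissectable (unknown protocol etc.) */
        return keys.basic.ip_proto == IPPROTO_TCP;
}
]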
skb_flow_get_ports(const struct sk_buff *skb, + int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); +void __skb_get_hash(struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + +#endif diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h deleted file mode 100644 index dc8fd81412bf..000000000000 --- a/include/net/flow_keys.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _NET_FLOW_KEYS_H -#define _NET_FLOW_KEYS_H - -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, - void *data, __be16 proto, int nhoff, int hlen); -static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) -{ - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); -} -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} -u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); -#endif diff --git a/include/net/geneve.h b/include/net/geneve.h index 14fb8d3390b4..2a0543a1899d 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,11 @@ struct genevehdr { struct geneve_opt options[]; }; +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + #ifdef CONFIG_INET struct geneve_sock; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 4a92423eefa5..279f83591971 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + sock_release(sk->sk_socket); } #endif diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 73fe0f9525d9..774d24151d4a 100644 --- a/include/net/inet_hashtables.h +++ 
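[Editor's note: make_flow_keys_digest() squeezes the dissected keys into 16 bytes precisely so they fit in skb->cb, as the flow_keys_digest comment above notes for sch_choke. A hedged sketch of that pattern; the my_* names and cb layout are illustrative:

#include <linux/string.h>
#include <net/flow_dissector.h>
#include <net/sch_generic.h>    /* qdisc_skb_cb() */

struct my_skb_cb {
        struct flow_keys_digest digest; /* 16 bytes, fits the qdisc cb area */
};

static void my_record_flow(struct sk_buff *skb, const struct flow_keys *keys)
{
        struct my_skb_cb *cb = (struct my_skb_cb *)qdisc_skb_cb(skb)->data;

        make_flow_keys_digest(&cb->digest, keys);
}

static bool my_same_flow(const struct sk_buff *a, const struct sk_buff *b)
{
        /* definitive flow match, unlike the 32-bit skb->hash */
        return !memcmp(qdisc_skb_cb(a)->data, qdisc_skb_cb(b)->data,
                       sizeof(struct flow_keys_digest));
}
]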
b/include/net/inet_hashtables.h @@ -148,8 +148,6 @@ struct inet_hashinfo { */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; - - atomic_t bsockets; }; static inline struct inet_ehash_bucket *inet_ehash_bucket( diff --git a/include/net/ip.h b/include/net/ip.h index d14af7edd197..7921a36b805c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,7 +31,7 @@ #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> struct sock; @@ -108,9 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); -int ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); -int ip_do_nat(struct sk_buff *skb); +int ip_do_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); @@ -360,10 +359,10 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; - keys.src = inet->inet_saddr; - keys.dst = inet->inet_daddr; - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = inet->inet_saddr; + keys.addrs.dst = inet->inet_daddr; + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } @@ -478,6 +477,16 @@ enum ip_defrag_users { IP_DEFRAG_MACVLAN, }; +/* Return true if the value of 'user' is between 'lower_bond' + * and 'upper_bond' inclusively. 
+ */ +static inline bool ip_defrag_user_in_between(u32 user, + enum ip_defrag_users lower_bond, + enum ip_defrag_users upper_bond) +{ + return user >= lower_bond && user <= upper_bond; +} + int ip_defrag(struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 20e80fa7bbdd..e00018011028 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -121,44 +121,14 @@ struct rt6_info { struct rt6key rt6i_prefsrc; struct inet6_dev *rt6i_idev; - unsigned long _rt6i_peer; u32 rt6i_metric; + u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; }; -static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt) -{ - return inetpeer_ptr(rt->_rt6i_peer); -} - -static inline bool rt6_has_peer(struct rt6_info *rt) -{ - return inetpeer_ptr_is_peer(rt->_rt6i_peer); -} - -static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base) -{ - inetpeer_init_ptr(&rt->_rt6i_peer, base); -} - -static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort) -{ - inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer); -} - static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; @@ -189,15 +159,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } -static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - struct dst_entry *new = (struct dst_entry *) from; - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(new); - rt->dst.from = new; -} - static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. 
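[Editor's note: ip_defrag_user_in_between() above is a simple inclusive range test over enum ip_defrag_users. A hedged usage sketch; the conntrack range names are assumed from the existing enum, where each conntrack user occupies a contiguous block so per-zone offsets stay inside it:

#include <net/ip.h>

static bool my_defrag_user_is_conntrack(u32 user)
{
        /* assumes IP_DEFRAG_CONNTRACK_IN .. __IP_DEFRAG_CONNTRACK_BRIDGE_IN_END
         * covers the conntrack blocks of the enum contiguously
         */
        return ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
                                         __IP_DEFRAG_CONNTRACK_BRIDGE_IN_END);
}
]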
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eec8ad3c9843..aab8190d16e8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -19,7 +19,7 @@ #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/flow.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #include <net/snmp.h> #define SIN6_LEN_RFC2133 24 @@ -239,8 +239,10 @@ struct ip6_flowlabel { struct net *fl_net; }; -#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) -#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) +#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWLABEL_STATELESS_FLAG cpu_to_be32(0x00080000) + #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 @@ -696,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; - keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); + keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } @@ -719,6 +721,9 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, hash ^= hash >> 12; flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + + if (net->ipv6.sysctl.flowlabel_state_ranges) + flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; } return flowlabel; diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 0134681acc4c..fe994d2e5286 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e3668b44c29..2f8b7decace0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -337,10 +337,16 @@ enum ieee80211_bss_change { * enum ieee80211_event_type - event to be notified to the low level driver * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver. * @MLME_EVENT: event related to MLME + * @BAR_RX_EVENT: a BAR was received + * @BA_FRAME_TIMEOUT: Frames were released from the reordering buffer because + * they timed out. This won't be called for each frame released, but only + * once each time the timeout triggers. 
*/ enum ieee80211_event_type { RSSI_EVENT, MLME_EVENT, + BAR_RX_EVENT, + BA_FRAME_TIMEOUT, }; /** @@ -354,7 +360,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +394,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -400,16 +406,31 @@ struct ieee80211_mlme_event { }; /** + * struct ieee80211_ba_event - data attached for BlockAck related events + * @sta: pointer to the &ieee80211_sta to which this event relates + * @tid: the tid + * @ssn: the starting sequence number (for %BAR_RX_EVENT) + */ +struct ieee80211_ba_event { + struct ieee80211_sta *sta; + u16 tid; + u16 ssn; +}; + +/** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT + * @u:union holding the fields above */ struct ieee80211_event { enum ieee80211_event_type type; union { struct ieee80211_rssi_event rssi; struct ieee80211_mlme_event mlme; + struct ieee80211_ba_event ba; } u; }; @@ -1480,6 +1501,47 @@ struct ieee80211_key_conf { u8 key[0]; }; +#define IEEE80211_MAX_PN_LEN 16 + +/** + * struct ieee80211_key_seq - key sequence counter + * + * @tkip: TKIP data, containing IV32 and IV16 in host byte order + * @ccmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_cmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_gmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @gcmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @hw: data for HW-only (e.g. cipher scheme) keys + */ +struct ieee80211_key_seq { + union { + struct { + u32 iv32; + u16 iv16; + } tkip; + struct { + u8 pn[6]; + } ccmp; + struct { + u8 pn[6]; + } aes_cmac; + struct { + u8 pn[6]; + } aes_gmac; + struct { + u8 pn[6]; + } gcmp; + struct { + u8 seq[IEEE80211_MAX_PN_LEN]; + u8 seq_len; + } hw; + }; +}; + /** * struct ieee80211_cipher_scheme - cipher scheme * @@ -1797,6 +1859,10 @@ struct ieee80211_txq { * the driver returns 1. This also forces the driver to advertise its * supported cipher suites. * + * @IEEE80211_HW_SUPPORT_FAST_XMIT: The driver/hardware supports fast-xmit, + * this currently requires only the ability to calculate the duration + * for frames. + * * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface * queue mapping in order to use different queues (not just one per AC) * for different virtual interfaces. 
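[Editor's note: struct ieee80211_key_seq above moves ahead of the ops table because the new get_key_seq callback (replacing the TKIP-only get_tkip_seq, see the ieee80211_ops change later in this hunk) takes it as an out parameter. A hedged driver-side sketch; my_hw_read_pn/my_hw_read_tkip are made-up hardware accessors:

#include <net/mac80211.h>

static void my_get_key_seq(struct ieee80211_hw *hw,
                           struct ieee80211_key_conf *key,
                           struct ieee80211_key_seq *seq)
{
        switch (key->cipher) {
        case WLAN_CIPHER_SUITE_CCMP:
                /* PN most significant byte first, per the struct's kerneldoc */
                my_hw_read_pn(hw, key->hw_key_idx, seq->ccmp.pn);
                break;
        case WLAN_CIPHER_SUITE_TKIP:
                my_hw_read_tkip(hw, key->hw_key_idx,
                                &seq->tkip.iv32, &seq->tkip.iv16);
                break;
        }
}
]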
See the doc section on HW queue @@ -1845,7 +1911,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, - /* free slots */ + IEEE80211_HW_SUPPORT_FAST_XMIT = 1<<17, IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_QUEUE_CONTROL = 1<<20, @@ -1939,8 +2005,8 @@ enum ieee80211_hw_flags { * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * * @netdev_features: netdev features to be set in each netdev created - * from this HW. Note only HW checksum features are currently - * compatible with mac80211. Other feature bits will be rejected. + * from this HW. Note that not all features are usable with mac80211, + * other features will be rejected during HW registration. * * @uapsd_queues: This bitmap is included in (re)association frame to indicate * for each access category if it is uAPSD trigger-enabled and delivery- @@ -2504,10 +2570,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * stack. It is always safe to pass more frames than requested, * but this has negative impact on power consumption. * - * @FIF_PROMISC_IN_BSS: promiscuous mode within your BSS, - * think of the BSS as your network segment and then this corresponds - * to the regular ethernet device promiscuous mode. - * * @FIF_ALLMULTI: pass all multicast frames, this is used if requested * by the user or if the hardware is not capable of filtering by * multicast address. @@ -2524,8 +2586,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * mac80211 needs to do and the amount of CPU wakeups, so you should * honour this flag if possible. * - * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * @FIF_CONTROL: pass control frames (except for PS Poll) addressed to this + * station * * @FIF_OTHER_BSS: pass frames destined to other BSSes * @@ -2535,7 +2597,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { - FIF_PROMISC_IN_BSS = 1<<0, FIF_ALLMULTI = 1<<1, FIF_FCSFAIL = 1<<2, FIF_PLCPFAIL = 1<<3, @@ -2818,9 +2879,9 @@ enum ieee80211_reconfig_type { * Returns zero if statistics are available. * The callback can sleep. * - * @get_tkip_seq: If your device implements TKIP encryption in hardware this - * callback should be provided to read the TKIP transmit IVs (both IV32 - * and IV16) for the given key from hardware. + * @get_key_seq: If your device implements encryption in hardware and does + * IV/PN assignment then this callback should be provided to read the + * IV/PN for the given key from hardware. * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this @@ -3003,7 +3064,7 @@ enum ieee80211_reconfig_type { * The callback can sleep. * @event_callback: Notify driver about any event in mac80211. See * &enum ieee80211_event_type for the different types. - * The callback can sleep. + * The callback must be atomic. * * @release_buffered_frames: Release buffered frames according to the given * parameters. 
In the case where the driver buffers some frames for @@ -3219,8 +3280,9 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*get_stats)(struct ieee80211_hw *hw, struct ieee80211_low_level_stats *stats); - void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, - u32 *iv32, u16 *iv16); + void (*get_key_seq)(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -3468,14 +3530,15 @@ enum ieee80211_tpt_led_trigger_flags { }; #ifdef CONFIG_MAC80211_LEDS -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len); +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len); #endif /** * ieee80211_get_tx_led_name - get name of TX LED @@ -3489,7 +3552,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_tx_led_name(hw); @@ -3510,7 +3573,7 @@ static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_rx_led_name(hw); @@ -3531,7 +3594,7 @@ static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_assoc_led_name(hw); @@ -3552,7 +3615,7 @@ static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_radio_led_name(hw); @@ -3573,7 +3636,7 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) * * Note: This function must be called before ieee80211_register_hw(). 
*/ -static inline char * +static inline const char * ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) @@ -4254,40 +4317,6 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, u8 *k1, u8 *k2); /** - * struct ieee80211_key_seq - key sequence counter - * - * @tkip: TKIP data, containing IV32 and IV16 in host byte order - * @ccmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_cmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_gmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @gcmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - */ -struct ieee80211_key_seq { - union { - struct { - u32 iv32; - u16 iv16; - } tkip; - struct { - u8 pn[6]; - } ccmp; - struct { - u8 pn[6]; - } aes_cmac; - struct { - u8 pn[6]; - } aes_gmac; - struct { - u8 pn[6]; - } gcmp; - }; -}; - -/** * ieee80211_get_key_tx_seq - get key TX sequence counter * * @keyconf: the parameter passed with the set key diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f733656404de..72eb23723294 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -58,6 +58,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; @@ -271,7 +272,9 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif +int peernet2id_alloc(struct net *net, struct net *peer); int peernet2id(struct net *net, struct net *peer); +bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 614a49be68a9..6848b8bb2e63 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -77,6 +77,8 @@ struct netns_ipv4 { struct local_ports ip_local_ports; int sysctl_tcp_ecn; + int sysctl_tcp_ecn_fallback; + int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; int sysctl_ip_nonlocal_bind; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index d2527bf81142..8d93544a2d2b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -34,6 +34,7 @@ struct netns_sysctl_ipv6 { int fwmark_reflect; int idgen_retries; int idgen_delay; + int flowlabel_state_ranges; }; struct netns_ipv6 { diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9f4265ce8892..87935cad2f7b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -64,6 +64,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; + u32 *saved_syn; u32 secid; u32 peer_secid; }; @@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; - + req->saved_syn = NULL; /* Following is temporary. 
It is coupled with debugging * helpers in reqsk_put() & reqsk_free() */ @@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req) req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); + kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6d778efcfdfd..2738f6f87908 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -501,12 +501,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return sch->enqueue(skb, sch); } -static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) -{ - qdisc_skb_cb(skb)->pkt_len = skb->len; - return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; -} - static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; @@ -745,23 +739,6 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) return rtab->data[slot]; } -#ifdef CONFIG_NET_CLS_ACT -static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, - int action) -{ - struct sk_buff *n; - - n = skb_clone(skb, gfp_mask); - - if (n) { - n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - } - return n; -} -#endif - struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; diff --git a/include/net/sock.h b/include/net/sock.h index 3a4898ec8c67..26c1c3171e00 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -184,6 +184,7 @@ struct sock_common { unsigned char skc_reuse:4; unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; + unsigned char skc_net_refcnt:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -323,6 +324,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only +#define sk_net_refcnt __sk_common.skc_net_refcnt #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot @@ -1366,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket *socket) * Functions for memory accounting */ int __sk_mem_schedule(struct sock *sk, int size, int kind); -void __sk_mem_reclaim(struct sock *sk); +void __sk_mem_reclaim(struct sock *sk, int amount); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) @@ -1407,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc); } static inline void sk_mem_reclaim_partial(struct sock *sk) @@ -1415,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1); } static inline void sk_mem_charge(struct sock *sk, int size) @@ -1514,9 +1516,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void sk_free(struct sock *sk); -void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, @@ -2024,7 +2025,8 @@ static 
inline void sk_stream_moderate_sndbuf(struct sock *sk) } } -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp); +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule); /** * sk_page_frag - return an appropriate page_frag @@ -2192,22 +2194,6 @@ void sock_net_set(struct sock *sk, struct net *net) write_pnet(&sk->sk_net, net); } -/* - * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. - * They should not hold a reference to a namespace in order to allow - * to stop it. - * Sockets after sk_change_net should be released using sk_release_kernel - */ -static inline void sk_change_net(struct sock *sk, struct net *net) -{ - struct net *current_net = sock_net(sk); - - if (!net_eq(current_net, net)) { - put_net(current_net); - sock_net_set(sk, net); - } -} - static inline struct sock *skb_steal_sock(struct sk_buff *skb) { if (skb->sk) { diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2e69ee3019a..437f8fe75705 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -14,154 +14,261 @@ #include <linux/netdevice.h> #include <linux/notifier.h> +#define SWITCHDEV_F_NO_RECURSE BIT(0) + +enum switchdev_trans { + SWITCHDEV_TRANS_NONE, + SWITCHDEV_TRANS_PREPARE, + SWITCHDEV_TRANS_ABORT, + SWITCHDEV_TRANS_COMMIT, +}; + +enum switchdev_attr_id { + SWITCHDEV_ATTR_UNDEFINED, + SWITCHDEV_ATTR_PORT_PARENT_ID, + SWITCHDEV_ATTR_PORT_STP_STATE, + SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, +}; + +struct switchdev_attr { + enum switchdev_attr_id id; + enum switchdev_trans trans; + u32 flags; + union { + struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ + u8 stp_state; /* PORT_STP_STATE */ + unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + } u; +}; + struct fib_info; +enum switchdev_obj_id { + SWITCHDEV_OBJ_UNDEFINED, + SWITCHDEV_OBJ_PORT_VLAN, + SWITCHDEV_OBJ_IPV4_FIB, + SWITCHDEV_OBJ_PORT_FDB, +}; + +struct switchdev_obj { + enum switchdev_obj_id id; + enum switchdev_trans trans; + int (*cb)(struct net_device *dev, struct switchdev_obj *obj); + union { + struct switchdev_obj_vlan { /* PORT_VLAN */ + u16 flags; + u16 vid_start; + u16 vid_end; + } vlan; + struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 nlflags; + u32 tb_id; + } ipv4_fib; + struct switchdev_obj_fdb { /* PORT_FDB */ + const unsigned char *addr; + u16 vid; + } fdb; + } u; +}; + /** * struct switchdev_ops - switchdev operations * - * @swdev_parent_id_get: Called to get an ID of the switch chip this port - * is part of. If driver implements this, it indicates that it - * represents a port of a switch chip. + * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). + * + * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). * - * @swdev_port_stp_update: Called to notify switch device port of bridge - * port STP state change. + * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * - * @swdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. + * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). * - * @swdev_fib_ipv4_del: Called to delete IPv4 route from switch device. + * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj). 
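[Editor's note: to make the swdev_ops -> switchdev_ops conversion in this hunk concrete, here is a hedged sketch of a driver filling in the two attr ops, honoring the prepare/commit transaction model; struct my_port, its u64 switch id, and my_hw_set_stp_state() are illustrative driver internals:

#include <linux/netdevice.h>
#include <linux/string.h>
#include <net/switchdev.h>

struct my_port {
        u64 switch_id;          /* shared by all ports of one ASIC */
};

static int my_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
        struct my_port *port = netdev_priv(dev);

        switch (attr->id) {
        case SWITCHDEV_ATTR_PORT_PARENT_ID:
                attr->u.ppid.id_len = sizeof(port->switch_id);
                memcpy(attr->u.ppid.id, &port->switch_id, attr->u.ppid.id_len);
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}

static int my_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
        switch (attr->id) {
        case SWITCHDEV_ATTR_PORT_STP_STATE:
                if (attr->trans == SWITCHDEV_TRANS_PREPARE)
                        return 0;       /* nothing to preallocate */
                return my_hw_set_stp_state(dev, attr->u.stp_state);
        default:
                return -EOPNOTSUPP;
        }
}

static const struct switchdev_ops my_switchdev_ops = {
        .switchdev_port_attr_get = my_port_attr_get,
        .switchdev_port_attr_set = my_port_attr_set,
};
]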
*/ -struct swdev_ops { - int (*swdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*swdev_port_stp_update)(struct net_device *dev, u8 state); - int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); +struct switchdev_ops { + int (*switchdev_port_attr_get)(struct net_device *dev, + struct switchdev_attr *attr); + int (*switchdev_port_attr_set)(struct net_device *dev, + struct switchdev_attr *attr); + int (*switchdev_port_obj_add)(struct net_device *dev, + struct switchdev_obj *obj); + int (*switchdev_port_obj_del)(struct net_device *dev, + struct switchdev_obj *obj); + int (*switchdev_port_obj_dump)(struct net_device *dev, + struct switchdev_obj *obj); }; -enum netdev_switch_notifier_type { - NETDEV_SWITCH_FDB_ADD = 1, - NETDEV_SWITCH_FDB_DEL, +enum switchdev_notifier_type { + SWITCHDEV_FDB_ADD = 1, + SWITCHDEV_FDB_DEL, }; -struct netdev_switch_notifier_info { +struct switchdev_notifier_info { struct net_device *dev; }; -struct netdev_switch_notifier_fdb_info { - struct netdev_switch_notifier_info info; /* must be first */ +struct switchdev_notifier_fdb_info { + struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; }; static inline struct net_device * -netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) +switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { return info->dev; } #ifdef CONFIG_NET_SWITCHDEV -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid); -int netdev_switch_port_stp_update(struct net_device *dev, u8 state); -int register_netdev_switch_notifier(struct notifier_block *nb); -int unregister_netdev_switch_notifier(struct notifier_block *nb); -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info); -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void netdev_switch_fib_ipv4_abort(struct fib_info *fi); +int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr); +int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr); +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj); +int register_switchdev_notifier(struct notifier_block *nb); +int unregister_switchdev_notifier(struct notifier_block *nb); +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info); +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, 
+ int nlflags); +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id); +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); +void switchdev_fib_ipv4_abort(struct fib_info *fi); +int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 nlm_flags); +int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid); +int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx); #else -static inline int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static inline int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_stp_update(struct net_device *dev, - u8 state) +static inline int switchdev_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) { return -EOPNOTSUPP; } -static inline int register_netdev_switch_notifier(struct notifier_block *nb) +static inline int switchdev_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + +static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int unregister_netdev_switch_notifier(struct notifier_block *nb) +static inline int unregister_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +static inline int call_switchdev_notifiers(unsigned long val, + struct net_device *dev, + struct switchdev_notifier_info *info) { return NOTIFY_DONE; } -static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, + u32 seq, struct net_device *dev, + u32 filter_mask, int nlflags) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return -EOPNOTSUPP; } -static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { - return 0; + return -EOPNOTSUPP; } -static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { return 0; } -static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) 
+static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { return 0; } -static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { - return 0; } -static inline void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 vid, u16 nlm_flags) { + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, + int idx) +{ + return -EOPNOTSUPP; } #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d204f3f9df8..2bb2bad21d5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; +/* optimized version of sk_under_memory_pressure() for TCP sockets */ +static inline bool tcp_under_memory_pressure(const struct sock *sk) +{ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return !!sk->sk_cgrp->memory_pressure; + + return tcp_memory_pressure; +} /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). @@ -311,6 +319,8 @@ static inline bool tcp_out_of_memory(struct sock *sk) return false; } +void sk_forced_mem_schedule(struct sock *sk, int size); + static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { struct percpu_counter *ocp = sk->sk_prot->orphan_count; @@ -326,18 +336,6 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) bool tcp_check_oom(struct sock *sk, int shift); -/* syncookies: remember time of last synqueue overflow */ -static inline void tcp_synq_overflow(struct sock *sk) -{ - tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies; -} - -/* syncookies: no recent synqueue overflow on this listening socket? */ -static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) -{ - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK); -} extern struct proto tcp_prot; @@ -483,13 +481,35 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). */ -#define MAX_SYNCOOKIE_AGE 2 +#define MAX_SYNCOOKIE_AGE 2 +#define TCP_SYNCOOKIE_PERIOD (60 * HZ) +#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) + +/* syncookies: remember time of last synqueue overflow + * But do not dirty this field too often (once per second is enough) + */ +static inline void tcp_synq_overflow(struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long now = jiffies; + + if (time_after(now, last_overflow + HZ)) + tcp_sk(sk)->rx_opt.ts_recent_stamp = now; +} + +/* syncookies: no recent synqueue overflow on this listening socket? 
*/ +static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + + return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); +} static inline u32 tcp_cookie_time(void) { u64 val = get_jiffies_64(); - do_div(val, 60 * HZ); + do_div(val, TCP_SYNCOOKIE_PERIOD); return val; } @@ -527,7 +547,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); -int tcp_write_wakeup(struct sock *); +int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); @@ -692,6 +712,8 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. @@ -1043,14 +1065,31 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) return tp->is_cwnd_limited; } -static inline void tcp_check_probe_timer(struct sock *sk) +/* Something is really bad, we could not queue an additional packet, + * because qdisc is full or receiver sent a 0 window. + * We do not want to add fuel to the fire, or abort too early, + * so make sure the timer we arm now is at least 200ms in the future, + * regardless of current icsk_rto value (as it could be ~2ms) + */ +static inline unsigned long tcp_probe0_base(const struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); + return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); +} + +/* Variant of inet_csk_rto_backoff() used for zero window probes */ +static inline unsigned long tcp_probe0_when(const struct sock *sk, + unsigned long max_when) +{ + u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff; - if (!tp->packets_out && !icsk->icsk_pending) + return (unsigned long)min_t(u64, when, max_when); +} + +static inline void tcp_check_probe_timer(struct sock *sk) +{ + if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - icsk->icsk_rto, TCP_RTO_MAX); + tcp_probe0_base(sk), TCP_RTO_MAX); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5701e8..f0a9af8b4dae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -113,6 +113,7 @@ enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, }; enum bpf_prog_type { @@ -210,6 +211,15 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /** + * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success + */ + BPF_FUNC_tail_call, __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 41892f720057..9692cda5f8fc 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t; * @can_dlc: frame payload length in byte (0 .. 
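[Editor's note: BPF_MAP_TYPE_PROG_ARRAY and BPF_FUNC_tail_call above let one BPF program jump into another through a map of program slots; on success the call never returns. A hedged sketch of the program-side idiom, assumed from the samples/bpf conventions (the helper pointer cast and the loader-side struct bpf_map_def are conventions, not kernel API):

#include <linux/bpf.h>          /* BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call */

/* in a program compiled with clang -target bpf */
static void (*bpf_tail_call)(void *ctx, void *map, int index) =
        (void *) BPF_FUNC_tail_call;

struct bpf_map_def {            /* loader-side map description (assumed) */
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
        unsigned int max_entries;
};

struct bpf_map_def jmp_table __attribute__((section("maps"))) = {
        .type        = BPF_MAP_TYPE_PROG_ARRAY,
        .key_size    = sizeof(int),     /* slot index */
        .value_size  = sizeof(int),     /* prog fd, filled in by the loader */
        .max_entries = 8,
};

int my_entry_prog(void *ctx)
{
        bpf_tail_call(ctx, &jmp_table, 1);
        /* only reached if slot 1 is empty: fall through */
        return 0;
}
]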
8) aka data length code * N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 * mapping of the 'data length code' to the real payload length + * @__pad: padding + * @__res0: reserved / padding + * @__res1: reserved / padding * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2e49fc880d29..ae832b45b44c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1264,15 +1264,19 @@ enum ethtool_sfeatures_retval_bits { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|10|20|40|56]GbE. */ +/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|5|10|20|25|40|50|56|100]GbE. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 +#define SPEED_5000 5000 #define SPEED_10000 10000 #define SPEED_20000 20000 +#define SPEED_25000 25000 #define SPEED_40000 40000 +#define SPEED_50000 50000 #define SPEED_56000 56000 +#define SPEED_100000 100000 #define SPEED_UNKNOWN -1 diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d9cd19214b98..afccc9393fef 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -390,6 +390,15 @@ struct ifla_vxlan_port_range { __be16 high; }; +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + /* Bonding section */ enum { @@ -417,6 +426,9 @@ enum { IFLA_BOND_AD_LACP_RATE, IFLA_BOND_AD_SELECT, IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, __IFLA_BOND_MAX, }; diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 053bd102fbe0..d3d715f8c88f 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -54,6 +54,7 @@ struct sockaddr_ll { #define PACKET_FANOUT 18 #define PACKET_TX_HAS_OFF 19 #define PACKET_QDISC_BYPASS 20 +#define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 @@ -75,6 +76,12 @@ struct tpacket_stats_v3 { unsigned int tp_freeze_q_cnt; }; +struct tpacket_rollover_stats { + __aligned_u64 tp_all; + __aligned_u64 tp_huge; + __aligned_u64 tp_failed; +}; + union tpacket_stats_u { struct tpacket_stats stats1; struct tpacket_stats_v3 stats3; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef1b1f88ca18..177027cce6b3 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,11 +51,17 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_NUMHOOKS +}; + enum { NFPROTO_UNSPEC = 0, NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, NFPROTO_DECNET = 12, diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..3e34b7d702f8 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -108,6 +108,7 @@ struct nlmsgerr { #define NETLINK_NO_ENOBUFS 5 #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 struct nl_pktinfo { __u32 group; diff 
--git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 241220c43e86..c0ab6b0a3919 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2620,16 +2620,17 @@ enum nl80211_band_attr { * an indoor surroundings, i.e., it is connected to AC power (and not * through portable DC inverters) or is under the control of a master * that is acting as an AP and is connected to AC power. - * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this + * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this * channel if it's connected concurrently to a BSS on the same channel on * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz - * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a - * channel that has the GO_CONCURRENT attribute set can be done when there - * is a clear assessment that the device is operating under the guidance of - * an authorized master, i.e., setting up a GO while the device is also - * connected to an AP with DFS and radar detection on the UNII band (it is - * up to user-space, i.e., wpa_supplicant to perform the required - * verifications) + * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS + * off-channel on a channel that has the IR_CONCURRENT attribute set can be + * done when there is a clear assessment that the device is operating under + * the guidance of an authorized master, i.e., setting up a GO or TDLS + * off-channel while the device is also connected to an AP with DFS and + * radar detection on the UNII band (it is up to user-space, i.e., + * wpa_supplicant to perform the required verifications). Using this + * attribute for IR is disallowed for master interfaces (IBSS, AP). * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed @@ -2641,7 +2642,7 @@ enum nl80211_band_attr { * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 * for more information on the FCC description of the relaxations allowed * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and - * NL80211_FREQUENCY_ATTR_GO_CONCURRENT. + * NL80211_FREQUENCY_ATTR_IR_CONCURRENT. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -2659,7 +2660,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_160MHZ, NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, NL80211_FREQUENCY_ATTR_INDOOR_ONLY, - NL80211_FREQUENCY_ATTR_GO_CONCURRENT, + NL80211_FREQUENCY_ATTR_IR_CONCURRENT, NL80211_FREQUENCY_ATTR_NO_20MHZ, NL80211_FREQUENCY_ATTR_NO_10MHZ, @@ -2672,6 +2673,8 @@ enum nl80211_frequency_attr { #define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ + NL80211_FREQUENCY_ATTR_IR_CONCURRENT /** * enum nl80211_bitrate_attr - bitrate attributes @@ -2830,7 +2833,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. 
- * @NL80211_RRF_GO_CONCURRENT: See &NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed @@ -2847,7 +2850,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IR = 1<<7, __NL80211_RRF_NO_IBSS = 1<<8, NL80211_RRF_AUTO_BW = 1<<11, - NL80211_RRF_GO_CONCURRENT = 1<<12, + NL80211_RRF_IR_CONCURRENT = 1<<12, NL80211_RRF_NO_HT40MINUS = 1<<13, NL80211_RRF_NO_HT40PLUS = 1<<14, NL80211_RRF_NO_80MHZ = 1<<15, @@ -2859,6 +2862,7 @@ enum nl80211_reg_rule_flags { #define NL80211_RRF_NO_IR NL80211_RRF_NO_IR #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ NL80211_RRF_NO_HT40PLUS) +#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bf08e76bf505..4f0d1bc3647d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/pkt_sched.h> +#ifdef __KERNEL__ /* I think i could have done better macros ; for now this is stolen from * some arch/mips code - jhs */ @@ -35,20 +36,6 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance * * */ -#define TC_MUNGED _TC_MAKEMASK1(0) -#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) -#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) - -#define TC_OK2MUNGE _TC_MAKEMASK1(1) -#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) -#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) - -#define S_TC_VERD _TC_MAKE32(2) -#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) -#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) -#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) -#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) - #define S_TC_FROM _TC_MAKE32(6) #define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) #define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) @@ -62,18 +49,16 @@ bits 9,10,11: redirect counter - redirect TTL. 
Loop avoidance #define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) #define CLR_TC_NCLS(v) ( v & ~TC_NCLS) -#define S_TC_RTTL _TC_MAKE32(9) -#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) -#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) -#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) -#define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) - #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) #define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) #define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) #define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) +#define MAX_REC_LOOP 4 +#define MAX_RED_LOOP 4 +#endif + /* Action attributes */ enum { TCA_ACT_UNSPEC, @@ -93,8 +78,6 @@ enum { #define TCA_ACT_NOUNBIND 0 #define TCA_ACT_REPLACE 1 #define TCA_ACT_NOREPLACE 0 -#define MAX_REC_LOOP 4 -#define MAX_RED_LOOP 4 #define TC_ACT_UNSPEC (-1) #define TC_ACT_OK 0 @@ -404,6 +387,36 @@ enum { #define TCA_BPF_MAX (__TCA_BPF_MAX - 1) +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 534b84710745..8d2530daca9f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -268,7 +268,8 @@ enum { TCA_GRED_STAB, TCA_GRED_DPS, TCA_GRED_MAX_P, - __TCA_GRED_MAX, + TCA_GRED_LIMIT, + __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) @@ -679,6 +680,7 @@ enum { TCA_CODEL_LIMIT, TCA_CODEL_INTERVAL, TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, __TCA_CODEL_MAX }; @@ -695,6 +697,7 @@ struct tc_codel_xstats { __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ __u32 dropping; /* are we in dropping state ? 
*/ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ }; /* FQ_CODEL */ @@ -707,6 +710,7 @@ enum { TCA_FQ_CODEL_ECN, TCA_FQ_CODEL_FLOWS, TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, __TCA_FQ_CODEL_MAX }; @@ -730,6 +734,7 @@ struct tc_fq_codel_qd_stats { */ __u32 new_flows_len; /* count of flows in new list */ __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ }; struct tc_fq_codel_cl_stats { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 6a6fb747c78d..eee8968407f0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -276,6 +276,8 @@ enum LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */ LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ + LINUX_MIB_TCPWINPROBE, /* TCPWinProbe */ + LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index faa72f4fa547..65a77b071e22 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -113,6 +113,8 @@ enum { #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ struct tcp_repair_opt { __u32 opt_code; @@ -190,8 +192,10 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; - __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ }; /* for TCP_MD5SIG socket option */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8a6616583f38..614bcd4c1d74 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -14,12 +14,7 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/mm.h> - -struct bpf_array { - struct bpf_map map; - u32 elem_size; - char value[0] __aligned(8); -}; +#include <linux/filter.h> /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) @@ -154,3 +149,109 @@ static int __init register_array_map(void) return 0; } late_initcall(register_array_map); + +static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +{ + /* only bpf_prog file descriptors can be stored in prog_array map */ + if (attr->value_size != sizeof(u32)) + return ERR_PTR(-EINVAL); + return array_map_alloc(attr); +} + +static void prog_array_map_free(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + synchronize_rcu(); + + /* make sure it's empty */ + for (i = 0; i < array->map.max_entries; i++) + BUG_ON(array->prog[i] != NULL); + kvfree(array); +} + +static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + return NULL; +} + +/* only called from syscall */ +static int prog_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog, *old_prog; + u32 index = *(u32 *)key, ufd; + + if (map_flags != BPF_ANY) + return -EINVAL; + + if (index >= array->map.max_entries) + return -E2BIG; 
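+ /* The value written from user space is a bpf_prog fd: take a
+  * reference on the program, check type/JIT compatibility against
+  * this array via bpf_prog_array_compatible(), then publish it with
+  * xchg() so concurrent tail calls see either the old or new program.
+  */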
+ + ufd = *(u32 *)value; + prog = bpf_prog_get(ufd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return -EINVAL; + } + + old_prog = xchg(array->prog + index, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *old_prog; + u32 index = *(u32 *)key; + + if (index >= array->map.max_entries) + return -E2BIG; + + old_prog = xchg(array->prog + index, NULL); + if (old_prog) { + bpf_prog_put(old_prog); + return 0; + } else { + return -ENOENT; + } +} + +/* decrement refcnt of all bpf_progs that are stored in this map */ +void bpf_prog_array_map_clear(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + for (i = 0; i < array->map.max_entries; i++) + prog_array_map_delete_elem(map, &i); +} + +static const struct bpf_map_ops prog_array_ops = { + .map_alloc = prog_array_map_alloc, + .map_free = prog_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = prog_array_map_lookup_elem, + .map_update_elem = prog_array_map_update_elem, + .map_delete_elem = prog_array_map_delete_elem, +}; + +static struct bpf_map_type_list prog_array_type __read_mostly = { + .ops = &prog_array_ops, + .type = BPF_MAP_TYPE_PROG_ARRAY, +}; + +static int __init register_prog_array_map(void) +{ + bpf_register_map_type(&prog_array_type); + return 0; +} +late_initcall(register_prog_array_map); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 54f0e7fcd0e2..d44b25cbe460 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -176,6 +176,15 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } +const struct bpf_func_proto bpf_tail_call_proto = { + .func = NULL, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -244,6 +253,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, + [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, @@ -286,6 +296,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; + u32 tail_call_cnt = 0; void *ptr; int off; @@ -431,6 +442,30 @@ select_insn: BPF_R4, BPF_R5); CONT; + JMP_TAIL_CALL: { + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; + u64 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; + + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; + + prog = READ_ONCE(array->prog[index]); + if (unlikely(!prog)) + goto out; + + ARG1 = BPF_R1; + insn = prog->insnsi; + goto select_insn; +out: + CONT; + } /* JMP */ JMP_JA: insn += insn->off; @@ -619,6 +654,40 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) +{ + if (array->owner_prog_type) { + if (array->owner_prog_type != 
fp->type) + return false; + if (array->owner_jited != fp->jited) + return false; + } else { + array->owner_prog_type = fp->type; + array->owner_jited = fp->jited; + } + return true; +} + +static int check_tail_call(const struct bpf_prog *fp) +{ + struct bpf_prog_aux *aux = fp->aux; + int i; + + for (i = 0; i < aux->used_map_cnt; i++) { + struct bpf_array *array; + struct bpf_map *map; + + map = aux->used_maps[i]; + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + array = container_of(map, struct bpf_array, map); + if (!bpf_prog_array_compatible(array, fp)) + return -EINVAL; + } + + return 0; +} + /** * bpf_prog_select_runtime - select execution runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -626,7 +695,7 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) * try to JIT internal BPF program, if JIT is not available select interpreter * BPF program will be executed via BPF_PROG_RUN() macro */ -void bpf_prog_select_runtime(struct bpf_prog *fp) +int bpf_prog_select_runtime(struct bpf_prog *fp) { fp->bpf_func = (void *) __bpf_prog_run; @@ -634,6 +703,8 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) bpf_int_jit_compile(fp); /* Lock whole bpf_prog as read-only */ bpf_prog_lock_ro(fp); + + return check_tail_call(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3bae6c591914..98a69bd83069 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -68,6 +68,12 @@ static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) + /* prog_array stores refcnt-ed bpf_prog pointers, + * release them all when user space closes prog_array_fd + */ + bpf_prog_array_map_clear(map); + bpf_map_put(map); return 0; } @@ -392,6 +398,19 @@ static void fixup_bpf_calls(struct bpf_prog *prog) */ BUG_ON(!prog->aux->ops->get_func_proto); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode + * to avoid conditional branch in + * interpreter for every normal call + * and to prevent accidental JITing by + * a JIT compiler that doesn't support + * bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + fn = prog->aux->ops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions @@ -532,7 +551,9 @@ static int bpf_prog_load(union bpf_attr *attr) fixup_bpf_calls(prog); /* eBPF program is ready to be JITed */ - bpf_prog_select_runtime(prog); + err = bpf_prog_select_runtime(prog); + if (err < 0) + goto free_used_maps; err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47dcd3aa6e23..cfd9a40b9a5a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -907,6 +907,23 @@ static int check_call(struct verifier_env *env, int func_id) fn->ret_type, func_id); return -EINVAL; } + + if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && + func_id != BPF_FUNC_tail_call) + /* prog_array map type needs extra care: + * only allow to pass it into bpf_tail_call() for now. 
+ * bpf_map_delete_elem() can be allowed in the future, + * while bpf_map_update_elem() must only be done via syscall + */ + return -EINVAL; + + if (func_id == BPF_FUNC_tail_call && + map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + /* don't allow any other map type to be passed into + * bpf_tail_call() + */ + return -EINVAL; + return 0; } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e6..245df6b32b81 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -346,16 +346,13 @@ static inline void seccomp_sync_threads(void) */ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { - struct seccomp_filter *filter; - unsigned long fp_size; - struct sock_filter *fp; - int new_len; - long ret; + struct seccomp_filter *sfilter; + int ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fp_size = fprog->len * sizeof(struct sock_filter); /* * Installing a seccomp filter requires that the task has @@ -368,60 +365,21 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return ERR_PTR(-EACCES); - fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); - if (!fp) - return ERR_PTR(-ENOMEM); - - /* Copy the instructions from fprog. */ - ret = -EFAULT; - if (copy_from_user(fp, fprog->filter, fp_size)) - goto free_prog; - - /* Check and rewrite the fprog via the skb checker */ - ret = bpf_check_classic(fp, fprog->len); - if (ret) - goto free_prog; - - /* Check and rewrite the fprog for seccomp use */ - ret = seccomp_check_filter(fp, fprog->len); - if (ret) - goto free_prog; - - /* Convert 'sock_filter' insns to 'bpf_insn' insns */ - ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); - if (ret) - goto free_prog; - /* Allocate a new seccomp_filter */ - ret = -ENOMEM; - filter = kzalloc(sizeof(struct seccomp_filter), - GFP_KERNEL|__GFP_NOWARN); - if (!filter) - goto free_prog; - - filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); - if (!filter->prog) - goto free_filter; - - ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); - if (ret) - goto free_filter_prog; - - kfree(fp); - atomic_set(&filter->usage, 1); - filter->prog->len = new_len; + sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN); + if (!sfilter) + return ERR_PTR(-ENOMEM); - bpf_prog_select_runtime(filter->prog); + ret = bpf_prog_create_from_user(&sfilter->prog, fprog, + seccomp_check_filter); + if (ret < 0) { + kfree(sfilter); + return ERR_PTR(ret); + } - return filter; + atomic_set(&sfilter->usage, 1); -free_filter_prog: - __bpf_prog_free(filter->prog); -free_filter: - kfree(filter); -free_prog: - kfree(fp); - return ERR_PTR(ret); + return sfilter; } /** diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2d56ce501632..646445e41bd4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -172,6 +172,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; case BPF_FUNC_trace_printk: /* diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4396434e4715..ca66a0e32c8e 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -584,7 +584,6 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) struct bucket_table *tbl = iter->walker->tbl; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; - void *obj 
= NULL; if (p) { p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); @@ -604,8 +603,7 @@ next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; - obj = rht_obj(ht, p); - goto out; + return rht_obj(ht, p); } iter->skip = 0; @@ -623,9 +621,7 @@ next: iter->p = NULL; -out: - - return obj; + return NULL; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 80d78c51f65f..9b012a811ee1 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -21,6 +21,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <linux/random.h> /* General test specific settings */ #define MAX_SUBTESTS 3 @@ -67,6 +68,10 @@ struct bpf_test { union { struct sock_filter insns[MAX_INSNS]; struct bpf_insn insns_int[MAX_INSNS]; + struct { + void *insns; + unsigned int len; + } ptr; } u; __u8 aux; __u8 data[MAX_DATA]; @@ -74,8 +79,190 @@ struct bpf_test { int data_size; __u32 result; } test[MAX_SUBTESTS]; + int (*fill_helper)(struct bpf_test *self); }; +/* Large test cases need separate allocation and fill handler. */ + +static int bpf_fill_maxinsns1(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + __u32 k = ~0; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++, k--) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, k); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns2(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns3(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + struct rnd_state rnd; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + prandom_seed_state(&rnd, 3141592653589793238ULL); + + for (i = 0; i < len - 1; i++) { + __u32 k = prandom_u32_state(&rnd); + + insn[i] = __BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, k); + } + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns4(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS + 1; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns5(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0); + + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns6(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 1; i++) + insn[i] = 
__BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_VLAN_TAG_PRESENT); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns7(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 4; i++) + insn[i] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_CPU); + + insn[len - 4] = __BPF_STMT(BPF_MISC | BPF_TAX, 0); + insn[len - 3] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_CPU); + insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0); + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns8(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i, jmp_off = len - 3; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = __BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff); + + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_JUMP(BPF_JMP | BPF_JGT, 0xffffffff, jmp_off--, 0); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -1755,7 +1942,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R0, 0x1ffffffffLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), /* R0 = 1 */ BPF_EXIT_INSN(), }, INTERNAL, @@ -1805,6 +1993,2281 @@ static struct bpf_test tests[] = { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, { { 38, 256 } } }, + /* BPF_ALU | BPF_MOV | BPF_X */ + { + "ALU_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295U), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295U } }, + }, + { + "ALU64_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295U), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295U } }, + }, + /* BPF_ALU | BPF_MOV | BPF_K */ + { + "ALU_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_K: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 4294967295U), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295U } }, + }, + { + "ALU_MOV_K: 0x0000ffffffff0000 = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0x00000000ffffffffLL), + BPF_ALU32_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_K: dst = 2147483647", + .u.insns_int = { + 
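+ /* ALU64 BPF_MOV sign-extends its 32-bit immediate; INT_MAX is
+  * positive, so the full 64-bit R0 ends up as 2147483647.
+  */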
BPF_ALU64_IMM(BPF_MOV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_OR_K: dst = 0x0", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0x0), + BPF_ALU64_IMM(BPF_MOV, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = -1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_X */ + { + "ALU_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294U), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295U } }, + }, + { + "ALU64_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294U), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295U } }, + }, + /* BPF_ALU | BPF_ADD | BPF_K */ + { + "ALU_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 4294967294U), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295U } }, + }, + { + "ALU_ADD_K: 0 + (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 1 + 2147483646 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2147483646), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_ADD_K: 2147483646 + -2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_ADD, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 1 + 0 = 1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x1), + BPF_ALU64_IMM(BPF_ADD, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 
2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 0 + (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_X */ + { + "ALU_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294U), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294U), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_K */ + { + "ALU_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_SUB_K: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_SUB, R0, 4294967294U), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_SUB_K: 4294967294 - 4294967295 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967294U), + BPF_ALU64_IMM(BPF_SUB, R0, 4294967295U), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 2147483646 - 2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_SUB, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_X */ + { + "ALU_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_X: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0x7FFFFFF8), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_X: -1 * -1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, -1), + BPF_ALU32_IMM(BPF_MOV, R1, -1), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + 
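+ /* full 64-bit multiply; the harness compares only the low
+  * 32 bits of R0 on exit
+  */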
BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_X: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_K */ + { + "ALU_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_MUL_K: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 0x7FFFFFF8), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_K: 1 * (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_MUL_K: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * -2147483647 = -2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_X */ + { + "ALU_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_X: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295U), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_X: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffLL), + BPF_LD_IMM64(R4, 0xffffffffffffffffLL), + BPF_LD_IMM64(R3, 0x0000000000000001LL), + BPF_ALU64_REG(BPF_DIV, R2, R4), + 
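+ /* eBPF division is unsigned, so ~0ULL / ~0ULL == 1; the
+  * result is checked against R3 below
+  */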
BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_K */ + { + "ALU_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_DIV, R0, 4294967295U), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_DIV_K: 0xffffffffffffffff / (-1) = 0x1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffLL), + BPF_LD_IMM64(R3, 0x1UL), + BPF_ALU32_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU64_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_DIV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_K: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffLL), + BPF_LD_IMM64(R3, 0x0000000000000001LL), + BPF_ALU64_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_X */ + { + "ALU_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_X: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967293U), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_X: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483645), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_K */ + { + "ALU_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU_MOD_K: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295U), + BPF_ALU32_IMM(BPF_MOD, R0, 4294967293U), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_K: 3 % 2 
= 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU64_MOD_K: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_MOD, R0, 2147483645), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_AND | BPF_X */ + { + "ALU_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_AND | BPF_K */ + { + "ALU_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU64_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0x0000000000000000LL), + BPF_ALU64_IMM(BPF_AND, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0xffffffffffffffff & -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffLL), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_OR | BPF_X */ + { + "ALU_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + 
BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_X: 0x0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_X: 0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_OR | BPF_K */ + { + "ALU_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU64_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), + BPF_ALU64_IMM(BPF_OR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x000000000000000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000LL), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_XOR | BPF_X */ + { + "ALU_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_X: 0x1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_X: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_XOR, 
R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + /* BPF_ALU | BPF_XOR | BPF_K */ + { + "ALU_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU64_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), + BPF_ALU64_IMM(BPF_XOR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ -1 = 0xffff00000000ffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), + BPF_LD_IMM64(R3, 0xffff00000000ffffLL), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x000000000000000 ^ -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000LL), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_X */ + { + "ALU_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_K */ + { + "ALU_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + 
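+ /* 64-bit shift: bit 31 gets set with no sign-extension
+  * into the upper word
+  */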
BPF_ALU64_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_X */ + { + "ALU_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_K */ + { + "ALU_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU64_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_X */ + { + "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU32_IMM(BPF_MOV, R1, 40), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_K */ + { + "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU64_IMM(BPF_ARSH, R0, 40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_NEG */ + { + "ALU_NEG: -(3) = -3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU_NEG: -(-3) = 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_NEG: -(3) = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU64_NEG: -(-3) = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, -3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_BE */ + { + "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be16(0xcdef) } }, + }, + { + "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 
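/* each subtest pair is { payload length, expected return }; 0 means no input data */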
cpu_to_be32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_LE */ + { + "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le16(0xcdef) } }, + }, + { + "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } }, + }, + /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */ + { + "ST_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_B, R10, -40, 0xff), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_B: Store/Load byte: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_B, R10, -40, 0x7f), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7f } }, + }, + { + "STX_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffLL), + BPF_STX_MEM(BPF_B, R10, R1, -40), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0xffff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_H: Store/Load half word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7fff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fff } }, + }, + { + "STX_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffLL), + BPF_STX_MEM(BPF_H, R10, R1, -40), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_W: Store/Load word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40,
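/* the 32-bit immediate is sign-extended for DW stores */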
0xffffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative 2", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffff00000000ffffLL), + BPF_LD_IMM64(R3, 0xffffffffffffffffLL), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R2, R10, -40), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ST_MEM_DW: Store/Load double word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_STX | BPF_XADD | BPF_W/DW */ + { + "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_W, R10, -40, 0x10), + BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + { + "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_DW, R10, -40, 0x10), + BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + /* BPF_JMP | BPF_EXIT */ + { + "JMP_EXIT", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x4711), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0x4712), + }, + INTERNAL, + { }, + { { 0, 0x4711 } }, + }, + /* BPF_JMP | BPF_JA */ + { + "JMP_JA: Unconditional jump: if (true) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_K */ + { + "JMP_JSGT_K: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_K: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_K */ + { + "JMP_JSGE_K: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_K */ + { + "JMP_JGT_K: if (3 > 2) return 1", + .u.insns_int = { +
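/* Pattern for the JMP tests below: R0 is preset to the value for the untaken branch, the conditional jump skips the first EXIT, and the landing pad sets R0 = 1 before returning. */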
BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGT, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_K */ + { + "JMP_JGE_K: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_K jump backwards */ + { + "JMP_JGT_K: if (3 > 2) return 1 (jump backwards)", + .u.insns_int = { + BPF_JMP_IMM(BPF_JA, 0, 0, 2), /* goto start */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* out: */ + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), /* start: */ + BPF_LD_IMM64(R1, 3), /* note: this takes 2 insns */ + BPF_JMP_IMM(BPF_JGT, R1, 2, -6), /* goto out */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_K: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_K */ + { + "JMP_JNE_K: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_K */ + { + "JMP_JEQ_K: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JEQ, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_K */ + { + "JMP_JSET_K: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JSET, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_K: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JSET, R1, 0xffffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_X */ + { + "JMP_JSGT_X: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_X: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_X */ + { + "JMP_JSGE_X: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_X: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), +
BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_X */ + { + "JMP_JGT_X: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_X */ + { + "JMP_JGE_X: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_X: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_X */ + { + "JMP_JNE_X: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_X */ + { + "JMP_JEQ_X: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JEQ, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_X */ + { + "JMP_JSET_X: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JSET, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_X: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 0xffffffff), + BPF_JMP_REG(BPF_JSET, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Maximum possible literals", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xffffffff } }, + .fill_helper = bpf_fill_maxinsns1, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Single literal", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xfefefefe } }, + .fill_helper = bpf_fill_maxinsns2, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Run/add until end", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x947bf368 } }, + .fill_helper = bpf_fill_maxinsns3, + }, + { + "BPF_MAXINSNS: Too many instructions", + { }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + .fill_helper = bpf_fill_maxinsns4, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Very long jump", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xabababab } }, + .fill_helper = bpf_fill_maxinsns5, + }, + { /* Mainly checking JIT here.
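+ * All of the BPF_MAXINSNS cases carry no static instruction array; their + * programs are generated at module init time by each entry's ->fill_helper + * callback (see prepare_bpf_tests() below), mainly to stress the JIT + * with maximum-length programs.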
*/ + "BPF_MAXINSNS: Ctx heavy transformations", + { }, + CLASSIC, + { }, + { + { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, + { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + }, + .fill_helper = bpf_fill_maxinsns6, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Call heavy transformations", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 1, 0 }, { 10, 0 } }, + .fill_helper = bpf_fill_maxinsns7, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Jump heavy test", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xffffffff } }, + .fill_helper = bpf_fill_maxinsns8, + }, }; static struct net_device dev; @@ -1858,10 +4321,15 @@ static void release_test_data(const struct bpf_test *test, void *data) kfree_skb(data); } -static int probe_filter_length(struct sock_filter *fp) +static int filter_length(int which) { - int len = 0; + struct sock_filter *fp; + int len; + + if (tests[which].fill_helper) + return tests[which].u.ptr.len; + fp = tests[which].u.insns; for (len = MAX_INSNS - 1; len > 0; --len) if (fp[len].code != 0 || fp[len].k != 0) break; @@ -1869,16 +4337,25 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } +static void *filter_pointer(int which) +{ + if (tests[which].fill_helper) + return tests[which].u.ptr.insns; + else + return tests[which].u.insns; +} + static struct bpf_prog *generate_filter(int which, int *err) { - struct bpf_prog *fp; - struct sock_fprog_kern fprog; - unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; + unsigned int flen = filter_length(which); + void *fptr = filter_pointer(which); + struct sock_fprog_kern fprog; + struct bpf_prog *fp; switch (test_type) { case CLASSIC: - fprog.filter = tests[which].u.insns; + fprog.filter = fptr; fprog.len = flen; *err = bpf_prog_create(&fp, &fprog); @@ -1914,8 +4391,7 @@ static struct bpf_prog *generate_filter(int which, int *err) } fp->len = flen; - memcpy(fp->insnsi, tests[which].u.insns_int, - fp->len * sizeof(struct bpf_insn)); + memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn)); bpf_prog_select_runtime(fp); break; @@ -1987,9 +4463,33 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) return err_cnt; } +static __init int prepare_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper && + tests[i].fill_helper(&tests[i]) < 0) + return -ENOMEM; + } + + return 0; +} + +static __init void destroy_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper) + kfree(tests[i].u.ptr.insns); + } +} + static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; + int jit_cnt = 0, run_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_prog *fp; @@ -2006,6 +4506,13 @@ static __init int test_bpf(void) return err; } + + pr_cont("jited:%u ", fp->jited); + + run_cnt++; + if (fp->jited) + jit_cnt++; + err = run_one(fp, &tests[i]); release_filter(fp, i); @@ -2018,13 +4525,24 @@ static __init int test_bpf(void) } } - pr_info("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); + pr_info("Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n", + pass_cnt, err_cnt, jit_cnt, run_cnt); + return err_cnt ? 
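/* any failing test fails the module load */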
-EINVAL : 0; } static int __init test_bpf_init(void) { - return test_bpf(); + int ret; + + ret = prepare_bpf_tests(); + if (ret < 0) + return ret; + + ret = test_bpf(); + + destroy_bpf_tests(); + return ret; } static void __exit test_bpf_exit(void) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index b2957540d3c7..c90777eae1f8 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -1,14 +1,9 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * - * Based on the following paper: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * - * Code partially derived from nft_hash - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -26,20 +21,37 @@ #include <linux/rhashtable.h> #include <linux/slab.h> +#define MAX_ENTRIES 1000000 +#define TEST_INSERT_FAIL INT_MAX + +static int entries = 50000; +module_param(entries, int, 0); +MODULE_PARM_DESC(entries, "Number of entries to add (default: 50000)"); + +static int runs = 4; +module_param(runs, int, 0); +MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)"); + +static int max_size = 65536; +module_param(max_size, int, 0); +MODULE_PARM_DESC(max_size, "Maximum table size (default: 65536)"); -#define TEST_HT_SIZE 8 -#define TEST_ENTRIES 2048 -#define TEST_PTR ((void *) 0xdeadbeef) -#define TEST_NEXPANDS 4 +static bool shrinking = false; +module_param(shrinking, bool, 0); +MODULE_PARM_DESC(shrinking, "Enable automatic shrinking (default: off)"); + +static int size = 8; +module_param(size, int, 0); +MODULE_PARM_DESC(size, "Initial size hint of table (default: 8)"); struct test_obj { - void *ptr; int value; struct rhash_head node; }; -static const struct rhashtable_params test_rht_params = { - .nelem_hint = TEST_HT_SIZE, +static struct test_obj array[MAX_ENTRIES]; + +static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), @@ -51,11 +63,14 @@ static int __init test_rht_lookup(struct rhashtable *ht) { unsigned int i; - for (i = 0; i < TEST_ENTRIES * 2; i++) { + for (i = 0; i < entries * 2; i++) { struct test_obj *obj; bool expected = !(i % 2); u32 key = i; + if (array[i / 2].value == TEST_INSERT_FAIL) + expected = false; + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); if (expected && !obj) { @@ -66,9 +81,9 @@ static int __init test_rht_lookup(struct rhashtable *ht) key); return -EEXIST; } else if (expected && obj) { - if (obj->ptr != TEST_PTR || obj->value != i) { - pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", - obj->ptr, TEST_PTR, obj->value, i); + if (obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %u!=%u\n", + obj->value, i); return -EINVAL; } } @@ -77,129 +92,147 @@ static int __init test_rht_lookup(struct rhashtable *ht) return 0; } -static void test_bucket_stats(struct rhashtable *ht, bool quiet) +static void test_bucket_stats(struct rhashtable *ht) { - unsigned int cnt, rcu_cnt, i, total = 0; + unsigned int err, total = 0, chain_len = 0; + struct rhashtable_iter hti; struct rhash_head *pos; - struct test_obj *obj; - struct bucket_table *tbl; - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) { - rcu_cnt = cnt
= 0; + err = rhashtable_walk_init(ht, &hti); + if (err) { + pr_warn("Test failed: allocation error"); + return; + } - if (!quiet) - pr_info(" [%#4x/%u]", i, tbl->size); + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + pr_warn("Test failed: iterator failed: %d\n", err); + return; + } - rht_for_each_entry_rcu(obj, pos, tbl, i, node) { - cnt++; - total++; - if (!quiet) - pr_cont(" [%p],", obj); + while ((pos = rhashtable_walk_next(&hti))) { + if (PTR_ERR(pos) == -EAGAIN) { + pr_info("Info: encountered resize\n"); + chain_len++; + continue; + } else if (IS_ERR(pos)) { + pr_warn("Test failed: rhashtable_walk_next() error: %ld\n", + PTR_ERR(pos)); + break; } - rht_for_each_entry_rcu(obj, pos, tbl, i, node) - rcu_cnt++; - - if (rcu_cnt != cnt) - pr_warn("Test failed: Chain count mismach %d != %d", - cnt, rcu_cnt); - - if (!quiet) - pr_cont("\n [%#x] first element: %p, chain length: %u\n", - i, tbl->buckets[i], cnt); + total++; } - pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", - total, atomic_read(&ht->nelems), TEST_ENTRIES); + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d, table-jumps=%u\n", + total, atomic_read(&ht->nelems), entries, chain_len); - if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) + if (total != atomic_read(&ht->nelems) || total != entries) pr_warn("Test failed: Total count mismatch ^^^"); } -static int __init test_rhashtable(struct rhashtable *ht) +static s64 __init test_rhashtable(struct rhashtable *ht) { - struct bucket_table *tbl; struct test_obj *obj; - struct rhash_head *pos, *next; int err; - unsigned int i; + unsigned int i, insert_fails = 0; + s64 start, end; /* * Insertion Test: - * Insert TEST_ENTRIES into table with all keys even numbers + * Insert entries into table with all keys even numbers */ - pr_info(" Adding %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { - struct test_obj *obj; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) { - err = -ENOMEM; - goto error; - } + pr_info(" Adding %d keys\n", entries); + start = ktime_get_ns(); + for (i = 0; i < entries; i++) { + struct test_obj *obj = &array[i]; - obj->ptr = TEST_PTR; obj->value = i * 2; err = rhashtable_insert_fast(ht, &obj->node, test_rht_params); - if (err) { - kfree(obj); - goto error; + if (err == -ENOMEM || err == -EBUSY) { + /* Mark failed inserts but continue */ + obj->value = TEST_INSERT_FAIL; + insert_fails++; + } else if (err) { + return err; } } + if (insert_fails) + pr_info(" %u insertions failed due to memory pressure\n", + insert_fails); + + test_bucket_stats(ht); rcu_read_lock(); - test_bucket_stats(ht, true); test_rht_lookup(ht); rcu_read_unlock(); - rcu_read_lock(); - test_bucket_stats(ht, true); - rcu_read_unlock(); + test_bucket_stats(ht); - pr_info(" Deleting %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { + pr_info(" Deleting %d keys\n", entries); + for (i = 0; i < entries; i++) { u32 key = i * 2; - obj = rhashtable_lookup_fast(ht, &key, test_rht_params); - BUG_ON(!obj); + if (array[i].value != TEST_INSERT_FAIL) { + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); + BUG_ON(!obj); - rhashtable_remove_fast(ht, &obj->node, test_rht_params); - kfree(obj); + rhashtable_remove_fast(ht, &obj->node, test_rht_params); + } } - return 0; - -error: - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(obj, pos, next, tbl, i, node) - kfree(obj); + end = 
ktime_get_ns(); + pr_info(" Duration of test: %lld ns\n", end - start); - return err; + return end - start; } static struct rhashtable ht; static int __init test_rht_init(void) { - int err; + int i, err; + u64 total_time = 0; - pr_info("Running resizable hashtable tests...\n"); + entries = min(entries, MAX_ENTRIES); - err = rhashtable_init(&ht, &test_rht_params); - if (err < 0) { - pr_warn("Test failed: Unable to initialize hashtable: %d\n", - err); - return err; - } + test_rht_params.automatic_shrinking = shrinking; + test_rht_params.max_size = max_size; + test_rht_params.nelem_hint = size; - err = test_rhashtable(&ht); + pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n", + size, max_size, shrinking); - rhashtable_destroy(&ht); + for (i = 0; i < runs; i++) { + s64 time; - return err; + pr_info("Test %02d:\n", i); + memset(&array, 0, sizeof(array)); + err = rhashtable_init(&ht, &test_rht_params); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + continue; + } + + time = test_rhashtable(&ht); + rhashtable_destroy(&ht); + if (time < 0) { + pr_warn("Test failed: return code %lld\n", time); + return -EINVAL; + } + + total_time += time; + } + + do_div(total_time, runs); + pr_info("Average test time: %llu\n", total_time); + + return 0; } static void __exit test_rht_exit(void) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebffa0e4a9c0..2fd31aebef30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2967,6 +2967,104 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); /* + * Page Fragment: + * An arbitrary-length arbitrary-offset area of memory which resides + * within a 0 or higher order page. Multiple fragments within that page + * are individually refcounted, in the page's reference counter. + * + * The page_frag functions below provide a simple allocation framework for + * page fragments. This is used by the network stack and network device + * drivers to provide a backing region of memory for use as either an + * sk_buff->head, or to be used in the "frags" portion of skb_shared_info. + */ +static struct page *__page_frag_refill(struct page_frag_cache *nc, + gfp_t gfp_mask) +{ + struct page *page = NULL; + gfp_t gfp = gfp_mask; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + PAGE_FRAG_CACHE_MAX_ORDER); + nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; +#endif + if (unlikely(!page)) + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); + + nc->va = page ? page_address(page) : NULL; + + return page; +} + +void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + unsigned int size = PAGE_SIZE; + struct page *page; + int offset; + + if (unlikely(!nc->va)) { +refill: + page = __page_frag_refill(nc, gfp_mask); + if (!page) + return NULL; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* Even if we own the page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. 
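+ * Instead we add size - 1 references in one shot: e.g. with a 32 KiB + * cache this leaves page->_count == 32768 == pagecnt_bias, so each + * subsequent __alloc_page_frag() only decrements the local bias and + * offset, and _count is next touched when the offset wraps below zero.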
+ */ + atomic_add(size - 1, &page->_count); + + /* reset page count bias and offset to start of new frag */ + nc->pfmemalloc = page->pfmemalloc; + nc->pagecnt_bias = size; + nc->offset = size; + } + + offset = nc->offset - fragsz; + if (unlikely(offset < 0)) { + page = virt_to_page(nc->va); + + if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) + goto refill; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* OK, page count is 0, we can safely set it */ + atomic_set(&page->_count, size); + + /* reset page count bias and offset to start of new frag */ + nc->pagecnt_bias = size; + offset = size - fragsz; + } + + nc->pagecnt_bias--; + nc->offset = offset; + + return nc->va + offset; +} +EXPORT_SYMBOL(__alloc_page_frag); + +/* + * Frees a page fragment allocated out of either a compound or order 0 page. + */ +void __free_page_frag(void *addr) +{ + struct page *page = virt_to_head_page(addr); + + if (unlikely(put_page_testzero(page))) + __free_pages_ok(page, compound_order(page)); +} +EXPORT_SYMBOL(__free_page_frag); + +/* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter * of the current memory cgroup. * diff --git a/net/Kconfig b/net/Kconfig index 44dd5786ee91..57a7c5af3175 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES Newly written code should NEVER need this option but do compat-independent messages instead! +config NET_INGRESS + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3b7ad43c7dad..d5871ac493eb 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/atm/common.c b/net/atm/common.c index ed0466637e13..49a872db7e42 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -141,7 +141,7 @@ static struct proto vcc_proto = { .release_cb = vcc_release_cb, }; -int vcc_create(struct net *net, struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern) { struct sock *sk; struct atm_vcc *vcc; @@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff --git a/net/atm/common.h b/net/atm/common.h index 4d6f5b2068ac..959436b87182 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include <linux/poll.h> /* for poll_table */ -int vcc_create(struct net *net, struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index ae0324021407..040207ec399f 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct 
socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &pvc_proto_ops; - return vcc_create(net, sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC, kern); } static const struct net_proto_family pvc_family_ops = { diff --git a/net/atm/svc.c b/net/atm/svc.c index 1ba23f5018e7..3fa0a9ee98d1 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &svc_proto_ops; - error = vcc_create(net, sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC, kern); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 330c1f4a5a0b..4273533d22b1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -855,7 +855,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto); + sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern); if (sk == NULL) return -ENOMEM; @@ -881,7 +881,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index bde2bdd9e929..b5116fa9835e 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index d82787d417bd..ce86a7bae844 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 56f9edbf3d05..5b14dcafcd08 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1377,7 +1377,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &hci_sock_ops; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index cb3fdde1968a..008ba439bd62 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7278f05eafb..244287706f91 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = { static const struct 
proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio); + int proto, gfp_t prio, int kern); bool l2cap_is_socket(struct socket *sock) { @@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, - GFP_ATOMIC); + GFP_ATOMIC, 0); if (!sk) { release_sock(parent); return NULL; @@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = { }; static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio) + int proto, gfp_t prio, int kern) { struct sock *sk; struct l2cap_chan *chan; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern); if (!sk) return NULL; @@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4fea24275b17..29709fbfd1f5 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock) BT_DBG(""); - err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); + err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); if (!err) { struct sock *sk = (*sock)->sk; sk->sk_data_ready = rfcomm_l2data_ready; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 825e8fb5114b..b2338e971b33 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -269,12 +269,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern); if (!sk) return NULL; @@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock, sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -969,7 +969,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4322c833e748..6b6e59dc54cf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -460,11 +460,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern); if (!sk) return NULL; @@ -501,7 +501,7 @@ 
static int sco_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); sk = sco_sock_alloc(sock_net(parent), NULL, - BTPROTO_SCO, GFP_ATOMIC); + BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { bh_unlock_sock(parent); sco_conn_unlock(conn); diff --git a/net/bridge/br.c b/net/bridge/br.c index 02c24cf63c34..a1abe4936fe1 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,13 +121,13 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; -static int br_netdev_switch_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int br_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; - struct netdev_switch_notifier_fdb_info *fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; rtnl_lock(); @@ -138,14 +138,14 @@ static int br_netdev_switch_event(struct notifier_block *unused, br = p->br; switch (event) { - case NETDEV_SWITCH_FDB_ADD: + case SWITCHDEV_FDB_ADD: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, fdb_info->vid); if (err) err = notifier_from_errno(err); break; - case NETDEV_SWITCH_FDB_DEL: + case SWITCHDEV_FDB_DEL: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, fdb_info->vid); @@ -159,8 +159,8 @@ out: return err; } -static struct notifier_block br_netdev_switch_notifier = { - .notifier_call = br_netdev_switch_event, +static struct notifier_block br_switchdev_notifier = { + .notifier_call = br_switchdev_event, }; static void __net_exit br_net_exit(struct net *net) @@ -214,7 +214,7 @@ static int __init br_init(void) if (err) goto err_out3; - err = register_netdev_switch_notifier(&br_netdev_switch_notifier); + err = register_switchdev_notifier(&br_switchdev_notifier); if (err) goto err_out4; @@ -235,7 +235,7 @@ static int __init br_init(void) return 0; err_out5: - unregister_netdev_switch_notifier(&br_netdev_switch_notifier); + unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -253,7 +253,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); - unregister_netdev_switch_notifier(&br_netdev_switch_notifier); + unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a3abe6ed111e..d7e103e3538a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -975,9 +975,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int err = 0; __be32 group; - if (!pskb_may_pull(skb, sizeof(*ih))) - return -EINVAL; - ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1248,25 +1245,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay = 10 * HZ; group = 0; } - } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { - err = -EINVAL; - goto out; - } - + } else if (skb->len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if 
(ih3->nsrcs) goto out; max_delay = ih3->code ? IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; - } - - /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer - * all-systems destination addresses (224.0.0.1) for general queries - */ - if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { - err = -EINVAL; + } else { goto out; } @@ -1329,12 +1315,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ - if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { - err = -EINVAL; - goto out; - } - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1358,14 +1338,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, is_general_query = group && ipv6_addr_any(group); - /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer - * all-nodes destination address (ff02::1) for general queries - */ - if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { - err = -EINVAL; - goto out; - } - if (is_general_query) { saddr.proto = htons(ETH_P_IPV6); saddr.u.ip6 = ip6h->saddr; @@ -1557,74 +1529,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2 = skb; - const struct iphdr *iph; + struct sk_buff *skb_trimmed = NULL; struct igmphdr *ih; - unsigned int len; - unsigned int offset; int err; - /* We treat OOM as packet loss for now. */ - if (!pskb_may_pull(skb, sizeof(*iph))) - return -EINVAL; - - iph = ip_hdr(skb); - - if (iph->ihl < 5 || iph->version != 4) - return -EINVAL; - - if (!pskb_may_pull(skb, ip_hdrlen(skb))) - return -EINVAL; - - iph = ip_hdr(skb); - - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - return -EINVAL; + err = ip_mc_check_igmp(skb, &skb_trimmed); - if (iph->protocol != IPPROTO_IGMP) { - if (!ipv4_is_local_multicast(iph->daddr)) + if (err == -ENOMSG) { + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } else if (err < 0) { + return err; } - len = ntohs(iph->tot_len); - if (skb->len < len || len < ip_hdrlen(skb)) - return -EINVAL; - - if (skb->len > len) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = pskb_trim_rcsum(skb2, len); - if (err) - goto err_out; - } - - len -= ip_hdrlen(skb2); - offset = skb_network_offset(skb2) + ip_hdrlen(skb2); - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - - err = -EINVAL; - if (!pskb_may_pull(skb2, sizeof(*ih))) - goto out; - - switch (skb2->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb2->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb2->csum = 0; - if (skb_checksum_complete(skb2)) - goto out; - } - - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; - ih = igmp_hdr(skb2); + ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: @@ -1633,21 +1553,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); + err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2, vid); + err = br_ip4_multicast_query(br, port, skb_trimmed, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } -out: - __skb_push(skb2, offset); 
-err_out: - if (skb2 != skb) - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } @@ -1657,138 +1575,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2; - const struct ipv6hdr *ip6h; - u8 icmp6_type; - u8 nexthdr; - __be16 frag_off; - unsigned int len; - int offset; + struct sk_buff *skb_trimmed = NULL; + struct mld_msg *mld; int err; - if (!pskb_may_pull(skb, sizeof(*ip6h))) - return -EINVAL; - - ip6h = ipv6_hdr(skb); - - /* - * We're interested in MLD messages only. - * - Version is 6 - * - MLD has always Router Alert hop-by-hop option - * - But we do not support jumbrograms. - */ - if (ip6h->version != 6) - return 0; - - /* Prevent flooding this packet if there is no listener present */ - if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) - BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = ipv6_mc_check_mld(skb, &skb_trimmed); - if (ip6h->nexthdr != IPPROTO_HOPOPTS || - ip6h->payload_len == 0) - return 0; - - len = ntohs(ip6h->payload_len) + sizeof(*ip6h); - if (skb->len < len) - return -EINVAL; - - nexthdr = ip6h->nexthdr; - offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); - - if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + if (err == -ENOMSG) { + if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; - - /* Okay, we found ICMPv6 header */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = -EINVAL; - if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) - goto out; - - len -= offset - skb_network_offset(skb2); - - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - skb_postpull_rcsum(skb2, skb_network_header(skb2), - skb_network_header_len(skb2)); - - icmp6_type = icmp6_hdr(skb2)->icmp6_type; - - switch (icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - break; - default: - err = 0; - goto out; - } - - /* Okay, we found MLD message. Check further. 
*/ - if (skb2->len > len) { - err = pskb_trim_rcsum(skb2, len); - if (err) - goto out; - err = -EINVAL; - } - - ip6h = ipv6_hdr(skb2); - - switch (skb2->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, - IPPROTO_ICMPV6, skb2->csum)) - break; - /*FALLTHROUGH*/ - case CHECKSUM_NONE: - skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, - &ip6h->daddr, - skb2->len, - IPPROTO_ICMPV6, 0)); - if (__skb_checksum_complete(skb2)) - goto out; + } else if (err < 0) { + return err; } - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; + mld = (struct mld_msg *)skb_transport_header(skb); - switch (icmp6_type) { + switch (mld->mld_type) { case ICMPV6_MGM_REPORT: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; - } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2, vid); + err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2, vid); + err = br_ip6_multicast_query(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_REDUCTION: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); - } + break; } -out: - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 60ddfbeb47f5..46660a28feef 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -125,6 +125,14 @@ static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) return skb->nf_bridge; } +static void nf_bridge_info_free(struct sk_buff *skb) +{ + if (skb->nf_bridge) { + nf_bridge_put(skb->nf_bridge); + skb->nf_bridge = NULL; + } +} + static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -832,17 +840,39 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); __skb_push(skb, data->encap_size); + nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } +static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) +{ + unsigned int mtu = ip_skb_dst_mtu(skb); + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + struct net_device *dev = rt->dst.dev; + + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > mtu))) { + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + + return ip_do_fragment(sk, skb, output); +} + static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; int frag_max_size; unsigned int mtu_reserved; - if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) + if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { + nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); + } mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! 
We should preserve the original fragment @@ -866,8 +896,9 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); + ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); } else { + nf_bridge_info_free(skb); ret = br_dev_queue_push_xmit(sk, skb); } @@ -876,7 +907,8 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) #else static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - return br_dev_queue_push_xmit(sk, skb); + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(sk, skb); } #endif @@ -964,6 +996,8 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) nf_bridge->neigh_header, ETH_HLEN - ETH_ALEN); skb->dev = nf_bridge->physindev; + + nf_bridge->physoutdev = NULL; br_handle_frame_finish(NULL, skb); } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4b5c236998ff..6b67ed3831de 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -586,7 +586,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err = 0, ret_offload = 0; + int err = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -628,16 +628,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) afspec, RTM_SETLINK); } - if (p && !(flags & BRIDGE_FLAGS_SELF)) { - /* set bridge attributes in hardware if supported - */ - ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, - flags); - if (ret_offload && ret_offload != -EOPNOTSUPP) - br_warn(p->br, "error setting attrs on port %u(%s)\n", - (unsigned int)p->port_no, p->dev->name); - } - if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); out: @@ -649,7 +639,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; - int err = 0, ret_offload = 0; + int err = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) @@ -668,16 +658,6 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) */ br_ifinfo_notify(RTM_NEWLINK, p); - if (p && !(flags & BRIDGE_FLAGS_SELF)) { - /* del bridge attributes in hardware - */ - ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, - flags); - if (ret_offload && ret_offload != -EOPNOTSUPP) - br_warn(p->br, "error deleting attrs on port %u (%s)\n", - (unsigned int)p->port_no, p->dev->name); - } - return err; } static int br_validate(struct nlattr *tb[], struct nlattr *data[]) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3362c29400f1..1f36fa70639b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -33,8 +33,8 @@ /* Control of forwarding link local multicast */ #define BR_GROUPFWD_DEFAULT 0 -/* Don't allow forwarding control protocols like STP and LLDP */ -#define BR_GROUPFWD_RESTRICTED 0x4007u +/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ +#define BR_GROUPFWD_RESTRICTED 0x0007u /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ #define BR_GROUPFWD_8021AD 0xB801u diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index fb3ebe615513..45f1ff113af9 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -39,10 +39,14 @@ void br_log_state(const 
struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_STP_STATE, + .u.stp_state = state, + }; int err; p->state = state; - err = netdev_switch_port_stp_update(p->dev, state); + err = switchdev_port_attr_set(p->dev, &attr); if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 071d87214dde..0c40570069ba 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -164,8 +164,10 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) !(info->bitmask & EBT_STP_MASK)) return -EINVAL; /* Make sure the match only receives stp frames */ - if (!ether_addr_equal(e->destmac, bridge_ula) || - !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) + if (!par->nft_compat && + (!ether_addr_equal(e->destmac, bridge_ula) || + !ether_addr_equal(e->destmsk, msk) || + !(e->bitmask & EBT_DESTMAC))) return -EINVAL; return 0; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 24c7c96bf5f8..d5aba394ff6f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -139,7 +139,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) + if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4ec0c803aef1..78a04ebb113c 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1047,7 +1047,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. 
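
br_set_state() above now expresses the STP state as a switchdev attribute instead of calling the dedicated netdev_switch_port_stp_update() helper. A sketch of the same idiom from any code that mirrors a software state to an offloaded port, assuming only switchdev_port_attr_set() and SWITCHDEV_ATTR_PORT_STP_STATE as introduced in that hunk; the helper name is hypothetical:

    #include <net/switchdev.h>

    /* hypothetical: push an STP state to hardware, tolerating no offload */
    static int my_port_set_stp(struct net_device *dev, u8 state)
    {
            struct switchdev_attr attr = {
                    .id = SWITCHDEV_ATTR_PORT_STP_STATE,
                    .u.stp_state = state,
            };
            int err = switchdev_port_attr_set(dev, &attr);

            /* -EOPNOTSUPP only means the port has no switchdev backing */
            return err == -EOPNOTSUPP ? 0 : err;
    }
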
*/ - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); + sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern); if (!sk) return -ENOMEM; diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc..d4d404bdfc9a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -179,7 +179,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, sock->ops = cp->ops; - sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); + sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern); if (!sk) { err = -ENOMEM; goto errout; } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f043..073262fea6dd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; diff --git a/net/core/dev.c b/net/core/dev.c index 2c1c67fad64d..594163d0c6eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,7 @@ #include <linux/if_macvlan.h> #include <linux/errqueue.h> #include <linux/hrtimer.h> +#include <linux/netfilter_ingress.h> #include "net-sysfs.h" @@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS static struct static_key ingress_needed __read_mostly; void net_inc_ingress_queue(void) @@ -2350,6 +2351,34 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +/* + * Returns a Tx hash based on the given packet descriptor and the number + * of Tx queues to be used as a distribution range.
+ */ +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features = 0; @@ -2908,6 +2937,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dev_loopback_xmit); +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[skb->sender_cpu - 1]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else + queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), + map->len)]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + +static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + int queue_index = sk_tx_queue_get(sk); + + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int new_index = get_xps_queue(dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, skb); + + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, new_index); + + queue_index = new_index; + } + + return queue_index; +} + +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb, + void *accel_priv) +{ + int queue_index = 0; + +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + + if (dev->real_num_tx_queues != 1) { + const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb, accel_priv, + __netdev_pick_tx); + else + queue_index = __netdev_pick_tx(dev, skb); + + if (!accel_priv) + queue_index = netdev_cap_txqueue(dev, queue_index); + } + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + /** * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -3520,66 +3627,47 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev, EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -#ifdef CONFIG_NET_CLS_ACT -/* TODO: Maybe we should just force sch_ingress to be compiled in - * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions - * a compare and 2 stores extra right now if we dont have it on - * but have CONFIG_NET_CLS_ACT - * NOTE: This doesn't stop any functionality; if you dont have - * the ingress scheduler, you just can't add policies on ingress. 
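
netdev_pick_tx(), moved here from flow_dissector.c, passes __netdev_pick_tx() to drivers as the fallback argument of ndo_select_queue(). A sketch of a driver hook built on that contract, assuming the select_queue_fallback_t typedef from netdevice.h; the steering policy and name are hypothetical:

    #include <linux/netdevice.h>
    #include <linux/pkt_sched.h>

    static u16 my_select_queue(struct net_device *dev, struct sk_buff *skb,
                               void *accel_priv,
                               select_queue_fallback_t fallback)
    {
            /* hypothetical policy: pin control traffic to queue 0 */
            if (skb->priority == TC_PRIO_CONTROL)
                    return 0;
            /* otherwise defer to the stack's XPS/hash-based selection */
            return fallback(dev, skb);
    }
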
- * - */ -static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) -{ - struct net_device *dev = skb->dev; - u32 ttl = G_TC_RTTL(skb->tc_verd); - int result = TC_ACT_OK; - struct Qdisc *q; - - if (unlikely(MAX_RED_LOOP < ttl++)) { - net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - - q = rcu_dereference(rxq->qdisc); - if (q != &noop_qdisc) { - spin_lock(qdisc_lock(q)); - if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) - result = qdisc_enqueue_root(skb, q); - spin_unlock(qdisc_lock(q)); - } - - return result; -} - static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); +#ifdef CONFIG_NET_CLS_ACT + struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct tcf_result cl_res; - if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) + /* If there's at least one ingress present somewhere (so + * we get here via enabled static key), remaining devices + * that are not configured with an ingress qdisc will bail + * out here. + */ + if (!cl) return skb; - if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - switch (ing_filter(skb, rxq)) { + qdisc_skb_cb(skb)->pkt_len = skb->len; + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); + qdisc_bstats_update_cpu(cl->q, skb); + + switch (tc_classify(skb, cl, &cl_res)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; case TC_ACT_SHOT: + qdisc_qstats_drop_cpu(cl->q); case TC_ACT_STOLEN: + case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + default: + break; } - +#endif /* CONFIG_NET_CLS_ACT */ return skb; } -#endif /** * netdev_rx_handler_register - register receive handler @@ -3652,6 +3740,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) } } +static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ +#ifdef CONFIG_NETFILTER_INGRESS + if (nf_hook_ingress_active(skb)) { + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + + return nf_hook_ingress(skb); + } +#endif /* CONFIG_NETFILTER_INGRESS */ + return 0; +} + static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; @@ -3711,13 +3815,17 @@ another_round: } skip_taps: -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto unlock; - } + if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) + goto unlock; + } +#endif +#ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; ncls: #endif @@ -6320,6 +6428,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return 0; } +void netif_tx_stop_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); + } +} +EXPORT_SYMBOL(netif_tx_stop_all_queues); + /** * register_netdevice - register a network device * @dev: device to register @@ -6869,6 +6988,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; + 
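
nf_ingress() above runs NFPROTO_NETDEV hooks from the core receive path once a device has any registered. A sketch of registering such an ingress hook, assuming the 4.2-era nf_hookfn signature and that nf_register_hook() accepts NFPROTO_NETDEV/NF_NETDEV_INGRESS ops with .dev set; all names are hypothetical:

    #include <linux/netdevice.h>
    #include <linux/netfilter.h>

    static unsigned int my_ingress_hook(const struct nf_hook_ops *ops,
                                        struct sk_buff *skb,
                                        const struct nf_hook_state *state)
    {
            /* hypothetical: observe the packet, never drop it */
            return NF_ACCEPT;
    }

    static struct nf_hook_ops my_ops = {
            .hook     = my_ingress_hook,
            .pf       = NFPROTO_NETDEV,
            .hooknum  = NF_NETDEV_INGRESS,
            .priority = 0,
    };

    static int my_attach(struct net_device *dev)
    {
            my_ops.dev = dev;       /* ingress hooks are per device */
            return nf_register_hook(&my_ops);
    }
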
+ nf_hook_ingress_init(dev); + return dev; free_all: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1347e11f5cc9..4f6a17ef0710 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", - [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char diff --git a/net/core/filter.c b/net/core/filter.c index bf831a85c315..3adcca6f17a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,6 +36,7 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <net/sock.h> +#include <net/flow_dissector.h> #include <linux/errno.h> #include <linux/timer.h> #include <asm/uaccess.h> @@ -355,8 +356,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * for socket filters: ctx == 'struct sk_buff *', for seccomp: * ctx == 'struct seccomp_data *'. */ -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) +static int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; struct bpf_insn *new_insn; @@ -371,7 +372,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len, return -EINVAL; if (new_prog) { - addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL); + addrs = kcalloc(len, sizeof(*addrs), + GFP_KERNEL | __GFP_NOWARN); if (!addrs) return -ENOMEM; } @@ -751,7 +753,8 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) +static int bpf_check_classic(const struct sock_filter *filter, + unsigned int flen) { bool anc_found; int pc; @@ -825,7 +828,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) return -EINVAL; } -EXPORT_SYMBOL(bpf_check_classic); static int bpf_prog_store_orig_filter(struct bpf_prog *fp, const struct sock_fprog *fprog) @@ -839,7 +841,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp, fkprog = fp->orig_prog; fkprog->len = fprog->len; - fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); + + fkprog->filter = kmemdup(fp->insns, fsize, + GFP_KERNEL | __GFP_NOWARN); if (!fkprog->filter) { kfree(fp->orig_prog); return -ENOMEM; @@ -941,7 +945,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) * pass. At this time, the user BPF is stored in fp->insns. */ old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!old_prog) { err = -ENOMEM; goto out_err; @@ -988,7 +992,8 @@ out_err: return ERR_PTR(err); } -static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans) { int err; @@ -1001,6 +1006,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) return ERR_PTR(err); } + /* There might be additional checks and transformations + * needed on classic filters, f.e. in case of seccomp. + */ + if (trans) { + err = trans(fp->insns, fp->len); + if (err) { + __bpf_prog_release(fp); + return ERR_PTR(err); + } + } + /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. 
*/ @@ -1050,7 +1066,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = bpf_prepare_filter(fp); + fp = bpf_prepare_filter(fp, NULL); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -1059,6 +1075,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) } EXPORT_SYMBOL_GPL(bpf_prog_create); +/** + * bpf_prog_create_from_user - create an unattached filter from user buffer + * @pfp: the unattached filter that is created + * @fprog: the filter program + * @trans: post-classic verifier transformation handler + * + * This function effectively does the same as bpf_prog_create(), only + * that it builds up its insns buffer from user space provided buffer. + * It also allows for passing a bpf_aux_classic_check_t handler. + */ +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans) +{ + unsigned int fsize = bpf_classic_proglen(fprog); + struct bpf_prog *fp; + + /* Make sure new filter is there and in the right amounts. */ + if (fprog->filter == NULL) + return -EINVAL; + + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!fp) + return -ENOMEM; + + if (copy_from_user(fp->insns, fprog->filter, fsize)) { + __bpf_prog_free(fp); + return -EFAULT; + } + + fp->len = fprog->len; + /* Since unattached filters are not copied back to user + * space through sk_get_filter(), we do not need to hold + * a copy here, and can spare us the work. + */ + fp->orig_prog = NULL; + + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ + fp = bpf_prepare_filter(fp, trans); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + *pfp = fp; + return 0; +} + void bpf_prog_destroy(struct bpf_prog *fp) { __bpf_prog_release(fp); @@ -1135,7 +1198,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. 
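
bpf_prog_create_from_user() exists so that a caller such as seccomp can keep its own classic-BPF restrictions while reusing this plumbing; the trans callback runs after bpf_check_classic(). A sketch of a caller with a trivial extra check, assuming bpf_aux_classic_check_t matches the trans(fp->insns, fp->len) invocation shown above; the checker is hypothetical:

    #include <linux/errno.h>
    #include <linux/filter.h>

    /* hypothetical post-verifier check: cap the program length */
    static int my_classic_check(struct sock_filter *filter, unsigned int flen)
    {
            return flen > 64 ? -EINVAL : 0;
    }

    static int my_load(struct sock_fprog *fprog, struct bpf_prog **progp)
    {
            return bpf_prog_create_from_user(progp, fprog, my_classic_check);
    }
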
*/ - prog = bpf_prepare_filter(prog); + prog = bpf_prepare_filter(prog, NULL); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -1358,6 +1421,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; default: return NULL; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2c35c02a931e..1f2d89300b1a 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1,3 +1,4 @@ +#include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/export.h> #include <linux/ip.h> @@ -12,19 +13,57 @@ #include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> -#include <net/flow_keys.h> +#include <linux/stddef.h> +#include <linux/if_ether.h> +#include <net/flow_dissector.h> #include <scsi/fc/fc_fcoe.h> -/* copy saddr & daddr, possibly using 64bit load/store - * Equivalent to : flow->src = iph->saddr; - * flow->dst = iph->daddr; - */ -static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) +static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id) +{ + return flow_dissector->used_keys & (1 << key_id); +} + +static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id) +{ + flow_dissector->used_keys |= (1 << key_id); +} + +static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id, + void *target_container) +{ + return ((char *) target_container) + flow_dissector->offset[key_id]; +} + +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count) { - BUILD_BUG_ON(offsetof(typeof(*flow), dst) != - offsetof(typeof(*flow), src) + sizeof(flow->src)); - memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); + unsigned int i; + + memset(flow_dissector, 0, sizeof(*flow_dissector)); + + for (i = 0; i < key_count; i++, key++) { + /* User should make sure that every key target offset is within the + * boundaries of unsigned short. + */ + BUG_ON(key->offset > USHRT_MAX); + BUG_ON(skb_flow_dissector_uses_key(flow_dissector, + key->key_id)); + + skb_flow_dissector_set_key(flow_dissector, key->key_id); + flow_dissector->offset[key->key_id] = key->offset; + } + + /* Ensure that the dissector always includes the basic key. That way + * we are able to avoid handling the lack of it in the fast path.
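
skb_flow_dissector_init() binds each requested key id to an offset in a caller-owned container, so a user dissects only the keys it cares about. A sketch of a custom dissector covering the mandatory basic key plus ports, assuming the structures this patch introduces; the container and names are hypothetical:

    #include <linux/kernel.h>
    #include <linux/stddef.h>
    #include <net/flow_dissector.h>

    struct my_keys {
            struct flow_dissector_key_basic basic;  /* mandatory key */
            struct flow_dissector_key_ports ports;
    };

    static const struct flow_dissector_key my_dissector_keys[] = {
            {
                    .key_id = FLOW_DISSECTOR_KEY_BASIC,
                    .offset = offsetof(struct my_keys, basic),
            },
            {
                    .key_id = FLOW_DISSECTOR_KEY_PORTS,
                    .offset = offsetof(struct my_keys, ports),
            },
    };

    static struct flow_dissector my_dissector;

    static void my_dissector_setup(void)
    {
            skb_flow_dissector_init(&my_dissector, my_dissector_keys,
                                    ARRAY_SIZE(my_dissector_keys));
    }

A zeroed struct my_keys is then passed to __skb_flow_dissect() as the target container, per the kernel-doc that follows.
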
+ */ + BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC)); } +EXPORT_SYMBOL(skb_flow_dissector_init); /** * __skb_flow_get_ports - extract the upper layer ports and return them @@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports); /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified + * @flow_dissector: list of keys to dissect + * @target_container: target structure to put dissected values into * @data: raw buffer pointer to the packet, if NULL use skb->data * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * - * The function will try to retrieve the struct flow_keys from either the skbuff - * or a raw buffer specified by the rest parameters + * The function will try to retrieve individual keys into target specified + * by flow_dissector from either the skbuff or a raw buffer specified by the + * rest parameters. + * + * Caller must take care of zeroing target container memory. */ -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen) { + struct flow_dissector_key_basic *key_basic; + struct flow_dissector_key_addrs *key_addrs; + struct flow_dissector_key_ports *key_ports; u8 ip_proto; if (!data) { @@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, hlen = skb_headlen(skb); } - memset(flow, 0, sizeof(*flow)); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct ethhdr *eth = eth_hdr(skb); + struct flow_dissector_key_eth_addrs *key_eth_addrs; + + key_eth_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + target_container); + memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); + } again: switch (proto) { @@ -100,14 +165,13 @@ ip: if (ip_is_fragment(iph)) ip_proto = 0; - /* skip the address processing if skb is NULL. The assumption - * here is that if there is no skb we are not looking for flow - * info but lengths and protocols. 
- */ - if (!skb) + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS)) break; - - iph_to_flow_copy_addrs(flow, iph); + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + target_container); + memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs)); break; } case htons(ETH_P_IPV6): { @@ -123,23 +187,47 @@ ipv6: ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); - /* see comment above in IPv4 section */ - if (!skb) - break; + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); + + key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + goto flow_label; + } + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; - flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + target_container); + memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); + goto flow_label; + } + break; +flow_label: flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can * use that to represent the ports without any * further dissection. */ - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->ports = flow_label; - flow->thoff = (u16)nhoff; + + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16)nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) { + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = flow_label; + } return true; } @@ -186,14 +274,21 @@ ipv6: hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; - flow->src = hdr->srcnode; - flow->dst = 0; - flow->n_proto = proto; - flow->thoff = (u16)nhoff; + key_basic->n_proto = proto; + key_basic->thoff = (u16)nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); + key_addrs->src = hdr->srcnode; + key_addrs->dst = 0; + } return true; } case htons(ETH_P_FCOE): - flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ default: return false; @@ -248,14 +343,24 @@ ipv6: break; } - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->thoff = (u16) nhoff; - - /* unless skb is set we don't need to record port info */ - if (skb) - flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. 
+ */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16) nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) { + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); + } return true; } @@ -267,27 +372,27 @@ static __always_inline void __flow_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) +static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) { - __flow_hash_secret_init(); - return jhash_3words(a, b, c, hashrnd); + return jhash_3words(a, b, c, keyval); } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) { u32 hash; /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys->dst < (__force u32)keys->src) || - (((__force u32)keys->dst == (__force u32)keys->src) && - ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { - swap(keys->dst, keys->src); - swap(keys->port16[0], keys->port16[1]); + if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) || + (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) && + ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) { + swap(keys->addrs.dst, keys->addrs.src); + swap(keys->ports.src, keys->ports.dst); } - hash = __flow_hash_3words((__force u32)keys->dst, - (__force u32)keys->src, - (__force u32)keys->ports); + hash = __flow_hash_3words((__force u32)keys->addrs.dst, + (__force u32)keys->addrs.src, + (__force u32)keys->ports.ports, + keyval); if (!hash) hash = 1; @@ -296,12 +401,52 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) u32 flow_hash_from_keys(struct flow_keys *keys) { - return __flow_hash_from_keys(keys); + __flow_hash_secret_init(); + return __flow_hash_from_keys(keys, hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); -/* - * __skb_get_hash: calculate a flow hash based on src/dst addresses +static inline u32 ___skb_get_hash(const struct sk_buff *skb, + struct flow_keys *keys, u32 keyval) +{ + if (!skb_flow_dissect_flow_keys(skb, keys)) + return 0; + + return __flow_hash_from_keys(keys, keyval); +} + +struct _flow_keys_digest_data { + __be16 n_proto; + u8 ip_proto; + u8 padding; + __be32 ports; + __be32 src; + __be32 dst; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow) +{ + struct _flow_keys_digest_data *data = + (struct _flow_keys_digest_data *)digest; + + BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); + + memset(digest, 0, sizeof(*digest)); + + data->n_proto = flow->basic.n_proto; + data->ip_proto = flow->basic.ip_proto; + data->ports = flow->ports.ports; + data->src = flow->addrs.src; + data->dst = flow->addrs.dst; +} +EXPORT_SYMBOL(make_flow_keys_digest); + +/** + * __skb_get_hash: calculate a flow hash + * @skb: sk_buff to calculate flow hash from + * + * This function calculates a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. 
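
make_flow_keys_digest() above flattens the dissected tuple into a fixed-layout blob that can be hashed or memcmp()ed without worrying about padding. A minimal usage sketch, assuming skb_flow_dissect_flow_keys() as used elsewhere in this patch; the wrapper is hypothetical:

    #include <net/flow_dissector.h>

    /* hypothetical: derive a connection key for a hash-table lookup */
    static bool my_flow_digest(const struct sk_buff *skb,
                               struct flow_keys_digest *digest)
    {
            struct flow_keys keys;

            if (!skb_flow_dissect_flow_keys(skb, &keys))
                    return false;
            make_flow_keys_digest(digest, &keys);
            return true;
    }
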
@@ -309,53 +454,34 @@ EXPORT_SYMBOL(flow_hash_from_keys); void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; + u32 hash; - if (!skb_flow_dissect(skb, &keys)) - return; + __flow_hash_secret_init(); - if (keys.ports) + hash = ___skb_get_hash(skb, &keys, hashrnd); + if (!hash) + return; + if (keys.ports.ports) skb->l4_hash = 1; - skb->sw_hash = 1; - - skb->hash = __flow_hash_from_keys(&keys); + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. - */ -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) { - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; - - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } + struct flow_keys keys; - return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; + return ___skb_get_hash(skb, &keys, perturb); } -EXPORT_SYMBOL(__skb_tx_hash); +EXPORT_SYMBOL(skb_get_hash_perturb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { - u32 poff = keys->thoff; + u32 poff = keys->basic.thoff; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: { /* access doff as u8 to avoid unaligned access */ const u8 *doff; @@ -396,8 +522,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data, return poff; } -/* skb_get_poff() returns the offset to the payload as far as it could - * be dissected. The main user is currently BPF, so that we can dynamically +/** + * skb_get_poff - get the offset to the payload + * @skb: sk_buff to get the payload offset from + * + * The function will get the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically * truncate packets without needing to push actual payload to the user * space and can analyze headers only, instead. 
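
skb_get_hash_perturb() lets a queueing discipline key the flow hash with its own perturbation value rather than the global hashrnd, so periodic reseeding does not disturb other users. A sketch of bucket selection in an sfq/fq_codel-style scheduler; names are hypothetical:

    #include <linux/kernel.h>
    #include <linux/skbuff.h>

    /* hypothetical: map a packet to one of 'buckets' flow queues */
    static u32 my_classify(const struct sk_buff *skb, u32 perturbation,
                           u32 buckets)
    {
            return reciprocal_scale(skb_get_hash_perturb(skb, perturbation),
                                    buckets);
    }
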
*/ @@ -405,86 +535,52 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys keys; - if (!skb_flow_dissect(skb, &keys)) + if (!skb_flow_dissect_flow_keys(skb, &keys)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); } -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) -{ -#ifdef CONFIG_XPS - struct xps_dev_maps *dev_maps; - struct xps_map *map; - int queue_index = -1; - - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); - if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[skb->sender_cpu - 1]); - if (map) { - if (map->len == 1) - queue_index = map->queues[0]; - else - queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), - map->len)]; - if (unlikely(queue_index >= dev->real_num_tx_queues)) - queue_index = -1; - } - } - rcu_read_unlock(); - - return queue_index; -#else - return -1; -#endif -} - -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +static const struct flow_dissector_key flow_keys_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + +static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, +}; + +struct flow_dissector flow_keys_dissector __read_mostly; +EXPORT_SYMBOL(flow_keys_dissector); + +struct flow_dissector flow_keys_buf_dissector __read_mostly; + +static int __init init_default_flow_dissectors(void) { - struct sock *sk = skb->sk; - int queue_index = sk_tx_queue_get(sk); - - if (queue_index < 0 || skb->ooo_okay || - queue_index >= dev->real_num_tx_queues) { - int new_index = get_xps_queue(dev, skb); - if (new_index < 0) - new_index = skb_tx_hash(dev, skb); - - if (queue_index != new_index && sk && - rcu_access_pointer(sk->sk_dst_cache)) - sk_tx_queue_set(sk, new_index); - - queue_index = new_index; - } - - return queue_index; + skb_flow_dissector_init(&flow_keys_dissector, + flow_keys_dissector_keys, + ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_buf_dissector, + flow_keys_buf_dissector_keys, + ARRAY_SIZE(flow_keys_buf_dissector_keys)); + return 0; } -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - void *accel_priv) -{ - int queue_index = 0; - -#ifdef CONFIG_XPS - if (skb->sender_cpu == 0) - skb->sender_cpu = raw_smp_processor_id() + 1; -#endif - - if (dev->real_num_tx_queues != 1) { - const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, accel_priv, - __netdev_pick_tx); - else - queue_index = __netdev_pick_tx(dev, skb); - - if (!accel_priv) - queue_index = netdev_cap_txqueue(dev, queue_index); - } - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); -} +late_initcall_sync(init_default_flow_dissectors); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3de654256028..3a74df750af4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -913,6 +913,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); + 
notify = 1; next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } } else { @@ -1144,6 +1145,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (new != old) { neigh_del_timer(neigh); + if (new & NUD_PROBE) + atomic_set(&neigh->probes, 0); if (new & NUD_IN_TIMER) neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4238d6da5c60..18b34d771ed4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -458,11 +458,15 @@ static ssize_t phys_switch_id_show(struct device *dev, return restart_syscall(); if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; - ret = netdev_switch_parent_id_get(netdev, &ppid); + ret = switchdev_port_attr_get(netdev, &attr); if (!ret) - ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); + ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, + attr.u.ppid.id); } rtnl_unlock(); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 572af0011997..2c2eb1b629b1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -147,24 +147,17 @@ static void ops_free_list(const struct pernet_operations *ops, } } -static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, - int id); +/* should be called with nsid_lock held */ static int alloc_netid(struct net *net, struct net *peer, int reqid) { - int min = 0, max = 0, id; - - ASSERT_RTNL(); + int min = 0, max = 0; if (reqid >= 0) { min = reqid; max = reqid + 1; } - id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); - if (id >= 0) - rtnl_net_notifyid(net, peer, RTM_NEWNSID, id); - - return id; + return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); } /* This function is used by idr_for_each(). If net is equal to peer, the @@ -180,11 +173,16 @@ static int net_eq_idr(int id, void *net, void *peer) return 0; } -static int __peernet2id(struct net *net, struct net *peer, bool alloc) +/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc + * is set to true, thus the caller knows that the new id must be notified via + * rtnl. + */ +static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) { int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + bool alloc_it = *alloc; - ASSERT_RTNL(); + *alloc = false; /* Magic value for id 0. */ if (id == NET_ID_ZERO) @@ -192,36 +190,77 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc) if (id > 0) return id; - if (alloc) - return alloc_netid(net, peer, -1); + if (alloc_it) { + id = alloc_netid(net, peer, -1); + *alloc = true; + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; + } + + return NETNSA_NSID_NOT_ASSIGNED; +} + +/* should be called with nsid_lock held */ +static int __peernet2id(struct net *net, struct net *peer) +{ + bool no = false; - return -ENOENT; + return __peernet2id_alloc(net, peer, &no); } +static void rtnl_net_notifyid(struct net *net, int cmd, int id); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ +int peernet2id_alloc(struct net *net, struct net *peer) +{ + unsigned long flags; + bool alloc; + int id; + + spin_lock_irqsave(&net->nsid_lock, flags); + alloc = atomic_read(&peer->count) == 0 ? 
false : true; + id = __peernet2id_alloc(net, peer, &alloc); + spin_unlock_irqrestore(&net->nsid_lock, flags); + if (alloc && id >= 0) + rtnl_net_notifyid(net, RTM_NEWNSID, id); + return id; +} +EXPORT_SYMBOL(peernet2id_alloc); + +/* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { - bool alloc = atomic_read(&peer->count) == 0 ? false : true; + unsigned long flags; int id; - id = __peernet2id(net, peer, alloc); - return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; + spin_lock_irqsave(&net->nsid_lock, flags); + id = __peernet2id(net, peer); + spin_unlock_irqrestore(&net->nsid_lock, flags); + return id; +} + +/* This function returns true if the peer netns has an id assigned in the + * current netns. + */ +bool peernet_has_id(struct net *net, struct net *peer) +{ + return peernet2id(net, peer) >= 0; } -EXPORT_SYMBOL(peernet2id); struct net *get_net_ns_by_id(struct net *net, int id) { + unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); + spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); + spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); return peer; @@ -242,6 +281,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); + spin_lock_init(&net->nsid_lock); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); @@ -362,14 +402,19 @@ static void cleanup_net(struct work_struct *work) list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); for_each_net(tmp) { - int id = __peernet2id(tmp, net, false); + int id; - if (id >= 0) { - rtnl_net_notifyid(tmp, net, RTM_DELNSID, id); + spin_lock_irq(&tmp->nsid_lock); + id = __peernet2id(tmp, net); + if (id >= 0) idr_remove(&tmp->netns_ids, id); - } + spin_unlock_irq(&tmp->nsid_lock); + if (id >= 0) + rtnl_net_notifyid(tmp, RTM_DELNSID, id); } + spin_lock_irq(&net->nsid_lock); idr_destroy(&net->netns_ids); + spin_unlock_irq(&net->nsid_lock); } rtnl_unlock(); @@ -497,6 +542,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + unsigned long flags; struct net *peer; int nsid, err; @@ -517,14 +563,19 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - if (__peernet2id(net, peer, false) >= 0) { + spin_lock_irqsave(&net->nsid_lock, flags); + if (__peernet2id(net, peer) >= 0) { + spin_unlock_irqrestore(&net->nsid_lock, flags); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); + spin_unlock_irqrestore(&net->nsid_lock, flags); - if (err > 0) + if (err >= 0) { + rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; + } out: put_net(peer); return err; @@ -538,14 +589,10 @@ static int rtnl_net_get_size(void) } static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, - int cmd, struct net *net, struct net *peer, - int nsid) + int cmd, struct net *net, int nsid) { struct nlmsghdr *nlh; struct rtgenmsg *rth; - int id; - - ASSERT_RTNL(); nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); if (!nlh) @@ -554,14 +601,7 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; - if (nsid >= 0) { - id = nsid; - } else { - id = __peernet2id(net, peer, false); - if (id < 0) - id = NETNSA_NSID_NOT_ASSIGNED; -
} - if (nla_put_s32(skb, NETNSA_NSID, id)) + if (nla_put_s32(skb, NETNSA_NSID, nsid)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -578,7 +618,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[NETNSA_MAX + 1]; struct sk_buff *msg; struct net *peer; - int err; + int err, id; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy); @@ -600,8 +640,9 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } + id = peernet2id(net, peer); err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_NEWNSID, net, peer, -1); + RTM_NEWNSID, net, id); if (err < 0) goto err_out; @@ -633,7 +674,7 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data) ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWNSID, net_cb->net, peer, id); + RTM_NEWNSID, net_cb->net, id); if (ret < 0) return ret; @@ -652,17 +693,17 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; + unsigned long flags; - ASSERT_RTNL(); - + spin_lock_irqsave(&net->nsid_lock, flags); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); + spin_unlock_irqrestore(&net->nsid_lock, flags); cb->args[0] = net_cb.idx; return skb->len; } -static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, - int id) +static void rtnl_net_notifyid(struct net *net, int cmd, int id) { struct sk_buff *msg; int err = -ENOMEM; @@ -671,7 +712,7 @@ static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, if (!msg) goto out; - err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id); + err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id); if (err < 0) goto err_out; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 508155b283dd..676550f34560 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -177,7 +177,7 @@ #include <asm/dma.h> #include <asm/div64.h> /* do_div */ -#define VERSION "2.74" +#define VERSION "2.75" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) @@ -210,6 +210,10 @@ #define T_REMDEVALL (1<<2) /* Remove all devs */ #define T_REMDEV (1<<3) /* Remove one dev */ +/* Xmit modes */ +#define M_START_XMIT 0 /* Default normal TX */ +#define M_NETIF_RECEIVE 1 /* Inject packets into stack */ + /* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); #define if_unlock(t) spin_unlock(&(t->if_lock)); @@ -251,13 +255,14 @@ struct pktgen_dev { * we will do a random selection from within the range. 
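
The peernet2id_alloc()/peernet2id() split above keeps read-only paths from allocating ids, while allocation now notifies RTM_NEWNSID itself under nsid_lock. A sketch of reporting a peer namespace id from a netlink fill function, assuming only the helpers introduced in that hunk; the wrapper is hypothetical:

    #include <linux/if_link.h>
    #include <net/net_namespace.h>
    #include <net/netlink.h>

    static int my_put_nsid(struct sk_buff *msg, struct net *net,
                           struct net *peer)
    {
            int id;

            if (net_eq(net, peer))
                    return 0;       /* same namespace, nothing to report */

            /* assigns (and notifies) an id on first use */
            id = peernet2id_alloc(net, peer);
            return nla_put_s32(msg, IFLA_LINK_NETNSID, id);
    }
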
*/ __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - + int xmit_mode; int min_pkt_size; int max_pkt_size; int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + int removal_mark; /* non-zero => the device is marked for + * removal by worker thread */ + struct page *page; u64 delay; /* nano-seconds */ @@ -507,7 +512,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, pktgen_reset_all_threads(pn); else - pr_warn("Unknown command: %s\n", data); + return -EINVAL; return count; } @@ -567,7 +572,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) " dst_min: %s dst_max: %s\n", pkt_dev->dst_min, pkt_dev->dst_max); seq_printf(seq, - " src_min: %s src_max: %s\n", + " src_min: %s src_max: %s\n", pkt_dev->src_min, pkt_dev->src_max); } @@ -620,6 +625,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->node >= 0) seq_printf(seq, " node: %d\n", pkt_dev->node); + if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) + seq_puts(seq, " xmit_mode: netif_receive\n"); + seq_puts(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) @@ -1081,7 +1089,8 @@ static ssize_t pktgen_if_write(struct file *file, if (len < 0) return len; if ((value > 0) && - (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || + !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; i += len; pkt_dev->clone_skb = value; @@ -1134,7 +1143,7 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; - if ((value > 1) && + if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) && (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; @@ -1160,6 +1169,45 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "ERROR: node not possible"); return count; } + if (!strcmp(name, "xmit_mode")) { + char f[32]; + + memset(f, 0, 32); + len = strn_len(&user_buffer[i], sizeof(f) - 1); + if (len < 0) + return len; + + if (copy_from_user(f, &user_buffer[i], len)) + return -EFAULT; + i += len; + + if (strcmp(f, "start_xmit") == 0) { + pkt_dev->xmit_mode = M_START_XMIT; + } else if (strcmp(f, "netif_receive") == 0) { + /* clone_skb set earlier, not supported in this mode */ + if (pkt_dev->clone_skb > 0) + return -ENOTSUPP; + + pkt_dev->xmit_mode = M_NETIF_RECEIVE; + + /* make sure new packet is allocated every time + * pktgen_xmit() is called + */ + pkt_dev->last_ok = 1; + + /* override clone_skb if user passed default value + * at module loading time + */ + pkt_dev->clone_skb = 0; + } else { + sprintf(pg_result, + "xmit_mode -:%s:- unknown\nAvailable modes: %s", + f, "start_xmit, netif_receive\n"); + return count; + } + sprintf(pg_result, "OK: xmit_mode=%s", f); + return count; + } if (!strcmp(name, "flag")) { char f[32]; memset(f, 0, 32); @@ -1267,6 +1315,9 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "NO_TIMESTAMP") == 0) pkt_dev->flags |= F_NO_TIMESTAMP; + else if (strcmp(f, "!NO_TIMESTAMP") == 0) + pkt_dev->flags &= ~F_NO_TIMESTAMP; + else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! 
to un-set flag):\n%s", @@ -3317,6 +3368,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) unsigned int burst = ACCESS_ONCE(pkt_dev->burst); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; + struct sk_buff *skb; int ret; /* If device is offline, then don't send */ @@ -3354,6 +3406,37 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); + if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { + skb = pkt_dev->skb; + skb->protocol = eth_type_trans(skb, skb->dev); + atomic_add(burst, &skb->users); + local_bh_disable(); + do { + ret = netif_receive_skb(skb); + if (ret == NET_RX_DROP) + pkt_dev->errors++; + pkt_dev->sofar++; + pkt_dev->seq_num++; + if (atomic_read(&skb->users) != burst) { + /* skb was queued by rps/rfs or taps, + * so cannot reuse this skb + */ + atomic_sub(burst - 1, &skb->users); + /* get out of the loop and wait + * until skb is consumed + */ + break; + } + /* skb was 'freed' by stack, so clean a few + * bits and reuse it + */ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = 0; /* reset reclass/redir ttl */ +#endif + } while (--burst > 0); + goto out; /* Skips xmit_mode M_START_XMIT */ + } + txq = skb_get_tx_queue(odev, pkt_dev->skb); local_bh_disable(); @@ -3401,6 +3484,7 @@ xmit_more: unlock: HARD_TX_UNLOCK(odev, txq); +out: local_bh_enable(); /* If pkt_dev->count is zero, then run forever */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8de36824018d..077b6d280371 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1004,16 +1004,20 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; - struct netdev_phys_item_id psid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; - err = netdev_switch_parent_id_get(dev, &psid); + err = switchdev_port_attr_get(dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len, + attr.u.ppid.id)) return -EMSGSIZE; return 0; @@ -1204,7 +1208,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id(dev_net(dev), link_net); + int id = peernet2id_alloc(dev_net(dev), link_net); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) goto nla_put_failure; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a3d651..f3fe9bd9e672 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) } EXPORT_SYMBOL(build_skb); -struct netdev_alloc_cache { - struct page_frag frag; - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_count every time we allocate a fragment.
- */ - unsigned int pagecnt_bias; -}; -static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); - -static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, - gfp_t gfp_mask) -{ - const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER; - struct page *page = NULL; - gfp_t gfp = gfp_mask; - - if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - nc->frag.size = PAGE_SIZE << (page ? order : 0); - } - - if (unlikely(!page)) - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - - nc->frag.page = page; - - return page; -} - -static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache, - unsigned int fragsz, gfp_t gfp_mask) -{ - struct netdev_alloc_cache *nc = this_cpu_ptr(cache); - struct page *page = nc->frag.page; - unsigned int size; - int offset; - - if (unlikely(!page)) { -refill: - page = __page_frag_refill(nc, gfp_mask); - if (!page) - return NULL; - - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - - /* Even if we own the page, we do not use atomic_set(). - * This would break get_page_unless_zero() users. - */ - atomic_add(size - 1, &page->_count); - - /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; - nc->frag.offset = size; - } - - offset = nc->frag.offset - fragsz; - if (unlikely(offset < 0)) { - if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) - goto refill; - - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - - /* OK, page count is 0, we can safely set it */ - atomic_set(&page->_count, size); - - /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; - offset = size - fragsz; - } - - nc->pagecnt_bias--; - nc->frag.offset = offset; - - return page_address(page) + offset; -} +static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); +static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { + struct page_frag_cache *nc; unsigned long flags; void *data; local_irq_save(flags); - data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask); + nc = this_cpu_ptr(&netdev_alloc_cache); + data = __alloc_page_frag(nc, fragsz, gfp_mask); local_irq_restore(flags); return data; } @@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask); + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + return __alloc_page_frag(nc, fragsz, gfp_mask); } void *napi_alloc_frag(unsigned int fragsz) @@ -464,76 +390,70 @@ void *napi_alloc_frag(unsigned int fragsz) EXPORT_SYMBOL(napi_alloc_frag); /** - * __alloc_rx_skb - allocate an skbuff for rx + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on * @length: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb - * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for - * allocations in case we have to fallback to __alloc_skb() - * If SKB_ALLOC_NAPI is set, page fragment will be allocated - * from napi_cache instead of netdev_cache. * * Allocate a new &sk_buff and assign it a usage count of one. 
The - * buffer has unspecified headroom built in. Users should allocate + * buffer has NET_SKB_PAD headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. */ -static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask, - int flags) +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, + gfp_t gfp_mask) { - struct sk_buff *skb = NULL; - unsigned int fragsz = SKB_DATA_ALIGN(length) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct page_frag_cache *nc; + unsigned long flags; + struct sk_buff *skb; + bool pfmemalloc; + void *data; - if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { - void *data; + len += NET_SKB_PAD; - if (sk_memalloc_socks()) - gfp_mask |= __GFP_MEMALLOC; + if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); + if (!skb) + goto skb_fail; + goto skb_success; + } - data = (flags & SKB_ALLOC_NAPI) ? - __napi_alloc_frag(fragsz, gfp_mask) : - __netdev_alloc_frag(fragsz, gfp_mask); + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + len = SKB_DATA_ALIGN(len); - if (likely(data)) { - skb = build_skb(data, fragsz); - if (unlikely(!skb)) - put_page(virt_to_head_page(data)); - } - } else { - skb = __alloc_skb(length, gfp_mask, - SKB_ALLOC_RX, NUMA_NO_NODE); - } - return skb; -} + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; -/** - * __netdev_alloc_skb - allocate an skbuff for rx on a specific device - * @dev: network device to receive on - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has NET_SKB_PAD headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. - */ -struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask) -{ - struct sk_buff *skb; + local_irq_save(flags); + + nc = this_cpu_ptr(&netdev_alloc_cache); + data = __alloc_page_frag(nc, len, gfp_mask); + pfmemalloc = nc->pfmemalloc; - length += NET_SKB_PAD; - skb = __alloc_rx_skb(length, gfp_mask, 0); + local_irq_restore(flags); + + if (unlikely(!data)) + return NULL; - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = dev; + skb = __build_skb(data, len); + if (unlikely(!skb)) { + skb_free_frag(data); + return NULL; } + /* use OR instead of assignment to avoid clearing of bits in mask */ + if (pfmemalloc) + skb->pfmemalloc = 1; + skb->head_frag = 1; + +skb_success: + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + +skb_fail: return skb; } EXPORT_SYMBOL(__netdev_alloc_skb); @@ -551,19 +471,49 @@ EXPORT_SYMBOL(__netdev_alloc_skb); * * %NULL is returned if there is no free memory. 
*/ -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, - unsigned int length, gfp_t gfp_mask) +struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, + gfp_t gfp_mask) { + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; + void *data; - length += NET_SKB_PAD + NET_IP_ALIGN; - skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI); + len += NET_SKB_PAD + NET_IP_ALIGN; - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - skb->dev = napi->dev; + if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); + if (!skb) + goto skb_fail; + goto skb_success; } + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + len = SKB_DATA_ALIGN(len); + + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; + + data = __alloc_page_frag(nc, len, gfp_mask); + if (unlikely(!data)) + return NULL; + + skb = __build_skb(data, len); + if (unlikely(!skb)) { + skb_free_frag(data); + return NULL; + } + + /* use OR instead of assignment to avoid clearing of bits in mask */ + if (nc->pfmemalloc) + skb->pfmemalloc = 1; + skb->head_frag = 1; + +skb_success: + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + skb->dev = napi->dev; + +skb_fail: return skb; } EXPORT_SYMBOL(__napi_alloc_skb); @@ -611,10 +561,12 @@ static void skb_clone_fraglist(struct sk_buff *skb) static void skb_free_head(struct sk_buff *skb) { + unsigned char *head = skb->head; + if (skb->head_frag) - put_page(virt_to_head_page(skb->head)); + skb_free_frag(head); else - kfree(skb->head); + kfree(head); } static void skb_release_data(struct sk_buff *skb) @@ -4030,6 +3982,93 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) } EXPORT_SYMBOL(skb_checksum_setup); +/** + * skb_checksum_maybe_trim - maybe trims the given skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * + * Checks whether the given skb has data beyond the given transport length. + * If so, returns a cloned skb trimmed to this transport length. + * Otherwise returns the provided skb. Returns NULL in error cases + * (e.g. transport_len exceeds skb length or out-of-memory). + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, + unsigned int transport_len) +{ + struct sk_buff *skb_chk; + unsigned int len = skb_transport_offset(skb) + transport_len; + int ret; + + if (skb->len < len) { + kfree_skb(skb); + return NULL; + } else if (skb->len == len) { + return skb; + } + + skb_chk = skb_clone(skb, GFP_ATOMIC); + kfree_skb(skb); + + if (!skb_chk) + return NULL; + + ret = pskb_trim_rcsum(skb_chk, len); + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} + +/** + * skb_checksum_trimmed - validate checksum of an skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * @skb_chkf: checksum function to use + * + * Applies the given checksum function skb_chkf to the provided skb. + * Returns a checked and maybe trimmed skb. Returns NULL on error. + * + * If the skb has data beyond the given transport length, then a + * trimmed & cloned skb is checked and returned. + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. 
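
skb_checksum_trimmed(), documented above and defined just below, consumes the skb it is given and returns a checksum-validated, possibly trimmed clone. A sketch of an ICMPv4-style caller, assuming the plain Internet checksum over the message; the wrapper names are hypothetical:

    #include <linux/skbuff.h>
    #include <net/checksum.h>
    #include <net/ip.h>

    /* runs with skb->data at the transport header; 0 means valid */
    static __sum16 my_icmp_csum(struct sk_buff *skb)
    {
            return csum_fold(skb_checksum(skb, 0, skb->len, 0));
    }

    static struct sk_buff *my_icmp_check(struct sk_buff *skb,
                                         unsigned int icmp_len)
    {
            /* caller contract: set the transport header first */
            skb_set_transport_header(skb, skb_network_offset(skb) +
                                          ip_hdrlen(skb));
            return skb_checksum_trimmed(skb, icmp_len, my_icmp_csum);
    }
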
+ */ +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)) +{ + struct sk_buff *skb_chk; + unsigned int offset = skb_transport_offset(skb); + __sum16 ret; + + skb_chk = skb_checksum_maybe_trim(skb, transport_len); + if (!skb_chk) + return NULL; + + if (!pskb_may_pull(skb_chk, offset)) { + kfree_skb(skb_chk); + return NULL; + } + + __skb_pull(skb_chk, offset); + ret = skb_chkf(skb_chk); + __skb_push(skb_chk, offset); + + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} +EXPORT_SYMBOL(skb_checksum_trimmed); + void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", diff --git a/net/core/sock.c b/net/core/sock.c index 292f42228bfb..29124fcdc42a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1396,9 +1396,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx); * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance + * @kern: is this to be a kernel socket? */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot) + struct proto *prot, int kern) { struct sock *sk; @@ -1411,7 +1412,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sock_net_set(sk, get_net(net)); + sk->sk_net_refcnt = kern ? 0 : 1; + if (likely(sk->sk_net_refcnt)) + get_net(net); + sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); @@ -1445,7 +1449,8 @@ static void __sk_free(struct sock *sk) if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - put_net(sock_net(sk)); + if (likely(sk->sk_net_refcnt)) + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -1461,25 +1466,6 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); -/* - * Last sock_put should drop reference to sk->sk_net. It has already - * been dropped in sk_change_net. Taking reference to stopping namespace - * is not an option. - * Take reference to a socket to remove it from hash _alive_ and after that - * destroy it in the context of init_net. 
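[The replacement pattern, pieced together from the sk_alloc() change above and the tunnel conversions later in this patch: kernel sockets are created directly in the target namespace and, because they no longer pin it (sk_net_refcnt == 0), a plain sock_release() suffices.]

	struct socket *sock;
	int err;

	err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
	if (err < 0)
		return err;

	/* ... use the socket; the netns can now be torn down without
	 * waiting on this reference, which sk_release_kernel() used to
	 * work around by re-homing the socket into init_net ...
	 */

	sock_release(sock);	/* replaces sk_release_kernel() */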
- */ -void sk_release_kernel(struct sock *sk) -{ - if (sk == NULL || sk->sk_socket == NULL) - return; - - sock_hold(sk); - sock_release(sk->sk_socket); - sock_net_set(sk, get_net(&init_net)); - sock_put(sk); -} -EXPORT_SYMBOL(sk_release_kernel); - static void sk_update_clone(const struct sock *sk, struct sock *newsk) { if (mem_cgroup_sockets_enabled && sk->sk_cgrp) @@ -2083,12 +2069,13 @@ EXPORT_SYMBOL(__sk_mem_schedule); /** * __sk_reclaim - reclaim memory_allocated * @sk: socket + * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) */ -void __sk_mem_reclaim(struct sock *sk) +void __sk_mem_reclaim(struct sock *sk, int amount) { - sk_memory_allocated_sub(sk, - sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); - sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; + amount >>= SK_MEM_QUANTUM_SHIFT; + sk_memory_allocated_sub(sk, amount); + sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; if (sk_under_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) diff --git a/net/core/stream.c b/net/core/stream.c index 301c05f26060..d70f77a0c889 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -119,6 +119,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; + bool noblock = (*timeo_p ? false : true); DEFINE_WAIT(wait); if (sk_stream_memory_free(sk)) @@ -131,8 +132,11 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - if (!*timeo_p) + if (!*timeo_p) { + if (noblock) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); goto do_nonblock; + } if (signal_pending(current)) goto do_interrupted; clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 754484b3cd0e..675cf94e04f8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -468,10 +468,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern) { struct dn_scp *scp; - struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern); if (!sk) goto out; @@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, } - if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 827cda560a55..04ffad311704 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -345,6 +345,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) return ret; } +static int dsa_slave_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + int ret = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: + if (attr->trans == SWITCHDEV_TRANS_COMMIT) + ret = dsa_slave_stp_update(dev, attr->u.stp_state); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + static int 
dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { @@ -382,14 +400,20 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev) return ret; } -static int dsa_slave_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int dsa_slave_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - psid->id_len = sizeof(ds->index); - memcpy(&psid->id, &ds->index, psid->id_len); + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(ds->index); + memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); + break; + default: + return -EOPNOTSUPP; + } return 0; } @@ -675,9 +699,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_iflink = dsa_slave_get_iflink, }; -static const struct swdev_ops dsa_slave_swdev_ops = { - .swdev_parent_id_get = dsa_slave_parent_id_get, - .swdev_port_stp_update = dsa_slave_stp_update, +static const struct switchdev_ops dsa_slave_switchdev_ops = { + .switchdev_port_attr_get = dsa_slave_port_attr_get, + .switchdev_port_attr_set = dsa_slave_port_attr_set, }; static void dsa_slave_adjust_link(struct net_device *dev) @@ -810,12 +834,19 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, return 0; } +static struct lock_class_key dsa_slave_netdev_xmit_lock_key; +static void dsa_slave_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &dsa_slave_netdev_xmit_lock_key); +} + int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); - netif_device_detach(slave_dev); - if (p->phy) { phy_stop(p->phy); p->old_pause = -1; @@ -859,7 +890,10 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->swdev_ops = &dsa_slave_swdev_ops; + slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; + + netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, + NULL); SET_NETDEV_DEV(slave_dev, parent); slave_dev->dev.of_node = ds->pd->port_dn[port]; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f3bad41d725f..c3325bd2f3fb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,6 +58,7 @@ #include <net/ipv6.h> #include <net/ip.h> #include <net/dsa.h> +#include <net/flow_dissector.h> #include <linux/uaccess.h> __setup("ether=", netdev_boot_setup); @@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!__skb_flow_dissect(NULL, &keys, data, - eth->h_proto, sizeof(*eth), len)) - return max_t(u32, keys.thoff, sizeof(*eth)); + if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, + sizeof(*eth), len)) + return max_t(u32, keys.basic.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); @@ -156,10 +157,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb_reset_mac_header(skb); + + eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - eth = eth_hdr(skb); - if (unlikely(is_multicast_ether_addr(eth->h_dest))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else @@ -178,7 +180,7 @@ 
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); - if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) + if (likely(eth_proto_is_802_3(eth->h_proto))) return eth->h_proto; /* diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b60c65f70346..7aaaf967df58 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1014,7 +1014,7 @@ static int ieee802154_create(struct net *net, struct socket *sock, } rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index bd2901604842..d83071dccd74 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -331,8 +331,8 @@ config NET_FOU_IP_TUNNELS When this option is enabled IP tunnels can be configured to use FOU or GUE encapsulation. -config GENEVE - tristate "Generic Network Virtualization Encapsulation (Geneve)" +config GENEVE_CORE + tristate "Generic Network Virtualization Encapsulation library" depends on INET select NET_UDP_TUNNEL ---help--- diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 518c04ed666e..b36236dd6014 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -56,7 +56,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o -obj-$(CONFIG_GENEVE) += geneve.o +obj-$(CONFIG_GENEVE_CORE) += geneve_core.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d04..235d36afece3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -317,7 +317,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@ -1430,7 +1430,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, struct net *net) { struct socket *sock; - int rc = sock_create_kern(family, type, protocol, &sock); + int rc = sock_create_kern(net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; @@ -1440,8 +1440,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, * we do not wish this socket to see incoming packets. 
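[For context on the eth_proto_is_802_3() conversions above and below: the helper is added elsewhere in this series (include/linux/etherdevice.h). A hedged reconstruction, not part of these hunks; the point is that ETH_P_802_3_MIN is 0x0600, so only the high byte of the big-endian type/length field decides the comparison and no byte swap is needed on little-endian CPUs.]

static inline bool eth_proto_is_802_3(__be16 proto)
{
#ifndef __BIG_ENDIAN
	/* low byte cannot affect the result; mask it off */
	proto &= htons(0xFF00);
#endif
	/* compare as u16 since the LSB can be ignored */
	return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN);
}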
*/ (*sk)->sk_prot->unhash(*sk); - - sk_change_net(*sk, net); } return rc; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8d695b6659c7..28ec3c1823bf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -713,8 +713,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, struct hlist_head *dest; unsigned int new_hash; - hlist_del(&fi->fib_hash); - new_hash = fib_info_hashfn(fi); dest = &new_info_hash[new_hash]; hlist_add_head(&fi->fib_hash, dest); @@ -731,8 +729,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, struct hlist_head *ldest; unsigned int new_hash; - hlist_del(&fi->fib_lhash); - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); ldest = &new_laddrhash[new_hash]; hlist_add_head(&fi->fib_lhash, ldest); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 09b62e17dd8c..5a5d9bdeaeb4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1166,13 +1166,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; - err = netdev_switch_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); + err = switchdev_fib_ipv4_add(key, plen, fi, + new_fa->fa_tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); if (err) { - netdev_switch_fib_ipv4_abort(fi); + switchdev_fib_ipv4_abort(fi); kmem_cache_free(fn_alias_kmem, new_fa); goto out; } @@ -1216,12 +1216,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->tb_id = tb->tb_id; /* (Optionally) offload fib entry to switch hardware. */ - err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); + err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, + cfg->fc_nlflags, tb->tb_id); if (err) { - netdev_switch_fib_ipv4_abort(fi); + switchdev_fib_ipv4_abort(fi); goto out_free_new_fa; } @@ -1240,7 +1238,7 @@ succeeded: return 0; out_sw_fib_del: - netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); + switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1518,8 +1516,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; - netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); + switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, + cfg->fc_type, tb->tb_id); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1768,10 +1766,9 @@ void fib_table_flush_external(struct fib_table *tb) if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) continue; - netdev_switch_fib_ipv4_del(n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, - fa->fa_type, tb->tb_id); + switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id); } /* update leaf slen */ @@ -1836,10 +1833,9 @@ int fib_table_flush(struct fib_table *tb) continue; } - netdev_switch_fib_ipv4_del(n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, - fa->fa_type, tb->tb_id); + switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve_core.c index 8986e63f3bda..311a4ba6950a 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve_core.c @@ -60,11 +60,6 @@ struct geneve_net { static int geneve_net_id; 
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ - return (struct genevehdr *)(udp_hdr(skb) + 1); -} - static struct geneve_sock *geneve_find_sock(struct net *net, sa_family_t family, __be16 port) { @@ -435,7 +430,7 @@ static int __init geneve_init_module(void) if (rc) return rc; - pr_info("Geneve driver\n"); + pr_info("Geneve core logic\n"); return 0; } @@ -449,5 +444,4 @@ module_exit(geneve_cleanup_module); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>"); -MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic"); -MODULE_ALIAS_RTNL_LINK("geneve"); +MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic"); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a3a697f5ffba..651cdf648ec4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1339,6 +1339,168 @@ out: } EXPORT_SYMBOL(ip_mc_inc_group); +static int ip_mc_check_iphdr(struct sk_buff *skb) +{ + const struct iphdr *iph; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph)) + return -EINVAL; + + offset += ip_hdrlen(skb) - sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + len = skb_network_offset(skb) + ntohs(iph->tot_len); + if (skb->len < len || len < offset) + return -EINVAL; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ip_mc_check_igmp_reportv3(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmpv3_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ip_mc_check_igmp_query(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmphdr); + if (skb->len < len) + return -EINVAL; + + /* IGMPv{1,2}? */ + if (skb->len != len) { + /* or IGMPv3? 
*/ + len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!igmp_hdr(skb)->group && + ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP)) + return -EINVAL; + + return 0; +} + +static int ip_mc_check_igmp_msg(struct sk_buff *skb) +{ + switch (igmp_hdr(skb)->type) { + case IGMP_HOST_LEAVE_MESSAGE: + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + /* fall through */ + return 0; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + return ip_mc_check_igmp_reportv3(skb); + case IGMP_HOST_MEMBERSHIP_QUERY: + return ip_mc_check_igmp_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_simple_validate(skb); +} + +static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); + int ret; + + transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ip_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ip_mc_check_igmp_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ip_mc_check_igmp - checks whether this is a sane IGMP packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) + * + * Checks whether an IPv4 packet is a valid IGMP packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an IGMP packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret = ip_mc_check_iphdr(skb); + + if (ret < 0) + return ret; + + if (ip_hdr(skb)->protocol != IPPROTO_IGMP) + return -ENOMSG; + + return __ip_mc_check_igmp(skb, skb_trimmed); +} +EXPORT_SYMBOL(ip_mc_check_igmp); + /* * Resend IGMP JOIN report; used by netdev notifier. */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8976ca423a07..60021d0d9326 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct net *net = sock_net(sk); int smallest_size = -1, smallest_rover; kuid_t uid = sock_i_uid(sk); + int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 
1 : 0; local_bh_disable(); if (!snum) { @@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) again: inet_get_local_port_range(net, &low, &high); + if (attempt_half) { + int half = low + ((high - low) >> 1); + + if (attempt_half == 1) + high = half; + else + low = half; + } remaining = (high - low) + 1; smallest_rover = rover = prandom_u32() % remaining + low; @@ -127,11 +136,6 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && - !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { - snum = smallest_rover; - goto tb_found; - } } if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { snum = rover; @@ -159,6 +163,11 @@ again: snum = smallest_rover; goto have_snum; } + if (attempt_half == 1) { + /* OK we now try the upper half of the range */ + attempt_half = 2; + goto again; + } goto fail; } /* OK, here is the one we will use. HEAD is diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c6fb80bd5826..3766bddb3e8a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -90,10 +90,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - - atomic_inc(&hashinfo->bsockets); - inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &tb->owners); tb->num_owners++; @@ -111,8 +107,6 @@ static void __inet_put_port(struct sock *sk) struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; - atomic_dec(&hashinfo->bsockets); - spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); @@ -608,7 +602,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h) { int i; - atomic_set(&h->bsockets, 0); for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 00ec8d5d7e7e..2ffbd16b79e0 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -170,7 +170,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); -void tw_timer_handler(unsigned long data) +static void tw_timer_handler(unsigned long data) { struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cc1da6d9cb35..47fa64ee82b1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -173,6 +173,15 @@ static void ipq_kill(struct ipq *ipq) inet_frag_kill(&ipq->q, &ip4_frags); } +static bool frag_expire_skip_icmp(u32 user) +{ + return user == IP_DEFRAG_AF_PACKET || + ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN, + __IP_DEFRAG_CONNTRACK_IN_END) || + ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP_DEFRAG_CONNTRACK_BRIDGE_IN); +} + /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ @@ -217,10 +226,8 @@ static void ip_expire(unsigned long arg) /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. 
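[Worked example of the inet_csk_get_port() bisection above, assuming a default ip_local_port_range of 32768-61000; the idea is that SK_CAN_REUSE sockets are steered into the lower half first, leaving the upper half for connect()-style autobind users.]

	/* half = 32768 + ((61000 - 32768) >> 1) = 46884
	 *
	 * attempt_half == 1: search 32768..46884
	 * attempt_half == 2: lower half exhausted, retry 46884..61000
	 */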
*/ - if (qp->user == IP_DEFRAG_AF_PACKET || - ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && - (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && - (skb_rtable(head)->rt_type != RTN_LOCAL))) + if (frag_expire_skip_icmp(qp->user) && + (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out_rcu_unlock; /* Send an ICMP "Fragment Reassembly Timeout" message. */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65b93a7b711..8d91b922fcfe 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,6 +83,9 @@ int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; EXPORT_SYMBOL(sysctl_ip_default_ttl); +static int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); + /* Generate a checksum for an outgoing IP datagram. */ void ip_send_check(struct iphdr *iph) { @@ -91,7 +94,7 @@ void ip_send_check(struct iphdr *iph) } EXPORT_SYMBOL(ip_send_check); -int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) +static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); @@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } +static int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) +{ + struct iphdr *iph = ip_hdr(skb); + unsigned int mtu = ip_skb_dst_mtu(skb); + + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > mtu))) { + struct rtable *rt = skb_rtable(skb); + struct net_device *dev = rt->dst.dev; + + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + kfree_skb(skb); + return -EMSGSIZE; + } + + return ip_do_fragment(sk, skb, output); +} + /* * This IP datagram is too large to be sent in one piece. Break it up into * smaller pieces (each of size equal to IP header plus @@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip_do_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct iphdr *iph; int ptr; @@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || - (IPCB(skb)->frag_max_size && - IPCB(skb)->frag_max_size > mtu))) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - kfree_skb(skb); - return -EMSGSIZE; - } /* * Setup starting values. 
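[In caller terms, the split above reduces to the sketch below. pmtu_checked is a hypothetical flag; ip_fragment() itself is now static, so external code uses the exported ip_do_fragment().]

	if (!pmtu_checked)
		/* wrapper: enforces DF and frag_max_size, sends
		 * ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED and drops on violation
		 */
		err = ip_fragment(sk, skb, output);
	else
		/* caller already dealt with DF; just slice the datagram */
		err = ip_do_fragment(sk, skb, output);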
@@ -751,7 +767,7 @@ fail: IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } -EXPORT_SYMBOL(ip_fragment); +EXPORT_SYMBOL(ip_do_fragment); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index ce63ab21b6cd..6a51a71a6c67 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -98,7 +98,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) return -ENOMEM; eh = (struct ethhdr *)skb->data; - if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + if (likely(eth_proto_is_802_3(eh->h_proto))) skb->protocol = eh->h_proto; else skb->protocol = htons(ETH_P_802_2); @@ -165,6 +165,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, { int i; + netdev_stats_to_stats64(tot, &dev->stats); + for_each_possible_cpu(i) { const struct pcpu_sw_netstats *tstats = per_cpu_ptr(dev->tstats, i); @@ -185,22 +187,6 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, tot->tx_bytes += tx_bytes; } - tot->multicast = dev->stats.multicast; - - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - tot->collisions = dev->stats.collisions; - return tot; } EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ff96396ebec5..254238daf58b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -251,7 +251,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EINVAL; } - p.i_key = p.o_key = p.i_flags = p.o_flags = 0; + p.i_key = p.o_key = 0; + p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 771ab3d01ad3..45cb16a6a4a3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,6 +367,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) struct clusterip_config *config; int ret; + if (par->nft_compat) { + pr_err("cannot use CLUSTERIP target from nftables compat\n"); + return -EOPNOTSUPP; + } + if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index e9e67793055f..fe8cc183411e 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -18,7 +18,7 @@ #include <net/netfilter/nf_conntrack_synproxy.h> static struct iphdr * -synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr) +synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; @@ -220,7 +220,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, nth->ack_seq = th->ack_seq; tcp_flag_word(nth) = TCP_FLAG_ACK; nth->doff = tcp_hdr_size / 4; - nth->window = ntohs(htons(th->window) >> opts->wscale); + nth->window = htons(ntohs(th->window) >> opts->wscale); nth->check = 0; nth->urg_ptr = 0; diff --git a/net/ipv4/proc.c 
b/net/ipv4/proc.c index e1f3b911dd1e..da5d483e236a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), + SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE), + SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f45f2a12f37b..f6055984c307 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -457,12 +457,9 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, } #define IP_IDENTS_SZ 2048u -struct ip_ident_bucket { - atomic_t id; - u32 stamp32; -}; -static struct ip_ident_bucket *ip_idents __read_mostly; +static atomic_t *ip_idents __read_mostly; +static u32 *ip_tstamps __read_mostly; /* In order to protect privacy, we add a perturbation to identifiers * if one generator is seldom used. This makes hard for an attacker @@ -470,15 +467,16 @@ static struct ip_ident_bucket *ip_idents __read_mostly; */ u32 ip_idents_reserve(u32 hash, int segs) { - struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; - u32 old = ACCESS_ONCE(bucket->stamp32); + u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; + atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; + u32 old = ACCESS_ONCE(*p_tstamp); u32 now = (u32)jiffies; u32 delta = 0; - if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) + if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - return atomic_add_return(segs + delta, &bucket->id) - segs; + return atomic_add_return(segs + delta, p_id) - segs; } EXPORT_SYMBOL(ip_idents_reserve); @@ -2097,7 +2095,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto out; } if (ipv4_is_local_multicast(fl4->daddr) || - ipv4_is_lbcast(fl4->daddr)) { + ipv4_is_lbcast(fl4->daddr) || + fl4->flowi4_proto == IPPROTO_IGMP) { if (!fl4->saddr) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); @@ -2742,6 +2741,10 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL); + if (!ip_tstamps) + panic("IP: failed to allocate ip_tstamps\n"); + for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c3852a7ff3c7..841de32f1fee 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -821,6 +821,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_ecn_fallback", + .data = &init_net.ipv4.sysctl_tcp_ecn_fallback, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "ip_local_port_range", .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), .data = &init_net.ipv4.ip_local_ports.range, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1377f2a0472..90afcec3f427 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -809,16 +809,28 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } EXPORT_SYMBOL(tcp_splice_read); -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule) { struct sk_buff *skb; /* The TCP header must be at 
least 32-bit aligned. */ size = ALIGN(size, 4); + if (unlikely(tcp_under_memory_pressure(sk))) + sk_mem_reclaim_partial(sk); + skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); - if (skb) { - if (sk_wmem_schedule(sk, skb->truesize)) { + if (likely(skb)) { + bool mem_scheduled; + + if (force_schedule) { + mem_scheduled = true; + sk_forced_mem_schedule(sk, skb->truesize); + } else { + mem_scheduled = sk_wmem_schedule(sk, skb->truesize); + } + if (likely(mem_scheduled)) { skb_reserve(skb, sk->sk_prot->max_header); /* * Make sure that we have exactly size bytes @@ -908,7 +920,8 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, + skb_queue_empty(&sk->sk_write_queue)); if (!skb) goto wait_for_memory; @@ -987,6 +1000,9 @@ do_error: if (copied) goto out; out_err: + /* make sure we wake any epoll edge trigger waiter */ + if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) + sk->sk_write_space(sk); return sk_stream_error(sk, flags, err); } @@ -1144,7 +1160,8 @@ new_segment: skb = sk_stream_alloc_skb(sk, select_size(sk, sg), - sk->sk_allocation); + sk->sk_allocation, + skb_queue_empty(&sk->sk_write_queue)); if (!skb) goto wait_for_memory; @@ -1275,6 +1292,9 @@ do_error: goto out; out_err: err = sk_stream_error(sk, flags, err); + /* make sure we wake any epoll edge trigger waiter */ + if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) + sk->sk_write_space(sk); release_sock(sk); return err; } @@ -2483,6 +2503,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, icsk->icsk_syn_retries = val; break; + case TCP_SAVE_SYN: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->save_syn = val; + break; + case TCP_LINGER2: if (val < 0) tp->linger2 = -1; @@ -2672,6 +2699,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_bytes_acked = tp->bytes_acked; info->tcpi_bytes_received = tp->bytes_received; } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); + info->tcpi_segs_out = tp->segs_out; + info->tcpi_segs_in = tp->segs_in; } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2821,6 +2850,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; + case TCP_SAVE_SYN: + val = tp->save_syn; + break; + case TCP_SAVED_SYN: { + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + if (tp->saved_syn) { + if (len < tp->saved_syn[0]) { + if (put_user(tp->saved_syn[0], optlen)) { + release_sock(sk); + return -EFAULT; + } + release_sock(sk); + return -EINVAL; + } + len = tp->saved_syn[0]; + if (put_user(len, optlen)) { + release_sock(sk); + return -EFAULT; + } + if (copy_to_user(optval, tp->saved_syn + 1, len)) { + release_sock(sk); + return -EFAULT; + } + tcp_saved_syn_free(tp); + release_sock(sk); + } else { + release_sock(sk); + len = 0; + if (put_user(len, optlen)) + return -EFAULT; + } + return 0; + } default: return -ENOPROTOOPT; } @@ -3025,11 +3090,12 @@ __setup("thash_entries=", set_thash_entries); static void __init tcp_init_mem(void) { - unsigned long limit = nr_free_buffer_pages() / 8; + unsigned long limit = nr_free_buffer_pages() / 16; + limit = max(limit, 128UL); - sysctl_tcp_mem[0] = limit / 4 * 3; - sysctl_tcp_mem[1] = limit; - sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; + sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */ + sysctl_tcp_mem[1] = limit; /* 6.25 % */ + sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */ } void __init 
tcp_init(void) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c9ab964189a0..15c4536188a4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && (int)tp->rcv_ssthresh < tcp_space(sk) && - !sk_under_memory_pressure(sk)) { + !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && - !sk_under_memory_pressure(sk) && + !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); @@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sacktag_state { int reord; int fack_count; - long rtt_us; /* RTT measured by SACKing never-retransmitted data */ + /* Timestamps for earliest and latest never-retransmitted segment + * that was SACKed. RTO needs the earliest RTT to stay conservative, + * but congestion control should still get an accurate delay signal. + */ + struct skb_mstamp first_sackt; + struct skb_mstamp last_sackt; int flag; }; @@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk, state->reord); if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; - /* Pick the earliest sequence sacked for RTT */ - if (state->rtt_us < 0) { - struct skb_mstamp now; - - skb_mstamp_get(&now); - state->rtt_us = skb_mstamp_us_delta(&now, - xmit_time); - } + if (state->first_sackt.v64 == 0) + state->first_sackt = *xmit_time; + state->last_sackt = *xmit_time; } if (sacked & TCPCB_LOST) { @@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, - u32 prior_snd_una, long *sack_rtt_us) + u32 prior_snd_una, struct tcp_sacktag_state *state) { struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + @@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; - struct tcp_sacktag_state state; struct sk_buff *skb; int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; @@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, int i, j; int first_sack_index; - state.flag = 0; - state.reord = tp->packets_out; - state.rtt_us = -1L; + state->flag = 0; + state->reord = tp->packets_out; if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) @@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) - state.flag |= FLAG_DSACKING_ACK; + state->flag |= FLAG_DSACKING_ACK; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } skb = tcp_write_queue_head(sk); - state.fack_count = 0; + state->fack_count = 0; i = 0; if (!tp->sacked_out) { @@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 
/* Head todo? */ if (before(start_seq, cache->start_seq)) { - skb = tcp_sacktag_skip(skb, sk, &state, + skb = tcp_sacktag_skip(skb, sk, state, start_seq); skb = tcp_sacktag_walk(skb, sk, next_dup, - &state, + state, start_seq, cache->start_seq, dup_sack); @@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, goto advance_sp; skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, - &state, + state, cache->end_seq); /* ...tail remains todo... */ @@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state.fack_count = tp->fackets_out; + state->fack_count = tp->fackets_out; cache++; goto walk; } - skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); + skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq); /* Check overlap against next cached too (past this one already) */ cache++; continue; @@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state.fack_count = tp->fackets_out; + state->fack_count = tp->fackets_out; } - skb = tcp_sacktag_skip(skb, sk, &state, start_seq); + skb = tcp_sacktag_skip(skb, sk, state, start_seq); walk: - skb = tcp_sacktag_walk(skb, sk, next_dup, &state, + skb = tcp_sacktag_walk(skb, sk, next_dup, state, start_seq, end_seq, dup_sack); advance_sp: @@ -1820,9 +1818,9 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - if ((state.reord < tp->fackets_out) && + if ((state->reord < tp->fackets_out) && ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) - tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); + tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); tcp_mark_lost_retrans(sk); tcp_verify_left_out(tp); @@ -1834,8 +1832,7 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - *sack_rtt_us = state.rtt_us; - return state.flag; + return state->flag; } /* Limits sacked_out so that sum with lost_out isn't ever larger than @@ -3055,7 +3052,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * arrived at the other end. 
*/ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, long sack_rtt_us) + u32 prior_snd_una, + struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); struct skb_mstamp first_ackt, last_ackt, now; @@ -3063,8 +3061,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; bool fully_acked = true; - long ca_seq_rtt_us = -1L; + long sack_rtt_us = -1L; long seq_rtt_us = -1L; + long ca_rtt_us = -1L; struct sk_buff *skb; u32 pkts_acked = 0; bool rtt_update; @@ -3153,15 +3152,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, skb_mstamp_get(&now); if (likely(first_ackt.v64)) { seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + } + if (sack->first_sackt.v64) { + sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); } rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); if (flag & FLAG_ACKED) { - const struct tcp_congestion_ops *ca_ops - = inet_csk(sk)->icsk_ca_ops; - tcp_rearm_rto(sk); if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { @@ -3184,11 +3184,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) { - long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); - ca_ops->pkts_acked(sk, pkts_acked, rtt_us); - } - } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent @@ -3198,6 +3193,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_rearm_rto(sk); } + if (icsk->icsk_ca_ops->pkts_acked) + icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us); + #if FASTRETRANS_DEBUG > 0 WARN_ON((int)tp->sacked_out < 0); WARN_ON((int)tp->lost_out < 0); @@ -3238,7 +3236,7 @@ static void tcp_ack_probe(struct sock *sk) * This function is not for random using! */ } else { - unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); @@ -3466,6 +3464,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sacktag_state sack_state; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -3474,7 +3473,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; int acked = 0; /* Number of packets newly acked */ - long sack_rtt_us = -1L; + + sack_state.first_sackt.v64 = 0; /* We very likely will need to access write queue head. 
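[Worked example of the first_sackt/last_sackt split consumed above: segment A is sent at t=0, segment B at t=40ms, and one ACK SACKs both at t=100ms.]

	/* sack_rtt_us = now - first_sackt = 100ms  (A: keeps RTO conservative)
	 * ca_rtt_us   = now - last_sackt  =  60ms  (B: freshest delay signal
	 *                                           for ca_ops->pkts_acked)
	 */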
*/ prefetchw(sk->sk_write_queue.next); @@ -3538,7 +3538,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt_us); + &sack_state); if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { flag |= FLAG_ECE; @@ -3563,7 +3563,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, - sack_rtt_us); + &sack_state); acked -= tp->packets_out; /* Advance cwnd if state allows */ @@ -3615,7 +3615,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt_us); + &sack_state); tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } @@ -4514,10 +4514,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten <= 0) { queue_and_out: - if (eaten < 0 && - tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - + if (eaten < 0) { + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; + } eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); } tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); @@ -4788,7 +4790,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); - else if (sk_under_memory_pressure(sk)) + else if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); @@ -4832,7 +4834,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we are under global TCP memory pressure, do not expand. */ - if (sk_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) return false; /* If we are under soft global TCP memory pressure, do not expand. 
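[The tcp_under_memory_pressure() helper substituted throughout these hunks is introduced in include/net/tcp.h by this series; roughly the following, as a hedged reconstruction. Unlike the generic sk_under_memory_pressure(), it also reports the TCP-global pressure flag when no memcg is attached.]

static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		return !!sk->sk_cgrp->memory_pressure;

	return tcp_memory_pressure;
}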
*/ @@ -6067,6 +6069,23 @@ static bool tcp_syn_flood_action(struct sock *sk, return want_cookie; } +static void tcp_reqsk_record_syn(const struct sock *sk, + struct request_sock *req, + const struct sk_buff *skb) +{ + if (tcp_sk(sk)->save_syn) { + u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb); + u32 *copy; + + copy = kmalloc(len + sizeof(u32), GFP_ATOMIC); + if (copy) { + copy[0] = len; + memcpy(&copy[1], skb_network_header(skb), len); + req->saved_syn = copy; + } + } +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6199,6 +6218,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } + tcp_reqsk_record_syn(sk, req, skb); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc1c658ec6c1..feb875769b8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1626,6 +1626,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) @@ -1802,6 +1803,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); sock_release_memcg(sk); @@ -2410,12 +2412,15 @@ static int __net_init tcp_sk_init(struct net *net) goto fail; *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } + net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_ecn_fallback = 1; + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; - return 0; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b5732a54f2ad..df7fe3c31162 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -448,6 +448,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; + newtp->segs_in = 0; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; @@ -536,6 +537,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->saved_syn = req->saved_syn; + req->saved_syn = NULL; + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a369e8a70b2c..534e5fdb04c1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -350,6 +350,15 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) } } +static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) +{ + if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + /* tp->ecn_flags are cleared at a later point in time when + * SYN ACK is ultimatively being received.
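[Userspace view of the TCP_SAVE_SYN/TCP_SAVED_SYN plumbing: tcp_reqsk_record_syn() above stores the headers, the setsockopt/getsockopt handlers in tcp.c earlier expose them. A hedged sketch that assumes headers exporting both option names; listen_fd, conn_fd and parse_syn_options() are hypothetical.]

	int one = 1;
	unsigned char syn[512];	/* network + transport headers of the SYN */
	socklen_t len = sizeof(syn);

	/* arm on the listener, before connections arrive */
	setsockopt(listen_fd, IPPROTO_TCP, TCP_SAVE_SYN, &one, sizeof(one));

	conn_fd = accept(listen_fd, NULL, NULL);

	/* read (and thereby free) the saved SYN headers once */
	if (getsockopt(conn_fd, IPPROTO_TCP, TCP_SAVED_SYN, syn, &len) == 0)
		parse_syn_options(syn, len);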
+ */ + TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); +} + static void tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, struct sock *sk) @@ -1018,6 +1027,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + tp->segs_out += tcp_skb_pcount(skb); /* OK, its time to fill skb_shinfo(skb)->gso_segs */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); @@ -1163,7 +1173,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, return -ENOMEM; /* Get a new skb... force flag on. */ - buff = sk_stream_alloc_skb(sk, nsize, gfp); + buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ @@ -1722,7 +1732,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (skb->len != skb->data_len) return tcp_fragment(sk, skb, len, mss_now, gfp); - buff = sk_stream_alloc_skb(sk, 0, gfp); + buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; @@ -1941,7 +1951,7 @@ static int tcp_mtu_probe(struct sock *sk) } /* We're allowed to probe. Build it now. */ - nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC); + nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); if (!nskb) return -1; sk->sk_wmem_queued += nskb->truesize; @@ -2392,7 +2402,7 @@ u32 __tcp_select_window(struct sock *sk) if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; - if (sk_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); @@ -2615,6 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } } + /* RFC3168, section 6.1.1.1. ECN fallback */ + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) + tcp_ecn_clear_syn(sk, skb); + tcp_retrans_try_collapse(sk, skb, cur_mss); /* Make a copy, if the first transmission SKB clone we made @@ -2816,8 +2830,10 @@ begin_fwd: * connection tear down and (memory) recovery. * Otherwise tcp_send_fin() could be tempted to either delay FIN * or even be forced to close flow without any FIN. + * In general, we want to allow one skb per socket to avoid hangs + * with edge trigger epoll() */ -static void sk_forced_wmem_schedule(struct sock *sk, int size) +void sk_forced_mem_schedule(struct sock *sk, int size) { int amt, status; @@ -2841,7 +2857,7 @@ void tcp_send_fin(struct sock *sk) * Note: in the latter case, FIN packet will be sent after a timeout, * as TCP stack thinks it has already been transmitted. */ - if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { + if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { coalesce: TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; @@ -2864,7 +2880,7 @@ coalesce: return; } skb_reserve(skb, MAX_TCP_HEADER); - sk_forced_wmem_schedule(sk, skb->truesize); + sk_forced_mem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). 
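[Common shape of the sk_forced_mem_schedule() conversions, here for the FIN and earlier in tcp_data_queue() for an empty receive queue; paraphrased, with critical_skb as a hypothetical predicate.]

	/* guarantee forward progress for the one skb that matters, even
	 * under memory pressure; everything else keeps the normal
	 * may-fail accounting
	 */
	if (critical_skb)
		sk_forced_mem_schedule(sk, skb->truesize);	/* never fails */
	else if (!sk_wmem_schedule(sk, skb->truesize))
		goto drop;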
*/ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); @@ -3175,7 +3191,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) /* limit to order-0 allocations */ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); - syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation); + syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false); if (!syn_data) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; @@ -3241,7 +3257,7 @@ int tcp_connect(struct sock *sk) return 0; } - buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); if (unlikely(!buff)) return -ENOBUFS; @@ -3382,7 +3398,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack); * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. */ -static int tcp_xmit_probe_skb(struct sock *sk, int urgent) +static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3400,6 +3416,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); + NET_INC_STATS_BH(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -3407,12 +3424,12 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); } } /* Initiate keepalive or window probe from timer. */ -int tcp_write_wakeup(struct sock *sk) +int tcp_write_wakeup(struct sock *sk, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3449,8 +3466,8 @@ int tcp_write_wakeup(struct sock *sk) return err; } else { if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) - tcp_xmit_probe_skb(sk, 1); - return tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 1, mib); + return tcp_xmit_probe_skb(sk, 0, mib); } } @@ -3464,7 +3481,7 @@ void tcp_send_probe0(struct sock *sk) unsigned long probe_max; int err; - err = tcp_write_wakeup(sk); + err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); if (tp->packets_out || !tcp_send_head(sk)) { /* Cancel probe timer, if it is not required. 
*/ @@ -3490,7 +3507,7 @@ void tcp_send_probe0(struct sock *sk) probe_max = TCP_RESOURCE_PROBE_INTERVAL; } inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - inet_csk_rto_backoff(icsk, probe_max), + tcp_probe0_when(sk, probe_max), TCP_RTO_MAX); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8c65dc147d8b..5b752f58a900 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -247,7 +247,7 @@ void tcp_delack_timer_handler(struct sock *sk) } out: - if (sk_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) sk_mem_reclaim(sk); } @@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data) tcp_write_err(sk); goto out; } - if (tcp_write_wakeup(sk) <= 0) { + if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6bb98cc193c9..933ea903f7b8 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -15,12 +15,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket *sock = NULL; struct sockaddr_in udp_addr; - err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = cfg->local_udp_port; @@ -47,7 +45,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; @@ -101,7 +99,7 @@ void udp_tunnel_sock_release(struct socket *sock) { rcu_assign_sk_user_data(sock->sk, NULL); kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2e8c06108ab9..0f3f1999719a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +obj-y += mcast_snoop.o endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 37b70e82bff8..21c2c818df3b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; + + noflags |= RTF_CACHE; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->dst.dev->ifindex != dev->ifindex) continue; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eef63b394c5a..f3866c0b6cfe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,7 +167,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@ -768,6 +768,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.auto_flowlabels = 0; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; + net->ipv6.sysctl.flowlabel_state_ranges = 1; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d491125011c4..1f9ebe3cbb4a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char 
__user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; + if (net->ipv6.sysctl.flowlabel_state_ranges && + (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) + return -ERANGE; + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (!fl) return err; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index bba8903e871f..e1a1136bda7c 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c new file mode 100644 index 000000000000..df8afe5ab31e --- /dev/null +++ b/net/ipv6/mcast_snoop.c @@ -0,0 +1,213 @@ +/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * + * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. + */ + +#include <linux/skbuff.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/addrconf.h> +#include <net/ip6_checksum.h> + +static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + if (ip6h->version != 6) + return -EINVAL; + + len = offset + ntohs(ip6h->payload_len); + if (skb->len < len || len <= offset) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_exthdrs(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + int offset; + u8 nexthdr; + __be16 frag_off; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + return -ENOMSG; + + nexthdr = ip6h->nexthdr; + offset = skb_network_offset(skb) + sizeof(*ip6h); + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) + return -EINVAL; + + if (nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct mld2_report); + + return pskb_may_pull(skb, len) ? 
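The new net/ipv6/mcast_snoop.c begins with ipv6_mc_check_ip6hdr(), which rejects anything that is not a plausible IPv6 packet before any MLD parsing starts. A userspace sketch of the same header check over a raw buffer (the struct is reduced to the fields the check needs, with offsets per RFC 8200; this is not kernel code):

```c
/* Userspace sketch of the header check opening mcast_snoop.c: verify the
 * version nibble and that the buffer really contains the
 * ntohs(payload_len) bytes the header promises.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct ip6hdr_min {
	uint8_t  ver_tc;	/* version (high nibble) + traffic class */
	uint8_t  rest[3];	/* remainder of the flow label word */
	uint16_t payload_len;	/* network byte order */
	uint8_t  nexthdr;
	uint8_t  hop_limit;
	uint8_t  saddr[16], daddr[16];
};

static int check_ip6hdr(const uint8_t *pkt, size_t pkt_len)
{
	struct ip6hdr_min h;

	if (pkt_len < sizeof(h))		/* pskb_may_pull() analogue */
		return -1;
	memcpy(&h, pkt, sizeof(h));

	if ((h.ver_tc >> 4) != 6)
		return -1;

	size_t want = sizeof(h) + ntohs(h.payload_len);
	if (pkt_len < want || want <= sizeof(h))
		return -1;			/* truncated or empty payload */
	return 0;
}

int main(void)
{
	uint8_t pkt[48] = { 0x60 };		/* version 6 */
	pkt[4] = 0; pkt[5] = 8;			/* payload_len = 8 */
	printf("valid: %d\n", check_ip6hdr(pkt, sizeof(pkt)));
	pkt[5] = 64;				/* claims more than we have */
	printf("truncated: %d\n", check_ip6hdr(pkt, sizeof(pkt)));
	return 0;
}
```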
0 : -EINVAL; +} + +static int ipv6_mc_check_mld_query(struct sk_buff *skb) +{ + struct mld_msg *mld; + unsigned int len = skb_transport_offset(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + len += sizeof(struct mld_msg); + if (skb->len < len) + return -EINVAL; + + /* MLDv1? */ + if (skb->len != len) { + /* or MLDv2? */ + len += sizeof(struct mld2_query) - sizeof(struct mld_msg); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (ipv6_addr_any(&mld->mld_mca) && + !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_mld_msg(struct sk_buff *skb) +{ + struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MGM_REPORT: + /* fall through */ + return 0; + case ICMPV6_MLD2_REPORT: + return ipv6_mc_check_mld_reportv2(skb); + case ICMPV6_MGM_QUERY: + return ipv6_mc_check_mld_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); +} + +static int __ipv6_mc_check_mld(struct sk_buff *skb, + struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk = NULL; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + int ret; + + transport_len = ntohs(ipv6_hdr(skb)->payload_len); + transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ipv6_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ipv6_mc_check_mld_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ipv6_mc_check_mld - checks whether this is a sane MLD packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) + * + * Checks whether an IPv6 packet is a valid MLD packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an MLD packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an MLD packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. 
+ */ +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret; + + ret = ipv6_mc_check_ip6hdr(skb); + if (ret < 0) + return ret; + + ret = ipv6_mc_check_exthdrs(skb); + if (ret < 0) + return ret; + + return __ipv6_mc_check_mld(skb, skb_trimmed); +} +EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c73ae5039e46..0c889cb89cc3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO @@ -104,65 +105,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif -static void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - -static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) -{ - if (rt6_has_peer(rt)) - return rt6_peer_ptr(rt); - - rt6_bind_peer(rt, create); - return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); -} - -static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) -{ - return __rt6_get_peer(rt, 1); -} - static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rt6_info *rt = (struct rt6_info *) dst; - struct inet_peer *peer; - u32 *p = NULL; + struct rt6_info *rt = (struct rt6_info *)dst; - if (!(rt->dst.flags & DST_HOST)) + if (rt->rt6i_flags & RTF_CACHE) + return NULL; + else return dst_cow_metrics_generic(dst, old); - - peer = rt6_get_peer_create(rt); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer) || - (old & DST_METRICS_FORCE_OVERWRITE)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } - } - return p; } static inline const void *choose_neigh_daddr(struct rt6_info *rt, @@ -311,7 +261,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - rt6_init_peer(rt, table ? 
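Given the return contract documented above, a snooping caller is expected to treat 0, -ENOMSG and the error values differently, and to drop its reference on any skb_trimmed it accepts. A hedged sketch of such a caller, with ipv6_mc_check_mld() stubbed out since only the branching is of interest here:

```c
/* Sketch of a snooping caller honouring the documented contract: 0 means
 * "MLD, act on it", -ENOMSG means "fine packet, just not MLD, keep
 * forwarding", anything else means "drop".  The check itself is stubbed.
 */
#include <errno.h>
#include <stdio.h>

struct sk_buff;			/* opaque in this model */

static int ipv6_mc_check_mld_stub(struct sk_buff *skb, struct sk_buff **trimmed)
{
	(void)skb;
	*trimmed = NULL;
	return -ENOMSG;		/* pretend: valid IPv6, not MLD */
}

static void kfree_skb_stub(struct sk_buff *skb) { (void)skb; }

static const char *snoop_decision(struct sk_buff *skb)
{
	struct sk_buff *trimmed = NULL;
	int ret = ipv6_mc_check_mld_stub(skb, &trimmed);

	if (ret == 0) {
		/* ... parse group state from the trimmed clone ... */
		if (trimmed)
			kfree_skb_stub(trimmed); /* caller drops its reference */
		return "process MLD";
	}
	if (ret == -ENOMSG)
		return "not MLD, forward normally";
	return "malformed, drop";	/* -EINVAL or -ENOMEM */
}

int main(void)
{
	printf("%s\n", snoop_decision(NULL));
	return 0;
}
```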
&table->tb6_peers : net->ipv6.peers); INIT_LIST_HEAD(&rt->rt6i_siblings); } return rt; @@ -323,8 +272,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; - if (!(rt->dst.flags & DST_HOST)) - dst_destroy_metrics_generic(dst); + dst_destroy_metrics_generic(dst); if (idev) { rt->rt6i_idev = NULL; @@ -333,11 +281,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst->from = NULL; dst_release(from); - - if (rt6_has_peer(rt)) { - struct inet_peer *peer = rt6_peer_ptr(rt); - inet_putpeer(peer); - } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -652,15 +595,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match; + struct rt6_info *rt, *match, *cont; int mpri = -1; match = NULL; - for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + cont = NULL; + for (rt = rr_head; rt; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + + match = find_match(rt, oif, strict, &mpri, match, do_rr); + } + + for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + match = find_match(rt, oif, strict, &mpri, match, do_rr); - for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + } + + if (match || !cont) + return match; + + for (rt = cont; rt; rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; @@ -959,7 +920,7 @@ redo_rt6_select: if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); - else if (!(rt->dst.flags & DST_HOST)) + else if (!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL)) nrt = rt6_alloc_clone(rt, &fl6->daddr); else goto out2; @@ -985,6 +946,7 @@ redo_rt6_select: goto redo_fib6_lookup_lock; out2: + rt6_dst_from_metrics_check(rt); rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1059,7 +1021,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new = &rt->dst; memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); - rt6_init_peer(rt, net->ipv6.peers); new->__use = 1; new->input = dst_discard; @@ -1093,6 +1054,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ +static void rt6_dst_from_metrics_check(struct rt6_info *rt) +{ + if (rt->dst.from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1109,6 +1077,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt6_check_expired(rt)) return NULL; + rt6_dst_from_metrics_check(rt); + return dst; } @@ -1154,14 +1124,14 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct rt6_info *rt6 = (struct rt6_info *)dst; dst_confirm(dst); - if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { struct net *net = dev_net(dst->dev); rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - dst_metric_set(dst, RTAX_MTU, mtu); + rt6->rt6i_pmtu = mtu; rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } @@ -1341,12 +1311,17 @@ static unsigned int ip6_default_advmss(const 
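The find_rr_leaf() rework above scans the route list in two arcs around the round-robin head, remembers in `cont` where the current metric band ends, and only continues into the next band when the preferred band yields no match. A standalone model of that control flow, with the kernel's find_match() scoring deliberately reduced to a single `usable` flag:

```c
/* Model of the two-arc round-robin scan: prefer the current metric band
 * starting at rr_head, fall through to the band at "cont" only when the
 * preferred band produced nothing.
 */
#include <stdio.h>
#include <stddef.h>

struct rt { int metric; int usable; struct rt *next; };

static struct rt *find_rr_leaf_model(struct rt *leaf, struct rt *rr_head,
				     int metric)
{
	struct rt *rt, *match = NULL, *cont = NULL;

	for (rt = rr_head; rt; rt = rt->next) {
		if (rt->metric != metric) { cont = rt; break; }
		if (!match && rt->usable) match = rt;
	}
	for (rt = leaf; rt && rt != rr_head; rt = rt->next) {
		if (rt->metric != metric) { cont = rt; break; }
		if (!match && rt->usable) match = rt;
	}
	if (match || !cont)
		return match;
	for (rt = cont; rt; rt = rt->next)	/* fall through a band */
		if (rt->usable) return rt;
	return NULL;
}

int main(void)
{
	/* leaf: A(metric 1, down) -> B(metric 1, down) -> C(metric 2, up) */
	struct rt c = { 2, 1, NULL }, b = { 1, 0, &c }, a = { 1, 0, &b };
	struct rt *hit = find_rr_leaf_model(&a, &b, 1);

	printf("fell through to metric %d\n", hit ? hit->metric : -1);
	return 0;
}
```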
struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + goto out; + mtu = IPV6_MIN_MTU; rcu_read_lock(); @@ -1590,10 +1565,8 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) { + if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; - dst_metrics_set_force_overwrite(&rt->dst); - } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1651,6 +1624,16 @@ int ip6_route_add(struct fib6_config *cfg) int gwa_type; gw_addr = &cfg->fc_gateway; + + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + err = -EINVAL; + if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0)) + goto out; + rt->rt6i_gateway = *gw_addr; gwa_type = ipv6_addr_type(gw_addr); @@ -1664,7 +1647,6 @@ int ip6_route_add(struct fib6_config *cfg) (SIT, PtP, NBMA NOARP links) it is handy to allow some exceptions. --ANK */ - err = -EINVAL; if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; @@ -1785,6 +1767,9 @@ static int ip6_route_del(struct fib6_config *cfg) if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + if ((rt->rt6i_flags & RTF_CACHE) && + !(cfg->fc_flags & RTF_CACHE)) + continue; if (cfg->fc_ifindex && (!rt->dst.dev || rt->dst.dev->ifindex != cfg->fc_ifindex)) @@ -1926,12 +1911,27 @@ out: * Misc support functions */ +static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) +{ + BUG_ON(from->dst.from); + + rt->rt6i_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->dst.from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, - ort->rt6i_table); + struct rt6_info *rt; + + if (ort->rt6i_flags & RTF_CACHE) + ort = (struct rt6_info *)ort->dst.from; + + rt = ip6_dst_alloc(net, ort->dst.dev, 0, + ort->rt6i_table); + if (rt) { rt->dst.input = ort->dst.input; @@ -1940,7 +1940,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, rt->rt6i_dst.addr = *dest; rt->rt6i_dst.plen = 128; - dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) @@ -2373,11 +2372,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discovery. */ if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU) && - (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->rt6i_flags & RTF_CACHE) { + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from have already + * been updated.
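The net effect of the rt6i_pmtu changes in this file is a split between the shared metrics block and a per-cached-route path MTU: ip6_mtu() prefers rt6i_pmtu, then the RTAX_MTU metric, then a device fallback, while ip6_rt_update_pmtu() only ever shrinks the value and clamps it to IPV6_MIN_MTU. A minimal model of both sides (the idev fallback is simplified here):

```c
/* Model of the per-route PMTU split introduced above. */
#include <stdio.h>

#define IPV6_MIN_MTU 1280

struct rt6_model {
	unsigned int rt6i_pmtu;   /* per-clone learned path MTU, 0 = unset */
	unsigned int metric_mtu;  /* shared RTAX_MTU metric, 0 = unset */
	unsigned int idev_mtu;    /* interface fallback */
};

/* read side: rt6i_pmtu beats the metric, which beats the device value */
static unsigned int ip6_mtu_model(const struct rt6_model *rt)
{
	unsigned int mtu = rt->rt6i_pmtu;

	if (!mtu)
		mtu = rt->metric_mtu;
	if (!mtu)
		mtu = rt->idev_mtu > IPV6_MIN_MTU ? rt->idev_mtu : IPV6_MIN_MTU;
	return mtu;
}

/* write side: shrink only, clamped, stored per route, not in the metrics */
static void rt6_update_pmtu_model(struct rt6_model *rt, unsigned int mtu)
{
	if (mtu >= ip6_mtu_model(rt))
		return;
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	rt->rt6i_pmtu = mtu;
}

int main(void)
{
	struct rt6_model rt = { 0, 0, 1500 };

	printf("initial mtu: %u\n", ip6_mtu_model(&rt));	/* 1500 */
	rt6_update_pmtu_model(&rt, 900);		/* ICMPv6 packet-too-big */
	printf("after pmtu:  %u\n", ip6_mtu_model(&rt));	/* clamped: 1280 */
	return 0;
}
```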
+ */ + if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) + rt->rt6i_pmtu = arg->mtu; + } else if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + } } return 0; } @@ -2434,6 +2442,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; + if (rtm->rtm_flags & RTM_F_CLONED) + cfg->fc_flags |= RTF_CACHE; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2608,6 +2619,7 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { + u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; @@ -2721,7 +2733,10 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); + if (rt->rt6i_pmtu) + metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; + if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index abcc79f649b3..4e705add4f18 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "flowlabel_state_ranges", + .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; + ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3adffb300238..fefe4455e1f3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1421,6 +1421,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f337a908a76a..6ae256b4c55a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -71,13 +71,6 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } -static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) -{ - struct rt6_info *rt = (struct rt6_info *)xdst; - - rt6_init_peer(rt, net->ipv6.peers); -} - static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -106,8 +99,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return -ENODEV; } - rt6_transfer_peer(&xdst->u.rt6, rt); - /* Sheit... I remember I did this right. 
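With net.ipv6.flowlabel_state_ranges enabled, the upper half of the 20-bit label space (the IPV6_FLOWLABEL_STATELESS_FLAG bit, 0x80000) is reserved for kernel-generated stateless labels, so the ipv6_flowlabel_opt() hunk earlier in this series refuses such requests with -ERANGE. A host-order sketch of the policy (the kernel compares big-endian values; the constants mirror the kernel's):

```c
/* Host-order model of the flowlabel_state_ranges policy plumbed in via
 * the sysctl above.
 */
#include <errno.h>
#include <stdio.h>

#define FLOWLABEL_MASK           0x000FFFFFu
#define FLOWLABEL_STATELESS_FLAG 0x00080000u

static int flowlabel_request(unsigned int label, int state_ranges)
{
	if (label & ~FLOWLABEL_MASK)
		return -EINVAL;		/* more than 20 bits */
	if (state_ranges && (label & FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;		/* reserved for stateless labels */
	return 0;
}

int main(void)
{
	printf("0x12345: %d\n", flowlabel_request(0x12345, 1));	/* ok */
	printf("0x82345: %d\n", flowlabel_request(0x82345, 1));	/* -ERANGE */
	printf("0x82345 (ranges off): %d\n", flowlabel_request(0x82345, 0));
	return 0;
}
```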
Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | @@ -255,10 +246,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (rt6_has_peer(&xdst->u.rt6)) { - struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); - inet_putpeer(peer); - } xfrm_dst_destroy(xdst); } @@ -308,7 +295,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, - .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 4ea5d7497b5f..48d0dc89b58d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern); if (!sk) goto out; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ee0ea25c8e7a..fae6822cc367 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6daa52a18d40..918151c11348 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) sk->sk_type = parent->sk_type; } -static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; struct iucv_sock *iucv; - sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern); if (!sk) return NULL; iucv = iucv_sk(sk); @@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); if (!sk) return -ENOMEM; @@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || diff --git a/net/key/af_key.c b/net/key/af_key.c index f0d52d721b3a..9e834ec475a9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) goto out; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 
a29a504492af..f6b090df3930 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1334,9 +1334,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work) if (sock) inet_shutdown(sock, 2); } else { - if (sock) + if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + sock_release(sock); + } } l2tp_tunnel_sock_put(sk); @@ -1399,13 +1400,11 @@ static int l2tp_tunnel_sock_create(struct net *net, if (cfg->local_ip6 && cfg->peer_ip6) { struct sockaddr_l2tpip6 ip6_addr = {0}; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1429,13 +1428,11 @@ static int l2tp_tunnel_sock_create(struct net *net, { struct sockaddr_l2tpip ip_addr = {0}; - err = sock_create_kern(AF_INET, SOCK_DGRAM, + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1462,7 +1459,7 @@ out: *sockp = sock; if ((err < 0) && sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); *sockp = NULL; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e9b0dec56b8e..f56c9f69e9f2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb) /* socket() handler. Initialize a new struct sock. */ -static int pppol2tp_create(struct net *net, struct socket *sock) +static int pppol2tp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern); if (!sk) goto out; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 17a8dff06090..8fd9febaa5ba 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol, if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 81a61fce3afb..3e821daf9dd4 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *daddr) { struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC, - sk->sk_prot); + sk->sk_prot, 0); struct llc_sock *newllc, *llc = llc_sk(sk); if (!newsk) @@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk) * Allocates a LLC sock and initializes it. 
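The sock_create_kern()/sock_release() conversions throughout this pull replace the old sk_change_net() plus sk_release_kernel() dance; as I read the series, the point of passing `kern` down to sk_alloc() is that a kernel-internal socket does not take a reference on its network namespace, so per-namespace kernel sockets (UDP tunnels, L2TP) can no longer keep their own namespace alive. A toy model of that refcounting, clearly a reconstruction rather than kernel code:

```c
/* Toy model: user sockets pin the netns, kernel sockets do not, which is
 * why the kern flag must reach the allocator.
 */
#include <stdio.h>
#include <stdlib.h>

struct net { int refcnt; };

static struct net *get_net(struct net *net) { net->refcnt++; return net; }
static void put_net(struct net *net) { net->refcnt--; }

struct sock { struct net *net; int kern; };

static struct sock *sk_alloc_model(struct net *net, int kern)
{
	struct sock *sk = calloc(1, sizeof(*sk));

	if (!sk)
		return NULL;
	sk->kern = kern;
	sk->net = kern ? net : get_net(net);
	return sk;
}

static void sk_free_model(struct sock *sk)
{
	if (!sk->kern)
		put_net(sk->net);
	free(sk);
}

int main(void)
{
	struct net ns = { .refcnt = 1 };	/* held by its creator */
	struct sock *user = sk_alloc_model(&ns, 0);
	struct sock *kern = sk_alloc_model(&ns, 1);

	printf("refcnt with both sockets: %d\n", ns.refcnt);	/* 2, not 3 */
	sk_free_model(user);
	sk_free_model(kern);
	printf("refcnt after release:     %d\n", ns.refcnt);	/* 1 */
	return 0;
}
```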
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern) { - struct sock *sk = sk_alloc(net, family, priority, prot); + struct sock *sk = sk_alloc(net, family, priority, prot, kern); if (!sk) goto out; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 64a012a0c6e5..086de496a4c1 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -302,6 +302,20 @@ config MAC80211_DEBUG_COUNTERS ---help--- Selecting this option causes mac80211 to keep additional and very verbose statistics about TX and RX handler use - and show them in debugfs. + as well as a few selected dot11 counters. These will be + exposed in debugfs. + + Note that some of the counters are not concurrency safe + and may thus not always be accurate. If unsure, say N. + +config MAC80211_STA_HASH_MAX_SIZE + int "Station hash table maximum size" if MAC80211_DEBUG_MENU + default 0 + ---help--- + Setting this option to a low value (e.g. 4) allows testing the + hash table with collisions relatively deterministically (just + connect more stations than the number selected here.) + + If unsure, leave the default of 0. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 265e42721a66..3469bbdc891c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; + + ieee80211_check_fast_xmit_iface(sdata); + return 0; } @@ -309,6 +312,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u32 iv32; u16 iv16; int err = -ENOENT; + struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -339,10 +343,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - drv_get_tkip_seq(sdata->local, - key->conf.hw_key_idx, - &iv32, &iv16); + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + iv32 = kseq.tkip.iv32; + iv16 = kseq.tkip.iv16; + } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; @@ -355,52 +361,85 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn64 = atomic64_read(&key->u.ccmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.ccmp.pn, 6); + } else { + pn64 = atomic64_read(&key->u.ccmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) 
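A pattern worth noting across the mac80211 changes that follow: every setter touching an input of the new TX fast path (the noack map here, keys, channel contexts, WMM state and the fragmentation threshold elsewhere) calls one of the ieee80211_check_fast_xmit*() helpers to recompute the cached state. An illustrative reduction of that invalidate-on-write pattern (field names are made up for the model):

```c
/* Cached derived state is only valid for one combination of inputs, so
 * every setter recomputes it, as the ieee80211_check_fast_xmit*() calls
 * sprinkled through this series do.
 */
#include <stdio.h>
#include <stdbool.h>

struct sta {
	bool wme;
	bool key_uploaded;
	bool fragmentation;	/* device-global input */
	bool fast_xmit;		/* derived: may we skip the slow path? */
};

static void check_fast_xmit(struct sta *sta)
{
	/* fast path only when nothing forces per-frame work */
	sta->fast_xmit = sta->wme && sta->key_uploaded && !sta->fragmentation;
}

static void set_wme(struct sta *sta, bool on)
{
	sta->wme = on;
	check_fast_xmit(sta);	/* cf. the ibss.c hunk below */
}

static void set_frag_threshold(struct sta *sta, bool frag)
{
	sta->fragmentation = frag;
	check_fast_xmit(sta);	/* cf. the set_wiphy_params hunk above */
}

int main(void)
{
	struct sta sta = { .key_uploaded = true };

	set_wme(&sta, true);
	printf("fast_xmit: %d\n", sta.fast_xmit);	/* 1 */
	set_frag_threshold(&sta, true);
	printf("fast_xmit: %d\n", sta.fast_xmit);	/* 0 again */
	return 0;
}
```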
{ + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.aes_cmac.pn, 6); + } else { + pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.aes_gmac.pn, 6); + } else { + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - pn64 = atomic64_read(&key->u.gcmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.gcmp.pn, 6); + } else { + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; + default: + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + break; + if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + break; + drv_get_key_seq(sdata->local, key, &kseq); + params.seq = kseq.hw.seq; + params.seq_len = kseq.hw.seq_len; + break; } params.key = key->conf.key; @@ -2099,10 +2138,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { + ieee80211_check_fast_xmit_all(local); + err = drv_set_frag_threshold(local, wiphy->frag_threshold); - if (err) + if (err) { + ieee80211_check_fast_xmit_all(local); return err; + } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || @@ -3336,8 +3379,14 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (!sdata->u.mgd.associated) + sdata_lock(sdata); + if (!sdata->u.mgd.associated || + (params->offchan && params->wait && + local->ops->remain_on_channel && + memcmp(sdata->u.mgd.associated->bssid, + mgmt->bssid, ETH_ALEN))) need_offchan = true; + sdata_unlock(sdata); break; case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5bcd4e5589d3..f01c18a3160e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -664,6 +664,8 @@ out: ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + ieee80211_check_fast_xmit_iface(sdata); + return ret; } @@ -1008,6 +1010,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (WARN_ON(!chandef)) return -EINVAL; + ieee80211_change_chanctx(local, new_ctx, chandef); + vif_chsw[0].vif = &sdata->vif; vif_chsw[0].old_ctx = &old_ctx->conf; vif_chsw[0].new_ctx = &new_ctx->conf; @@ -1030,6 +1034,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (sdata->vif.type == 
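Each cipher branch above falls back to serializing the low 48 bits of an atomic 64-bit TX counter into seq[0..5], least-significant byte first, whenever the hardware cannot report the sequence via drv_get_key_seq(). That repeated extraction, factored into a small standalone helper:

```c
/* The byte extraction repeated in every cipher branch above: the low 48
 * bits of the TX packet-number counter, emitted LSB first, as CCMP/GCMP/
 * CMAC packet numbers are.
 */
#include <stdint.h>
#include <stdio.h>

static void pn64_to_seq(uint64_t pn64, uint8_t seq[6])
{
	int i;

	for (i = 0; i < 6; i++)
		seq[i] = (uint8_t)(pn64 >> (8 * i));
}

int main(void)
{
	uint8_t seq[6];
	uint64_t pn = 0x0000010203040506ULL;

	pn64_to_seq(pn, seq);
	/* prints 06 05 04 03 02 01: little-endian PN serialization */
	for (int i = 0; i < 6; i++)
		printf("%02x ", seq[i]);
	printf("\n");
	return 0;
}
```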
NL80211_IFTYPE_AP) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + ieee80211_check_fast_xmit_iface(sdata); + if (ieee80211_chanctx_refcount(local, old_ctx) == 0) ieee80211_free_chanctx(local, old_ctx); @@ -1079,6 +1085,8 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata) if (WARN_ON(!chandef)) return -EINVAL; + ieee80211_change_chanctx(local, new_ctx, chandef); + list_del(&sdata->reserved_chanctx_list); sdata->reserved_chanctx = NULL; @@ -1376,6 +1384,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + ieee80211_check_fast_xmit_iface(sdata); + sdata->radar_required = sdata->reserved_radar_required; if (sdata->vif.bss_conf.chandef.width != diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 23813ebb349c..b17206db49b4 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -219,8 +219,8 @@ static const struct file_operations stats_ ##name## _ops = { \ .llseek = generic_file_llseek, \ }; -#define DEBUGFS_STATS_ADD(name, field) \ - debugfs_create_u32(#name, 0400, statsd, (u32 *) &field); +#define DEBUGFS_STATS_ADD(name) \ + debugfs_create_u32(#name, 0400, statsd, &local->name); #define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); @@ -255,53 +255,31 @@ void debugfs_hw_add(struct ieee80211_local *local) if (!statsd) return; - DEBUGFS_STATS_ADD(transmitted_fragment_count, - local->dot11TransmittedFragmentCount); - DEBUGFS_STATS_ADD(multicast_transmitted_frame_count, - local->dot11MulticastTransmittedFrameCount); - DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount); - DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount); - DEBUGFS_STATS_ADD(multiple_retry_count, - local->dot11MultipleRetryCount); - DEBUGFS_STATS_ADD(frame_duplicate_count, - local->dot11FrameDuplicateCount); - DEBUGFS_STATS_ADD(received_fragment_count, - local->dot11ReceivedFragmentCount); - DEBUGFS_STATS_ADD(multicast_received_frame_count, - local->dot11MulticastReceivedFrameCount); - DEBUGFS_STATS_ADD(transmitted_frame_count, - local->dot11TransmittedFrameCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); - DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, - local->tx_handlers_drop_fragment); - DEBUGFS_STATS_ADD(tx_handlers_drop_wep, - local->tx_handlers_drop_wep); - DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc, - local->tx_handlers_drop_not_assoc); - DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port, - local->tx_handlers_drop_unauth_port); - DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop); - DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued); - DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc, - local->rx_handlers_drop_nullfunc); - DEBUGFS_STATS_ADD(rx_handlers_drop_defrag, - local->rx_handlers_drop_defrag); - DEBUGFS_STATS_ADD(rx_handlers_drop_short, - local->rx_handlers_drop_short); - DEBUGFS_STATS_ADD(tx_expand_skb_head, - local->tx_expand_skb_head); - DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned, - local->tx_expand_skb_head_cloned); - DEBUGFS_STATS_ADD(rx_expand_skb_head, - local->rx_expand_skb_head); - DEBUGFS_STATS_ADD(rx_expand_skb_head2, - local->rx_expand_skb_head2); - DEBUGFS_STATS_ADD(rx_handlers_fragments, - local->rx_handlers_fragments); - DEBUGFS_STATS_ADD(tx_status_drop, - local->tx_status_drop); + DEBUGFS_STATS_ADD(dot11TransmittedFragmentCount); + 
DEBUGFS_STATS_ADD(dot11MulticastTransmittedFrameCount); + DEBUGFS_STATS_ADD(dot11FailedCount); + DEBUGFS_STATS_ADD(dot11RetryCount); + DEBUGFS_STATS_ADD(dot11MultipleRetryCount); + DEBUGFS_STATS_ADD(dot11FrameDuplicateCount); + DEBUGFS_STATS_ADD(dot11ReceivedFragmentCount); + DEBUGFS_STATS_ADD(dot11MulticastReceivedFrameCount); + DEBUGFS_STATS_ADD(dot11TransmittedFrameCount); + DEBUGFS_STATS_ADD(tx_handlers_drop); + DEBUGFS_STATS_ADD(tx_handlers_queued); + DEBUGFS_STATS_ADD(tx_handlers_drop_wep); + DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); + DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); + DEBUGFS_STATS_ADD(rx_handlers_drop); + DEBUGFS_STATS_ADD(rx_handlers_queued); + DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); + DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); + DEBUGFS_STATS_ADD(rx_handlers_drop_short); + DEBUGFS_STATS_ADD(tx_expand_skb_head); + DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); + DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag); + DEBUGFS_STATS_ADD(rx_handlers_fragments); + DEBUGFS_STATS_ADD(tx_status_drop); #endif DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 252859e90e8a..06d52935036d 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -29,8 +29,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \ format_string, sta->field); \ } #define STA_READ_D(name, field) STA_READ(name, field, "%d\n") -#define STA_READ_U(name, field) STA_READ(name, field, "%u\n") -#define STA_READ_S(name, field) STA_READ(name, field, "%s\n") #define STA_OPS(name) \ static const struct file_operations sta_ ##name## _ops = { \ @@ -52,10 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, sta.aid, D); -STA_FILE(dev, sdata->name, S); -STA_FILE(last_signal, last_signal, D); STA_FILE(last_ack_signal, last_ack_signal, D); -STA_FILE(beacon_loss_count, beacon_loss_count, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -101,40 +96,6 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file, } STA_OPS(num_ps_buf_frames); -static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - return mac80211_format_buffer(userbuf, count, ppos, "%d\n", - jiffies_to_msecs(jiffies - sta->last_rx)); -} -STA_OPS(inactive_ms); - - -static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct timespec uptime; - struct tm result; - long connected_time_secs; - char buf[100]; - int res; - ktime_get_ts(&uptime); - connected_time_secs = uptime.tv_sec - sta->last_connected; - time_to_tm(connected_time_secs, 0, &result); - result.tm_year -= 70; - result.tm_mday -= 1; - res = scnprintf(buf, sizeof(buf), - "years - %ld\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n", - result.tm_year, result.tm_mon, result.tm_mday, - result.tm_hour, result.tm_min, result.tm_sec); - return simple_read_from_buffer(userbuf, count, ppos, buf, res); -} -STA_OPS(connected_time); - - - static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -359,37 +320,6 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, } STA_OPS(vht_capa); -static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf, - size_t count, loff_t 
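The rewritten DEBUGFS_STATS_ADD() leans on `#` stringification plus plain token reuse, so a single macro argument names both the debugfs file and the struct member, and the two can no longer drift apart. A self-contained demo of the trick (create_u32() merely stands in for debugfs_create_u32()):

```c
/* One macro argument supplies the file name (via # stringification) and
 * the struct member (via token reuse), mirroring the new
 * DEBUGFS_STATS_ADD() above.
 */
#include <stdio.h>

struct local_stats {
	unsigned int dot11FailedCount;
	unsigned int dot11RetryCount;
};

static void create_u32(const char *name, unsigned int *value)
{
	printf("debugfs file %-20s -> %u\n", name, *value);	/* stand-in */
}

#define STATS_ADD(name) create_u32(#name, &local->name)

int main(void)
{
	struct local_stats stats = { .dot11FailedCount = 3,
				     .dot11RetryCount = 7 };
	struct local_stats *local = &stats;

	STATS_ADD(dot11FailedCount);
	STATS_ADD(dot11RetryCount);
	return 0;
}
```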
*ppos) -{ - struct sta_info *sta = file->private_data; - struct rate_info rinfo; - u16 rate; - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); - - return mac80211_format_buffer(userbuf, count, ppos, - "%d.%d MBit/s\n", - rate/10, rate%10); -} -STA_OPS(current_tx_rate); - -static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct rate_info rinfo; - u16 rate; - - sta_set_rate_info_rx(sta, &rinfo); - - rate = cfg80211_calculate_bitrate(&rinfo); - - return mac80211_format_buffer(userbuf, count, ppos, - "%d.%d MBit/s\n", - rate/10, rate%10); -} -STA_OPS(last_rx_rate); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ @@ -432,30 +362,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(flags); DEBUGFS_ADD(num_ps_buf_frames); - DEBUGFS_ADD(inactive_ms); - DEBUGFS_ADD(connected_time); DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); - DEBUGFS_ADD(dev); - DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(beacon_loss_count); DEBUGFS_ADD(ht_capa); DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(last_ack_signal); - DEBUGFS_ADD(current_tx_rate); - DEBUGFS_ADD(last_rx_rate); - DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); - DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); - DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes); - DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes); DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); - DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped); - DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments); DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); - DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); - DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 26e1ca8a474a..c01e681b90fb 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -417,12 +417,13 @@ static inline int drv_get_stats(struct ieee80211_local *local, return ret; } -static inline void drv_get_tkip_seq(struct ieee80211_local *local, - u8 hw_key_idx, u32 *iv32, u16 *iv16) +static inline void drv_get_key_seq(struct ieee80211_local *local, + struct ieee80211_key *key, + struct ieee80211_key_seq *seq) { - if (local->ops->get_tkip_seq) - local->ops->get_tkip_seq(&local->hw, hw_key_idx, iv32, iv16); - trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); + if (local->ops->get_key_seq) + local->ops->get_key_seq(&local->hw, &key->conf, seq); + trace_drv_get_key_seq(local, &key->conf); } static inline int drv_set_frag_threshold(struct ieee80211_local *local, diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 52bcea6ad9e8..188faab11c24 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -38,7 +38,7 @@ static void ieee80211_get_ringparam(struct net_device *dev, static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { "rx_packets", "rx_bytes", "rx_duplicates", "rx_fragments", "rx_dropped", - "tx_packets", "tx_bytes", "tx_fragments", + "tx_packets", "tx_bytes", "tx_filtered", "tx_retry_failed", "tx_retries", "beacon_loss", "sta_state", "txrate", "rxrate", "signal", "channel", "noise", "ch_time", "ch_time_busy", @@ -87,7 +87,6 @@ static void ieee80211_get_stats(struct net_device *dev, \ data[i++] += sinfo.tx_packets; \ data[i++] += sinfo.tx_bytes; \ - data[i++] += sta->tx_fragments; \ data[i++] 
+= sta->tx_filtered_count; \ data[i++] += sta->tx_retry_failed; \ data[i++] += sta->tx_retry_count; \ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bfef1b215050..21716af8bec3 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1031,8 +1031,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, } } - if (sta && elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS) + if (sta && !sta->sta.wme && + elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS) { sta->sta.wme = true; + ieee80211_check_fast_xmit(sta); + } if (sta && elems->ht_operation && elems->ht_cap_elem && sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab46ab4a7249..2c4fe45ea38a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -181,8 +181,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result; /** * enum ieee80211_packet_rx_flags - packet RX flags - * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed - * (incl. multicast frames) * @IEEE80211_RX_FRAGMENTED: fragmented frame * @IEEE80211_RX_AMSDU: a-MSDU packet * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed @@ -192,7 +190,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result; * @rx_flags field of &struct ieee80211_rx_status. */ enum ieee80211_packet_rx_flags { - IEEE80211_RX_RA_MATCH = BIT(1), IEEE80211_RX_FRAGMENTED = BIT(2), IEEE80211_RX_AMSDU = BIT(3), IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4), @@ -725,7 +722,6 @@ struct ieee80211_if_mesh { * enum ieee80211_sub_if_data_flags - virtual interface flags * * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets - * @IEEE80211_SDATA_PROMISC: interface is promisc * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between * associated stations and deliver multicast frames both @@ -735,7 +731,6 @@ struct ieee80211_if_mesh { */ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), - IEEE80211_SDATA_PROMISC = BIT(1), IEEE80211_SDATA_OPERATING_GMODE = BIT(2), IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), @@ -1043,7 +1038,6 @@ enum queue_stop_reason { #ifdef CONFIG_MAC80211_LEDS struct tpt_led_trigger { - struct led_trigger trig; char name[32]; const struct ieee80211_tpt_blink *blink_table; unsigned int blink_table_len; @@ -1211,8 +1205,8 @@ struct ieee80211_local { atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; - /* number of interfaces with corresponding IFF_ flags */ - atomic_t iff_allmultis, iff_promiscs; + /* number of interfaces with allmulti RX */ + atomic_t iff_allmultis; struct rate_control_ref *rate_ctrl; @@ -1264,6 +1258,15 @@ struct ieee80211_local { struct list_head chanctx_list; struct mutex chanctx_mtx; +#ifdef CONFIG_MAC80211_LEDS + struct led_trigger tx_led, rx_led, assoc_led, radio_led; + struct led_trigger tpt_led; + atomic_t tx_led_active, rx_led_active, assoc_led_active; + atomic_t radio_led_active, tpt_led_active; + struct tpt_led_trigger *tpt_led_trigger; +#endif + +#ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* SNMP counters */ /* dot11CountersTable */ u32 dot11TransmittedFragmentCount; @@ -1276,18 +1279,9 @@ struct ieee80211_local { u32 dot11MulticastReceivedFrameCount; u32 dot11TransmittedFrameCount; -#ifdef CONFIG_MAC80211_LEDS - struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led; - struct tpt_led_trigger *tpt_led_trigger; - char tx_led_name[32], rx_led_name[32], - 
assoc_led_name[32], radio_led_name[32]; -#endif - -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; unsigned int tx_handlers_queued; - unsigned int tx_handlers_drop_fragment; unsigned int tx_handlers_drop_wep; unsigned int tx_handlers_drop_not_assoc; unsigned int tx_handlers_drop_unauth_port; @@ -1298,8 +1292,7 @@ struct ieee80211_local { unsigned int rx_handlers_drop_short; unsigned int tx_expand_skb_head; unsigned int tx_expand_skb_head_cloned; - unsigned int rx_expand_skb_head; - unsigned int rx_expand_skb_head2; + unsigned int rx_expand_skb_head_defrag; unsigned int rx_handlers_fragments; unsigned int tx_status_drop; #define I802_DEBUG_INC(c) (c)++ @@ -1651,6 +1644,11 @@ struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); +void ieee80211_check_fast_xmit(struct sta_info *sta); +void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); +void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_clear_fast_xmit(struct sta_info *sta); + /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bab5c63c0bad..4ee8fea263ed 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -697,9 +697,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_inc(&local->iff_allmultis); - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_inc(&local->iff_promiscs); - if (coming_up) local->open_count++; @@ -829,13 +826,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1))); - /* don't count this interface for promisc/allmulti while it is down */ + /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_dec(&local->iff_allmultis); - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_dec(&local->iff_promiscs); - if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll--; local->fif_probe_req--; @@ -1049,12 +1043,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - int allmulti, promisc, sdata_allmulti, sdata_promisc; + int allmulti, sdata_allmulti; allmulti = !!(dev->flags & IFF_ALLMULTI); - promisc = !!(dev->flags & IFF_PROMISC); sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); - sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); if (allmulti != sdata_allmulti) { if (dev->flags & IFF_ALLMULTI) @@ -1064,13 +1056,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } - if (promisc != sdata_promisc) { - if (dev->flags & IFF_PROMISC) - atomic_inc(&local->iff_promiscs); - else - atomic_dec(&local->iff_promiscs); - sdata->flags ^= IEEE80211_SDATA_PROMISC; - } spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); @@ -1111,6 +1096,35 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev, return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } +static struct rtnl_link_stats64 * +ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 
*stats) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *tstats; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + tstats = per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->tx_packets += tx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_bytes += tx_bytes; + } + + return stats; +} + static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -1120,6 +1134,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, + .ndo_get_stats64 = ieee80211_get_stats64, }; static u16 ieee80211_monitor_select_queue(struct net_device *dev, @@ -1153,14 +1168,21 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, + .ndo_get_stats64 = ieee80211_get_stats64, }; +static void ieee80211_if_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &ieee80211_dataif_ops; - dev->destructor = free_netdev; + dev->destructor = ieee80211_if_free; } static void ieee80211_iface_work(struct work_struct *work) @@ -1701,6 +1723,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); + ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!ndev->tstats) { + free_netdev(ndev); + return -ENOMEM; + } + ndev->needed_headroom = local->tx_headroom + 4*6 /* four MAC addresses */ + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 2291cd730091..2e677376c958 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -229,6 +229,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + ieee80211_check_fast_xmit_iface(sdata); drv_set_default_unicast_key(sdata->local, sdata, idx); } @@ -298,6 +299,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (pairwise) { rcu_assign_pointer(sta->ptk[idx], new); sta->ptk_idx = idx; + ieee80211_check_fast_xmit(sta); } else { rcu_assign_pointer(sta->gtk[idx], new); sta->gtk_idx = idx; @@ -483,15 +485,17 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, break; default: if (cs) { - size_t len = (seq_len > MAX_PN_LEN) ? 
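ieee80211_get_stats64() above sums the per-CPU pcpu_sw_netstats blocks; the u64_stats_fetch_begin/retry pair it uses is a seqcount that keeps the 64-bit counters tear-free for 32-bit readers. A single-threaded model of the aggregation, with that retry loop deliberately elided:

```c
/* Each CPU owns a counter block; the reader sums them all.  The kernel's
 * u64_stats syncp retry loop is omitted here for brevity.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct sw_netstats {
	uint64_t rx_packets, rx_bytes;
	uint64_t tx_packets, tx_bytes;
};

static struct sw_netstats per_cpu[NR_CPUS];

static struct sw_netstats get_stats64(void)
{
	struct sw_netstats sum = { 0 };

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		sum.rx_packets += per_cpu[cpu].rx_packets;
		sum.rx_bytes   += per_cpu[cpu].rx_bytes;
		sum.tx_packets += per_cpu[cpu].tx_packets;
		sum.tx_bytes   += per_cpu[cpu].tx_bytes;
	}
	return sum;
}

int main(void)
{
	per_cpu[0].tx_packets = 10;	/* frames sent while on CPU0 */
	per_cpu[2].tx_packets = 5;

	struct sw_netstats s = get_stats64();
	printf("tx_packets: %llu\n", (unsigned long long)s.tx_packets);
	return 0;
}
```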
- MAX_PN_LEN : seq_len; + if (seq_len && seq_len != cs->pn_len) { + kfree(key); + return ERR_PTR(-EINVAL); + } key->conf.iv_len = cs->hdr_len; key->conf.icv_len = cs->mic_len; for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < len; j++) + for (j = 0; j < seq_len; j++) key->u.gen.rx_pn[i][j] = - seq[len - j - 1]; + seq[seq_len - j - 1]; key->flags |= KEY_FLAG_CIPHER_SCHEME; } } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index c5a31835be0e..df430a618764 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -18,7 +18,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define MAX_PN_LEN 16 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -116,7 +115,7 @@ struct ieee80211_key { } gcmp; struct { /* generic cipher scheme */ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_MAX_PN_LEN]; } gen; } u; diff --git a/net/mac80211/led.c b/net/mac80211/led.c index e2b836446af3..0505845b7ab8 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -12,96 +12,175 @@ #include <linux/export.h> #include "led.h" -#define MAC80211_BLINK_DELAY 50 /* ms */ - -void ieee80211_led_rx(struct ieee80211_local *local) -{ - unsigned long led_delay = MAC80211_BLINK_DELAY; - if (unlikely(!local->rx_led)) - return; - led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0); -} - -void ieee80211_led_tx(struct ieee80211_local *local) -{ - unsigned long led_delay = MAC80211_BLINK_DELAY; - if (unlikely(!local->tx_led)) - return; - led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0); -} - void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { - if (unlikely(!local->assoc_led)) + if (!atomic_read(&local->assoc_led_active)) return; if (associated) - led_trigger_event(local->assoc_led, LED_FULL); + led_trigger_event(&local->assoc_led, LED_FULL); else - led_trigger_event(local->assoc_led, LED_OFF); + led_trigger_event(&local->assoc_led, LED_OFF); } void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { - if (unlikely(!local->radio_led)) + if (!atomic_read(&local->radio_led_active)) return; if (enabled) - led_trigger_event(local->radio_led, LED_FULL); + led_trigger_event(&local->radio_led, LED_FULL); else - led_trigger_event(local->radio_led, LED_OFF); + led_trigger_event(&local->radio_led, LED_OFF); +} + +void ieee80211_alloc_led_names(struct ieee80211_local *local) +{ + local->rx_led.name = kasprintf(GFP_KERNEL, "%srx", + wiphy_name(local->hw.wiphy)); + local->tx_led.name = kasprintf(GFP_KERNEL, "%stx", + wiphy_name(local->hw.wiphy)); + local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc", + wiphy_name(local->hw.wiphy)); + local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio", + wiphy_name(local->hw.wiphy)); +} + +void ieee80211_free_led_names(struct ieee80211_local *local) +{ + kfree(local->rx_led.name); + kfree(local->tx_led.name); + kfree(local->assoc_led.name); + kfree(local->radio_led.name); +} + +static void ieee80211_tx_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tx_led); + + atomic_inc(&local->tx_led_active); +} + +static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tx_led); + + atomic_dec(&local->tx_led_active); +} + +static void ieee80211_rx_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, 
+ struct ieee80211_local, + rx_led); + + atomic_inc(&local->rx_led_active); +} + +static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + rx_led); + + atomic_dec(&local->rx_led_active); +} + +static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + assoc_led); + + atomic_inc(&local->assoc_led_active); +} + +static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + assoc_led); + + atomic_dec(&local->assoc_led_active); +} + +static void ieee80211_radio_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + radio_led); + + atomic_inc(&local->radio_led_active); +} + +static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + radio_led); + + atomic_dec(&local->radio_led_active); +} + +static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tpt_led); + + atomic_inc(&local->tpt_led_active); } -void ieee80211_led_names(struct ieee80211_local *local) +static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev) { - snprintf(local->rx_led_name, sizeof(local->rx_led_name), - "%srx", wiphy_name(local->hw.wiphy)); - snprintf(local->tx_led_name, sizeof(local->tx_led_name), - "%stx", wiphy_name(local->hw.wiphy)); - snprintf(local->assoc_led_name, sizeof(local->assoc_led_name), - "%sassoc", wiphy_name(local->hw.wiphy)); - snprintf(local->radio_led_name, sizeof(local->radio_led_name), - "%sradio", wiphy_name(local->hw.wiphy)); + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tpt_led); + + atomic_dec(&local->tpt_led_active); } void ieee80211_led_init(struct ieee80211_local *local) { - local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->rx_led) { - local->rx_led->name = local->rx_led_name; - if (led_trigger_register(local->rx_led)) { - kfree(local->rx_led); - local->rx_led = NULL; - } + atomic_set(&local->rx_led_active, 0); + local->rx_led.activate = ieee80211_rx_led_activate; + local->rx_led.deactivate = ieee80211_rx_led_deactivate; + if (local->rx_led.name && led_trigger_register(&local->rx_led)) { + kfree(local->rx_led.name); + local->rx_led.name = NULL; } - local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->tx_led) { - local->tx_led->name = local->tx_led_name; - if (led_trigger_register(local->tx_led)) { - kfree(local->tx_led); - local->tx_led = NULL; - } + atomic_set(&local->tx_led_active, 0); + local->tx_led.activate = ieee80211_tx_led_activate; + local->tx_led.deactivate = ieee80211_tx_led_deactivate; + if (local->tx_led.name && led_trigger_register(&local->tx_led)) { + kfree(local->tx_led.name); + local->tx_led.name = NULL; } - local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->assoc_led) { - local->assoc_led->name = local->assoc_led_name; - if (led_trigger_register(local->assoc_led)) { - kfree(local->assoc_led); - local->assoc_led = NULL; - } + atomic_set(&local->assoc_led_active, 0); + local->assoc_led.activate = 
ieee80211_assoc_led_activate; + local->assoc_led.deactivate = ieee80211_assoc_led_deactivate; + if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) { + kfree(local->assoc_led.name); + local->assoc_led.name = NULL; } - local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->radio_led) { - local->radio_led->name = local->radio_led_name; - if (led_trigger_register(local->radio_led)) { - kfree(local->radio_led); - local->radio_led = NULL; - } + atomic_set(&local->radio_led_active, 0); + local->radio_led.activate = ieee80211_radio_led_activate; + local->radio_led.deactivate = ieee80211_radio_led_deactivate; + if (local->radio_led.name && led_trigger_register(&local->radio_led)) { + kfree(local->radio_led.name); + local->radio_led.name = NULL; } + atomic_set(&local->tpt_led_active, 0); if (local->tpt_led_trigger) { - if (led_trigger_register(&local->tpt_led_trigger->trig)) { + local->tpt_led.activate = ieee80211_tpt_led_activate; + local->tpt_led.deactivate = ieee80211_tpt_led_deactivate; + if (led_trigger_register(&local->tpt_led)) { kfree(local->tpt_led_trigger); local->tpt_led_trigger = NULL; } @@ -110,58 +189,50 @@ void ieee80211_led_init(struct ieee80211_local *local) void ieee80211_led_exit(struct ieee80211_local *local) { - if (local->radio_led) { - led_trigger_unregister(local->radio_led); - kfree(local->radio_led); - } - if (local->assoc_led) { - led_trigger_unregister(local->assoc_led); - kfree(local->assoc_led); - } - if (local->tx_led) { - led_trigger_unregister(local->tx_led); - kfree(local->tx_led); - } - if (local->rx_led) { - led_trigger_unregister(local->rx_led); - kfree(local->rx_led); - } + if (local->radio_led.name) + led_trigger_unregister(&local->radio_led); + if (local->assoc_led.name) + led_trigger_unregister(&local->assoc_led); + if (local->tx_led.name) + led_trigger_unregister(&local->tx_led); + if (local->rx_led.name) + led_trigger_unregister(&local->rx_led); if (local->tpt_led_trigger) { - led_trigger_unregister(&local->tpt_led_trigger->trig); + led_trigger_unregister(&local->tpt_led); kfree(local->tpt_led_trigger); } } -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->radio_led_name; + return local->radio_led.name; } EXPORT_SYMBOL(__ieee80211_get_radio_led_name); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->assoc_led_name; + return local->assoc_led.name; } EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->tx_led_name; + return local->tx_led.name; } EXPORT_SYMBOL(__ieee80211_get_tx_led_name); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->rx_led_name; + return local->rx_led.name; } EXPORT_SYMBOL(__ieee80211_get_rx_led_name); @@ -205,16 +276,17 @@ static void tpt_trig_timer(unsigned long data) } } - read_lock(&tpt_trig->trig.leddev_list_lock); - list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) + read_lock(&local->tpt_led.leddev_list_lock); + list_for_each_entry(led_cdev, 
&local->tpt_led.led_cdevs, trig_list) led_blink_set(led_cdev, &on, &off); - read_unlock(&tpt_trig->trig.leddev_list_lock); + read_unlock(&local->tpt_led.leddev_list_lock); } -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len) +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len) { struct ieee80211_local *local = hw_to_local(hw); struct tpt_led_trigger *tpt_trig; @@ -229,7 +301,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, snprintf(tpt_trig->name, sizeof(tpt_trig->name), "%stpt", wiphy_name(local->hw.wiphy)); - tpt_trig->trig.name = tpt_trig->name; + local->tpt_led.name = tpt_trig->name; tpt_trig->blink_table = blink_table; tpt_trig->blink_table_len = blink_table_len; @@ -269,10 +341,10 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) tpt_trig->running = false; del_timer_sync(&tpt_trig->timer); - read_lock(&tpt_trig->trig.leddev_list_lock); - list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) + read_lock(&local->tpt_led.leddev_list_lock); + list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list) led_set_brightness(led_cdev, LED_OFF); - read_unlock(&tpt_trig->trig.leddev_list_lock); + read_unlock(&local->tpt_led.leddev_list_lock); } void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, diff --git a/net/mac80211/led.h b/net/mac80211/led.h index 89f4344f13b9..a7893a1ac98b 100644 --- a/net/mac80211/led.h +++ b/net/mac80211/led.h @@ -11,25 +11,42 @@ #include <linux/leds.h> #include "ieee80211_i.h" +#define MAC80211_BLINK_DELAY 50 /* ms */ + +static inline void ieee80211_led_rx(struct ieee80211_local *local) +{ +#ifdef CONFIG_MAC80211_LEDS + unsigned long led_delay = MAC80211_BLINK_DELAY; + + if (!atomic_read(&local->rx_led_active)) + return; + led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0); +#endif +} + +static inline void ieee80211_led_tx(struct ieee80211_local *local) +{ +#ifdef CONFIG_MAC80211_LEDS + unsigned long led_delay = MAC80211_BLINK_DELAY; + + if (!atomic_read(&local->tx_led_active)) + return; + led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0); +#endif +} + #ifdef CONFIG_MAC80211_LEDS -void ieee80211_led_rx(struct ieee80211_local *local); -void ieee80211_led_tx(struct ieee80211_local *local); void ieee80211_led_assoc(struct ieee80211_local *local, bool associated); void ieee80211_led_radio(struct ieee80211_local *local, bool enabled); -void ieee80211_led_names(struct ieee80211_local *local); +void ieee80211_alloc_led_names(struct ieee80211_local *local); +void ieee80211_free_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off); #else -static inline void ieee80211_led_rx(struct ieee80211_local *local) -{ -} -static inline void ieee80211_led_tx(struct ieee80211_local *local) -{ -} static inline void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { @@ -38,7 +55,10 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } -static inline void ieee80211_led_names(struct ieee80211_local *local) +static inline void ieee80211_alloc_led_names(struct ieee80211_local 
*local) +{ +} +static inline void ieee80211_free_led_names(struct ieee80211_local *local) { } static inline void ieee80211_led_init(struct ieee80211_local *local) @@ -58,7 +78,7 @@ static inline void ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes) { #ifdef CONFIG_MAC80211_LEDS - if (local->tpt_led_trigger && ieee80211_is_data(fc)) + if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->tx_bytes += bytes; #endif } @@ -67,7 +87,7 @@ static inline void ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes) { #ifdef CONFIG_MAC80211_LEDS - if (local->tpt_led_trigger && ieee80211_is_data(fc)) + if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->rx_bytes += bytes; #endif } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index df3051d96aff..3c956c5f99b2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -41,9 +41,6 @@ void ieee80211_configure_filter(struct ieee80211_local *local) unsigned int changed_flags; unsigned int new_flags = 0; - if (atomic_read(&local->iff_promiscs)) - new_flags |= FIF_PROMISC_IN_BSS; - if (atomic_read(&local->iff_allmultis)) new_flags |= FIF_ALLMULTI; @@ -646,7 +643,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - ieee80211_led_names(local); + ieee80211_alloc_led_names(local); ieee80211_roc_setup(local); @@ -771,8 +768,11 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; } - for (r = 0; r < local->hw.n_cipher_schemes; r++) + for (r = 0; r < local->hw.n_cipher_schemes; r++) { suites[w++] = cs[r].cipher; + if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN)) + return -EINVAL; + } } local->hw.wiphy->cipher_suites = suites; @@ -840,7 +840,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* Only HW csum features are currently compatible with mac80211 */ feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | + NETIF_F_GSO_SOFTWARE; if (WARN_ON(hw->netdev_features & ~feature_whitelist)) return -EINVAL; @@ -1209,6 +1210,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) sta_info_stop(local); + ieee80211_free_led_names(local); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 60d737f144e3..ac843fc88745 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -72,10 +72,11 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, * * @sta: mesh peer link to restart * - * Locking: this function must be called holding sta->lock + * Locking: this function must be called holding sta->plink_lock */ static inline void mesh_plink_fsm_restart(struct sta_info *sta) { + lockdep_assert_held(&sta->plink_lock); sta->plink_state = NL80211_PLINK_LISTEN; sta->llid = sta->plid = sta->reason = 0; sta->plink_retries = 0; @@ -213,13 +214,15 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) * All mesh paths with this peer as next hop will be flushed * Returns beacon changed flag if the beacon content changed. 
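/* Editor's sketch, not part of the patch: the sta->lock -> sta->plink_lock
 * conversion in the mesh_plink.c hunks below all follows this shape, and the
 * added lockdep_assert_held() turns the documented locking rule into a
 * runtime check on lockdep-enabled kernels. Function names here are
 * hypothetical.
 */
static void example_plink_locked_op(struct sta_info *sta)
{
	lockdep_assert_held(&sta->plink_lock);	/* complains about lock-less callers */
	sta->plink_retries = 0;
}

static void example_plink_caller(struct sta_info *sta)
{
	spin_lock_bh(&sta->plink_lock);	/* _bh: the plink timer runs in softirq */
	example_plink_locked_op(sta);
	spin_unlock_bh(&sta->plink_lock);
}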
* - * Locking: the caller must hold sta->lock + * Locking: the caller must hold sta->plink_lock */ static u32 __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed = 0; + lockdep_assert_held(&sta->plink_lock); + if (sta->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->plink_state = NL80211_PLINK_BLOCKED; @@ -244,13 +247,13 @@ u32 mesh_plink_deactivate(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); changed = __mesh_plink_deactivate(sta); sta->reason = WLAN_REASON_MESH_PEER_CANCELED; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return changed; } @@ -387,7 +390,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); sta->last_rx = jiffies; /* rates and capabilities don't change during peering */ @@ -419,7 +422,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, else rate_control_rate_update(local, sband, sta, changed); out: - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); } static struct sta_info * @@ -552,7 +555,7 @@ static void mesh_plink_timer(unsigned long data) if (sta->sdata->local->quiescing) return; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); /* If a timer fires just before a state transition on another CPU, * we may have already extended the timeout and changed state by the @@ -563,7 +566,7 @@ static void mesh_plink_timer(unsigned long data) mpl_dbg(sta->sdata, "Ignoring timer for %pM in state %s (timer adjusted)", sta->sta.addr, mplstates[sta->plink_state]); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return; } @@ -573,7 +576,7 @@ static void mesh_plink_timer(unsigned long data) mpl_dbg(sta->sdata, "Ignoring timer for %pM in state %s (timer deleted)", sta->sta.addr, mplstates[sta->plink_state]); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return; } @@ -619,7 +622,7 @@ static void mesh_plink_timer(unsigned long data) default: break; } - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); if (action) mesh_plink_frame_tx(sdata, action, sta->sta.addr, sta->llid, sta->plid, reason); @@ -674,16 +677,16 @@ u32 mesh_plink_open(struct sta_info *sta) if (!test_sta_flag(sta, WLAN_STA_AUTH)) return 0; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); sta->llid = mesh_get_new_llid(sdata); if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return 0; } sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); mpl_dbg(sdata, "Mesh plink: starting establishment with %pM\n", sta->sta.addr); @@ -700,10 +703,10 @@ u32 mesh_plink_block(struct sta_info *sta) { u32 changed; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); changed = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_BLOCKED; - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return changed; } @@ -758,7 +761,7 @@ static u32 mesh_plink_fsm(struct 
ieee80211_sub_if_data *sdata, mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, mplstates[sta->plink_state], mplevents[event]); - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); switch (sta->plink_state) { case NL80211_PLINK_LISTEN: switch (event) { @@ -872,7 +875,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, */ break; } - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); if (action) { mesh_plink_frame_tx(sdata, action, sta->sta.addr, sta->llid, sta->plid, sta->reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 26053bf2faa8..3294666f599c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4307,15 +4307,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, - struct cfg80211_bss *cbss, bool assoc) + struct cfg80211_bss *cbss, bool assoc, + bool override) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_supported_band *sband; - struct ieee80211_sta_ht_cap sta_ht_cap; - bool have_sta = false, is_override = false; + bool have_sta = false; int err; sband = local->hw.wiphy->bands[cbss->channel->band]; @@ -4335,14 +4335,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); - ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); - - is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) != - (sband->ht_cap.cap & - IEEE80211_HT_CAP_SUP_WIDTH_20_40); - - if (new_sta || is_override) { + if (new_sta || override) { err = ieee80211_prep_channel(sdata, cbss); if (err) { if (new_sta) @@ -4552,7 +4545,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); - err = ieee80211_prep_connection(sdata, req->bss, false); + err = ieee80211_prep_connection(sdata, req->bss, false, false); if (err) goto err_clear; @@ -4624,6 +4617,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; const u8 *ssidie, *ht_ie, *vht_ie; int i, err; + bool override = false; assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL); if (!assoc_data) @@ -4728,14 +4722,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } } - if (req->flags & ASSOC_REQ_DISABLE_HT) { - ifmgd->flags |= IEEE80211_STA_DISABLE_HT; - ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - } - - if (req->flags & ASSOC_REQ_DISABLE_VHT) - ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; if (!sband->ht_cap.ht_supported || @@ -4847,7 +4833,36 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->dtim_period = 0; ifmgd->have_beacon = false; - err = ieee80211_prep_connection(sdata, req->bss, true); + /* override HT/VHT configuration only if the AP and we support it */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { + struct ieee80211_sta_ht_cap sta_ht_cap; + + if (req->flags & ASSOC_REQ_DISABLE_HT) + override = true; + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + + /* check for 40 MHz disable override */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ) && + 
sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && + !(sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + override = true; + + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + req->flags & ASSOC_REQ_DISABLE_VHT) + override = true; + } + + if (req->flags & ASSOC_REQ_DISABLE_HT) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } + + if (req->flags & ASSOC_REQ_DISABLE_VHT) + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + + err = ieee80211_prep_connection(sdata, req->bss, true, override); if (err) goto err_clear; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d53355b011f5..de69adf24f53 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -683,7 +683,13 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) return; - ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + if (ista) { + spin_lock_bh(&sta->rate_ctrl_lock); + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + spin_unlock_bh(&sta->rate_ctrl_lock); + } else { + ref->ops->get_rate(ref->priv, NULL, NULL, txrc); + } if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) return; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 38652f09feaf..25c9be5dd7fd 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -42,10 +42,12 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) return; + spin_lock_bh(&sta->rate_ctrl_lock); if (ref->ops->tx_status) ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); else ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info); + spin_unlock_bh(&sta->rate_ctrl_lock); } static inline void @@ -64,7 +66,9 @@ rate_control_tx_status_noskb(struct ieee80211_local *local, if (WARN_ON_ONCE(!ref->ops->tx_status_noskb)) return; + spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info); + spin_unlock_bh(&sta->rate_ctrl_lock); } static inline void rate_control_rate_init(struct sta_info *sta) @@ -91,8 +95,10 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; + spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta); + spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } @@ -115,18 +121,20 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, return; } + spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def, ista, priv_sta, changed); + spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); } drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, - struct ieee80211_sta *sta, - gfp_t gfp) + struct sta_info *sta, gfp_t gfp) { - return ref->ops->alloc_sta(ref->priv, sta, gfp); + spin_lock_init(&sta->rate_ctrl_lock); + return ref->ops->alloc_sta(ref->priv, &sta->sta, gfp); } static inline void rate_control_free_sta(struct sta_info *sta) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 260eed45b6d2..aa35977a9c4d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -32,6 +32,16 @@ #include "wme.h" #include "rate.h" +static inline void ieee80211_rx_stats(struct net_device *dev, u32 len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + 
u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += len; + u64_stats_update_end(&tstats->syncp); +} + /* * monitor mode reception * @@ -529,8 +539,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, } prev_dev = sdata->dev; - sdata->dev->stats.rx_packets++; - sdata->dev->stats.rx_bytes += skb->len; + ieee80211_rx_stats(sdata->dev, skb->len); } if (prev_dev) { @@ -981,7 +990,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_local *local = rx->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct sta_info *sta = rx->sta; struct tid_ampdu_rx *tid_agg_rx; u16 sc; @@ -1016,10 +1024,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) goto dont_reorder; - /* not actually part of this BA session */ - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - goto dont_reorder; - /* new, potentially un-ordered, ampdu frame - process it */ /* reset session timer */ @@ -1073,10 +1077,8 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { - if (status->rx_flags & IEEE80211_RX_RA_MATCH) { - rx->local->dot11FrameDuplicateCount++; - rx->sta->num_duplicates++; - } + I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); + rx->sta->num_duplicates++; return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; @@ -1200,6 +1202,8 @@ static void sta_ps_start(struct sta_info *sta) ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); + ieee80211_clear_fast_xmit(sta); + if (!sta->sta.txq[0]) return; @@ -1265,7 +1269,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); int tid, ac; - if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + if (!rx->sta) return RX_CONTINUE; if (sdata->vif.type != NL80211_IFTYPE_AP && @@ -1367,11 +1371,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, - NL80211_IFTYPE_OCB); - /* OCB uses wild-card BSSID */ - if (is_broadcast_ether_addr(bssid)) - sta->last_rx = jiffies; + sta->last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to @@ -1386,9 +1386,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_CONTINUE; - if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_notify(rx->sdata, hdr); @@ -1517,13 +1514,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * possible. */ - /* - * No point in finding a key and decrypting if the frame is neither - * addressed to us nor a multicast frame. 
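/* Editor's sketch (hypothetical helper, not from the patch): the consumer
 * side of the ieee80211_rx_stats()/ieee80211_tx_stats() per-CPU counters
 * this patch introduces. The u64_stats seqcount retry loop yields a
 * consistent 64-bit snapshot even on 32-bit SMP, where the counter update
 * itself is not atomic.
 */
static u64 example_total_rx_bytes(struct net_device *dev)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_sw_netstats *tstats =
			per_cpu_ptr(dev->tstats, cpu);
		unsigned int start;
		u64 bytes;

		do {
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
			bytes = tstats->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));

		total += bytes;
	}
	return total;
}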
- */ - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_CONTINUE; - /* start without a key */ rx->key = NULL; fc = hdr->frame_control; @@ -1795,7 +1785,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) frag = sc & IEEE80211_SCTL_FRAG; if (is_multicast_ether_addr(hdr->addr1)) { - rx->local->dot11MulticastReceivedFrameCount++; + I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); goto out_no_led; } @@ -1878,7 +1868,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rx->skb = __skb_dequeue(&entry->skb_list); if (skb_tailroom(rx->skb) < entry->extra_len) { - I802_DEBUG_INC(rx->local->rx_expand_skb_head2); + I802_DEBUG_INC(rx->local->rx_expand_skb_head_defrag); if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, GFP_ATOMIC))) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); @@ -2054,18 +2044,15 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) struct sk_buff *skb, *xmit_skb; struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; struct sta_info *dsta; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += rx->skb->len; skb = rx->skb; xmit_skb = NULL; + ieee80211_rx_stats(dev, skb->len); + if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && - (status->rx_flags & IEEE80211_RX_RA_MATCH) && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* @@ -2206,7 +2193,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb, *fwd_skb; struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u16 q, hdrlen; @@ -2237,8 +2223,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr)) return RX_DROP_MONITOR; - if (!ieee80211_is_data(hdr->frame_control) || - !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!mesh_hdr->ttl) @@ -2329,11 +2314,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames); ieee80211_add_pending_skb(local, fwd_skb); out: - if (is_multicast_ether_addr(hdr->addr1) || - sdata->dev->flags & IFF_PROMISC) + if (is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; - else - return RX_DROP_MONITOR; + return RX_DROP_MONITOR; } #endif @@ -2444,6 +2427,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) struct { __le16 control, start_seq_num; } __packed bar_data; + struct ieee80211_event event = { + .type = BAR_RX_EVENT, + }; if (!rx->sta) return RX_DROP_MONITOR; @@ -2459,6 +2445,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_DROP_MONITOR; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; + event.u.ba.tid = tid; + event.u.ba.ssn = start_seq_num; + event.u.ba.sta = &rx->sta->sta; /* reset session timer */ if (tid_agg_rx->timeout) @@ -2471,6 +2460,8 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) start_seq_num, frames); spin_unlock(&tid_agg_rx->reorder_lock); + drv_event_callback(rx->local, rx->sdata, &event); + kfree_skb(skb); return RX_QUEUED; } @@ -2560,9 +2551,6 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) rx->flags |= 
IEEE80211_RX_BEACON_REPORTED; } - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; - if (ieee80211_drop_unencrypted_mgmt(rx)) return RX_DROP_UNUSABLE; @@ -2590,9 +2578,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) return RX_DROP_UNUSABLE; - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_UNUSABLE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: /* reject HT action frames from stations not supporting HT */ @@ -3076,8 +3061,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, } prev_dev = sdata->dev; - sdata->dev->stats.rx_packets++; - sdata->dev->stats.rx_bytes += skb->len; + ieee80211_rx_stats(sdata->dev, skb->len); } if (prev_dev) { @@ -3245,16 +3229,25 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); + if (!skb_queue_empty(&frames)) { + struct ieee80211_event event = { + .type = BA_FRAME_TIMEOUT, + .u.ba.tid = tid, + .u.ba.sta = &sta->sta, + }; + drv_event_callback(rx.local, rx.sdata, &event); + } + ieee80211_rx_handlers(&rx, &frames); } /* main receive path */ -static bool prepare_for_handlers(struct ieee80211_rx_data *rx, - struct ieee80211_hdr *hdr) +static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); int multicast = is_multicast_ether_addr(hdr->addr1); @@ -3263,30 +3256,23 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_STATION: if (!bssid && !sdata->u.mgd.use_4addr) return false; - if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC) || - sdata->u.mgd.use_4addr) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } - break; + if (multicast) + return true; + return ether_addr_equal(sdata->vif.addr, hdr->addr1); case NL80211_IFTYPE_ADHOC: if (!bssid) return false; if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) return false; - if (ieee80211_is_beacon(hdr->frame_control)) { + if (ieee80211_is_beacon(hdr->frame_control)) return true; - } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { + if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) return false; - } else if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!rx->sta) { + if (!multicast && + !ether_addr_equal(sdata->vif.addr, hdr->addr1)) + return false; + if (!rx->sta) { int rate_idx; if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) rate_idx = 0; /* TODO: HT/VHT rates */ @@ -3295,25 +3281,18 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, ieee80211_ibss_rx_no_sta(sdata, bssid, hdr->addr2, BIT(rate_idx)); } - break; + return true; case NL80211_IFTYPE_OCB: if (!bssid) return false; - if (ieee80211_is_beacon(hdr->frame_control)) { + if (ieee80211_is_beacon(hdr->frame_control)) return false; - } else if (!is_broadcast_ether_addr(bssid)) { - ocb_dbg(sdata, "BSSID mismatch in OCB mode!\n"); + if (!is_broadcast_ether_addr(bssid)) return false; - } else if (!multicast && - 
!ether_addr_equal(sdata->dev->dev_addr, - hdr->addr1)) { - /* if we are in promisc mode we also accept - * packets not destined for us - */ - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - rx->flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!rx->sta) { + if (!multicast && + !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) + return false; + if (!rx->sta) { int rate_idx; if (status->flag & RX_FLAG_HT) rate_idx = 0; /* TODO: HT rates */ @@ -3322,22 +3301,17 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, ieee80211_ocb_rx_no_sta(sdata, bssid, hdr->addr2, BIT(rate_idx)); } - break; + return true; case NL80211_IFTYPE_MESH_POINT: - if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } - break; + if (multicast) + return true; + return ether_addr_equal(sdata->vif.addr, hdr->addr1); case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: - if (!bssid) { - if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) - return false; - } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { + if (!bssid) + return ether_addr_equal(sdata->vif.addr, hdr->addr1); + + if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { /* * Accept public action frames even when the * BSSID doesn't match, this is used for P2P @@ -3349,10 +3323,10 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, return false; if (ieee80211_is_public_action(hdr, skb->len)) return true; - if (!ieee80211_is_beacon(hdr->frame_control)) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!ieee80211_has_tods(hdr->frame_control)) { + return ieee80211_is_beacon(hdr->frame_control); + } + + if (!ieee80211_has_tods(hdr->frame_control)) { /* ignore data frames to TDLS-peers */ if (ieee80211_is_data(hdr->frame_control)) return false; @@ -3361,30 +3335,22 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, !ether_addr_equal(bssid, hdr->addr1)) return false; } - break; + return true; case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) return false; - if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2)) - return false; - break; + return ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2); case NL80211_IFTYPE_P2P_DEVICE: - if (!ieee80211_is_public_action(hdr, skb->len) && - !ieee80211_is_probe_req(hdr->frame_control) && - !ieee80211_is_probe_resp(hdr->frame_control) && - !ieee80211_is_beacon(hdr->frame_control)) - return false; - if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) && - !multicast) - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - break; + return ieee80211_is_public_action(hdr, skb->len) || + ieee80211_is_probe_req(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control) || + ieee80211_is_beacon(hdr->frame_control); default: - /* should never get here */ - WARN_ON_ONCE(1); break; } - return true; + WARN_ON_ONCE(1); + return false; } /* @@ -3398,13 +3364,10 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr = (void *)skb->data; rx->skb = skb; - status->rx_flags |= IEEE80211_RX_RA_MATCH; - if (!prepare_for_handlers(rx, hdr)) + if (!ieee80211_accept_frame(rx)) return false; if (!consume) { @@ -3447,7 +3410,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, rx.local = 
local; if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) - local->dot11ReceivedFragmentCount++; + I802_DEBUG_INC(local->dot11ReceivedFragmentCount); if (ieee80211_is_mgmt(fc)) { /* drop frame if too short for header */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2880f2ae99ab..ce0c1662de42 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -71,6 +71,7 @@ static const struct rhashtable_params sta_rht_params = { .key_offset = offsetof(struct sta_info, sta.addr), .key_len = ETH_ALEN, .hashfn = sta_addr_hash, + .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; /* Caller must hold local->sta_mtx */ @@ -286,7 +287,7 @@ static int sta_prepare_rate_control(struct ieee80211_local *local, sta->rate_ctrl = local->rate_ctrl; sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - &sta->sta, gfp); + sta, gfp); if (!sta->rate_ctrl_priv) return -ENOMEM; @@ -312,6 +313,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); #ifdef CONFIG_MAC80211_MESH + spin_lock_init(&sta->plink_lock); if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) init_timer(&sta->plink_timer); @@ -1217,6 +1219,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) ps_dbg(sdata, "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", sta->sta.addr, sta->sta.aid, filtered, buffered); + + ieee80211_check_fast_xmit(sta); } static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, @@ -1615,6 +1619,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_clear_fast_xmit(sta); return; } @@ -1632,6 +1637,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else { clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_check_fast_xmit(sta); } } EXPORT_SYMBOL(ieee80211_sta_block_awake); @@ -1736,6 +1742,7 @@ int sta_info_move_state(struct sta_info *sta, !sta->sdata->u.vlan.sta)) atomic_dec(&sta->sdata->bss->num_mcast_sta); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_clear_fast_xmit(sta); } break; case IEEE80211_STA_AUTHORIZED: @@ -1745,6 +1752,7 @@ int sta_info_move_state(struct sta_info *sta, !sta->sdata->u.vlan.sta)) atomic_inc(&sta->sdata->bss->num_mcast_sta); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_check_fast_xmit(sta); } break; default: diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5c164fb3f6c5..9bd1e97876bd 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -241,6 +241,34 @@ struct sta_ampdu_mlme { /* Value to indicate no TID reservation */ #define IEEE80211_TID_UNRESERVED 0xff +#define IEEE80211_FAST_XMIT_MAX_IV 18 + +/** + * struct ieee80211_fast_tx - TX fastpath information + * @key: key to use for hw crypto + * @hdr: the 802.11 header to put with the frame + * @hdr_len: actual 802.11 header length + * @sa_offs: offset of the SA + * @da_offs: offset of the DA + * @pn_offs: offset where to put PN for crypto (or 0 if not needed) + * @band: band this will be transmitted on, for tx_info + * @rcu_head: RCU head to free this struct + * + * This struct is small enough so that the common case (maximum crypto + * header length of 8 like for CCMP/GCMP) fits into a single 64-byte + * cache line. 
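/* Editor's sketch (assumed helper, not the real ieee80211_xmit_fast()):
 * how the ieee80211_fast_tx template described by this kernel-doc is
 * consumed per packet. Only the prebuilt header is copied and the DA/SA
 * patched in at the stored offsets; all eligibility checks already
 * happened out of band in ieee80211_check_fast_xmit().
 */
static void example_use_fast_tx(struct sk_buff *skb,
				const struct ieee80211_fast_tx *fast_tx,
				const u8 *eth_dest, const u8 *eth_src)
{
	/* hdr_len already includes the appended RFC 1042 SNAP header */
	u8 *hdr = skb_push(skb, fast_tx->hdr_len);

	memcpy(hdr, fast_tx->hdr, fast_tx->hdr_len);
	memcpy(hdr + fast_tx->da_offs, eth_dest, ETH_ALEN);
	memcpy(hdr + fast_tx->sa_offs, eth_src, ETH_ALEN);
}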
+ */ +struct ieee80211_fast_tx { + struct ieee80211_key *key; + u8 hdr_len; + u8 sa_offs, da_offs, pn_offs; + u8 band; + u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + + sizeof(rfc1042_header)]; + + struct rcu_head rcu_head; +}; + /** * struct sta_info - STA information * @@ -257,6 +285,8 @@ struct sta_ampdu_mlme { * @gtk: group keys negotiated with this station, if any * @gtk_idx: last installed group key index * @rate_ctrl: rate control algorithm reference + * @rate_ctrl_lock: spinlock used to protect rate control data + * (data inside the algorithm, so serializes calls there) * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as * "the" transmit rate @@ -295,10 +325,10 @@ struct sta_ampdu_mlme { * @fail_avg: moving percentage of failed MSDUs * @tx_packets: number of RX/TX MSDUs * @tx_bytes: number of bytes transmitted to this STA - * @tx_fragments: number of transmitted MPDUs * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers + * @plink_lock: serialize access to plink fields * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -338,6 +368,7 @@ struct sta_ampdu_mlme { * using IEEE80211_NUM_TID entry for non-QoS frames * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID * entry for non-QoS frames + * @fast_tx: TX fastpath information */ struct sta_info { /* General information, mostly static */ @@ -352,8 +383,11 @@ struct sta_info { u8 ptk_idx; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; + spinlock_t rate_ctrl_lock; spinlock_t lock; + struct ieee80211_fast_tx __rcu *fast_tx; + struct work_struct drv_deliver_wk; u16 listen_interval; @@ -400,7 +434,6 @@ struct sta_info { unsigned int fail_avg; /* Updated from TX path only, no locking requirements */ - u32 tx_fragments; u64 tx_packets[IEEE80211_NUM_ACS]; u64 tx_bytes[IEEE80211_NUM_ACS]; struct ieee80211_tx_rate last_tx_rate; @@ -422,9 +455,10 @@ struct sta_info { #ifdef CONFIG_MAC80211_MESH /* - * Mesh peer link attributes + * Mesh peer link attributes, protected by plink_lock. * TODO: move to a sub-structure that is referenced with pointer? 
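/* Editor's sketch (hypothetical helper): reading the __rcu fast_tx pointer
 * added to struct sta_info above. Writers publish it under sta->lock and
 * free the old value with kfree_rcu(), so readers only need rcu_read_lock().
 */
static bool example_sta_has_fast_tx(struct sta_info *sta)
{
	struct ieee80211_fast_tx *fast_tx;
	bool uses_fast_path;

	rcu_read_lock();
	fast_tx = rcu_dereference(sta->fast_tx);
	uses_fast_path = fast_tx != NULL;
	rcu_read_unlock();

	return uses_fast_path;
}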
*/ + spinlock_t plink_lock; u16 llid; u16 plid; u16 reason; @@ -432,6 +466,7 @@ struct sta_info { enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; + s64 t_offset; s64 t_offset_setpoint; /* mesh power save */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 005fdbe39a8b..461594966b65 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -631,15 +631,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, } if (acked || noack_success) { - local->dot11TransmittedFrameCount++; - if (!pubsta) - local->dot11MulticastTransmittedFrameCount++; - if (retry_count > 0) - local->dot11RetryCount++; - if (retry_count > 1) - local->dot11MultipleRetryCount++; + I802_DEBUG_INC(local->dot11TransmittedFrameCount); + if (!pubsta) + I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); + if (retry_count > 0) + I802_DEBUG_INC(local->dot11RetryCount); + if (retry_count > 1) + I802_DEBUG_INC(local->dot11MultipleRetryCount); } else { - local->dot11FailedCount++; + I802_DEBUG_INC(local->dot11FailedCount); } } EXPORT_SYMBOL(ieee80211_tx_status_noskb); @@ -802,13 +802,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if ((info->flags & IEEE80211_TX_STAT_ACK) || (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { - local->dot11TransmittedFrameCount++; + I802_DEBUG_INC(local->dot11TransmittedFrameCount); if (is_multicast_ether_addr(ieee80211_get_DA(hdr))) - local->dot11MulticastTransmittedFrameCount++; + I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); if (retry_count > 0) - local->dot11RetryCount++; + I802_DEBUG_INC(local->dot11RetryCount); if (retry_count > 1) - local->dot11MultipleRetryCount++; + I802_DEBUG_INC(local->dot11MultipleRetryCount); } /* This counter shall be incremented for an acknowledged MPDU @@ -818,10 +818,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (!is_multicast_ether_addr(hdr->addr1) || ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) - local->dot11TransmittedFragmentCount++; + I802_DEBUG_INC(local->dot11TransmittedFragmentCount); } else { if (ieee80211_is_first_frag(hdr->seq_ctrl)) - local->dot11FailedCount++; + I802_DEBUG_INC(local->dot11FailedCount); } if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index fff0d864adfa..8a92a920ff17 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -527,30 +527,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, /* if HT support is only added in TDLS, we need an HT-operation IE */ if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { - struct ieee80211_chanctx_conf *chanctx_conf = - rcu_dereference(sdata->vif.chanctx_conf); - if (!WARN_ON(!chanctx_conf)) { - pos = skb_put(skb, 2 + - sizeof(struct ieee80211_ht_operation)); - /* send an empty HT operation IE */ - ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, - &chanctx_conf->def, 0); - } + pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); + /* send an empty HT operation IE */ + ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, + &sdata->vif.bss_conf.chandef, 0); } ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); /* only include VHT-operation if not on the 2.4GHz band */ - if (band != IEEE80211_BAND_2GHZ && !ap_sta->sta.vht_cap.vht_supported && - sta->sta.vht_cap.vht_supported) { - struct ieee80211_chanctx_conf *chanctx_conf = - rcu_dereference(sdata->vif.chanctx_conf); - if 
(!WARN_ON(!chanctx_conf)) { - pos = skb_put(skb, 2 + - sizeof(struct ieee80211_vht_operation)); - ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap, - &chanctx_conf->def); - } + if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) { + pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); + ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap, + &sdata->vif.bss_conf.chandef); } rcu_read_unlock(); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 4c2e7690226a..6f14591d8ca9 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -69,6 +69,17 @@ #define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic +#define KEY_ENTRY __field(u32, cipher) \ + __field(u8, hw_key_idx) \ + __field(u8, flags) \ + __field(s8, keyidx) +#define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \ + __entry->flags = (k)->flags; \ + __entry->keyidx = (k)->keyidx; \ + __entry->hw_key_idx = (k)->hw_key_idx; +#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d" +#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx + /* @@ -522,25 +533,19 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(u32, cipher) - __field(u8, hw_key_idx) - __field(u8, flags) - __field(s8, keyidx) + KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->cipher = key->cipher; - __entry->flags = key->flags; - __entry->keyidx = key->keyidx; - __entry->hw_key_idx = key->hw_key_idx; + KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG ) ); @@ -656,28 +661,25 @@ TRACE_EVENT(drv_get_stats, ) ); -TRACE_EVENT(drv_get_tkip_seq, +TRACE_EVENT(drv_get_key_seq, TP_PROTO(struct ieee80211_local *local, - u8 hw_key_idx, u32 *iv32, u16 *iv16), + struct ieee80211_key_conf *key), - TP_ARGS(local, hw_key_idx, iv32, iv16), + TP_ARGS(local, key), TP_STRUCT__entry( LOCAL_ENTRY - __field(u8, hw_key_idx) - __field(u32, iv32) - __field(u16, iv16) + KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; - __entry->hw_key_idx = hw_key_idx; - __entry->iv32 = *iv32; - __entry->iv16 = *iv16; + KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG + LOCAL_PR_FMT KEY_PR_FMT, + LOCAL_PR_ARG, KEY_PR_ARG ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 667111ee6a20..8df134213adf 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -37,6 +37,16 @@ /* misc utils */ +static inline void ieee80211_tx_stats(struct net_device *dev, u32 len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_packets++; + tstats->tx_bytes += len; + u64_stats_update_end(&tstats->syncp); +} + static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct sk_buff *skb, int group_addr, int next_frag_len) @@ -987,7 +997,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) skb_queue_walk(&tx->skbs, skb) { ac = skb_get_queue_mapping(skb); - tx->sta->tx_fragments++; tx->sta->tx_bytes[ac] += skb->len; } if (ac >= 0) @@ -1600,7 +1609,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) || !skb_clone_writable(skb, ETH_HLEN) || - sdata->crypto_tx_tailroom_needed_cnt)) + (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if 
(head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -2387,12 +2396,460 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, return ERR_PTR(ret); } +/* + * fast-xmit overview + * + * The core idea of this fast-xmit is to remove per-packet checks by checking + * them out of band. ieee80211_check_fast_xmit() implements the out-of-band + * checks that are needed to get the sta->fast_tx pointer assigned, after which + * much less work can be done per packet. For example, fragmentation must be + * disabled or the fast_tx pointer will not be set. All the conditions are seen + * in the code here. + * + * Once assigned, the fast_tx data structure also caches the per-packet 802.11 + * header and other data to aid packet processing in ieee80211_xmit_fast(). + * + * The most difficult part of this is that when any of these assumptions + * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(), + * ieee80211_check_fast_xmit() or friends) is required to reset the data, + * since the per-packet code no longer checks the conditions. This is reflected + * by the calls to these functions throughout the rest of the code, and must be + * maintained if any of the TX path checks change. + */ + +void ieee80211_check_fast_xmit(struct sta_info *sta) +{ + struct ieee80211_fast_tx build = {}, *fast_tx = NULL, *old; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_hdr *hdr = (void *)build.hdr; + struct ieee80211_chanctx_conf *chanctx_conf; + __le16 fc; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORT_FAST_XMIT)) + return; + + /* Locking here protects both the pointer itself, and against concurrent + * invocations winning data access races to, e.g., the key pointer that + * is used. + * Without it, the invocation of this function right after the key + * pointer changes wouldn't be sufficient, as another CPU could access + * the pointer, then stall, and then do the cache update after the CPU + * that invalidated the key. + * With the locking, such scenarios cannot happen as the check for the + * key and the fast-tx assignment are done atomically, so the CPU that + * modifies the key will either wait, or the other one will see the key + * cleared/changed already.
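/* Editor's sketch: the "external trigger" discipline the overview above
 * demands. Any code that changes a condition this function inspects must
 * re-run the check afterwards; e.g. a setter for the per-vif noack map
 * (hypothetical shape, since the fast path bails out when noack_map is set)
 * would end like this:
 */
static void example_set_noack_map(struct ieee80211_sub_if_data *sdata,
				  u16 noack_map)
{
	sdata->noack_map = noack_map;
	/* fast_tx caches depend on noack_map - rebuild/clear them */
	ieee80211_check_fast_xmit_iface(sdata);
}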
+ */ + spin_lock_bh(&sta->lock); + if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS && + !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) && + sdata->vif.type == NL80211_IFTYPE_STATION) + goto out; + + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + goto out; + + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER) || + test_sta_flag(sta, WLAN_STA_PS_DELIVER)) + goto out; + + if (sdata->noack_map) + goto out; + + /* fast-xmit doesn't handle fragmentation at all */ + if (local->hw.wiphy->frag_threshold != (u32)-1 && + !local->ops->set_frag_threshold) + goto out; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) { + rcu_read_unlock(); + goto out; + } + build.band = chanctx_conf->def.chan->band; + rcu_read_unlock(); + + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + /* DA SA BSSID */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + memcpy(hdr->addr3, sdata->u.ibss.bssid, ETH_ALEN); + build.hdr_len = 24; + break; + case NL80211_IFTYPE_STATION: + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* DA SA BSSID */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN); + build.hdr_len = 24; + break; + } + + if (sdata->u.mgd.use_4addr) { + /* non-regular ethertype cannot use the fastpath */ + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.hdr_len = 30; + break; + } + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + build.hdr_len = 24; + break; + case NL80211_IFTYPE_AP_VLAN: + if (sdata->wdev.use_4addr) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.hdr_len = 30; + break; + } + /* fall through */ + case NL80211_IFTYPE_AP: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.sa_offs = offsetof(struct ieee80211_hdr, addr3); + build.hdr_len = 24; + break; + default: + /* not handled on fast-xmit */ + goto out; + } + + if (sta->sta.wme) { + build.hdr_len += 2; + fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); + } + + /* We store the key here so there's no point in using rcu_dereference() + * but that's fine because the code that changes the pointers will call + * this function after doing so. For a single CPU that would be enough, + * for multiple see the comment above. 
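For reference, the ToDS/FromDS address layouts that the interface-type cases above encode (DA = destination, SA = source, RA/TA = receiver/transmitter; standard IEEE 802.11 data-frame layout):

	ToDS FromDS  addr1   addr2   addr3   addr4   typical use
	 0    0      DA      SA      BSSID   -       IBSS / TDLS peer
	 0    1      DA      BSSID   SA      -       AP (and AP_VLAN) to station
	 1    0      BSSID   SA      DA      -       station to AP
	 1    1      RA      TA      DA      SA      4-address (WDS/AP_VLAN)

This is why the 4-address cases grow hdr_len from 24 to 30 bytes and take DA/SA from addr3/addr4.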
+ */ + build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]); + if (!build.key) + build.key = rcu_access_pointer(sdata->default_unicast_key); + if (build.key) { + bool gen_iv, iv_spc, mmic; + + gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; + iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; + mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC; + + /* don't handle software crypto */ + if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + goto out; + + switch (build.key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + /* add fixed key ID */ + if (gen_iv) { + (build.hdr + build.hdr_len)[3] = + 0x20 | (build.key->conf.keyidx << 6); + build.pn_offs = build.hdr_len; + } + if (gen_iv || iv_spc) + build.hdr_len += IEEE80211_CCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* add fixed key ID */ + if (gen_iv) { + (build.hdr + build.hdr_len)[3] = + 0x20 | (build.key->conf.keyidx << 6); + build.pn_offs = build.hdr_len; + } + if (gen_iv || iv_spc) + build.hdr_len += IEEE80211_GCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_TKIP: + /* cannot handle MMIC or IV generation in xmit-fast */ + if (mmic || gen_iv) + goto out; + if (iv_spc) + build.hdr_len += IEEE80211_TKIP_IV_LEN; + break; + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + /* cannot handle IV generation in fast-xmit */ + if (gen_iv) + goto out; + if (iv_spc) + build.hdr_len += IEEE80211_WEP_IV_LEN; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + WARN(1, + "management cipher suite 0x%x enabled for data\n", + build.key->conf.cipher); + goto out; + default: + /* we don't know how to generate IVs for this at all */ + if (WARN_ON(gen_iv)) + goto out; + /* pure hardware keys are OK, of course */ + if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME)) + break; + /* cipher scheme might require space allocation */ + if (iv_spc && + build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV) + goto out; + if (iv_spc) + build.hdr_len += build.key->conf.iv_len; + } + + fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } + + hdr->frame_control = fc; + + memcpy(build.hdr + build.hdr_len, + rfc1042_header, sizeof(rfc1042_header)); + build.hdr_len += sizeof(rfc1042_header); + + fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC); + /* if the kmemdup fails, continue w/o fast_tx */ + if (!fast_tx) + goto out; + + out: + /* we might have raced against another call to this function */ + old = rcu_dereference_protected(sta->fast_tx, + lockdep_is_held(&sta->lock)); + rcu_assign_pointer(sta->fast_tx, fast_tx); + if (old) + kfree_rcu(old, rcu_head); + spin_unlock_bh(&sta->lock); +} + +void ieee80211_check_fast_xmit_all(struct ieee80211_local *local) +{ + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) + ieee80211_check_fast_xmit(sta); + rcu_read_unlock(); +} + +void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + rcu_read_lock(); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata && + (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) + continue; + ieee80211_check_fast_xmit(sta); + } + + rcu_read_unlock(); +} + +void ieee80211_clear_fast_xmit(struct sta_info *sta) +{ + struct ieee80211_fast_tx *fast_tx; + + spin_lock_bh(&sta->lock); + 
fast_tx = rcu_dereference_protected(sta->fast_tx, + lockdep_is_held(&sta->lock)); + RCU_INIT_POINTER(sta->fast_tx, NULL); + spin_unlock_bh(&sta->lock); + + if (fast_tx) + kfree_rcu(fast_tx, rcu_head); +} + +static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct net_device *dev, struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u16 ethertype = (skb->data[12] << 8) | skb->data[13]; + int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); + int hw_headroom = sdata->local->hw.extra_tx_headroom; + struct ethhdr eth; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; + struct tid_ampdu_tx *tid_tx = NULL; + u8 tid = IEEE80211_NUM_TIDS; + + /* control port protocol needs a lot of special handling */ + if (cpu_to_be16(ethertype) == sdata->control_port_protocol) + return false; + + /* only RFC 1042 SNAP */ + if (ethertype < ETH_P_802_3_MIN) + return false; + + /* don't handle TX status request here either */ + if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) + return false; + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx && + !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) + return false; + } + + /* after this point (skb is modified) we cannot return false */ + + if (skb_shared(skb)) { + struct sk_buff *tmp_skb = skb; + + skb = skb_clone(skb, GFP_ATOMIC); + kfree_skb(tmp_skb); + + if (!skb) + return true; + } + + ieee80211_tx_stats(dev, skb->len + extra_head); + + /* will not be crypto-handled beyond what we do here, so use false + * as the may-encrypt argument for the resize to not account for + * more room than we already have in 'extra_head' + */ + if (unlikely(ieee80211_skb_resize(sdata, skb, + max_t(int, extra_head + hw_headroom - + skb_headroom(skb), 0), + false))) { + kfree_skb(skb); + return true; + } + + memcpy(&eth, skb->data, ETH_HLEN - 2); + hdr = (void *)skb_push(skb, extra_head); + memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len); + memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); + memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + + memset(info, 0, sizeof(*info)); + info->band = fast_tx->band; + info->control.vif = &sdata->vif; + info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | + IEEE80211_TX_CTL_DONTFRAG | + (tid_tx ?
IEEE80211_TX_CTL_AMPDU : 0); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + sta->tx_msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + __skb_queue_head_init(&tx.skbs); + + tx.flags = IEEE80211_TX_UNICAST; + tx.local = local; + tx.sdata = sdata; + tx.sta = sta; + tx.key = fast_tx->key; + + if (fast_tx->key) + info->control.hw_key = &fast_tx->key->conf; + + if (!(local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) { + tx.skb = skb; + r = ieee80211_tx_h_rate_ctrl(&tx); + skb = tx.skb; + tx.skb = NULL; + + if (r != TX_CONTINUE) { + if (r != TX_QUEUED) + kfree_skb(skb); + return true; + } + } + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_packets[skb_get_queue_mapping(skb)]++; + + if (fast_tx->pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + fast_tx->pn_offs; + + switch (fast_tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + pn = atomic64_inc_return(&fast_tx->key->u.ccmp.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&fast_tx->key->u.gcmp.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + __skb_queue_tail(&tx.skbs, skb); + ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false); + return true; +} + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; + struct sk_buff *next; if (unlikely(skb->len < ETH_HLEN)) { kfree_skb(skb); @@ -2401,20 +2858,67 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); - if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { - kfree_skb(skb); - goto out; + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) + goto out_free; + + if (!IS_ERR_OR_NULL(sta)) { + struct ieee80211_fast_tx *fast_tx; + + fast_tx = rcu_dereference(sta->fast_tx); + + if (fast_tx && + ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + goto out; } - skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); - if (IS_ERR(skb)) - goto out; + if (skb_is_gso(skb)) { + struct sk_buff *segs; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs)) { + goto out_free; + } else if (segs) { + consume_skb(skb); + skb = segs; + } + } else { + /* we cannot process non-linear frames on this path */ + if (skb_linearize(skb)) { + kfree_skb(skb); + goto out; + } + + /* the frame could be fragmented, software-encrypted, and other + * things so we cannot really handle checksum offload with it - + * fix it up in software before we handle anything else. 
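The byte indices in the PN insertion above follow the CCMP/GCMP header layout: bytes 2 and 3 hold the reserved octet and the key-ID octet, so the 48-bit packet number is split around them, and 0x20 is the Ext IV flag set when the header is prebuilt. A standalone illustration (fill_ccmp_hdr is a hypothetical helper, values made up):

	#include <stdint.h>
	#include <stdio.h>

	static void fill_ccmp_hdr(uint8_t hdr[8], uint64_t pn, unsigned int keyidx)
	{
		hdr[0] = pn;			/* PN0 (least significant byte) */
		hdr[1] = pn >> 8;		/* PN1 */
		hdr[2] = 0;			/* reserved */
		hdr[3] = 0x20 | (keyidx << 6);	/* Ext IV flag + key ID */
		hdr[4] = pn >> 16;		/* PN2 */
		hdr[5] = pn >> 24;		/* PN3 */
		hdr[6] = pn >> 32;		/* PN4 */
		hdr[7] = pn >> 40;		/* PN5 (most significant of 48 bits) */
	}

	int main(void)
	{
		uint8_t hdr[8];
		int i;

		fill_ccmp_hdr(hdr, 0x010203040506ULL, 1);
		for (i = 0; i < 8; i++)
			printf("%02x ", hdr[i]);
		printf("\n");	/* prints: 06 05 00 60 04 03 02 01 */
		return 0;
	}

Because the key-ID byte is fixed at fast-tx build time, the per-packet work reduces to one atomic64 increment plus these six byte stores.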
+ */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); + if (skb_checksum_help(skb)) + goto out_free; + } + } + + next = skb; + while (next) { + skb = next; + next = skb->next; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - dev->trans_start = jiffies; + skb->prev = NULL; + skb->next = NULL; + + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); + if (IS_ERR(skb)) + goto out; - ieee80211_xmit(sdata, sta, skb); + ieee80211_tx_stats(dev, skb->len); + + ieee80211_xmit(sdata, sta, skb); + } + goto out; + out_free: + kfree_skb(skb); out: rcu_read_unlock(); } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a0f3e6a3c7d1..1c78d7fb1da7 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,13 @@ menu "Core Netfilter Configuration" depends on NET && INET && NETFILTER +config NETFILTER_INGRESS + bool "Netfilter ingress support" + select NET_INGRESS + help + This allows you to classify packets from ingress using the Netfilter + infrastructure. + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e6163017c42d..653e32eac08c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) { + struct list_head *nf_hook_list; struct nf_hook_ops *elem; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + BUG_ON(reg->dev == NULL); + nf_hook_list = ®->dev->nf_hooks_ingress; + net_inc_ingress_queue(); + break; + } +#endif + /* Fall through. */ + default: + nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; + break; + } + + list_for_each_entry(elem, nf_hook_list, list) { if (reg->priority < elem->priority) break; } @@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + net_dec_ingress_queue(); + break; + } + break; +#endif + default: + break; + } #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif @@ -166,11 +195,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], - struct nf_hook_ops, list); + elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); next_hook: - verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, - &elem); + verdict = nf_iterate(state->hook_list, skb, state, &elem); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 55b083ec587a..2fe6de46f6d0 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); #define MTYPE bitmap_ip +#define HOST_MASK 32 /* Type structure */ struct bitmap_ip { @@ -149,8 +150,11 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = 
nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -174,7 +178,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > 32) + if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else @@ -277,7 +281,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr >= 32) + if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); } else @@ -286,7 +290,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); - if (netmask > 32) + if (netmask > HOST_MASK) return -IPSET_ERR_INVALID_NETMASK; first_ip &= ip_set_hostmask(netmask); @@ -360,7 +364,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 86104744b00f..eb188561d65f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac +#define HOST_MASK 32 #define IP_SET_BITMAP_STORED_TIMEOUT enum { @@ -250,8 +251,11 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -343,7 +347,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr >= 32) + if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); } else @@ -397,7 +401,8 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 005dd36444c3..898edb693b3f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -294,7 +294,8 @@ static struct ip_set_type 
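The repeated conversion in these ipset hunks, from `ret = f(...) || g(...);` to two sequential calls, is worth spelling out: the logical OR collapses any nonzero error into 1 and short-circuits, so the caller would return a meaningless positive value instead of the real error code. A compact illustration with stand-in functions (f and g are hypothetical):

	#include <stdio.h>

	static int f(void) { return -22; }	/* e.g. ip_set_get_hostipaddr4() failing with -EINVAL */
	static int g(void) { return 0; }	/* e.g. ip_set_get_extensions() */

	int main(void)
	{
		int ret;

		ret = f() || g();		/* old pattern: ret == 1, error code lost */
		printf("old: %d\n", ret);

		ret = f();			/* new pattern: error propagated as-is */
		if (!ret)
			ret = g();
		printf("new: %d\n", ret);	/* -22 */
		return 0;
	}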
bitmap_port_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d259da3ce67a..475e4960a164 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -42,7 +42,7 @@ static inline struct ip_set_net *ip_set_pernet(struct net *net) } #define IP_SET_INC 64 -#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) +#define STRNCMP(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) static unsigned int max_sets; @@ -85,7 +85,7 @@ find_set_type(const char *name, u8 family, u8 revision) struct ip_set_type *type; list_for_each_entry_rcu(type, &ip_set_type_list, list) - if (STREQ(type->name, name) && + if (STRNCMP(type->name, name) && (type->family == family || type->family == NFPROTO_UNSPEC) && revision >= type->revision_min && @@ -132,7 +132,7 @@ __find_set_type_get(const char *name, u8 family, u8 revision, /* Make sure the type is already loaded * but we don't support the revision */ list_for_each_entry_rcu(type, &ip_set_type_list, list) - if (STREQ(type->name, name)) { + if (STRNCMP(type->name, name)) { err = -IPSET_ERR_FIND_TYPE; goto unlock; } @@ -166,7 +166,7 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max, *min = 255; *max = 0; rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) - if (STREQ(type->name, name) && + if (STRNCMP(type->name, name) && (type->family == family || type->family == NFPROTO_UNSPEC)) { found = true; @@ -365,7 +365,7 @@ size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) { enum ip_set_ext_id id; - size_t offset = 0; + size_t offset = len; u32 cadt_flags = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) @@ -375,12 +375,12 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; - offset += ALIGN(len + offset, ip_set_extensions[id].align); + offset = ALIGN(offset, ip_set_extensions[id].align); set->offset[id] = offset; set->extensions |= ip_set_extensions[id].type; offset += ip_set_extensions[id].len; } - return len + offset; + return offset; } EXPORT_SYMBOL_GPL(ip_set_elem_len); @@ -432,6 +432,31 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], } EXPORT_SYMBOL_GPL(ip_set_get_extensions); +int +ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active) +{ + if (SET_WITH_TIMEOUT(set)) { + unsigned long *timeout = ext_timeout(e, set); + + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(active ? ip_set_timeout_get(timeout) + : *timeout))) + return -EMSGSIZE; + } + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, set))) + return -EMSGSIZE; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + return -EMSGSIZE; + if (SET_WITH_SKBINFO(set) && + ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) + return -EMSGSIZE; + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_put_extensions); + /* * Creating/destroying/renaming/swapping affect the existence and * the properties of a set. 
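The ip_set_elem_len() change above replaces `offset += ALIGN(len + offset, ...)`, which accumulates padding incorrectly, with a running offset that starts at the element length and is re-aligned before each extension is placed. A worked example with made-up sizes:

	#include <stdio.h>

	#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		/* hypothetical element and extension sizes, illustration only */
		unsigned long offset = 10;		/* offset = len */
		unsigned long ext_align[] = { 8, 4 };
		unsigned long ext_len[] = { 8, 4 };
		int i;

		for (i = 0; i < 2; i++) {
			offset = ALIGN(offset, ext_align[i]);	/* 16, then 24 */
			printf("extension %d at offset %lu\n", i, offset);
			offset += ext_len[i];
		}
		printf("total element size: %lu\n", offset);	/* 28 */
		return 0;
	}

Each extension lands at the next properly aligned offset after the previous one, and the return value is simply the final offset, i.e. the total element size.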
All of these can be executed from userspace @@ -581,7 +606,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s != NULL && STREQ(s->name, name)) { + if (s != NULL && STRNCMP(s->name, name)) { __ip_set_get(s); index = i; *set = s; @@ -758,7 +783,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); - if (set != NULL && STREQ(set->name, name)) { + if (set != NULL && STRNCMP(set->name, name)) { *id = i; break; } @@ -787,7 +812,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; - } else if (STREQ(name, s->name)) { + } else if (STRNCMP(name, s->name)) { /* Name clash */ *set = s; return -EEXIST; @@ -887,7 +912,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && - STREQ(set->type->name, clash->type->name) && + STRNCMP(set->type->name, clash->type->name) && set->type->family == clash->type->family && set->type->revision_min == clash->type->revision_min && set->type->revision_max == clash->type->revision_max && @@ -1098,7 +1123,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && STREQ(s->name, name2)) { + if (s != NULL && STRNCMP(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 29fb01ddff93..1981f021cc60 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -98,7 +98,7 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto) { const struct iphdr *iph = ip_hdr(skb); - unsigned int protooff = ip_hdrlen(skb); + unsigned int protooff = skb_network_offset(skb) + ip_hdrlen(skb); int protocol = iph->protocol; /* See comments at tcp_match in ip_tables.c */ @@ -135,7 +135,9 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src, __be16 frag_off = 0; nexthdr = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + protoff = ipv6_skip_exthdr(skb, + skb_network_offset(skb) + + sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) return false; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 974ff386db0f..7952869c8023 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -180,6 +180,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_data_equal #undef mtype_do_data_match #undef mtype_data_set_flags +#undef mtype_data_reset_elem #undef mtype_data_reset_flags #undef mtype_data_netmask #undef mtype_data_list @@ -193,7 +194,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_ahash_memsize #undef mtype_flush #undef mtype_destroy -#undef mtype_gc_init #undef mtype_same_set #undef mtype_kadt #undef mtype_uadt @@ -227,6 +227,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) #define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) #define mtype_elem 
IPSET_TOKEN(MTYPE, _elem) + #define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) #define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) @@ -234,7 +235,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) #define mtype_flush IPSET_TOKEN(MTYPE, _flush) #define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) -#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) @@ -249,9 +249,18 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) +#ifndef MTYPE +#error "MTYPE is not defined!" +#endif + +#ifndef HOST_MASK +#error "HOST_MASK is not defined!" +#endif + #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) #endif @@ -261,6 +270,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) & jhash_mask(htable_bits)) #ifndef htype +#ifndef HTYPE +#error "HTYPE is not defined!" +#endif /* HTYPE */ #define htype HTYPE /* The generic hash structure */ @@ -287,7 +299,7 @@ struct htype { struct net_prefixes nets[0]; /* book-keeping of prefixes */ #endif }; -#endif +#endif /* htype */ #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different @@ -1045,7 +1057,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, u8 netmask; #endif size_t hsize; - struct HTYPE *h; + struct htype *h; struct htable *t; #ifndef IP_SET_PROTO_UNDEF @@ -1165,3 +1177,5 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, return 0; } #endif /* IP_SET_EMIT_CREATE */ + +#undef HKEY_DATALEN diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 76959d79e9d1..54df48b5c455 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -56,15 +56,15 @@ hash_ip4_data_equal(const struct hash_ip4_elem *e1, return e1->ip == e2->ip; } -static inline bool +static bool hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip)) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -74,7 +74,6 @@ hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e) } #define MTYPE hash_ip4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -121,8 +120,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -145,7 +147,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > 32) + if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } @@ -196,10 +198,10 @@ hash_ip6_data_list(struct sk_buff *skb, 
const struct hash_ip6_elem *e) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6)) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -208,12 +210,9 @@ hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e) } #undef MTYPE -#undef PF #undef HOST_MASK -#undef HKEY_DATALEN #define MTYPE hash_ip6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE @@ -261,8 +260,11 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -301,7 +303,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 7abf9788cfa8..d231248eb3e2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -63,10 +63,10 @@ hash_ipmark4_data_list(struct sk_buff *skb, if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -76,10 +76,8 @@ hash_ipmark4_data_next(struct hash_ipmark4_elem *next, next->ip = d->ip; } -#define MTYPE hash_ipmark4 -#define PF 4 -#define HOST_MASK 32 -#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem) +#define MTYPE hash_ipmark4 +#define HOST_MASK 32 #include "ip_set_hash_gen.h" static int @@ -123,12 +121,15 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; - e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; if (adt == IPSET_TEST || @@ -147,7 +148,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > 32) + if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } @@ -191,10 +192,10 @@ hash_ipmark6_data_list(struct sk_buff *skb, if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -204,15 +205,11 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK -#undef HKEY_DATALEN #define MTYPE hash_ipmark6 -#define PF 6 #define HOST_MASK 128 -#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem) -#define 
IP_SET_EMIT_CREATE +#define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -258,12 +255,15 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); + e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; if (adt == IPSET_TEST) { @@ -307,7 +307,8 @@ static struct ip_set_type hash_ipmark_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index dcbcceb9a52f..a47c29f12090 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -69,10 +69,10 @@ hash_ipport4_data_list(struct sk_buff *skb, nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -83,10 +83,8 @@ hash_ipport4_data_next(struct hash_ipport4_elem *next, next->port = d->port; } -#define MTYPE hash_ipport4 -#define PF 4 -#define HOST_MASK 32 -#define HKEY_DATALEN sizeof(struct hash_ipport4_elem) +#define MTYPE hash_ipport4 +#define HOST_MASK 32 #include "ip_set_hash_gen.h" static int @@ -132,15 +130,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -171,7 +169,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > 32) + if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } @@ -231,10 +229,10 @@ hash_ipport6_data_list(struct sk_buff *skb, nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -245,15 +243,11 @@ hash_ipport6_data_next(struct hash_ipport4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK -#undef HKEY_DATALEN #define MTYPE hash_ipport6 -#define PF 6 #define HOST_MASK 128 -#define HKEY_DATALEN sizeof(struct hash_ipport6_elem) -#define IP_SET_EMIT_CREATE +#define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int @@ -301,15 +295,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], 
if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -376,7 +370,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 7ef93fc887a1..89615f134845 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -70,10 +70,10 @@ hash_ipportip4_data_list(struct sk_buff *skb, nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -86,7 +86,6 @@ hash_ipportip4_data_next(struct hash_ipportip4_elem *next, /* Common functions */ #define MTYPE hash_ipportip4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -134,8 +133,11 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -143,10 +145,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -177,7 +176,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > 32) + if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } @@ -240,10 +239,10 @@ hash_ipportip6_data_list(struct sk_buff *skb, nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -254,11 +253,9 @@ hash_ipportip6_data_next(struct hash_ipportip4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK #define MTYPE hash_ipportip6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -309,8 +306,11 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = 
ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -318,10 +318,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -388,7 +385,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index b6012ad92781..6ba7a7e083f9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -114,10 +114,10 @@ hash_ipportnet4_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -130,7 +130,6 @@ hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next, } #define MTYPE hash_ipportnet4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -189,8 +188,11 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -205,10 +207,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.cidr = cidr - 1; } - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -249,7 +248,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > 32) + if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } @@ -367,10 +366,10 @@ hash_ipportnet6_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -381,11 +380,9 @@ hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK #define MTYPE hash_ipportnet6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -448,8 +445,11 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ 
-466,10 +466,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], ip6_netmask(&e.ip2, e.cidr + 1); - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -547,7 +544,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 65690b52a4d5..1f8668d7a538 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -52,7 +52,12 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1, static inline bool hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e) { - return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether); + if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) + goto nla_put_failure; + return false; + +nla_put_failure: + return true; } static inline void @@ -62,7 +67,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next, } #define MTYPE hash_mac4 -#define PF 4 #define HOST_MASK 32 #define IP_SET_EMIT_CREATE #define IP_SET_PROTO_UNDEF @@ -149,7 +153,8 @@ static struct ip_set_type hash_mac_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 6b3ac10ac2f1..2e63dad8644d 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -95,10 +95,10 @@ hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -109,7 +109,6 @@ hash_net4_data_next(struct hash_net4_elem *next, } #define MTYPE hash_net4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -160,8 +159,11 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -264,10 +266,10 @@ hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data) (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -277,11 +279,9 @@ hash_net6_data_next(struct hash_net4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK #define MTYPE 
hash_net6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -333,8 +333,11 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -383,7 +386,8 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 380ef5148ea1..fe481f677f56 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -193,10 +193,10 @@ hash_netiface4_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -207,7 +207,6 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next, } #define MTYPE hash_netiface4 -#define PF 4 #define HOST_MASK 32 #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" @@ -308,8 +307,11 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -444,10 +446,10 @@ hash_netiface6_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -457,12 +459,9 @@ hash_netiface6_data_next(struct hash_netiface4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK -#undef HKEY_DATALEN #define MTYPE hash_netiface6 -#define PF 6 #define HOST_MASK 128 #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) #define IP_SET_EMIT_CREATE @@ -546,8 +545,11 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -613,7 +615,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git 
a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index ea8772afb6e7..847047483560 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -128,7 +128,6 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next, } #define MTYPE hash_netnet4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -182,9 +181,15 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -354,11 +359,9 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK #define MTYPE hash_netnet6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -411,9 +414,15 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || - ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -470,7 +479,8 @@ static struct ip_set_type hash_netnet_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index c0ddb58d19dc..8273819c1a2f 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -110,10 +110,10 @@ hash_netport4_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -125,7 +125,6 @@ hash_netport4_data_next(struct hash_netport4_elem *next, } #define MTYPE hash_netport4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -182,8 +181,11 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -194,10 +196,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], e.cidr = cidr - 1; } - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { 
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -326,10 +325,10 @@ hash_netport6_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -340,11 +339,9 @@ hash_netport6_data_next(struct hash_netport4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK #define MTYPE hash_netport6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -404,8 +401,11 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -417,10 +417,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], } ip6_netmask(&e.ip, e.cidr + 1); - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -495,7 +492,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index bfaa94c7baa7..1451a8ac938f 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -54,7 +54,7 @@ struct hash_netportnet4_elem { u16 ccmp; }; u16 padding; - u8 nomatch:1; + u8 nomatch; u8 proto; }; @@ -124,10 +124,10 @@ hash_netportnet4_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -139,7 +139,6 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next, } #define MTYPE hash_netportnet4 -#define PF 4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" @@ -200,9 +199,15 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || - ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -220,10 +225,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.cidr[1] = cidr; } - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -326,7 +328,7 @@ struct hash_netportnet6_elem { u16 ccmp; }; u16 padding; - u8 nomatch:1; + u8 nomatch; 
u8 proto; }; @@ -397,10 +399,10 @@ hash_netportnet6_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -411,11 +413,9 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next, } #undef MTYPE -#undef PF #undef HOST_MASK #define MTYPE hash_netportnet6 -#define PF 6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" @@ -477,9 +477,15 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || - ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || - ip_set_get_extensions(set, tb, &ext); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -496,10 +502,7 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], ip6_netmask(&e.ip[0], e.cidr[0]); ip6_netmask(&e.ip[1], e.cidr[1]); - if (tb[IPSET_ATTR_PORT]) - e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); - else - return -IPSET_ERR_PROTOCOL; + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); @@ -577,7 +580,8 @@ static struct ip_set_type hash_netportnet_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f8f682806e36..5bd3b1eae3fa 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -678,7 +678,8 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 19b9cce6c210..b08ba9538d12 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1457,18 +1457,12 @@ static struct socket *make_send_sock(struct net *net, int id) struct socket *sock; int result; - /* First create a socket move it to right name space later */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + /* First create a socket */ + result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - /* - * Kernel sockets that are a part of a namespace, should not - * hold a reference to a namespace in order to allow to stop it. - * After sk_change_net should be released using sk_release_kernel. 
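The comment being removed here documented the old kernel-socket lifecycle, and the same conversion repeats in make_receive_sock() and the sync threads below: sockets used to be created in init_net, reparented with sk_change_net(), and freed with the special-purpose sk_release_kernel(). Now that sock_create_kern() takes the destination namespace directly, the ordinary release path suffices. A condensed before/after sketch of the pattern (error handling trimmed):

	/* before: create in init_net, reparent, special release */
	sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	sk_change_net(sock->sk, net);
	...
	sk_release_kernel(sock->sk);

	/* after: create directly in the target namespace */
	sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	...
	sock_release(sock);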
- */ - sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1497,7 +1491,7 @@ static struct socket *make_send_sock(struct net *net, int id) return sock; error: - sk_release_kernel(sock->sk); + sock_release(sock); return ERR_PTR(result); } @@ -1518,17 +1512,11 @@ static struct socket *make_receive_sock(struct net *net, int id) int result; /* First create a socket */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - /* - * Kernel sockets that are a part of a namespace, should not - * hold a reference to a namespace in order to allow to stop it. - * After sk_change_net should be released using sk_release_kernel. - */ - sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); @@ -1554,7 +1542,7 @@ static struct socket *make_receive_sock(struct net *net, int id) return sock; error: - sk_release_kernel(sock->sk); + sock_release(sock); return ERR_PTR(result); } @@ -1692,7 +1680,7 @@ done: ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo); return 0; @@ -1729,7 +1717,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo->buf); kfree(tinfo); @@ -1854,11 +1842,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) return 0; outsocket: - sk_release_kernel(sock->sk); + sock_release(sock); outtinfo: if (tinfo) { - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo->buf); kfree(tinfo); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 11c7682fa0ea..22a5ac76683e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -1257,7 +1257,7 @@ static int seq_show(struct seq_file *s, void *v) inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, inst->id_sequence, 1); - return seq_has_overflowed(s); + return 0; } static const struct seq_operations nfqnl_seq_ops = { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7f29cfc76349..66def315eb56 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -161,6 +161,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, par->hook_mask = 0; } par->family = ctx->afi->family; + par->nft_compat = true; } static void target_compat_from_user(struct xt_target *t, void *in, void *out) @@ -377,6 +378,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, par->hook_mask = 0; } par->family = ctx->afi->family; + par->nft_compat = true; } static void match_compat_from_user(struct xt_match *m, void *in, void *out) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 51a459c3c649..83032464a4bd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -947,11 +947,9 @@ static int xt_table_seq_show(struct seq_file *seq, void *v) { struct xt_table *table = list_entry(v, struct xt_table, list); - if (strlen(table->name)) { + if (*table->name) seq_printf(seq, "%s\n", table->name); - return 
seq_has_overflowed(seq); - } else - return 0; + return 0; } static const struct seq_operations xt_table_seq_ops = { @@ -1087,10 +1085,8 @@ static int xt_match_seq_show(struct seq_file *seq, void *v) if (trav->curr == trav->head) return 0; match = list_entry(trav->curr, struct xt_match, list); - if (*match->name == '\0') - return 0; - seq_printf(seq, "%s\n", match->name); - return seq_has_overflowed(seq); + if (*match->name) + seq_printf(seq, "%s\n", match->name); } return 0; } @@ -1142,10 +1138,8 @@ static int xt_target_seq_show(struct seq_file *seq, void *v) if (trav->curr == trav->head) return 0; target = list_entry(trav->curr, struct xt_target, list); - if (*target->name == '\0') - return 0; - seq_printf(seq, "%s\n", target->name); - return seq_has_overflowed(seq); + if (*target->name) + seq_printf(seq, "%s\n", target->name); } return 0; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index e762de5ee89b..8c3190e2fc6a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -277,6 +277,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } + if (par->nft_compat) + return 0; + xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; @@ -299,6 +302,9 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } + if (par->nft_compat) + return 0; + xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 23345238711b..ebd41dc501e5 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -23,6 +23,7 @@ MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); +MODULE_ALIAS("arpt_MARK"); static unsigned int mark_tg(struct sk_buff *skb, const struct xt_action_param *par) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 89045982ec94..b103e9627716 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -15,8 +15,9 @@ #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> -#include <linux/netfilter/xt_set.h> +#include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_timeout.h> +#include <uapi/linux/netfilter/xt_set.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bf6e76643f78..69d67c300b80 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -76,17 +76,18 @@ struct listeners { }; /* state bits */ -#define NETLINK_CONGESTED 0x0 +#define NETLINK_S_CONGESTED 0x0 /* flags */ -#define NETLINK_KERNEL_SOCKET 0x1 -#define NETLINK_RECV_PKTINFO 0x2 -#define NETLINK_BROADCAST_SEND_ERROR 0x4 -#define NETLINK_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_KERNEL_SOCKET 0x1 +#define NETLINK_F_RECV_PKTINFO 0x2 +#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 static inline int netlink_is_kernel(struct sock *sk) { - return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; + return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; } struct netlink_table *nl_table __read_mostly; @@ -256,8 +257,9 @@ static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); - if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { - if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) { + if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { + if 
(!test_and_set_bit(NETLINK_S_CONGESTED, + &nlk_sk(sk)->state)) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); } @@ -270,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk) struct netlink_sock *nlk = nlk_sk(sk); if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(NETLINK_CONGESTED, &nlk->state); - if (!test_bit(NETLINK_CONGESTED, &nlk->state)) + clear_bit(NETLINK_S_CONGESTED, &nlk->state); + if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) wake_up_interruptible(&nlk->wait); } @@ -1118,14 +1120,15 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol) + struct mutex *cb_mutex, int protocol, + int kern) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); if (!sk) return -ENOMEM; @@ -1187,7 +1190,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol); + err = __netlink_create(net, sock, cb_mutex, protocol, kern); if (err < 0) goto out_module; @@ -1297,20 +1300,24 @@ static int netlink_autobind(struct socket *sock) struct netlink_table *table = &nl_table[sk->sk_protocol]; s32 portid = task_tgid_vnr(current); int err; - static s32 rover = -4097; + s32 rover = -4096; + bool ok; retry: cond_resched(); rcu_read_lock(); - if (__netlink_lookup(table, portid, net)) { + ok = !__netlink_lookup(table, portid, net); + rcu_read_unlock(); + if (!ok) { /* Bind collision, search negative portid values. */ - portid = rover--; - if (rover > -4097) + if (rover == -4096) + /* rover will be in range [S32_MIN, -4097] */ + rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN); + else if (rover >= -4096) rover = -4097; - rcu_read_unlock(); + portid = rover--; goto retry; } - rcu_read_unlock(); err = netlink_insert(sk, portid); if (err == -EADDRINUSE) @@ -1657,7 +1664,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_CONGESTED, &nlk->state)) && + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && !netlink_skb_is_mmaped(skb)) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { @@ -1672,7 +1679,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, add_wait_queue(&nlk->wait, &wait); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_CONGESTED, &nlk->state)) && + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) *timeo = schedule_timeout(*timeo); @@ -1896,7 +1903,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_sock *nlk = nlk_sk(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !test_bit(NETLINK_CONGESTED, &nlk->state)) { + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); @@ -1932,8 +1939,17 @@ static void do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) return; - if (!net_eq(sock_net(sk), p->net)) - return; + if (!net_eq(sock_net(sk), p->net)) { + if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) + return; + + if (!peernet_has_id(sock_net(sk), p->net)) + return; + + if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, + CAP_NET_BROADCAST)) + return; + } if (p->failure) { netlink_overrun(sk); @@ -1957,23 
+1973,33 @@ static void do_one_broadcast(struct sock *sk, netlink_overrun(sk); /* Clone failed. Notify ALL listeners. */ p->failure = 1; - if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; - } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + goto out; + } + if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if (sk_filter(sk, p->skb2)) { + goto out; + } + if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { + goto out; + } + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + NETLINK_CB(p->skb2).nsid_is_set = true; + val = netlink_broadcast_deliver(sk, p->skb2); + if (val < 0) { netlink_overrun(sk); - if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; p->skb2 = NULL; } +out: sock_put(sk); } @@ -2058,7 +2084,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) !test_bit(p->group - 1, nlk->groups)) goto out; - if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { + if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) { ret = 1; goto out; } @@ -2077,7 +2103,7 @@ out: * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the - * NETLINK_RECV_NO_ENOBUFS socket option. + * NETLINK_NO_ENOBUFS socket option. */ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { @@ -2137,9 +2163,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case NETLINK_PKTINFO: if (val) - nlk->flags |= NETLINK_RECV_PKTINFO; + nlk->flags |= NETLINK_F_RECV_PKTINFO; else - nlk->flags &= ~NETLINK_RECV_PKTINFO; + nlk->flags &= ~NETLINK_F_RECV_PKTINFO; err = 0; break; case NETLINK_ADD_MEMBERSHIP: @@ -2168,18 +2194,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } case NETLINK_BROADCAST_ERROR: if (val) - nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; + nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR; else - nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; + nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR; err = 0; break; case NETLINK_NO_ENOBUFS: if (val) { - nlk->flags |= NETLINK_RECV_NO_ENOBUFS; - clear_bit(NETLINK_CONGESTED, &nlk->state); + nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS; + clear_bit(NETLINK_S_CONGESTED, &nlk->state); wake_up_interruptible(&nlk->wait); } else { - nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; + nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS; } err = 0; break; @@ -2202,6 +2228,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; } #endif /* CONFIG_NETLINK_MMAP */ + case NETLINK_LISTEN_ALL_NSID: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) + return -EPERM; + + if (val) + nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; + else + nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2228,7 +2264,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) return -EINVAL; len = sizeof(int); - val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; + val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 
1 : 0; if (put_user(len, optlen) || put_user(val, optval)) return -EFAULT; @@ -2238,7 +2274,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) return -EINVAL; len = sizeof(int); - val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; + val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0; if (put_user(len, optlen) || put_user(val, optval)) return -EFAULT; @@ -2248,7 +2284,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) return -EINVAL; len = sizeof(int); - val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; + val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0; if (put_user(len, optlen) || put_user(val, optval)) return -EFAULT; @@ -2268,6 +2304,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } +static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + if (!NETLINK_CB(skb).nsid_is_set) + return; + + put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), + &NETLINK_CB(skb).nsid); +} + static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -2419,8 +2465,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(*addr); } - if (nlk->flags & NETLINK_RECV_PKTINFO) + if (nlk->flags & NETLINK_F_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + netlink_cmsg_listen_all_nsid(sk, msg, skb); memset(&scm, 0, sizeof(scm)); scm.creds = *NETLINK_CREDS(skb); @@ -2474,17 +2522,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - /* - * We have to just have a reference on the net from sk, but don't - * get_net it. Besides, we cannot get and then put the net here. - * So we create one inside init_net and the move it to net. 
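Here the same init_net detour is removed from __netlink_kernel_create(). What makes that possible is the `kern` argument threaded into sk_alloc() throughout this series: the point of the flag, as this series applies it, is that a socket allocated with kern set is not accounted as holding a reference on its namespace, so a namespace with live kernel sockets can still be torn down, and the socket is later freed through plain sock_release(). The essential call, as __netlink_create() now issues it (sketched here for emphasis):

	/* kern != 0: allocate in 'net' without pinning the namespace */
	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);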
- */ - - if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; - sk_change_net(sk, net); if (!cfg || cfg->groups < 32) groups = 32; @@ -2503,7 +2544,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, goto out_sock_release; nlk = nlk_sk(sk); - nlk->flags |= NETLINK_KERNEL_SOCKET; + nlk->flags |= NETLINK_F_KERNEL_SOCKET; netlink_table_grab(); if (!nl_table[unit].registered) { @@ -2540,7 +2581,10 @@ EXPORT_SYMBOL(__netlink_kernel_create); void netlink_kernel_release(struct sock *sk) { - sk_release_kernel(sk); + if (sk == NULL || sk->sk_socket == NULL) + return; + + sock_release(sk->sk_socket); } EXPORT_SYMBOL(netlink_kernel_release); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index b987fd56c3c5..ed212ffc1d9d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto); + sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern); if (sk == NULL) return -ENOMEM; @@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 2277276f52bc..54e40fa47822 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto, read_lock(&proto_tab_lock); if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) { - rc = proto_tab[proto]->create(net, sock, proto_tab[proto]); + rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern); module_put(proto_tab[proto]->owner); } read_unlock(&proto_tab_lock); diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index de1789e3cc82..1f68724d44d3 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction); /* Sock API */ -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern); void nfc_llcp_sock_free(struct nfc_llcp_sock *sock); void nfc_llcp_accept_unlink(struct sock *sk); void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b18f07ccb504..98876274a1ee 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, sock->ssap = ssap; } - new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC); + new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0); if (new_sk == NULL) { reason = LLCP_DM_REJ; release_sock(&sock->sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 9578bd6a4f3e..b7de0da46acd 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk) } } -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern) { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - sk = 
sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto); + sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern); if (!sk) return NULL; @@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) } static int llcp_sock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock, else sock->ops = &llcp_sock_ops; - sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); + sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index a8ce80b47720..5c93e8412a26 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -30,7 +30,7 @@ struct nfc_protocol { struct proto *proto; struct module *owner; int (*create)(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto); + const struct nfc_protocol *nfc_proto, int kern); }; struct nfc_rawsock { diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 82b4e8024778..e9a91488fe3d 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk) } static int rawsock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock, else sock->ops = &rawsock_ops; - sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); + sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) return -ENOMEM; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index ed6b0f8dd1bb..15840401a2ce 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -59,7 +59,7 @@ config OPENVSWITCH_VXLAN config OPENVSWITCH_GENEVE tristate "Open vSwitch Geneve tunneling support" depends on OPENVSWITCH - depends on GENEVE + depends on GENEVE_CORE default OPENVSWITCH ---help--- If you say Y here, then the Open vSwitch will be able create geneve vport. diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 096c6276e6b9..3b90461317ec 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -545,7 +545,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. 
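The conversion just below this comment, repeated in flow.c and flow_netlink.c, swaps the open-coded `ntohs(proto) >= ETH_P_802_3_MIN` test for eth_proto_is_802_3(). The helper's trick is doing the comparison in network byte order: once everything below 0x0600 is excluded, only the most significant byte of the ethertype matters, so the byte swap can be skipped on little endian. Roughly, from memory of the etherdevice.h helper (treat this as a sketch, not the authoritative definition):

	static inline bool eth_proto_is_802_3(__be16 proto)
	{
	#ifndef __BIG_ENDIAN
		/* little endian: the wire MSB sits in the low byte of the
		 * host value; mask the other byte off before comparing */
		proto &= htons(0xFF00);
	#endif
		/* both sides are compile-time constants after folding */
		return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN);
	}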
*/ - if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(eth->h_proto)) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2dacc7b5af23..bc7b0aba994a 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -332,7 +332,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(proto)) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -349,7 +349,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) __skb_pull(skb, sizeof(struct llc_snap_hdr)); - if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(llc->ethertype)) return llc->ethertype; return htons(ETH_P_802_2); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c691b1a1eee0..624e41c4267f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -816,7 +816,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, if (is_mask) { /* Always exact match EtherType. */ eth_type = htons(0xffff); - } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { + } else if (!eth_proto_is_802_3(eth_type)) { OVS_NLERR(log, "EtherType %x is less than min %x", ntohs(eth_type), ETH_P_802_3_MIN); return -EINVAL; diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index bf02fd5808c9..208c576bd1b6 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -46,11 +46,6 @@ static inline struct geneve_port *geneve_vport(const struct vport *vport) return vport_priv(vport); } -static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ - return (struct genevehdr *)(udp_hdr(skb) + 1); -} - /* Convert 64 bit tunnel ID to 24 bit VNI. */ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5989c6ee551..fd5164139bf0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1234,27 +1234,81 @@ static void packet_free_pending(struct packet_sock *po) free_percpu(po->tx_ring.pending_refcnt); } -static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +#define ROOM_POW_OFF 2 +#define ROOM_NONE 0x0 +#define ROOM_LOW 0x1 +#define ROOM_NORMAL 0x2 + +static bool __tpacket_has_room(struct packet_sock *po, int pow_off) +{ + int idx, len; + + len = po->rx_ring.frame_max + 1; + idx = po->rx_ring.head; + if (pow_off) + idx += len >> pow_off; + if (idx >= len) + idx -= len; + return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); +} + +static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off) +{ + int idx, len; + + len = po->rx_ring.prb_bdqc.knum_blocks; + idx = po->rx_ring.prb_bdqc.kactive_blk_num; + if (pow_off) + idx += len >> pow_off; + if (idx >= len) + idx -= len; + return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); +} + +static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { struct sock *sk = &po->sk; - bool has_room; + int ret = ROOM_NONE; + + if (po->prot_hook.func != tpacket_rcv) { + int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc) + - (skb ? 
skb->truesize : 0); + if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF)) + return ROOM_NORMAL; + else if (avail > 0) + return ROOM_LOW; + else + return ROOM_NONE; + } + + if (po->tp_version == TPACKET_V3) { + if (__tpacket_v3_has_room(po, ROOM_POW_OFF)) + ret = ROOM_NORMAL; + else if (__tpacket_v3_has_room(po, 0)) + ret = ROOM_LOW; + } else { + if (__tpacket_has_room(po, ROOM_POW_OFF)) + ret = ROOM_NORMAL; + else if (__tpacket_has_room(po, 0)) + ret = ROOM_LOW; + } - if (po->prot_hook.func != tpacket_rcv) - return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) - <= sk->sk_rcvbuf; + return ret; +} - spin_lock(&sk->sk_receive_queue.lock); - if (po->tp_version == TPACKET_V3) - has_room = prb_lookup_block(po, &po->rx_ring, - po->rx_ring.prb_bdqc.kactive_blk_num, - TP_STATUS_KERNEL); - else - has_room = packet_lookup_frame(po, &po->rx_ring, - po->rx_ring.head, - TP_STATUS_KERNEL); - spin_unlock(&sk->sk_receive_queue.lock); +static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + int ret; + bool has_room; + + spin_lock_bh(&po->sk.sk_receive_queue.lock); + ret = __packet_rcv_has_room(po, skb); + has_room = ret == ROOM_NORMAL; + if (po->pressure == has_room) + po->pressure = !has_room; + spin_unlock_bh(&po->sk.sk_receive_queue.lock); - return has_room; + return ret; } static void packet_sock_destruct(struct sock *sk) @@ -1282,6 +1336,20 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num) return x; } +static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) +{ + u32 rxhash; + int i, count = 0; + + rxhash = skb_get_hash(skb); + for (i = 0; i < ROLLOVER_HLEN; i++) + if (po->rollover->history[i] == rxhash) + count++; + + po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash; + return count > (ROLLOVER_HLEN >> 1); +} + static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) @@ -1318,22 +1386,40 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f, static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct sk_buff *skb, - unsigned int idx, unsigned int skip, + unsigned int idx, bool try_self, unsigned int num) { - unsigned int i, j; + struct packet_sock *po, *po_next, *po_skip = NULL; + unsigned int i, j, room = ROOM_NONE; - i = j = min_t(int, f->next[idx], num - 1); + po = pkt_sk(f->arr[idx]); + + if (try_self) { + room = packet_rcv_has_room(po, skb); + if (room == ROOM_NORMAL || + (room == ROOM_LOW && !fanout_flow_is_huge(po, skb))) + return idx; + po_skip = po; + } + + i = j = min_t(int, po->rollover->sock, num - 1); do { - if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + po_next = pkt_sk(f->arr[i]); + if (po_next != po_skip && !po_next->pressure && + packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) - f->next[idx] = i; + po->rollover->sock = i; + atomic_long_inc(&po->rollover->num); + if (room == ROOM_LOW) + atomic_long_inc(&po->rollover->num_huge); return i; } + if (++i == num) i = 0; } while (i != j); + atomic_long_inc(&po->rollover->num_failed); return idx; } @@ -1386,17 +1472,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, idx = fanout_demux_qm(f, skb, num); break; case PACKET_FANOUT_ROLLOVER: - idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); + idx = fanout_demux_rollover(f, skb, 0, false, num); break; } - po = pkt_sk(f->arr[idx]); - if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && - unlikely(!packet_rcv_has_room(po, skb))) { - idx = fanout_demux_rollover(f, skb, idx, idx, 
num); - po = pkt_sk(f->arr[idx]); - } + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) + idx = fanout_demux_rollover(f, skb, idx, true, num); + po = pkt_sk(f->arr[idx]); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1467,6 +1550,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (po->fanout) return -EALREADY; + if (type == PACKET_FANOUT_ROLLOVER || + (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { + po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); + if (!po->rollover) + return -ENOMEM; + atomic_long_set(&po->rollover->num, 0); + atomic_long_set(&po->rollover->num_huge, 0); + atomic_long_set(&po->rollover->num_failed, 0); + } + mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { @@ -1515,6 +1608,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } out: mutex_unlock(&fanout_mutex); + if (err) { + kfree(po->rollover); + po->rollover = NULL; + } return err; } @@ -1536,6 +1633,8 @@ static void fanout_release(struct sock *sk) kfree(f); } mutex_unlock(&fanout_mutex); + + kfree(po->rollover); } static const struct proto_ops packet_ops; @@ -2835,7 +2934,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; @@ -2865,6 +2964,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); + po->rollover = NULL; po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) @@ -2942,6 +3042,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb == NULL) goto out; + if (pkt_sk(sk)->pressure) + packet_rcv_has_room(pkt_sk(sk), NULL); + if (pkt_sk(sk)->has_vnet_hdr) { struct virtio_net_hdr vnet_hdr = { 0 }; @@ -3485,6 +3588,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; + struct tpacket_rollover_stats rstats; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3560,6 +3664,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->flags << 24)) : 0); break; + case PACKET_ROLLOVER_STATS: + if (!po->rollover) + return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); + break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; break; @@ -3697,6 +3810,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock, TP_STATUS_KERNEL)) mask |= POLLIN | POLLRDNORM; } + if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) + po->pressure = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); spin_lock_bh(&sk->sk_write_queue.lock); if (po->tx_ring.pg_vec) { diff --git a/net/packet/internal.h b/net/packet/internal.h index fe6e20caea1d..c035d263c1e8 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -82,12 +82,20 @@ struct packet_fanout { atomic_t rr_cur; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; - int next[PACKET_FANOUT_MAX]; spinlock_t lock; atomic_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; +struct packet_rollover { + int sock; + atomic_long_t num; + 
atomic_long_t num_huge; + atomic_long_t num_failed; +#define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32)) + u32 history[ROLLOVER_HLEN] ____cacheline_aligned; +} ____cacheline_aligned_in_smp; + struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; @@ -102,8 +110,10 @@ struct packet_sock { auxdata:1, origdev:1, has_vnet_hdr:1; + int pressure; int ifindex; /* bound device */ __be16 num; + struct packet_rollover *rollover; struct packet_mclist *mclist; atomic_t mapped; enum tpacket_versions tp_version; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 32ab87d34828..10d42f3220ab 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, goto out; } - sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern); if (sk == NULL) { err = -ENOMEM; goto out; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 6de2aeb98a1f..850a86cde0b3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 10443377fb9d..3d83641f2861 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -440,7 +440,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); + sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8ae603069a1a..36dbc2da3661 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -520,7 +520,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern); if (sk == NULL) return -ENOMEM; @@ -559,7 +559,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0); if (sk == NULL) return NULL; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0095b9a0b779..25d60ed15284 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index ca904ed5400a..78483b4602bf 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local) _enter("%p{%d}", local, local->srx.transport_type); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP, - &local->socket); + ret = 
sock_create_kern(&init_net, PF_INET, local->srx.transport_type, + IPPROTO_UDP, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2274e723a3df..daa33432b716 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -312,6 +312,7 @@ config NET_SCH_PIE config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT + select NET_INGRESS ---help--- Say Y here if you want to use classifiers for incoming packets. If unsure, say Y. @@ -477,6 +478,16 @@ config NET_CLS_BPF To compile this code as a module, choose M here: the module will be called cls_bpf. +config NET_CLS_FLOWER + tristate "Flower classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + a configurable combination of packet keys and masks. + + To compile this code as a module, choose M here: the module will + be called cls_flower. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca7f4c1b8c2..690c1689e090 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o +obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3d43e4979f27..af427a3dbcba 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, list_for_each_entry(a, actions, list) { repeat: ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ if (ret != TC_ACT_PIPE) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3f63ceac8e01..a42a3b257226 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } at = G_TC_AT(skb->tc_verd); - skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action); + skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto out; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 59649d588d79..17e6d6669c7f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = a->priv; - int i, munged = 0; + int i; unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) @@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, *ptr = ((*ptr & tkey->mask) ^ tkey->val); if (ptr == &_data) skb_store_bits(skb, off + offset, ptr, 4); - munged++; } - if (munged) - skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else WARN(1, "pedit BUG: index %d\n", p->tcf_index); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a620c4e288a5..b4359924846c 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -26,7 +26,7 @@ #include <net/pkt_cls.h> #include <net/ip.h> #include <net/route.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #if defined(CONFIG_NF_CONNTRACK) || 
defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netfilter/nf_conntrack.h> @@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->src) - return ntohl(flow->src); + if (flow->addrs.src) + return ntohl(flow->addrs.src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->dst) - return ntohl(flow->dst); + if (flow->addrs.dst) + return ntohl(flow->addrs.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { - return flow->ip_proto; + return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[0]); + if (flow->ports.ports) + return ntohs(flow->ports.src); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[1]); + if (flow->ports.ports) + return ntohs(flow->ports.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } @@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) - skb_flow_dissect(skb, &flow_keys); + skb_flow_dissect_flow_keys(skb, &flow_keys); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c new file mode 100644 index 000000000000..8c8f34ef6980 --- /dev/null +++ b/net/sched/cls_flower.c @@ -0,0 +1,688 @@ +/* + * net/sched/cls_flower.c Flower classifier + * + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/rhashtable.h> + +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/ip.h> + +#include <net/sch_generic.h> +#include <net/pkt_cls.h> +#include <net/ip.h> +#include <net/flow_dissector.h> + +struct fl_flow_key { + int indev_ifindex; + struct flow_dissector_key_basic basic; + struct flow_dissector_key_eth_addrs eth; + union { + struct flow_dissector_key_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
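That alignment works together with fl_mask_update_range() further down: the span of nonzero mask bytes is rounded out to sizeof(long) on both ends, so masking, hashing and equality checks over the key window run whole words at a time with no partial-word tail. A worked example on a 64-bit build, assuming the only nonzero mask bytes sit at offsets 18..21 of struct fl_flow_key:

	first = 18;	/* first nonzero mask byte */
	last  = 21;	/* last nonzero mask byte */
	mask->range.start = rounddown(first, sizeof(long));	/* 16 */
	mask->range.end   = roundup(last + 1, sizeof(long));	/* 24 */
	/* fl_set_masked_key() then ANDs exactly one long at offset 16,
	 * and that 8-byte window is the entire rhashtable key */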
*/ + +struct fl_flow_mask_range { + unsigned short int start; + unsigned short int end; +}; + +struct fl_flow_mask { + struct fl_flow_key key; + struct fl_flow_mask_range range; + struct rcu_head rcu; +}; + +struct cls_fl_head { + struct rhashtable ht; + struct fl_flow_mask mask; + struct flow_dissector dissector; + u32 hgen; + bool mask_assigned; + struct list_head filters; + struct rhashtable_params ht_params; + struct rcu_head rcu; +}; + +struct cls_fl_filter { + struct rhash_head ht_node; + struct fl_flow_key mkey; + struct tcf_exts exts; + struct tcf_result res; + struct fl_flow_key key; + struct list_head list; + u32 handle; + struct rcu_head rcu; +}; + +static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) +{ + return mask->range.end - mask->range.start; +} + +static void fl_mask_update_range(struct fl_flow_mask *mask) +{ + const u8 *bytes = (const u8 *) &mask->key; + size_t size = sizeof(mask->key); + size_t i, first = 0, last = size - 1; + + for (i = 0; i < sizeof(mask->key); i++) { + if (bytes[i]) { + if (!first && i) + first = i; + last = i; + } + } + mask->range.start = rounddown(first, sizeof(long)); + mask->range.end = roundup(last + 1, sizeof(long)); +} + +static void *fl_key_get_start(struct fl_flow_key *key, + const struct fl_flow_mask *mask) +{ + return (u8 *) key + mask->range.start; +} + +static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + const long *lkey = fl_key_get_start(key, mask); + const long *lmask = fl_key_get_start(&mask->key, mask); + long *lmkey = fl_key_get_start(mkey, mask); + int i; + + for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) + *lmkey++ = *lkey++ & *lmask++; +} + +static void fl_clear_masked_range(struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); +} + +static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_fl_head *head = rcu_dereference_bh(tp->root); + struct cls_fl_filter *f; + struct fl_flow_key skb_key; + struct fl_flow_key skb_mkey; + + fl_clear_masked_range(&skb_key, &head->mask); + skb_key.indev_ifindex = skb->skb_iif; + /* skb_flow_dissect() does not set n_proto in case an unknown protocol, + * so do it rather here. 
+ */ + skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect(skb, &head->dissector, &skb_key); + + fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); + + f = rhashtable_lookup_fast(&head->ht, + fl_key_get_start(&skb_mkey, &head->mask), + head->ht_params); + if (f) { + *res = f->res; + return tcf_exts_exec(skb, &f->exts, res); + } + return -1; +} + +static int fl_init(struct tcf_proto *tp) +{ + struct cls_fl_head *head; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + + INIT_LIST_HEAD_RCU(&head->filters); + rcu_assign_pointer(tp->root, head); + + return 0; +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + tcf_exts_destroy(&f->exts); + kfree(f); +} + +static bool fl_destroy(struct tcf_proto *tp, bool force) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f, *next; + + if (!force && !list_empty(&head->filters)) + return false; + + list_for_each_entry_safe(f, next, &head->filters, list) { + list_del_rcu(&f->list); + call_rcu(&f->rcu, fl_destroy_filter); + } + RCU_INIT_POINTER(tp->root, NULL); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree_rcu(head, rcu); + return true; +} + +static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry(f, &head->filters, list) + if (f->handle == handle) + return (unsigned long) f; + return 0; +} + +static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { + [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC }, + [TCA_FLOWER_CLASSID] = { .type = NLA_U32 }, + [TCA_FLOWER_INDEV] = { .type = NLA_STRING, + .len = IFNAMSIZ }, + [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 }, +}; + +static void fl_set_key_val(struct nlattr **tb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + if (!tb[val_type]) + return; + memcpy(val, nla_data(tb[val_type]), len); + if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type]) + memset(mask, 0xff, len); + else + memcpy(mask, nla_data(tb[mask_type]), len); +} + +static int fl_set_key(struct net *net, struct nlattr **tb, + struct fl_flow_key *key, struct fl_flow_key *mask) +{ +#ifdef CONFIG_NET_CLS_IND + if (tb[TCA_FLOWER_INDEV]) { + int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); + if (err < 0) + return err; + key->indev_ifindex = err; + mask->indev_ifindex = 0xffffffff; + } +#endif + + fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + 
sizeof(key->eth.dst)); + fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)); + fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + if (key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto)); + } + if (key->basic.n_proto == htons(ETH_P_IP)) { + fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)); + fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)); + } else if (key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)); + fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)); + } + if (key->basic.ip_proto == IPPROTO_TCP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_UDP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } + + return 0; +} + +static bool fl_mask_eq(struct fl_flow_mask *mask1, + struct fl_flow_mask *mask2) +{ + const long *lmask1 = fl_key_get_start(&mask1->key, mask1); + const long *lmask2 = fl_key_get_start(&mask2->key, mask2); + + return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) && + !memcmp(lmask1, lmask2, fl_mask_range(mask1)); +} + +static const struct rhashtable_params fl_ht_params = { + .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */ + .head_offset = offsetof(struct cls_fl_filter, ht_node), + .automatic_shrinking = true, +}; + +static int fl_init_hashtable(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + head->ht_params = fl_ht_params; + head->ht_params.key_len = fl_mask_range(mask); + head->ht_params.key_offset += mask->range.start; + + return rhashtable_init(&head->ht, &head->ht_params); +} + +#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) +#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) +#define FL_KEY_MEMBER_END_OFFSET(member) \ + (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) + +#define FL_KEY_IN_RANGE(mask, member) \ + (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ + FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) + +#define FL_KEY_SET(keys, cnt, id, member) \ + do { \ + keys[cnt].key_id = id; \ + keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \ + cnt++; \ + } while(0); + +#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ + do { \ + if (FL_KEY_IN_RANGE(mask, member)) \ + FL_KEY_SET(keys, cnt, id, member); \ + } while(0); + +static void fl_init_dissector(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; + size_t cnt = 0; + + 
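The macro pair defined above is easiest to read expanded: FL_KEY_IN_RANGE() is an overlap test between a key member and the rounded mask window, so the dissector is asked to fill in only fields the mask can actually see, while BASIC is registered unconditionally. One expansion of the calls that follow, spelled out for the ports member:

	/* FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp) */
	if (offsetof(struct fl_flow_key, tp) <= mask->range.end &&
	    offsetof(struct fl_flow_key, tp) +
	    sizeof(((struct fl_flow_key *) 0)->tp) >= mask->range.start) {
		keys[cnt].key_id = FLOW_DISSECTOR_KEY_PORTS;
		keys[cnt].offset = offsetof(struct fl_flow_key, tp);
		cnt++;
	}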
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + + skb_flow_dissector_init(&head->dissector, keys, cnt); +} + +static int fl_check_assign_mask(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + int err; + + if (head->mask_assigned) { + if (!fl_mask_eq(&head->mask, mask)) + return -EINVAL; + else + return 0; + } + + /* Mask is not assigned yet. So assign it and init hashtable + * according to that. + */ + err = fl_init_hashtable(head, mask); + if (err) + return err; + memcpy(&head->mask, mask, sizeof(head->mask)); + head->mask_assigned = true; + + fl_init_dissector(head, mask); + + return 0; +} + +static int fl_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_fl_filter *f, struct fl_flow_mask *mask, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts e; + int err; + + tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + return err; + + if (tb[TCA_FLOWER_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + + err = fl_set_key(net, tb, &f->key, &mask->key); + if (err) + goto errout; + + fl_mask_update_range(mask); + fl_set_masked_key(&f->mkey, &f->key, mask); + + tcf_exts_change(tp, &f->exts, &e); + + return 0; +errout: + tcf_exts_destroy(&e); + return err; +} + +static u32 fl_grab_new_handle(struct tcf_proto *tp, + struct cls_fl_head *head) +{ + unsigned int i = 0x80000000; + u32 handle; + + do { + if (++head->hgen == 0x7FFFFFFF) + head->hgen = 1; + } while (--i > 0 && fl_get(tp, head->hgen)); + + if (unlikely(i == 0)) { + pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } + + return handle; +} + +static int fl_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg, bool ovr) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fnew; + struct nlattr *tb[TCA_FLOWER_MAX + 1]; + struct fl_flow_mask mask = {}; + int err; + + if (!tca[TCA_OPTIONS]) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); + if (err < 0) + return err; + + if (fold && handle && fold->handle != handle) + return -EINVAL; + + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; + + tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + + if (!handle) { + handle = fl_grab_new_handle(tp, head); + if (!handle) { + err = -EINVAL; + goto errout; + } + } + fnew->handle = handle; + + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); + if (err) + goto errout; + + err = fl_check_assign_mask(head, &mask); + if (err) + goto errout; + + err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, + head->ht_params); + if (err) + goto errout; + if (fold) + rhashtable_remove_fast(&head->ht, &fold->ht_node, + head->ht_params); + + *arg = (unsigned long) fnew; + + if (fold) { + list_replace_rcu(&fold->list, &fnew->list); + tcf_unbind_filter(tp, &fold->res); + call_rcu(&fold->rcu, fl_destroy_filter); + } else { + 
list_add_tail_rcu(&fnew->list, &head->filters); + } + + return 0; + +errout: + kfree(fnew); + return err; +} + +static int fl_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + + rhashtable_remove_fast(&head->ht, &f->ht_node, + head->ht_params); + list_del_rcu(&f->list); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, fl_destroy_filter); + return 0; +} + +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry_rcu(f, &head->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, (unsigned long) f, arg) < 0) { + arg->stop = 1; + break; + } +skip: + arg->count++; + } +} + +static int fl_dump_key_val(struct sk_buff *skb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + int err; + + if (!memchr_inv(mask, 0, len)) + return 0; + err = nla_put(skb, val_type, len, val); + if (err) + return err; + if (mask_type != TCA_FLOWER_UNSPEC) { + err = nla_put(skb, mask_type, len, mask); + if (err) + return err; + } + return 0; +} + +static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct nlattr *nest; + struct fl_flow_key *key, *mask; + + if (!f) + return skb->len; + + t->tcm_handle = f->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + if (f->res.classid && + nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) + goto nla_put_failure; + + key = &f->key; + mask = &head->mask.key; + + if (mask->indev_ifindex) { + struct net_device *dev; + + dev = __dev_get_by_index(net, key->indev_ifindex); + if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name)) + goto nla_put_failure; + } + + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)) || + fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)) || + fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto))) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) && + fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto))) + goto nla_put_failure; + + if (key->basic.n_proto == htons(ETH_P_IP) && + (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)) || + fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)))) + goto nla_put_failure; + else if (key->basic.n_proto == htons(ETH_P_IPV6) && + (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)) || + fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)))) + goto nla_put_failure; + + if (key->basic.ip_proto == IPPROTO_TCP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, 
TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_UDP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &f->exts)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &f->exts) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct tcf_proto_ops cls_fl_ops __read_mostly = { + .kind = "flower", + .classify = fl_classify, + .init = fl_init, + .destroy = fl_destroy, + .get = fl_get, + .change = fl_change, + .delete = fl_delete, + .walk = fl_walk, + .dump = fl_dump, + .owner = THIS_MODULE, +}; + +static int __init cls_fl_init(void) +{ + return register_tcf_proto_ops(&cls_fl_ops); +} + +static void __exit cls_fl_exit(void) +{ + unregister_tcf_proto_ops(&cls_fl_ops); +} + +module_init(cls_fl_init); +module_exit(cls_fl_exit); + +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("Flower classifier"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9eed70bc8f..0b74dc0ede9c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1816,13 +1816,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, continue; err = tp->classify(skb, tp, res); - if (err >= 0) { -#ifdef CONFIG_NET_CLS_ACT - if (err != TC_ACT_RECLASSIFY && skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); -#endif + if (err >= 0) return err; - } } return -1; } @@ -1834,23 +1829,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, int err = 0; #ifdef CONFIG_NET_CLS_ACT const struct tcf_proto *otp = tp; + int limit = 0; reclassify: #endif err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT if (err == TC_ACT_RECLASSIFY) { - u32 verd = G_TC_VERD(skb->tc_verd); tp = otp; - if (verd++ >= MAX_REC_LOOP) { + if (unlikely(limit++ >= MAX_REC_LOOP)) { net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", tp->q->ops->id, tp->prio & 0xffff, ntohs(tp->protocol)); return TC_ACT_SHOT; } - skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); goto reclassify; } #endif diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index c009eb9045ce..93d5742dc7e0 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -18,7 +18,7 @@ #include <net/pkt_sched.h> #include <net/inet_ecn.h> #include <net/red.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> /* CHOKe stateless AQM for fair bandwidth allocation @@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) --sch->q.qlen; } -/* private part of skb->cb[] that a qdisc is allowed to use - * is limited to QDISC_CB_PRIV_LEN bytes. - * As a flow key might be too large, we store a part of it only. 
- */ -#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3) - struct choke_skb_cb { u16 classid; u8 keys_valid; - u8 keys[QDISC_CB_PRIV_LEN - 3]; + struct flow_keys_digest keys; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) @@ -176,19 +170,19 @@ static bool choke_match_flow(struct sk_buff *skb1, if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &temp); - memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN); + skb_flow_dissect_flow_keys(skb1, &temp); + make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &temp); - memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN); + skb_flow_dissect_flow_keys(skb2, &temp); + make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); } return !memcmp(&choke_skb_cb(skb1)->keys, &choke_skb_cb(skb2)->keys, - CHOKE_K_LEN); + sizeof(choke_skb_cb(skb1)->keys)); } /* diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 7a0bdb16ac92..535007d5f0b5 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -6,7 +6,7 @@ * * Implemented on linux by : * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> - * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { [TCA_CODEL_LIMIT] = { .type = NLA_U32 }, [TCA_CODEL_INTERVAL] = { .type = NLA_U32 }, [TCA_CODEL_ECN] = { .type = NLA_U32 }, + [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, }; static int codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]); + + q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_CODEL_INTERVAL]) { u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); @@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_CODEL_ECN, q->params.ecn)) goto nla_put_failure; - + if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD, + codel_time_to_us(q->params.ce_threshold))) + goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: @@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .ldelay = codel_time_to_us(q->vars.ldelay), .dropping = q->vars.dropping, .ecn_mark = q->stats.ecn_mark, + .ce_mark = q->stats.ce_mark, }; if (q->vars.dropping) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index c244c45b78d7..d75993f89fac 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> */ #include <linux/module.h> @@ -23,7 +23,6 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#include <net/flow_keys.h> #include <net/codel.h> /* Fair Queue CoDel. 
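The TCA_CODEL_CE_THRESHOLD handling above converts a user-supplied microsecond value into CoDel's fixed-point time unit: nanoseconds shifted right by CODEL_SHIFT, i.e. ticks of 1.024 us. A minimal userspace sketch of the conversion pair, assuming only the CODEL_SHIFT and NSEC_PER_USEC values from include/net/codel.h; note the round trip loses sub-tick precision:

	#include <stdint.h>
	#include <stdio.h>

	#define CODEL_SHIFT	10	/* codel_time_t counts ns >> CODEL_SHIFT */
	#define NSEC_PER_USEC	1000

	static uint32_t us_to_codel_time(uint32_t us)
	{
		return ((uint64_t)us * NSEC_PER_USEC) >> CODEL_SHIFT;
	}

	static uint32_t codel_time_to_us(uint32_t t)
	{
		return ((uint64_t)t << CODEL_SHIFT) / NSEC_PER_USEC;
	}

	int main(void)
	{
		uint32_t t = us_to_codel_time(1000);	/* 1000 us -> 976 ticks */

		printf("%u ticks, ~%u us back\n", t, codel_time_to_us(t));
		return 0;
	}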
@@ -68,15 +67,9 @@ struct fq_codel_sched_data { }; static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, - const struct sk_buff *skb) + struct sk_buff *skb) { - struct flow_keys keys; - unsigned int hash; - - skb_flow_dissect(skb, &keys); - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src ^ keys.ip_proto, - (__force u32)keys.ports, q->perturbation); + u32 hash = skb_get_hash_perturb(skb, q->perturbation); return reciprocal_scale(hash, q->flows_cnt); } @@ -299,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_ECN] = { .type = NLA_U32 }, [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -329,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]); + + q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -448,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; + if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, + codel_time_to_us(q->cparams.ce_threshold))) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -466,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.drop_overlimit = q->drop_overlimit; st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; st.qdisc_stats.new_flow_count = q->new_flow_count; + st.qdisc_stats.ce_mark = q->cstats.ce_mark; list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 634529e0ce6b..abb9f2fec28f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. 
*/ - if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= + sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, q->DP = dp; q->prio = prio; - q->limit = ctl->limit; + if (ctl->limit > sch->limit) + q->limit = sch->limit; + else + q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); @@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, + [TCA_GRED_LIMIT] = { .type = NLA_U32 }, }; static int gred_change(struct Qdisc *sch, struct nlattr *opt) @@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) + if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { + if (tb[TCA_GRED_LIMIT] != NULL) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, opt); + } if (tb[TCA_GRED_PARMS] == NULL || - tb[TCA_GRED_STAB] == NULL) + tb[TCA_GRED_STAB] == NULL || + tb[TCA_GRED_LIMIT] != NULL) return -EINVAL; max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; @@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) return -EINVAL; + if (tb[TCA_GRED_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); + else { + u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1; + + sch->limit = qlen * psched_mtu(qdisc_dev(sch)); + } + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } @@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) + goto nla_put_failure; + parms = nla_nest_start(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 15d3aabfe250..9d15cb6b8cb1 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> -#include <net/flow_keys.h> #include <net/pkt_sched.h> #include <net/sock.h> @@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void) return jiffies; } -static unsigned int skb_hash(const struct hhf_sched_data *q, - const struct sk_buff *skb) -{ - struct flow_keys keys; - unsigned int hash; - - if (skb->sk && skb->sk->sk_hash) - return skb->sk->sk_hash; - - skb_flow_dissect(skb, &keys); - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src ^ keys.ip_proto, - (__force u32)keys.ports, q->perturbation); - return hash; -} - /* Looks up a heavy-hitter flow in a chaining list of table T. */ static struct hh_flow_state *seek_list(const u32 hash, struct list_head *head, @@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_hash(q, skb); + hash = skb_get_hash_perturb(skb, q->perturbation); /* Check if this packet belongs to an already established HH flow. 
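 * (Editor's note, not part of the patch: skb_get_hash_perturb() mixes
 *  the qdisc-private q->perturbation into the canonical flow-key hash,
 *  so a given 5-tuple maps to a stable bucket within this qdisc while
 *  remaining unpredictable to hosts trying to target a bucket.)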
*/ flow_pos = hash & HHF_BIT_MASK; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 4cdbfb85686a..e7c648fa9dc3 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -12,16 +12,10 @@ #include <linux/list.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> + #include <net/netlink.h> #include <net/pkt_sched.h> - -struct ingress_qdisc_data { - struct tcf_proto __rcu *filter_list; -}; - -/* ------------------------- Class/flow operations ------------------------- */ - static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) { - struct ingress_qdisc_data *p = qdisc_priv(sch); - - return &p->filter_list; -} - -/* --------------------------- Qdisc operations ---------------------------- */ + struct net_device *dev = qdisc_dev(sch); -static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct ingress_qdisc_data *p = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result; - - result = tc_classify(skb, fl, &res); - - qdisc_bstats_update(sch, skb); - switch (result) { - case TC_ACT_SHOT: - result = TC_ACT_SHOT; - qdisc_qstats_drop(sch); - break; - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - result = TC_ACT_STOLEN; - break; - case TC_ACT_RECLASSIFY: - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - default: - result = TC_ACT_OK; - break; - } - - return result; + return &dev->ingress_cl_list; } -/* ------------------------------------------------------------- */ - static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { net_inc_ingress_queue(); + sch->flags |= TCQ_F_CPUSTATS; return 0; } static void ingress_destroy(struct Qdisc *sch) { - struct ingress_qdisc_data *p = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); - tcf_destroy_chain(&p->filter_list); + tcf_destroy_chain(&dev->ingress_cl_list); net_dec_ingress_queue(); } @@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; + return nla_nest_end(skb, nest); nla_put_failure: @@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", - .priv_size = sizeof(struct ingress_qdisc_data), - .enqueue = ingress_enqueue, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void) unregister_qdisc(&ingress_qdisc_ops); } -module_init(ingress_module_init) -module_exit(ingress_module_exit) +module_init(ingress_module_init); +module_exit(ingress_module_exit); + MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 956ead2cab9a..5abd1d9de989 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... 
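 * (editor's note, not part of the patch: q->duplicate is zeroed around
 *  the nested rootq->enqueue(skb2, rootq) call so the clone re-entering
 *  the qdisc tree cannot roll another duplicate; the configured
 *  probability is then restored from dupsave)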
*/ - q->duplicate = 0; - qdisc_enqueue_root(skb2, rootq); + q->duplicate = 0; + rootq->enqueue(skb2, rootq); q->duplicate = dupsave; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 5819dd82630d..4b815193326c 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -26,7 +26,6 @@ #include <net/ip.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> -#include <net/flow_keys.h> /* * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) @@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) int i; u32 p_min = ~0; u32 minqlen = ~0; - u32 r, slot, salt, sfbhash; + u32 r, sfbhash; + u32 slot = q->slot; int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - struct flow_keys keys; if (unlikely(sch->q.qlen >= q->limit)) { qdisc_qstats_overlimit(sch); @@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) fl = rcu_dereference_bh(q->filter_list); if (fl) { + u32 salt; + /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - keys.src = salt; - keys.dst = 0; - keys.ports = 0; + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); } else { - skb_flow_dissect(skb, &keys); + sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); } - slot = q->slot; - sfbhash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, - q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; @@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (unlikely(p_min >= SFB_MAX_PROB)) { /* Inelastic flow */ if (q->double_buffering) { - sfbhash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, - q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, + q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b877140beda5..7d1492663360 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -23,7 +23,6 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#include <net/flow_keys.h> #include <net/red.h> @@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index return &q->dep[val - SFQ_MAX_FLOWS]; } -/* - * In order to be able to quickly rehash our queue when timer changes - * q->perturbation, we store flow_keys in skb->cb[] - */ -struct sfq_skb_cb { - struct flow_keys keys; -}; - -static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) -{ - qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb)); - return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; -} - static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; - unsigned int hash; - - hash = jhash_3words((__force u32)keys->dst, - (__force u32)keys->src ^ keys->ip_proto, - (__force u32)keys->ports, q->perturbation); - return hash & (q->divisor - 1); + return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return TC_H_MIN(skb->priority); fl = rcu_dereference_bh(q->filter_list); - if (!fl) { - skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); + if (!fl) return sfq_hash(q, skb) + 1; - } *qerr = 
NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; result = tc_classify(skb, fl, &res); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0e4198ee2370..e703ff7fed40 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -635,7 +635,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53b7acde9aa3..59e80356672b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot); + sk->sk_prot, 0); struct inet_sock *newinet; if (!newsk) diff --git a/net/socket.c b/net/socket.c index 884e32997698..9963a0b53a64 100644 --- a/net/socket.c +++ b/net/socket.c @@ -576,9 +576,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) - return; - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); @@ -1213,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res) } EXPORT_SYMBOL(sock_create); -int sock_create_kern(int family, int type, int protocol, struct socket **res) +int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { - return __sock_create(&init_net, family, type, protocol, res, 1); + return __sock_create(net, family, type, protocol, res, 1); } EXPORT_SYMBOL(sock_create_kern); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 055453d48668..ac853acbe211 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,97 +15,359 @@ #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <linux/if_bridge.h> #include <net/ip_fib.h> #include <net/switchdev.h> /** - * netdev_switch_parent_id_get - Get ID of a switch + * switchdev_port_attr_get - Get port attribute + * + * @dev: port device + * @attr: attribute to get + */ +int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + struct switchdev_attr first = { + .id = SWITCHDEV_ATTR_UNDEFINED + }; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_attr_get) + return ops->switchdev_port_attr_get(dev, attr); + + if (attr->flags & SWITCHDEV_F_NO_RECURSE) + return err; + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to get attr on + * each port. Return -ENODATA if attr values don't + * compare across ports. 
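 * (Editor's example, not part of the patch: querying
 *  SWITCHDEV_ATTR_PORT_PARENT_ID on a bond whose slaves sit on two
 *  different ASICs yields two different ppids, the memcmp() below
 *  detects the mismatch, and the caller gets -ENODATA rather than a
 *  misleading single switch ID.)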
+ */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = switchdev_port_attr_get(lower_dev, attr); + if (err) + break; + if (first.id == SWITCHDEV_ATTR_UNDEFINED) + first = *attr; + else if (memcmp(&first, attr, sizeof(*attr))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_attr_get); + +static int __switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_attr_set) + return ops->switchdev_port_attr_set(dev, attr); + + if (attr->flags & SWITCHDEV_F_NO_RECURSE) + return err; + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to set attr on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_port_attr_set(lower_dev, attr); + if (err) + break; + } + + return err; +} + +struct switchdev_attr_set_work { + struct work_struct work; + struct net_device *dev; + struct switchdev_attr attr; +}; + +static void switchdev_port_attr_set_work(struct work_struct *work) +{ + struct switchdev_attr_set_work *asw = + container_of(work, struct switchdev_attr_set_work, work); + int err; + + rtnl_lock(); + err = switchdev_port_attr_set(asw->dev, &asw->attr); + BUG_ON(err); + rtnl_unlock(); + + dev_put(asw->dev); + kfree(work); +} + +static int switchdev_port_attr_set_defer(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct switchdev_attr_set_work *asw; + + asw = kmalloc(sizeof(*asw), GFP_ATOMIC); + if (!asw) + return -ENOMEM; + + INIT_WORK(&asw->work, switchdev_port_attr_set_work); + + dev_hold(dev); + asw->dev = dev; + memcpy(&asw->attr, attr, sizeof(asw->attr)); + + schedule_work(&asw->work); + + return 0; +} + +/** + * switchdev_port_attr_set - Set port attribute + * * @dev: port device - * @psid: switch ID + * @attr: attribute to set * - * Get ID of a switch this port is part of. + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. */ -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { - const struct swdev_ops *ops = dev->swdev_ops; + int err; + + if (!rtnl_is_locked()) { + /* Running prepare-commit transaction across stacked + * devices requires nothing moves, so if rtnl_lock is + * not held, schedule a worker thread to hold rtnl_lock + * while setting attr. + */ + + return switchdev_port_attr_set_defer(dev, attr); + } + + /* Phase I: prepare for attr set. Driver/device should fail + * here if there are going to be issues in the commit phase, + * such as lack of resources or support. The driver/device + * should reserve resources needed for the commit phase here, + * but should not commit the attr. + */ + + attr->trans = SWITCHDEV_TRANS_PREPARE; + err = __switchdev_port_attr_set(dev, attr); + if (err) { + /* Prepare phase failed: abort the transaction. Any + * resources reserved in the prepare phase are + * released. + */ + + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + + return err; + } + + /* Phase II: commit attr set. This cannot fail as a fault + * of driver/device. If it does, it's a bug in the driver/device + * because the driver said everything was OK in phase I.
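 *
 * (Editor's sketch, not part of the patch: a driver's attr_set hook is
 *  expected to key off attr->trans, along the lines of
 *
 *	switch (attr->trans) {
 *	case SWITCHDEV_TRANS_PREPARE:
 *		return foo_validate_and_reserve(dev, attr);
 *	case SWITCHDEV_TRANS_ABORT:
 *		foo_release(dev, attr);
 *		return 0;
 *	case SWITCHDEV_TRANS_COMMIT:
 *		foo_program_hw(dev, attr);	// must not fail
 *		return 0;
 *	}
 *
 *  where the foo_*() helpers are hypothetical.)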
+ */ + + attr->trans = SWITCHDEV_TRANS_COMMIT; + err = __switchdev_port_attr_set(dev, attr); + BUG_ON(err); + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_attr_set); + +static int __switchdev_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_obj_add) + return ops->switchdev_port_obj_add(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to add object on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_port_obj_add(lower_dev, obj); + if (err) + break; + } + + return err; +} + +/** + * switchdev_port_obj_add - Add port object + * + * @dev: port device + * @obj: object to add + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held. + */ +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) +{ + int err; - if (!ops || !ops->swdev_parent_id_get) - return -EOPNOTSUPP; - return ops->swdev_parent_id_get(dev, psid); + ASSERT_RTNL(); + + /* Phase I: prepare for obj add. Driver/device should fail + * here if there are going to be issues in the commit phase, + * such as lack of resources or support. The driver/device + * should reserve resources needed for the commit phase here, + * but should not commit the obj. + */ + + obj->trans = SWITCHDEV_TRANS_PREPARE; + err = __switchdev_port_obj_add(dev, obj); + if (err) { + /* Prepare phase failed: abort the transaction. Any + * resources reserved in the prepare phase are + * released. + */ + + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + + return err; + } + + /* Phase II: commit obj add. This cannot fail as a fault + * of driver/device. If it does, it's a bug in the driver/device + * because the driver said everything was OK in phase I. + */ + + obj->trans = SWITCHDEV_TRANS_COMMIT; + err = __switchdev_port_obj_add(dev, obj); + WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); + + return err; } -EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get); +EXPORT_SYMBOL_GPL(switchdev_port_obj_add); /** - * netdev_switch_port_stp_update - Notify switch device port of STP - * state change + * switchdev_port_obj_del - Delete port object + * * @dev: port device - * @state: port STP state + * @obj: object to delete + */ +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_obj_del) + return ops->switchdev_port_obj_del(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to delete object on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = switchdev_port_obj_del(lower_dev, obj); + if (err) + break; + } + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_del); + +/** + * switchdev_port_obj_dump - Dump port objects * - * Notify switch device port of bridge port STP state change.
+ * @dev: port device + * @obj: object to dump */ -int netdev_switch_port_stp_update(struct net_device *dev, u8 state) +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - if (ops && ops->swdev_port_stp_update) - return ops->swdev_port_stp_update(dev, state); + if (ops && ops->switchdev_port_obj_dump) + return ops->switchdev_port_obj_dump(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to dump objects on + * first port at bottom of stack. + */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_stp_update(lower_dev, state); - if (err && err != -EOPNOTSUPP) - return err; + err = switchdev_port_obj_dump(lower_dev, obj); + break; } return err; } -EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); +EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); -static DEFINE_MUTEX(netdev_switch_mutex); -static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); +static DEFINE_MUTEX(switchdev_mutex); +static RAW_NOTIFIER_HEAD(switchdev_notif_chain); /** - * register_netdev_switch_notifier - Register notifier + * register_switchdev_notifier - Register notifier * @nb: notifier_block * * Register switch device notifier. This should be used by code * which needs to monitor events happening in particular device. * Return values are same as for atomic_notifier_chain_register(). */ -int register_netdev_switch_notifier(struct notifier_block *nb) +int register_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_chain_register(&switchdev_notif_chain, nb); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(register_switchdev_notifier); /** - * unregister_netdev_switch_notifier - Unregister notifier + * unregister_switchdev_notifier - Unregister notifier * @nb: notifier_block * * Unregister switch device notifier. * Return values are same as for atomic_notifier_chain_unregister(). */ -int unregister_netdev_switch_notifier(struct notifier_block *nb) +int unregister_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); /** - * call_netdev_switch_notifiers - Call notifiers + * call_switchdev_notifiers - Call notifiers * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data @@ -114,146 +376,387 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). 
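 *
 * (Editor's sketch, not part of the patch: a consumer registers an
 *  ordinary notifier_block, e.g.
 *
 *	static int foo_switchdev_event(struct notifier_block *unused,
 *				       unsigned long event, void *ptr)
 *	{
 *		struct switchdev_notifier_info *info = ptr;
 *
 *		return NOTIFY_DONE;	// info->dev is the source port
 *	}
 *	static struct notifier_block foo_nb = {
 *		.notifier_call = foo_switchdev_event,
 *	};
 *
 *  followed by register_switchdev_notifier(&foo_nb); the foo_* names
 *  are hypothetical.)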
*/ -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info) { int err; info->dev = dev; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); +EXPORT_SYMBOL_GPL(call_switchdev_notifiers); /** - * netdev_switch_port_bridge_setlink - Notify switch device port of bridge - * port attributes + * switchdev_port_bridge_getlink - Get bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags * - * Notify switch device port of bridge port attributes + * Called for SELF on rtnl_bridge_getlink to get bridge port + * attributes. */ -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) { - const struct net_device_ops *ops = dev->netdev_ops; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + }; + u16 mode = BRIDGE_MODE_UNDEF; + u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + int err; - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return 0; + err = switchdev_port_attr_get(dev, &attr); + if (err) + return err; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, + attr.u.brport_flags, mask, nlflags); +} +EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); + +static int switchdev_port_br_setflag(struct net_device *dev, + struct nlattr *nlattr, + unsigned long brport_flag) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + }; + u8 flag = nla_get_u8(nlattr); + int err; + + err = switchdev_port_attr_get(dev, &attr); + if (err) + return err; + + if (flag) + attr.u.brport_flags |= brport_flag; + else + attr.u.brport_flags &= ~brport_flag; + + return switchdev_port_attr_set(dev, &attr); +} + +static const struct nla_policy +switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { + [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, + [IFLA_BRPORT_COST] = { .type = NLA_U32 }, + [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, +}; + +static int switchdev_port_br_setlink_protinfo(struct net_device *dev, + struct nlattr *protinfo) +{ + struct nlattr *attr; + int rem; + int err; + + err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, + switchdev_port_bridge_policy); + if (err) + return err; + + nla_for_each_nested(attr, protinfo, rem) { + switch (nla_type(attr)) { + case IFLA_BRPORT_LEARNING: + err = switchdev_port_br_setflag(dev, attr, + BR_LEARNING); + break; + case IFLA_BRPORT_LEARNING_SYNC: + err = switchdev_port_br_setflag(dev, attr, + BR_LEARNING_SYNC); + break; + default: + err = -EOPNOTSUPP; + break; + } + if (err) + return err; + } + + return 0; +} + +static int switchdev_port_br_afspec(struct net_device *dev, + struct nlattr 
*afspec, + int (*f)(struct net_device *dev, + struct switchdev_obj *obj)) +{ + struct nlattr *attr; + struct bridge_vlan_info *vinfo; + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_VLAN, + }; + struct switchdev_obj_vlan *vlan = &obj.u.vlan; + int rem; + int err; - if (!ops->ndo_bridge_setlink) - return -EOPNOTSUPP; + nla_for_each_nested(attr, afspec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + vlan->flags = vinfo->flags; + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vlan->vid_start) + return -EINVAL; + vlan->vid_start = vinfo->vid; + } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { + if (!vlan->vid_start) + return -EINVAL; + vlan->vid_end = vinfo->vid; + if (vlan->vid_end <= vlan->vid_start) + return -EINVAL; + err = f(dev, &obj); + if (err) + return err; + memset(vlan, 0, sizeof(*vlan)); + } else { + if (vlan->vid_start) + return -EINVAL; + vlan->vid_start = vinfo->vid; + vlan->vid_end = vinfo->vid; + err = f(dev, &obj); + if (err) + return err; + memset(vlan, 0, sizeof(*vlan)); + } + } - return ops->ndo_bridge_setlink(dev, nlh, flags); + return 0; } -EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink); /** - * netdev_switch_port_bridge_dellink - Notify switch device port of bridge - * port attribute delete + * switchdev_port_bridge_setlink - Set bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags + * @nlh: netlink header + * @flags: netlink flags * - * Notify switch device port of bridge port attribute delete + * Called for SELF on rtnl_bridge_setlink to set bridge port + * attributes. */ -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { - const struct net_device_ops *ops = dev->netdev_ops; + struct nlattr *protinfo; + struct nlattr *afspec; + int err = 0; - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return 0; + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_PROTINFO); + if (protinfo) { + err = switchdev_port_br_setlink_protinfo(dev, protinfo); + if (err) + return err; + } - if (!ops->ndo_bridge_dellink) - return -EOPNOTSUPP; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_AF_SPEC); + if (afspec) + err = switchdev_port_br_afspec(dev, afspec, + switchdev_port_obj_add); - return ops->ndo_bridge_dellink(dev, nlh, flags); + return err; } -EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); /** - * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink - * op for master devices + * switchdev_port_bridge_dellink - Set bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags + * @nlh: netlink header + * @flags: netlink flags * - * Notify master device slaves of bridge port attributes + * Called for SELF on rtnl_bridge_dellink to set bridge port + * attributes. 
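 *
 * (Editor's example, not part of the patch: switchdev_port_br_afspec()
 *  folds a VLAN range given as two IFLA_BRIDGE_VLAN_INFO attributes,
 *
 *	{ .flags = BRIDGE_VLAN_INFO_RANGE_BEGIN, .vid = 10 }
 *	{ .flags = BRIDGE_VLAN_INFO_RANGE_END,   .vid = 19 }
 *
 *  into a single switchdev_obj_vlan { .vid_start = 10, .vid_end = 19 }
 *  before invoking the add/del callback once for the whole range.)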
*/ -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { - struct net_device *lower_dev; - struct list_head *iter; - int ret = 0, err = 0; + struct nlattr *afspec; - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return ret; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_AF_SPEC); + if (afspec) + return switchdev_port_br_afspec(dev, afspec, + switchdev_port_obj_del); - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); - if (err && err != -EOPNOTSUPP) - ret = err; - } + return 0; +} +EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); - return ret; +/** + * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port + * + * @ndmsg: netlink hdr + * @nlattr: netlink attributes + * @dev: port device + * @addr: MAC address to add + * @vid: VLAN to add + * + * Add FDB entry to switch device. + */ +int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 nlm_flags) +{ + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .u.fdb = { + .addr = addr, + .vid = vid, + }, + }; + + return switchdev_port_obj_add(dev, &obj); } -EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); /** - * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink - * op for master devices + * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port * + * @ndmsg: netlink hdr + * @nlattr: netlink attributes * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge dellink flags + * @addr: MAC address to delete + * @vid: VLAN to delete * - * Notify master device slaves of bridge port attribute deletes + * Delete FDB entry from switch device. 
*/ -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid) { - struct net_device *lower_dev; - struct list_head *iter; - int ret = 0, err = 0; + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .u.fdb = { + .addr = addr, + .vid = vid, + }, + }; + + return switchdev_port_obj_del(dev, &obj); +} +EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); + +struct switchdev_fdb_dump { + struct switchdev_obj obj; + struct sk_buff *skb; + struct netlink_callback *cb; + struct net_device *filter_dev; + int idx; +}; + +static int switchdev_port_fdb_dump_cb(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_fdb_dump *dump = + container_of(obj, struct switchdev_fdb_dump, obj); + u32 portid = NETLINK_CB(dump->cb->skb).portid; + u32 seq = dump->cb->nlh->nlmsg_seq; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + struct net_device *master = netdev_master_upper_dev_get(dev); + + if (dump->idx < dump->cb->args[0]) + goto skip; + + if (master && dump->filter_dev != master) + goto skip; + + nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, + sizeof(*ndm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_SELF; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = NUD_REACHABLE; + + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) + goto nla_put_failure; + + if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid)) + goto nla_put_failure; + + nlmsg_end(dump->skb, nlh); + +skip: + dump->idx++; + return 0; + +nla_put_failure: + nlmsg_cancel(dump->skb, nlh); + return -EMSGSIZE; +} - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return ret; +/** + * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries + * + * @skb: netlink skb + * @cb: netlink callback + * @dev: port device + * @filter_dev: filter device + * @idx: index to start dumping at + * + * Dump FDB entries from switch device. + */ +int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx) +{ + struct switchdev_fdb_dump dump = { + .obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .cb = switchdev_port_fdb_dump_cb, + }, + .skb = skb, + .cb = cb, + .filter_dev = filter_dev, + .idx = idx, + }; + int err; - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); - if (err && err != -EOPNOTSUPP) - ret = err; - } + err = switchdev_port_obj_dump(dev, &dump.obj); + if (err) + return err; - return ret; + return dump.idx; } -EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) +static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct net_device *port_dev; struct list_head *iter; /* Recursively search down until we find a sw port dev. - * (A sw port dev supports swdev_parent_id_get). + * (A sw port dev supports switchdev_port_attr_get).
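 * (Editor's example, not part of the patch: given a stack
 *
 *	team0 -> sw0p1, sw0p2	(both expose switchdev_port_attr_get)
 *
 *  switchdev_get_lowest_dev(team0) walks the lower-dev list and returns
 *  the first port, sw0p1; if no switchdev port exists below, it returns
 *  NULL.)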
*/ - if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && - ops && ops->swdev_parent_id_get) + if (ops && ops->switchdev_port_attr_get) return dev; netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = netdev_switch_get_lowest_dev(lower_dev); + port_dev = switchdev_get_lowest_dev(lower_dev); if (port_dev) return port_dev; } @@ -261,10 +764,12 @@ static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) return NULL; } -static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) +static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) { - struct netdev_phys_item_id psid; - struct netdev_phys_item_id prev_psid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + }; + struct switchdev_attr prev_attr; struct net_device *dev = NULL; int nhsel; @@ -276,28 +781,29 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) if (!nh->nh_dev) return NULL; - dev = netdev_switch_get_lowest_dev(nh->nh_dev); + dev = switchdev_get_lowest_dev(nh->nh_dev); if (!dev) return NULL; - if (netdev_switch_parent_id_get(dev, &psid)) + if (switchdev_port_attr_get(dev, &attr)) return NULL; if (nhsel > 0) { - if (prev_psid.id_len != psid.id_len) + if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) return NULL; - if (memcmp(prev_psid.id, psid.id, psid.id_len)) + if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, + attr.u.ppid.id_len)) return NULL; } - prev_psid = psid; + prev_attr = attr; } return dev; } /** - * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch + * switchdev_fib_ipv4_add - Add IPv4 route entry to switch * * @dst: route's IPv4 destination address * @dst_len: destination address length (prefix length) @@ -309,11 +815,22 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) * * Add IPv4 route entry to switch device. */ -int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id) { + struct switchdev_obj fib_obj = { + .id = SWITCHDEV_OBJ_IPV4_FIB, + .u.ipv4_fib = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = nlflags, + .tb_id = tb_id, + }, + }; struct net_device *dev; - const struct swdev_ops *ops; int err = 0; /* Don't offload route if using custom ip rules or if @@ -328,25 +845,20 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (fi->fib_net->ipv4.fib_offload_disabled) return 0; - dev = netdev_switch_get_dev_by_nhs(fi); + dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; - - if (ops->swdev_fib_ipv4_add) { - err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len, - fi, tos, type, nlflags, - tb_id); - if (!err) - fi->fib_flags |= RTNH_F_OFFLOAD; - } + + err = switchdev_port_obj_add(dev, &fib_obj); + if (!err) + fi->fib_flags |= RTNH_F_OFFLOAD; return err; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); /** - * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch + * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch * * @dst: route's IPv4 destination address * @dst_len: destination address length (prefix length) @@ -357,38 +869,45 @@ EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); * * Delete IPv4 route entry from switch device. 
*/ -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { + struct switchdev_obj fib_obj = { + .id = SWITCHDEV_OBJ_IPV4_FIB, + .u.ipv4_fib = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = 0, + .tb_id = tb_id, + }, + }; struct net_device *dev; - const struct swdev_ops *ops; int err = 0; if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; - dev = netdev_switch_get_dev_by_nhs(fi); + dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; - if (ops->swdev_fib_ipv4_del) { - err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, - fi, tos, type, tb_id); - if (!err) - fi->fib_flags &= ~RTNH_F_OFFLOAD; - } + err = switchdev_port_obj_del(dev, &fib_obj); + if (!err) + fi->fib_flags &= ~RTNH_F_OFFLOAD; return err; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); /** - * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation + * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation * * @fi: route FIB info structure */ -void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +void switchdev_fib_ipv4_abort(struct fib_info *fi) { /* There was a problem installing this route to the offload * device. For now, until we come up with more refined @@ -401,4 +920,4 @@ void netdev_switch_fib_ipv4_abort(struct fib_info *fi) fib_flush_external(fi->fib_net); fi->fib_net->ipv4.fib_offload_disabled = true; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); diff --git a/net/tipc/addr.c b/net/tipc/addr.c index ba7daa864d44..48fd3b5a73fb 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -38,13 +38,6 @@ #include "addr.h" #include "core.h" -u32 tipc_own_addr(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - return tn->own_addr; -} - /** * in_own_cluster - test for cluster inclusion; <0.0.0> always matches */ diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 7ba6d5c8ae40..93f7c983be33 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -41,10 +41,18 @@ #include <linux/tipc.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include "core.h" #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u +static inline u32 tipc_own_addr(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->own_addr; +} + static inline u32 tipc_zone_mask(u32 addr) { return addr & TIPC_ZONE_MASK; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index c5cbdcb1f0b5..4906ca3c0f3a 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -115,19 +115,15 @@ static void bclink_set_last_sent(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *bcl = tn->bcl; - struct sk_buff *skb = skb_peek(&bcl->backlogq); - if (skb) - bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1); - else - bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); + bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1); } u32 tipc_bclink_get_last_sent(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - return tn->bcl->fsm_msg_cnt; + return tn->bcl->silent_intv_cnt; } static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) @@ -212,16 +208,16 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) * or both sent and unsent messages (otherwise) */ if (tn->bclink->bcast_nodes.count) - acked = tn->bcl->fsm_msg_cnt; + 
acked = tn->bcl->silent_intv_cnt; else - acked = tn->bcl->next_out_no; + acked = tn->bcl->snd_nxt; } else { /* * Bail out if specified sequence number does not correspond * to a message that has been sent and not yet acknowledged */ if (less(acked, buf_seqno(skb)) || - less(tn->bcl->fsm_msg_cnt, acked) || + less(tn->bcl->silent_intv_cnt, acked) || less_eq(acked, n_ptr->bclink.acked)) goto exit; } @@ -803,9 +799,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) goto attr_msg_full; if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->next_in_no)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->next_out_no)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) goto attr_msg_full; prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); @@ -866,6 +862,27 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit) return 0; } +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) +{ + int err; + u32 win; + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + if (!attrs[TIPC_NLA_LINK_PROP]) + return -EINVAL; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); + if (err) + return err; + + if (!props[TIPC_NLA_PROP_WIN]) + return -EOPNOTSUPP; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + + return tipc_bclink_set_queue_limits(net, win); +} + int tipc_bclink_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -893,7 +910,7 @@ int tipc_bclink_init(struct net *net) __skb_queue_head_init(&bcl->backlogq); __skb_queue_head_init(&bcl->deferdq); skb_queue_head_init(&bcl->wakeupq); - bcl->next_out_no = 1; + bcl->snd_nxt = 1; spin_lock_init(&bclink->node.lock); __skb_queue_head_init(&bclink->arrvq); skb_queue_head_init(&bclink->inputq); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 4bdc12277d33..3c290a48f720 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -131,6 +131,7 @@ uint tipc_bclink_get_mtu(void); int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); void tipc_bclink_wakeup_users(struct net *net); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); void tipc_bclink_input(struct net *net); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 70e3dacbf84a..00bc0e620532 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -71,8 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, - bool shutting_down); +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr); /** * tipc_media_find - locates specified media object by name @@ -324,7 +323,7 @@ restart: res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); if (res) { - bearer_disable(net, b_ptr, false); + bearer_disable(net, b_ptr); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; @@ -344,7 +343,7 @@ restart: static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_link_reset_list(net, b_ptr->identity); + tipc_link_delete_list(net, b_ptr->identity); tipc_disc_reset(net, b_ptr); return 0; } @@ -354,8 +353,7 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) * * Note: This routine 
assumes caller holds RTNL lock. */ -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, - bool shutting_down) +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; @@ -363,7 +361,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, pr_info("Disabling bearer <%s>\n", b_ptr->name); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(net, b_ptr->identity, shutting_down); + tipc_link_delete_list(net, b_ptr->identity); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); @@ -541,7 +539,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(dev_net(dev), b_ptr, false); + bearer_disable(dev_net(dev), b_ptr); break; } return NOTIFY_OK; @@ -583,7 +581,7 @@ void tipc_bearer_stop(struct net *net) for (i = 0; i < MAX_BEARERS; i++) { b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr) { - bearer_disable(net, b_ptr, true); + bearer_disable(net, b_ptr); tn->bearer_list[i] = NULL; } } @@ -747,7 +745,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - bearer_disable(net, bearer, false); + bearer_disable(net, bearer); rtnl_unlock(); return 0; @@ -812,7 +810,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; - struct net *net = genl_info_net(info); + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 5cad243ee8fc..dc714d977768 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -38,9 +38,9 @@ #define _TIPC_BEARER_H #include "netlink.h" +#include "core.h" #include <net/genetlink.h> -#define MAX_BEARERS 2 #define MAX_MEDIA 3 #define MAX_NODES 4096 #define WSIZE 32 diff --git a/net/tipc/core.c b/net/tipc/core.c index be1c9fa60b09..005ba5eb0ea4 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -68,7 +68,7 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; - err = tipc_subscr_start(net); + err = tipc_topsrv_start(net); if (err) goto out_subscr; return 0; @@ -83,7 +83,7 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { - tipc_subscr_stop(net); + tipc_topsrv_stop(net); tipc_net_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 3dc68c7a966d..0fcf133d5cb7 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,16 +60,19 @@ #include <net/netns/generic.h> #include <linux/rhashtable.h> -#include "node.h" -#include "bearer.h" -#include "bcast.h" -#include "netlink.h" -#include "link.h" -#include "node.h" -#include "msg.h" +struct tipc_node; +struct tipc_bearer; +struct tipc_bcbearer; +struct tipc_bclink; +struct tipc_link; +struct tipc_name_table; +struct tipc_server; #define TIPC_MOD_VER "2.0.0" +#define NODE_HTABLE_SIZE 512 +#define MAX_BEARERS 3 + extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; @@ -106,6 +109,26 @@ struct tipc_net { atomic_t subscription_count; }; +static inline u16 mod(u16 x) +{ + return x & 0xffffu; +} + +static inline int less_eq(u16 left, u16 right) +{ + return mod(right - left) < 32768u; +} + +static inline int more(u16 left, u16 right) +{ + return !less_eq(left, right); +} + +static inline int less(u16 left, 
u16 right) +{ + return less_eq(left, right) && (mod(right) != mod(left)); +} + #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); void tipc_unregister_sysctl(void); diff --git a/net/tipc/link.c b/net/tipc/link.c index 43a515dc97b0..fb2a003c8e6d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -86,7 +86,7 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define STARTING_EVT 856384768 /* link processing trigger */ #define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */ -#define TIMEOUT_EVT 560817u /* link timer expired */ +#define SILENCE_EVT 560817u /* timer discovered silence from peer */ /* * State value stored in 'failover_pkts' @@ -106,6 +106,7 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); +static void link_set_timer(struct tipc_link *link, unsigned long time); /* * Simple link routines */ @@ -197,11 +198,12 @@ static void link_timeout(unsigned long data) } /* do all other link processing performed on a periodic basis */ - link_state_event(l_ptr, TIMEOUT_EVT); - + if (l_ptr->silent_intv_cnt || tipc_bclink_acks_missing(l_ptr->owner)) + link_state_event(l_ptr, SILENCE_EVT); + l_ptr->silent_intv_cnt++; if (skb_queue_len(&l_ptr->backlogq)) tipc_link_push_packets(l_ptr); - + link_set_timer(l_ptr, l_ptr->keepalive_intv); tipc_node_unlock(l_ptr->owner); tipc_link_put(l_ptr); } @@ -233,8 +235,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, if (n_ptr->link_cnt >= MAX_BEARERS) { tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_err("Attempt to establish %uth link to %s. Max %u allowed.\n", - n_ptr->link_cnt, addr_string, MAX_BEARERS); + pr_err("Cannot establish %uth link to %s. 
Max %u allowed.\n", + n_ptr->link_cnt, addr_string, MAX_BEARERS); return NULL; } @@ -261,7 +263,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, /* note: peer i/f name is updated by reset/activate message */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); l_ptr->owner = n_ptr; - l_ptr->checkpoint = 1; l_ptr->peer_session = INVALID_SESSION; l_ptr->bearer_id = b_ptr->identity; link_set_supervision_props(l_ptr, b_ptr->tolerance); @@ -280,7 +281,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); - l_ptr->next_out_no = 1; + l_ptr->snd_nxt = 1; __skb_queue_head_init(&l_ptr->transmq); __skb_queue_head_init(&l_ptr->backlogq); __skb_queue_head_init(&l_ptr->deferdq); @@ -311,8 +312,7 @@ void tipc_link_delete(struct tipc_link *l) tipc_link_put(l); } -void tipc_link_delete_list(struct net *net, unsigned int bearer_id, - bool shutting_down) +void tipc_link_delete_list(struct net *net, unsigned int bearer_id) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *link; @@ -451,9 +451,9 @@ void tipc_link_reset(struct tipc_link *l_ptr) if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { l_ptr->flags |= LINK_FAILINGOVER; - l_ptr->failover_checkpt = l_ptr->next_in_no; + l_ptr->failover_checkpt = l_ptr->rcv_nxt; pl->failover_pkts = FIRST_FAILOVER; - pl->failover_checkpt = l_ptr->next_in_no; + pl->failover_checkpt = l_ptr->rcv_nxt; pl->failover_skb = l_ptr->reasm_buf; } else { kfree_skb(l_ptr->reasm_buf); @@ -469,36 +469,19 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_link_purge_backlog(l_ptr); l_ptr->reasm_buf = NULL; l_ptr->rcv_unacked = 0; - l_ptr->checkpoint = 1; - l_ptr->next_out_no = 1; - l_ptr->fsm_msg_cnt = 0; + l_ptr->snd_nxt = 1; + l_ptr->silent_intv_cnt = 0; l_ptr->stale_count = 0; link_reset_statistics(l_ptr); } -void tipc_link_reset_list(struct net *net, unsigned int bearer_id) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->links[bearer_id]; - if (l_ptr) - tipc_link_reset(l_ptr); - tipc_node_unlock(n_ptr); - } - rcu_read_unlock(); -} - static void link_activate(struct tipc_link *link) { struct tipc_node *node = link->owner; - link->next_in_no = 1; + link->rcv_nxt = 1; link->stats.recv_info = 1; + link->silent_intv_cnt = 0; tipc_node_link_up(node, link); tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } @@ -511,7 +494,7 @@ static void link_activate(struct tipc_link *link) static void link_state_event(struct tipc_link *l_ptr, unsigned int event) { struct tipc_link *other; - unsigned long cont_intv = l_ptr->cont_intv; + unsigned long timer_intv = l_ptr->keepalive_intv; if (l_ptr->flags & LINK_STOPPED) return; @@ -519,45 +502,33 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not yet. 
*/ - if (l_ptr->flags & LINK_FAILINGOVER) { - if (event == TIMEOUT_EVT) - link_set_timer(l_ptr, cont_intv); + if (l_ptr->flags & LINK_FAILINGOVER) return; - } switch (l_ptr->state) { case WORKING_WORKING: switch (event) { case TRAFFIC_MSG_EVT: case ACTIVATE_MSG: + l_ptr->silent_intv_cnt = 0; break; - case TIMEOUT_EVT: - if (l_ptr->next_in_no != l_ptr->checkpoint) { - l_ptr->checkpoint = l_ptr->next_in_no; - if (tipc_bclink_acks_missing(l_ptr->owner)) { + case SILENCE_EVT: + if (!l_ptr->silent_intv_cnt) { + if (tipc_bclink_acks_missing(l_ptr->owner)) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } - link_set_timer(l_ptr, cont_intv); break; } l_ptr->state = WORKING_UNKNOWN; - l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv / 4); break; case RESET_MSG: pr_debug("%s<%s>, requested by peer\n", link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; - l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); break; default: pr_debug("%s%u in WW state\n", link_unk_evt, event); @@ -568,46 +539,33 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) case TRAFFIC_MSG_EVT: case ACTIVATE_MSG: l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; - link_set_timer(l_ptr, cont_intv); + l_ptr->silent_intv_cnt = 0; break; case RESET_MSG: pr_debug("%s<%s>, requested by peer while probing\n", link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; - l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); break; - case TIMEOUT_EVT: - if (l_ptr->next_in_no != l_ptr->checkpoint) { + case SILENCE_EVT: + if (!l_ptr->silent_intv_cnt) { l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; - l_ptr->checkpoint = l_ptr->next_in_no; - if (tipc_bclink_acks_missing(l_ptr->owner)) { + if (tipc_bclink_acks_missing(l_ptr->owner)) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } - link_set_timer(l_ptr, cont_intv); - } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { + } else if (l_ptr->silent_intv_cnt < + l_ptr->abort_limit) { tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ pr_debug("%s<%s>, peer not responding\n", link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; - l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); } break; default: @@ -623,31 +581,22 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (other && link_working_unknown(other)) break; l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); - link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: l_ptr->state = RESET_RESET; - l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); break; case STARTING_EVT: l_ptr->flags |= LINK_STARTED; - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); + link_set_timer(l_ptr, timer_intv); break; - case TIMEOUT_EVT: + case SILENCE_EVT: 
tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); break; default: pr_err("%s%u in RU state\n", link_unk_evt, event); @@ -661,21 +610,16 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (other && link_working_unknown(other)) break; l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); - link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: break; - case TIMEOUT_EVT: + case SILENCE_EVT: tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); break; default: pr_err("%s%u in RR state\n", link_unk_evt, event); @@ -701,53 +645,58 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, { struct tipc_msg *msg = buf_msg(skb_peek(list)); unsigned int maxwin = link->window; - unsigned int imp = msg_importance(msg); + unsigned int i, imp = msg_importance(msg); uint mtu = link->mtu; - uint ack = mod(link->next_in_no - 1); - uint seqno = link->next_out_no; - uint bc_last_in = link->owner->bclink.last_in; + u16 ack = mod(link->rcv_nxt - 1); + u16 seqno = link->snd_nxt; + u16 bc_last_in = link->owner->bclink.last_in; struct tipc_media_addr *addr = &link->media_addr; struct sk_buff_head *transmq = &link->transmq; struct sk_buff_head *backlogq = &link->backlogq; - struct sk_buff *skb, *tmp; - - /* Match backlog limit against msg importance: */ - if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) - return link_schedule_user(link, list); + struct sk_buff *skb, *bskb; + /* Match msg importance against this and all higher backlog limits: */ + for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { + if (unlikely(link->backlog[i].len >= link->backlog[i].limit)) + return link_schedule_user(link, list); + } if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); return -EMSGSIZE; } /* Prepare each packet for sending, and add to relevant queue: */ - skb_queue_walk_safe(list, skb, tmp) { - __skb_unlink(skb, list); + while (skb_queue_len(list)) { + skb = skb_peek(list); msg = buf_msg(skb); msg_set_seqno(msg, seqno); msg_set_ack(msg, ack); msg_set_bcast_ack(msg, bc_last_in); if (likely(skb_queue_len(transmq) < maxwin)) { + __skb_dequeue(list); __skb_queue_tail(transmq, skb); tipc_bearer_send(net, link->bearer_id, skb, addr); link->rcv_unacked = 0; seqno++; continue; } - if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) { + if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) { + kfree_skb(__skb_dequeue(list)); link->stats.sent_bundled++; continue; } - if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { + if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) { + kfree_skb(__skb_dequeue(list)); + __skb_queue_tail(backlogq, bskb); + link->backlog[msg_importance(buf_msg(bskb))].len++; link->stats.sent_bundled++; link->stats.sent_bundles++; - imp = msg_importance(buf_msg(skb)); + continue; } - __skb_queue_tail(backlogq, skb); - link->backlog[imp].len++; - seqno++; + link->backlog[imp].len += skb_queue_len(list); + skb_queue_splice_tail_init(list, backlogq); } - link->next_out_no = seqno; + link->snd_nxt = seqno; return 0; } @@ -877,7 +826,8 @@ void tipc_link_push_packets(struct tipc_link *link) { struct sk_buff *skb; struct tipc_msg *msg; - unsigned int ack = mod(link->next_in_no - 1); + u16 seqno = link->snd_nxt; + u16 ack = mod(link->rcv_nxt - 1); while 
(skb_queue_len(&link->transmq) < link->window) { skb = __skb_dequeue(&link->backlogq); @@ -886,12 +836,15 @@ void tipc_link_push_packets(struct tipc_link *link) msg = buf_msg(skb); link->backlog[msg_importance(msg)].len--; msg_set_ack(msg, ack); + msg_set_seqno(msg, seqno); + seqno = mod(seqno + 1); msg_set_bcast_ack(msg, link->owner->bclink.last_in); link->rcv_unacked = 0; __skb_queue_tail(&link->transmq, skb); tipc_bearer_send(link->owner->net, link->bearer_id, skb, &link->media_addr); } + link->snd_nxt = seqno; } void tipc_link_reset_all(struct tipc_node *node) @@ -964,13 +917,13 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, msg = buf_msg(skb); /* Detect repeated retransmit failures */ - if (l_ptr->last_retransmitted == msg_seqno(msg)) { + if (l_ptr->last_retransm == msg_seqno(msg)) { if (++l_ptr->stale_count > 100) { link_retransmit_failure(l_ptr, skb); return; } } else { - l_ptr->last_retransmitted = msg_seqno(msg); + l_ptr->last_retransm = msg_seqno(msg); l_ptr->stale_count = 1; } @@ -978,7 +931,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, if (!retransmits) break; msg = buf_msg(skb); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); + msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, &l_ptr->media_addr); @@ -1001,11 +954,11 @@ static bool link_synch(struct tipc_link *l) goto synched; /* Was last pre-synch packet added to input queue ? */ - if (less_eq(pl->next_in_no, l->synch_point)) + if (less_eq(pl->rcv_nxt, l->synch_point)) return false; /* Is it still in the input queue ? */ - post_synch = mod(pl->next_in_no - l->synch_point) - 1; + post_synch = mod(pl->rcv_nxt - l->synch_point) - 1; if (skb_queue_len(&pl->inputq) > post_synch) return false; synched: @@ -1016,13 +969,13 @@ synched: static void link_retrieve_defq(struct tipc_link *link, struct sk_buff_head *list) { - u32 seq_no; + u16 seq_no; if (skb_queue_empty(&link->deferdq)) return; seq_no = buf_seqno(skb_peek(&link->deferdq)); - if (seq_no == mod(link->next_in_no)) + if (seq_no == link->rcv_nxt) skb_queue_splice_tail_init(&link->deferdq, list); } @@ -1043,8 +996,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) struct tipc_link *l_ptr; struct sk_buff *skb1, *tmp; struct tipc_msg *msg; - u32 seq_no; - u32 ackd; + u16 seq_no; + u16 ackd; u32 released; skb2list(skb, &head); @@ -1137,18 +1090,20 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) } /* Link is now in state WORKING_WORKING */ - if (unlikely(seq_no != mod(l_ptr->next_in_no))) { + if (unlikely(seq_no != l_ptr->rcv_nxt)) { link_handle_out_of_seq_msg(l_ptr, skb); link_retrieve_defq(l_ptr, &head); skb = NULL; goto unlock; } + l_ptr->silent_intv_cnt = 0; + /* Synchronize with parallel link if applicable */ if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { if (!link_synch(l_ptr)) goto unlock; } - l_ptr->next_in_no++; + l_ptr->rcv_nxt++; if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) link_retrieve_defq(l_ptr, &head); if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { @@ -1268,7 +1223,7 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) { struct sk_buff *skb1; - u32 seq_no = buf_seqno(skb); + u16 seq_no = buf_seqno(skb); /* Empty queue ? 
*/ if (skb_queue_empty(list)) { @@ -1284,7 +1239,7 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) /* Locate insertion point in queue, then insert; discard if duplicate */ skb_queue_walk(list, skb1) { - u32 curr_seqno = buf_seqno(skb1); + u16 curr_seqno = buf_seqno(skb1); if (seq_no == curr_seqno) { kfree_skb(skb); @@ -1312,14 +1267,14 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, return; } - /* Record OOS packet arrival (force mismatch on next timeout) */ - l_ptr->checkpoint--; + /* Record OOS packet arrival */ + l_ptr->silent_intv_cnt = 0; /* * Discard packet if a duplicate; otherwise add it to deferred queue * and notify peer of gap as per protocol specification */ - if (less(seq_no, mod(l_ptr->next_in_no))) { + if (less(seq_no, l_ptr->rcv_nxt)) { l_ptr->stats.duplicates++; kfree_skb(buf); return; @@ -1344,6 +1299,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, struct tipc_msg *msg = l_ptr->pmsg; u32 msg_size = sizeof(l_ptr->proto_msg); int r_flag; + u16 last_rcv; /* Don't send protocol message during link failover */ if (l_ptr->flags & LINK_FAILINGOVER) @@ -1360,7 +1316,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); if (msg_typ == STATE_MSG) { - u32 next_sent = mod(l_ptr->next_out_no); + u16 next_sent = l_ptr->snd_nxt; if (!tipc_link_is_up(l_ptr)) return; @@ -1368,8 +1324,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, next_sent = buf_seqno(skb_peek(&l_ptr->backlogq)); msg_set_next_sent(msg, next_sent); if (!skb_queue_empty(&l_ptr->deferdq)) { - u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq)); - gap = mod(rec - mod(l_ptr->next_in_no)); + last_rcv = buf_seqno(skb_peek(&l_ptr->deferdq)); + gap = mod(last_rcv - l_ptr->rcv_nxt); } msg_set_seq_gap(msg, gap); if (gap) @@ -1377,7 +1333,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_link_tolerance(msg, tolerance); msg_set_linkprio(msg, priority); msg_set_max_pkt(msg, l_ptr->mtu); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); + msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); msg_set_probe(msg, probe_msg != 0); if (probe_msg) l_ptr->stats.sent_probes++; @@ -1397,7 +1353,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_linkprio(msg, l_ptr->priority); msg_set_size(msg, msg_size); - msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2))); + msg_set_seqno(msg, mod(l_ptr->snd_nxt + (0xffff / 2))); buf = tipc_buf_acquire(msg_size); if (!buf) @@ -1496,17 +1452,15 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, } /* Record reception; force mismatch at next timeout: */ - l_ptr->checkpoint--; + l_ptr->silent_intv_cnt = 0; link_state_event(l_ptr, TRAFFIC_MSG_EVT); l_ptr->stats.recv_states++; if (link_reset_unknown(l_ptr)) break; - if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) { - rec_gap = mod(msg_next_sent(msg) - - mod(l_ptr->next_in_no)); - } + if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg))) + rec_gap = mod(msg_next_sent(msg) - l_ptr->rcv_nxt); if (msg_probe(msg)) l_ptr->stats.recv_probes++; @@ -1580,6 +1534,11 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); + + skb_queue_walk(&l_ptr->backlogq, skb) { + msg_set_seqno(buf_msg(skb), l_ptr->snd_nxt); + l_ptr->snd_nxt = mod(l_ptr->snd_nxt + 1); + } 
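[Aside — illustrative sketch, not part of the patch. The failover hunk just above renumbers the backlog queue with mod(), one of the 16-bit sequence-number helpers this series moves into net/tipc/core.h (see the core.h hunk earlier in this diff). The helper bodies below are copied verbatim from that hunk; the u16 typedef and the main() harness are editor-added assumptions so the snippet compiles as plain userspace C.]

#include <stdint.h>
#include <stdio.h>

typedef uint16_t u16;

/* Helpers as added to net/tipc/core.h by this series */
static inline u16 mod(u16 x)
{
	return x & 0xffffu;
}

static inline int less_eq(u16 left, u16 right)
{
	return mod(right - left) < 32768u;
}

static inline int more(u16 left, u16 right)
{
	return !less_eq(left, right);
}

static inline int less(u16 left, u16 right)
{
	return less_eq(left, right) && (mod(right) != mod(left));
}

int main(void)
{
	/* 0 counts as "after" 65535 once the counter wraps */
	printf("less(65535, 0) = %d\n", less(65535, 0)); /* prints 1 */
	printf("less(0, 65535) = %d\n", less(0, 65535)); /* prints 0 */
	/* ordering is only defined within half the number space */
	printf("more(0, 32768) = %d\n", more(0, 32768)); /* prints 1 */
	return 0;
}

[The 32768u half-space bound is what keeps less()/more() well-defined across wraparound, which is also why this series narrows the link sequence-number fields from u32 to u16.]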
skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); tipc_link_purge_backlog(l_ptr); msgcount = skb_queue_len(&l_ptr->transmq); @@ -1640,6 +1599,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *link, struct tipc_msg tnl_hdr; struct sk_buff_head *queue = &link->transmq; int mcnt; + u16 seqno; tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, SYNCH_MSG, INT_H_SIZE, link->addr); @@ -1653,7 +1613,7 @@ tunnel_queue: struct tipc_msg *msg = buf_msg(skb); u32 len = msg_size(msg); - msg_set_ack(msg, mod(link->next_in_no - 1)); + msg_set_ack(msg, mod(link->rcv_nxt - 1)); msg_set_bcast_ack(msg, link->owner->bclink.last_in); msg_set_size(&tnl_hdr, len + INT_H_SIZE); outskb = tipc_buf_acquire(len + INT_H_SIZE); @@ -1671,6 +1631,11 @@ tunnel_queue: } if (queue == &link->backlogq) return; + seqno = link->snd_nxt; + skb_queue_walk(&link->backlogq, skb) { + msg_set_seqno(buf_msg(skb), seqno); + seqno = mod(seqno + 1); + } queue = &link->backlogq; goto tunnel_queue; } @@ -1742,8 +1707,8 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) return; l_ptr->tolerance = tol; - l_ptr->cont_intv = msecs_to_jiffies(intv); - l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); + l_ptr->keepalive_intv = msecs_to_jiffies(intv); + l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->keepalive_intv)); } void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) @@ -1803,8 +1768,8 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, static void link_reset_statistics(struct tipc_link *l_ptr) { memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); - l_ptr->stats.sent_info = l_ptr->next_out_no; - l_ptr->stats.recv_info = l_ptr->next_in_no; + l_ptr->stats.sent_info = l_ptr->snd_nxt; + l_ptr->stats.recv_info = l_ptr->rcv_nxt; } static void link_print(struct tipc_link *l_ptr, const char *str) @@ -1893,6 +1858,9 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2034,9 +2002,9 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->rcv_nxt)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->next_out_no)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->snd_nxt)) goto attr_msg_full; if (tipc_link_is_up(link)) @@ -2175,50 +2143,53 @@ out: int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - struct sk_buff *ans_skb; struct tipc_nl_msg msg; - struct tipc_link *link; - struct tipc_node *node; char *name; - int bearer_id; int err; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + if (!info->attrs[TIPC_NLA_LINK_NAME]) return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!ans_skb) + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) return -ENOMEM; - msg.skb = ans_skb; - msg.portid = info->snd_portid; - msg.seq = info->snd_seq; - - tipc_node_lock(node); - link = node->links[bearer_id]; - if (!link) { - err = -EINVAL; - goto err_out; - } - - err = 
__tipc_nl_add_link(net, &msg, link, 0); - if (err) - goto err_out; + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; - tipc_node_unlock(node); + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; - return genlmsg_reply(ans_skb, info); + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); + nlmsg_free(msg.skb); + return -EINVAL; + } -err_out: - tipc_node_unlock(node); - nlmsg_free(ans_skb); + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_unlock(node); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } - return err; + return genlmsg_reply(msg.skb, info); } int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) diff --git a/net/tipc/link.h b/net/tipc/link.h index b5b4e3554d4e..0c02c973e985 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -107,30 +107,29 @@ struct tipc_stats { * @owner: pointer to peer node * @refcnt: reference counter for permanent references (owner node & timer) * @flags: execution state flags for link endpoint instance - * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @cont_intv: link continuity testing interval + * @keepalive_intv: link keepalive timer interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM - * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state + * @silent_intv_cnt: # of timer intervals without any reception from peer * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_checkpoint: seq # of last acknowledged message at time of link reset + * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset * @mtu: current maximum packet size for this link * @advertised_mtu: advertised own mtu when link is being established * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent - * @next_out_no: next sequence number to use for outbound messages + * @snd_nxt: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer - * @next_in_no: next sequence number to expect for inbound messages + * @rcv_nxt: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer * @inputq: buffer queue for messages to be delivered upwards @@ -151,15 +150,14 @@ struct tipc_link { /* Management and link supervision data */ unsigned int flags; - u32 checkpoint; u32 peer_session; u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - unsigned long cont_intv; + unsigned long keepalive_intv; u32 abort_limit; int state; - u32 
fsm_msg_cnt; + u32 silent_intv_cnt; struct { unchar hdr[INT_H_SIZE]; unchar body[TIPC_MAX_IF_NAME]; @@ -185,13 +183,13 @@ struct tipc_link { u16 len; u16 limit; } backlog[5]; - u32 next_out_no; + u16 snd_nxt; + u16 last_retransm; u32 window; - u32 last_retransmitted; u32 stale_count; /* Reception */ - u32 next_in_no; + u16 rcv_nxt; u32 rcv_unacked; struct sk_buff_head deferdq; struct sk_buff_head inputq; @@ -213,8 +211,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct tipc_link *link); -void tipc_link_delete_list(struct net *net, unsigned int bearer_id, - bool shutting_down); +void tipc_link_delete_list(struct net *net, unsigned int bearer_id); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); @@ -223,7 +220,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_reset_list(struct net *net, unsigned int bearer_id); int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, @@ -247,39 +243,6 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); void link_prepare_wakeup(struct tipc_link *l); -/* - * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) - */ -static inline u32 buf_seqno(struct sk_buff *buf) -{ - return msg_seqno(buf_msg(buf)); -} - -static inline u32 mod(u32 x) -{ - return x & 0xffffu; -} - -static inline int less_eq(u32 left, u32 right) -{ - return mod(right - left) < 32768u; -} - -static inline int more(u32 left, u32 right) -{ - return !less_eq(left, right); -} - -static inline int less(u32 left, u32 right) -{ - return less_eq(left, right) && (mod(right) != mod(left)); -} - -static inline u32 lesser(u32 left, u32 right) -{ - return less_eq(left, right) ? 
left : right; -} - static inline u32 link_own_addr(struct tipc_link *l) { return msg_prevnode(l->pmsg); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c3e96e815418..08b4cc7d496d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -331,16 +331,15 @@ error: /** * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one - * @bskb: the buffer to append to ("bundle") - * @skb: buffer to be appended + * @skb: the buffer to append to ("bundle") + * @msg: message to be appended * @mtu: max allowable size for the bundle buffer * Consumes buffer if successful * Returns true if bundling could be performed, otherwise false */ -bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) +bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu) { struct tipc_msg *bmsg; - struct tipc_msg *msg = buf_msg(skb); unsigned int bsz; unsigned int msz = msg_size(msg); u32 start, pad; @@ -348,9 +347,9 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) if (likely(msg_user(msg) == MSG_FRAGMENTER)) return false; - if (!bskb) + if (!skb) return false; - bmsg = buf_msg(bskb); + bmsg = buf_msg(skb); bsz = msg_size(bmsg); start = align(bsz); pad = start - bsz; @@ -359,18 +358,20 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) return false; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) return false; - if (likely(msg_user(bmsg) != MSG_BUNDLER)) + if (unlikely(msg_user(bmsg) != MSG_BUNDLER)) return false; - if (unlikely(skb_tailroom(bskb) < (pad + msz))) + if (unlikely(skb_tailroom(skb) < (pad + msz))) return false; if (unlikely(max < (start + msz))) return false; + if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) && + (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE)) + return false; - skb_put(bskb, pad + msz); - skb_copy_to_linear_data_offset(bskb, start, skb->data, msz); + skb_put(skb, pad + msz); + skb_copy_to_linear_data_offset(skb, start, msg, msz); msg_set_size(bmsg, start + msz); msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); - kfree_skb(skb); return true; } @@ -416,18 +417,18 @@ none: /** * tipc_msg_make_bundle(): Create bundle buf and append message to its tail - * @list: the buffer chain - * @skb: buffer to be appended and replaced + * @list: the buffer chain, where head is the buffer to replace/append + * @skb: buffer to be created, appended to and returned in case of success + * @msg: message to be appended * @mtu: max allowable size for the bundle buffer, inclusive header * @dnode: destination node for message. 
(Not always present in header) - * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, + u32 mtu, u32 dnode) { - struct sk_buff *bskb; + struct sk_buff *_skb; struct tipc_msg *bmsg; - struct tipc_msg *msg = buf_msg(*skb); u32 msz = msg_size(msg); u32 max = mtu - INT_H_SIZE; @@ -440,19 +441,23 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) if (msz > (max / 2)) return false; - bskb = tipc_buf_acquire(max); - if (!bskb) + _skb = tipc_buf_acquire(max); + if (!_skb) return false; - skb_trim(bskb, INT_H_SIZE); - bmsg = buf_msg(bskb); + skb_trim(_skb, INT_H_SIZE); + bmsg = buf_msg(_skb); tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); + if (msg_isdata(msg)) + msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE); + else + msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); - tipc_msg_bundle(bskb, *skb, mtu); - *skb = bskb; + tipc_msg_bundle(_skb, msg, mtu); + *skb = _skb; return true; } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index e1d3595e2ee9..19c45fb66238 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -313,12 +313,12 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 19, 0x3, n); } -static inline u32 msg_bcast_ack(struct tipc_msg *m) +static inline u16 msg_bcast_ack(struct tipc_msg *m) { return msg_bits(m, 1, 0, 0xffff); } -static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n) +static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) { msg_set_bits(m, 1, 0, 0xffff, n); } @@ -327,22 +327,22 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n) /* * Word 2 */ -static inline u32 msg_ack(struct tipc_msg *m) +static inline u16 msg_ack(struct tipc_msg *m) { return msg_bits(m, 2, 16, 0xffff); } -static inline void msg_set_ack(struct tipc_msg *m, u32 n) +static inline void msg_set_ack(struct tipc_msg *m, u16 n) { msg_set_bits(m, 2, 16, 0xffff, n); } -static inline u32 msg_seqno(struct tipc_msg *m) +static inline u16 msg_seqno(struct tipc_msg *m) { return msg_bits(m, 2, 0, 0xffff); } -static inline void msg_set_seqno(struct tipc_msg *m, u32 n) +static inline void msg_set_seqno(struct tipc_msg *m, u16 n) { msg_set_bits(m, 2, 0, 0xffff, n); } @@ -352,18 +352,22 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) */ static inline u32 msg_importance(struct tipc_msg *m) { - if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + int usr = msg_user(m); + + if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) + return usr; + if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) return msg_bits(m, 5, 13, 0x7); - if (likely(msg_isdata(m) && !msg_errcode(m))) - return msg_user(m); return TIPC_SYSTEM_IMPORTANCE; } static inline void msg_set_importance(struct tipc_msg *m, u32 i) { - if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + int usr = msg_user(m); + + if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) msg_set_bits(m, 5, 13, 0x7, i); - else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) + else if (i < TIPC_SYSTEM_IMPORTANCE) msg_set_user(m, i); else pr_warn("Trying to set illegal importance in message\n"); @@ -772,9 +776,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff 
**headbuf, struct sk_buff **buf); -bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu); - -bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode); +bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu); +bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, + u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); @@ -782,6 +786,11 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +static inline u16 buf_seqno(struct sk_buff *skb) +{ + return msg_seqno(buf_msg(skb)); +} + /* tipc_skb_peek(): peek and reserve first buffer in list * @list: list to be peeked in * Returns pointer to first buffer in list, if any diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ab0ac62a1287..0f47f08bf38f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -330,13 +330,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscr_report_overlap(s, - publ->lower, - publ->upper, - TIPC_PUBLISHED, - publ->ref, - publ->node, - created_subseq); + tipc_subscrp_report_overlap(s, publ->lower, publ->upper, + TIPC_PUBLISHED, publ->ref, + publ->node, created_subseq); } return publ; } @@ -404,13 +400,9 @@ found: /* Notify any waiting subscriptions */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscr_report_overlap(s, - publ->lower, - publ->upper, - TIPC_WITHDRAWN, - publ->ref, - publ->node, - removed_subseq); + tipc_subscrp_report_overlap(s, publ->lower, publ->upper, + TIPC_WITHDRAWN, publ->ref, + publ->node, removed_subseq); } return publ; @@ -432,19 +424,17 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, return; while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { + if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) { struct publication *crs; struct name_info *info = sseq->info; int must_report = 1; list_for_each_entry(crs, &info->zone_list, zone_list) { - tipc_subscr_report_overlap(s, - sseq->lower, - sseq->upper, - TIPC_PUBLISHED, - crs->ref, - crs->node, - must_report); + tipc_subscrp_report_overlap(s, sseq->lower, + sseq->upper, + TIPC_PUBLISHED, + crs->ref, crs->node, + must_report); must_report = 0; } } diff --git a/net/tipc/net.c b/net/tipc/net.c index a54f3cbe2246..d6d1399ae229 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -40,6 +40,7 @@ #include "subscr.h" #include "socket.h" #include "node.h" +#include "bcast.h" static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index ce9121e8e990..53e0fee80086 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -55,6 +55,7 @@ struct tipc_nl_compat_msg { int rep_type; int rep_size; int req_type; + struct net *net; struct sk_buff *rep; struct tlv_desc *req; struct sock *dst_sk; @@ -68,7 +69,8 @@ struct tipc_nl_compat_cmd_dump { struct tipc_nl_compat_cmd_doit { int (*doit)(struct sk_buff *skb, struct genl_info *info); - int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); + int 
(*transcode)(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg); }; static int tipc_skb_tailroom(struct sk_buff *skb) @@ -281,7 +283,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (!trans_buf) return -ENOMEM; - err = (*cmd->transcode)(trans_buf, msg); + err = (*cmd->transcode)(cmd, trans_buf, msg); if (err) goto trans_out; @@ -353,7 +355,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, nla_len(bearer[TIPC_NLA_BEARER_NAME])); } -static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, +static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { struct nlattr *prop; @@ -385,7 +388,8 @@ static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, return 0; } -static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, +static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { char *name; @@ -576,11 +580,81 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, &link_info, sizeof(link_info)); } -static int tipc_nl_compat_link_set(struct sk_buff *skb, - struct tipc_nl_compat_msg *msg) +static int __tipc_add_link_prop(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg, + struct tipc_link_config *lc) +{ + switch (msg->cmd) { + case TIPC_CMD_SET_LINK_PRI: + return nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_TOL: + return nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_WINDOW: + return nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value)); + } + + return -EINVAL; +} + +static int tipc_nl_compat_media_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) { - struct nlattr *link; struct nlattr *prop; + struct nlattr *media; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + media = nla_nest_start(skb, TIPC_NLA_MEDIA); + if (!media) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, media); + + return 0; +} + +static int tipc_nl_compat_bearer_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, bearer); + + return 0; +} + +static int __tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *link; struct tipc_link_config *lc; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -596,24 +670,40 @@ static int tipc_nl_compat_link_set(struct sk_buff *skb, if (!prop) return -EMSGSIZE; - if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { - if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value))) - return -EMSGSIZE; - } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) { - if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value))) - return -EMSGSIZE; - } else if (msg->cmd == 
TIPC_CMD_SET_LINK_WINDOW) { - if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value))) - return -EMSGSIZE; - } - + __tipc_add_link_prop(skb, msg, lc); nla_nest_end(skb, prop); nla_nest_end(skb, link); return 0; } -static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, +static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct tipc_link_config *lc; + struct tipc_bearer *bearer; + struct tipc_media *media; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + media = tipc_media_find(lc->name); + if (media) { + cmd->doit = &tipc_nl_media_set; + return tipc_nl_compat_media_set(skb, msg); + } + + bearer = tipc_bearer_find(msg->net, lc->name); + if (bearer) { + cmd->doit = &tipc_nl_bearer_set; + return tipc_nl_compat_bearer_set(skb, msg); + } + + return __tipc_nl_compat_link_set(skb, msg); +} + +static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { char *name; @@ -851,7 +941,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, sizeof(node_info)); } -static int tipc_nl_compat_net_set(struct sk_buff *skb, +static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { u32 val; @@ -1007,7 +1098,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *req_nlh; struct nlmsghdr *rep_nlh; struct tipc_genlmsghdr *req_userhdr = info->userhdr; - struct net *net = genl_info_net(info); memset(&msg, 0, sizeof(msg)); @@ -1015,6 +1105,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; msg.cmd = req_userhdr->cmd; msg.dst_sk = info->dst_sk; + msg.net = genl_info_net(info); if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); @@ -1030,7 +1121,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } err = tipc_nl_compat_handle(&msg); - if (err == -EOPNOTSUPP) + if ((err == -EOPNOTSUPP) || (err == -EPERM)) msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); else if (err == -EINVAL) msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); @@ -1043,7 +1134,7 @@ send: rep_nlh = nlmsg_hdr(msg.rep); memcpy(rep_nlh, info->nlhdr, len); rep_nlh->nlmsg_len = msg.rep->len; - genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid); return err; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 22c059ad2999..0b1d61a5f853 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012-2014, Ericsson AB + * Copyright (c) 2000-2006, 2012-2015, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. 
* @@ -39,6 +39,7 @@ #include "node.h" #include "name_distr.h" #include "socket.h" +#include "bcast.h" static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); diff --git a/net/tipc/node.h b/net/tipc/node.h index 02d5c20dc551..5a834cf142c8 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -45,8 +45,6 @@ /* Out-of-range value for node signature */ #define INVALID_NODE_SIG 0x10000 -#define NODE_HTABLE_SIZE 512 - /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down diff --git a/net/tipc/server.c b/net/tipc/server.c index 77ff03ed1e18..922e04a43396 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -309,6 +309,10 @@ static int tipc_accept_from_sock(struct tipc_conn *con) /* Notify that new connection is incoming */ newcon->usr_data = s->tipc_conn_new(newcon->conid); + if (!newcon->usr_data) { + sock_release(newsock); + return -ENOMEM; + } /* Wake up receive process in case of 'SYN+' message */ newsock->sk->sk_data_ready(newsock->sk); @@ -321,7 +325,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); + ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock); if (ret < 0) return NULL; ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5cede38..9370f953e16f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -41,6 +41,7 @@ #include "link.h" #include "name_distr.h" #include "socket.h" +#include "bcast.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ @@ -342,7 +343,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1c147c869c2e..350cca33ee0a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -40,16 +40,21 @@ /** * struct tipc_subscriber - TIPC network topology subscriber + * @kref: reference counter to tipc_subscriber object * @conid: connection identifier to server connecting to subscriber * @lock: control access to subscriber - * @subscription_list: list of subscription objects for this subscriber + * @subscrp_list: list of subscription objects for this subscriber */ struct tipc_subscriber { + struct kref kref; int conid; spinlock_t lock; - struct list_head subscription_list; + struct list_head subscrp_list; }; +static void tipc_subscrp_delete(struct tipc_subscription *sub); +static void tipc_subscrb_put(struct tipc_subscriber *subscriber); + /** * htohl - convert value to endianness used by destination * @in: value to convert * @@ -62,9 +67,9 @@ static u32 htohl(u32 in, int swap) { return swap ? 
swab32(in) : in; } -static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node) +static void tipc_subscrp_send_event(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port_ref, u32 node) { struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; @@ -82,12 +87,13 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, } /** - * tipc_subscr_overlap - test for subscription overlap with the given values + * tipc_subscrp_check_overlap - test for subscription overlap with the + * given values * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper) +int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper) { if (found_lower < sub->seq.lower) found_lower = sub->seq.lower; @@ -98,138 +104,121 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, return 1; } -/** - * tipc_subscr_report_overlap - issue event if there is subscription overlap - * - * Protected by nameseq.lock in name_table.c - */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) +void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) { - if (!tipc_subscr_overlap(sub, found_lower, found_upper)) + if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper)) return; if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; - subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); + tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, + node); } -static void subscr_timeout(unsigned long data) +static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber = sub->subscriber; - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - /* The spin lock per subscriber is used to protect its members */ - spin_lock_bh(&subscriber->lock); + /* Notify subscriber of timeout */ + tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); - /* Validate timeout (in case subscription is being cancelled) */ - if (sub->timeout == TIPC_WAIT_FOREVER) { - spin_unlock_bh(&subscriber->lock); - return; - } + spin_lock_bh(&subscriber->lock); + tipc_subscrp_delete(sub); + spin_unlock_bh(&subscriber->lock); - /* Unlink subscription from name table */ - tipc_nametbl_unsubscribe(sub); + tipc_subscrb_put(subscriber); +} - /* Unlink subscription from subscriber */ - list_del(&sub->subscription_list); +static void tipc_subscrb_kref_release(struct kref *kref) +{ + struct tipc_subscriber *subscriber = container_of(kref, + struct tipc_subscriber, kref); - spin_unlock_bh(&subscriber->lock); + kfree(subscriber); +} - /* Notify subscriber of timeout */ - subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); +static void tipc_subscrb_put(struct tipc_subscriber *subscriber) +{ + kref_put(&subscriber->kref, tipc_subscrb_kref_release); +} - /* Now destroy subscription */ - kfree(sub); - atomic_dec(&tn->subscription_count); +static void tipc_subscrb_get(struct tipc_subscriber *subscriber) +{ + kref_get(&subscriber->kref); } -/** - * subscr_del - delete 
a subscription within a subscription list - * - * Called with subscriber lock held. - */ -static void subscr_del(struct tipc_subscription *sub) +static struct tipc_subscriber *tipc_subscrb_create(int conid) { - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber; - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscription_list); - kfree(sub); - atomic_dec(&tn->subscription_count); + subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC); + if (!subscriber) { + pr_warn("Subscriber rejected, no memory\n"); + return NULL; + } + kref_init(&subscriber->kref); + INIT_LIST_HEAD(&subscriber->subscrp_list); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); + + return subscriber; } -static void subscr_release(struct tipc_subscriber *subscriber) +static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) { - struct tipc_subscription *sub; - struct tipc_subscription *sub_temp; + struct tipc_subscription *sub, *temp; spin_lock_bh(&subscriber->lock); - /* Destroy any existing subscriptions for subscriber */ - list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, - subscription_list) { - if (sub->timeout != TIPC_WAIT_FOREVER) { - spin_unlock_bh(&subscriber->lock); - del_timer_sync(&sub->timer); - spin_lock_bh(&subscriber->lock); + list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, + subscrp_list) { + if (del_timer(&sub->timer)) { + tipc_subscrp_delete(sub); + tipc_subscrb_put(subscriber); } - subscr_del(sub); } spin_unlock_bh(&subscriber->lock); - /* Now destroy subscriber */ - kfree(subscriber); + tipc_subscrb_put(subscriber); } -/** - * subscr_cancel - handle subscription cancellation request - * - * Called with subscriber lock held. Routine must temporarily release lock - * to enable the subscription timeout routine to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * - * Note that fields of 's' use subscriber's endianness! - */ -static void subscr_cancel(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) +static void tipc_subscrp_delete(struct tipc_subscription *sub) { - struct tipc_subscription *sub; - struct tipc_subscription *sub_temp; - int found = 0; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + kfree(sub); + atomic_dec(&tn->subscription_count); +} +static void tipc_subscrp_cancel(struct tipc_subscr *s, + struct tipc_subscriber *subscriber) +{ + struct tipc_subscription *sub, *temp; + + spin_lock_bh(&subscriber->lock); /* Find first matching subscription, exit if not found */ - list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, - subscription_list) { + list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, + subscrp_list) { if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; + if (del_timer(&sub->timer)) { + tipc_subscrp_delete(sub); + tipc_subscrb_put(subscriber); + } break; } } - if (!found) - return; - - /* Cancel subscription timer (if used), then delete subscription */ - if (sub->timeout != TIPC_WAIT_FOREVER) { - sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(&subscriber->lock); - del_timer_sync(&sub->timer); - spin_lock_bh(&subscriber->lock); - } - subscr_del(sub); + spin_unlock_bh(&subscriber->lock); } -/** - * subscr_subscribe - create subscription for subscriber - * - * Called with subscriber lock held. 
- */ -static int subscr_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) +static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s, + struct tipc_subscriber *subscriber, + struct tipc_subscription **sub_p) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; @@ -241,7 +230,7 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, /* Detect & process a subscription cancellation request */ if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - subscr_cancel(s, subscriber); + tipc_subscrp_cancel(s, subscriber); return 0; } @@ -273,62 +262,51 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, kfree(sub); return -EINVAL; } - list_add(&sub->subscription_list, &subscriber->subscription_list); + spin_lock_bh(&subscriber->lock); + list_add(&sub->subscrp_list, &subscriber->subscrp_list); + spin_unlock_bh(&subscriber->lock); sub->subscriber = subscriber; sub->swap = swap; - memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); + memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); - if (sub->timeout != TIPC_WAIT_FOREVER) { - setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); - mod_timer(&sub->timer, jiffies + sub->timeout); - } + setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); + if (sub->timeout != TIPC_WAIT_FOREVER) + sub->timeout += jiffies; + if (!mod_timer(&sub->timer, sub->timeout)) + tipc_subscrb_get(subscriber); *sub_p = sub; return 0; } /* Handle one termination request for the subscriber */ -static void subscr_conn_shutdown_event(int conid, void *usr_data) +static void tipc_subscrb_shutdown_cb(int conid, void *usr_data) { - subscr_release((struct tipc_subscriber *)usr_data); + tipc_subscrb_delete((struct tipc_subscriber *)usr_data); } /* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len) +static void tipc_subscrb_rcv_cb(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&subscriber->lock); - subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); + tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub); if (sub) tipc_nametbl_subscribe(sub); else tipc_conn_terminate(tn->topsrv, subscriber->conid); - spin_unlock_bh(&subscriber->lock); } /* Handle one request to establish a new subscriber */ -static void *subscr_named_msg_event(int conid) +static void *tipc_subscrb_connect_cb(int conid) { - struct tipc_subscriber *subscriber; - - /* Create subscriber object */ - subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); - if (subscriber == NULL) { - pr_warn("Subscriber rejected, no memory\n"); - return NULL; - } - INIT_LIST_HEAD(&subscriber->subscription_list); - subscriber->conid = conid; - spin_lock_init(&subscriber->lock); - - return (void *)subscriber; + return (void *)tipc_subscrb_create(conid); } -int tipc_subscr_start(struct net *net) +int tipc_topsrv_start(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); const char name[] = "topology_server"; @@ -355,9 +333,9 @@ int tipc_subscr_start(struct net *net) topsrv->imp = TIPC_CRITICAL_IMPORTANCE; 
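A digression on the reference counting introduced above: the timer handling in tipc_subscrp_create() and the two del_timer() call sites encode an ownership rule that is easy to miss. A pending timer holds exactly one reference on the subscriber, taken when mod_timer() reports the timer was previously inactive, and dropped either by the expiry path or by whichever caller wins del_timer(). A minimal sketch of the same rule, collapsing subscriber and subscription into one object for brevity (sub_arm()/sub_cancel() are illustrative names, not TIPC API):

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/timer.h>

struct subscriber {
	struct kref kref;
	struct timer_list timer;
};

static void subscriber_release(struct kref *kref)
{
	kfree(container_of(kref, struct subscriber, kref));
}

static void sub_timeout(unsigned long data)
{
	struct subscriber *s = (struct subscriber *)data;

	/* the expiry path consumes the reference the pending timer held */
	kref_put(&s->kref, subscriber_release);
}

static void sub_arm(struct subscriber *s, unsigned long expires)
{
	setup_timer(&s->timer, sub_timeout, (unsigned long)s);
	/* mod_timer() returns 0 if the timer was inactive, so the
	 * timer's reference is taken exactly once */
	if (!mod_timer(&s->timer, expires))
		kref_get(&s->kref);
}

static void sub_cancel(struct subscriber *s)
{
	/* del_timer() returns 1 only if it deactivated a pending timer,
	 * so exactly one of sub_cancel() and sub_timeout() drops the ref */
	if (del_timer(&s->timer))
		kref_put(&s->kref, subscriber_release);
}

This is why tipc_subscrb_delete() and tipc_subscrp_cancel() above call tipc_subscrb_put() only when del_timer() succeeds.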
topsrv->type = SOCK_SEQPACKET; topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); - topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; - topsrv->tipc_conn_new = subscr_named_msg_event; - topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; + topsrv->tipc_conn_new = tipc_subscrb_connect_cb; + topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; strncpy(topsrv->name, name, strlen(name) + 1); tn->topsrv = topsrv; @@ -366,7 +344,7 @@ int tipc_subscr_start(struct net *net) return tipc_server_start(topsrv); } -void tipc_subscr_stop(struct net *net) +void tipc_topsrv_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_server *topsrv = tn->topsrv; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 33488bd9fe3c..92ee18cc5fe6 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -54,7 +54,7 @@ struct tipc_subscriber; * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @subscrp_list: adjacent subscriptions in subscriber's subscription list * @server_ref: object reference of server port associated with subscription * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription @@ -67,17 +67,17 @@ struct tipc_subscription { u32 filter; struct timer_list timer; struct list_head nameseq_list; - struct list_head subscription_list; + struct list_head subscrp_list; int swap; struct tipc_event evt; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, int must); -int tipc_subscr_start(struct net *net); -void tipc_subscr_stop(struct net *net); +int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper); +void tipc_subscrp_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, u32 event, + u32 port_ref, u32 node, int must); +int tipc_topsrv_start(struct net *net); +void tipc_topsrv_stop(struct net *net); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..941b3d26e3bf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -620,7 +620,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock *unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { struct sock *sk = NULL; struct unix_sock *u; @@ -629,7 +629,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); if (!sk) goto out; @@ -688,7 +688,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock) ? 0 : -ENOMEM; + return unix_create1(net, sock, kern) ? 
0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1088,7 +1088,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL); + newsk = unix_create1(sock_net(sk), NULL, 0); if (newsk == NULL) goto out; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2ec86e652a19..df5fc6b340f1 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -581,13 +581,14 @@ struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, gfp_t priority, - unsigned short type) + unsigned short type, + int kern) { struct sock *sk; struct vsock_sock *psk; struct vsock_sock *vsk; - sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern); if (!sk) return NULL; @@ -1866,7 +1867,7 @@ static int vsock_create(struct net *net, struct socket *sock, sock->state = SS_UNCONNECTED; - return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM; } static const struct net_proto_family vsock_family_ops = { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c294da095461..1f63daff3965 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1022,7 +1022,7 @@ static int vmci_transport_recv_listen(struct sock *sk, } pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type); + sk->sk_type, 0); if (!pending) { vmci_transport_send_reset(sk, pkt); return -ENOMEM; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 7aaf7415dc4c..915b328b9ac5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_chandef_usable); /* - * For GO only, check if the channel can be used under permissive conditions - * mandated by the some regulatory bodies, i.e., the channel is marked with - * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * Check if the channel can be used under permissive conditions mandated by + * some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface * associated to an AP on the same channel or on the same UNII band * (assuming that the AP is an authorized master). - * In addition allow the GO to operate on a channel on which indoor operation is + * In addition allow operation on a channel on which indoor operation is * allowed, iff we are currently operating in an indoor environment. 
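A note on the af_unix, vsock and vmci hunks a few hunks up, before the wireless relaxation logic continues below (x25 later in this diff follows the same pattern): they all thread the kern argument of the protocol family's ->create() hook into sk_alloc(), while sockets minted internally for an accepted or connecting peer pass 0, because the resulting socket belongs to user space. A sketch of the shape of that conversion for a hypothetical family (myproto_*, PF_MYPROTO are illustrative only, not a real family):

#include <net/sock.h>

#define PF_MYPROTO AF_MAX		/* placeholder family number */
static struct proto myproto_proto;	/* illustrative */

static int myproto_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct sock *sk;

	/* sk_alloc() now learns whether this is a kernel-internal
	 * socket, e.g. for deciding how the netns is refcounted */
	sk = sk_alloc(net, PF_MYPROTO, GFP_KERNEL, &myproto_proto, kern);
	if (!sk)
		return -ENOMEM;
	sock_init_data(sock, sk);
	return 0;
}

/* A child created while servicing an existing listener belongs to
 * user space, so kern is 0 no matter how the listener was created. */
static struct sock *myproto_make_child(struct sock *listener)
{
	return sk_alloc(sock_net(listener), PF_MYPROTO, GFP_KERNEL,
			&myproto_proto, 0);
}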
*/ -static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, +static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, + enum nl80211_iftype iftype, struct ieee80211_channel *chan) { - struct wireless_dev *wdev_iter; - struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + struct wireless_dev *wdev; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_RTNL(); @@ -718,32 +719,48 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) return false; + /* only valid for GO and TDLS off-channel (station/p2p-CL) */ + if (iftype != NL80211_IFTYPE_P2P_GO && + iftype != NL80211_IFTYPE_STATION && + iftype != NL80211_IFTYPE_P2P_CLIENT) + return false; + if (regulatory_indoor_allowed() && (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) return true; - if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT)) return false; /* * Generally, it is possible to rely on another device/driver to allow - * the GO concurrent relaxation, however, since the device can further + * the IR concurrent relaxation, however, since the device can further * enforce the relaxation (by doing a similar verifications as this), * and thus fail the GO instantiation, consider only the interfaces of * the current registered device. */ - list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { struct ieee80211_channel *other_chan = NULL; int r1, r2; - if (wdev_iter->iftype != NL80211_IFTYPE_STATION || - !netif_running(wdev_iter->netdev)) - continue; - - wdev_lock(wdev_iter); - if (wdev_iter->current_bss) - other_chan = wdev_iter->current_bss->pub.channel; - wdev_unlock(wdev_iter); + wdev_lock(wdev); + if (wdev->iftype == NL80211_IFTYPE_STATION && + wdev->current_bss) + other_chan = wdev->current_bss->pub.channel; + + /* + * If a GO already operates on the same GO_CONCURRENT channel, + * this one (maybe the same one) can beacon as well. We allow + * the operation even if the station we relied on with + * GO_CONCURRENT is disconnected now. But then we must make sure + * we're not outdoor on an indoor-only channel. + */ + if (iftype == NL80211_IFTYPE_P2P_GO && + wdev->iftype == NL80211_IFTYPE_P2P_GO && + wdev->beacon_interval && + !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + other_chan = wdev->chandef.chan; + wdev_unlock(wdev); if (!other_chan) continue; @@ -784,7 +801,6 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR; @@ -792,13 +808,12 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); /* - * Under certain conditions suggested by the some regulatory bodies - * a GO can operate on channels marked with IEEE80211_NO_IR - * so set this flag only if such relaxations are not enabled and - * the conditions are not met. + * Under certain conditions suggested by some regulatory bodies a + * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag + * only if such relaxations are not enabled and the conditions are not + * met. 
*/ - if (iftype != NL80211_IFTYPE_P2P_GO || - !cfg80211_go_permissive_chan(rdev, chandef->chan)) + if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan)) prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dd78445c7d50..c264effd00a6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) @@ -4061,7 +4061,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; break; case CFG80211_STA_MESH_PEER_USER: - if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION && + params->plink_action != NL80211_PLINK_ACTION_BLOCK) return -EINVAL; break; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0e347f888fe9..d359e0610198 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_OFDM; if (rd_flags & NL80211_RRF_NO_OUTDOOR) channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; - if (rd_flags & NL80211_RRF_GO_CONCURRENT) - channel_flags |= IEEE80211_CHAN_GO_CONCURRENT; + if (rd_flags & NL80211_RRF_IR_CONCURRENT) + channel_flags |= IEEE80211_CHAN_IR_CONCURRENT; if (rd_flags & NL80211_RRF_NO_HT40MINUS) channel_flags |= IEEE80211_CHAN_NO_HT40MINUS; if (rd_flags & NL80211_RRF_NO_HT40PLUS) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c3ab230e4493..a750f330b8dd 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -515,10 +515,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(struct net *net) +static struct sock *x25_alloc_socket(struct net *net, int kern) { struct x25_sock *x25; - struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern); if (!sk) goto out; @@ -553,7 +553,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOBUFS; - if ((sk = x25_alloc_socket(net)) == NULL) + if ((sk = x25_alloc_socket(net, kern)) == NULL) goto out; x25 = x25_sk(sk); @@ -602,7 +602,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket(sock_net(osk))) == NULL) + if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL) goto out; x25 = x25_sk(sk); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fbcedbe33190..68ada2ca4b60 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -38,6 +38,18 @@ static int xfrm_skb_check_space(struct sk_buff *skb) return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } +/* Children define the path of the packet through the + * Linux networking. Thus, destinations are stackable. 
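To make the "stackable destinations" remark above concrete before the helper that follows: each transform in an xfrm bundle contributes a dst_entry whose ->child points at the next step, with the ordinary route at the bottom of the stack, and the output path unwinds one level per applied transform. Roughly (an illustrative reduction, not the real xfrm_output_one() body, which interleaves this with state locking, replay counters and error handling):

#include <net/dst.h>
#include <net/xfrm.h>

static void toy_apply_transforms(struct sk_buff *skb)
{
	/* dst->xfrm is only set on the stacked xfrm dst_entries */
	while (skb_dst(skb)->xfrm) {
		/* ... apply the transform described by skb_dst(skb)->xfrm,
		 * e.g. ESP encapsulation, here ... */

		/* then discard the top of the stack; the child becomes
		 * the route for the next processing step */
		skb_dst_set(skb, skb_dst_pop(skb));
	}
	/* what remains is the plain route used for the final transmit */
}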
+ */ + +static struct dst_entry *skb_dst_pop(struct sk_buff *skb) +{ + struct dst_entry *child = dst_clone(skb_dst(skb)->child); + + skb_dst_drop(skb); + return child; +} + static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb_dst(skb); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 76e3458a5419..46c6a8cf74d3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -6,29 +6,35 @@ hostprogs-y := test_verifier test_maps hostprogs-y += sock_example hostprogs-y += sockex1 hostprogs-y += sockex2 +hostprogs-y += sockex3 hostprogs-y += tracex1 hostprogs-y += tracex2 hostprogs-y += tracex3 hostprogs-y += tracex4 +hostprogs-y += tracex5 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o sockex2-objs := bpf_load.o libbpf.o sockex2_user.o +sockex3-objs := bpf_load.o libbpf.o sockex3_user.o tracex1-objs := bpf_load.o libbpf.o tracex1_user.o tracex2-objs := bpf_load.o libbpf.o tracex2_user.o tracex3-objs := bpf_load.o libbpf.o tracex3_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o +tracex5-objs := bpf_load.o libbpf.o tracex5_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) always += sockex1_kern.o always += sockex2_kern.o +always += sockex3_kern.o always += tracex1_kern.o always += tracex2_kern.o always += tracex3_kern.o always += tracex4_kern.o +always += tracex5_kern.o always += tcbpf1_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -36,15 +42,17 @@ HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex2 += -lelf +HOSTLOADLIBES_sockex3 += -lelf HOSTLOADLIBES_tracex1 += -lelf HOSTLOADLIBES_tracex2 += -lelf HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt +HOSTLOADLIBES_tracex5 += -lelf # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc -%.o: %.c +$(obj)/%.o: $(src)/%.c clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f960b5fb3ed8..f531a0b3282d 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -21,6 +21,10 @@ static unsigned long long (*bpf_ktime_get_ns)(void) = (void *) BPF_FUNC_ktime_get_ns; static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *) BPF_FUNC_trace_printk; +static void (*bpf_tail_call)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_tail_call; +static unsigned long long (*bpf_get_smp_processor_id)(void) = + (void *) BPF_FUNC_get_smp_processor_id; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 38dac5a53b51..da86a8e0a95a 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -16,6 +16,7 @@ #include <sys/ioctl.h> #include <sys/mman.h> #include <poll.h> +#include <ctype.h> #include "libbpf.h" #include "bpf_helpers.h" #include "bpf_load.h" @@ -29,6 +30,19 @@ int map_fd[MAX_MAPS]; int prog_fd[MAX_PROGS]; int event_fd[MAX_PROGS]; int prog_cnt; +int prog_array_fd = -1; + +static int populate_prog_array(const char *event, int prog_fd) +{ + int ind = atoi(event), err; + + err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); + if (err < 0) { + printf("failed to store prog_fd in prog_array\n"); + return -1; + } + return 0; +} static int load_and_attach(const char *event, struct bpf_insn *prog, int size) { @@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } + fd = bpf_prog_load(prog_type, prog, size, license, kern_version); + if (fd < 0) { + printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + if (is_socket) { + event += 6; + if (*event != '/') + return 0; + event++; + if (!isdigit(*event)) { + printf("invalid prog number\n"); + return -1; + } + return populate_prog_array(event, fd); + } + if (is_kprobe || is_kretprobe) { if (is_kprobe) event += 7; else event += 10; + if (*event == 0) { + printf("event name cannot be empty\n"); + return -1; + } + + if (isdigit(*event)) + return populate_prog_array(event, fd); + snprintf(buf, sizeof(buf), "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", is_kprobe ? 'p' : 'r', event, event); @@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) } } - fd = bpf_prog_load(prog_type, prog, size, license, kern_version); - - if (fd < 0) { - printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); - return -1; - } - - prog_fd[prog_cnt++] = fd; - - if (is_socket) - return 0; - strcpy(buf, DEBUGFS); strcat(buf, "events/kprobes/"); strcat(buf, event); @@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len) maps[i].max_entries); if (map_fd[i] < 0) return 1; + + if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + prog_array_fd = map_fd[i]; } return 0; } diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c new file mode 100644 index 000000000000..2625b987944f --- /dev/null +++ b/samples/bpf/sockex3_kern.c @@ -0,0 +1,303 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
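Before the full sockex3 flow dissector below, the tail-call mechanics in brief: bpf_tail_call() jumps to the program stored at the given prog-array index and never returns on success; if the slot is empty it falls through to the next instruction. The loader glue above is what wires the slots: for a section named "socket/N" (or a kprobe section whose name is a number), load_and_attach() stores the program's fd at index N of the object's BPF_MAP_TYPE_PROG_ARRAY. A minimal pair using those conventions (a sketch, not part of the samples):

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") jmp = {
	.type = BPF_MAP_TYPE_PROG_ARRAY,
	.key_size = sizeof(unsigned int),
	.value_size = sizeof(unsigned int),
	.max_entries = 2,
};

SEC("socket/1")	/* the loader stores this program's fd at jmp[1] */
int bpf_func_aux(struct __sk_buff *skb)
{
	/* per-protocol work would go here; like sockex3, return 0 so
	 * no bytes are delivered to the attached socket itself */
	return 0;
}

SEC("socket/0")	/* entry point, also stored at jmp[0] */
int bpf_func_entry(struct __sk_buff *skb)
{
	bpf_tail_call(skb, &jmp, 1);
	/* reached only if jmp[1] is empty: tail calls do not return */
	return 0;
}

char _license[] SEC("license") = "GPL";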
+ */ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" +#include <uapi/linux/in.h> +#include <uapi/linux/if.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/if_tunnel.h> +#include <uapi/linux/mpls.h> +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 8, +}; + +#define PARSE_VLAN 1 +#define PARSE_MPLS 2 +#define PARSE_IP 3 +#define PARSE_IPV6 4 + +/* protocol dispatch routine. + * It tail-calls next BPF program depending on eth proto + * Note, we could have used: + * bpf_tail_call(skb, &jmp_table, proto); + * but it would need large prog_array + */ +static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) +{ + switch (proto) { + case ETH_P_8021Q: + case ETH_P_8021AD: + bpf_tail_call(skb, &jmp_table, PARSE_VLAN); + break; + case ETH_P_MPLS_UC: + case ETH_P_MPLS_MC: + bpf_tail_call(skb, &jmp_table, PARSE_MPLS); + break; + case ETH_P_IP: + bpf_tail_call(skb, &jmp_table, PARSE_IP); + break; + case ETH_P_IPV6: + bpf_tail_call(skb, &jmp_table, PARSE_IPV6); + break; + } +} + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u32 ip_proto; +}; + +static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff) +{ + return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) + & (IP_MF | IP_OFFSET); +} + +static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) +{ + __u64 w0 = load_word(ctx, off); + __u64 w1 = load_word(ctx, off + 4); + __u64 w2 = load_word(ctx, off + 8); + __u64 w3 = load_word(ctx, off + 12); + + return (__u32)(w0 ^ w1 ^ w2 ^ w3); +} + +struct globals { + struct flow_keys flow; + __u32 nhoff; +}; + +struct bpf_map_def SEC("maps") percpu_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct globals), + .max_entries = 32, +}; + +/* user poor man's per_cpu until native support is ready */ +static struct globals *this_cpu_globals(void) +{ + u32 key = bpf_get_smp_processor_id(); + + return bpf_map_lookup_elem(&percpu_map, &key); +} + +/* some simple stats for user space consumption */ +struct pair { + __u64 packets; + __u64 bytes; +}; + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct flow_keys), + .value_size = sizeof(struct pair), + .max_entries = 1024, +}; + +static void update_stats(struct __sk_buff *skb, struct globals *g) +{ + struct flow_keys key = g->flow; + struct pair *value; + + value = bpf_map_lookup_elem(&hash_map, &key); + if (value) { + __sync_fetch_and_add(&value->packets, 1); + __sync_fetch_and_add(&value->bytes, skb->len); + } else { + struct pair val = {1, skb->len}; + + bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); + } +} + +static __always_inline void parse_ip_proto(struct __sk_buff *skb, + struct globals *g, __u32 ip_proto) +{ + __u32 nhoff = g->nhoff; + int poff; + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + + __u32 gre_flags = load_half(skb, + nhoff + offsetof(struct gre_hdr, flags)); + __u32 gre_proto = load_half(skb, + nhoff + offsetof(struct gre_hdr, proto)); + + if (gre_flags & (GRE_VERSION|GRE_ROUTING)) + break; + + nhoff += 4; + if (gre_flags & 
GRE_CSUM) + nhoff += 4; + if (gre_flags & GRE_KEY) + nhoff += 4; + if (gre_flags & GRE_SEQ) + nhoff += 4; + + g->nhoff = nhoff; + parse_eth_proto(skb, gre_proto); + break; + } + case IPPROTO_IPIP: + parse_eth_proto(skb, ETH_P_IP); + break; + case IPPROTO_IPV6: + parse_eth_proto(skb, ETH_P_IPV6); + break; + case IPPROTO_TCP: + case IPPROTO_UDP: + g->flow.ports = load_word(skb, nhoff); + case IPPROTO_ICMP: + g->flow.ip_proto = ip_proto; + update_stats(skb, g); + break; + default: + break; + } +} + +PROG(PARSE_IP)(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff, verlen, ip_proto; + + if (!g) + return 0; + + nhoff = g->nhoff; + + if (unlikely(ip_is_fragment(skb, nhoff))) + return 0; + + ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + + if (ip_proto != IPPROTO_GRE) { + g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); + g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); + } + + verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); + nhoff += (verlen & 0xF) << 2; + + g->nhoff = nhoff; + parse_ip_proto(skb, g, ip_proto); + return 0; +} + +PROG(PARSE_IPV6)(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff, ip_proto; + + if (!g) + return 0; + + nhoff = g->nhoff; + + ip_proto = load_byte(skb, + nhoff + offsetof(struct ipv6hdr, nexthdr)); + g->flow.src = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, saddr)); + g->flow.dst = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, daddr)); + nhoff += sizeof(struct ipv6hdr); + + g->nhoff = nhoff; + parse_ip_proto(skb, g, ip_proto); + return 0; +} + +PROG(PARSE_VLAN)(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff, proto; + + if (!g) + return 0; + + nhoff = g->nhoff; + + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + g->nhoff = nhoff; + + parse_eth_proto(skb, proto); + + return 0; +} + +PROG(PARSE_MPLS)(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff, label; + + if (!g) + return 0; + + nhoff = g->nhoff; + + label = load_word(skb, nhoff); + nhoff += sizeof(struct mpls_label); + g->nhoff = nhoff; + + if (label & MPLS_LS_S_MASK) { + __u8 verlen = load_byte(skb, nhoff); + if ((verlen & 0xF0) == 4) + parse_eth_proto(skb, ETH_P_IP); + else + parse_eth_proto(skb, ETH_P_IPV6); + } else { + parse_eth_proto(skb, ETH_P_MPLS_UC); + } + + return 0; +} + +SEC("socket/0") +int main_prog(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff = ETH_HLEN; + __u32 proto = load_half(skb, 12); + + if (!g) + return 0; + + g->nhoff = nhoff; + parse_eth_proto(skb, proto); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c new file mode 100644 index 000000000000..2617772d060d --- /dev/null +++ b/samples/bpf/sockex3_user.c @@ -0,0 +1,66 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <arpa/inet.h> + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u32 ip_proto; +}; + +struct pair { + __u64 packets; + __u64 bytes; +}; + +int main(int argc, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + 
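Note that sockex3_user.c above re-declares struct flow_keys and struct pair by hand. Map access from user space is an untyped byte copy, and the only thing the kernel checks is key_size/value_size, so these duplicated declarations must stay byte-for-byte identical with the kernel object's; a shared header is the usual defence. A hypothetical sketch (sockex3_shared.h is not an actual file in the tree):

/* sockex3_shared.h -- hypothetical header included by both
 * sockex3_kern.c and sockex3_user.c so the map key/value layout
 * cannot silently drift apart. */
#ifndef SOCKEX3_SHARED_H
#define SOCKEX3_SHARED_H

#include <linux/types.h>

struct flow_keys {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

struct pair {
	__u64 packets;
	__u64 bytes;
};

#endif

Separately, the print loop just below calls inet_ntoa() twice inside one printf(); inet_ntoa() returns a pointer to a static buffer, so src and dst will print as the same address. Formatting into two local buffers would avoid that.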
+ sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4], + sizeof(__u32)) == 0); + + if (argc > 1) + f = popen("ping -c5 localhost", "r"); + else + f = popen("netperf -l 4 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + struct flow_keys key = {}, next_key; + struct pair value; + + sleep(1); + printf("IP src.port -> dst.port bytes packets\n"); + while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[2], &next_key, &value); + printf("%s.%05d -> %s.%05d %12lld %12lld\n", + inet_ntoa((struct in_addr){htonl(next_key.src)}), + next_key.port16[0], + inet_ntoa((struct in_addr){htonl(next_key.dst)}), + next_key.port16[1], + value.bytes, value.packets); + key = next_key; + } + } + return 0; +} diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c new file mode 100644 index 000000000000..b71fe07a7a7a --- /dev/null +++ b/samples/bpf/tracex5_kern.c @@ -0,0 +1,75 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <uapi/linux/seccomp.h> +#include "bpf_helpers.h" + +#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F + +struct bpf_map_def SEC("maps") progs = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1024, +}; + +SEC("kprobe/seccomp_phase1") +int bpf_prog1(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + + /* dispatch into next BPF program depending on syscall number */ + bpf_tail_call(ctx, &progs, sd.nr); + + /* fall through -> unknown syscall */ + if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { + char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; + bpf_trace_printk(fmt, sizeof(fmt), sd.nr); + } + return 0; +} + +/* we jump here when syscall number == __NR_write */ +PROG(__NR_write)(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + if (sd.args[2] == 512) { + char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), + sd.args[0], sd.args[1], sd.args[2]); + } + return 0; +} + +PROG(__NR_read)(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + if (sd.args[2] > 128 && sd.args[2] <= 1024) { + char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), + sd.args[0], sd.args[1], sd.args[2]); + } + return 0; +} + +PROG(__NR_mmap)(struct pt_regs *ctx) +{ + char fmt[] = "mmap\n"; + bpf_trace_printk(fmt, sizeof(fmt)); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c new file mode 100644 index 000000000000..a04dd3cd4358 --- /dev/null +++ b/samples/bpf/tracex5_user.c @@ -0,0 +1,46 @@ +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <sys/prctl.h> +#include "libbpf.h" +#include "bpf_load.h" + +/* install fake seccomp program to enable seccomp code path inside the kernel, + * so that our kprobe attached to seccomp_phase1() can be triggered + */ +static void install_accept_all_seccomp(void) +{ + struct 
sock_filter filter[] = { + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .filter = filter, + }; + if (prctl(PR_SET_SECCOMP, 2, &prog)) + perror("prctl"); +} + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + install_accept_all_seccomp(); + + f = popen("dd if=/dev/zero of=/dev/null count=5", "r"); + (void) f; + + read_trace_pipe(); + + return 0; +} diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst new file mode 100644 index 000000000000..8365c4e5c513 --- /dev/null +++ b/samples/pktgen/README.rst @@ -0,0 +1,43 @@ +Sample and benchmark scripts for pktgen (packet generator) +========================================================== +This directory contains some pktgen sample and benchmark scripts that +can easily be copied and adjusted for your own use-case. + +General doc is located in the kernel tree: Documentation/networking/pktgen.txt + +Helper include files +==================== +This directory contains two helper shell files that can be "included" +by shell source'ing, namely "functions.sh" and "parameters.sh". + +Common parameters +----------------- +The parameters.sh file supports easy and consistent parameter parsing +across the sample scripts. A usage example is printed on errors:: + + Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX + -i : ($DEV) output interface/device (required) + -s : ($PKT_SIZE) packet size + -d : ($DEST_IP) destination IP + -m : ($DST_MAC) destination MAC-addr + -t : ($THREADS) threads to start + -c : ($CLONE_SKB) SKB clones sent before allocating a new SKB + -b : ($BURST) HW level bursting of SKBs + -v : ($VERBOSE) verbose + -x : ($DEBUG) debug + +The global variable being set is also listed. E.g. the required +interface/device parameter "-i" sets the variable $DEV. + +Common functions +---------------- +The functions.sh file provides three different shell functions for +configuring the different components of pktgen: pg_ctrl(), pg_thread() +and pg_set(). + +These functions correspond to pktgen's different components. + * pg_ctrl() controls "pgctrl" (/proc/net/pktgen/pgctrl) + * pg_thread() controls the kernel threads and binding to devices + * pg_set() controls setup of individual devices + +See the sample scripts for usage examples. diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh new file mode 100644 index 000000000000..205e4cde4601 --- /dev/null +++ b/samples/pktgen/functions.sh @@ -0,0 +1,121 @@ +# +# Common functions used by pktgen scripts +# - Depending on bash 3 (or higher) syntax +# +# Author: Jesper Dangaard Brouer +# License: GPL + +## -- General shell logging cmds -- +function err() { + local exitcode=$1 + shift + echo "ERROR: $@" >&2 + exit $exitcode +} + +function warn() { + echo "WARN : $@" >&2 +} + +function info() { + if [[ -n "$VERBOSE" ]]; then + echo "INFO : $@" >&2 + fi +} + +## -- Pktgen proc config commands -- ## +export PROC_DIR=/proc/net/pktgen +# +# Three different shell functions for configuring the different +# components of pktgen: +# pg_ctrl(), pg_thread() and pg_set(). +# +# These functions correspond to pktgen's different components.
+# * pg_ctrl() control "pgctrl" (/proc/net/pktgen/pgctrl) +# * pg_thread() control the kernel threads and binding to devices +# * pg_set() control setup of individual devices +function pg_ctrl() { + local proc_file="pgctrl" + proc_cmd ${proc_file} "$@" +} + +function pg_thread() { + local thread=$1 + local proc_file="kpktgend_${thread}" + shift + proc_cmd ${proc_file} "$@" +} + +function pg_set() { + local dev=$1 + local proc_file="$dev" + shift + proc_cmd ${proc_file} "$@" +} + +# More generic replacement for pgset(), that does not depend on global +# variable for proc file. +function proc_cmd() { + local result + local proc_file=$1 + # after shift, the remaining args are contained in $@ + shift + local proc_ctrl=${PROC_DIR}/$proc_file + if [[ ! -e "$proc_ctrl" ]]; then + err 3 "proc file:$proc_ctrl does not exists (dev added to thread?)" + else + if [[ ! -w "$proc_ctrl" ]]; then + err 4 "proc file:$proc_ctrl not writable, not root?!" + fi + fi + + if [[ "$DEBUG" == "yes" ]]; then + echo "cmd: $@ > $proc_ctrl" + fi + # Quoting of "$@" is important for space expansion + echo "$@" > "$proc_ctrl" + local status=$? + + result=$(grep "Result: OK:" $proc_ctrl) + # Due to pgctrl, cannot use exit code $? from grep + if [[ "$result" == "" ]]; then + grep "Result:" $proc_ctrl >&2 + fi + if (( $status != 0 )); then + err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\"" + fi +} + +# Old obsolete "pgset" function, with slightly improved err handling +function pgset() { + local result + + if [[ "$DEBUG" == "yes" ]]; then + echo "cmd: $1 > $PGDEV" + fi + echo $1 > $PGDEV + local status=$? + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [[ "$result" == "" ]]; then + cat $PGDEV | fgrep Result: + fi + if (( $status != 0 )); then + err 5 "Write error($status) occurred cmd: \"$1 > $PGDEV\"" + fi +} + +## -- General shell tricks -- + +function root_check_run_with_sudo() { + # Trick so, program can be run as normal user, will just use "sudo" + # call as root_check_run_as_sudo "$@" + if [ "$EUID" -ne 0 ]; then + if [ -x $0 ]; then # Directly executable use sudo + info "Not root, running with sudo" + sudo "$0" "$@" + exit $? 
+ fi + err 4 "cannot perform sudo run of $0" + fi +} diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh new file mode 100644 index 000000000000..33b70fdd5a4a --- /dev/null +++ b/samples/pktgen/parameters.sh @@ -0,0 +1,97 @@ +# +# Common parameter parsing for pktgen scripts +# + +function usage() { + echo "" + echo "Usage: $0 [-vx] -i ethX" + echo " -i : (\$DEV) output interface/device (required)" + echo " -s : (\$PKT_SIZE) packet size" + echo " -d : (\$DEST_IP) destination IP" + echo " -m : (\$DST_MAC) destination MAC-addr" + echo " -t : (\$THREADS) threads to start" + echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB" + echo " -b : (\$BURST) HW level bursting of SKBs" + echo " -v : (\$VERBOSE) verbose" + echo " -x : (\$DEBUG) debug" + echo "" +} + +## --- Parse command line arguments / parameters --- +## echo "Commandline options:" +while getopts "s:i:d:m:t:c:b:vxh" option; do + case $option in + i) # interface + export DEV=$OPTARG + info "Output device set to: DEV=$DEV" + ;; + s) + export PKT_SIZE=$OPTARG + info "Packet size set to: PKT_SIZE=$PKT_SIZE bytes" + ;; + d) # destination IP + export DEST_IP=$OPTARG + info "Destination IP set to: DEST_IP=$DEST_IP" + ;; + m) # MAC + export DST_MAC=$OPTARG + info "Destination MAC set to: DST_MAC=$DST_MAC" + ;; + t) + export THREADS=$OPTARG + export CPU_THREADS=$OPTARG + let "CPU_THREADS -= 1" + info "Number of threads to start: $THREADS (0 to $CPU_THREADS)" + ;; + c) + export CLONE_SKB=$OPTARG + info "CLONE_SKB=$CLONE_SKB" + ;; + b) + export BURST=$OPTARG + info "SKB bursting: BURST=$BURST" + ;; + v) + export VERBOSE=yes + info "Verbose mode: VERBOSE=$VERBOSE" + ;; + x) + export DEBUG=yes + info "Debug mode: DEBUG=$DEBUG" + ;; + h|?|*) + usage; + err 2 "[ERROR] Unknown parameters!!!" + esac +done +shift $(( $OPTIND - 1 )) + +if [ -z "$PKT_SIZE" ]; then + # NIC adds 4 bytes CRC + export PKT_SIZE=60 + info "Default packet size set to: set to: $PKT_SIZE bytes" +fi + +if [ -z "$THREADS" ]; then + # Zero CPU threads means one thread, because CPU numbers are zero indexed + export CPU_THREADS=0 + export THREADS=1 +fi + +if [ -z "$DEV" ]; then + usage + err 2 "Please specify output device" +fi + +if [ -z "$DST_MAC" ]; then + warn "Missing destination MAC address" +fi + +if [ -z "$DEST_IP" ]; then + warn "Missing destination IP address" +fi + +if [ ! -d /proc/net/pktgen ]; then + info "Loading kernel module: pktgen" + modprobe pktgen +fi diff --git a/samples/pktgen/pktgen.conf-1-1 b/samples/pktgen/pktgen.conf-1-1 deleted file mode 100755 index f91daad9e916..000000000000 --- a/samples/pktgen/pktgen.conf-1-1 +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. One CPU example. We add eth1. - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - - -# device config -# delay 0 means maximum speed. 
- -CLONE_SKB="clone_skb 1000000" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 60" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst 10.10.11.2" - pgset "dst_mac 00:04:23:08:91:dc" - - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 diff --git a/samples/pktgen/pktgen.conf-2-1 b/samples/pktgen/pktgen.conf-2-1 deleted file mode 100755 index e108e97d6d89..000000000000 --- a/samples/pktgen/pktgen.conf-2-1 +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. Two CPU example. We add eth1 to the first -# and leave the second idle. - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - -# We need to remove old config since we dont use this thread. We can only -# one NIC on one CPU due to affinity reasons. - -PGDEV=/proc/net/pktgen/kpktgend_1 - echo "Removing all devices" - pgset "rem_device_all" - -# device config -# delay 0 means maximum speed. - -CLONE_SKB="clone_skb 1000000" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 60" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst 10.10.11.2" - pgset "dst_mac 00:04:23:08:91:dc" - - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 diff --git a/samples/pktgen/pktgen.conf-2-2 b/samples/pktgen/pktgen.conf-2-2 deleted file mode 100755 index acea15503e71..000000000000 --- a/samples/pktgen/pktgen.conf-2-2 +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. Two CPU example. We add eth1, eth2 respectively. - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - -PGDEV=/proc/net/pktgen/kpktgend_1 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth2" - pgset "add_device eth2" - - -# device config -# delay 0 means maximum speed. 
- -CLONE_SKB="clone_skb 1000000" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 60" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst 10.10.11.2" - pgset "dst_mac 00:04:23:08:91:dc" - -PGDEV=/proc/net/pktgen/eth2 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst 192.168.2.2" - pgset "dst_mac 00:04:23:08:91:de" - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2 diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh new file mode 100755 index 000000000000..cb1590331b47 --- /dev/null +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# +# Benchmark script: +# - developed for benchmarking ingress qdisc path +# +# Script for injecting packets into RX path of the stack with pktgen +# "xmit_mode netif_receive". With an invalid dst_mac this will only +# measure the ingress code path as packets gets dropped in ip_rcv(). +# +# This script don't really need any hardware. It benchmarks software +# RX path just after NIC driver level. With bursting is also +# "removes" the SKB alloc/free overhead. +# +# Setup scenarios for measuring ingress qdisc (with invalid dst_mac): +# ------------------------------------------------------------------ +# (1) no ingress (uses static_key_false(&ingress_needed)) +# +# (2) ingress on other dev (change ingress_needed and calls +# handle_ing() but exit early) +# +# config: tc qdisc add dev $SOMEDEV handle ffff: ingress +# +# (3) ingress on this dev, handle_ing() -> tc_classify() +# +# config: tc qdisc add dev $DEV handle ffff: ingress +# +# (4) ingress on this dev + drop at u32 classifier/action. +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +source ${basedir}/parameters.sh +# Using invalid DST_MAC will cause the packets to get dropped in +# ip_rcv() which is part of the test +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +[ -z "$BURST" ] && BURST=1024 + +# Base Config +DELAY="0" # Zero means max speed +COUNT="10000000" # Zero means indefinitely + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + # The device name is extended with @name, using thread number to + # make then unique, but any name will do. + dev=${DEV}@${thread} + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Base config of dev + pg_set $dev "flag QUEUE_MAP_CPU" + pg_set $dev "count $COUNT" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + pg_set $dev "flag NO_TIMESTAMP" + + # Destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst $DEST_IP" + + # Inject packet into RX path of stack + pg_set $dev "xmit_mode netif_receive" + + # Burst allow us to avoid measuring SKB alloc/free overhead + pg_set $dev "burst $BURST" +done + +# start_run +echo "Running... 
ctrl^C to stop" >&2 +pg_ctrl "start" +echo "Done" >&2 + +# Print results +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" +done diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh new file mode 100755 index 000000000000..8c9d318c221b --- /dev/null +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Simple example: +# * pktgen sending with single thread and single interface +# * flow variation via random UDP source port +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +# - go look in parameters.sh to see which setting are avail +# - required param is the interface "-i" stored in $DEV +source ${basedir}/parameters.sh +# +# Set some default params, if they didn't get set +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$CLONE_SKB" ] && CLONE_SKB="0" +# Example enforce param "-m" for dst_mac +[ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac" + +# Base Config +DELAY="0" # Zero means max speed +COUNT="100000" # Zero means indefinitely + +# Flow variation random source port between min and max +UDP_MIN=9 +UDP_MAX=109 + +# General cleanup everything since last run +# (especially important if other threads were configured by other scripts) +pg_ctrl "reset" + +# Add remove all other devices and add_device $DEV to thread 0 +thread=0 +pg_thread $thread "rem_device_all" +pg_thread $thread "add_device" $DEV + +# How many packets to send (zero means indefinitely) +pg_set $DEV "count $COUNT" + +# Reduce alloc cost by sending same SKB many times +# - this obviously affects the randomness within the packet +pg_set $DEV "clone_skb $CLONE_SKB" + +# Set packet size +pg_set $DEV "pkt_size $PKT_SIZE" + +# Delay between packets (zero means max speed) +pg_set $DEV "delay $DELAY" + +# Flag example disabling timestamping +pg_set $DEV "flag NO_TIMESTAMP" + +# Destination +pg_set $DEV "dst_mac $DST_MAC" +pg_set $DEV "dst $DEST_IP" + +# Setup random UDP port src range +pg_set $DEV "flag UDPSRC_RND" +pg_set $DEV "udp_src_min $UDP_MIN" +pg_set $DEV "udp_src_max $UDP_MAX" + +# start_run +echo "Running... 
ctrl^C to stop" >&2 +pg_ctrl "start" +echo "Done" >&2 + +# Print results +echo "Result device: $DEV" +cat /proc/net/pktgen/$DEV diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh new file mode 100755 index 000000000000..32467aea8e47 --- /dev/null +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# +# Multiqueue: Using pktgen threads for sending on multiple CPUs +# * adding devices to kernel threads +# * notice the naming scheme for keeping device names unique +# * nameing scheme: dev@thread_number +# * flow variation via random UDP source port +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" +# +# Required param: -i dev in $DEV +source ${basedir}/parameters.sh + +# Base Config +DELAY="0" # Zero means max speed +COUNT="100000" # Zero means indefinitely +[ -z "$CLONE_SKB" ] && CLONE_SKB="0" + +# Flow variation random source port between min and max +UDP_MIN=9 +UDP_MAX=109 + +# (example of setting default params in your script) +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + # The device name is extended with @name, using thread number to + # make then unique, but any name will do. + dev=${DEV}@${thread} + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Notice config queue to map to cpu (mirrors smp_processor_id()) + # It is beneficial to map IRQ /proc/irq/*/smp_affinity 1:1 to CPU number + pg_set $dev "flag QUEUE_MAP_CPU" + + # Base config of dev + pg_set $dev "count $COUNT" + pg_set $dev "clone_skb $CLONE_SKB" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + + # Flag example disabling timestamping + pg_set $dev "flag NO_TIMESTAMP" + + # Destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst $DEST_IP" + + # Setup random UDP port src range + pg_set $dev "flag UDPSRC_RND" + pg_set $dev "udp_src_min $UDP_MIN" + pg_set $dev "udp_src_max $UDP_MAX" +done + +# start_run +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" +echo "Done" >&2 + +# Print results +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" +done diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh new file mode 100755 index 000000000000..775f5d0a1e53 --- /dev/null +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# +# Script for max single flow performance +# - If correctly tuned[1], single CPU 10G wirespeed small pkts is possible[2] +# +# Using pktgen "burst" option (use -b $N) +# - To boost max performance +# - Avail since: kernel v3.18 +# * commit 38b2cf2982dc73 ("net: pktgen: packet bursting via skb->xmit_more") +# - This avoids writing the HW tailptr on every driver xmit +# - The performance boost is impressive, see commit and blog [2] +# +# Notice: On purpose generates a single (UDP) flow towards target, +# reason behind this is to only overload/activate a single CPU on +# target host. And no randomness for pktgen also makes it faster. 
+ +# For tuning, see: +# [1] http://netoptimizer.blogspot.dk/2014/06/pktgen-for-network-overload-testing.html +# [2] http://netoptimizer.blogspot.dk/2014/10/unlocked-10gbps-tx-wirespeed-smallest.html +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +source ${basedir}/parameters.sh +# Set some default params, if they didn't get set +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +[ -z "$BURST" ] && BURST=32 +[ -z "$CLONE_SKB" ] && CLONE_SKB="100000" + +# Base Config +DELAY="0" # Zero means max speed +COUNT="0" # Zero means indefinitely + +# General cleanup of everything since the last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + + # Remove all other devices and add_device $dev to this thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Base config + pg_set $dev "flag QUEUE_MAP_CPU" + pg_set $dev "count $COUNT" + pg_set $dev "clone_skb $CLONE_SKB" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + pg_set $dev "flag NO_TIMESTAMP" + + # Destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst $DEST_IP" + + # Setup burst; for easy testing -b 0 disables bursting + # (internally in pktgen the default and minimum burst=1) + if [[ ${BURST} -ne 0 ]]; then + pg_set $dev "burst $BURST" + else + info "$dev: Not using burst" + fi +done + +# Runs when the user hits control-c +function control_c() { + # Print results + for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" + done +} +# trap keyboard interrupt (Ctrl-C) +trap control_c SIGINT + +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 6f6733331d95..08c2a36ef7a9 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -272,7 +272,7 @@ int main(int argc, char **argv) const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; - const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; + const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; int port_off = 2, tries = 5, ret;
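For context on the expectation change in that last hunk: psock_fanout exercises PACKET_FANOUT groups, and the rollover numbers appear to have been updated to match reworked rollover behaviour elsewhere in this series, where part of the first batch already spills to the second socket once the first ring comes under pressure. Joining a fanout group in rollover mode looks like this (group id 42 is arbitrary):

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <sys/socket.h>

static int join_rollover_group(int fd)
{
	/* low 16 bits: group id; high 16 bits: fanout mode */
	int val = 42 | (PACKET_FANOUT_ROLLOVER << 16);

	return setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
			  &val, sizeof(val));
}

/* usage sketch:
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
 *	if (fd >= 0 && join_rollover_group(fd))
 *		perror("PACKET_FANOUT");
 */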