diff options
908 files changed, 27333 insertions, 12900 deletions
diff --git a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl index 7ac7d7262bb7..3c3514815cd5 100644 --- a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl +++ b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl @@ -1,23 +1,18 @@ -What: /sys/devices/platform/<i2c-demux-name>/cur_master +What: /sys/devices/platform/<i2c-demux-name>/available_masters Date: January 2016 KernelVersion: 4.6 Contact: Wolfram Sang <wsa@the-dreams.de> Description: + Reading the file will give you a list of masters which can be + selected for a demultiplexed bus. The format is + "<index>:<name>". Example from a Renesas Lager board: -This file selects the active I2C master for a demultiplexed bus. + 0:/i2c@e6500000 1:/i2c@e6508000 -Write 0 there for the first master, 1 for the second etc. Reading the file will -give you a list with the active master marked. Example from a Renesas Lager -board: - -root@Lager:~# cat /sys/devices/platform/i2c@8/cur_master -* 0 - /i2c@9 - 1 - /i2c@e6520000 - 2 - /i2c@e6530000 - -root@Lager:~# echo 2 > /sys/devices/platform/i2c@8/cur_master - -root@Lager:~# cat /sys/devices/platform/i2c@8/cur_master - 0 - /i2c@9 - 1 - /i2c@e6520000 -* 2 - /i2c@e6530000 +What: /sys/devices/platform/<i2c-demux-name>/current_master +Date: January 2016 +KernelVersion: 4.6 +Contact: Wolfram Sang <wsa@the-dreams.de> +Description: + This file selects/shows the active I2C master for a demultiplexed + bus. It uses the <index> value from the file 'available_masters'. diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index 7785fb5eb93f..b5ca536e56a8 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -505,6 +505,8 @@ int main(int argc, char *argv[]) if (!loop) goto done; break; + case TASKSTATS_TYPE_NULL: + break; default: fprintf(stderr, "Unknown nested" " nla_type %d\n", @@ -512,7 +514,8 @@ int main(int argc, char *argv[]) break; } len2 += NLA_ALIGN(na->nla_len); - na = (struct nlattr *) ((char *) na + len2); + na = (struct nlattr *)((char *)na + + NLA_ALIGN(na->nla_len)); } break; diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index ccc62f145306..3f0cbbb8395f 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -192,7 +192,6 @@ nodes to be present and contain the properties described below. can be one of: "allwinner,sun6i-a31" "allwinner,sun8i-a23" - "arm,psci" "arm,realview-smp" "brcm,bcm-nsp-smp" "brcm,brahma-b15" diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index 078060a97f95..05f705e32a4a 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -18,6 +18,8 @@ Required properties for all the ethernet interfaces: - First is the Rx interrupt. This irq is mandatory. - Second is the Tx completion interrupt. This is supported only on SGMII based 1GbE and 10GbE interfaces. +- channel: Ethernet to CPU, start channel (prefetch buffer) number + - Must map to the first irq and irqs must be sequential - port-id: Port number (0 or 1) - clocks: Reference to the clock entry. - local-mac-address: MAC address assigned to this device diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt index ecacfa44b1eb..d4b7f2e49984 100644 --- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt +++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt @@ -7,19 +7,45 @@ Required properties: - mode: dsa fabric mode string. only support one of dsaf modes like these: "2port-64vf", "6port-16rss", - "6port-16vf". + "6port-16vf", + "single-port". - interrupt-parent: the interrupt parent of this device. - interrupts: should contain the DSA Fabric and rcb interrupt. - reg: specifies base physical address(es) and size of the device registers. - The first region is external interface control register base and size. - The second region is SerDes base register and size. + The first region is external interface control register base and size(optional, + only used when subctrl-syscon does not exist). It is recommended using + subctrl-syscon rather than this address. + The second region is SerDes base register and size(optional, only used when + serdes-syscon in port node does not exist). It is recommended using + serdes-syscon rather than this address. The third region is the PPE register base and size. - The fourth region is dsa fabric base register and size. - The fifth region is cpld base register and size, it is not required if do not use cpld. -- phy-handle: phy handle of physicl port, 0 if not any phy device. see ethernet.txt [1]. + The fourth region is dsa fabric base register and size. It is not required for + single-port mode. +- reg-names: may be ppe-base and(or) dsaf-base. It is used to find the + corresponding reg's index. + +- phy-handle: phy handle of physical port, 0 if not any phy device. It is optional + attribute. If port node exists, phy-handle in each port node will be used. + see ethernet.txt [1]. +- subctrl-syscon: is syscon handle for external interface control register. +- reset-field-offset: is offset of reset field. Its value depends on the hardware + user manual. - buf-size: rx buffer size, should be 16-1024. - desc-num: number of description in TX and RX queue, should be 512, 1024, 2048 or 4096. +- port: subnodes of dsaf. A dsaf node may contain several port nodes(Depending + on mode of dsaf). Port node contain some attributes listed below: +- reg: is physical port index in one dsaf. +- phy-handle: phy handle of physical port. It is not required if there isn't + phy device. see ethernet.txt [1]. +- serdes-syscon: is syscon handle for SerDes register. +- cpld-syscon: is syscon handle + register offset pair for cpld register. It is + not required if there isn't cpld device. +- port-rst-offset: is offset of reset field for each port in dsaf. Its value + depends on the hardware user manual. +- port-mode-offset: is offset of port mode field for each port in dsaf. Its + value depends on the hardware user manual. + [1] Documentation/devicetree/bindings/net/phy.txt Example: @@ -28,11 +54,11 @@ dsaf0: dsa@c7000000 { compatible = "hisilicon,hns-dsaf-v1"; mode = "6port-16rss"; interrupt-parent = <&mbigen_dsa>; - reg = <0x0 0xC0000000 0x0 0x420000 - 0x0 0xC2000000 0x0 0x300000 - 0x0 0xc5000000 0x0 0x890000 + reg = <0x0 0xc5000000 0x0 0x890000 0x0 0xc7000000 0x0 0x60000>; - phy-handle = <0 0 0 0 &soc0_phy4 &soc0_phy5 0 0>; + reg-names = "ppe-base", "dsaf-base"; + subctrl-syscon = <&subctrl>; + reset-field-offset = 0; interrupts = <131 4>,<132 4>, <133 4>,<134 4>, <135 4>,<136 4>, <137 4>,<138 4>, <139 4>,<140 4>, <141 4>,<142 4>, @@ -43,4 +69,15 @@ dsaf0: dsa@c7000000 { buf-size = <4096>; desc-num = <1024>; dma-coherent; + + port@0 { + reg = 0; + phy-handle = <&phy0>; + serdes-syscon = <&serdes>; + }; + + port@1 { + reg = 1; + serdes-syscon = <&serdes>; + }; }; diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt index e6a9d1c30878..b9ff4ba6454e 100644 --- a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt +++ b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt @@ -36,6 +36,34 @@ Required properties: | | | | | | external port + This attribute is remained for compatible purpose. It is not recommended to + use it in new code. + +- port-idx-in-ae: is the index of port provided by AE. + In NIC mode of DSAF, all 6 PHYs of service DSAF are taken as ethernet ports + to the CPU. The port-idx-in-ae can be 0 to 5. Here is the diagram: + +-----+---------------+ + | CPU | + +-+-+-+---+-+-+-+-+-+-+ + | | | | | | | | + debug debug service + port port port + (0) (0) (0-5) + + In Switch mode of DSAF, all 6 PHYs of service DSAF are taken as physical + ports connected to a LAN Switch while the CPU side assume itself have one + single NIC connected to this switch. In this case, the port-idx-in-ae + will be 0 only. + +-----+-----+------+------+ + | CPU | + +-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | service| port(0) + debug debug +------------+ + port port | switch | + (0) (0) +-+-+-+-+-+-++ + | | | | | | + external port + - local-mac-address: mac addr of the ethernet interface Example: @@ -43,6 +71,6 @@ Example: ethernet@0{ compatible = "hisilicon,hns-nic-v1"; ae-handle = <&dsaf0>; - port-id = <0>; + port-idx-in-ae = <0>; local-mac-address = [a2 14 e4 4b 56 76]; }; diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 5ca79290eabf..32eaaca04d9b 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -9,7 +9,8 @@ have dual GMAC each represented by a child node.. Required properties: - compatible: Should be "mediatek,mt7623-eth" - reg: Address and length of the register set for the device -- interrupts: Should contain the frame engines interrupt +- interrupts: Should contain the three frame engines interrupts in numeric + order. These are fe_int0, fe_int1 and fe_int2. - clocks: the clock used by the core - clock-names: the names of the clock listed in the clocks property. These are "ethif", "esw", "gp2", "gp1" @@ -42,7 +43,9 @@ eth: ethernet@1b100000 { <ðsys CLK_ETHSYS_GP2>, <ðsys CLK_ETHSYS_GP1>; clock-names = "ethif", "esw", "gp2", "gp1"; - interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW + GIC_SPI 199 IRQ_TYPE_LEVEL_LOW + GIC_SPI 198 IRQ_TYPE_LEVEL_LOW>; power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>; resets = <ðsys MT2701_ETHSYS_ETH_RST>; reset-names = "eth"; diff --git a/Documentation/devicetree/bindings/net/microchip,enc28j60.txt b/Documentation/devicetree/bindings/net/microchip,enc28j60.txt new file mode 100644 index 000000000000..1dc3bc75539d --- /dev/null +++ b/Documentation/devicetree/bindings/net/microchip,enc28j60.txt @@ -0,0 +1,59 @@ +* Microchip ENC28J60 + +This is a standalone 10 MBit ethernet controller with SPI interface. + +For each device connected to a SPI bus, define a child node within +the SPI master node. + +Required properties: +- compatible: Should be "microchip,enc28j60" +- reg: Specify the SPI chip select the ENC28J60 is wired to +- interrupt-parent: Specify the phandle of the source interrupt, see interrupt + binding documentation for details. Usually this is the GPIO bank + the interrupt line is wired to. +- interrupts: Specify the interrupt index within the interrupt controller (referred + to above in interrupt-parent) and interrupt type. The ENC28J60 natively + generates falling edge interrupts, however, additional board logic + might invert the signal. +- pinctrl-names: List of assigned state names, see pinctrl binding documentation. +- pinctrl-0: List of phandles to configure the GPIO pin used as interrupt line, + see also generic and your platform specific pinctrl binding + documentation. + +Optional properties: +- spi-max-frequency: Maximum frequency of the SPI bus when accessing the ENC28J60. + According to the ENC28J80 datasheet, the chip allows a maximum of 20 MHz, however, + board designs may need to limit this value. +- local-mac-address: See ethernet.txt in the same directory. + + +Example (for NXP i.MX28 with pin control stuff for GPIO irq): + + ssp2: ssp@80014000 { + compatible = "fsl,imx28-spi"; + pinctrl-names = "default"; + pinctrl-0 = <&spi2_pins_b &spi2_sck_cfg>; + status = "okay"; + + enc28j60: ethernet@0 { + compatible = "microchip,enc28j60"; + pinctrl-names = "default"; + pinctrl-0 = <&enc28j60_pins>; + reg = <0>; + interrupt-parent = <&gpio3>; + interrupts = <3 IRQ_TYPE_EDGE_FALLING>; + spi-max-frequency = <12000000>; + }; + }; + + pinctrl@80018000 { + enc28j60_pins: enc28j60_pins@0 { + reg = <0>; + fsl,pinmux-ids = < + MX28_PAD_AUART0_RTS__GPIO_3_3 /* Interrupt */ + >; + fsl,drive-strength = <MXS_DRIVE_4mA>; + fsl,voltage = <MXS_VOLTAGE_HIGH>; + fsl,pull-up = <MXS_PULL_DISABLE>; + }; + }; diff --git a/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt index 50c4f9b00adf..e3b4809fbe82 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt +++ b/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt @@ -8,15 +8,19 @@ Required properties: of memory mapped region. - clock-names: from common clock binding: Required elements: "24m" -- rockchip,grf: phandle to the syscon managing the "general register files" - #phy-cells : from the generic PHY bindings, must be 0; Example: -edp_phy: edp-phy { - compatible = "rockchip,rk3288-dp-phy"; - rockchip,grf = <&grf>; - clocks = <&cru SCLK_EDP_24M>; - clock-names = "24m"; - #phy-cells = <0>; +grf: syscon@ff770000 { + compatible = "rockchip,rk3288-grf", "syscon", "simple-mfd"; + +... + + edp_phy: edp-phy { + compatible = "rockchip,rk3288-dp-phy"; + clocks = <&cru SCLK_EDP_24M>; + clock-names = "24m"; + #phy-cells = <0>; + }; }; diff --git a/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt index 61916f15a949..555cb0f40690 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt +++ b/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt @@ -3,17 +3,23 @@ Rockchip EMMC PHY Required properties: - compatible: rockchip,rk3399-emmc-phy - - rockchip,grf : phandle to the syscon managing the "general - register files" - #phy-cells: must be 0 - - reg: PHY configure reg address offset in "general + - reg: PHY register address offset and length in "general register files" Example: -emmcphy: phy { - compatible = "rockchip,rk3399-emmc-phy"; - rockchip,grf = <&grf>; - reg = <0xf780>; - #phy-cells = <0>; + +grf: syscon@ff770000 { + compatible = "rockchip,rk3399-grf", "syscon", "simple-mfd"; + #address-cells = <1>; + #size-cells = <1>; + +... + + emmcphy: phy@f780 { + compatible = "rockchip,rk3399-emmc-phy"; + reg = <0xf780 0x20>; + #phy-cells = <0>; + }; }; diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt index 3f6a524cc5ff..32f4a2d6d0b3 100644 --- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt @@ -1,13 +1,16 @@ == Amlogic Meson pinmux controller == Required properties for the root node: - - compatible: "amlogic,meson8-pinctrl" or "amlogic,meson8b-pinctrl" + - compatible: one of "amlogic,meson8-cbus-pinctrl" + "amlogic,meson8b-cbus-pinctrl" + "amlogic,meson8-aobus-pinctrl" + "amlogic,meson8b-aobus-pinctrl" - reg: address and size of registers controlling irq functionality === GPIO sub-nodes === -The 2 power domains of the controller (regular and always-on) are -represented as sub-nodes and each of them acts as a GPIO controller. +The GPIO bank for the controller is represented as a sub-node and it acts as a +GPIO controller. Required properties for sub-nodes are: - reg: should contain address and size for mux, pull-enable, pull and @@ -18,10 +21,6 @@ Required properties for sub-nodes are: - gpio-controller: identifies the node as a gpio controller - #gpio-cells: must be 2 -Valid sub-node names are: - - "banks" for the regular domain - - "ao-bank" for the always-on domain - === Other sub-nodes === Child nodes without the "gpio-controller" represent some desired @@ -45,7 +44,7 @@ pinctrl-bindings.txt === Example === pinctrl: pinctrl@c1109880 { - compatible = "amlogic,meson8-pinctrl"; + compatible = "amlogic,meson8-cbus-pinctrl"; reg = <0xc1109880 0x10>; #address-cells = <1>; #size-cells = <1>; @@ -61,15 +60,6 @@ pinctrl-bindings.txt #gpio-cells = <2>; }; - gpio_ao: ao-bank@c1108030 { - reg = <0xc8100014 0x4>, - <0xc810002c 0x4>, - <0xc8100024 0x8>; - reg-names = "mux", "pull", "gpio"; - gpio-controller; - #gpio-cells = <2>; - }; - nand { mux { groups = "nand_io", "nand_io_ce0", "nand_io_ce1", @@ -79,18 +69,4 @@ pinctrl-bindings.txt function = "nand"; }; }; - - uart_ao_a { - mux { - groups = "uart_tx_ao_a", "uart_rx_ao_a", - "uart_cts_ao_a", "uart_rts_ao_a"; - function = "uart_ao"; - }; - - conf { - pins = "GPIOAO_0", "GPIOAO_1", - "GPIOAO_2", "GPIOAO_3"; - bias-disable; - }; - }; }; diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt index 1068ffce9f91..fdde63a5419c 100644 --- a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt +++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt @@ -15,9 +15,10 @@ Required properties: is the rtc tick interrupt. The number of cells representing a interrupt depends on the parent interrupt controller. - clocks: Must contain a list of phandle and clock specifier for the rtc - and source clocks. -- clock-names: Must contain "rtc" and "rtc_src" entries sorted in the - same order as the clocks property. + clock and in the case of a s3c6410 compatible controller, also + a source clock. +- clock-names: Must contain "rtc" and for a s3c6410 compatible controller, + a "rtc_src" sorted in the same order as the clocks property. Example: diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index 3f0f5ce3338b..36ea940e5bb9 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -173,6 +173,10 @@ A few EV_ABS codes have special meanings: proximity of the device and while the value of the BTN_TOUCH code is 0. If the input device may be used freely in three dimensions, consider ABS_Z instead. + - BTN_TOOL_<name> should be set to 1 when the tool comes into detectable + proximity and set to 0 when the tool leaves detectable proximity. + BTN_TOOL_<name> signals the type of tool that is currently detected by the + hardware and is otherwise independent of ABS_DISTANCE and/or BTN_TOUCH. * ABS_MT_<name>: - Used to describe multitouch input events. Please see diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ecc74fa4bfde..0b3de80ec8f6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4077,6 +4077,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sector if the number is odd); i = IGNORE_DEVICE (don't bind to this device); + j = NO_REPORT_LUNS (don't use report luns + command, uas only); l = NOT_LOCKABLE (don't try to lock and unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt index 70e6275b757a..ff630a87b511 100644 --- a/Documentation/networking/gen_stats.txt +++ b/Documentation/networking/gen_stats.txt @@ -33,7 +33,8 @@ my_dumping_routine(struct sk_buff *skb, ...) { struct gnet_dump dump; - if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump) < 0) + if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump, + TCA_PAD) < 0) goto rtattr_failure; if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 || @@ -56,7 +57,8 @@ existing TLV types. my_dumping_routine(struct sk_buff *skb, ...) { if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &mystruct->lock, &dump) < 0) + TCA_XSTATS, &mystruct->lock, &dump, + TCA_PAD) < 0) goto rtattr_failure; ... } diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index f310edec8a77..7413eb05223b 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -131,13 +131,11 @@ stack. Driver should not change behaviour based on them. * LLTX driver (deprecated for hardware drivers) -NETIF_F_LLTX should be set in drivers that implement their own locking in -transmit path or don't need locking at all (e.g. software tunnels). -In ndo_start_xmit, it is recommended to use a try_lock and return -NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly -protect against other callbacks (the rules you need to find out). +NETIF_F_LLTX is meant to be used by drivers that don't need locking at all, +e.g. software tunnels. -Don't use it for new drivers. +This is also used in a few legacy drivers that implement their +own locking, don't use it for new (hardware) drivers. * netns-local device diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 0b1cf6b2a592..7fec2061a334 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -69,10 +69,9 @@ ndo_start_xmit: When the driver sets NETIF_F_LLTX in dev->features this will be called without holding netif_tx_lock. In this case the driver - has to lock by itself when needed. It is recommended to use a try lock - for this and return NETDEV_TX_LOCKED when the spin lock fails. - The locking there should also properly protect against - set_rx_mode. Note that the use of NETIF_F_LLTX is deprecated. + has to lock by itself when needed. + The locking there should also properly protect against + set_rx_mode. WARNING: use of NETIF_F_LLTX is deprecated. Don't use it for new drivers. Context: Process with BHs disabled or BH (timer), @@ -83,8 +82,6 @@ ndo_start_xmit: o NETDEV_TX_BUSY Cannot transmit packet, try later Usually a bug, means queue start/stop flow control is broken in the driver. Note: the driver must NOT put the skb in its DMA ring. - o NETDEV_TX_LOCKED Locking failed, please retry quickly. - Only valid when NETIF_F_LLTX is set. ndo_tx_timeout: Synchronization: netif_tx_lock spinlock; all TX queues frozen. diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt new file mode 100644 index 000000000000..f200467ade38 --- /dev/null +++ b/Documentation/networking/segmentation-offloads.txt @@ -0,0 +1,130 @@ +Segmentation Offloads in the Linux Networking Stack + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack +to take advantage of segmentation offload capabilities of various NICs. + +The following technologies are described: + * TCP Segmentation Offload - TSO + * UDP Fragmentation Offload - UFO + * IPIP, SIT, GRE, and UDP Tunnel Offloads + * Generic Segmentation Offload - GSO + * Generic Receive Offload - GRO + * Partial Generic Segmentation Offload - GSO_PARTIAL + +TCP Segmentation Offload +======================== + +TCP segmentation allows a device to segment a single frame into multiple +frames with a data payload size specified in skb_shinfo()->gso_size. +When TCP segmentation requested the bit for either SKB_GSO_TCP or +SKB_GSO_TCP6 should be set in skb_shinfo()->gso_type and +skb_shinfo()->gso_size should be set to a non-zero value. + +TCP segmentation is dependent on support for the use of partial checksum +offload. For this reason TSO is normally disabled if the Tx checksum +offload for a given device is disabled. + +In order to support TCP segmentation offload it is necessary to populate +the network and transport header offsets of the skbuff so that the device +drivers will be able determine the offsets of the IP or IPv6 header and the +TCP header. In addition as CHECKSUM_PARTIAL is required csum_start should +also point to the TCP header of the packet. + +For IPv4 segmentation we support one of two types in terms of the IP ID. +The default behavior is to increment the IP ID with every segment. If the +GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP +ID and all segments will use the same IP ID. If a device has +NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO +and we will either increment the IP ID for all frames, or leave it at a +static value based on driver preference. + +UDP Fragmentation Offload +========================= + +UDP fragmentation offload allows a device to fragment an oversized UDP +datagram into multiple IPv4 fragments. Many of the requirements for UDP +fragmentation offload are the same as TSO. However the IPv4 ID for +fragments should not increment as a single IPv4 datagram is fragmented. + +IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads +======================================================== + +In addition to the offloads described above it is possible for a frame to +contain additional headers such as an outer tunnel. In order to account +for such instances an additional set of segmentation offload types were +introduced including SKB_GSO_IPIP, SKB_GSO_SIT, SKB_GSO_GRE, and +SKB_GSO_UDP_TUNNEL. These extra segmentation types are used to identify +cases where there are more than just 1 set of headers. For example in the +case of IPIP and SIT we should have the network and transport headers moved +from the standard list of headers to "inner" header offsets. + +Currently only two levels of headers are supported. The convention is to +refer to the tunnel headers as the outer headers, while the encapsulated +data is normally referred to as the inner headers. Below is the list of +calls to access the given headers: + +IPIP/SIT Tunnel: + Outer Inner +MAC skb_mac_header +Network skb_network_header skb_inner_network_header +Transport skb_transport_header + +UDP/GRE Tunnel: + Outer Inner +MAC skb_mac_header skb_inner_mac_header +Network skb_network_header skb_inner_network_header +Transport skb_transport_header skb_inner_transport_header + +In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and +SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the +fact that the outer header also requests to have a non-zero checksum +included in the outer header. + +Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header +has requested a remote checksum offload. In this case the inner headers +will be left with a partial checksum and only the outer header checksum +will be computed. + +Generic Segmentation Offload +============================ + +Generic segmentation offload is a pure software offload that is meant to +deal with cases where device drivers cannot perform the offloads described +above. What occurs in GSO is that a given skbuff will have its data broken +out over multiple skbuffs that have been resized to match the MSS provided +via skb_shinfo()->gso_size. + +Before enabling any hardware segmentation offload a corresponding software +offload is required in GSO. Otherwise it becomes possible for a frame to +be re-routed between devices and end up being unable to be transmitted. + +Generic Receive Offload +======================= + +Generic receive offload is the complement to GSO. Ideally any frame +assembled by GRO should be segmented to create an identical sequence of +frames using GSO, and any sequence of frames segmented by GSO should be +able to be reassembled back to the original by GRO. The only exception to +this is IPv4 ID in the case that the DF bit is set for a given IP header. +If the value of the IPv4 ID is not sequentially incrementing it will be +altered so that it is when a frame assembled via GRO is segmented via GSO. + +Partial Generic Segmentation Offload +==================================== + +Partial generic segmentation offload is a hybrid between TSO and GSO. What +it effectively does is take advantage of certain traits of TCP and tunnels +so that instead of having to rewrite the packet headers for each segment +only the inner-most transport header and possibly the outer-most network +header need to be updated. This allows devices that do not support tunnel +offloads or tunnel offloads with checksum to still make use of segmentation. + +With the partial offload what occurs is that all headers excluding the +inner transport header are updated such that they will contain the correct +values for if the header was simply duplicated. The one exception to this +is the outer IPv4 ID field. It is up to the device drivers to guarantee +that the IPv4 ID field is incremented in the case that a given header does +not have the DF bit set. diff --git a/Documentation/usb/gadget_multi.txt b/Documentation/usb/gadget_multi.txt index 7d66a8636cb5..5faf514047e9 100644 --- a/Documentation/usb/gadget_multi.txt +++ b/Documentation/usb/gadget_multi.txt @@ -43,7 +43,7 @@ For the gadget two work under Windows two conditions have to be met: First of all, Windows need to detect the gadget as an USB composite gadget which on its own have some conditions[4]. If they are met, Windows lets USB Generic Parent Driver[5] handle the device which then -tries to much drivers for each individual interface (sort of, don't +tries to match drivers for each individual interface (sort of, don't get into too many details). The good news is: you do not have to worry about most of the diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt new file mode 100644 index 000000000000..c281ded1ba16 --- /dev/null +++ b/Documentation/x86/protection-keys.txt @@ -0,0 +1,27 @@ +Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature +which will be found on future Intel CPUs. + +Memory Protection Keys provides a mechanism for enforcing page-based +protections, but without requiring modification of the page tables +when an application changes protection domains. It works by +dedicating 4 previously ignored bits in each page table entry to a +"protection key", giving 16 possible keys. + +There is also a new user-accessible register (PKRU) with two separate +bits (Access Disable and Write Disable) for each key. Being a CPU +register, PKRU is inherently thread-local, potentially giving each +thread a different set of protections from every other thread. + +There are two new instructions (RDPKRU/WRPKRU) for reading and writing +to the new register. The feature is only available in 64-bit mode, +even though there is theoretically space in the PAE PTEs. These +permissions are enforced on data access only and have no effect on +instruction fetches. + +=========================== Config Option =========================== + +This config option adds approximately 1.5kb of text. and 50 bytes of +data to the executable. A workload which does large O_DIRECT reads +of holes in XFS files was run to exercise get_user_pages_fast(). No +performance delta was observed with the config option +enabled or disabled. diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index c518dce7da4d..5aa738346062 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -19,7 +19,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space +ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole @@ -31,8 +31,8 @@ vmalloc space is lazily synchronized into the different PML4 pages of the processes using the page fault handler, with init_level4_pgt as reference. -Current X86-64 implementations only support 40 bits of address space, -but we support up to 46 bits. This expands into MBZ space in the page tables. +Current X86-64 implementations support up to 46 bits of address space (64 TB), +which is our current limit. This expands into MBZ space in the page tables. We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). diff --git a/MAINTAINERS b/MAINTAINERS index 6ac970ba54b8..ab008013cfec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6253,8 +6253,8 @@ S: Maintained F: tools/testing/selftests KERNEL VIRTUAL MACHINE (KVM) -M: Gleb Natapov <gleb@kernel.org> M: Paolo Bonzini <pbonzini@redhat.com> +M: Radim Krčmář <rkrcmar@redhat.com> L: kvm@vger.kernel.org W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git @@ -11072,6 +11072,15 @@ S: Maintained F: drivers/clk/ti/ F: include/linux/clk/ti.h +TI ETHERNET SWITCH DRIVER (CPSW) +M: Mugunthan V N <mugunthanvnm@ti.com> +R: Grygorii Strashko <grygorii.strashko@ti.com> +L: linux-omap@vger.kernel.org +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/ti/cpsw* +F: drivers/net/ethernet/ti/davinci* + TI FLASH MEDIA INTERFACE DRIVER M: Alex Dubov <oakad@yahoo.com> S: Maintained @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc5 NAME = Blurry Fish Butt # *DOCUMENTATION* @@ -1008,7 +1008,8 @@ prepare0: archprepare FORCE prepare: prepare0 prepare-objtool ifdef CONFIG_STACK_VALIDATION - has_libelf := $(shell echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf - &> /dev/null && echo 1 || echo 0) + has_libelf := $(call try-run,\ + echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0) ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 208aae071b37..12d0284a46e5 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -593,7 +593,6 @@ config PCI_SYSCALL def_bool PCI source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" endmenu diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index ab5d5701e11d..44a578c10732 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -47,14 +47,6 @@ clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; - mdio0 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "snps,dwmac-mdio"; - phy1: ethernet-phy@1 { - reg = <1>; - }; - }; }; ehci@0x40000 { diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index f8b396c9aedb..491b3b5f22bd 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -42,6 +42,7 @@ CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 56128ea2b748..b25ee73b2e79 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -43,6 +43,7 @@ CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y diff --git a/arch/arc/include/asm/fb.h b/arch/arc/include/asm/fb.h new file mode 100644 index 000000000000..bd3f68c9ddfc --- /dev/null +++ b/arch/arc/include/asm/fb.h @@ -0,0 +1,19 @@ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include <linux/fb.h> +#include <linux/fs.h> +#include <asm/page.h> + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/arm/boot/dts/am335x-baltos-ir5221.dts b/arch/arm/boot/dts/am335x-baltos-ir5221.dts index 6c667fb35449..4e28d87e9356 100644 --- a/arch/arm/boot/dts/am335x-baltos-ir5221.dts +++ b/arch/arm/boot/dts/am335x-baltos-ir5221.dts @@ -470,9 +470,12 @@ }; &cpsw_emac0 { - phy_id = <&davinci_mdio>, <0>; phy-mode = "rmii"; dual_emac_res_vlan = <1>; + fixed-link { + speed = <100>; + full-duplex; + }; }; &cpsw_emac1 { diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 55ca9c7dcf6a..0467846b4cc3 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -860,7 +860,7 @@ ti,no-idle-on-init; reg = <0x50000000 0x2000>; interrupts = <100>; - dmas = <&edma 52>; + dmas = <&edma 52 0>; dma-names = "rxtx"; gpmc,num-cs = <7>; gpmc,num-waitpins = <2>; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 6e4f5af3d8f8..ba580a9da390 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -207,7 +207,7 @@ ti,tptcs = <&edma_tptc0 7>, <&edma_tptc1 5>, <&edma_tptc2 0>; - ti,edma-memcpy-channels = <32 33>; + ti,edma-memcpy-channels = <58 59>; }; edma_tptc0: tptc@49800000 { @@ -884,7 +884,7 @@ gpmc: gpmc@50000000 { compatible = "ti,am3352-gpmc"; ti,hwmods = "gpmc"; - dmas = <&edma 52>; + dmas = <&edma 52 0>; dma-names = "rxtx"; clocks = <&l3s_gclk>; clock-names = "fck"; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 83dfafaaba1b..d5dd72047a7e 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -794,3 +794,8 @@ tx-num-evt = <32>; rx-num-evt = <32>; }; + +&synctimer_32kclk { + assigned-clocks = <&mux_synctimer32k_ck>; + assigned-clock-parents = <&clkdiv32k_ick>; +}; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 0a5fc5d02ce2..4168eb9dd369 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -99,13 +99,6 @@ #cooling-cells = <2>; }; - extcon_usb1: extcon_usb1 { - compatible = "linux,extcon-usb-gpio"; - id-gpio = <&gpio7 25 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&extcon_usb1_pins>; - }; - hdmi0: connector { compatible = "hdmi-connector"; label = "hdmi"; @@ -349,12 +342,6 @@ >; }; - extcon_usb1_pins: extcon_usb1_pins { - pinctrl-single,pins = < - DRA7XX_CORE_IOPAD(0x37ec, PIN_INPUT_PULLUP | MUX_MODE14) /* uart1_rtsn.gpio7_25 */ - >; - }; - tpd12s015_pins: pinmux_tpd12s015_pins { pinctrl-single,pins = < DRA7XX_CORE_IOPAD(0x37b0, PIN_OUTPUT | MUX_MODE14) /* gpio7_10 CT_CP_HPD */ @@ -706,10 +693,6 @@ pinctrl-0 = <&usb1_pins>; }; -&omap_dwc3_1 { - extcon = <&extcon_usb1>; -}; - &omap_dwc3_2 { extcon = <&extcon_usb2>; }; diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 3710755c6d76..85d2c377c332 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -117,7 +117,7 @@ }; /* USB part of the eSATA/USB 2.0 port */ - usb@50000 { + usb@58000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/dm814x-clocks.dtsi b/arch/arm/boot/dts/dm814x-clocks.dtsi index e0ea6a93a22e..792a64ee0df7 100644 --- a/arch/arm/boot/dts/dm814x-clocks.dtsi +++ b/arch/arm/boot/dts/dm814x-clocks.dtsi @@ -4,6 +4,157 @@ * published by the Free Software Foundation. */ +&pllss { + /* + * See TRM "2.6.10 Connected outputso DPLLS" and + * "2.6.11 Connected Outputs of DPLLJ". Only clkout is + * connected except for hdmi and usb. + */ + adpll_mpu_ck: adpll@40 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-s-clock"; + reg = <0x40 0x40>; + clocks = <&devosc_ck &devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow", "clkinphif"; + clock-output-names = "481c5040.adpll.dcoclkldo", + "481c5040.adpll.clkout", + "481c5040.adpll.clkoutx2", + "481c5040.adpll.clkouthif"; + }; + + adpll_dsp_ck: adpll@80 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x80 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5080.adpll.dcoclkldo", + "481c5080.adpll.clkout", + "481c5080.adpll.clkoutldo"; + }; + + adpll_sgx_ck: adpll@b0 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0xb0 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c50b0.adpll.dcoclkldo", + "481c50b0.adpll.clkout", + "481c50b0.adpll.clkoutldo"; + }; + + adpll_hdvic_ck: adpll@e0 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0xe0 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c50e0.adpll.dcoclkldo", + "481c50e0.adpll.clkout", + "481c50e0.adpll.clkoutldo"; + }; + + adpll_l3_ck: adpll@110 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x110 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5110.adpll.dcoclkldo", + "481c5110.adpll.clkout", + "481c5110.adpll.clkoutldo"; + }; + + adpll_isp_ck: adpll@140 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x140 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5140.adpll.dcoclkldo", + "481c5140.adpll.clkout", + "481c5140.adpll.clkoutldo"; + }; + + adpll_dss_ck: adpll@170 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x170 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5170.adpll.dcoclkldo", + "481c5170.adpll.clkout", + "481c5170.adpll.clkoutldo"; + }; + + adpll_video0_ck: adpll@1a0 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x1a0 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c51a0.adpll.dcoclkldo", + "481c51a0.adpll.clkout", + "481c51a0.adpll.clkoutldo"; + }; + + adpll_video1_ck: adpll@1d0 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x1d0 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c51d0.adpll.dcoclkldo", + "481c51d0.adpll.clkout", + "481c51d0.adpll.clkoutldo"; + }; + + adpll_hdmi_ck: adpll@200 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x200 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5200.adpll.dcoclkldo", + "481c5200.adpll.clkout", + "481c5200.adpll.clkoutldo"; + }; + + adpll_audio_ck: adpll@230 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x230 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5230.adpll.dcoclkldo", + "481c5230.adpll.clkout", + "481c5230.adpll.clkoutldo"; + }; + + adpll_usb_ck: adpll@260 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x260 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5260.adpll.dcoclkldo", + "481c5260.adpll.clkout", + "481c5260.adpll.clkoutldo"; + }; + + adpll_ddr_ck: adpll@290 { + #clock-cells = <1>; + compatible = "ti,dm814-adpll-lj-clock"; + reg = <0x290 0x30>; + clocks = <&devosc_ck &devosc_ck>; + clock-names = "clkinp", "clkinpulow"; + clock-output-names = "481c5290.adpll.dcoclkldo", + "481c5290.adpll.clkout", + "481c5290.adpll.clkoutldo"; + }; +}; + &pllss_clocks { timer1_fck: timer1_fck { #clock-cells = <0>; @@ -23,6 +174,24 @@ reg = <0x2e0>; }; + /* CPTS_RFT_CLK in RMII_REFCLK_SRC, usually sourced from auiod */ + cpsw_cpts_rft_clk: cpsw_cpts_rft_clk { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&adpll_video0_ck 1 + &adpll_video1_ck 1 + &adpll_audio_ck 1>; + ti,bit-shift = <1>; + reg = <0x2e8>; + }; + + /* REVISIT: Set up with a proper mux using RMII_REFCLK_SRC */ + cpsw_125mhz_gclk: cpsw_125mhz_gclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <125000000>; + }; + sysclk18_ck: sysclk18_ck { #clock-cells = <0>; compatible = "ti,mux-clock"; @@ -79,37 +248,6 @@ compatible = "fixed-clock"; clock-frequency = <1000000000>; }; - - sysclk4_ck: sysclk4_ck { - #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <222000000>; - }; - - sysclk6_ck: sysclk6_ck { - #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <100000000>; - }; - - sysclk10_ck: sysclk10_ck { - #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <48000000>; - }; - - cpsw_125mhz_gclk: cpsw_125mhz_gclk { - #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <125000000>; - }; - - cpsw_cpts_rft_clk: cpsw_cpts_rft_clk { - #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <250000000>; - }; - }; &prcm_clocks { @@ -138,6 +276,49 @@ clock-div = <78125>; }; + /* L4_HS 220 MHz*/ + sysclk4_ck: sysclk4_ck { + #clock-cells = <0>; + compatible = "ti,fixed-factor-clock"; + clocks = <&adpll_l3_ck 1>; + ti,clock-mult = <1>; + ti,clock-div = <1>; + }; + + /* L4_FWCFG */ + sysclk5_ck: sysclk5_ck { + #clock-cells = <0>; + compatible = "ti,fixed-factor-clock"; + clocks = <&adpll_l3_ck 1>; + ti,clock-mult = <1>; + ti,clock-div = <2>; + }; + + /* L4_LS 110 MHz */ + sysclk6_ck: sysclk6_ck { + #clock-cells = <0>; + compatible = "ti,fixed-factor-clock"; + clocks = <&adpll_l3_ck 1>; + ti,clock-mult = <1>; + ti,clock-div = <2>; + }; + + sysclk8_ck: sysclk8_ck { + #clock-cells = <0>; + compatible = "ti,fixed-factor-clock"; + clocks = <&adpll_usb_ck 1>; + ti,clock-mult = <1>; + ti,clock-div = <1>; + }; + + sysclk10_ck: sysclk10_ck { + compatible = "ti,divider-clock"; + reg = <0x324>; + ti,max-div = <7>; + #clock-cells = <0>; + clocks = <&adpll_usb_ck 1>; + }; + aud_clkin0_ck: aud_clkin0_ck { #clock-cells = <0>; compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/dra62x-clocks.dtsi b/arch/arm/boot/dts/dra62x-clocks.dtsi index 6f98dc8df9dd..0e49741747ef 100644 --- a/arch/arm/boot/dts/dra62x-clocks.dtsi +++ b/arch/arm/boot/dts/dra62x-clocks.dtsi @@ -6,6 +6,32 @@ #include "dm814x-clocks.dtsi" +/* Compared to dm814x, dra62x does not have hdic, l3 or dss PLLs */ +&adpll_hdvic_ck { + status = "disabled"; +}; + +&adpll_l3_ck { + status = "disabled"; +}; + +&adpll_dss_ck { + status = "disabled"; +}; + +/* Compared to dm814x, dra62x has interconnect clocks on isp PLL */ +&sysclk4_ck { + clocks = <&adpll_isp_ck 1>; +}; + +&sysclk5_ck { + clocks = <&adpll_isp_ck 1>; +}; + +&sysclk6_ck { + clocks = <&adpll_isp_ck 1>; +}; + /* * Compared to dm814x, dra62x has different shifts and more mux options. * Please add the extra options for ysclk_14 and 16 if really needed. diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index d0bae06b7eb7..ef2164a99d0f 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -98,12 +98,20 @@ clock-frequency = <32768>; }; - sys_32k_ck: sys_32k_ck { + sys_clk32_crystal_ck: sys_clk32_crystal_ck { #clock-cells = <0>; compatible = "fixed-clock"; clock-frequency = <32768>; }; + sys_clk32_pseudo_ck: sys_clk32_pseudo_ck { + #clock-cells = <0>; + compatible = "fixed-factor-clock"; + clocks = <&sys_clkin1>; + clock-mult = <1>; + clock-div = <610>; + }; + virt_12000000_ck: virt_12000000_ck { #clock-cells = <0>; compatible = "fixed-clock"; @@ -2170,4 +2178,12 @@ ti,bit-shift = <22>; reg = <0x0558>; }; + + sys_32k_ck: sys_32k_ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clk32_crystal_ck>, <&sys_clk32_pseudo_ck>, <&sys_clk32_pseudo_ck>, <&sys_clk32_pseudo_ck>; + ti,bit-shift = <8>; + reg = <0x6c4>; + }; }; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index a2ddcb8c545a..45619f6162c5 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -91,8 +91,8 @@ clock-frequency = <141666666>; }; - pinctrl: pinctrl@c1109880 { - compatible = "amlogic,meson8-pinctrl"; + pinctrl_cbus: pinctrl@c1109880 { + compatible = "amlogic,meson8-cbus-pinctrl"; reg = <0xc1109880 0x10>; #address-cells = <1>; #size-cells = <1>; @@ -108,29 +108,6 @@ #gpio-cells = <2>; }; - gpio_ao: ao-bank@c1108030 { - reg = <0xc8100014 0x4>, - <0xc810002c 0x4>, - <0xc8100024 0x8>; - reg-names = "mux", "pull", "gpio"; - gpio-controller; - #gpio-cells = <2>; - }; - - uart_ao_a_pins: uart_ao_a { - mux { - groups = "uart_tx_ao_a", "uart_rx_ao_a"; - function = "uart_ao"; - }; - }; - - i2c_ao_pins: i2c_mst_ao { - mux { - groups = "i2c_mst_sck_ao", "i2c_mst_sda_ao"; - function = "i2c_mst_ao"; - }; - }; - spi_nor_pins: nor { mux { groups = "nor_d", "nor_q", "nor_c", "nor_cs"; @@ -157,4 +134,34 @@ }; }; + pinctrl_aobus: pinctrl@c8100084 { + compatible = "amlogic,meson8-aobus-pinctrl"; + reg = <0xc8100084 0xc>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + gpio_ao: ao-bank@c1108030 { + reg = <0xc8100014 0x4>, + <0xc810002c 0x4>, + <0xc8100024 0x8>; + reg-names = "mux", "pull", "gpio"; + gpio-controller; + #gpio-cells = <2>; + }; + + uart_ao_a_pins: uart_ao_a { + mux { + groups = "uart_tx_ao_a", "uart_rx_ao_a"; + function = "uart_ao"; + }; + }; + + i2c_ao_pins: i2c_mst_ao { + mux { + groups = "i2c_mst_sck_ao", "i2c_mst_sda_ao"; + function = "i2c_mst_ao"; + }; + }; + }; }; /* end of / */ diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 8bad5571af46..2bfe401a4da9 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -155,8 +155,8 @@ reg = <0xc1108000 0x4>, <0xc1104000 0x460>; }; - pinctrl: pinctrl@c1109880 { - compatible = "amlogic,meson8b-pinctrl"; + pinctrl_cbus: pinctrl@c1109880 { + compatible = "amlogic,meson8b-cbus-pinctrl"; reg = <0xc1109880 0x10>; #address-cells = <1>; #size-cells = <1>; @@ -171,6 +171,14 @@ gpio-controller; #gpio-cells = <2>; }; + }; + + pinctrl_aobus: pinctrl@c8100084 { + compatible = "amlogic,meson8b-aobus-pinctrl"; + reg = <0xc8100084 0xc>; + #address-cells = <1>; + #size-cells = <1>; + ranges; gpio_ao: ao-bank@c1108030 { reg = <0xc8100014 0x4>, diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 2bd9c83300b2..421fe9f8a9eb 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -70,7 +70,7 @@ compatible = "arm,cortex-a9-twd-timer"; clocks = <&mpu_periphclk>; reg = <0x48240600 0x20>; - interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_HIGH)>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_EDGE_RISING)>; interrupt-parent = <&gic>; }; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index ef5330578431..8193139d0d87 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1,6 +1,6 @@ /dts-v1/; -#include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/clock/qcom,gcc-msm8974.h> #include "skeleton.dtsi" @@ -460,8 +460,6 @@ clock-names = "core", "iface"; #address-cells = <1>; #size-cells = <0>; - dmas = <&blsp2_dma 20>, <&blsp2_dma 21>; - dma-names = "tx", "rx"; }; spmi_bus: spmi@fc4cf000 { @@ -479,16 +477,6 @@ interrupt-controller; #interrupt-cells = <4>; }; - - blsp2_dma: dma-controller@f9944000 { - compatible = "qcom,bam-v1.4.0"; - reg = <0xf9944000 0x19000>; - interrupts = <GIC_SPI 239 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&gcc GCC_BLSP2_AHB_CLK>; - clock-names = "bam_clk"; - #dma-cells = <1>; - qcom,ee = <0>; - }; }; smd { diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts index 0ad71b81d3a2..cc6e28f81fe4 100644 --- a/arch/arm/boot/dts/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/r8a7791-koelsch.dts @@ -661,6 +661,7 @@ }; &pcie_bus_clk { + clock-frequency = <100000000>; status = "okay"; }; diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts index 6c08314427d6..a9285d9a57cd 100644 --- a/arch/arm/boot/dts/r8a7791-porter.dts +++ b/arch/arm/boot/dts/r8a7791-porter.dts @@ -143,19 +143,11 @@ }; &pfc { - pinctrl-0 = <&scif_clk_pins>; - pinctrl-names = "default"; - scif0_pins: serial0 { renesas,groups = "scif0_data_d"; renesas,function = "scif0"; }; - scif_clk_pins: scif_clk { - renesas,groups = "scif_clk"; - renesas,function = "scif_clk"; - }; - ether_pins: ether { renesas,groups = "eth_link", "eth_mdio", "eth_rmii"; renesas,function = "eth"; @@ -229,11 +221,6 @@ status = "okay"; }; -&scif_clk { - clock-frequency = <14745600>; - status = "okay"; -}; - ðer { pinctrl-0 = <ðer_pins &phy1_pins>; pinctrl-names = "default"; @@ -414,6 +401,7 @@ }; &pcie_bus_clk { + clock-frequency = <100000000>; status = "okay"; }; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 6439f0569fe2..1cd1b6a3a72a 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -1083,9 +1083,8 @@ pcie_bus_clk: pcie_bus_clk { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <100000000>; + clock-frequency = <0>; clock-output-names = "pcie_bus"; - status = "disabled"; }; /* External SCIF clock */ @@ -1094,7 +1093,6 @@ #clock-cells = <0>; /* This value must be overridden by the board. */ clock-frequency = <0>; - status = "disabled"; }; /* External USB clock - can be overridden by the board */ @@ -1112,7 +1110,6 @@ /* This value must be overridden by the board. */ clock-frequency = <0>; clock-output-names = "can_clk"; - status = "disabled"; }; /* Special CPG clocks */ diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 07055eacbb0f..a691d590fbd1 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -63,6 +63,9 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_BU21013=y CONFIG_INPUT_MISC=y CONFIG_INPUT_AB8500_PONKEY=y +CONFIG_RMI4_CORE=y +CONFIG_RMI4_I2C=y +CONFIG_RMI4_F11=y # CONFIG_SERIO is not set CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_LEGACY_PTYS is not set diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index b23c6c81c9ad..1ee94c716a7f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -276,7 +276,7 @@ static inline int __attribute_const__ cpuid_feature_extract_field(u32 features, int feature = (features >> field) & 15; /* feature registers are signed values */ - if (feature > 8) + if (feature > 7) feature -= 16; return feature; diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 7b84657fba35..194b69923389 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -19,7 +19,7 @@ * This may need to be greater than __NR_last_syscall+1 in order to * account for the padding in the syscall table */ -#define __NR_syscalls (392) +#define __NR_syscalls (396) #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 5dd2528e9e45..2cb9dc770e1d 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -418,6 +418,8 @@ #define __NR_membarrier (__NR_SYSCALL_BASE+389) #define __NR_mlock2 (__NR_SYSCALL_BASE+390) #define __NR_copy_file_range (__NR_SYSCALL_BASE+391) +#define __NR_preadv2 (__NR_SYSCALL_BASE+392) +#define __NR_pwritev2 (__NR_SYSCALL_BASE+393) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index dfc7cd6851ad..703fa0f3cd8f 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -399,8 +399,10 @@ CALL(sys_execveat) CALL(sys_userfaultfd) CALL(sys_membarrier) - CALL(sys_mlock2) +/* 390 */ CALL(sys_mlock2) CALL(sys_copy_file_range) + CALL(sys_preadv2) + CALL(sys_pwritev2) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 139791ed473d..2c4bea39cf22 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -430,11 +430,13 @@ static void __init patch_aeabi_idiv(void) pr_info("CPU: div instructions available: patching division code\n"); fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1; + asm ("" : "+g" (fn_addr)); ((u32 *)fn_addr)[0] = udiv_instruction(); ((u32 *)fn_addr)[1] = bx_lr_instruction(); flush_icache_range(fn_addr, fn_addr + 8); fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1; + asm ("" : "+g" (fn_addr)); ((u32 *)fn_addr)[0] = sdiv_instruction(); ((u32 *)fn_addr)[1] = bx_lr_instruction(); flush_icache_range(fn_addr, fn_addr + 8); @@ -510,7 +512,7 @@ static void __init elf_hwcap_fixup(void) */ if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 || (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 && - cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3)) + cpuid_feature_extract(CPUID_EXT_ISAR4, 20) >= 3)) elf_hwcap &= ~HWCAP_SWP; } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b5384311dec4..dded1b763c16 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1112,10 +1112,17 @@ static void __init hyp_cpu_pm_init(void) { cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } +static void __init hyp_cpu_pm_exit(void) +{ + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); +} #else static inline void hyp_cpu_pm_init(void) { } +static inline void hyp_cpu_pm_exit(void) +{ +} #endif static void teardown_common_resources(void) @@ -1141,9 +1148,7 @@ static int init_subsystems(void) /* * Register CPU Hotplug notifier */ - cpu_notifier_register_begin(); - err = __register_cpu_notifier(&hyp_init_cpu_nb); - cpu_notifier_register_done(); + err = register_cpu_notifier(&hyp_init_cpu_nb); if (err) { kvm_err("Cannot register KVM init CPU notifier (%d)\n", err); return err; @@ -1193,6 +1198,8 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); + unregister_cpu_notifier(&hyp_init_cpu_nb); + hyp_cpu_pm_exit(); } static int init_vhe_mode(void) diff --git a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c index a5edd7d60266..3d039ef021e0 100644 --- a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c +++ b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c @@ -71,6 +71,7 @@ struct platform_device *__init imx_add_sdhci_esdhc_imx( if (!pdata) pdata = &default_esdhc_pdata; - return imx_add_platform_device(data->devid, data->id, res, - ARRAY_SIZE(res), pdata, sizeof(*pdata)); + return imx_add_platform_device_dmamask(data->devid, data->id, res, + ARRAY_SIZE(res), pdata, sizeof(*pdata), + DMA_BIT_MASK(32)); } diff --git a/arch/arm/mach-omap2/clockdomains7xx_data.c b/arch/arm/mach-omap2/clockdomains7xx_data.c index 7581e036bda6..ef9ed36e8a61 100644 --- a/arch/arm/mach-omap2/clockdomains7xx_data.c +++ b/arch/arm/mach-omap2/clockdomains7xx_data.c @@ -461,7 +461,7 @@ static struct clockdomain ipu_7xx_clkdm = { .cm_inst = DRA7XX_CM_CORE_AON_IPU_INST, .clkdm_offs = DRA7XX_CM_CORE_AON_IPU_IPU_CDOFFS, .dep_bit = DRA7XX_IPU_STATDEP_SHIFT, - .flags = CLKDM_CAN_HWSUP_SWSUP, + .flags = CLKDM_CAN_SWSUP, }; static struct clockdomain mpu1_7xx_clkdm = { diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index d85c24918c17..2abd53ae3e7a 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -669,9 +669,9 @@ void __init dra7xxx_check_revision(void) case 0: omap_revision = DRA722_REV_ES1_0; break; + case 1: default: - /* If we have no new revisions */ - omap_revision = DRA722_REV_ES1_0; + omap_revision = DRA722_REV_ES2_0; break; } break; diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 3c87e40650cf..49de4dd227be 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -368,6 +368,7 @@ void __init omap5_map_io(void) void __init dra7xx_map_io(void) { iotable_init(dra7xx_io_desc, ARRAY_SIZE(dra7xx_io_desc)); + omap_barriers_init(); } #endif /* @@ -736,7 +737,8 @@ void __init omap5_init_late(void) #ifdef CONFIG_SOC_DRA7XX void __init dra7xx_init_early(void) { - omap2_set_globals_tap(-1, OMAP2_L4_IO_ADDRESS(DRA7XX_TAP_BASE)); + omap2_set_globals_tap(DRA7XX_CLASS, + OMAP2_L4_IO_ADDRESS(DRA7XX_TAP_BASE)); omap2_set_globals_prcm_mpu(OMAP2_L4_IO_ADDRESS(OMAP54XX_PRCM_MPU_BASE)); omap2_control_base_init(); omap4_pm_init_early(); diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index f397bd6bd6e3..2c04f2741476 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -274,6 +274,10 @@ static inline void omap5_irq_save_context(void) */ static void irq_save_context(void) { + /* DRA7 has no SAR to save */ + if (soc_is_dra7xx()) + return; + if (!sar_base) sar_base = omap4_get_sar_ram_base(); @@ -290,6 +294,9 @@ static void irq_sar_clear(void) { u32 val; u32 offset = SAR_BACKUP_STATUS_OFFSET; + /* DRA7 has no SAR to save */ + if (soc_is_dra7xx()) + return; if (soc_is_omap54xx()) offset = OMAP5_SAR_BACKUP_STATUS_OFFSET; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index b6d62e4cdfdd..2af6ff63e3b4 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1416,9 +1416,7 @@ static void _enable_sysc(struct omap_hwmod *oh) (sf & SYSC_HAS_CLOCKACTIVITY)) _set_clockactivity(oh, oh->class->sysc->clockact, &v); - /* If the cached value is the same as the new value, skip the write */ - if (oh->_sysc_cache != v) - _write_sysconfig(v, oh); + _write_sysconfig(v, oh); /* * Set the autoidle bit only after setting the smartidle bit @@ -1481,7 +1479,9 @@ static void _idle_sysc(struct omap_hwmod *oh) _set_master_standbymode(oh, idlemode, &v); } - _write_sysconfig(v, oh); + /* If the cached value is the same as the new value, skip the write */ + if (oh->_sysc_cache != v) + _write_sysconfig(v, oh); } /** diff --git a/arch/arm/mach-omap2/omap_hwmod_81xx_data.c b/arch/arm/mach-omap2/omap_hwmod_81xx_data.c index 39736ad2a754..df8327713d06 100644 --- a/arch/arm/mach-omap2/omap_hwmod_81xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_81xx_data.c @@ -582,9 +582,11 @@ static struct omap_hwmod_ocp_if dm81xx_alwon_l3_slow__gpmc = { .user = OCP_USER_MPU, }; +/* USB needs udelay 1 after reset at least on hp t410, use 2 for margin */ static struct omap_hwmod_class_sysconfig dm81xx_usbhsotg_sysc = { .rev_offs = 0x0, .sysc_offs = 0x10, + .srst_udelay = 2, .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE | SYSC_HAS_SOFTRESET, .idlemodes = SIDLE_SMART | MSTANDBY_FORCE | MSTANDBY_SMART, diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 2dbd3785ee6f..d44e0e2f1106 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -198,7 +198,6 @@ void omap_sram_idle(void) int per_next_state = PWRDM_POWER_ON; int core_next_state = PWRDM_POWER_ON; int per_going_off; - int core_prev_state; u32 sdrc_pwr = 0; mpu_next_state = pwrdm_read_next_pwrst(mpu_pwrdm); @@ -278,16 +277,20 @@ void omap_sram_idle(void) sdrc_write_reg(sdrc_pwr, SDRC_POWER); /* CORE */ - if (core_next_state < PWRDM_POWER_ON) { - core_prev_state = pwrdm_read_prev_pwrst(core_pwrdm); - if (core_prev_state == PWRDM_POWER_OFF) { - omap3_core_restore_context(); - omap3_cm_restore_context(); - omap3_sram_restore_context(); - omap2_sms_restore_context(); - } + if (core_next_state < PWRDM_POWER_ON && + pwrdm_read_prev_pwrst(core_pwrdm) == PWRDM_POWER_OFF) { + omap3_core_restore_context(); + omap3_cm_restore_context(); + omap3_sram_restore_context(); + omap2_sms_restore_context(); + } else { + /* + * In off-mode resume path above, omap3_core_restore_context + * also handles the INTC autoidle restore done here so limit + * this to non-off mode resume paths so we don't do it twice. + */ + omap3_intc_resume_idle(); } - omap3_intc_resume_idle(); pwrdm_post_transition(NULL); diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h index 70df8f6cddcc..364418c78bf3 100644 --- a/arch/arm/mach-omap2/soc.h +++ b/arch/arm/mach-omap2/soc.h @@ -489,6 +489,7 @@ IS_OMAP_TYPE(3430, 0x3430) #define DRA752_REV_ES2_0 (DRA7XX_CLASS | (0x52 << 16) | (0x20 << 8)) #define DRA722_REV_ES1_0 (DRA7XX_CLASS | (0x22 << 16) | (0x10 << 8)) #define DRA722_REV_ES1_0 (DRA7XX_CLASS | (0x22 << 16) | (0x10 << 8)) +#define DRA722_REV_ES2_0 (DRA7XX_CLASS | (0x22 << 16) | (0x20 << 8)) void omap2xxx_check_revision(void); void omap3xxx_check_revision(void); diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index 913a319c7b00..fffb697bbf0e 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -1235,5 +1235,6 @@ static struct platform_device pxa2xx_pxa_dma = { void __init pxa2xx_set_dmac_info(int nb_channels, int nb_requestors) { pxa_dma_pdata.dma_channels = nb_channels; + pxa_dma_pdata.nb_requestors = nb_requestors; pxa_register_device(&pxa2xx_pxa_dma, &pxa_dma_pdata); } diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index c6f6ed1cbed0..36e3c79f4973 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -61,10 +61,7 @@ config SA1100_H3100 select MFD_IPAQ_MICRO help Say Y here if you intend to run this kernel on the Compaq iPAQ - H3100 handheld computer. Information about this machine and the - Linux port to this machine can be found at: - - <http://www.handhelds.org/Compaq/index.html#iPAQ_H3100> + H3100 handheld computer. config SA1100_H3600 bool "Compaq iPAQ H3600/H3700" @@ -73,10 +70,7 @@ config SA1100_H3600 select MFD_IPAQ_MICRO help Say Y here if you intend to run this kernel on the Compaq iPAQ - H3600 handheld computer. Information about this machine and the - Linux port to this machine can be found at: - - <http://www.handhelds.org/Compaq/index.html#iPAQ_H3600> + H3600 and H3700 handheld computers. config SA1100_BADGE4 bool "HP Labs BadgePAD 4" diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c index ad008e4b0c49..67d79f9c6bad 100644 --- a/arch/arm/mach-shmobile/timer.c +++ b/arch/arm/mach-shmobile/timer.c @@ -40,8 +40,7 @@ static void __init shmobile_setup_delay_hz(unsigned int max_cpu_core_hz, void __init shmobile_init_delay(void) { struct device_node *np, *cpus; - bool is_a7_a8_a9 = false; - bool is_a15 = false; + unsigned int div = 0; bool has_arch_timer = false; u32 max_freq = 0; @@ -55,27 +54,22 @@ void __init shmobile_init_delay(void) if (!of_property_read_u32(np, "clock-frequency", &freq)) max_freq = max(max_freq, freq); - if (of_device_is_compatible(np, "arm,cortex-a8") || - of_device_is_compatible(np, "arm,cortex-a9")) { - is_a7_a8_a9 = true; - } else if (of_device_is_compatible(np, "arm,cortex-a7")) { - is_a7_a8_a9 = true; - has_arch_timer = true; - } else if (of_device_is_compatible(np, "arm,cortex-a15")) { - is_a15 = true; + if (of_device_is_compatible(np, "arm,cortex-a8")) { + div = 2; + } else if (of_device_is_compatible(np, "arm,cortex-a9")) { + div = 1; + } else if (of_device_is_compatible(np, "arm,cortex-a7") || + of_device_is_compatible(np, "arm,cortex-a15")) { + div = 1; has_arch_timer = true; } } of_node_put(cpus); - if (!max_freq) + if (!max_freq || !div) return; - if (!has_arch_timer || !IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) { - if (is_a7_a8_a9) - shmobile_setup_delay_hz(max_freq, 1, 3); - else if (is_a15) - shmobile_setup_delay_hz(max_freq, 2, 4); - } + if (!has_arch_timer || !IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + shmobile_setup_delay_hz(max_freq, 1, div); } diff --git a/arch/arm/mach-uniphier/platsmp.c b/arch/arm/mach-uniphier/platsmp.c index 69141357afe8..db04142f88bc 100644 --- a/arch/arm/mach-uniphier/platsmp.c +++ b/arch/arm/mach-uniphier/platsmp.c @@ -120,7 +120,7 @@ static int __init uniphier_smp_prepare_trampoline(unsigned int max_cpus) if (ret) return ret; - uniphier_smp_rom_boot_rsv2 = ioremap(rom_rsv2_phys, sizeof(SZ_4)); + uniphier_smp_rom_boot_rsv2 = ioremap(rom_rsv2_phys, SZ_4); if (!uniphier_smp_rom_boot_rsv2) { pr_err("failed to map ROM_BOOT_RSV2 register\n"); return -ENOMEM; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index deac58d5f1f7..c941e93048ad 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -762,7 +762,8 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (!mask) return NULL; - buf = kzalloc(sizeof(*buf), gfp); + buf = kzalloc(sizeof(*buf), + gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)); if (!buf) return NULL; diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 0f8963a7e7d9..6fcaac8e200f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -281,12 +281,12 @@ __v7_ca17mp_setup: bl v7_invalidate_l1 ldmia r12, {r1-r6, lr} #ifdef CONFIG_SMP + orr r10, r10, #(1 << 6) @ Enable SMP/nAMP mode ALT_SMP(mrc p15, 0, r0, c1, c0, 1) - ALT_UP(mov r0, #(1 << 6)) @ fake it for UP - tst r0, #(1 << 6) @ SMP/nAMP mode enabled? - orreq r0, r0, #(1 << 6) @ Enable SMP/nAMP mode - orreq r0, r0, r10 @ Enable CPU-specific SMP bits - mcreq p15, 0, r0, c1, c0, 1 + ALT_UP(mov r0, r10) @ fake it for UP + orr r10, r10, r0 @ Set required bits + teq r10, r0 @ Were they already set? + mcrne p15, 0, r10, c1, c0, 1 @ No, update register #endif b __v7_setup_cont diff --git a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi index a055a5d443b7..ba0487751524 100644 --- a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi +++ b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi @@ -653,6 +653,7 @@ <0 113 4>, <0 114 4>, <0 115 4>; + channel = <12>; port-id = <1>; dma-coherent; clocks = <&xge1clk 0>; diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index ae4a173df493..5147d7698924 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -993,6 +993,7 @@ <0x0 0x65 0x4>, <0x0 0x66 0x4>, <0x0 0x67 0x4>; + channel = <0>; dma-coherent; clocks = <&xge0clk 0>; /* mac address will be overwritten by the bootloader */ diff --git a/arch/arm64/boot/dts/broadcom/vulcan.dtsi b/arch/arm64/boot/dts/broadcom/vulcan.dtsi index c49b5a85809c..85820e2bca9d 100644 --- a/arch/arm64/boot/dts/broadcom/vulcan.dtsi +++ b/arch/arm64/boot/dts/broadcom/vulcan.dtsi @@ -108,12 +108,15 @@ reg = <0x0 0x30000000 0x0 0x10000000>; reg-names = "PCI ECAM"; - /* IO 0x4000_0000 - 0x4001_0000 */ - ranges = <0x01000000 0 0x40000000 0 0x40000000 0 0x00010000 - /* MEM 0x4800_0000 - 0x5000_0000 */ - 0x02000000 0 0x48000000 0 0x48000000 0 0x08000000 - /* MEM64 pref 0x6_0000_0000 - 0x7_0000_0000 */ - 0x43000000 6 0x00000000 6 0x00000000 1 0x00000000>; + /* + * PCI ranges: + * IO no supported + * MEM 0x4000_0000 - 0x6000_0000 + * MEM64 pref 0x40_0000_0000 - 0x60_0000_0000 + */ + ranges = + <0x02000000 0 0x40000000 0 0x40000000 0 0x20000000 + 0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>; interrupt-map-mask = <0 0 0 7>; interrupt-map = /* addr pin ic icaddr icintr */ diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi index 933cba359918..b6a130c2e5a4 100644 --- a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi @@ -24,17 +24,19 @@ soc0: soc@000000000 { }; dsaf0: dsa@c7000000 { + #address-cells = <1>; + #size-cells = <0>; compatible = "hisilicon,hns-dsaf-v1"; mode = "6port-16rss"; interrupt-parent = <&mbigen_dsa>; - reg = <0x0 0xC0000000 0x0 0x420000 - 0x0 0xC2000000 0x0 0x300000 - 0x0 0xc5000000 0x0 0x890000 + reg = <0x0 0xc5000000 0x0 0x890000 0x0 0xc7000000 0x0 0x60000 >; - phy-handle = <0 0 0 0 &soc0_phy0 &soc0_phy1 0 0>; + reg-names = "ppe-base","dsaf-base"; + subctrl-syscon = <&dsaf_subctrl>; + reset-field-offset = <0>; interrupts = < /* [14] ge fifo err 8 / xge 6**/ 149 0x4 150 0x4 151 0x4 152 0x4 @@ -122,12 +124,31 @@ soc0: soc@000000000 { buf-size = <4096>; desc-num = <1024>; dma-coherent; + + port@0 { + reg = <0>; + serdes-syscon = <&serdes_ctrl0>; + }; + port@1 { + reg = <1>; + serdes-syscon = <&serdes_ctrl0>; + }; + port@4 { + reg = <4>; + phy-handle = <&soc0_phy0>; + serdes-syscon = <&serdes_ctrl1>; + }; + port@5 { + reg = <5>; + phy-handle = <&soc0_phy1>; + serdes-syscon = <&serdes_ctrl1>; + }; }; eth0: ethernet@0{ compatible = "hisilicon,hns-nic-v1"; ae-handle = <&dsaf0>; - port-id = <0>; + port-idx-in-ae = <0>; local-mac-address = [00 00 00 01 00 58]; status = "disabled"; dma-coherent; @@ -135,56 +156,25 @@ soc0: soc@000000000 { eth1: ethernet@1{ compatible = "hisilicon,hns-nic-v1"; ae-handle = <&dsaf0>; - port-id = <1>; + port-idx-in-ae = <1>; + local-mac-address = [00 00 00 01 00 59]; status = "disabled"; dma-coherent; }; - eth2: ethernet@2{ + eth2: ethernet@4{ compatible = "hisilicon,hns-nic-v1"; ae-handle = <&dsaf0>; - port-id = <2>; + port-idx-in-ae = <4>; local-mac-address = [00 00 00 01 00 5a]; status = "disabled"; dma-coherent; }; - eth3: ethernet@3{ + eth3: ethernet@5{ compatible = "hisilicon,hns-nic-v1"; ae-handle = <&dsaf0>; - port-id = <3>; + port-idx-in-ae = <5>; local-mac-address = [00 00 00 01 00 5b]; status = "disabled"; dma-coherent; }; - eth4: ethernet@4{ - compatible = "hisilicon,hns-nic-v1"; - ae-handle = <&dsaf0>; - port-id = <4>; - local-mac-address = [00 00 00 01 00 5c]; - status = "disabled"; - dma-coherent; - }; - eth5: ethernet@5{ - compatible = "hisilicon,hns-nic-v1"; - ae-handle = <&dsaf0>; - port-id = <5>; - local-mac-address = [00 00 00 01 00 5d]; - status = "disabled"; - dma-coherent; - }; - eth6: ethernet@6{ - compatible = "hisilicon,hns-nic-v1"; - ae-handle = <&dsaf0>; - port-id = <6>; - local-mac-address = [00 00 00 01 00 5e]; - status = "disabled"; - dma-coherent; - }; - eth7: ethernet@7{ - compatible = "hisilicon,hns-nic-v1"; - ae-handle = <&dsaf0>; - port-id = <7>; - local-mac-address = [00 00 00 01 00 5f]; - status = "disabled"; - dma-coherent; - }; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts index 727ae5f8c4e7..b0ed44313a5b 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts @@ -70,7 +70,6 @@ i2c3 = &i2c3; i2c4 = &i2c4; i2c5 = &i2c5; - i2c6 = &i2c6; }; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi index e682a3f52791..651c9d9d2d54 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi @@ -201,15 +201,12 @@ i2c2: i2c@58782000 { compatible = "socionext,uniphier-fi2c"; - status = "disabled"; reg = <0x58782000 0x80>; #address-cells = <1>; #size-cells = <0>; interrupts = <0 43 4>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c2>; clocks = <&i2c_clk>; - clock-frequency = <100000>; + clock-frequency = <400000>; }; i2c3: i2c@58783000 { @@ -227,12 +224,15 @@ i2c4: i2c@58784000 { compatible = "socionext,uniphier-fi2c"; + status = "disabled"; reg = <0x58784000 0x80>; #address-cells = <1>; #size-cells = <0>; interrupts = <0 45 4>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c4>; clocks = <&i2c_clk>; - clock-frequency = <400000>; + clock-frequency = <100000>; }; i2c5: i2c@58785000 { @@ -245,16 +245,6 @@ clock-frequency = <400000>; }; - i2c6: i2c@58786000 { - compatible = "socionext,uniphier-fi2c"; - reg = <0x58786000 0x80>; - #address-cells = <1>; - #size-cells = <0>; - interrupts = <0 26 4>; - clocks = <&i2c_clk>; - clock-frequency = <400000>; - }; - system_bus: system-bus@58c00000 { compatible = "socionext,uniphier-system-bus"; status = "disabled"; diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 4150fd8bae01..3f29887995bc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -151,8 +151,7 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ - VTCR_EL2_RES1) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -163,8 +162,7 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ - VTCR_EL2_RES1) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index eb7490d232a0..40a0a24e6c98 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -54,7 +54,7 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern void __init_stage2_translation(void); +extern u32 __init_stage2_translation(void); #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b7e82a795ac9..f5c6bd2541ef 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -369,11 +369,12 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */ - static inline void __cpu_init_stage2(void) { - kvm_call_hyp(__init_stage2_translation); + u32 parange = kvm_call_hyp(__init_stage2_translation); + + WARN_ONCE(parange < 40, + "PARange is %d bits, unsupported configuration!", parange); } #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4203d5f257bc..85da0f599cd6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -588,6 +588,15 @@ set_hcr: msr vpidr_el2, x0 msr vmpidr_el2, x1 + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ + cbnz x2, 1f + /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems @@ -597,6 +606,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems /* Coprocessor traps. */ mov x0, #0x33ff msr cptr_el2, x0 // Disable copro. traps to EL2 +1: #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2 @@ -734,7 +744,8 @@ ENDPROC(__secondary_switched) .macro update_early_cpu_boot_status status, tmp1, tmp2 mov \tmp2, #\status - str_l \tmp2, __early_cpu_boot_status, \tmp1 + adr_l \tmp1, __early_cpu_boot_status + str \tmp2, [\tmp1] dmb sy dc ivac, \tmp1 // Invalidate potentially stale cache line .endm diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index aef3605a8c47..18a71bcd26ee 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -52,6 +52,7 @@ static void write_pen_release(u64 val) static int smp_spin_table_cpu_init(unsigned int cpu) { struct device_node *dn; + int ret; dn = of_get_cpu_node(cpu, NULL); if (!dn) @@ -60,15 +61,15 @@ static int smp_spin_table_cpu_init(unsigned int cpu) /* * Determine the address from which the CPU is polling. */ - if (of_property_read_u64(dn, "cpu-release-addr", - &cpu_release_addr[cpu])) { + ret = of_property_read_u64(dn, "cpu-release-addr", + &cpu_release_addr[cpu]); + if (ret) pr_err("CPU %d: missing or invalid cpu-release-addr property\n", cpu); - return -1; - } + of_node_put(dn); - return 0; + return ret; } static int smp_spin_table_cpu_prepare(unsigned int cpu) diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index 5a9f3bf542b0..bcbe761a5a3d 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -20,9 +20,10 @@ #include <asm/kvm_asm.h> #include <asm/kvm_hyp.h> -void __hyp_text __init_stage2_translation(void) +u32 __hyp_text __init_stage2_translation(void) { u64 val = VTCR_EL2_FLAGS; + u64 parange; u64 tmp; /* @@ -30,7 +31,39 @@ void __hyp_text __init_stage2_translation(void) * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... */ - val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16; + parange = read_sysreg(id_aa64mmfr0_el1) & 7; + val |= parange << 16; + + /* Compute the actual PARange... */ + switch (parange) { + case 0: + parange = 32; + break; + case 1: + parange = 36; + break; + case 2: + parange = 40; + break; + case 3: + parange = 42; + break; + case 4: + parange = 44; + break; + case 5: + default: + parange = 48; + break; + } + + /* + * ... and clamp it to 40 bits, unless we have some braindead + * HW that implements less than that. In all cases, we'll + * return that value for the rest of the kernel to decide what + * to do. + */ + val |= 64 - (parange > 40 ? 40 : parange); /* * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS @@ -42,4 +75,6 @@ void __hyp_text __init_stage2_translation(void) VTCR_EL2_VS_8BIT; write_sysreg(val, vtcr_el2); + + return parange; } diff --git a/arch/m68k/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c index 8832083e1cb8..b515809be2b9 100644 --- a/arch/m68k/coldfire/gpio.c +++ b/arch/m68k/coldfire/gpio.c @@ -158,11 +158,6 @@ static int mcfgpio_to_irq(struct gpio_chip *chip, unsigned offset) return -EINVAL; } -static struct bus_type mcfgpio_subsys = { - .name = "gpio", - .dev_name = "gpio", -}; - static struct gpio_chip mcfgpio_chip = { .label = "mcfgpio", .request = mcfgpio_request, @@ -178,8 +173,7 @@ static struct gpio_chip mcfgpio_chip = { static int __init mcfgpio_sysinit(void) { - gpiochip_add_data(&mcfgpio_chip, NULL); - return subsys_system_register(&mcfgpio_subsys, NULL); + return gpiochip_add_data(&mcfgpio_chip, NULL); } core_initcall(mcfgpio_sysinit); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index d1fc4796025e..3ee6976f6088 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-amiga" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -64,7 +63,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -285,7 +283,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -359,6 +359,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -452,6 +453,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -468,6 +470,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -549,6 +552,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -557,7 +561,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -565,12 +568,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -594,7 +594,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 9bfe8be3658c..e96787ffcbce 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-apollo" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -62,7 +61,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -283,7 +281,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -341,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -411,6 +412,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -427,6 +429,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -508,6 +511,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -516,7 +520,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -524,12 +527,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -553,7 +553,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index ebdcfae55580..083fe6beac14 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-atari" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -62,7 +61,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -283,7 +281,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -350,6 +350,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -432,6 +433,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -448,6 +450,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -529,6 +532,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -537,7 +541,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -545,12 +548,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -574,7 +574,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 8acc65e54995..475130c06dcb 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-bvme6000" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -60,7 +59,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -281,7 +279,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -340,6 +340,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -403,6 +404,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -419,6 +421,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -500,6 +503,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -508,7 +512,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -516,12 +519,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -545,7 +545,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 0c6a3d52b26e..4339658c200f 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-hp300" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -62,7 +61,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -283,7 +281,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -341,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -413,6 +414,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -429,6 +431,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -510,6 +513,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -518,7 +522,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -526,12 +529,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -555,7 +555,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 12a8a6cb32f4..831cc8c3a2e2 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-mac" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -61,7 +60,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -285,7 +283,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -357,6 +357,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -435,6 +436,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -451,6 +453,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -532,6 +535,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -540,7 +544,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -548,12 +551,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -577,7 +577,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 64ff2dcb34c8..6377afeb522b 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-multi" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -71,7 +70,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -295,7 +293,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -390,6 +390,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -515,6 +516,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -531,6 +533,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -612,6 +615,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -620,7 +624,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -628,12 +631,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -657,7 +657,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 07fc6abcfe0c..4304b3d56262 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-mvme147" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -59,7 +58,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -280,7 +278,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -339,6 +339,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -403,6 +404,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -419,6 +421,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -500,6 +503,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -508,7 +512,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -516,12 +519,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -545,7 +545,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 69903ded88f7..074bda4094ff 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-mvme16x" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -60,7 +59,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -281,7 +279,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -340,6 +340,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -403,6 +404,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -419,6 +421,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -500,6 +503,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -508,7 +512,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -516,12 +519,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -545,7 +545,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index bd8401686dde..07b9fa8d7f2e 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-q40" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -60,7 +59,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -281,7 +279,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -346,6 +346,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -426,6 +427,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -442,6 +444,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -523,6 +526,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -531,7 +535,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -539,12 +542,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -568,7 +568,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5f9fb3ab9636..36e6fae02d45 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-sun3" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -57,7 +56,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -278,7 +276,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -337,6 +337,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -405,6 +406,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -421,6 +423,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -502,6 +505,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -509,7 +513,6 @@ CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -517,12 +520,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -546,7 +546,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 5d1c674530e2..903acf929511 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -1,7 +1,6 @@ CONFIG_LOCALVERSION="-sun3x" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=16 @@ -57,7 +56,6 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_IPV6=m @@ -278,7 +276,9 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y +CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -337,6 +337,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m @@ -405,6 +406,7 @@ CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -421,6 +423,7 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y +CONFIG_ORANGEFS_FS=m CONFIG_AFFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_ECRYPT_FS_MESSAGING=y @@ -502,6 +505,7 @@ CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -510,7 +514,6 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y -CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m @@ -518,12 +521,9 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m @@ -547,7 +547,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index bafaff6dcd7b..a857d82ec509 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 377 +#define NR_syscalls 379 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 0ca729665f29..9fe674bf911f 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -382,5 +382,7 @@ #define __NR_membarrier 374 #define __NR_mlock2 375 #define __NR_copy_file_range 376 +#define __NR_preadv2 377 +#define __NR_pwritev2 378 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 8bb94261ff97..d6fd6d9ced24 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -397,3 +397,5 @@ ENTRY(sys_call_table) .long sys_membarrier .long sys_mlock2 /* 375 */ .long sys_copy_file_range + .long sys_preadv2 + .long sys_pwritev2 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bd3c873951a1..88cfaa8af78e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -4,8 +4,8 @@ config PARISC select ARCH_MIGHT_HAVE_PC_PARPORT select HAVE_IDE select HAVE_OPROFILE - select HAVE_FUNCTION_TRACER if 64BIT - select HAVE_FUNCTION_GRAPH_TRACER if 64BIT + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER select ARCH_WANT_FRAME_POINTERS select RTC_CLASS select RTC_DRV_GENERIC diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index bc989e522a04..68b7cbd0810a 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -2,9 +2,13 @@ menu "Kernel hacking" source "lib/Kconfig.debug" +config TRACE_IRQFLAGS_SUPPORT + def_bool y + config DEBUG_RODATA bool "Write protect kernel read-only data structures" depends on DEBUG_KERNEL + default y help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 965a0999fc4c..75cb451b1f03 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -62,9 +62,7 @@ cflags-y += -mdisable-fpregs # Without this, "ld -r" results in .text sections that are too big # (> 0x40000) for branches to reach stubs. -ifndef CONFIG_FUNCTION_TRACER - cflags-y += -ffunction-sections -endif +cflags-y += -ffunction-sections # Use long jumps instead of long branches (needed if your linker fails to # link a too big vmlinux executable). Not enabled for building modules. diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h index 544ed8ef87eb..24cd81d58d70 100644 --- a/arch/parisc/include/asm/ftrace.h +++ b/arch/parisc/include/asm/ftrace.h @@ -4,23 +4,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); -/* - * Stack of return addresses for functions of a thread. - * Used in struct thread_info - */ -struct ftrace_ret_stack { - unsigned long ret; - unsigned long func; - unsigned long long calltime; -}; - -/* - * Primary handler of a function return. - * It relays on ftrace_return_to_handler. - * Defined in entry.S - */ -extern void return_to_handler(void); - +#define MCOUNT_INSN_SIZE 4 extern unsigned long return_address(unsigned int); diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index d4dd6e58682c..7955e43f3f3f 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -44,20 +44,18 @@ static inline long access_ok(int type, const void __user * addr, #define LDD_USER(ptr) BUILD_BUG() #define STD_KERNEL(x, ptr) __put_kernel_asm64(x, ptr) #define STD_USER(x, ptr) __put_user_asm64(x, ptr) -#define ASM_WORD_INSN ".word\t" #else #define LDD_KERNEL(ptr) __get_kernel_asm("ldd", ptr) #define LDD_USER(ptr) __get_user_asm("ldd", ptr) #define STD_KERNEL(x, ptr) __put_kernel_asm("std", x, ptr) #define STD_USER(x, ptr) __put_user_asm("std", x, ptr) -#define ASM_WORD_INSN ".dword\t" #endif /* - * The exception table contains two values: the first is an address - * for an instruction that is allowed to fault, and the second is - * the address to the fixup routine. Even on a 64bit kernel we could - * use a 32bit (unsigned int) address here. + * The exception table contains two values: the first is the relative offset to + * the address of the instruction that is allowed to fault, and the second is + * the relative offset to the address of the fixup routine. Since relative + * addresses are used, 32bit values are sufficient even on 64bit kernel. */ #define ARCH_HAS_RELATIVE_EXTABLE @@ -77,6 +75,7 @@ struct exception_table_entry { */ struct exception_data { unsigned long fault_ip; + unsigned long fault_gp; unsigned long fault_space; unsigned long fault_addr; }; diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index ff87b4603e3d..69a11183d48d 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -15,11 +15,7 @@ ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_cache.o = -pg -CFLAGS_REMOVE_irq.o = -pg -CFLAGS_REMOVE_pacache.o = -pg CFLAGS_REMOVE_perf.o = -pg -CFLAGS_REMOVE_traps.o = -pg -CFLAGS_REMOVE_unaligned.o = -pg CFLAGS_REMOVE_unwind.o = -pg endif diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index d2f62570a7b1..78d30d2ea2d8 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -299,6 +299,7 @@ int main(void) #endif BLANK(); DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip)); + DEFINE(EXCDATA_GP, offsetof(struct exception_data, fault_gp)); DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space)); DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr)); BLANK(); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 623496c11756..39127d3e70e5 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1970,43 +1970,98 @@ pt_regs_ok: b intr_restore copy %r25,%r16 - .import schedule,code syscall_do_resched: - BL schedule,%r2 + load32 syscall_check_resched,%r2 /* if resched, we start over again */ + load32 schedule,%r19 + bv %r0(%r19) /* jumps to schedule() */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #else nop #endif - b syscall_check_resched /* if resched, we start over again */ - nop ENDPROC(syscall_exit) #ifdef CONFIG_FUNCTION_TRACER + .import ftrace_function_trampoline,code -ENTRY(_mcount) - copy %r3, %arg2 + .align L1_CACHE_BYTES + .globl mcount + .type mcount, @function +ENTRY(mcount) +_mcount: + .export _mcount,data + .proc + .callinfo caller,frame=0 + .entry + /* + * The 64bit mcount() function pointer needs 4 dwords, of which the + * first two are free. We optimize it here and put 2 instructions for + * calling mcount(), and 2 instructions for ftrace_stub(). That way we + * have all on one L1 cacheline. + */ b ftrace_function_trampoline + copy %r3, %arg2 /* caller original %sp */ +ftrace_stub: + .globl ftrace_stub + .type ftrace_stub, @function +#ifdef CONFIG_64BIT + bve (%rp) +#else + bv %r0(%rp) +#endif nop -ENDPROC(_mcount) +#ifdef CONFIG_64BIT + .dword mcount + .dword 0 /* code in head.S puts value of global gp here */ +#endif + .exit + .procend +ENDPROC(mcount) + .align 8 + .globl return_to_handler + .type return_to_handler, @function ENTRY(return_to_handler) - load32 return_trampoline, %rp - copy %ret0, %arg0 - copy %ret1, %arg1 - b ftrace_return_to_handler - nop -return_trampoline: - copy %ret0, %rp - copy %r23, %ret0 - copy %r24, %ret1 + .proc + .callinfo caller,frame=FRAME_SIZE + .entry + .export parisc_return_to_handler,data +parisc_return_to_handler: + copy %r3,%r1 + STREG %r0,-RP_OFFSET(%sp) /* store 0 as %rp */ + copy %sp,%r3 + STREGM %r1,FRAME_SIZE(%sp) + STREG %ret0,8(%r3) + STREG %ret1,16(%r3) -.globl ftrace_stub -ftrace_stub: +#ifdef CONFIG_64BIT + loadgp +#endif + + /* call ftrace_return_to_handler(0) */ +#ifdef CONFIG_64BIT + ldo -16(%sp),%ret1 /* Reference param save area */ +#endif + BL ftrace_return_to_handler,%r2 + ldi 0,%r26 + copy %ret0,%rp + + /* restore original return values */ + LDREG 8(%r3),%ret0 + LDREG 16(%r3),%ret1 + + /* return from function */ +#ifdef CONFIG_64BIT + bve (%rp) +#else bv %r0(%rp) - nop +#endif + LDREGM -FRAME_SIZE(%sp),%r3 + .exit + .procend ENDPROC(return_to_handler) + #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_IRQSTACKS diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 559d400f9385..b13f9ec6f294 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -1,6 +1,6 @@ /* * Code for tracing calls in Linux kernel. - * Copyright (C) 2009 Helge Deller <deller@gmx.de> + * Copyright (C) 2009-2016 Helge Deller <deller@gmx.de> * * based on code for x86 which is: * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> @@ -13,104 +13,21 @@ #include <linux/init.h> #include <linux/ftrace.h> +#include <asm/assembly.h> #include <asm/sections.h> #include <asm/ftrace.h> - #ifdef CONFIG_FUNCTION_GRAPH_TRACER - -/* Add a function return address to the trace stack on thread info.*/ -static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth) -{ - int index; - - if (!current->ret_stack) - return -EBUSY; - - /* The return trace stack is full */ - if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { - atomic_inc(¤t->trace_overrun); - return -EBUSY; - } - - index = ++current->curr_ret_stack; - barrier(); - current->ret_stack[index].ret = ret; - current->ret_stack[index].func = func; - current->ret_stack[index].calltime = time; - *depth = index; - - return 0; -} - -/* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) -{ - int index; - - index = current->curr_ret_stack; - - if (unlikely(index < 0)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic, otherwise we have no where to go */ - *ret = (unsigned long) - dereference_function_descriptor(&panic); - return; - } - - *ret = current->ret_stack[index].ret; - trace->func = current->ret_stack[index].func; - trace->calltime = current->ret_stack[index].calltime; - trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = index; - barrier(); - current->curr_ret_stack--; - -} - -/* - * Send the trace to the ring-buffer. - * @return the original return address. - */ -unsigned long ftrace_return_to_handler(unsigned long retval0, - unsigned long retval1) -{ - struct ftrace_graph_ret trace; - unsigned long ret; - - pop_return_trace(&trace, &ret); - trace.rettime = local_clock(); - ftrace_graph_return(&trace); - - if (unlikely(!ret)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic. What else to do? */ - ret = (unsigned long) - dereference_function_descriptor(&panic); - } - - /* HACK: we hand over the old functions' return values - in %r23 and %r24. Assembly in entry.S will take care - and move those to their final registers %ret0 and %ret1 */ - asm( "copy %0, %%r23 \n\t" - "copy %1, %%r24 \n" : : "r" (retval0), "r" (retval1) ); - - return ret; -} - /* * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +static void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - unsigned long long calltime; struct ftrace_graph_ent trace; + extern int parisc_return_to_handler; if (unlikely(ftrace_graph_is_dead())) return; @@ -119,64 +36,47 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; old = *parent; - *parent = (unsigned long) - dereference_function_descriptor(&return_to_handler); - if (unlikely(!__kernel_text_address(old))) { - ftrace_graph_stop(); - *parent = old; - WARN_ON(1); - return; - } - - calltime = local_clock(); + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; - if (push_return_trace(old, calltime, - self_addr, &trace.depth) == -EBUSY) { - *parent = old; + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) return; - } - trace.func = self_addr; + if (ftrace_push_return_trace(old, self_addr, &trace.depth, + 0 ) == -EBUSY) + return; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } + /* activate parisc_return_to_handler() as return point */ + *parent = (unsigned long) &parisc_return_to_handler; } - #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -void ftrace_function_trampoline(unsigned long parent, +void notrace ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3) { - extern ftrace_func_t ftrace_trace_function; + extern ftrace_func_t ftrace_trace_function; /* depends on CONFIG_DYNAMIC_FTRACE */ + extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); if (ftrace_trace_function != ftrace_stub) { - ftrace_trace_function(parent, self_addr); + /* struct ftrace_ops *op, struct pt_regs *regs); */ + ftrace_trace_function(parent, self_addr, NULL, NULL); return; } + #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ftrace_graph_entry && ftrace_graph_return) { - unsigned long sp; + if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub || + ftrace_graph_entry != ftrace_graph_entry_stub) { unsigned long *parent_rp; - asm volatile ("copy %%r30, %0" : "=r"(sp)); - /* sanity check: is stack pointer which we got from - assembler function in entry.S in a reasonable - range compared to current stack pointer? */ - if ((sp - org_sp_gr3) > 0x400) - return; - /* calculate pointer to %rp in stack */ - parent_rp = (unsigned long *) org_sp_gr3 - 0x10; + parent_rp = (unsigned long *) (org_sp_gr3 - RP_OFFSET); /* sanity check: parent_rp should hold parent */ if (*parent_rp != parent) return; - + prepare_ftrace_return(parent_rp, self_addr); return; } diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 75aa0db9f69e..bbbe360b458f 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -129,6 +129,15 @@ $pgt_fill_loop: /* And the stack pointer too */ ldo THREAD_SZ_ALGN(%r6),%sp +#if defined(CONFIG_64BIT) && defined(CONFIG_FUNCTION_TRACER) + .import _mcount,data + /* initialize mcount FPTR */ + /* Get the global data pointer */ + loadgp + load32 PA(_mcount), %r10 + std %dp,0x18(%r10) +#endif + #ifdef CONFIG_SMP /* Set the smp rendezvous address into page zero. ** It would be safer to do this in init_smp_config() but diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index b9d75d9fa9ac..a0ecdb4abcc8 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -660,6 +660,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, } *loc = (*loc & ~0x3ff1ffd) | reassemble_22(val); break; + case R_PARISC_PCREL32: + /* 32-bit PC relative address */ + *loc = val - dot - 8 + addend; + break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", @@ -788,6 +792,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, CHECK_RELOC(val, 22); *loc = (*loc & ~0x3ff1ffd) | reassemble_22(val); break; + case R_PARISC_PCREL32: + /* 32-bit PC relative address */ + *loc = val - dot - 8 + addend; + break; case R_PARISC_DIR64: /* 64-bit effective address */ *loc64 = val + addend; diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 568b2c61ea02..3cad8aadc69e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -47,11 +47,11 @@ EXPORT_SYMBOL(__cmpxchg_u64); EXPORT_SYMBOL(lclear_user); EXPORT_SYMBOL(lstrnlen_user); -/* Global fixups */ -extern void fixup_get_user_skip_1(void); -extern void fixup_get_user_skip_2(void); -extern void fixup_put_user_skip_1(void); -extern void fixup_put_user_skip_2(void); +/* Global fixups - defined as int to avoid creation of function pointers */ +extern int fixup_get_user_skip_1; +extern int fixup_get_user_skip_2; +extern int fixup_put_user_skip_1; +extern int fixup_put_user_skip_2; EXPORT_SYMBOL(fixup_get_user_skip_1); EXPORT_SYMBOL(fixup_get_user_skip_2); EXPORT_SYMBOL(fixup_put_user_skip_1); diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 16e0735e2f46..97d6b208e129 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -795,6 +795,9 @@ void notrace handle_interruption(int code, struct pt_regs *regs) if (fault_space == 0 && !faulthandler_disabled()) { + /* Clean up and return if in exception table. */ + if (fixup_exception(regs)) + return; pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); } diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S index 536ef66bb94b..1052b747e011 100644 --- a/arch/parisc/lib/fixup.S +++ b/arch/parisc/lib/fixup.S @@ -26,6 +26,7 @@ #ifdef CONFIG_SMP .macro get_fault_ip t1 t2 + loadgp addil LT%__per_cpu_offset,%r27 LDREG RT%__per_cpu_offset(%r1),\t1 /* t2 = smp_processor_id() */ @@ -40,14 +41,19 @@ LDREG RT%exception_data(%r1),\t1 /* t1 = this_cpu_ptr(&exception_data) */ add,l \t1,\t2,\t1 + /* %r27 = t1->fault_gp - restore gp */ + LDREG EXCDATA_GP(\t1), %r27 /* t1 = t1->fault_ip */ LDREG EXCDATA_IP(\t1), \t1 .endm #else .macro get_fault_ip t1 t2 + loadgp /* t1 = this_cpu_ptr(&exception_data) */ addil LT%exception_data,%r27 LDREG RT%exception_data(%r1),\t2 + /* %r27 = t2->fault_gp - restore gp */ + LDREG EXCDATA_GP(\t2), %r27 /* t1 = t2->fault_ip */ LDREG EXCDATA_IP(\t2), \t1 .endm diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 26fac9c671c9..16dbe81c97c9 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -145,6 +145,7 @@ int fixup_exception(struct pt_regs *regs) struct exception_data *d; d = this_cpu_ptr(&exception_data); d->fault_ip = regs->iaoq[0]; + d->fault_gp = regs->gr[27]; d->fault_space = regs->isr; d->fault_addr = regs->ior; diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 8dde19962a5b..f63c96cd3608 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -31,6 +31,7 @@ #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \ 0x00000040 +/* Reserved - do not use 0x00000004 */ #define PPC_FEATURE_TRUE_LE 0x00000002 #define PPC_FEATURE_PPC_LE 0x00000001 diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7030b035905d..a15fe1d4e84a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -148,23 +148,25 @@ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ + unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, + {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, /* - * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), - * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP - * which is 0 if the kernel doesn't support TM. + * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on TM here, so we use the *_COMP versions + * which are 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, + {CPU_FTR_TM_COMP, 0, 0, + PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -195,10 +197,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index aad23e3dff2c..bf24ab188921 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -4,6 +4,9 @@ config MMU config ZONE_DMA def_bool y +config CPU_BIG_ENDIAN + def_bool y + config LOCKDEP_SUPPORT def_bool y diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b6bfa169a002..535a46d46d28 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -44,7 +44,8 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(64); + u64 pad[2]; +} __packed __aligned(128); enum zpci_state { ZPCI_FN_STATE_RESERVED, diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h index 781a9cf9b002..e10f8337367b 100644 --- a/arch/s390/include/asm/seccomp.h +++ b/arch/s390/include/asm/seccomp.h @@ -13,4 +13,6 @@ #define __NR_seccomp_exit_32 __NR_exit #define __NR_seccomp_sigreturn_32 __NR_sigreturn +#include <asm-generic/seccomp.h> + #endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index d4549c964589..e5f50a7d2f4e 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -105,6 +105,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) if (_raw_compare_and_swap(&lp->lock, 0, cpu)) return; local_irq_restore(flags); + continue; } /* Check if the lock owner is running. */ if (first_diag && cpu_is_preempted(~owner)) { diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 1baf0ba96242..c9f8bbdb1bf8 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -34,11 +34,6 @@ enum { DECLARE_PER_CPU(int, cpu_state); void smp_message_recv(unsigned int msg); -void smp_timer_broadcast(const struct cpumask *mask); - -void local_timer_interrupt(void); -void local_timer_setup(unsigned int cpu); -void local_timer_stop(unsigned int cpu); void arch_send_call_function_single_ipi(int cpu); void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index b0a282d65f6a..358e3f516ef6 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -17,7 +17,7 @@ #define mc_capable() (1) -const struct cpumask *cpu_coregroup_mask(unsigned int cpu); +const struct cpumask *cpu_coregroup_mask(int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index 4a298808789c..839612c8a0a0 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -73,8 +73,6 @@ static void shx3_prepare_cpus(unsigned int max_cpus) { int i; - local_timer_setup(0); - BUILD_BUG_ON(SMP_MSG_NR >= 8); for (i = 0; i < SMP_MSG_NR; i++) diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c index 772caffba22f..c82912a61d74 100644 --- a/arch/sh/kernel/topology.c +++ b/arch/sh/kernel/topology.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); cpumask_t cpu_core_map[NR_CPUS]; EXPORT_SYMBOL(cpu_core_map); -static cpumask_t cpu_coregroup_map(unsigned int cpu) +static cpumask_t cpu_coregroup_map(int cpu) { /* * Presently all SH-X3 SMP cores are multi-cores, so just keep it @@ -30,7 +30,7 @@ static cpumask_t cpu_coregroup_map(unsigned int cpu) return *cpu_possible_mask; } -const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; } diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6915ff2bd996..8774cb23064f 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -26,7 +26,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 -KBUILD_CFLAGS += -fno-strict-aliasing -fPIC +KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC) KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small @@ -40,6 +40,18 @@ GCOV_PROFILE := n UBSAN_SANITIZE :=n LDFLAGS := -m elf_$(UTS_MACHINE) +ifeq ($(CONFIG_RELOCATABLE),y) +# If kernel is relocatable, build compressed kernel as PIE. +ifeq ($(CONFIG_X86_32),y) +LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker) +else +# To build 64-bit compressed kernel as PIE, we disable relocation +# overflow check to avoid relocation overflow error with a new linker +# command-line option, -z noreloc-overflow. +LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ + && echo "-z noreloc-overflow -pie --no-dynamic-linker") +endif +endif LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 8ef964ddc18e..0256064da8da 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -31,6 +31,34 @@ #include <asm/asm-offsets.h> #include <asm/bootparam.h> +/* + * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X + * relocation to get the symbol address in PIC. When the compressed x86 + * kernel isn't built as PIC, the linker optimizes R_386_GOT32X + * relocations to their fixed symbol addresses. However, when the + * compressed x86 kernel is loaded at a different address, it leads + * to the following load failure: + * + * Failed to allocate space for phdrs + * + * during the decompression stage. + * + * If the compressed x86 kernel is relocatable at run-time, it should be + * compiled with -fPIE, instead of -fPIC, if possible and should be built as + * Position Independent Executable (PIE) so that linker won't optimize + * R_386_GOT32X relocation to its fixed symbol address. Older + * linkers generate R_386_32 relocations against locally defined symbols, + * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less + * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle + * R_386_32 relocations when relocating the kernel. To generate + * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as + * hidden: + */ + .hidden _bss + .hidden _ebss + .hidden _got + .hidden _egot + __HEAD ENTRY(startup_32) #ifdef CONFIG_EFI_STUB diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b0c0d16ef58d..86558a199139 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -33,6 +33,14 @@ #include <asm/asm-offsets.h> #include <asm/bootparam.h> +/* + * Locally defined symbols should be marked hidden: + */ + .hidden _bss + .hidden _ebss + .hidden _got + .hidden _egot + __HEAD .code32 ENTRY(startup_32) diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a8a0224fa0f8..081255cea1ee 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f8a29d2c97b0..e6a8613fbfb0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -4,6 +4,7 @@ #include <asm/page.h> #include <asm-generic/hugetlb.h> +#define hugepages_supported() cpu_has_pse static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 0a850100c594..2658e2af74ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ]; void mce_gen_pool_process(void) { struct llist_node *head; - struct mce_evt_llist *node; + struct mce_evt_llist *node, *tmp; struct mce *mce; head = llist_del_all(&mce_event_llist); @@ -37,7 +37,7 @@ void mce_gen_pool_process(void) return; head = llist_reverse_order(head); - llist_for_each_entry(node, head, llnode) { + llist_for_each_entry_safe(node, tmp, head, llnode) { mce = &node->mce; atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4e7c6933691c..10c11b4da31d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static unsigned char hv_get_nmi_reason(void) +{ + return 0; +} + static void __init ms_hyperv_init_platform(void) { /* @@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif mark_tsc_unstable("running on Hyper-V"); + + /* + * Generation 2 instances don't support reading the NMI status from + * 0x61 port. + */ + if (efi_enabled(EFI_BOOT)) + x86_platform.get_nmi_reason = hv_get_nmi_reason; } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 319b08a5b6ed..2367ae07eb76 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -146,6 +146,31 @@ int default_check_phys_apicid_present(int phys_apicid) struct boot_params boot_params; +/* + * Machine setup.. + */ +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + + #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data = { @@ -924,6 +949,13 @@ void __init setup_arch(char **cmdline_p) mpx_mm_init(&init_mm); + code_resource.start = __pa_symbol(_text); + code_resource.end = __pa_symbol(_etext)-1; + data_resource.start = __pa_symbol(_etext); + data_resource.end = __pa_symbol(_edata)-1; + bss_resource.start = __pa_symbol(__bss_start); + bss_resource.end = __pa_symbol(__bss_stop)-1; + #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); @@ -987,6 +1019,11 @@ void __init setup_arch(char **cmdline_p) x86_init.resources.probe_roms(); + /* after parse_early_param, so could debug it */ + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + e820_add_kernel_range(); trim_bios_range(); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8efb839948e5..bbbaa802d13e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, do_cpuid_1_ent(&entry[i], function, idx); if (idx == 1) { entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; + cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); entry[i].ebx = 0; if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) entry[i].ebx = diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b70df72e2b33..66b33b96a31b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); bool fault = (mmu->permissions[index] >> pte_access) & 1; + u32 errcode = PFERR_PRESENT_MASK; WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); - pfec |= PFERR_PRESENT_MASK; - if (unlikely(mmu->pkru_mask)) { u32 pkru_bits, offset; @@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ - offset = pfec - 1 + + offset = (pfec & ~1) + ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT)); pkru_bits &= mmu->pkru_mask >> offset; - pfec |= -pkru_bits & PFERR_PK_MASK; + errcode |= -pkru_bits & PFERR_PK_MASK; fault |= (pkru_bits != 0); } - return -(uint32_t)fault & pfec; + return -(u32)fault & errcode; } void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1d971c7553c3..bc019f70e0b6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -360,7 +360,7 @@ retry_walk: goto error; if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { - errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; + errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a2c70e43bc8..9b7798c7b210 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } - kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) @@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); - kvm_load_guest_xcr0(vcpu); - vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } + kvm_load_guest_xcr0(vcpu); + if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); @@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + kvm_put_guest_xcr0(vcpu); + /* Interrupt is enabled by handle_external_intr() */ kvm_x86_ops->handle_external_intr(vcpu); @@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) * and assume host would use all available bits. * Guest xcr0 would be loaded later. */ - kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); @@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - kvm_put_guest_xcr0(vcpu); - if (!vcpu->guest_fpu_loaded) { vcpu->fpu_counter = 0; return; diff --git a/block/partition-generic.c b/block/partition-generic.c index 2c6ae2aed2c4..d7eb77e1e3a8 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -361,15 +361,20 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_del; } + err = hd_ref_init(p); + if (err) { + if (flags & ADDPART_FLAG_WHOLEDISK) + goto out_remove_file; + goto out_del; + } + /* everything is up and running, commence */ rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - - if (!hd_ref_init(p)) - return p; + return p; out_free_info: free_part_info(p); @@ -378,6 +383,8 @@ out_free_stats: out_free: kfree(p); return ERR_PTR(err); +out_remove_file: + device_remove_file(pdev, &dev_attr_whole_disk); out_del: kobject_put(p->holder_dir); device_del(pdev); diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 1cea67d43e1d..ead8dc0d084e 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -387,16 +387,16 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) req_ctx->child_req.src = req->src; req_ctx->child_req.src_len = req->src_len; req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size - 1; + req_ctx->child_req.dst_len = ctx->key_size ; - req_ctx->out_buf = kmalloc(ctx->key_size - 1, + req_ctx->out_buf = kmalloc(ctx->key_size, (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC); if (!req_ctx->out_buf) return -ENOMEM; pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size - 1, NULL); + ctx->key_size, NULL); akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, @@ -595,16 +595,16 @@ static int pkcs1pad_verify(struct akcipher_request *req) req_ctx->child_req.src = req->src; req_ctx->child_req.src_len = req->src_len; req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size - 1; + req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->out_buf = kmalloc(ctx->key_size - 1, + req_ctx->out_buf = kmalloc(ctx->key_size, (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC); if (!req_ctx->out_buf) return -ENOMEM; pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size - 1, NULL); + ctx->key_size, NULL); akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 786be8fed39e..1f635471f318 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -136,7 +136,6 @@ static bool bcma_is_core_needed_early(u16 core_id) return false; } -#if defined(CONFIG_OF) && defined(CONFIG_OF_ADDRESS) static struct device_node *bcma_of_find_child_device(struct platform_device *parent, struct bcma_device *core) { @@ -184,7 +183,7 @@ static unsigned int bcma_of_get_irq(struct platform_device *parent, struct of_phandle_args out_irq; int ret; - if (!parent || !parent->dev.of_node) + if (!IS_ENABLED(CONFIG_OF_IRQ) || !parent || !parent->dev.of_node) return 0; ret = bcma_of_irq_parse(parent, core, &out_irq, num); @@ -202,23 +201,15 @@ static void bcma_of_fill_device(struct platform_device *parent, { struct device_node *node; + if (!IS_ENABLED(CONFIG_OF_IRQ)) + return; + node = bcma_of_find_child_device(parent, core); if (node) core->dev.of_node = node; core->irq = bcma_of_get_irq(parent, core, 0); } -#else -static void bcma_of_fill_device(struct platform_device *parent, - struct bcma_device *core) -{ -} -static inline unsigned int bcma_of_get_irq(struct platform_device *parent, - struct bcma_device *core, int num) -{ - return 0; -} -#endif /* CONFIG_OF */ unsigned int bcma_core_irq(struct bcma_device *core, int num) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 423f4ca7d712..80cf8add46ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -488,6 +488,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, bio_segments(bio), blk_rq_bytes(cmd->rq)); + /* + * This bio may be started from the middle of the 'bvec' + * because of bio splitting, so offset from the bvec must + * be passed to iov iterator + */ + iter.iov_offset = bio->bi_iter.bi_bvec_done; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 47ca4b39d306..641c2d19fc57 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -206,7 +206,8 @@ static int ath3k_load_firmware(struct usb_device *udev, const struct firmware *firmware) { u8 *send_buf; - int err, pipe, len, size, sent = 0; + int len = 0; + int err, pipe, size, sent = 0; int count = firmware->size; BT_DBG("udev %p", udev); @@ -302,7 +303,8 @@ static int ath3k_load_fwfile(struct usb_device *udev, const struct firmware *firmware) { u8 *send_buf; - int err, pipe, len, size, count, sent = 0; + int len = 0; + int err, pipe, size, count, sent = 0; int ret; count = firmware->size; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f67ea1c090cb..aba31210c802 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -50,6 +50,7 @@ struct vhci_data { wait_queue_head_t read_wait; struct sk_buff_head readq; + struct mutex open_mutex; struct delayed_work open_timeout; }; @@ -87,12 +88,15 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } -static int vhci_create_device(struct vhci_data *data, __u8 opcode) +static int __vhci_create_device(struct vhci_data *data, __u8 opcode) { struct hci_dev *hdev; struct sk_buff *skb; __u8 dev_type; + if (data->hdev) + return -EBADFD; + /* bits 0-1 are dev_type (BR/EDR or AMP) */ dev_type = opcode & 0x03; @@ -151,6 +155,17 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode) return 0; } +static int vhci_create_device(struct vhci_data *data, __u8 opcode) +{ + int err; + + mutex_lock(&data->open_mutex); + err = __vhci_create_device(data, opcode); + mutex_unlock(&data->open_mutex); + + return err; +} + static inline ssize_t vhci_get_user(struct vhci_data *data, struct iov_iter *from) { @@ -191,11 +206,6 @@ static inline ssize_t vhci_get_user(struct vhci_data *data, case HCI_VENDOR_PKT: cancel_delayed_work_sync(&data->open_timeout); - if (data->hdev) { - kfree_skb(skb); - return -EBADFD; - } - opcode = *((__u8 *) skb->data); skb_pull(skb, 1); @@ -320,6 +330,7 @@ static int vhci_open(struct inode *inode, struct file *file) skb_queue_head_init(&data->readq); init_waitqueue_head(&data->read_wait); + mutex_init(&data->open_mutex); INIT_DELAYED_WORK(&data->open_timeout, vhci_open_timeout); file->private_data = data; diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index c2e52864bb03..ce54a0160faa 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -972,7 +972,7 @@ int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr) } } - pr_err("invalid dram address 0x%x\n", phyaddr); + pr_err("invalid dram address %pa\n", &phyaddr); return -EINVAL; } EXPORT_SYMBOL_GPL(mvebu_mbus_get_dram_win_info); diff --git a/drivers/bus/uniphier-system-bus.c b/drivers/bus/uniphier-system-bus.c index 834a2aeaf27a..350b7309c26d 100644 --- a/drivers/bus/uniphier-system-bus.c +++ b/drivers/bus/uniphier-system-bus.c @@ -108,7 +108,7 @@ static int uniphier_system_bus_check_overlap( for (i = 0; i < ARRAY_SIZE(priv->bank); i++) { for (j = i + 1; j < ARRAY_SIZE(priv->bank); j++) { - if (priv->bank[i].end > priv->bank[j].base || + if (priv->bank[i].end > priv->bank[j].base && priv->bank[i].base < priv->bank[j].end) { dev_err(priv->dev, "region overlap between bank%d and bank%d\n", diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c index ca9c40309757..5132c9cde50d 100644 --- a/drivers/char/hw_random/bcm63xx-rng.c +++ b/drivers/char/hw_random/bcm63xx-rng.c @@ -12,6 +12,7 @@ #include <linux/clk.h> #include <linux/platform_device.h> #include <linux/hw_random.h> +#include <linux/of.h> #define RNG_CTRL 0x00 #define RNG_EN (1 << 0) diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c index 2bcecafdeaea..c407c47a3232 100644 --- a/drivers/clocksource/tango_xtal.c +++ b/drivers/clocksource/tango_xtal.c @@ -42,7 +42,7 @@ static void __init tango_clocksource_init(struct device_node *np) ret = clocksource_mmio_init(xtal_in_cnt, "tango-xtal", xtal_freq, 350, 32, clocksource_mmio_readl_up); - if (!ret) { + if (ret) { pr_err("%s: registration failed\n", np->full_name); return; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b87596b591b3..e93405f0eac4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1491,6 +1491,9 @@ static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy) { unsigned int new_freq; + if (cpufreq_suspended) + return 0; + new_freq = cpufreq_driver->get(policy->cpu); if (!new_freq) return 0; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8b5a415ee14a..30fe323c4551 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1130,6 +1130,10 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), int_tofp(duration_ns)); core_busy = mul_fp(core_busy, sample_ratio); + } else { + sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc); + if (sample_ratio < int_tofp(1)) + core_busy = 0; } cpu->sample.busy_scaled = core_busy; diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 3d9acc53d247..60fc0fa26fd3 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -225,6 +225,9 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); struct ccp_aes_cmac_exp_ctx state; + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + state.null_msg = rctx->null_msg; memcpy(state.iv, rctx->iv, sizeof(state.iv)); state.buf_count = rctx->buf_count; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index b5ad72897dc2..8f36af62fe95 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -212,6 +212,9 @@ static int ccp_sha_export(struct ahash_request *req, void *out) struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); struct ccp_sha_exp_ctx state; + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + state.type = rctx->type; state.msg_bits = rctx->msg_bits; state.first = rctx->first; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index a0d4a08313ae..aae05547b924 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -63,6 +63,14 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, ptr->eptr = upper_32_bits(dma_addr); } +static void copy_talitos_ptr(struct talitos_ptr *dst_ptr, + struct talitos_ptr *src_ptr, bool is_sec1) +{ + dst_ptr->ptr = src_ptr->ptr; + if (!is_sec1) + dst_ptr->eptr = src_ptr->eptr; +} + static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len, bool is_sec1) { @@ -1083,21 +1091,20 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, (areq->src == areq->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - /* hmac data */ desc->ptr[1].len = cpu_to_be16(areq->assoclen); if (sg_count > 1 && (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, areq->assoclen, &edesc->link_tbl[tbl_off])) > 1) { - tbl_off += ret; - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + tbl_off += ret; } else { to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); desc->ptr[1].j_extent = 0; @@ -1126,11 +1133,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) sg_link_tbl_len += authsize; - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, areq->assoclen, - sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > 1) { - tbl_off += ret; + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + + areq->assoclen, 0); + } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, + areq->assoclen, sg_link_tbl_len, + &edesc->link_tbl[tbl_off])) > + 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + tbl_off * @@ -1138,8 +1147,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); - } else - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0); + tbl_off += ret; + } else { + copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + } /* cipher out */ desc->ptr[5].len = cpu_to_be16(cryptlen); @@ -1151,11 +1162,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->icv_ool = false; - if (sg_count > 1 && - (sg_count = sg_to_link_tbl_offset(areq->dst, sg_count, + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + + areq->assoclen, 0); + } else if ((sg_count = + sg_to_link_tbl_offset(areq->dst, sg_count, areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > - 1) { + &edesc->link_tbl[tbl_off])) > 1) { struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + @@ -1178,8 +1191,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->dma_len, DMA_BIDIRECTIONAL); edesc->icv_ool = true; - } else - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0); + } else { + copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + } /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, @@ -2629,21 +2643,11 @@ struct talitos_crypto_alg { struct talitos_alg_template algt; }; -static int talitos_cra_init(struct crypto_tfm *tfm) +static int talitos_init_common(struct talitos_ctx *ctx, + struct talitos_crypto_alg *talitos_alg) { - struct crypto_alg *alg = tfm->__crt_alg; - struct talitos_crypto_alg *talitos_alg; - struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); struct talitos_private *priv; - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) - talitos_alg = container_of(__crypto_ahash_alg(alg), - struct talitos_crypto_alg, - algt.alg.hash); - else - talitos_alg = container_of(alg, struct talitos_crypto_alg, - algt.alg.crypto); - /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; @@ -2661,10 +2665,33 @@ static int talitos_cra_init(struct crypto_tfm *tfm) return 0; } +static int talitos_cra_init(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct talitos_crypto_alg *talitos_alg; + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) + talitos_alg = container_of(__crypto_ahash_alg(alg), + struct talitos_crypto_alg, + algt.alg.hash); + else + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.crypto); + + return talitos_init_common(ctx, talitos_alg); +} + static int talitos_cra_init_aead(struct crypto_aead *tfm) { - talitos_cra_init(crypto_aead_tfm(tfm)); - return 0; + struct aead_alg *alg = crypto_aead_alg(tfm); + struct talitos_crypto_alg *talitos_alg; + struct talitos_ctx *ctx = crypto_aead_ctx(tfm); + + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.aead); + + return talitos_init_common(ctx, talitos_alg); } static int talitos_cra_init_ahash(struct crypto_tfm *tfm) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 5ad0ec1f0e29..97199b3c25a2 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -130,26 +130,14 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) static void dwc_initialize(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); - struct dw_dma_slave *dws = dwc->chan.private; u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); if (dwc->initialized == true) return; - if (dws) { - /* - * We need controller-specific data to set up slave - * transfers. - */ - BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - - cfghi |= DWC_CFGH_DST_PER(dws->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dws->src_id); - } else { - cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); - } + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); channel_writel(dwc, CFG_LO, cfglo); channel_writel(dwc, CFG_HI, cfghi); @@ -941,7 +929,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma_slave *dws = param; - if (!dws || dws->dma_dev != chan->device->dev) + if (dws->dma_dev != chan->device->dev) return false; /* We have to copy data since dws can be temporary storage */ @@ -1165,6 +1153,14 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. */ + /* + * We need controller-specific data to set up slave transfers. + */ + if (chan->private && !dw_dma_filter(chan, chan->private)) { + dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); + return -EINVAL; + } + /* Enable controller here if needed */ if (!dw->in_use) dw_dma_on(dw); @@ -1226,6 +1222,14 @@ static void dwc_free_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + + dwc->src_master = 0; + dwc->dst_master = 0; + dwc->initialized = false; /* Disable interrupts */ diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index ee3463e774f8..04070baab78a 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1238,6 +1238,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( struct edma_desc *edesc; dma_addr_t src_addr, dst_addr; enum dma_slave_buswidth dev_width; + bool use_intermediate = false; u32 burst; int i, ret, nslots; @@ -1279,8 +1280,21 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( * but the synchronization is difficult to achieve with Cyclic and * cannot be guaranteed, so we error out early. */ - if (nslots > MAX_NR_SG) - return NULL; + if (nslots > MAX_NR_SG) { + /* + * If the burst and period sizes are the same, we can put + * the full buffer into a single period and activate + * intermediate interrupts. This will produce interrupts + * after each burst, which is also after each desired period. + */ + if (burst == period_len) { + period_len = buf_len; + nslots = 2; + use_intermediate = true; + } else { + return NULL; + } + } edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]), GFP_ATOMIC); @@ -1358,8 +1372,13 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( /* * Enable period interrupt only if it is requested */ - if (tx_flags & DMA_PREP_INTERRUPT) + if (tx_flags & DMA_PREP_INTERRUPT) { edesc->pset[i].param.opt |= TCINTEN; + + /* Also enable intermediate interrupts if necessary */ + if (use_intermediate) + edesc->pset[i].param.opt |= ITCINTEN; + } } /* Place the cyclic channel to highest priority queue */ @@ -1570,32 +1589,6 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) return IRQ_HANDLED; } -static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable) -{ - struct platform_device *tc_pdev; - int ret; - - if (!IS_ENABLED(CONFIG_OF) || !tc) - return; - - tc_pdev = of_find_device_by_node(tc->node); - if (!tc_pdev) { - pr_err("%s: TPTC device is not found\n", __func__); - return; - } - if (!pm_runtime_enabled(&tc_pdev->dev)) - pm_runtime_enable(&tc_pdev->dev); - - if (enable) - ret = pm_runtime_get_sync(&tc_pdev->dev); - else - ret = pm_runtime_put_sync(&tc_pdev->dev); - - if (ret < 0) - pr_err("%s: pm_runtime_%s_sync() failed for %s\n", __func__, - enable ? "get" : "put", dev_name(&tc_pdev->dev)); -} - /* Alloc channel resources */ static int edma_alloc_chan_resources(struct dma_chan *chan) { @@ -1632,8 +1625,6 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id, echan->hw_triggered ? "HW" : "SW"); - edma_tc_set_pm_state(echan->tc, true); - return 0; err_slot: @@ -1670,7 +1661,6 @@ static void edma_free_chan_resources(struct dma_chan *chan) echan->alloced = false; } - edma_tc_set_pm_state(echan->tc, false); echan->tc = NULL; echan->hw_triggered = false; @@ -2417,10 +2407,8 @@ static int edma_pm_suspend(struct device *dev) int i; for (i = 0; i < ecc->num_channels; i++) { - if (echan[i].alloced) { + if (echan[i].alloced) edma_setup_interrupt(&echan[i], false); - edma_tc_set_pm_state(echan[i].tc, false); - } } return 0; @@ -2450,8 +2438,6 @@ static int edma_pm_resume(struct device *dev) /* Set up channel -> slot mapping for the entry slot */ edma_set_chmap(&echan[i], echan[i].slot[0]); - - edma_tc_set_pm_state(echan[i].tc, true); } } @@ -2475,7 +2461,8 @@ static struct platform_driver edma_driver = { static int edma_tptc_probe(struct platform_device *pdev) { - return 0; + pm_runtime_enable(&pdev->dev); + return pm_runtime_get_sync(&pdev->dev); } static struct platform_driver edma_tptc_driver = { diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index eef145edb936..ee510515ce18 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -64,10 +64,10 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) if (hsuc->direction == DMA_MEM_TO_DEV) { bsr = config->dst_maxburst; - mtsr = config->dst_addr_width; + mtsr = config->src_addr_width; } else if (hsuc->direction == DMA_DEV_TO_MEM) { bsr = config->src_maxburst; - mtsr = config->src_addr_width; + mtsr = config->dst_addr_width; } hsu_chan_disable(hsuc); @@ -135,7 +135,7 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) sr = hsu_chan_readl(hsuc, HSU_CH_SR); spin_unlock_irqrestore(&hsuc->vchan.lock, flags); - return sr; + return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY); } irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) @@ -254,10 +254,13 @@ static void hsu_dma_issue_pending(struct dma_chan *chan) static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc) { struct hsu_dma_desc *desc = hsuc->desc; - size_t bytes = desc->length; + size_t bytes = 0; int i; - i = desc->active % HSU_DMA_CHAN_NR_DESC; + for (i = desc->active; i < desc->nents; i++) + bytes += desc->sg[i].len; + + i = HSU_DMA_CHAN_NR_DESC - 1; do { bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i)); } while (--i >= 0); diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 578a8ee8cd05..6b070c22b1df 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -41,6 +41,9 @@ #define HSU_CH_SR_DESCTO(x) BIT(8 + (x)) #define HSU_CH_SR_DESCTO_ANY (BIT(11) | BIT(10) | BIT(9) | BIT(8)) #define HSU_CH_SR_CHE BIT(15) +#define HSU_CH_SR_DESCE(x) BIT(16 + (x)) +#define HSU_CH_SR_DESCE_ANY (BIT(19) | BIT(18) | BIT(17) | BIT(16)) +#define HSU_CH_SR_CDESC_ANY (BIT(31) | BIT(30)) /* Bits in HSU_CH_CR */ #define HSU_CH_CR_CHA BIT(0) diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 43bd5aee7ffe..1e984e18c126 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -48,6 +48,7 @@ struct omap_chan { unsigned dma_sig; bool cyclic; bool paused; + bool running; int dma_ch; struct omap_desc *desc; @@ -294,6 +295,8 @@ static void omap_dma_start(struct omap_chan *c, struct omap_desc *d) /* Enable channel */ omap_dma_chan_write(c, CCR, d->ccr | CCR_ENABLE); + + c->running = true; } static void omap_dma_stop(struct omap_chan *c) @@ -355,6 +358,8 @@ static void omap_dma_stop(struct omap_chan *c) omap_dma_chan_write(c, CLNK_CTRL, val); } + + c->running = false; } static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d, @@ -673,15 +678,20 @@ static enum dma_status omap_dma_tx_status(struct dma_chan *chan, struct omap_chan *c = to_omap_dma_chan(chan); struct virt_dma_desc *vd; enum dma_status ret; - uint32_t ccr; unsigned long flags; - ccr = omap_dma_chan_read(c, CCR); - /* The channel is no longer active, handle the completion right away */ - if (!(ccr & CCR_ENABLE)) - omap_dma_callback(c->dma_ch, 0, c); - ret = dma_cookie_status(chan, cookie, txstate); + + if (!c->paused && c->running) { + uint32_t ccr = omap_dma_chan_read(c, CCR); + /* + * The channel is no longer active, set the return value + * accordingly + */ + if (!(ccr & CCR_ENABLE)) + ret = DMA_COMPLETE; + } + if (ret == DMA_COMPLETE || !txstate) return ret; @@ -945,9 +955,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( d->ccr = c->ccr; d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; - d->cicr = CICR_DROP_IE; - if (tx_flags & DMA_PREP_INTERRUPT) - d->cicr |= CICR_FRAME_IE; + d->cicr = CICR_DROP_IE | CICR_FRAME_IE; d->csdp = data_type; diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c index 0ee0321868d3..ef67f278e076 100644 --- a/drivers/dma/xilinx/xilinx_vdma.c +++ b/drivers/dma/xilinx/xilinx_vdma.c @@ -1236,7 +1236,7 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, struct xilinx_vdma_device *xdev = ofdma->of_dma_data; int chan_id = dma_spec->args[0]; - if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE) + if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id]) return NULL; return dma_get_slave_channel(&xdev->chan[chan_id]->common); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 93f0d4120289..468447aff8eb 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -362,6 +362,7 @@ struct sbridge_pvt { /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; + bool is_chan_hash; /* Fifo double buffers */ struct mce mce_entry[MCE_LOG_LEN]; @@ -1060,6 +1061,20 @@ static inline u8 sad_pkg_ha(u8 pkg) return (pkg >> 2) & 0x1; } +static int haswell_chan_hash(int idx, u64 addr) +{ + int i; + + /* + * XOR even bits from 12:26 to bit0 of idx, + * odd bits from 13:27 to bit1 + */ + for (i = 12; i < 28; i += 2) + idx ^= (addr >> i) & 3; + + return idx; +} + /**************************************************************************** Memory check routines ****************************************************************************/ @@ -1616,6 +1631,10 @@ static int get_dimm_config(struct mem_ctl_info *mci) KNL_MAX_CHANNELS : NUM_CHANNELS; u64 knl_mc_sizes[KNL_MAX_CHANNELS]; + if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) { + pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, ®); + pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21); + } if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL || pvt->info.type == KNIGHTS_LANDING) pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®); @@ -2118,12 +2137,15 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } ch_way = TAD_CH(reg) + 1; - sck_way = 1 << TAD_SOCK(reg); + sck_way = TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; - else + else { idx = (addr >> (6 + sck_way + shiftup)) & 0x3; + if (pvt->is_chan_hash) + idx = haswell_chan_hash(idx, addr); + } idx = idx % ch_way; /* @@ -2157,7 +2179,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, switch(ch_way) { case 2: case 4: - sck_xch = 1 << sck_way * (ch_way >> 1); + sck_xch = (1 << sck_way) * (ch_way >> 1); break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); @@ -2193,7 +2215,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ch_addr = addr - offset; ch_addr >>= (6 + shiftup); - ch_addr /= ch_way * sck_way; + ch_addr /= sck_xch; ch_addr <<= (6 + shiftup); ch_addr |= addr & ((1 << (6 + shiftup)) - 1); diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c index 841a4b586395..8b3226dca1d9 100644 --- a/drivers/extcon/extcon-palmas.c +++ b/drivers/extcon/extcon-palmas.c @@ -348,8 +348,7 @@ static int palmas_usb_probe(struct platform_device *pdev) palmas_vbus_irq_handler, IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | - IRQF_ONESHOT | - IRQF_EARLY_RESUME, + IRQF_ONESHOT, "palmas_usb_vbus", palmas_usb); if (status < 0) { diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index aa1f743152a2..8714f8c271ba 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -203,7 +203,19 @@ void __init efi_init(void) reserve_regions(); early_memunmap(memmap.map, params.mmap_size); - memblock_mark_nomap(params.mmap & PAGE_MASK, - PAGE_ALIGN(params.mmap_size + - (params.mmap & ~PAGE_MASK))); + + if (IS_ENABLED(CONFIG_ARM)) { + /* + * ARM currently does not allow ioremap_cache() to be called on + * memory regions that are covered by struct page. So remove the + * UEFI memory map from the linear mapping. + */ + memblock_mark_nomap(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + + (params.mmap & ~PAGE_MASK))); + } else { + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + + (params.mmap & ~PAGE_MASK))); + } } diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 11bfee8b79a9..b5d05807e6ec 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -360,7 +360,7 @@ static struct cpuidle_ops psci_cpuidle_ops __initdata = { .init = psci_dt_cpu_init_idle, }; -CPUIDLE_METHOD_OF_DECLARE(psci, "arm,psci", &psci_cpuidle_ops); +CPUIDLE_METHOD_OF_DECLARE(psci, "psci", &psci_cpuidle_ops); #endif #endif diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index d0d3065a7557..e66084c295fb 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -18,6 +18,7 @@ #include <linux/i2c.h> #include <linux/platform_data/pca953x.h> #include <linux/slab.h> +#include <asm/unaligned.h> #include <linux/of_platform.h> #include <linux/acpi.h> @@ -159,7 +160,7 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val) switch (chip->chip_type) { case PCA953X_TYPE: ret = i2c_smbus_write_word_data(chip->client, - reg << 1, (u16) *val); + reg << 1, cpu_to_le16(get_unaligned((u16 *)val))); break; case PCA957X_TYPE: ret = i2c_smbus_write_byte_data(chip->client, reg << 1, diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index b2b7b78664b8..76ac906b4d78 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -283,8 +283,8 @@ static int pxa_gpio_direction_output(struct gpio_chip *chip, writel_relaxed(mask, base + (value ? GPSR_OFFSET : GPCR_OFFSET)); ret = pinctrl_gpio_direction_output(chip->base + offset); - if (!ret) - return 0; + if (ret) + return ret; spin_lock_irqsave(&gpio_lock, flags); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 72065532c1c7..b747c76fd2b1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -68,6 +68,7 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); +static bool gpiolib_initialized; static inline void desc_set_label(struct gpio_desc *d, const char *label) { @@ -440,9 +441,63 @@ static void gpiodevice_release(struct device *dev) cdev_del(&gdev->chrdev); list_del(&gdev->list); ida_simple_remove(&gpio_ida, gdev->id); + kfree(gdev->label); + kfree(gdev->descs); kfree(gdev); } +static int gpiochip_setup_dev(struct gpio_device *gdev) +{ + int status; + + cdev_init(&gdev->chrdev, &gpio_fileops); + gdev->chrdev.owner = THIS_MODULE; + gdev->chrdev.kobj.parent = &gdev->dev.kobj; + gdev->dev.devt = MKDEV(MAJOR(gpio_devt), gdev->id); + status = cdev_add(&gdev->chrdev, gdev->dev.devt, 1); + if (status < 0) + chip_warn(gdev->chip, "failed to add char device %d:%d\n", + MAJOR(gpio_devt), gdev->id); + else + chip_dbg(gdev->chip, "added GPIO chardev (%d:%d)\n", + MAJOR(gpio_devt), gdev->id); + status = device_add(&gdev->dev); + if (status) + goto err_remove_chardev; + + status = gpiochip_sysfs_register(gdev); + if (status) + goto err_remove_device; + + /* From this point, the .release() function cleans up gpio_device */ + gdev->dev.release = gpiodevice_release; + get_device(&gdev->dev); + pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n", + __func__, gdev->base, gdev->base + gdev->ngpio - 1, + dev_name(&gdev->dev), gdev->chip->label ? : "generic"); + + return 0; + +err_remove_device: + device_del(&gdev->dev); +err_remove_chardev: + cdev_del(&gdev->chrdev); + return status; +} + +static void gpiochip_setup_devs(void) +{ + struct gpio_device *gdev; + int err; + + list_for_each_entry(gdev, &gpio_devices, list) { + err = gpiochip_setup_dev(gdev); + if (err) + pr_err("%s: Failed to initialize gpio device (%d)\n", + dev_name(&gdev->dev), err); + } +} + /** * gpiochip_add_data() - register a gpio_chip * @chip: the chip to register, with chip->base initialized @@ -457,6 +512,9 @@ static void gpiodevice_release(struct device *dev) * the gpio framework's arch_initcall(). Otherwise sysfs initialization * for GPIOs will fail rudely. * + * gpiochip_add_data() must only be called after gpiolib initialization, + * ie after core_initcall(). + * * If chip->base is negative, this requests dynamic assignment of * a range of valid GPIOs. */ @@ -504,8 +562,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) else gdev->owner = THIS_MODULE; - gdev->descs = devm_kcalloc(&gdev->dev, chip->ngpio, - sizeof(gdev->descs[0]), GFP_KERNEL); + gdev->descs = kcalloc(chip->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); if (!gdev->descs) { status = -ENOMEM; goto err_free_gdev; @@ -514,16 +571,16 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) if (chip->ngpio == 0) { chip_err(chip, "tried to insert a GPIO chip with zero lines\n"); status = -EINVAL; - goto err_free_gdev; + goto err_free_descs; } if (chip->label) - gdev->label = devm_kstrdup(&gdev->dev, chip->label, GFP_KERNEL); + gdev->label = kstrdup(chip->label, GFP_KERNEL); else - gdev->label = devm_kstrdup(&gdev->dev, "unknown", GFP_KERNEL); + gdev->label = kstrdup("unknown", GFP_KERNEL); if (!gdev->label) { status = -ENOMEM; - goto err_free_gdev; + goto err_free_descs; } gdev->ngpio = chip->ngpio; @@ -543,7 +600,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) if (base < 0) { status = base; spin_unlock_irqrestore(&gpio_lock, flags); - goto err_free_gdev; + goto err_free_label; } /* * TODO: it should not be necessary to reflect the assigned @@ -558,7 +615,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) status = gpiodev_add_to_list(gdev); if (status) { spin_unlock_irqrestore(&gpio_lock, flags); - goto err_free_gdev; + goto err_free_label; } for (i = 0; i < chip->ngpio; i++) { @@ -596,39 +653,16 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) * we get a device node entry in sysfs under * /sys/bus/gpio/devices/gpiochipN/dev that can be used for * coldplug of device nodes and other udev business. + * We can do this only if gpiolib has been initialized. + * Otherwise, defer until later. */ - cdev_init(&gdev->chrdev, &gpio_fileops); - gdev->chrdev.owner = THIS_MODULE; - gdev->chrdev.kobj.parent = &gdev->dev.kobj; - gdev->dev.devt = MKDEV(MAJOR(gpio_devt), gdev->id); - status = cdev_add(&gdev->chrdev, gdev->dev.devt, 1); - if (status < 0) - chip_warn(chip, "failed to add char device %d:%d\n", - MAJOR(gpio_devt), gdev->id); - else - chip_dbg(chip, "added GPIO chardev (%d:%d)\n", - MAJOR(gpio_devt), gdev->id); - status = device_add(&gdev->dev); - if (status) - goto err_remove_chardev; - - status = gpiochip_sysfs_register(gdev); - if (status) - goto err_remove_device; - - /* From this point, the .release() function cleans up gpio_device */ - gdev->dev.release = gpiodevice_release; - get_device(&gdev->dev); - pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n", - __func__, gdev->base, gdev->base + gdev->ngpio - 1, - dev_name(&gdev->dev), chip->label ? : "generic"); - + if (gpiolib_initialized) { + status = gpiochip_setup_dev(gdev); + if (status) + goto err_remove_chip; + } return 0; -err_remove_device: - device_del(&gdev->dev); -err_remove_chardev: - cdev_del(&gdev->chrdev); err_remove_chip: acpi_gpiochip_remove(chip); gpiochip_free_hogs(chip); @@ -637,6 +671,10 @@ err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); spin_unlock_irqrestore(&gpio_lock, flags); +err_free_label: + kfree(gdev->label); +err_free_descs: + kfree(gdev->descs); err_free_gdev: ida_simple_remove(&gpio_ida, gdev->id); /* failures here can mean systems won't boot... */ @@ -2231,9 +2269,11 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, return desc; } -static struct gpio_desc *acpi_find_gpio(struct device *dev, const char *con_id, +static struct gpio_desc *acpi_find_gpio(struct device *dev, + const char *con_id, unsigned int idx, - enum gpio_lookup_flags *flags) + enum gpiod_flags flags, + enum gpio_lookup_flags *lookupflags) { struct acpi_device *adev = ACPI_COMPANION(dev); struct acpi_gpio_info info; @@ -2264,10 +2304,16 @@ static struct gpio_desc *acpi_find_gpio(struct device *dev, const char *con_id, desc = acpi_get_gpiod_by_index(adev, NULL, idx, &info); if (IS_ERR(desc)) return desc; + + if ((flags == GPIOD_OUT_LOW || flags == GPIOD_OUT_HIGH) && + info.gpioint) { + dev_dbg(dev, "refusing GpioInt() entry when doing GPIOD_OUT_* lookup\n"); + return ERR_PTR(-ENOENT); + } } if (info.polarity == GPIO_ACTIVE_LOW) - *flags |= GPIO_ACTIVE_LOW; + *lookupflags |= GPIO_ACTIVE_LOW; return desc; } @@ -2530,7 +2576,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, desc = of_find_gpio(dev, con_id, idx, &lookupflags); } else if (ACPI_COMPANION(dev)) { dev_dbg(dev, "using ACPI for GPIO lookup\n"); - desc = acpi_find_gpio(dev, con_id, idx, &lookupflags); + desc = acpi_find_gpio(dev, con_id, idx, flags, &lookupflags); } } @@ -2829,6 +2875,9 @@ static int __init gpiolib_dev_init(void) if (ret < 0) { pr_err("gpiolib: failed to allocate char dev region\n"); bus_unregister(&gpio_bus_type); + } else { + gpiolib_initialized = true; + gpiochip_setup_devs(); } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 62a778012fe0..1bcbade479dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1591,6 +1591,7 @@ struct amdgpu_uvd { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; + unsigned fw_version; void *saved_bo; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; @@ -2034,6 +2035,7 @@ struct amdgpu_device { /* tracking pinned memory */ u64 vram_pin_size; + u64 invisible_pin_size; u64 gart_pin_size; /* amdkfd interface */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index d6b0bff510aa..b7b583c42ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -425,6 +425,10 @@ static int acp_resume(void *handle) struct acp_pm_domain *apd; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* return early if no ACP */ + if (!adev->acp.acp_genpd) + return 0; + /* SMU block will power on ACP irrespective of ACP runtime status. * Power off explicitly based on genpd ACP runtime status so that ACP * hw and ACP-genpd status are in sync. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 598eb0cd5aab..b04337de65d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file fw_info.feature = adev->vce.fb_version; break; case AMDGPU_INFO_FW_UVD: - fw_info.ver = 0; + fw_info.ver = adev->uvd.fw_version; fw_info.feature = 0; break; case AMDGPU_INFO_FW_GMC: @@ -384,7 +384,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file vram_gtt.vram_size = adev->mc.real_vram_size; vram_gtt.vram_size -= adev->vram_pin_size; vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; - vram_gtt.vram_cpu_accessible_size -= adev->vram_pin_size; + vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); vram_gtt.gtt_size = adev->mc.gtt_size; vram_gtt.gtt_size -= adev->gart_pin_size; return copy_to_user(out, &vram_gtt, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 8d432e6901af..81bd964d3dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -53,7 +53,7 @@ struct amdgpu_hpd; #define AMDGPU_MAX_HPD_PINS 6 #define AMDGPU_MAX_CRTCS 6 -#define AMDGPU_MAX_AFMT_BLOCKS 7 +#define AMDGPU_MAX_AFMT_BLOCKS 9 enum amdgpu_rmx_type { RMX_OFF, @@ -309,8 +309,8 @@ struct amdgpu_mode_info { struct atom_context *atom_context; struct card_info *atom_card_info; bool mode_config_initialized; - struct amdgpu_crtc *crtcs[6]; - struct amdgpu_afmt *afmt[7]; + struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; + struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; /* DVI-I properties */ struct drm_property *coherent_mode_property; /* DAC enable load detect */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5b6639faa731..e557fc1f17c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -424,9 +424,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, bo->pin_count = 1; if (gpu_addr != NULL) *gpu_addr = amdgpu_bo_gpu_offset(bo); - if (domain == AMDGPU_GEM_DOMAIN_VRAM) + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { bo->adev->vram_pin_size += amdgpu_bo_size(bo); - else + if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + bo->adev->invisible_pin_size += amdgpu_bo_size(bo); + } else bo->adev->gart_pin_size += amdgpu_bo_size(bo); } else { dev_err(bo->adev->dev, "%p pin failed\n", bo); @@ -456,9 +458,11 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) } r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { - if (bo->tbo.mem.mem_type == TTM_PL_VRAM) + if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { bo->adev->vram_pin_size -= amdgpu_bo_size(bo); - else + if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + bo->adev->invisible_pin_size -= amdgpu_bo_size(bo); + } else bo->adev->gart_pin_size -= amdgpu_bo_size(bo); } else { dev_err(bo->adev->dev, "%p validate failed for unpin\n", bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6f3369de232f..11af4492b4be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -223,6 +223,8 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *rbo = container_of(bo, struct amdgpu_bo, tbo); + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) + return -EPERM; return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 338da80006b6..871018c634e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -158,6 +158,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", version_major, version_minor, family_id); + adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | + (family_id << 8)); + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE; r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, @@ -255,6 +258,8 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_UVD_HANDLES) return 0; + cancel_delayed_work_sync(&adev->uvd.idle_work); + size = amdgpu_bo_size(adev->uvd.vcpu_bo); ptr = adev->uvd.cpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 4bec0c108cea..481a64fa9b47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -234,6 +234,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_VCE_HANDLES) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index b6f7d7bff929..0f14199cf716 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -307,7 +307,7 @@ static int tonga_ih_sw_fini(void *handle) amdgpu_irq_fini(adev); amdgpu_ih_ring_fini(adev); - amdgpu_irq_add_domain(adev); + amdgpu_irq_remove_domain(adev); return 0; } diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 27fbd79d0daf..e17fbdaf874b 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1672,13 +1672,19 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, u8 sinks[DRM_DP_MAX_SDP_STREAMS]; int i; + port = drm_dp_get_validated_port_ref(mgr, port); + if (!port) + return -EINVAL; + port_num = port->port_num; mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent); if (!mstb) { mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num); - if (!mstb) + if (!mstb) { + drm_dp_put_port(port); return -EINVAL; + } } txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); @@ -1707,6 +1713,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, kfree(txmsg); fail_put: drm_dp_put_mst_branch_device(mstb); + drm_dp_put_port(port); return ret; } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 414d7f61aa05..558ef9fc39e6 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -205,7 +205,7 @@ static const struct drm_display_mode drm_dmt_modes[] = { DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 0x0f - 1024x768@43Hz, interlace */ { DRM_MODE("1024x768i", DRM_MODE_TYPE_DRIVER, 44900, 1024, 1032, - 1208, 1264, 0, 768, 768, 772, 817, 0, + 1208, 1264, 0, 768, 768, 776, 817, 0, DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_INTERLACE) }, /* 0x10 - 1024x768@60Hz */ @@ -522,12 +522,12 @@ static const struct drm_display_mode edid_est_modes[] = { 720, 840, 0, 480, 481, 484, 500, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@75Hz */ { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 31500, 640, 664, - 704, 832, 0, 480, 489, 491, 520, 0, + 704, 832, 0, 480, 489, 492, 520, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@72Hz */ { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 30240, 640, 704, 768, 864, 0, 480, 483, 486, 525, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@67Hz */ - { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25200, 640, 656, + { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656, 752, 800, 0, 480, 490, 492, 525, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@60Hz */ { DRM_MODE("720x400", DRM_MODE_TYPE_DRIVER, 35500, 720, 738, @@ -539,7 +539,7 @@ static const struct drm_display_mode edid_est_modes[] = { { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 135000, 1280, 1296, 1440, 1688, 0, 1024, 1025, 1028, 1066, 0, DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1280x1024@75Hz */ - { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 78800, 1024, 1040, + { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 78750, 1024, 1040, 1136, 1312, 0, 768, 769, 772, 800, 0, DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1024x768@75Hz */ { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 75000, 1024, 1048, @@ -2241,7 +2241,7 @@ drm_est3_modes(struct drm_connector *connector, struct detailed_timing *timing) { int i, j, m, modes = 0; struct drm_display_mode *mode; - u8 *est = ((u8 *)timing) + 5; + u8 *est = ((u8 *)timing) + 6; for (i = 0; i < 6; i++) { for (j = 7; j >= 0; j--) { diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index f17d39279596..baddf33fb475 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -94,7 +94,7 @@ comment "Sub-drivers" config DRM_EXYNOS_G2D bool "G2D" - depends on !VIDEO_SAMSUNG_S5P_G2D + depends on VIDEO_SAMSUNG_S5P_G2D=n select FRAME_VECTOR help Choose this option if you want to use Exynos G2D for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index 968b31c522b2..23d2f958739b 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -2,10 +2,10 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -exynosdrm-y := exynos_drm_drv.o exynos_drm_crtc.o exynos_drm_fbdev.o \ - exynos_drm_fb.o exynos_drm_gem.o exynos_drm_core.o \ - exynos_drm_plane.o +exynosdrm-y := exynos_drm_drv.o exynos_drm_crtc.o exynos_drm_fb.o \ + exynos_drm_gem.o exynos_drm_core.o exynos_drm_plane.o +exynosdrm-$(CONFIG_DRM_FBDEV_EMULATION) += exynos_drm_fbdev.o exynosdrm-$(CONFIG_DRM_EXYNOS_IOMMU) += exynos_drm_iommu.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMD) += exynos_drm_fimd.o exynosdrm-$(CONFIG_DRM_EXYNOS5433_DECON) += exynos5433_drm_decon.o diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c index 7f55ba6771c6..011211e4167d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_core.c +++ b/drivers/gpu/drm/exynos/exynos_drm_core.c @@ -101,7 +101,7 @@ int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file) return 0; err: - list_for_each_entry_reverse(subdrv, &subdrv->list, list) { + list_for_each_entry_continue_reverse(subdrv, &exynos_drm_subdrv_list, list) { if (subdrv->close) subdrv->close(dev, subdrv->dev, file); } diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index d614194644c8..81cc5537cf25 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -199,17 +199,6 @@ dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index) return exynos_fb->dma_addr[index]; } -static void exynos_drm_output_poll_changed(struct drm_device *dev) -{ - struct exynos_drm_private *private = dev->dev_private; - struct drm_fb_helper *fb_helper = private->fb_helper; - - if (fb_helper) - drm_fb_helper_hotplug_event(fb_helper); - else - exynos_drm_fbdev_init(dev); -} - static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = { .fb_create = exynos_user_fb_create, .output_poll_changed = exynos_drm_output_poll_changed, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 4ae860c44f1d..72d7c0b7c216 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -317,3 +317,14 @@ void exynos_drm_fbdev_restore_mode(struct drm_device *dev) drm_fb_helper_restore_fbdev_mode_unlocked(private->fb_helper); } + +void exynos_drm_output_poll_changed(struct drm_device *dev) +{ + struct exynos_drm_private *private = dev->dev_private; + struct drm_fb_helper *fb_helper = private->fb_helper; + + if (fb_helper) + drm_fb_helper_hotplug_event(fb_helper); + else + exynos_drm_fbdev_init(dev); +} diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h index e16d7f0ae192..330eef87f718 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h @@ -15,9 +15,30 @@ #ifndef _EXYNOS_DRM_FBDEV_H_ #define _EXYNOS_DRM_FBDEV_H_ +#ifdef CONFIG_DRM_FBDEV_EMULATION + int exynos_drm_fbdev_init(struct drm_device *dev); -int exynos_drm_fbdev_reinit(struct drm_device *dev); void exynos_drm_fbdev_fini(struct drm_device *dev); void exynos_drm_fbdev_restore_mode(struct drm_device *dev); +void exynos_drm_output_poll_changed(struct drm_device *dev); + +#else + +static inline int exynos_drm_fbdev_init(struct drm_device *dev) +{ + return 0; +} + +static inline void exynos_drm_fbdev_fini(struct drm_device *dev) +{ +} + +static inline void exynos_drm_fbdev_restore_mode(struct drm_device *dev) +{ +} + +#define exynos_drm_output_poll_changed (NULL) + +#endif #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 51d484ae9f49..018449f8d557 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -888,7 +888,7 @@ static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable) * clock. On these SoCs the bootloader may enable it but any * power domain off/on will reset it to disable state. */ - if (ctx->driver_data != &exynos5_fimd_driver_data || + if (ctx->driver_data != &exynos5_fimd_driver_data && ctx->driver_data != &exynos5420_fimd_driver_data) return; diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 9869d70e9e54..a0def0be6d65 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -129,7 +129,7 @@ static void mic_set_path(struct exynos_mic *mic, bool enable) } else val &= ~(MIC0_RGB_MUX | MIC0_I80_MUX | MIC0_ON_MUX); - regmap_write(mic->sysreg, DSD_CFG_MUX, val); + ret = regmap_write(mic->sysreg, DSD_CFG_MUX, val); if (ret) DRM_ERROR("mic: Failed to read system register\n"); } @@ -457,6 +457,7 @@ static int exynos_mic_probe(struct platform_device *pdev) "samsung,disp-syscon"); if (IS_ERR(mic->sysreg)) { DRM_ERROR("mic: Failed to get system register.\n"); + ret = PTR_ERR(mic->sysreg); goto err; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index d86227236f55..50185ac347b2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -11,9 +11,10 @@ #include <drm/drmP.h> -#include <drm/exynos_drm.h> -#include <drm/drm_plane_helper.h> +#include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/exynos_drm.h> #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" #include "exynos_drm_fb.h" @@ -57,11 +58,12 @@ static int exynos_plane_get_size(int start, unsigned length, unsigned last) } static void exynos_plane_mode_set(struct exynos_drm_plane_state *exynos_state) - { struct drm_plane_state *state = &exynos_state->base; - struct drm_crtc *crtc = exynos_state->base.crtc; - struct drm_display_mode *mode = &crtc->state->adjusted_mode; + struct drm_crtc *crtc = state->crtc; + struct drm_crtc_state *crtc_state = + drm_atomic_get_existing_crtc_state(state->state, crtc); + struct drm_display_mode *mode = &crtc_state->adjusted_mode; int crtc_x, crtc_y; unsigned int crtc_w, crtc_h; unsigned int src_x, src_y; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 20e82008b8b6..30798cbc6fc0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -758,10 +758,10 @@ static int i915_drm_resume(struct drm_device *dev) dev_priv->display.hpd_irq_setup(dev); spin_unlock_irq(&dev_priv->irq_lock); - intel_display_resume(dev); - intel_dp_mst_resume(dev); + intel_display_resume(dev); + /* * ... but also need to make sure that hotplug processing * doesn't cause havoc. Like in the driver load code we don't diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 10480939159c..daba7ebb9699 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2634,8 +2634,9 @@ struct drm_i915_cmd_table { /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev) (IS_BXT_REVID(dev, 0, BXT_REVID_A1) || \ - ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && \ - IS_SKL_REVID(dev, 0, SKL_REVID_F0))) + IS_SKL_GT3(dev) || \ + IS_SKL_GT4(dev)) + /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts * even when in MSI mode. This results in spurious interrupt warnings if the diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 18ba8139e922..4d30b60defda 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -501,19 +501,24 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; - down_read(&mm->mmap_sem); - while (pinned < npages) { - ret = get_user_pages_remote(work->task, mm, - obj->userptr.ptr + pinned * PAGE_SIZE, - npages - pinned, - !obj->userptr.read_only, 0, - pvec + pinned, NULL); - if (ret < 0) - break; - - pinned += ret; + ret = -EFAULT; + if (atomic_inc_not_zero(&mm->mm_users)) { + down_read(&mm->mmap_sem); + while (pinned < npages) { + ret = get_user_pages_remote + (work->task, mm, + obj->userptr.ptr + pinned * PAGE_SIZE, + npages - pinned, + !obj->userptr.read_only, 0, + pvec + pinned, NULL); + if (ret < 0) + break; + + pinned += ret; + } + up_read(&mm->mmap_sem); + mmput(mm); } - up_read(&mm->mmap_sem); } mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d1a46ef5ab3f..1c212205d0e7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1829,7 +1829,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) /* IRQs are synced during runtime_suspend, we don't require a wakeref */ disable_rpm_wakeref_asserts(dev_priv); - for (;;) { + do { master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL; iir = I915_READ(VLV_IIR); @@ -1857,7 +1857,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) I915_WRITE(GEN8_MASTER_IRQ, DE_MASTER_IRQ_CONTROL); POSTING_READ(GEN8_MASTER_IRQ); - } + } while (0); enable_rpm_wakeref_asserts(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index a2bd698fe2f7..937e77228466 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -506,6 +506,8 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; + intel_connector->unregister(intel_connector); + /* need to nuke the connector */ drm_modeset_lock_all(dev); if (connector->state->crtc) { @@ -519,11 +521,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, WARN(ret, "Disabling mst crtc failed with %i\n", ret); } - drm_modeset_unlock_all(dev); - intel_connector->unregister(intel_connector); - - drm_modeset_lock_all(dev); intel_connector_remove_from_fbdev(intel_connector); drm_connector_cleanup(connector); drm_modeset_unlock_all(dev); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6a978ce80244..5c6080fd0968 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -841,11 +841,11 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes) if (unlikely(total_bytes > remain_usable)) { /* * The base request will fit but the reserved space - * falls off the end. So only need to to wait for the - * reserved size after flushing out the remainder. + * falls off the end. So don't need an immediate wrap + * and only need to effectively wait for the reserved + * size space from the start of ringbuffer. */ wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; } else if (total_bytes > ringbuf->space) { /* No wrapping required, just waiting. */ wait_bytes = total_bytes; @@ -1913,15 +1913,18 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) struct intel_ringbuffer *ringbuf = request->ringbuf; int ret; - ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS); + ret = intel_logical_ring_begin(request, 8 + WA_TAIL_DWORDS); if (ret) return ret; + /* We're using qword write, seqno should be aligned to 8 bytes. */ + BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); + /* w/a for post sync ops following a GPGPU operation we * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(5)); + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); intel_logical_ring_emit(ringbuf, (PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | @@ -1929,7 +1932,10 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) intel_logical_ring_emit(ringbuf, hws_seqno_address(request->ring)); intel_logical_ring_emit(ringbuf, 0); intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); + /* We're thrashing one dword of HWS. */ + intel_logical_ring_emit(ringbuf, 0); intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); + intel_logical_ring_emit(ringbuf, MI_NOOP); return intel_logical_ring_advance_and_submit(request); } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 30a8403a8f4f..cd9fe609aefb 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -478,11 +478,8 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, * and as part of the cleanup in the hw state restore we also redisable * the vga plane. */ - if (!HAS_PCH_SPLIT(dev)) { - drm_modeset_lock_all(dev); + if (!HAS_PCH_SPLIT(dev)) intel_display_resume(dev); - drm_modeset_unlock_all(dev); - } dev_priv->modeset_restore = MODESET_DONE; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 347d4df49a9b..8ed3cf34f82d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2876,25 +2876,28 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, int y) { - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); + struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); struct drm_framebuffer *fb = pstate->fb; + uint32_t width = 0, height = 0; + + width = drm_rect_width(&intel_pstate->src) >> 16; + height = drm_rect_height(&intel_pstate->src) >> 16; + + if (intel_rotation_90_or_270(pstate->rotation)) + swap(width, height); /* for planar format */ if (fb->pixel_format == DRM_FORMAT_NV12) { if (y) /* y-plane data rate */ - return intel_crtc->config->pipe_src_w * - intel_crtc->config->pipe_src_h * + return width * height * drm_format_plane_cpp(fb->pixel_format, 0); else /* uv-plane data rate */ - return (intel_crtc->config->pipe_src_w/2) * - (intel_crtc->config->pipe_src_h/2) * + return (width / 2) * (height / 2) * drm_format_plane_cpp(fb->pixel_format, 1); } /* for packed formats */ - return intel_crtc->config->pipe_src_w * - intel_crtc->config->pipe_src_h * - drm_format_plane_cpp(fb->pixel_format, 0); + return width * height * drm_format_plane_cpp(fb->pixel_format, 0); } /* @@ -2973,8 +2976,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct drm_framebuffer *fb = plane->state->fb; int id = skl_wm_plane_id(intel_plane); - if (fb == NULL) + if (!to_intel_plane_state(plane->state)->visible) continue; + if (plane->type == DRM_PLANE_TYPE_CURSOR) continue; @@ -3000,7 +3004,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, uint16_t plane_blocks, y_plane_blocks = 0; int id = skl_wm_plane_id(intel_plane); - if (pstate->fb == NULL) + if (!to_intel_plane_state(pstate)->visible) continue; if (plane->type == DRM_PLANE_TYPE_CURSOR) continue; @@ -3123,26 +3127,36 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, { struct drm_plane *plane = &intel_plane->base; struct drm_framebuffer *fb = plane->state->fb; + struct intel_plane_state *intel_pstate = + to_intel_plane_state(plane->state); uint32_t latency = dev_priv->wm.skl_latency[level]; uint32_t method1, method2; uint32_t plane_bytes_per_line, plane_blocks_per_line; uint32_t res_blocks, res_lines; uint32_t selected_result; uint8_t cpp; + uint32_t width = 0, height = 0; - if (latency == 0 || !cstate->base.active || !fb) + if (latency == 0 || !cstate->base.active || !intel_pstate->visible) return false; + width = drm_rect_width(&intel_pstate->src) >> 16; + height = drm_rect_height(&intel_pstate->src) >> 16; + + if (intel_rotation_90_or_270(plane->state->rotation)) + swap(width, height); + cpp = drm_format_plane_cpp(fb->pixel_format, 0); method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate), cpp, latency); method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - cstate->pipe_src_w, - cpp, fb->modifier[0], + width, + cpp, + fb->modifier[0], latency); - plane_bytes_per_line = cstate->pipe_src_w * cpp; + plane_bytes_per_line = width * cpp; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 45ce45a5e122..9121646d7c4d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -968,7 +968,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) /* WaForceContextSaveRestoreNonCoherent:skl,bxt */ tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT; - if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) || + if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) || IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER)) tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE; WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp); @@ -1085,7 +1085,8 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HIZ_CHICKEN, BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); - if (IS_SKL_REVID(dev, 0, SKL_REVID_F0)) { + /* This is tied to WaForceContextSaveRestoreNonCoherent */ + if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) { /* *Use Force Non-Coherent whenever executing a 3D context. This * is a workaround for a possible hang in the unlikely event @@ -2090,10 +2091,12 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = ringbuf->obj; + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ + unsigned flags = PIN_OFFSET_BIAS | 4096; int ret; if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0); + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags); if (ret) return ret; @@ -2109,7 +2112,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, return -ENOMEM; } } else { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, + flags | PIN_MAPPABLE); if (ret) return ret; @@ -2454,11 +2458,11 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes) if (unlikely(total_bytes > remain_usable)) { /* * The base request will fit but the reserved space - * falls off the end. So only need to to wait for the - * reserved size after flushing out the remainder. + * falls off the end. So don't need an immediate wrap + * and only need to effectively wait for the reserved + * size space from the start of ringbuffer. */ wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; } else if (total_bytes > ringbuf->space) { /* No wrapping required, just waiting. */ wait_bytes = total_bytes; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 436d8f2b8682..68b6f69aa682 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1189,7 +1189,11 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev) } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + if (IS_HASWELL(dev)) + dev_priv->uncore.funcs.force_wake_put = + fw_domains_put_with_fifo; + else + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_ACK_HSW); } else if (IS_IVYBRIDGE(dev)) { diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index ae96ebc490fb..e81aefe5ffa7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1276,18 +1276,18 @@ nouveau_connector_create(struct drm_device *dev, int index) break; default: if (disp->dithering_mode) { + nv_connector->dithering_mode = DITHERING_MODE_AUTO; drm_object_attach_property(&connector->base, disp->dithering_mode, nv_connector-> dithering_mode); - nv_connector->dithering_mode = DITHERING_MODE_AUTO; } if (disp->dithering_depth) { + nv_connector->dithering_depth = DITHERING_DEPTH_AUTO; drm_object_attach_property(&connector->base, disp->dithering_depth, nv_connector-> dithering_depth); - nv_connector->dithering_depth = DITHERING_DEPTH_AUTO; } break; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index c56a886229f1..b2de290da16f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1832,6 +1832,8 @@ gf100_gr_init(struct gf100_gr *gr) gf100_gr_mmio(gr, gr->func->mmio); + nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); for (i = 0, gpc = -1; i < gr->tpc_total; i++) { do { diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 43e5f503d1c5..030409a3ee4e 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -375,10 +375,15 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, qxl_bo_kunmap(user_bo); + qcrtc->cur_x += qcrtc->hot_spot_x - hot_x; + qcrtc->cur_y += qcrtc->hot_spot_y - hot_y; + qcrtc->hot_spot_x = hot_x; + qcrtc->hot_spot_y = hot_y; + cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_SET; - cmd->u.set.position.x = qcrtc->cur_x; - cmd->u.set.position.y = qcrtc->cur_y; + cmd->u.set.position.x = qcrtc->cur_x + qcrtc->hot_spot_x; + cmd->u.set.position.y = qcrtc->cur_y + qcrtc->hot_spot_y; cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); @@ -441,8 +446,8 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc, cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_MOVE; - cmd->u.position.x = qcrtc->cur_x; - cmd->u.position.y = qcrtc->cur_y; + cmd->u.position.x = qcrtc->cur_x + qcrtc->hot_spot_x; + cmd->u.position.y = qcrtc->cur_y + qcrtc->hot_spot_y; qxl_release_unmap(qdev, release, &cmd->release_info); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 6e6b9b1519b8..3f3897eb458c 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -135,6 +135,8 @@ struct qxl_crtc { int index; int cur_x; int cur_y; + int hot_spot_x; + int hot_spot_y; }; struct qxl_output { diff --git a/drivers/gpu/drm/radeon/ni_reg.h b/drivers/gpu/drm/radeon/ni_reg.h index da310a70c0f0..827ccc87cbc3 100644 --- a/drivers/gpu/drm/radeon/ni_reg.h +++ b/drivers/gpu/drm/radeon/ni_reg.h @@ -109,6 +109,8 @@ #define NI_DP_MSE_SAT2 0x7398 #define NI_DP_MSE_SAT_UPDATE 0x739c +# define NI_DP_MSE_SAT_UPDATE_MASK 0x3 +# define NI_DP_MSE_16_MTP_KEEPOUT 0x100 #define NI_DIG_BE_CNTL 0x7140 # define NI_DIG_FE_SOURCE_SELECT(x) (((x) & 0x7f) << 8) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index fd8c4d317e60..95f4fea89302 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -62,10 +62,6 @@ bool radeon_has_atpx(void) { return radeon_atpx_priv.atpx_detected; } -bool radeon_has_atpx_dgpu_power_cntl(void) { - return radeon_atpx_priv.atpx.functions.power_cntl; -} - /** * radeon_atpx_call - call an ATPX method * @@ -145,6 +141,13 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas */ static int radeon_atpx_validate(struct radeon_atpx *atpx) { + /* make sure required functions are enabled */ + /* dGPU power control is required */ + if (atpx->functions.power_cntl == false) { + printk("ATPX dGPU power cntl not present, forcing\n"); + atpx->functions.power_cntl = true; + } + if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index cfcc099c537d..81a63d7f5cd9 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -2002,10 +2002,12 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.dither_property, RADEON_FMT_DITHER_DISABLE); - if (radeon_audio != 0) + if (radeon_audio != 0) { drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; + } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.output_csc_property, @@ -2130,6 +2132,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; @@ -2185,6 +2188,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, @@ -2237,6 +2241,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 4fd1a961012d..d0826fb0434c 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -103,12 +103,6 @@ static const char radeon_family_name[][16] = { "LAST", }; -#if defined(CONFIG_VGA_SWITCHEROO) -bool radeon_has_atpx_dgpu_power_cntl(void); -#else -static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; } -#endif - #define RADEON_PX_QUIRK_DISABLE_PX (1 << 0) #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1) @@ -1305,9 +1299,9 @@ int radeon_device_init(struct radeon_device *rdev, } rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS); - DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", - radeon_family_name[rdev->family], pdev->vendor, pdev->device, - pdev->subsystem_vendor, pdev->subsystem_device); + DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", + radeon_family_name[rdev->family], pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); /* mutex initialization are all done here so we * can recall function without having locking issues */ @@ -1439,7 +1433,7 @@ int radeon_device_init(struct radeon_device *rdev, * ignore it */ vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); - if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl()) + if (rdev->flags & RADEON_IS_PX) runtime = true; vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime); if (runtime) diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index 43cffb526b0c..de504ea29c06 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -89,8 +89,16 @@ static int radeon_dp_mst_set_stream_attrib(struct radeon_encoder *primary, WREG32(NI_DP_MSE_SAT_UPDATE + primary->offset, 1); do { + unsigned value1, value2; + udelay(10); temp = RREG32(NI_DP_MSE_SAT_UPDATE + primary->offset); - } while ((temp & 0x1) && retries++ < 10000); + + value1 = temp & NI_DP_MSE_SAT_UPDATE_MASK; + value2 = temp & NI_DP_MSE_16_MTP_KEEPOUT; + + if (!value1 && !value2) + break; + } while (retries++ < 50); if (retries == 10000) DRM_ERROR("timed out waitin for SAT update %d\n", primary->offset); @@ -150,7 +158,7 @@ static int radeon_dp_mst_update_stream_attribs(struct radeon_connector *mst_conn return 0; } -static int radeon_dp_mst_set_vcp_size(struct radeon_encoder *mst, uint32_t x, uint32_t y) +static int radeon_dp_mst_set_vcp_size(struct radeon_encoder *mst, s64 avg_time_slots_per_mtp) { struct drm_device *dev = mst->base.dev; struct radeon_device *rdev = dev->dev_private; @@ -158,6 +166,8 @@ static int radeon_dp_mst_set_vcp_size(struct radeon_encoder *mst, uint32_t x, ui uint32_t val, temp; uint32_t offset = radeon_atom_set_enc_offset(mst_enc->fe); int retries = 0; + uint32_t x = drm_fixp2int(avg_time_slots_per_mtp); + uint32_t y = drm_fixp2int_ceil((avg_time_slots_per_mtp - x) << 26); val = NI_DP_MSE_RATE_X(x) | NI_DP_MSE_RATE_Y(y); @@ -165,6 +175,7 @@ static int radeon_dp_mst_set_vcp_size(struct radeon_encoder *mst, uint32_t x, ui do { temp = RREG32(NI_DP_MSE_RATE_UPDATE + offset); + udelay(10); } while ((temp & 0x1) && (retries++ < 10000)); if (retries >= 10000) @@ -246,14 +257,8 @@ radeon_dp_mst_connector_destroy(struct drm_connector *connector) kfree(radeon_connector); } -static int radeon_connector_dpms(struct drm_connector *connector, int mode) -{ - DRM_DEBUG_KMS("\n"); - return 0; -} - static const struct drm_connector_funcs radeon_dp_mst_connector_funcs = { - .dpms = radeon_connector_dpms, + .dpms = drm_helper_connector_dpms, .detect = radeon_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = radeon_dp_mst_connector_destroy, @@ -394,7 +399,7 @@ radeon_mst_encoder_dpms(struct drm_encoder *encoder, int mode) struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; int ret, slots; - + s64 fixed_pbn, fixed_pbn_per_slot, avg_time_slots_per_mtp; if (!ASIC_IS_DCE5(rdev)) { DRM_ERROR("got mst dpms on non-DCE5\n"); return; @@ -456,7 +461,11 @@ radeon_mst_encoder_dpms(struct drm_encoder *encoder, int mode) mst_enc->enc_active = true; radeon_dp_mst_update_stream_attribs(radeon_connector->mst_port, primary); - radeon_dp_mst_set_vcp_size(radeon_encoder, slots, 0); + + fixed_pbn = drm_int2fixp(mst_enc->pbn); + fixed_pbn_per_slot = drm_int2fixp(radeon_connector->mst_port->mst_mgr.pbn_div); + avg_time_slots_per_mtp = drm_fixp_div(fixed_pbn, fixed_pbn_per_slot); + radeon_dp_mst_set_vcp_size(radeon_encoder, avg_time_slots_per_mtp); atombios_dig_encoder_setup2(&primary->base, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0, mst_enc->fe); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 7dddfdce85e6..90f739478a1b 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -235,6 +235,8 @@ static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo); + if (radeon_ttm_tt_has_userptr(bo->ttm)) + return -EPERM; return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp); } diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index af4df81c4e0c..e6abc09b67e3 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2931,6 +2931,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6810, 0x1682, 0x9275, 0, 120000 }, { 0, 0, 0, 0 }, }; diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index bdb8cc89cacc..4f9c5c6deaed 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1979,6 +1979,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER) }, { HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) }, { HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5c0e43ed5c53..c6eaff5f8845 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -676,6 +676,7 @@ #define USB_DEVICE_ID_SIDEWINDER_GV 0x003b #define USB_DEVICE_ID_MS_OFFICE_KB 0x0048 #define USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0 0x009d +#define USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K 0x00b4 #define USB_DEVICE_ID_MS_NE4K 0x00db #define USB_DEVICE_ID_MS_NE4K_JP 0x00dc #define USB_DEVICE_ID_MS_LK6K 0x00f9 @@ -683,6 +684,8 @@ #define USB_DEVICE_ID_MS_PRESENTER_8K_USB 0x0713 #define USB_DEVICE_ID_MS_NE7K 0x071d #define USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K 0x0730 +#define USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1 0x0732 +#define USB_DEVICE_ID_MS_DIGITAL_MEDIA_600 0x0750 #define USB_DEVICE_ID_MS_COMFORT_MOUSE_4500 0x076c #define USB_DEVICE_ID_MS_COMFORT_KEYBOARD 0x00e3 #define USB_DEVICE_ID_MS_SURFACE_PRO_2 0x0799 diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 0125e356bd8d..1ac4ff4d57a6 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -184,21 +184,31 @@ static int lenovo_send_cmd_cptkbd(struct hid_device *hdev, unsigned char byte2, unsigned char byte3) { int ret; - unsigned char buf[] = {0x18, byte2, byte3}; + unsigned char *buf; + + buf = kzalloc(3, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = 0x18; + buf[1] = byte2; + buf[2] = byte3; switch (hdev->product) { case USB_DEVICE_ID_LENOVO_CUSBKBD: - ret = hid_hw_raw_request(hdev, 0x13, buf, sizeof(buf), + ret = hid_hw_raw_request(hdev, 0x13, buf, 3, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); break; case USB_DEVICE_ID_LENOVO_CBTKBD: - ret = hid_hw_output_report(hdev, buf, sizeof(buf)); + ret = hid_hw_output_report(hdev, buf, 3); break; default: ret = -EINVAL; break; } + kfree(buf); + return ret < 0 ? ret : 0; /* BT returns 0, USB returns sizeof(buf) */ } diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index 75cd3bc59c54..e924d555536c 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -272,6 +272,12 @@ static const struct hid_device_id ms_devices[] = { .driver_data = MS_PRESENTER }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K), .driver_data = MS_ERGONOMY | MS_RDESC_3K }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K), + .driver_data = MS_ERGONOMY }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600), + .driver_data = MS_ERGONOMY }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1), + .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0), .driver_data = MS_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500), diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 25d3c4330bf6..c741f5e50a66 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1169,6 +1169,7 @@ static void mt_release_contacts(struct hid_device *hid) MT_TOOL_FINGER, false); } + input_mt_sync_frame(input_dev); input_sync(input_dev); } } diff --git a/drivers/hid/hid-wiimote-modules.c b/drivers/hid/hid-wiimote-modules.c index 4390eee2ce84..c830ed39348f 100644 --- a/drivers/hid/hid-wiimote-modules.c +++ b/drivers/hid/hid-wiimote-modules.c @@ -2049,9 +2049,11 @@ static void wiimod_mp_in_mp(struct wiimote_data *wdata, const __u8 *ext) * -----+------------------------------+-----+-----+ * The single bits Yaw, Roll, Pitch in the lower right corner specify * whether the wiimote is rotating fast (0) or slow (1). Speed for slow - * roation is 440 deg/s and for fast rotation 2000 deg/s. To get a - * linear scale we multiply by 2000/440 = ~4.5454 which is 18 for fast - * and 9 for slow. + * roation is 8192/440 units / deg/s and for fast rotation 8192/2000 + * units / deg/s. To get a linear scale for fast rotation we multiply + * by 2000/440 = ~4.5454 and scale both fast and slow by 9 to match the + * previous scale reported by this driver. + * This leaves a linear scale with 8192*9/440 (~167.564) units / deg/s. * If the wiimote is not rotating the sensor reports 2^13 = 8192. * Ext specifies whether an extension is connected to the motionp. * which is parsed by wiimote-core. @@ -2070,15 +2072,15 @@ static void wiimod_mp_in_mp(struct wiimote_data *wdata, const __u8 *ext) z -= 8192; if (!(ext[3] & 0x02)) - x *= 18; + x = (x * 2000 * 9) / 440; else x *= 9; if (!(ext[4] & 0x02)) - y *= 18; + y = (y * 2000 * 9) / 440; else y *= 9; if (!(ext[3] & 0x01)) - z *= 18; + z = (z * 2000 * 9) / 440; else z *= 9; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index ad71160b9ea4..ae83af649a60 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -951,14 +951,6 @@ static int usbhid_output_report(struct hid_device *hid, __u8 *buf, size_t count) return ret; } -static void usbhid_restart_queues(struct usbhid_device *usbhid) -{ - if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)) - usbhid_restart_out_queue(usbhid); - if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) - usbhid_restart_ctrl_queue(usbhid); -} - static void hid_free_buffers(struct usb_device *dev, struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; @@ -1404,6 +1396,37 @@ static void hid_cease_io(struct usbhid_device *usbhid) usb_kill_urb(usbhid->urbout); } +static void hid_restart_io(struct hid_device *hid) +{ + struct usbhid_device *usbhid = hid->driver_data; + int clear_halt = test_bit(HID_CLEAR_HALT, &usbhid->iofl); + int reset_pending = test_bit(HID_RESET_PENDING, &usbhid->iofl); + + spin_lock_irq(&usbhid->lock); + clear_bit(HID_SUSPENDED, &usbhid->iofl); + usbhid_mark_busy(usbhid); + + if (clear_halt || reset_pending) + schedule_work(&usbhid->reset_work); + usbhid->retry_delay = 0; + spin_unlock_irq(&usbhid->lock); + + if (reset_pending || !test_bit(HID_STARTED, &usbhid->iofl)) + return; + + if (!clear_halt) { + if (hid_start_in(hid) < 0) + hid_io_error(hid); + } + + spin_lock_irq(&usbhid->lock); + if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)) + usbhid_restart_out_queue(usbhid); + if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) + usbhid_restart_ctrl_queue(usbhid); + spin_unlock_irq(&usbhid->lock); +} + /* Treat USB reset pretty much the same as suspend/resume */ static int hid_pre_reset(struct usb_interface *intf) { @@ -1453,14 +1476,14 @@ static int hid_post_reset(struct usb_interface *intf) return 1; } + /* No need to do another reset or clear a halted endpoint */ spin_lock_irq(&usbhid->lock); clear_bit(HID_RESET_PENDING, &usbhid->iofl); + clear_bit(HID_CLEAR_HALT, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_set_idle(dev, intf->cur_altsetting->desc.bInterfaceNumber, 0, 0); - status = hid_start_in(hid); - if (status < 0) - hid_io_error(hid); - usbhid_restart_queues(usbhid); + + hid_restart_io(hid); return 0; } @@ -1483,25 +1506,9 @@ void usbhid_put_power(struct hid_device *hid) #ifdef CONFIG_PM static int hid_resume_common(struct hid_device *hid, bool driver_suspended) { - struct usbhid_device *usbhid = hid->driver_data; - int status; - - spin_lock_irq(&usbhid->lock); - clear_bit(HID_SUSPENDED, &usbhid->iofl); - usbhid_mark_busy(usbhid); - - if (test_bit(HID_CLEAR_HALT, &usbhid->iofl) || - test_bit(HID_RESET_PENDING, &usbhid->iofl)) - schedule_work(&usbhid->reset_work); - usbhid->retry_delay = 0; - - usbhid_restart_queues(usbhid); - spin_unlock_irq(&usbhid->lock); - - status = hid_start_in(hid); - if (status < 0) - hid_io_error(hid); + int status = 0; + hid_restart_io(hid); if (driver_suspended && hid->driver && hid->driver->resume) status = hid->driver->resume(hid); return status; @@ -1570,12 +1577,8 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) static int hid_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata (intf); - struct usbhid_device *usbhid = hid->driver_data; int status; - if (!test_bit(HID_STARTED, &usbhid->iofl)) - return 0; - status = hid_resume_common(hid, true); dev_dbg(&intf->dev, "resume status %d\n", status); return 0; @@ -1584,10 +1587,8 @@ static int hid_resume(struct usb_interface *intf) static int hid_reset_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); - struct usbhid_device *usbhid = hid->driver_data; int status; - clear_bit(HID_SUSPENDED, &usbhid->iofl); status = hid_post_reset(intf); if (status >= 0 && hid->driver && hid->driver->reset_resume) { int ret = hid->driver->reset_resume(hid); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 68a560957871..ccf1883318c3 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -152,6 +152,25 @@ static void wacom_feature_mapping(struct hid_device *hdev, hid_data->inputmode = field->report->id; hid_data->inputmode_index = usage->usage_index; break; + + case HID_UP_DIGITIZER: + if (field->report->id == 0x0B && + (field->application == WACOM_G9_DIGITIZER || + field->application == WACOM_G11_DIGITIZER)) { + wacom->wacom_wac.mode_report = field->report->id; + wacom->wacom_wac.mode_value = 0; + } + break; + + case WACOM_G9_PAGE: + case WACOM_G11_PAGE: + if (field->report->id == 0x03 && + (field->application == WACOM_G9_TOUCHSCREEN || + field->application == WACOM_G11_TOUCHSCREEN)) { + wacom->wacom_wac.mode_report = field->report->id; + wacom->wacom_wac.mode_value = 0; + } + break; } } @@ -322,26 +341,41 @@ static int wacom_hid_set_device_mode(struct hid_device *hdev) return 0; } -static int wacom_set_device_mode(struct hid_device *hdev, int report_id, - int length, int mode) +static int wacom_set_device_mode(struct hid_device *hdev, + struct wacom_wac *wacom_wac) { - unsigned char *rep_data; + u8 *rep_data; + struct hid_report *r; + struct hid_report_enum *re; + int length; int error = -ENOMEM, limit = 0; - rep_data = kzalloc(length, GFP_KERNEL); + if (wacom_wac->mode_report < 0) + return 0; + + re = &(hdev->report_enum[HID_FEATURE_REPORT]); + r = re->report_id_hash[wacom_wac->mode_report]; + if (!r) + return -EINVAL; + + rep_data = hid_alloc_report_buf(r, GFP_KERNEL); if (!rep_data) - return error; + return -ENOMEM; + + length = hid_report_len(r); do { - rep_data[0] = report_id; - rep_data[1] = mode; + rep_data[0] = wacom_wac->mode_report; + rep_data[1] = wacom_wac->mode_value; error = wacom_set_report(hdev, HID_FEATURE_REPORT, rep_data, length, 1); if (error >= 0) error = wacom_get_report(hdev, HID_FEATURE_REPORT, rep_data, length, 1); - } while (error >= 0 && rep_data[1] != mode && limit++ < WAC_MSG_RETRIES); + } while (error >= 0 && + rep_data[1] != wacom_wac->mode_report && + limit++ < WAC_MSG_RETRIES); kfree(rep_data); @@ -411,32 +445,41 @@ static int wacom_bt_query_tablet_data(struct hid_device *hdev, u8 speed, static int wacom_query_tablet_data(struct hid_device *hdev, struct wacom_features *features) { + struct wacom *wacom = hid_get_drvdata(hdev); + struct wacom_wac *wacom_wac = &wacom->wacom_wac; + if (hdev->bus == BUS_BLUETOOTH) return wacom_bt_query_tablet_data(hdev, 1, features); - if (features->type == HID_GENERIC) - return wacom_hid_set_device_mode(hdev); - - if (features->device_type & WACOM_DEVICETYPE_TOUCH) { - if (features->type > TABLETPC) { - /* MT Tablet PC touch */ - return wacom_set_device_mode(hdev, 3, 4, 4); - } - else if (features->type == WACOM_24HDT) { - return wacom_set_device_mode(hdev, 18, 3, 2); - } - else if (features->type == WACOM_27QHDT) { - return wacom_set_device_mode(hdev, 131, 3, 2); - } - else if (features->type == BAMBOO_PAD) { - return wacom_set_device_mode(hdev, 2, 2, 2); - } - } else if (features->device_type & WACOM_DEVICETYPE_PEN) { - if (features->type <= BAMBOO_PT) { - return wacom_set_device_mode(hdev, 2, 2, 2); + if (features->type != HID_GENERIC) { + if (features->device_type & WACOM_DEVICETYPE_TOUCH) { + if (features->type > TABLETPC) { + /* MT Tablet PC touch */ + wacom_wac->mode_report = 3; + wacom_wac->mode_value = 4; + } else if (features->type == WACOM_24HDT) { + wacom_wac->mode_report = 18; + wacom_wac->mode_value = 2; + } else if (features->type == WACOM_27QHDT) { + wacom_wac->mode_report = 131; + wacom_wac->mode_value = 2; + } else if (features->type == BAMBOO_PAD) { + wacom_wac->mode_report = 2; + wacom_wac->mode_value = 2; + } + } else if (features->device_type & WACOM_DEVICETYPE_PEN) { + if (features->type <= BAMBOO_PT) { + wacom_wac->mode_report = 2; + wacom_wac->mode_value = 2; + } } } + wacom_set_device_mode(hdev, wacom_wac); + + if (features->type == HID_GENERIC) + return wacom_hid_set_device_mode(hdev); + return 0; } @@ -1817,6 +1860,9 @@ static int wacom_probe(struct hid_device *hdev, goto fail_type; } + wacom_wac->hid_data.inputmode = -1; + wacom_wac->mode_report = -1; + wacom->usbdev = dev; wacom->intf = intf; mutex_init(&wacom->lock); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index bd198bbd4df0..02c4efea241c 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2426,6 +2426,17 @@ void wacom_setup_device_quirks(struct wacom *wacom) } /* + * Hack for the Bamboo One: + * the device presents a PAD/Touch interface as most Bamboos and even + * sends ghosts PAD data on it. However, later, we must disable this + * ghost interface, and we can not detect it unless we set it here + * to WACOM_DEVICETYPE_PAD or WACOM_DEVICETYPE_TOUCH. + */ + if (features->type == BAMBOO_PEN && + features->pktlen == WACOM_PKGLEN_BBTOUCH3) + features->device_type |= WACOM_DEVICETYPE_PAD; + + /* * Raw Wacom-mode pen and touch events both come from interface * 0, whose HID descriptor has an application usage of 0xFF0D * (i.e., WACOM_VENDORDEFINED_PEN). We route pen packets back diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index 25baa7f29599..e2084d914c14 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -84,6 +84,12 @@ #define WACOM_DEVICETYPE_WL_MONITOR 0x0008 #define WACOM_VENDORDEFINED_PEN 0xff0d0001 +#define WACOM_G9_PAGE 0xff090000 +#define WACOM_G9_DIGITIZER (WACOM_G9_PAGE | 0x02) +#define WACOM_G9_TOUCHSCREEN (WACOM_G9_PAGE | 0x11) +#define WACOM_G11_PAGE 0xff110000 +#define WACOM_G11_DIGITIZER (WACOM_G11_PAGE | 0x02) +#define WACOM_G11_TOUCHSCREEN (WACOM_G11_PAGE | 0x11) #define WACOM_PEN_FIELD(f) (((f)->logical == HID_DG_STYLUS) || \ ((f)->physical == HID_DG_STYLUS) || \ @@ -238,6 +244,8 @@ struct wacom_wac { int ps_connected; u8 bt_features; u8 bt_high_speed; + int mode_report; + int mode_value; struct hid_data hid_data; }; diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index f325663c27c5..ba14a863b451 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -771,11 +771,16 @@ static int jz4780_i2c_probe(struct platform_device *pdev) ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency", &clk_freq); if (ret) { - dev_err(&pdev->dev, "clock-frequency not specified in DT"); + dev_err(&pdev->dev, "clock-frequency not specified in DT\n"); goto err; } i2c->speed = clk_freq / 1000; + if (i2c->speed == 0) { + ret = -EINVAL; + dev_err(&pdev->dev, "clock-frequency minimum is 1000\n"); + goto err; + } jz4780_i2c_set_speed(i2c); dev_info(&pdev->dev, "Bus frequency is %d KHz\n", i2c->speed); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 0f2f8484e8ec..e584d88ee337 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -525,22 +525,16 @@ static int i2c_device_match(struct device *dev, struct device_driver *drv) return 0; } - -/* uevent helps with hotplug: modprobe -q $(MODALIAS) */ static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env) { - struct i2c_client *client = to_i2c_client(dev); + struct i2c_client *client = to_i2c_client(dev); int rc; rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; - if (add_uevent_var(env, "MODALIAS=%s%s", - I2C_MODULE_PREFIX, client->name)) - return -ENOMEM; - dev_dbg(dev, "uevent\n"); - return 0; + return add_uevent_var(env, "MODALIAS=%s%s", I2C_MODULE_PREFIX, client->name); } /* i2c bus recovery routines */ diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 7748a0a5ddb9..8de073aed001 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -140,22 +140,34 @@ static int i2c_demux_change_master(struct i2c_demux_pinctrl_priv *priv, u32 new_ return i2c_demux_activate_master(priv, new_chan); } -static ssize_t cur_master_show(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t available_masters_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct i2c_demux_pinctrl_priv *priv = dev_get_drvdata(dev); int count = 0, i; for (i = 0; i < priv->num_chan && count < PAGE_SIZE; i++) - count += scnprintf(buf + count, PAGE_SIZE - count, "%c %d - %s\n", - i == priv->cur_chan ? '*' : ' ', i, - priv->chan[i].parent_np->full_name); + count += scnprintf(buf + count, PAGE_SIZE - count, "%d:%s%c", + i, priv->chan[i].parent_np->full_name, + i == priv->num_chan - 1 ? '\n' : ' '); return count; } +static DEVICE_ATTR_RO(available_masters); -static ssize_t cur_master_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t current_master_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_demux_pinctrl_priv *priv = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", priv->cur_chan); +} + +static ssize_t current_master_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct i2c_demux_pinctrl_priv *priv = dev_get_drvdata(dev); unsigned int val; @@ -172,7 +184,7 @@ static ssize_t cur_master_store(struct device *dev, struct device_attribute *att return ret < 0 ? ret : count; } -static DEVICE_ATTR_RW(cur_master); +static DEVICE_ATTR_RW(current_master); static int i2c_demux_pinctrl_probe(struct platform_device *pdev) { @@ -218,12 +230,18 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) /* switch to first parent as active master */ i2c_demux_activate_master(priv, 0); - err = device_create_file(&pdev->dev, &dev_attr_cur_master); + err = device_create_file(&pdev->dev, &dev_attr_available_masters); if (err) goto err_rollback; + err = device_create_file(&pdev->dev, &dev_attr_current_master); + if (err) + goto err_rollback_available; + return 0; +err_rollback_available: + device_remove_file(&pdev->dev, &dev_attr_available_masters); err_rollback: for (j = 0; j < i; j++) { of_node_put(priv->chan[j].parent_np); @@ -238,7 +256,8 @@ static int i2c_demux_pinctrl_remove(struct platform_device *pdev) struct i2c_demux_pinctrl_priv *priv = platform_get_drvdata(pdev); int i; - device_remove_file(&pdev->dev, &dev_attr_cur_master); + device_remove_file(&pdev->dev, &dev_attr_current_master); + device_remove_file(&pdev->dev, &dev_attr_available_masters); i2c_demux_deactivate_master(priv); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5acf346e048e..3ff663c35bac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -671,8 +671,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *rep; - int max_mtu; - int oper_mtu; + u16 max_mtu; + u16 oper_mtu; int err; u8 ib_link_width_oper; u8 vl_hw_cap; @@ -1438,7 +1438,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (!ft) { ft = mlx5_create_auto_grouped_flow_table(ns, priority, num_entries, - num_groups); + num_groups, + 0); if (!IS_ERR(ft)) { prio->refcount = 0; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 3ea9e055fdd3..99cef26e74b4 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -356,7 +356,7 @@ static int nes_netdev_stop(struct net_device *netdev) /** * nes_nic_send */ -static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) +static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -413,7 +413,7 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb)); kfree_skb(skb); nesvnic->tx_sw_dropped++; - return NETDEV_TX_LOCKED; + return false; } set_bit(nesnic->sq_head, nesnic->first_frag_overflow); bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE, @@ -454,8 +454,7 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc); nesnic->sq_head++; nesnic->sq_head &= nesnic->sq_size - 1; - - return NETDEV_TX_OK; + return true; } @@ -479,7 +478,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 tso_wqe_length; u32 curr_tcp_seq; u32 wqe_count=1; - u32 send_rc; struct iphdr *iph; __le16 *wqe_fragment_length; u32 nr_frags; @@ -673,13 +671,11 @@ tso_sq_no_longer_full: skb_linearize(skb); skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); - send_rc = nes_nic_send(skb, netdev); - if (send_rc != NETDEV_TX_OK) + if (!nes_nic_send(skb, netdev)) return NETDEV_TX_OK; } } else { - send_rc = nes_nic_send(skb, netdev); - if (send_rc != NETDEV_TX_OK) + if (!nes_nic_send(skb, netdev)) return NETDEV_TX_OK; } diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e8a84d12b7ff..1142a93dd90b 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -153,6 +153,7 @@ static const struct xpad_device { { 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4740, "Mad Catz Beat Pad", 0, XTYPE_XBOX360 }, + { 0x0738, 0x4a01, "Mad Catz FightStick TE 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, @@ -304,6 +305,7 @@ static struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x046d), /* Logitech X-Box 360 style controllers */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz X-Box 360 controllers */ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ + XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ diff --git a/drivers/input/misc/arizona-haptics.c b/drivers/input/misc/arizona-haptics.c index d5994a745ffa..982936334537 100644 --- a/drivers/input/misc/arizona-haptics.c +++ b/drivers/input/misc/arizona-haptics.c @@ -178,7 +178,6 @@ static int arizona_haptics_probe(struct platform_device *pdev) input_set_drvdata(haptics->input_dev, haptics); haptics->input_dev->name = "arizona:haptics"; - haptics->input_dev->dev.parent = pdev->dev.parent; haptics->input_dev->close = arizona_haptics_close; __set_bit(FF_RUMBLE, haptics->input_dev->ffbit); diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c index 3f02e0e03d12..67aab86048ad 100644 --- a/drivers/input/misc/pmic8xxx-pwrkey.c +++ b/drivers/input/misc/pmic8xxx-pwrkey.c @@ -353,7 +353,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev) if (of_property_read_u32(pdev->dev.of_node, "debounce", &kpd_delay)) kpd_delay = 15625; - if (kpd_delay > 62500 || kpd_delay == 0) { + /* Valid range of pwr key trigger delay is 1/64 sec to 2 seconds. */ + if (kpd_delay > USEC_PER_SEC * 2 || kpd_delay < USEC_PER_SEC / 64) { dev_err(&pdev->dev, "invalid power key trigger delay\n"); return -EINVAL; } @@ -385,8 +386,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev) pwr->name = "pmic8xxx_pwrkey"; pwr->phys = "pmic8xxx_pwrkey/input0"; - delay = (kpd_delay << 10) / USEC_PER_SEC; - delay = 1 + ilog2(delay); + delay = (kpd_delay << 6) / USEC_PER_SEC; + delay = ilog2(delay); err = regmap_read(regmap, PON_CNTL_1, &pon_cntl); if (err < 0) { diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index 10c4e3d462f1..caa5a62c42fb 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -222,7 +222,6 @@ static int twl4030_vibra_probe(struct platform_device *pdev) info->input_dev->name = "twl4030:vibrator"; info->input_dev->id.version = 1; - info->input_dev->dev.parent = pdev->dev.parent; info->input_dev->close = twl4030_vibra_close; __set_bit(FF_RUMBLE, info->input_dev->ffbit); diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index ea63fad48de6..53e33fab3f7a 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -45,7 +45,6 @@ struct vibra_info { struct device *dev; struct input_dev *input_dev; - struct workqueue_struct *workqueue; struct work_struct play_work; struct mutex mutex; int irq; @@ -213,11 +212,7 @@ static int vibra_play(struct input_dev *input, void *data, info->strong_speed = effect->u.rumble.strong_magnitude; info->direction = effect->direction < EFFECT_DIR_180_DEG ? 1 : -1; - ret = queue_work(info->workqueue, &info->play_work); - if (!ret) { - dev_info(&input->dev, "work is already on queue\n"); - return ret; - } + schedule_work(&info->play_work); return 0; } @@ -362,7 +357,6 @@ static int twl6040_vibra_probe(struct platform_device *pdev) info->input_dev->name = "twl6040:vibrator"; info->input_dev->id.version = 1; - info->input_dev->dev.parent = pdev->dev.parent; info->input_dev->close = twl6040_vibra_close; __set_bit(FF_RUMBLE, info->input_dev->ffbit); diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 3a7f3a4a4396..7c18249d6c8e 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -858,6 +858,14 @@ static int gtco_probe(struct usb_interface *usbinterface, goto err_free_buf; } + /* Sanity check that a device has an endpoint */ + if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) { + dev_err(&usbinterface->dev, + "Invalid number of endpoints\n"); + error = -EINVAL; + goto err_free_urb; + } + /* * The endpoint is always altsetting 0, we know this since we know * this device only has one interrupt endpoint @@ -879,7 +887,7 @@ static int gtco_probe(struct usb_interface *usbinterface, * HID report descriptor */ if (usb_get_extra_descriptor(usbinterface->cur_altsetting, - HID_DEVICE_TYPE, &hid_desc) != 0){ + HID_DEVICE_TYPE, &hid_desc) != 0) { dev_err(&usbinterface->dev, "Can't retrieve exta USB descriptor to get hid report descriptor length\n"); error = -EIO; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 374c129219ef..5efadad4615b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -92,6 +92,7 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Device is identity mapped */ struct { @@ -166,6 +167,13 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return PCI_DEVID(pdev->bus->number, pdev->devfn); +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -203,6 +211,68 @@ out_unlock: return dev_data; } +static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + *(u16 *)data = alias; + return 0; +} + +static u16 get_alias(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid, ivrs_alias, pci_alias; + + devid = get_device_id(dev); + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); + + if (ivrs_alias == pci_alias) + return ivrs_alias; + + /* + * DMA alias showdown + * + * The IVRS is fairly reliable in telling us about aliases, but it + * can't know about every screwy device. If we don't have an IVRS + * reported alias, use the PCI reported alias. In that case we may + * still need to initialize the rlookup and dev_table entries if the + * alias is to a non-existent device. + */ + if (ivrs_alias == devid) { + if (!amd_iommu_rlookup_table[pci_alias]) { + amd_iommu_rlookup_table[pci_alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[pci_alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[pci_alias].data)); + } + + return pci_alias; + } + + pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + "for device %s[%04x:%04x], kernel reported alias " + "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), + PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), + PCI_FUNC(pci_alias)); + + /* + * If we don't have a PCI DMA alias and the IVRS alias is on the same + * bus, then the IVRS table may know about a quirk that we don't. + */ + if (pci_alias == devid && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { + pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pdev->dma_alias_devfn = ivrs_alias & 0xff; + pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), + dev_name(dev)); + } + + return ivrs_alias; +} + static struct iommu_dev_data *find_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -215,13 +285,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -static inline u16 get_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return PCI_DEVID(pdev->bus->number, pdev->devfn); -} - static struct iommu_dev_data *get_dev_data(struct device *dev) { return dev->archdata.iommu; @@ -349,6 +412,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; + dev_data->alias = get_alias(dev); + if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -369,7 +434,7 @@ static void iommu_ignore_device(struct device *dev) u16 devid, alias; devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; + alias = get_alias(dev); memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); @@ -1061,7 +1126,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; ret = iommu_flush_dte(iommu, dev_data->devid); if (!ret && alias != dev_data->devid) @@ -2039,7 +2104,7 @@ static void do_attach(struct iommu_dev_data *dev_data, bool ats; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; ats = dev_data->ats.enabled; /* Update data structures */ @@ -2073,7 +2138,7 @@ static void do_detach(struct iommu_dev_data *dev_data) return; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2409e3bd3df2..7c39ac4b9c53 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -826,6 +826,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu_domain->smmu) goto out_unlock; + /* We're bypassing these SIDs, so don't allocate an actual context */ + if (domain->type == IOMMU_DOMAIN_DMA) { + smmu_domain->smmu = smmu; + goto out_unlock; + } + /* * Mapping the requested stage onto what we support is surprisingly * complicated, mainly because the spec allows S1+S2 SMMUs without @@ -948,7 +954,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) void __iomem *cb_base; int irq; - if (!smmu) + if (!smmu || domain->type == IOMMU_DOMAIN_DMA) return; /* @@ -1089,18 +1095,20 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - /* Devices in an IOMMU group may already be configured */ - ret = arm_smmu_master_configure_smrs(smmu, cfg); - if (ret) - return ret == -EEXIST ? 0 : ret; - /* * FIXME: This won't be needed once we have IOMMU-backed DMA ops - * for all devices behind the SMMU. + * for all devices behind the SMMU. Note that we need to take + * care configuring SMRs for devices both a platform_device and + * and a PCI device (i.e. a PCI host controller) */ if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA) return 0; + /* Devices in an IOMMU group may already be configured */ + ret = arm_smmu_master_configure_smrs(smmu, cfg); + if (ret) + return ret == -EEXIST ? 0 : ret; + for (i = 0; i < cfg->num_streamids; ++i) { u32 idx, s2cr; diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 94a30da0cfac..4dffccf532a2 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -467,7 +467,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, gic_map_to_vpe(irq, mips_cm_vp_id(cpumask_first(&tmp))); /* Update the pcpu_masks */ - for (i = 0; i < gic_vpes; i++) + for (i = 0; i < min(gic_vpes, NR_CPUS); i++) clear_bit(irq, pcpu_masks[i].pcpu_mask); set_bit(irq, pcpu_masks[cpumask_first(&tmp)].pcpu_mask); @@ -707,7 +707,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, spin_lock_irqsave(&gic_lock, flags); gic_map_to_pin(intr, gic_cpu_pin); gic_map_to_vpe(intr, vpe); - for (i = 0; i < gic_vpes; i++) + for (i = 0; i < min(gic_vpes, NR_CPUS); i++) clear_bit(intr, pcpu_masks[i].pcpu_mask); set_bit(intr, pcpu_masks[vpe].pcpu_mask); spin_unlock_irqrestore(&gic_lock, flags); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 0d29b5a6356d..99e5f9751e8b 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -715,6 +715,9 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (!maddr || maddr->family != AF_ISDN) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_mISDN)) + return -EINVAL; + lock_sock(sk); if (_pms(sk)->dev) { diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index eb934b0242e0..67392b6ab845 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -331,7 +331,7 @@ void set_interrupt(struct lg_cpu *cpu, unsigned int irq) * Actually now I think of it, it's possible that Ron *is* half the Plan 9 * userbase. Oh well. */ -static bool could_be_syscall(unsigned int num) +bool could_be_syscall(unsigned int num) { /* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */ return num == IA32_SYSCALL_VECTOR || num == syscall_vector; @@ -416,6 +416,10 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) * * This routine indicates if a particular trap number could be delivered * directly. + * + * Unfortunately, Linux 4.6 started using an interrupt gate instead of a + * trap gate for syscalls, so this trick is ineffective. See Mastery for + * how we could do this anyway... */ static bool direct_trap(unsigned int num) { diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index ac8ad0461e80..69b3814afd2f 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -167,6 +167,7 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); bool send_notify_to_eventfd(struct lg_cpu *cpu); void init_clockdev(struct lg_cpu *cpu); bool check_syscall_vector(struct lguest *lg); +bool could_be_syscall(unsigned int num); int init_interrupts(void); void free_interrupts(void); diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6a4cd771a2be..adc162c7040d 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -429,8 +429,12 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) return; break; case 32 ... 255: + /* This might be a syscall. */ + if (could_be_syscall(cpu->regs->trapnum)) + break; + /* - * These values mean a real interrupt occurred, in which case + * Other values mean a real interrupt occurred, in which case * the Host handler has already been run. We just do a * friendly check if another process should now be run, then * return to run the Guest again. diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index dc11bbf27274..58d04726cdd7 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -46,7 +46,6 @@ static ssize_t mbox_test_signal_write(struct file *filp, size_t count, loff_t *ppos) { struct mbox_test_device *tdev = filp->private_data; - int ret; if (!tdev->tx_channel) { dev_err(tdev->dev, "Channel cannot do Tx\n"); @@ -60,17 +59,20 @@ static ssize_t mbox_test_signal_write(struct file *filp, return -EINVAL; } - tdev->signal = kzalloc(MBOX_MAX_SIG_LEN, GFP_KERNEL); - if (!tdev->signal) - return -ENOMEM; + /* Only allocate memory if we need to */ + if (!tdev->signal) { + tdev->signal = kzalloc(MBOX_MAX_SIG_LEN, GFP_KERNEL); + if (!tdev->signal) + return -ENOMEM; + } - ret = copy_from_user(tdev->signal, userbuf, count); - if (ret) { + if (copy_from_user(tdev->signal, userbuf, count)) { kfree(tdev->signal); + tdev->signal = NULL; return -EFAULT; } - return ret < 0 ? ret : count; + return count; } static const struct file_operations mbox_test_signal_ops = { diff --git a/drivers/mailbox/mailbox-xgene-slimpro.c b/drivers/mailbox/mailbox-xgene-slimpro.c index bd07f39f0692..dd2afbca51c9 100644 --- a/drivers/mailbox/mailbox-xgene-slimpro.c +++ b/drivers/mailbox/mailbox-xgene-slimpro.c @@ -189,8 +189,8 @@ static int slimpro_mbox_probe(struct platform_device *pdev) int i; ctx = devm_kzalloc(&pdev->dev, sizeof(struct slimpro_mbox), GFP_KERNEL); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + if (!ctx) + return -ENOMEM; platform_set_drvdata(pdev, ctx); diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 6a4811f85705..4a36632c236f 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -375,13 +375,13 @@ struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, if (!np) { dev_err(cl->dev, "%s() currently only supports DT\n", __func__); - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EINVAL); } if (!of_get_property(np, "mbox-names", NULL)) { dev_err(cl->dev, "%s() requires an \"mbox-names\" property\n", __func__); - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EINVAL); } of_property_for_each_string(np, "mbox-names", prop, mbox_name) { diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 27f2ef300f8b..3970cda10080 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -867,39 +867,55 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, return 0; } -#define WRITE_LOCK(cmd) \ - down_write(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_write(&cmd->root_lock); \ - return -EINVAL; \ +static bool cmd_write_lock(struct dm_cache_metadata *cmd) +{ + down_write(&cmd->root_lock); + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { + up_write(&cmd->root_lock); + return false; } + return true; +} -#define WRITE_LOCK_VOID(cmd) \ - down_write(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_write(&cmd->root_lock); \ - return; \ - } +#define WRITE_LOCK(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define WRITE_LOCK_VOID(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return; \ + } while(0) #define WRITE_UNLOCK(cmd) \ - up_write(&cmd->root_lock) + up_write(&(cmd)->root_lock) -#define READ_LOCK(cmd) \ - down_read(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_read(&cmd->root_lock); \ - return -EINVAL; \ +static bool cmd_read_lock(struct dm_cache_metadata *cmd) +{ + down_read(&cmd->root_lock); + if (cmd->fail_io) { + up_read(&cmd->root_lock); + return false; } + return true; +} -#define READ_LOCK_VOID(cmd) \ - down_read(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_read(&cmd->root_lock); \ - return; \ - } +#define READ_LOCK(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define READ_LOCK_VOID(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return; \ + } while(0) #define READ_UNLOCK(cmd) \ - up_read(&cmd->root_lock) + up_read(&(cmd)->root_lock) int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index be4905769a45..3d3ac13287a4 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1662,8 +1662,10 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, tio = alloc_tio(ci, ti, target_bio_nr); tio->len_ptr = len; r = clone_bio(tio, bio, sector, *len); - if (r < 0) + if (r < 0) { + free_tio(ci->md, tio); break; + } __map_bio(tio); } diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 5f1a36b8fbb0..0a5cbbe12452 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -458,8 +458,10 @@ static void lkdtm_do_action(enum ctype which) break; val = kmalloc(len, GFP_KERNEL); - if (!val) + if (!val) { + kfree(base); break; + } *val = 0x12345678; base[offset] = *val; @@ -498,14 +500,17 @@ static void lkdtm_do_action(enum ctype which) } case CT_READ_BUDDY_AFTER_FREE: { unsigned long p = __get_free_page(GFP_KERNEL); - int saw, *val = kmalloc(1024, GFP_KERNEL); + int saw, *val; int *base; if (!p) break; - if (!val) + val = kmalloc(1024, GFP_KERNEL); + if (!val) { + free_page(p); break; + } base = (int *)p; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 3bdbe50a363f..8a0147dfed27 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -86,7 +86,6 @@ static int max_devices; /* TODO: Replace these with struct ida */ static DECLARE_BITMAP(dev_use, MAX_DEVICES); -static DECLARE_BITMAP(name_use, MAX_DEVICES); /* * There is one mmc_blk_data per slot. @@ -105,7 +104,6 @@ struct mmc_blk_data { unsigned int usage; unsigned int read_only; unsigned int part_type; - unsigned int name_idx; unsigned int reset_done; #define MMC_BLK_READ BIT(0) #define MMC_BLK_WRITE BIT(1) @@ -2202,19 +2200,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, goto out; } - /* - * !subname implies we are creating main mmc_blk_data that will be - * associated with mmc_card with dev_set_drvdata. Due to device - * partitions, devidx will not coincide with a per-physical card - * index anymore so we keep track of a name index. - */ - if (!subname) { - md->name_idx = find_first_zero_bit(name_use, max_devices); - __set_bit(md->name_idx, name_use); - } else - md->name_idx = ((struct mmc_blk_data *) - dev_to_disk(parent)->private_data)->name_idx; - md->area_type = area_type; /* @@ -2264,7 +2249,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, */ snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), - "mmcblk%u%s", md->name_idx, subname ? subname : ""); + "mmcblk%u%s", card->host->index, subname ? subname : ""); if (mmc_card_mmc(card)) blk_queue_logical_block_size(md->queue.queue, @@ -2418,7 +2403,6 @@ static void mmc_blk_remove_parts(struct mmc_card *card, struct list_head *pos, *q; struct mmc_blk_data *part_md; - __clear_bit(md->name_idx, name_use); list_for_each_safe(pos, q, &md->part) { part_md = list_entry(pos, struct mmc_blk_data, part); list_del(pos); diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 62aa5d0efcee..79e19017343e 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -390,6 +390,7 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) slot->cd_idx = 0; slot->cd_override_level = true; if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD || + slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXTM_SD || slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD) slot->host->mmc_host_ops.get_cd = bxt_get_cd; @@ -1173,6 +1174,30 @@ static const struct pci_device_id pci_ids[] = { { .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BXTM_EMMC, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_emmc, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BXTM_SDIO, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sdio, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BXTM_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sd, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_APL_EMMC, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index d1a0b4db60db..89e7151684a1 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -28,6 +28,9 @@ #define PCI_DEVICE_ID_INTEL_BXT_SD 0x0aca #define PCI_DEVICE_ID_INTEL_BXT_EMMC 0x0acc #define PCI_DEVICE_ID_INTEL_BXT_SDIO 0x0ad0 +#define PCI_DEVICE_ID_INTEL_BXTM_SD 0x1aca +#define PCI_DEVICE_ID_INTEL_BXTM_EMMC 0x1acc +#define PCI_DEVICE_ID_INTEL_BXTM_SDIO 0x1ad0 #define PCI_DEVICE_ID_INTEL_APL_SD 0x5aca #define PCI_DEVICE_ID_INTEL_APL_EMMC 0x5acc #define PCI_DEVICE_ID_INTEL_APL_SDIO 0x5ad0 diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index aca439d3ca83..30132500aa1c 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -309,8 +309,30 @@ static void pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) __func__, uhs, ctrl_2); } +static void pxav3_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + struct mmc_host *mmc = host->mmc; + u8 pwr = host->pwr; + + sdhci_set_power(host, mode, vdd); + + if (host->pwr == pwr) + return; + + if (host->pwr == 0) + vdd = 0; + + if (!IS_ERR(mmc->supply.vmmc)) { + spin_unlock_irq(&host->lock); + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + spin_lock_irq(&host->lock); + } +} + static const struct sdhci_ops pxav3_sdhci_ops = { .set_clock = sdhci_set_clock, + .set_power = pxav3_set_power, .platform_send_init_74_clocks = pxav3_gen_init_74_clocks, .get_max_clock = sdhci_pltfm_clk_get_max_clock, .set_bus_width = sdhci_set_bus_width, diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index f8c4762bb48d..bcc0de47fe7e 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -382,14 +382,6 @@ static const struct sdhci_tegra_soc_data soc_data_tegra114 = { .pdata = &sdhci_tegra114_pdata, }; -static const struct sdhci_tegra_soc_data soc_data_tegra124 = { - .pdata = &sdhci_tegra114_pdata, - .nvquirks = NVQUIRK_ENABLE_SDR50 | - NVQUIRK_ENABLE_DDR50 | - NVQUIRK_ENABLE_SDR104 | - NVQUIRK_HAS_PADCALIB, -}; - static const struct sdhci_pltfm_data sdhci_tegra210_pdata = { .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | @@ -407,7 +399,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra210 = { static const struct of_device_id sdhci_tegra_dt_match[] = { { .compatible = "nvidia,tegra210-sdhci", .data = &soc_data_tegra210 }, - { .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra124 }, + { .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra114 }, { .compatible = "nvidia,tegra114-sdhci", .data = &soc_data_tegra114 }, { .compatible = "nvidia,tegra30-sdhci", .data = &soc_data_tegra30 }, { .compatible = "nvidia,tegra20-sdhci", .data = &soc_data_tegra20 }, diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8670f162dec7..6bd3d1794966 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1210,10 +1210,24 @@ clock_set: } EXPORT_SYMBOL_GPL(sdhci_set_clock); -static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, - unsigned short vdd) +static void sdhci_set_power_reg(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) { struct mmc_host *mmc = host->mmc; + + spin_unlock_irq(&host->lock); + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + spin_lock_irq(&host->lock); + + if (mode != MMC_POWER_OFF) + sdhci_writeb(host, SDHCI_POWER_ON, SDHCI_POWER_CONTROL); + else + sdhci_writeb(host, 0, SDHCI_POWER_CONTROL); +} + +void sdhci_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ u8 pwr = 0; if (mode != MMC_POWER_OFF) { @@ -1245,7 +1259,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, sdhci_writeb(host, 0, SDHCI_POWER_CONTROL); if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON) sdhci_runtime_pm_bus_off(host); - vdd = 0; } else { /* * Spec says that we should clear the power reg before setting @@ -1276,12 +1289,20 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER) mdelay(10); } +} +EXPORT_SYMBOL_GPL(sdhci_set_power); - if (!IS_ERR(mmc->supply.vmmc)) { - spin_unlock_irq(&host->lock); - mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); - spin_lock_irq(&host->lock); - } +static void __sdhci_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + struct mmc_host *mmc = host->mmc; + + if (host->ops->set_power) + host->ops->set_power(host, mode, vdd); + else if (!IS_ERR(mmc->supply.vmmc)) + sdhci_set_power_reg(host, mode, vdd); + else + sdhci_set_power(host, mode, vdd); } /*****************************************************************************\ @@ -1431,7 +1452,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) } } - sdhci_set_power(host, ios->power_mode, ios->vdd); + __sdhci_set_power(host, ios->power_mode, ios->vdd); if (host->ops->platform_send_init_74_clocks) host->ops->platform_send_init_74_clocks(host, ios->power_mode); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 3bd28033dbd9..0f39f4f84d10 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -529,6 +529,8 @@ struct sdhci_ops { #endif void (*set_clock)(struct sdhci_host *host, unsigned int clock); + void (*set_power)(struct sdhci_host *host, unsigned char mode, + unsigned short vdd); int (*enable_dma)(struct sdhci_host *host); unsigned int (*get_max_clock)(struct sdhci_host *host); @@ -660,6 +662,8 @@ static inline bool sdhci_sdio_irq_enabled(struct sdhci_host *host) } void sdhci_set_clock(struct sdhci_host *host, unsigned int clock); +void sdhci_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd); void sdhci_set_bus_width(struct sdhci_host *host, int width); void sdhci_reset(struct sdhci_host *host, u8 mask); void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing); diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index b6facac54fc0..557b8462f55e 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4009,7 +4009,6 @@ static int nand_dt_init(struct nand_chip *chip) * This is the first phase of the normal nand_scan() function. It reads the * flash ID and sets up MTD fields accordingly. * - * The mtd->owner field must be set to the module of the caller. */ int nand_scan_ident(struct mtd_info *mtd, int maxchips, struct nand_flash_dev *table) @@ -4429,19 +4428,12 @@ EXPORT_SYMBOL(nand_scan_tail); * * This fills out all the uninitialized function pointers with the defaults. * The flash ID is read and the mtd/chip structures are filled with the - * appropriate values. The mtd->owner field must be set to the module of the - * caller. + * appropriate values. */ int nand_scan(struct mtd_info *mtd, int maxchips) { int ret; - /* Many callers got this wrong, so check for it for a while... */ - if (!mtd->owner && caller_is_module()) { - pr_crit("%s called with NULL mtd->owner!\n", __func__); - BUG(); - } - ret = nand_scan_ident(mtd, maxchips, NULL); if (!ret) ret = nand_scan_tail(mtd); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 2a1ba62b7da2..befd67df08e1 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -62,9 +62,8 @@ config DUMMY this device is consigned into oblivion) with a configurable IP address. It is most commonly used in order to make your currently inactive SLIP address seem like a real address for local programs. - If you use SLIP or PPP, you might want to say Y here. Since this - thing often comes in handy, the default is Y. It won't enlarge your - kernel either. What a deal. Read about it in the Network + If you use SLIP or PPP, you might want to say Y here. It won't + enlarge your kernel. What a deal. Read about it in the Network Administrator's Guide, available from <http://www.tldp.org/docs.html#guide>. @@ -195,6 +194,7 @@ config GENEVE config MACSEC tristate "IEEE 802.1AE MAC-level encryption (MACsec)" + select CRYPTO select CRYPTO_AES select CRYPTO_GCM ---help--- diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 0d9b45ff1bb2..81f90c4703ae 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -433,7 +433,7 @@ static void __init com90xx_probe(void) kfree(iomem); } -static int check_mirror(unsigned long addr, size_t size) +static int __init check_mirror(unsigned long addr, size_t size) { void __iomem *p; int res = -1; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 7a5f0ef46bd6..448deb59b9a4 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -135,9 +135,9 @@ static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) return BCM_SF2_STATS_SIZE; } -static char *bcm_sf2_sw_drv_probe(struct device *dsa_dev, - struct device *host_dev, - int sw_addr, void **_priv) +static const char *bcm_sf2_sw_drv_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **_priv) { struct bcm_sf2_priv *priv; diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 92cebab9383e..e36b40886bd8 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -51,7 +51,7 @@ static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) return __ret; \ }) -static char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) +static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) { int ret; @@ -69,13 +69,13 @@ static char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) return NULL; } -static char *mv88e6060_drv_probe(struct device *dsa_dev, - struct device *host_dev, - int sw_addr, void **_priv) +static const char *mv88e6060_drv_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **_priv) { struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); struct mv88e6060_priv *priv; - char *name; + const char *name; name = mv88e6060_get_name(bus, sw_addr); if (name) { diff --git a/drivers/net/dsa/mv88e6123.c b/drivers/net/dsa/mv88e6123.c index c34283d929c4..5535a42a6113 100644 --- a/drivers/net/dsa/mv88e6123.c +++ b/drivers/net/dsa/mv88e6123.c @@ -17,21 +17,31 @@ #include <net/dsa.h> #include "mv88e6xxx.h" -static const struct mv88e6xxx_switch_id mv88e6123_table[] = { - { PORT_SWITCH_ID_6123, "Marvell 88E6123" }, - { PORT_SWITCH_ID_6123_A1, "Marvell 88E6123 (A1)" }, - { PORT_SWITCH_ID_6123_A2, "Marvell 88E6123 (A2)" }, - { PORT_SWITCH_ID_6161, "Marvell 88E6161" }, - { PORT_SWITCH_ID_6161_A1, "Marvell 88E6161 (A1)" }, - { PORT_SWITCH_ID_6161_A2, "Marvell 88E6161 (A2)" }, - { PORT_SWITCH_ID_6165, "Marvell 88E6165" }, - { PORT_SWITCH_ID_6165_A1, "Marvell 88E6165 (A1)" }, - { PORT_SWITCH_ID_6165_A2, "Marvell 88e6165 (A2)" }, +static const struct mv88e6xxx_info mv88e6123_table[] = { + { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6123, + .family = MV88E6XXX_FAMILY_6165, + .name = "Marvell 88E6123", + .num_databases = 4096, + .num_ports = 3, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6161, + .family = MV88E6XXX_FAMILY_6165, + .name = "Marvell 88E6161", + .num_databases = 4096, + .num_ports = 6, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6165, + .family = MV88E6XXX_FAMILY_6165, + .name = "Marvell 88E6165", + .num_databases = 4096, + .num_ports = 6, + } }; -static char *mv88e6123_drv_probe(struct device *dsa_dev, - struct device *host_dev, - int sw_addr, void **priv) +static const char *mv88e6123_drv_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv) { return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, mv88e6123_table, @@ -40,6 +50,7 @@ static char *mv88e6123_drv_probe(struct device *dsa_dev, static int mv88e6123_setup_global(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u32 upstream_port = dsa_upstream_port(ds); int ret; u32 reg; @@ -52,7 +63,9 @@ static int mv88e6123_setup_global(struct dsa_switch *ds) * external PHYs to poll), don't discard packets with * excessive collisions, and mask all interrupt sources. */ - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, 0x0000); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, 0x0000); + if (ret) + return ret; /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames @@ -61,14 +74,15 @@ static int mv88e6123_setup_global(struct dsa_switch *ds) reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + if (ret) + return ret; /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f); - - return 0; + return mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, + ds->index & 0x1f); } static int mv88e6123_setup(struct dsa_switch *ds) @@ -78,23 +92,11 @@ static int mv88e6123_setup(struct dsa_switch *ds) ps->ds = ds; - ret = mv88e6xxx_setup_common(ds); + ret = mv88e6xxx_setup_common(ps); if (ret < 0) return ret; - switch (ps->id) { - case PORT_SWITCH_ID_6123: - ps->num_ports = 3; - break; - case PORT_SWITCH_ID_6161: - case PORT_SWITCH_ID_6165: - ps->num_ports = 6; - break; - default: - return -ENODEV; - } - - ret = mv88e6xxx_switch_reset(ds, false); + ret = mv88e6xxx_switch_reset(ps, false); if (ret < 0) return ret; diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c index f5d75fce1e96..357ab794d720 100644 --- a/drivers/net/dsa/mv88e6131.c +++ b/drivers/net/dsa/mv88e6131.c @@ -17,17 +17,37 @@ #include <net/dsa.h> #include "mv88e6xxx.h" -static const struct mv88e6xxx_switch_id mv88e6131_table[] = { - { PORT_SWITCH_ID_6085, "Marvell 88E6085" }, - { PORT_SWITCH_ID_6095, "Marvell 88E6095/88E6095F" }, - { PORT_SWITCH_ID_6131, "Marvell 88E6131" }, - { PORT_SWITCH_ID_6131_B2, "Marvell 88E6131 (B2)" }, - { PORT_SWITCH_ID_6185, "Marvell 88E6185" }, +static const struct mv88e6xxx_info mv88e6131_table[] = { + { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6095, + .family = MV88E6XXX_FAMILY_6095, + .name = "Marvell 88E6095/88E6095F", + .num_databases = 256, + .num_ports = 11, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6085, + .family = MV88E6XXX_FAMILY_6097, + .name = "Marvell 88E6085", + .num_databases = 4096, + .num_ports = 10, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6131, + .family = MV88E6XXX_FAMILY_6185, + .name = "Marvell 88E6131", + .num_databases = 256, + .num_ports = 8, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6185, + .family = MV88E6XXX_FAMILY_6185, + .name = "Marvell 88E6185", + .num_databases = 256, + .num_ports = 10, + } }; -static char *mv88e6131_drv_probe(struct device *dsa_dev, - struct device *host_dev, - int sw_addr, void **priv) +static const char *mv88e6131_drv_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv) { return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, mv88e6131_table, @@ -36,6 +56,7 @@ static char *mv88e6131_drv_probe(struct device *dsa_dev, static int mv88e6131_setup_global(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u32 upstream_port = dsa_upstream_port(ds); int ret; u32 reg; @@ -49,11 +70,16 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) * to arbitrate between packet queues, set the maximum frame * size to 1632, and mask all interrupt sources. */ - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, - GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_MAX_FRAME_1632); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | + GLOBAL_CONTROL_MAX_FRAME_1632); + if (ret) + return ret; /* Set the VLAN ethertype to 0x8100. */ - REG_WRITE(REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100); + if (ret) + return ret; /* Disable ARP mirroring, and configure the upstream port as * the port to which ingress and egress monitor frames are to @@ -62,31 +88,33 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | GLOBAL_MONITOR_CONTROL_ARP_DISABLED; - REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + if (ret) + return ret; /* Disable cascade port functionality unless this device * is used in a cascade configuration, and set the switch's * DSA device number. */ if (ds->dst->pd->nr_chips > 1) - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_MULTIPLE_CASCADE | - (ds->index & 0x1f)); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); else - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_NO_CASCADE | - (ds->index & 0x1f)); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_NO_CASCADE | + (ds->index & 0x1f)); + if (ret) + return ret; /* Force the priority of IGMP/MLD snoop frames and ARP frames * to the highest setting. */ - REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, - GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP | - 7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT | - GLOBAL2_PRIO_OVERRIDE_FORCE_ARP | - 7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT); - - return 0; + return mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, + GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP | + 7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT | + GLOBAL2_PRIO_OVERRIDE_FORCE_ARP | + 7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT); } static int mv88e6131_setup(struct dsa_switch *ds) @@ -96,29 +124,13 @@ static int mv88e6131_setup(struct dsa_switch *ds) ps->ds = ds; - ret = mv88e6xxx_setup_common(ds); + ret = mv88e6xxx_setup_common(ps); if (ret < 0) return ret; - mv88e6xxx_ppu_state_init(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6085: - case PORT_SWITCH_ID_6185: - ps->num_ports = 10; - break; - case PORT_SWITCH_ID_6095: - ps->num_ports = 11; - break; - case PORT_SWITCH_ID_6131: - case PORT_SWITCH_ID_6131_B2: - ps->num_ports = 8; - break; - default: - return -ENODEV; - } + mv88e6xxx_ppu_state_init(ps); - ret = mv88e6xxx_switch_reset(ds, false); + ret = mv88e6xxx_switch_reset(ps, false); if (ret < 0) return ret; @@ -133,7 +145,7 @@ static int mv88e6131_port_to_phy_addr(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - if (port >= 0 && port < ps->num_ports) + if (port >= 0 && port < ps->info->num_ports) return port; return -EINVAL; diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index f5622506cdfa..f75164dc3bd6 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -17,16 +17,37 @@ #include <net/dsa.h> #include "mv88e6xxx.h" -static const struct mv88e6xxx_switch_id mv88e6171_table[] = { - { PORT_SWITCH_ID_6171, "Marvell 88E6171" }, - { PORT_SWITCH_ID_6175, "Marvell 88E6175" }, - { PORT_SWITCH_ID_6350, "Marvell 88E6350" }, - { PORT_SWITCH_ID_6351, "Marvell 88E6351" }, +static const struct mv88e6xxx_info mv88e6171_table[] = { + { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6171, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6171", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6175, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6175", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6350, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6350", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6351, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6351", + .num_databases = 4096, + .num_ports = 7, + } }; -static char *mv88e6171_drv_probe(struct device *dsa_dev, - struct device *host_dev, - int sw_addr, void **priv) +static const char *mv88e6171_drv_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv) { return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, mv88e6171_table, @@ -35,6 +56,7 @@ static char *mv88e6171_drv_probe(struct device *dsa_dev, static int mv88e6171_setup_global(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u32 upstream_port = dsa_upstream_port(ds); int ret; u32 reg; @@ -46,8 +68,11 @@ static int mv88e6171_setup_global(struct dsa_switch *ds) /* Discard packets with excessive collisions, mask all * interrupt sources, enable PPU. */ - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, - GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | + GLOBAL_CONTROL_DISCARD_EXCESS); + if (ret) + return ret; /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames @@ -57,14 +82,15 @@ static int mv88e6171_setup_global(struct dsa_switch *ds) upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT; - REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + if (ret) + return ret; /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f); - - return 0; + return mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, + ds->index & 0x1f); } static int mv88e6171_setup(struct dsa_switch *ds) @@ -74,13 +100,11 @@ static int mv88e6171_setup(struct dsa_switch *ds) ps->ds = ds; - ret = mv88e6xxx_setup_common(ds); + ret = mv88e6xxx_setup_common(ps); if (ret < 0) return ret; - ps->num_ports = 7; - - ret = mv88e6xxx_switch_reset(ds, true); + ret = mv88e6xxx_switch_reset(ps, true); if (ret < 0) return ret; diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index e54ee27db129..c622a1d58480 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -22,24 +22,49 @@ #include <net/dsa.h> #include "mv88e6xxx.h" -static const struct mv88e6xxx_switch_id mv88e6352_table[] = { - { PORT_SWITCH_ID_6172, "Marvell 88E6172" }, - { PORT_SWITCH_ID_6176, "Marvell 88E6176" }, - { PORT_SWITCH_ID_6240, "Marvell 88E6240" }, - { PORT_SWITCH_ID_6320, "Marvell 88E6320" }, - { PORT_SWITCH_ID_6320_A1, "Marvell 88E6320 (A1)" }, - { PORT_SWITCH_ID_6320_A2, "Marvell 88e6320 (A2)" }, - { PORT_SWITCH_ID_6321, "Marvell 88E6321" }, - { PORT_SWITCH_ID_6321_A1, "Marvell 88E6321 (A1)" }, - { PORT_SWITCH_ID_6321_A2, "Marvell 88e6321 (A2)" }, - { PORT_SWITCH_ID_6352, "Marvell 88E6352" }, - { PORT_SWITCH_ID_6352_A0, "Marvell 88E6352 (A0)" }, - { PORT_SWITCH_ID_6352_A1, "Marvell 88E6352 (A1)" }, +static const struct mv88e6xxx_info mv88e6352_table[] = { + { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6320, + .family = MV88E6XXX_FAMILY_6320, + .name = "Marvell 88E6320", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6321, + .family = MV88E6XXX_FAMILY_6320, + .name = "Marvell 88E6321", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6172, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6172", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6176, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6176", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6240, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6240", + .num_databases = 4096, + .num_ports = 7, + }, { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6352, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6352", + .num_databases = 4096, + .num_ports = 7, + } }; -static char *mv88e6352_drv_probe(struct device *dsa_dev, - struct device *host_dev, - int sw_addr, void **priv) +static const char *mv88e6352_drv_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv) { return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, mv88e6352_table, @@ -48,6 +73,7 @@ static char *mv88e6352_drv_probe(struct device *dsa_dev, static int mv88e6352_setup_global(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u32 upstream_port = dsa_upstream_port(ds); int ret; u32 reg; @@ -59,8 +85,11 @@ static int mv88e6352_setup_global(struct dsa_switch *ds) /* Discard packets with excessive collisions, * mask all interrupt sources, enable PPU (bit 14, undocumented). */ - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, - GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | + GLOBAL_CONTROL_DISCARD_EXCESS); + if (ret) + return ret; /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames @@ -69,14 +98,14 @@ static int mv88e6352_setup_global(struct dsa_switch *ds) reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + if (ret) + return ret; /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - - return 0; + return mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x1c, ds->index & 0x1f); } static int mv88e6352_setup(struct dsa_switch *ds) @@ -86,15 +115,13 @@ static int mv88e6352_setup(struct dsa_switch *ds) ps->ds = ds; - ret = mv88e6xxx_setup_common(ds); + ret = mv88e6xxx_setup_common(ps); if (ret < 0) return ret; - ps->num_ports = 7; - mutex_init(&ps->eeprom_mutex); - ret = mv88e6xxx_switch_reset(ds, true); + ret = mv88e6xxx_switch_reset(ps, true); if (ret < 0) return ret; @@ -112,7 +139,7 @@ static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr) mutex_lock(&ps->eeprom_mutex); - ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_EEPROM_OP, + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, GLOBAL2_EEPROM_OP_READ | (addr & GLOBAL2_EEPROM_OP_ADDR_MASK)); if (ret < 0) @@ -122,7 +149,7 @@ static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr) if (ret < 0) goto error; - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2, GLOBAL2_EEPROM_DATA); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_EEPROM_DATA); error: mutex_unlock(&ps->eeprom_mutex); return ret; @@ -193,9 +220,10 @@ static int mv88e6352_get_eeprom(struct dsa_switch *ds, static int mv88e6352_eeprom_is_readonly(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2, GLOBAL2_EEPROM_OP); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP); if (ret < 0) return ret; @@ -213,11 +241,11 @@ static int mv88e6352_write_eeprom_word(struct dsa_switch *ds, int addr, mutex_lock(&ps->eeprom_mutex); - ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); if (ret < 0) goto error; - ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_EEPROM_OP, + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, GLOBAL2_EEPROM_OP_WRITE | (addr & GLOBAL2_EEPROM_OP_ADDR_MASK)); if (ret < 0) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 9985a0cf31f1..61150af37bc7 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -25,12 +25,10 @@ #include <net/switchdev.h> #include "mv88e6xxx.h" -static void assert_smi_lock(struct dsa_switch *ds) +static void assert_smi_lock(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - if (unlikely(!mutex_is_locked(&ps->smi_mutex))) { - dev_err(ds->master_dev, "SMI lock not held!\n"); + dev_err(ps->dev, "SMI lock not held!\n"); dump_stack(); } } @@ -92,30 +90,29 @@ static int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, return ret & 0xffff; } -static int _mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +static int _mv88e6xxx_reg_read(struct mv88e6xxx_priv_state *ps, + int addr, int reg) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - assert_smi_lock(ds); + assert_smi_lock(ps); ret = __mv88e6xxx_reg_read(ps->bus, ps->sw_addr, addr, reg); if (ret < 0) return ret; - dev_dbg(ds->master_dev, "<- addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n", + dev_dbg(ps->dev, "<- addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n", addr, reg, ret); return ret; } -int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +int mv88e6xxx_reg_read(struct mv88e6xxx_priv_state *ps, int addr, int reg) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_reg_read(ds, addr, reg); + ret = _mv88e6xxx_reg_read(ps, addr, reg); mutex_unlock(&ps->smi_mutex); return ret; @@ -153,26 +150,24 @@ static int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, return 0; } -static int _mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, - u16 val) +static int _mv88e6xxx_reg_write(struct mv88e6xxx_priv_state *ps, int addr, + int reg, u16 val) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - assert_smi_lock(ds); + assert_smi_lock(ps); - dev_dbg(ds->master_dev, "-> addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n", + dev_dbg(ps->dev, "-> addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n", addr, reg, val); return __mv88e6xxx_reg_write(ps->bus, ps->sw_addr, addr, reg, val); } -int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +int mv88e6xxx_reg_write(struct mv88e6xxx_priv_state *ps, int addr, + int reg, u16 val) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_reg_write(ds, addr, reg, val); + ret = _mv88e6xxx_reg_write(ps, addr, reg, val); mutex_unlock(&ps->smi_mutex); return ret; @@ -180,28 +175,46 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) { - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]); - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int err; - return 0; + err = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MAC_01, + (addr[0] << 8) | addr[1]); + if (err) + return err; + + err = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MAC_23, + (addr[2] << 8) | addr[3]); + if (err) + return err; + + return mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MAC_45, + (addr[4] << 8) | addr[5]); } int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) { - int i; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; + int i; for (i = 0; i < 6; i++) { int j; /* Write the MAC address byte. */ - REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MAC, - GLOBAL2_SWITCH_MAC_BUSY | (i << 8) | addr[i]); + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, + GLOBAL2_SWITCH_MAC_BUSY | + (i << 8) | addr[i]); + if (ret) + return ret; /* Wait for the write to complete. */ for (j = 0; j < 16; j++) { - ret = REG_READ(REG_GLOBAL2, GLOBAL2_SWITCH_MAC); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, + GLOBAL2_SWITCH_MAC); + if (ret < 0) + return ret; + if ((ret & GLOBAL2_SWITCH_MAC_BUSY) == 0) break; } @@ -212,34 +225,43 @@ int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) return 0; } -static int _mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) +static int _mv88e6xxx_phy_read(struct mv88e6xxx_priv_state *ps, int addr, + int regnum) { if (addr >= 0) - return _mv88e6xxx_reg_read(ds, addr, regnum); + return _mv88e6xxx_reg_read(ps, addr, regnum); return 0xffff; } -static int _mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, - u16 val) +static int _mv88e6xxx_phy_write(struct mv88e6xxx_priv_state *ps, int addr, + int regnum, u16 val) { if (addr >= 0) - return _mv88e6xxx_reg_write(ds, addr, regnum, val); + return _mv88e6xxx_reg_write(ps, addr, regnum, val); return 0; } #ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU -static int mv88e6xxx_ppu_disable(struct dsa_switch *ds) +static int mv88e6xxx_ppu_disable(struct mv88e6xxx_priv_state *ps) { int ret; unsigned long timeout; - ret = REG_READ(REG_GLOBAL, GLOBAL_CONTROL); - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, - ret & ~GLOBAL_CONTROL_PPU_ENABLE); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_CONTROL); + if (ret < 0) + return ret; + + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, + ret & ~GLOBAL_CONTROL_PPU_ENABLE); + if (ret) + return ret; timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATUS); + if (ret < 0) + return ret; + usleep_range(1000, 2000); if ((ret & GLOBAL_STATUS_PPU_MASK) != GLOBAL_STATUS_PPU_POLLING) @@ -249,17 +271,26 @@ static int mv88e6xxx_ppu_disable(struct dsa_switch *ds) return -ETIMEDOUT; } -static int mv88e6xxx_ppu_enable(struct dsa_switch *ds) +static int mv88e6xxx_ppu_enable(struct mv88e6xxx_priv_state *ps) { - int ret; + int ret, err; unsigned long timeout; - ret = REG_READ(REG_GLOBAL, GLOBAL_CONTROL); - REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, ret | GLOBAL_CONTROL_PPU_ENABLE); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_CONTROL); + if (ret < 0) + return ret; + + err = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, + ret | GLOBAL_CONTROL_PPU_ENABLE); + if (err) + return err; timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS); + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATUS); + if (ret < 0) + return ret; + usleep_range(1000, 2000); if ((ret & GLOBAL_STATUS_PPU_MASK) == GLOBAL_STATUS_PPU_POLLING) @@ -275,9 +306,7 @@ static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly) ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work); if (mutex_trylock(&ps->ppu_mutex)) { - struct dsa_switch *ds = ps->ds; - - if (mv88e6xxx_ppu_enable(ds) == 0) + if (mv88e6xxx_ppu_enable(ps) == 0) ps->ppu_disabled = 0; mutex_unlock(&ps->ppu_mutex); } @@ -290,9 +319,8 @@ static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps) schedule_work(&ps->ppu_work); } -static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) +static int mv88e6xxx_ppu_access_get(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; mutex_lock(&ps->ppu_mutex); @@ -303,7 +331,7 @@ static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) * it. */ if (!ps->ppu_disabled) { - ret = mv88e6xxx_ppu_disable(ds); + ret = mv88e6xxx_ppu_disable(ps); if (ret < 0) { mutex_unlock(&ps->ppu_mutex); return ret; @@ -317,19 +345,15 @@ static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) return ret; } -static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds) +static void mv88e6xxx_ppu_access_put(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - /* Schedule a timer to re-enable the PHY polling unit. */ mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10)); mutex_unlock(&ps->ppu_mutex); } -void mv88e6xxx_ppu_state_init(struct dsa_switch *ds) +void mv88e6xxx_ppu_state_init(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - mutex_init(&ps->ppu_mutex); INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work); init_timer(&ps->ppu_timer); @@ -339,12 +363,13 @@ void mv88e6xxx_ppu_state_init(struct dsa_switch *ds) int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - ret = mv88e6xxx_ppu_access_get(ds); + ret = mv88e6xxx_ppu_access_get(ps); if (ret >= 0) { - ret = mv88e6xxx_reg_read(ds, addr, regnum); - mv88e6xxx_ppu_access_put(ds); + ret = mv88e6xxx_reg_read(ps, addr, regnum); + mv88e6xxx_ppu_access_put(ps); } return ret; @@ -353,168 +378,79 @@ int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum) int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, int regnum, u16 val) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - ret = mv88e6xxx_ppu_access_get(ds); + ret = mv88e6xxx_ppu_access_get(ps); if (ret >= 0) { - ret = mv88e6xxx_reg_write(ds, addr, regnum, val); - mv88e6xxx_ppu_access_put(ds); + ret = mv88e6xxx_reg_write(ps, addr, regnum, val); + mv88e6xxx_ppu_access_put(ps); } return ret; } #endif -static bool mv88e6xxx_6065_family(struct dsa_switch *ds) +static bool mv88e6xxx_6065_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6031: - case PORT_SWITCH_ID_6061: - case PORT_SWITCH_ID_6035: - case PORT_SWITCH_ID_6065: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6065; } -static bool mv88e6xxx_6095_family(struct dsa_switch *ds) +static bool mv88e6xxx_6095_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6092: - case PORT_SWITCH_ID_6095: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6095; } -static bool mv88e6xxx_6097_family(struct dsa_switch *ds) +static bool mv88e6xxx_6097_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6046: - case PORT_SWITCH_ID_6085: - case PORT_SWITCH_ID_6096: - case PORT_SWITCH_ID_6097: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6097; } -static bool mv88e6xxx_6165_family(struct dsa_switch *ds) +static bool mv88e6xxx_6165_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6123: - case PORT_SWITCH_ID_6161: - case PORT_SWITCH_ID_6165: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6165; } -static bool mv88e6xxx_6185_family(struct dsa_switch *ds) +static bool mv88e6xxx_6185_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6121: - case PORT_SWITCH_ID_6122: - case PORT_SWITCH_ID_6152: - case PORT_SWITCH_ID_6155: - case PORT_SWITCH_ID_6182: - case PORT_SWITCH_ID_6185: - case PORT_SWITCH_ID_6108: - case PORT_SWITCH_ID_6131: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6185; } -static bool mv88e6xxx_6320_family(struct dsa_switch *ds) +static bool mv88e6xxx_6320_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6320: - case PORT_SWITCH_ID_6321: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6320; } -static bool mv88e6xxx_6351_family(struct dsa_switch *ds) +static bool mv88e6xxx_6351_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6171: - case PORT_SWITCH_ID_6175: - case PORT_SWITCH_ID_6350: - case PORT_SWITCH_ID_6351: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6351; } -static bool mv88e6xxx_6352_family(struct dsa_switch *ds) +static bool mv88e6xxx_6352_family(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - switch (ps->id) { - case PORT_SWITCH_ID_6172: - case PORT_SWITCH_ID_6176: - case PORT_SWITCH_ID_6240: - case PORT_SWITCH_ID_6352: - return true; - } - return false; + return ps->info->family == MV88E6XXX_FAMILY_6352; } -static unsigned int mv88e6xxx_num_databases(struct dsa_switch *ds) +static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - /* The following devices have 4-bit identifiers for 16 databases */ - if (ps->id == PORT_SWITCH_ID_6061) - return 16; - - /* The following devices have 6-bit identifiers for 64 databases */ - if (ps->id == PORT_SWITCH_ID_6065) - return 64; - - /* The following devices have 8-bit identifiers for 256 databases */ - if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) - return 256; - - /* The following devices have 12-bit identifiers for 4096 databases */ - if (mv88e6xxx_6097_family(ds) || mv88e6xxx_6165_family(ds) || - mv88e6xxx_6351_family(ds) || mv88e6xxx_6352_family(ds)) - return 4096; - - return 0; + return ps->info->num_databases; } -static bool mv88e6xxx_has_fid_reg(struct dsa_switch *ds) +static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_priv_state *ps) { /* Does the device have dedicated FID registers for ATU and VTU ops? */ - if (mv88e6xxx_6097_family(ds) || mv88e6xxx_6165_family(ds) || - mv88e6xxx_6351_family(ds) || mv88e6xxx_6352_family(ds)) + if (mv88e6xxx_6097_family(ps) || mv88e6xxx_6165_family(ps) || + mv88e6xxx_6351_family(ps) || mv88e6xxx_6352_family(ps)) return true; return false; } -static bool mv88e6xxx_has_stu(struct dsa_switch *ds) +static bool mv88e6xxx_has_stu(struct mv88e6xxx_priv_state *ps) { /* Does the device have STU and dedicated SID registers for VTU ops? */ - if (mv88e6xxx_6097_family(ds) || mv88e6xxx_6165_family(ds) || - mv88e6xxx_6351_family(ds) || mv88e6xxx_6352_family(ds)) + if (mv88e6xxx_6097_family(ps) || mv88e6xxx_6165_family(ps) || + mv88e6xxx_6351_family(ps) || mv88e6xxx_6352_family(ps)) return true; return false; @@ -536,7 +472,7 @@ void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_PCS_CTRL); if (ret < 0) goto out; @@ -550,7 +486,7 @@ void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, if (phydev->link) reg |= PORT_PCS_CTRL_LINK_UP; - if (mv88e6xxx_6065_family(ds) && phydev->speed > SPEED_100) + if (mv88e6xxx_6065_family(ps) && phydev->speed > SPEED_100) goto out; switch (phydev->speed) { @@ -572,8 +508,8 @@ void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, if (phydev->duplex == DUPLEX_FULL) reg |= PORT_PCS_CTRL_DUPLEX_FULL; - if ((mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds)) && - (port >= ps->num_ports - 2)) { + if ((mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps)) && + (port >= ps->info->num_ports - 2)) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) @@ -582,19 +518,19 @@ void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | PORT_PCS_CTRL_RGMII_DELAY_TXCLK); } - _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_PCS_CTRL, reg); + _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PCS_CTRL, reg); out: mutex_unlock(&ps->smi_mutex); } -static int _mv88e6xxx_stats_wait(struct dsa_switch *ds) +static int _mv88e6xxx_stats_wait(struct mv88e6xxx_priv_state *ps) { int ret; int i; for (i = 0; i < 10; i++) { - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_OP); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATS_OP); if ((ret & GLOBAL_STATS_OP_BUSY) == 0) return 0; } @@ -602,52 +538,54 @@ static int _mv88e6xxx_stats_wait(struct dsa_switch *ds) return -ETIMEDOUT; } -static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +static int _mv88e6xxx_stats_snapshot(struct mv88e6xxx_priv_state *ps, + int port) { int ret; - if (mv88e6xxx_6320_family(ds) || mv88e6xxx_6352_family(ds)) + if (mv88e6xxx_6320_family(ps) || mv88e6xxx_6352_family(ps)) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_CAPTURE_PORT | GLOBAL_STATS_OP_HIST_RX_TX | port); if (ret < 0) return ret; /* Wait for the snapshotting to complete. */ - ret = _mv88e6xxx_stats_wait(ds); + ret = _mv88e6xxx_stats_wait(ps); if (ret < 0) return ret; return 0; } -static void _mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +static void _mv88e6xxx_stats_read(struct mv88e6xxx_priv_state *ps, + int stat, u32 *val) { u32 _val; int ret; *val = 0; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_READ_CAPTURED | GLOBAL_STATS_OP_HIST_RX_TX | stat); if (ret < 0) return; - ret = _mv88e6xxx_stats_wait(ds); + ret = _mv88e6xxx_stats_wait(ps); if (ret < 0) return; - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); if (ret < 0) return; _val = ret << 16; - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); if (ret < 0) return; @@ -716,26 +654,26 @@ static struct mv88e6xxx_hw_stat mv88e6xxx_hw_stats[] = { { "out_management", 4, 0x1f | GLOBAL_STATS_OP_BANK_1, BANK1, }, }; -static bool mv88e6xxx_has_stat(struct dsa_switch *ds, +static bool mv88e6xxx_has_stat(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_hw_stat *stat) { switch (stat->type) { case BANK0: return true; case BANK1: - return mv88e6xxx_6320_family(ds); + return mv88e6xxx_6320_family(ps); case PORT: - return mv88e6xxx_6095_family(ds) || - mv88e6xxx_6185_family(ds) || - mv88e6xxx_6097_family(ds) || - mv88e6xxx_6165_family(ds) || - mv88e6xxx_6351_family(ds) || - mv88e6xxx_6352_family(ds); + return mv88e6xxx_6095_family(ps) || + mv88e6xxx_6185_family(ps) || + mv88e6xxx_6097_family(ps) || + mv88e6xxx_6165_family(ps) || + mv88e6xxx_6351_family(ps) || + mv88e6xxx_6352_family(ps); } return false; } -static uint64_t _mv88e6xxx_get_ethtool_stat(struct dsa_switch *ds, +static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_hw_stat *s, int port) { @@ -746,13 +684,13 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct dsa_switch *ds, switch (s->type) { case PORT: - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), s->reg); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), s->reg); if (ret < 0) return UINT64_MAX; low = ret; if (s->sizeof_stat == 4) { - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), s->reg + 1); if (ret < 0) return UINT64_MAX; @@ -761,9 +699,9 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct dsa_switch *ds, break; case BANK0: case BANK1: - _mv88e6xxx_stats_read(ds, s->reg, &low); + _mv88e6xxx_stats_read(ps, s->reg, &low); if (s->sizeof_stat == 8) - _mv88e6xxx_stats_read(ds, s->reg + 1, &high); + _mv88e6xxx_stats_read(ps, s->reg + 1, &high); } value = (((u64)high) << 16) | low; return value; @@ -771,12 +709,13 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct dsa_switch *ds, void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_hw_stat *stat; int i, j; for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) { stat = &mv88e6xxx_hw_stats[i]; - if (mv88e6xxx_has_stat(ds, stat)) { + if (mv88e6xxx_has_stat(ps, stat)) { memcpy(data + j * ETH_GSTRING_LEN, stat->string, ETH_GSTRING_LEN); j++; @@ -786,12 +725,13 @@ void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) int mv88e6xxx_get_sset_count(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_hw_stat *stat; int i, j; for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) { stat = &mv88e6xxx_hw_stats[i]; - if (mv88e6xxx_has_stat(ds, stat)) + if (mv88e6xxx_has_stat(ps, stat)) j++; } return j; @@ -808,15 +748,15 @@ mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_stats_snapshot(ds, port); + ret = _mv88e6xxx_stats_snapshot(ps, port); if (ret < 0) { mutex_unlock(&ps->smi_mutex); return; } for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) { stat = &mv88e6xxx_hw_stats[i]; - if (mv88e6xxx_has_stat(ds, stat)) { - data[j] = _mv88e6xxx_get_ethtool_stat(ds, stat, port); + if (mv88e6xxx_has_stat(ps, stat)) { + data[j] = _mv88e6xxx_get_ethtool_stat(ps, stat, port); j++; } } @@ -832,6 +772,7 @@ int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port) void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 *p = _p; int i; @@ -842,13 +783,13 @@ void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, for (i = 0; i < 32; i++) { int ret; - ret = mv88e6xxx_reg_read(ds, REG_PORT(port), i); + ret = mv88e6xxx_reg_read(ps, REG_PORT(port), i); if (ret >= 0) p[i] = ret; } } -static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, +static int _mv88e6xxx_wait(struct mv88e6xxx_priv_state *ps, int reg, int offset, u16 mask) { unsigned long timeout = jiffies + HZ / 10; @@ -856,7 +797,7 @@ static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, while (time_before(jiffies, timeout)) { int ret; - ret = _mv88e6xxx_reg_read(ds, reg, offset); + ret = _mv88e6xxx_reg_read(ps, reg, offset); if (ret < 0) return ret; if (!(ret & mask)) @@ -867,74 +808,80 @@ static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, return -ETIMEDOUT; } -static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) +static int mv88e6xxx_wait(struct mv88e6xxx_priv_state *ps, int reg, + int offset, u16 mask) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_wait(ds, reg, offset, mask); + ret = _mv88e6xxx_wait(ps, reg, offset, mask); mutex_unlock(&ps->smi_mutex); return ret; } -static int _mv88e6xxx_phy_wait(struct dsa_switch *ds) +static int _mv88e6xxx_phy_wait(struct mv88e6xxx_priv_state *ps) { - return _mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + return _mv88e6xxx_wait(ps, REG_GLOBAL2, GLOBAL2_SMI_OP, GLOBAL2_SMI_OP_BUSY); } int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds) { - return mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_EEPROM_OP, + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + return mv88e6xxx_wait(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, GLOBAL2_EEPROM_OP_LOAD); } int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) { - return mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_EEPROM_OP, + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + return mv88e6xxx_wait(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, GLOBAL2_EEPROM_OP_BUSY); } -static int _mv88e6xxx_atu_wait(struct dsa_switch *ds) +static int _mv88e6xxx_atu_wait(struct mv88e6xxx_priv_state *ps) { - return _mv88e6xxx_wait(ds, REG_GLOBAL, GLOBAL_ATU_OP, + return _mv88e6xxx_wait(ps, REG_GLOBAL, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } -static int _mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, - int regnum) +static int _mv88e6xxx_phy_read_indirect(struct mv88e6xxx_priv_state *ps, + int addr, int regnum) { int ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_SMI_OP, GLOBAL2_SMI_OP_22_READ | (addr << 5) | regnum); if (ret < 0) return ret; - ret = _mv88e6xxx_phy_wait(ds); + ret = _mv88e6xxx_phy_wait(ps); if (ret < 0) return ret; - return _mv88e6xxx_reg_read(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_SMI_DATA); + + return ret; } -static int _mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, - int regnum, u16 val) +static int _mv88e6xxx_phy_write_indirect(struct mv88e6xxx_priv_state *ps, + int addr, int regnum, u16 val) { int ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA, val); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_SMI_DATA, val); if (ret < 0) return ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_SMI_OP, GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | regnum); - return _mv88e6xxx_phy_wait(ds); + return _mv88e6xxx_phy_wait(ps); } int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) @@ -944,14 +891,14 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) mutex_lock(&ps->smi_mutex); - reg = _mv88e6xxx_phy_read_indirect(ds, port, 16); + reg = _mv88e6xxx_phy_read_indirect(ps, port, 16); if (reg < 0) goto out; e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); + reg = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_STATUS); if (reg < 0) goto out; @@ -972,7 +919,7 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_read_indirect(ds, port, 16); + ret = _mv88e6xxx_phy_read_indirect(ps, port, 16); if (ret < 0) goto out; @@ -982,28 +929,28 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, if (e->tx_lpi_enabled) reg |= 0x0100; - ret = _mv88e6xxx_phy_write_indirect(ds, port, 16, reg); + ret = _mv88e6xxx_phy_write_indirect(ps, port, 16, reg); out: mutex_unlock(&ps->smi_mutex); return ret; } -static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, u16 fid, u16 cmd) +static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_priv_state *ps, u16 fid, u16 cmd) { int ret; - if (mv88e6xxx_has_fid_reg(ds)) { - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_FID, fid); + if (mv88e6xxx_has_fid_reg(ps)) { + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_ATU_FID, fid); if (ret < 0) return ret; - } else if (mv88e6xxx_num_databases(ds) == 256) { + } else if (mv88e6xxx_num_databases(ps) == 256) { /* ATU DBNum[7:4] are located in ATU Control 15:12 */ - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_ATU_CONTROL); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_ATU_CONTROL); if (ret < 0) return ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_CONTROL, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_ATU_CONTROL, (ret & 0xfff) | ((fid << 8) & 0xf000)); if (ret < 0) @@ -1013,14 +960,14 @@ static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, u16 fid, u16 cmd) cmd |= fid & 0xf; } - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_OP, cmd); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_ATU_OP, cmd); if (ret < 0) return ret; - return _mv88e6xxx_atu_wait(ds); + return _mv88e6xxx_atu_wait(ps); } -static int _mv88e6xxx_atu_data_write(struct dsa_switch *ds, +static int _mv88e6xxx_atu_data_write(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_atu_entry *entry) { u16 data = entry->state & GLOBAL_ATU_DATA_STATE_MASK; @@ -1040,21 +987,21 @@ static int _mv88e6xxx_atu_data_write(struct dsa_switch *ds, data |= (entry->portv_trunkid << shift) & mask; } - return _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_DATA, data); + return _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_ATU_DATA, data); } -static int _mv88e6xxx_atu_flush_move(struct dsa_switch *ds, +static int _mv88e6xxx_atu_flush_move(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_atu_entry *entry, bool static_too) { int op; int err; - err = _mv88e6xxx_atu_wait(ds); + err = _mv88e6xxx_atu_wait(ps); if (err) return err; - err = _mv88e6xxx_atu_data_write(ds, entry); + err = _mv88e6xxx_atu_data_write(ps, entry); if (err) return err; @@ -1066,21 +1013,22 @@ static int _mv88e6xxx_atu_flush_move(struct dsa_switch *ds, GLOBAL_ATU_OP_FLUSH_MOVE_NON_STATIC; } - return _mv88e6xxx_atu_cmd(ds, entry->fid, op); + return _mv88e6xxx_atu_cmd(ps, entry->fid, op); } -static int _mv88e6xxx_atu_flush(struct dsa_switch *ds, u16 fid, bool static_too) +static int _mv88e6xxx_atu_flush(struct mv88e6xxx_priv_state *ps, + u16 fid, bool static_too) { struct mv88e6xxx_atu_entry entry = { .fid = fid, .state = 0, /* EntryState bits must be 0 */ }; - return _mv88e6xxx_atu_flush_move(ds, &entry, static_too); + return _mv88e6xxx_atu_flush_move(ps, &entry, static_too); } -static int _mv88e6xxx_atu_move(struct dsa_switch *ds, u16 fid, int from_port, - int to_port, bool static_too) +static int _mv88e6xxx_atu_move(struct mv88e6xxx_priv_state *ps, u16 fid, + int from_port, int to_port, bool static_too) { struct mv88e6xxx_atu_entry entry = { .trunk = false, @@ -1094,14 +1042,14 @@ static int _mv88e6xxx_atu_move(struct dsa_switch *ds, u16 fid, int from_port, entry.portv_trunkid = (to_port & 0x0f) << 4; entry.portv_trunkid |= from_port & 0x0f; - return _mv88e6xxx_atu_flush_move(ds, &entry, static_too); + return _mv88e6xxx_atu_flush_move(ps, &entry, static_too); } -static int _mv88e6xxx_atu_remove(struct dsa_switch *ds, u16 fid, int port, - bool static_too) +static int _mv88e6xxx_atu_remove(struct mv88e6xxx_priv_state *ps, u16 fid, + int port, bool static_too) { /* Destination port 0xF means remove the entries */ - return _mv88e6xxx_atu_move(ds, fid, port, 0x0f, static_too); + return _mv88e6xxx_atu_move(ps, fid, port, 0x0f, static_too); } static const char * const mv88e6xxx_port_state_names[] = { @@ -1111,12 +1059,14 @@ static const char * const mv88e6xxx_port_state_names[] = { [PORT_CONTROL_STATE_FORWARDING] = "Forwarding", }; -static int _mv88e6xxx_port_state(struct dsa_switch *ds, int port, u8 state) +static int _mv88e6xxx_port_state(struct mv88e6xxx_priv_state *ps, int port, + u8 state) { + struct dsa_switch *ds = ps->ds; int reg, ret = 0; u8 oldstate; - reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL); + reg = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_CONTROL); if (reg < 0) return reg; @@ -1131,13 +1081,13 @@ static int _mv88e6xxx_port_state(struct dsa_switch *ds, int port, u8 state) oldstate == PORT_CONTROL_STATE_FORWARDING) && (state == PORT_CONTROL_STATE_DISABLED || state == PORT_CONTROL_STATE_BLOCKING)) { - ret = _mv88e6xxx_atu_remove(ds, 0, port, false); + ret = _mv88e6xxx_atu_remove(ps, 0, port, false); if (ret) return ret; } reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL, + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL, reg); if (ret) return ret; @@ -1150,11 +1100,12 @@ static int _mv88e6xxx_port_state(struct dsa_switch *ds, int port, u8 state) return ret; } -static int _mv88e6xxx_port_based_vlan_map(struct dsa_switch *ds, int port) +static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_priv_state *ps, + int port) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct net_device *bridge = ps->ports[port].bridge_dev; - const u16 mask = (1 << ps->num_ports) - 1; + const u16 mask = (1 << ps->info->num_ports) - 1; + struct dsa_switch *ds = ps->ds; u16 output_ports = 0; int reg; int i; @@ -1163,7 +1114,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct dsa_switch *ds, int port) if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { output_ports = mask; } else { - for (i = 0; i < ps->num_ports; ++i) { + for (i = 0; i < ps->info->num_ports; ++i) { /* allow sending frames to every group member */ if (bridge && ps->ports[i].bridge_dev == bridge) output_ports |= BIT(i); @@ -1177,14 +1128,14 @@ static int _mv88e6xxx_port_based_vlan_map(struct dsa_switch *ds, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_BASE_VLAN); + reg = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_BASE_VLAN); if (reg < 0) return reg; reg &= ~mask; reg |= output_ports & mask; - return _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_BASE_VLAN, reg); + return _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_BASE_VLAN, reg); } void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) @@ -1217,13 +1168,14 @@ void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) schedule_work(&ps->bridge_work); } -static int _mv88e6xxx_port_pvid(struct dsa_switch *ds, int port, u16 *new, - u16 *old) +static int _mv88e6xxx_port_pvid(struct mv88e6xxx_priv_state *ps, int port, + u16 *new, u16 *old) { + struct dsa_switch *ds = ps->ds; u16 pvid; int ret; - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_DEFAULT_VLAN); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_DEFAULT_VLAN); if (ret < 0) return ret; @@ -1233,7 +1185,7 @@ static int _mv88e6xxx_port_pvid(struct dsa_switch *ds, int port, u16 *new, ret &= ~PORT_DEFAULT_VLAN_MASK; ret |= *new & PORT_DEFAULT_VLAN_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_DEFAULT_VLAN, ret); if (ret < 0) return ret; @@ -1248,55 +1200,56 @@ static int _mv88e6xxx_port_pvid(struct dsa_switch *ds, int port, u16 *new, return 0; } -static int _mv88e6xxx_port_pvid_get(struct dsa_switch *ds, int port, u16 *pvid) +static int _mv88e6xxx_port_pvid_get(struct mv88e6xxx_priv_state *ps, + int port, u16 *pvid) { - return _mv88e6xxx_port_pvid(ds, port, NULL, pvid); + return _mv88e6xxx_port_pvid(ps, port, NULL, pvid); } -static int _mv88e6xxx_port_pvid_set(struct dsa_switch *ds, int port, u16 pvid) +static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_priv_state *ps, + int port, u16 pvid) { - return _mv88e6xxx_port_pvid(ds, port, &pvid, NULL); + return _mv88e6xxx_port_pvid(ps, port, &pvid, NULL); } -static int _mv88e6xxx_vtu_wait(struct dsa_switch *ds) +static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_priv_state *ps) { - return _mv88e6xxx_wait(ds, REG_GLOBAL, GLOBAL_VTU_OP, + return _mv88e6xxx_wait(ps, REG_GLOBAL, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY); } -static int _mv88e6xxx_vtu_cmd(struct dsa_switch *ds, u16 op) +static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_priv_state *ps, u16 op) { int ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_OP, op); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_OP, op); if (ret < 0) return ret; - return _mv88e6xxx_vtu_wait(ds); + return _mv88e6xxx_vtu_wait(ps); } -static int _mv88e6xxx_vtu_stu_flush(struct dsa_switch *ds) +static int _mv88e6xxx_vtu_stu_flush(struct mv88e6xxx_priv_state *ps) { int ret; - ret = _mv88e6xxx_vtu_wait(ds); + ret = _mv88e6xxx_vtu_wait(ps); if (ret < 0) return ret; - return _mv88e6xxx_vtu_cmd(ds, GLOBAL_VTU_OP_FLUSH_ALL); + return _mv88e6xxx_vtu_cmd(ps, GLOBAL_VTU_OP_FLUSH_ALL); } -static int _mv88e6xxx_vtu_stu_data_read(struct dsa_switch *ds, +static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_vtu_stu_entry *entry, unsigned int nibble_offset) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 regs[3]; int i; int ret; for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_DATA_0_3 + i); if (ret < 0) return ret; @@ -1304,7 +1257,7 @@ static int _mv88e6xxx_vtu_stu_data_read(struct dsa_switch *ds, regs[i] = ret; } - for (i = 0; i < ps->num_ports; ++i) { + for (i = 0; i < ps->info->num_ports; ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u16 reg = regs[i / 4]; @@ -1314,16 +1267,15 @@ static int _mv88e6xxx_vtu_stu_data_read(struct dsa_switch *ds, return 0; } -static int _mv88e6xxx_vtu_stu_data_write(struct dsa_switch *ds, +static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_vtu_stu_entry *entry, unsigned int nibble_offset) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 regs[3] = { 0 }; int i; int ret; - for (i = 0; i < ps->num_ports; ++i) { + for (i = 0; i < ps->info->num_ports; ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u8 data = entry->data[i]; @@ -1331,7 +1283,7 @@ static int _mv88e6xxx_vtu_stu_data_write(struct dsa_switch *ds, } for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_DATA_0_3 + i, regs[i]); if (ret < 0) return ret; @@ -1340,27 +1292,27 @@ static int _mv88e6xxx_vtu_stu_data_write(struct dsa_switch *ds, return 0; } -static int _mv88e6xxx_vtu_vid_write(struct dsa_switch *ds, u16 vid) +static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_priv_state *ps, u16 vid) { - return _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_VID, + return _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_VID, vid & GLOBAL_VTU_VID_MASK); } -static int _mv88e6xxx_vtu_getnext(struct dsa_switch *ds, +static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_vtu_stu_entry *entry) { struct mv88e6xxx_vtu_stu_entry next = { 0 }; int ret; - ret = _mv88e6xxx_vtu_wait(ds); + ret = _mv88e6xxx_vtu_wait(ps); if (ret < 0) return ret; - ret = _mv88e6xxx_vtu_cmd(ds, GLOBAL_VTU_OP_VTU_GET_NEXT); + ret = _mv88e6xxx_vtu_cmd(ps, GLOBAL_VTU_OP_VTU_GET_NEXT); if (ret < 0) return ret; - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_VTU_VID); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_VID); if (ret < 0) return ret; @@ -1368,22 +1320,22 @@ static int _mv88e6xxx_vtu_getnext(struct dsa_switch *ds, next.valid = !!(ret & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = _mv88e6xxx_vtu_stu_data_read(ds, &next, 0); + ret = _mv88e6xxx_vtu_stu_data_read(ps, &next, 0); if (ret < 0) return ret; - if (mv88e6xxx_has_fid_reg(ds)) { - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, + if (mv88e6xxx_has_fid_reg(ps)) { + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_FID); if (ret < 0) return ret; next.fid = ret & GLOBAL_VTU_FID_MASK; - } else if (mv88e6xxx_num_databases(ds) == 256) { + } else if (mv88e6xxx_num_databases(ps) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 */ - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_OP); if (ret < 0) return ret; @@ -1392,8 +1344,8 @@ static int _mv88e6xxx_vtu_getnext(struct dsa_switch *ds, next.fid |= ret & 0xf; } - if (mv88e6xxx_has_stu(ds)) { - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, + if (mv88e6xxx_has_stu(ps)) { + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_SID); if (ret < 0) return ret; @@ -1417,16 +1369,16 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); - err = _mv88e6xxx_port_pvid_get(ds, port, &pvid); + err = _mv88e6xxx_port_pvid_get(ps, port, &pvid); if (err) goto unlock; - err = _mv88e6xxx_vtu_vid_write(ds, GLOBAL_VTU_VID_MASK); + err = _mv88e6xxx_vtu_vid_write(ps, GLOBAL_VTU_VID_MASK); if (err) goto unlock; do { - err = _mv88e6xxx_vtu_getnext(ds, &next); + err = _mv88e6xxx_vtu_getnext(ps, &next); if (err) break; @@ -1457,14 +1409,14 @@ unlock: return err; } -static int _mv88e6xxx_vtu_loadpurge(struct dsa_switch *ds, +static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_vtu_stu_entry *entry) { u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE; u16 reg = 0; int ret; - ret = _mv88e6xxx_vtu_wait(ds); + ret = _mv88e6xxx_vtu_wait(ps); if (ret < 0) return ret; @@ -1472,23 +1424,23 @@ static int _mv88e6xxx_vtu_loadpurge(struct dsa_switch *ds, goto loadpurge; /* Write port member tags */ - ret = _mv88e6xxx_vtu_stu_data_write(ds, entry, 0); + ret = _mv88e6xxx_vtu_stu_data_write(ps, entry, 0); if (ret < 0) return ret; - if (mv88e6xxx_has_stu(ds)) { + if (mv88e6xxx_has_stu(ps)) { reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_SID, reg); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_SID, reg); if (ret < 0) return ret; } - if (mv88e6xxx_has_fid_reg(ds)) { + if (mv88e6xxx_has_fid_reg(ps)) { reg = entry->fid & GLOBAL_VTU_FID_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_FID, reg); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_FID, reg); if (ret < 0) return ret; - } else if (mv88e6xxx_num_databases(ds) == 256) { + } else if (mv88e6xxx_num_databases(ps) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 */ @@ -1499,46 +1451,46 @@ static int _mv88e6xxx_vtu_loadpurge(struct dsa_switch *ds, reg = GLOBAL_VTU_VID_VALID; loadpurge: reg |= entry->vid & GLOBAL_VTU_VID_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_VID, reg); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_VID, reg); if (ret < 0) return ret; - return _mv88e6xxx_vtu_cmd(ds, op); + return _mv88e6xxx_vtu_cmd(ps, op); } -static int _mv88e6xxx_stu_getnext(struct dsa_switch *ds, u8 sid, +static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_priv_state *ps, u8 sid, struct mv88e6xxx_vtu_stu_entry *entry) { struct mv88e6xxx_vtu_stu_entry next = { 0 }; int ret; - ret = _mv88e6xxx_vtu_wait(ds); + ret = _mv88e6xxx_vtu_wait(ps); if (ret < 0) return ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_SID, + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_SID, sid & GLOBAL_VTU_SID_MASK); if (ret < 0) return ret; - ret = _mv88e6xxx_vtu_cmd(ds, GLOBAL_VTU_OP_STU_GET_NEXT); + ret = _mv88e6xxx_vtu_cmd(ps, GLOBAL_VTU_OP_STU_GET_NEXT); if (ret < 0) return ret; - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_VTU_SID); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_SID); if (ret < 0) return ret; next.sid = ret & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_VTU_VID); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_VTU_VID); if (ret < 0) return ret; next.valid = !!(ret & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = _mv88e6xxx_vtu_stu_data_read(ds, &next, 2); + ret = _mv88e6xxx_vtu_stu_data_read(ps, &next, 2); if (ret < 0) return ret; } @@ -1547,13 +1499,13 @@ static int _mv88e6xxx_stu_getnext(struct dsa_switch *ds, u8 sid, return 0; } -static int _mv88e6xxx_stu_loadpurge(struct dsa_switch *ds, +static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_vtu_stu_entry *entry) { u16 reg = 0; int ret; - ret = _mv88e6xxx_vtu_wait(ds); + ret = _mv88e6xxx_vtu_wait(ps); if (ret < 0) return ret; @@ -1561,40 +1513,41 @@ static int _mv88e6xxx_stu_loadpurge(struct dsa_switch *ds, goto loadpurge; /* Write port states */ - ret = _mv88e6xxx_vtu_stu_data_write(ds, entry, 2); + ret = _mv88e6xxx_vtu_stu_data_write(ps, entry, 2); if (ret < 0) return ret; reg = GLOBAL_VTU_VID_VALID; loadpurge: - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_VID, reg); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_VID, reg); if (ret < 0) return ret; reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_VTU_SID, reg); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_VTU_SID, reg); if (ret < 0) return ret; - return _mv88e6xxx_vtu_cmd(ds, GLOBAL_VTU_OP_STU_LOAD_PURGE); + return _mv88e6xxx_vtu_cmd(ps, GLOBAL_VTU_OP_STU_LOAD_PURGE); } -static int _mv88e6xxx_port_fid(struct dsa_switch *ds, int port, u16 *new, - u16 *old) +static int _mv88e6xxx_port_fid(struct mv88e6xxx_priv_state *ps, int port, + u16 *new, u16 *old) { + struct dsa_switch *ds = ps->ds; u16 upper_mask; u16 fid; int ret; - if (mv88e6xxx_num_databases(ds) == 4096) + if (mv88e6xxx_num_databases(ps) == 4096) upper_mask = 0xff; - else if (mv88e6xxx_num_databases(ds) == 256) + else if (mv88e6xxx_num_databases(ps) == 256) upper_mask = 0xf; else return -EOPNOTSUPP; /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_BASE_VLAN); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_BASE_VLAN); if (ret < 0) return ret; @@ -1604,14 +1557,14 @@ static int _mv88e6xxx_port_fid(struct dsa_switch *ds, int port, u16 *new, ret &= ~PORT_BASE_VLAN_FID_3_0_MASK; ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_BASE_VLAN, + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_BASE_VLAN, ret); if (ret < 0) return ret; } /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL_1); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_CONTROL_1); if (ret < 0) return ret; @@ -1621,7 +1574,7 @@ static int _mv88e6xxx_port_fid(struct dsa_switch *ds, int port, u16 *new, ret &= ~upper_mask; ret |= (*new >> 4) & upper_mask; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_1, + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL_1, ret); if (ret < 0) return ret; @@ -1635,19 +1588,20 @@ static int _mv88e6xxx_port_fid(struct dsa_switch *ds, int port, u16 *new, return 0; } -static int _mv88e6xxx_port_fid_get(struct dsa_switch *ds, int port, u16 *fid) +static int _mv88e6xxx_port_fid_get(struct mv88e6xxx_priv_state *ps, + int port, u16 *fid) { - return _mv88e6xxx_port_fid(ds, port, NULL, fid); + return _mv88e6xxx_port_fid(ps, port, NULL, fid); } -static int _mv88e6xxx_port_fid_set(struct dsa_switch *ds, int port, u16 fid) +static int _mv88e6xxx_port_fid_set(struct mv88e6xxx_priv_state *ps, + int port, u16 fid) { - return _mv88e6xxx_port_fid(ds, port, &fid, NULL); + return _mv88e6xxx_port_fid(ps, port, &fid, NULL); } -static int _mv88e6xxx_fid_new(struct dsa_switch *ds, u16 *fid) +static int _mv88e6xxx_fid_new(struct mv88e6xxx_priv_state *ps, u16 *fid) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); struct mv88e6xxx_vtu_stu_entry vlan; int i, err; @@ -1655,8 +1609,8 @@ static int _mv88e6xxx_fid_new(struct dsa_switch *ds, u16 *fid) bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); /* Set every FID bit used by the (un)bridged ports */ - for (i = 0; i < ps->num_ports; ++i) { - err = _mv88e6xxx_port_fid_get(ds, i, fid); + for (i = 0; i < ps->info->num_ports; ++i) { + err = _mv88e6xxx_port_fid_get(ps, i, fid); if (err) return err; @@ -1664,12 +1618,12 @@ static int _mv88e6xxx_fid_new(struct dsa_switch *ds, u16 *fid) } /* Set every FID bit used by the VLAN entries */ - err = _mv88e6xxx_vtu_vid_write(ds, GLOBAL_VTU_VID_MASK); + err = _mv88e6xxx_vtu_vid_write(ps, GLOBAL_VTU_VID_MASK); if (err) return err; do { - err = _mv88e6xxx_vtu_getnext(ds, &vlan); + err = _mv88e6xxx_vtu_getnext(ps, &vlan); if (err) return err; @@ -1683,35 +1637,35 @@ static int _mv88e6xxx_fid_new(struct dsa_switch *ds, u16 *fid) * databases are not needed. Return the next positive available. */ *fid = find_next_zero_bit(fid_bitmap, MV88E6XXX_N_FID, 1); - if (unlikely(*fid >= mv88e6xxx_num_databases(ds))) + if (unlikely(*fid >= mv88e6xxx_num_databases(ps))) return -ENOSPC; /* Clear the database */ - return _mv88e6xxx_atu_flush(ds, *fid, true); + return _mv88e6xxx_atu_flush(ps, *fid, true); } -static int _mv88e6xxx_vtu_new(struct dsa_switch *ds, u16 vid, +static int _mv88e6xxx_vtu_new(struct mv88e6xxx_priv_state *ps, u16 vid, struct mv88e6xxx_vtu_stu_entry *entry) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct dsa_switch *ds = ps->ds; struct mv88e6xxx_vtu_stu_entry vlan = { .valid = true, .vid = vid, }; int i, err; - err = _mv88e6xxx_fid_new(ds, &vlan.fid); + err = _mv88e6xxx_fid_new(ps, &vlan.fid); if (err) return err; /* exclude all ports except the CPU and DSA ports */ - for (i = 0; i < ps->num_ports; ++i) + for (i = 0; i < ps->info->num_ports; ++i) vlan.data[i] = dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i) ? GLOBAL_VTU_DATA_MEMBER_TAG_UNMODIFIED : GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER; - if (mv88e6xxx_6097_family(ds) || mv88e6xxx_6165_family(ds) || - mv88e6xxx_6351_family(ds) || mv88e6xxx_6352_family(ds)) { + if (mv88e6xxx_6097_family(ps) || mv88e6xxx_6165_family(ps) || + mv88e6xxx_6351_family(ps) || mv88e6xxx_6352_family(ps)) { struct mv88e6xxx_vtu_stu_entry vstp; /* Adding a VTU entry requires a valid STU entry. As VSTP is not @@ -1719,7 +1673,7 @@ static int _mv88e6xxx_vtu_new(struct dsa_switch *ds, u16 vid, * entries. Thus, validate the SID 0. */ vlan.sid = 0; - err = _mv88e6xxx_stu_getnext(ds, GLOBAL_VTU_SID_MASK, &vstp); + err = _mv88e6xxx_stu_getnext(ps, GLOBAL_VTU_SID_MASK, &vstp); if (err) return err; @@ -1728,7 +1682,7 @@ static int _mv88e6xxx_vtu_new(struct dsa_switch *ds, u16 vid, vstp.valid = true; vstp.sid = vlan.sid; - err = _mv88e6xxx_stu_loadpurge(ds, &vstp); + err = _mv88e6xxx_stu_loadpurge(ps, &vstp); if (err) return err; } @@ -1738,7 +1692,7 @@ static int _mv88e6xxx_vtu_new(struct dsa_switch *ds, u16 vid, return 0; } -static int _mv88e6xxx_vtu_get(struct dsa_switch *ds, u16 vid, +static int _mv88e6xxx_vtu_get(struct mv88e6xxx_priv_state *ps, u16 vid, struct mv88e6xxx_vtu_stu_entry *entry, bool creat) { int err; @@ -1746,11 +1700,11 @@ static int _mv88e6xxx_vtu_get(struct dsa_switch *ds, u16 vid, if (!vid) return -EINVAL; - err = _mv88e6xxx_vtu_vid_write(ds, vid - 1); + err = _mv88e6xxx_vtu_vid_write(ps, vid - 1); if (err) return err; - err = _mv88e6xxx_vtu_getnext(ds, entry); + err = _mv88e6xxx_vtu_getnext(ps, entry); if (err) return err; @@ -1761,7 +1715,7 @@ static int _mv88e6xxx_vtu_get(struct dsa_switch *ds, u16 vid, * -EOPNOTSUPP to inform bridge about an eventual software VLAN. */ - err = _mv88e6xxx_vtu_new(ds, vid, entry); + err = _mv88e6xxx_vtu_new(ps, vid, entry); } return err; @@ -1779,12 +1733,12 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); - err = _mv88e6xxx_vtu_vid_write(ds, vid_begin - 1); + err = _mv88e6xxx_vtu_vid_write(ps, vid_begin - 1); if (err) goto unlock; do { - err = _mv88e6xxx_vtu_getnext(ds, &vlan); + err = _mv88e6xxx_vtu_getnext(ps, &vlan); if (err) goto unlock; @@ -1794,7 +1748,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (vlan.vid > vid_end) break; - for (i = 0; i < ps->num_ports; ++i) { + for (i = 0; i < ps->info->num_ports; ++i) { if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) continue; @@ -1838,7 +1792,7 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL_2); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_CONTROL_2); if (ret < 0) goto unlock; @@ -1848,7 +1802,7 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, ret &= ~PORT_CONTROL_2_8021Q_MASK; ret |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_2, + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL_2, ret); if (ret < 0) goto unlock; @@ -1885,13 +1839,13 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, return 0; } -static int _mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, u16 vid, - bool untagged) +static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_priv_state *ps, int port, + u16 vid, bool untagged) { struct mv88e6xxx_vtu_stu_entry vlan; int err; - err = _mv88e6xxx_vtu_get(ds, vid, &vlan, true); + err = _mv88e6xxx_vtu_get(ps, vid, &vlan, true); if (err) return err; @@ -1899,7 +1853,7 @@ static int _mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, u16 vid, GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED : GLOBAL_VTU_DATA_MEMBER_TAG_TAGGED; - return _mv88e6xxx_vtu_loadpurge(ds, &vlan); + return _mv88e6xxx_vtu_loadpurge(ps, &vlan); } void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, @@ -1914,24 +1868,25 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) - if (_mv88e6xxx_port_vlan_add(ds, port, vid, untagged)) + if (_mv88e6xxx_port_vlan_add(ps, port, vid, untagged)) netdev_err(ds->ports[port], "failed to add VLAN %d%c\n", vid, untagged ? 'u' : 't'); - if (pvid && _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end)) + if (pvid && _mv88e6xxx_port_pvid_set(ps, port, vlan->vid_end)) netdev_err(ds->ports[port], "failed to set PVID %d\n", vlan->vid_end); mutex_unlock(&ps->smi_mutex); } -static int _mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, u16 vid) +static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_priv_state *ps, + int port, u16 vid) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct dsa_switch *ds = ps->ds; struct mv88e6xxx_vtu_stu_entry vlan; int i, err; - err = _mv88e6xxx_vtu_get(ds, vid, &vlan, false); + err = _mv88e6xxx_vtu_get(ps, vid, &vlan, false); if (err) return err; @@ -1943,7 +1898,7 @@ static int _mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, u16 vid) /* keep the VLAN unless all ports are excluded */ vlan.valid = false; - for (i = 0; i < ps->num_ports; ++i) { + for (i = 0; i < ps->info->num_ports; ++i) { if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)) continue; @@ -1953,11 +1908,11 @@ static int _mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, u16 vid) } } - err = _mv88e6xxx_vtu_loadpurge(ds, &vlan); + err = _mv88e6xxx_vtu_loadpurge(ps, &vlan); if (err) return err; - return _mv88e6xxx_atu_remove(ds, vlan.fid, port, false); + return _mv88e6xxx_atu_remove(ps, vlan.fid, port, false); } int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, @@ -1969,17 +1924,17 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); - err = _mv88e6xxx_port_pvid_get(ds, port, &pvid); + err = _mv88e6xxx_port_pvid_get(ps, port, &pvid); if (err) goto unlock; for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = _mv88e6xxx_port_vlan_del(ds, port, vid); + err = _mv88e6xxx_port_vlan_del(ps, port, vid); if (err) goto unlock; if (vid == pvid) { - err = _mv88e6xxx_port_pvid_set(ds, port, 0); + err = _mv88e6xxx_port_pvid_set(ps, port, 0); if (err) goto unlock; } @@ -1991,14 +1946,14 @@ unlock: return err; } -static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds, +static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_priv_state *ps, const unsigned char *addr) { int i, ret; for (i = 0; i < 3; i++) { ret = _mv88e6xxx_reg_write( - ds, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i, + ps, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i, (addr[i * 2] << 8) | addr[i * 2 + 1]); if (ret < 0) return ret; @@ -2007,12 +1962,13 @@ static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds, return 0; } -static int _mv88e6xxx_atu_mac_read(struct dsa_switch *ds, unsigned char *addr) +static int _mv88e6xxx_atu_mac_read(struct mv88e6xxx_priv_state *ps, + unsigned char *addr) { int i, ret; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i); if (ret < 0) return ret; @@ -2023,27 +1979,27 @@ static int _mv88e6xxx_atu_mac_read(struct dsa_switch *ds, unsigned char *addr) return 0; } -static int _mv88e6xxx_atu_load(struct dsa_switch *ds, +static int _mv88e6xxx_atu_load(struct mv88e6xxx_priv_state *ps, struct mv88e6xxx_atu_entry *entry) { int ret; - ret = _mv88e6xxx_atu_wait(ds); + ret = _mv88e6xxx_atu_wait(ps); if (ret < 0) return ret; - ret = _mv88e6xxx_atu_mac_write(ds, entry->mac); + ret = _mv88e6xxx_atu_mac_write(ps, entry->mac); if (ret < 0) return ret; - ret = _mv88e6xxx_atu_data_write(ds, entry); + ret = _mv88e6xxx_atu_data_write(ps, entry); if (ret < 0) return ret; - return _mv88e6xxx_atu_cmd(ds, entry->fid, GLOBAL_ATU_OP_LOAD_DB); + return _mv88e6xxx_atu_cmd(ps, entry->fid, GLOBAL_ATU_OP_LOAD_DB); } -static int _mv88e6xxx_port_fdb_load(struct dsa_switch *ds, int port, +static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_priv_state *ps, int port, const unsigned char *addr, u16 vid, u8 state) { @@ -2053,9 +2009,9 @@ static int _mv88e6xxx_port_fdb_load(struct dsa_switch *ds, int port, /* Null VLAN ID corresponds to the port private database */ if (vid == 0) - err = _mv88e6xxx_port_fid_get(ds, port, &vlan.fid); + err = _mv88e6xxx_port_fid_get(ps, port, &vlan.fid); else - err = _mv88e6xxx_vtu_get(ds, vid, &vlan, false); + err = _mv88e6xxx_vtu_get(ps, vid, &vlan, false); if (err) return err; @@ -2067,7 +2023,7 @@ static int _mv88e6xxx_port_fdb_load(struct dsa_switch *ds, int port, entry.portv_trunkid = BIT(port); } - return _mv88e6xxx_atu_load(ds, &entry); + return _mv88e6xxx_atu_load(ps, &entry); } int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, @@ -2090,7 +2046,7 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); mutex_lock(&ps->smi_mutex); - if (_mv88e6xxx_port_fdb_load(ds, port, fdb->addr, fdb->vid, state)) + if (_mv88e6xxx_port_fdb_load(ps, port, fdb->addr, fdb->vid, state)) netdev_err(ds->ports[port], "failed to load MAC address\n"); mutex_unlock(&ps->smi_mutex); } @@ -2102,14 +2058,14 @@ int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_port_fdb_load(ds, port, fdb->addr, fdb->vid, + ret = _mv88e6xxx_port_fdb_load(ps, port, fdb->addr, fdb->vid, GLOBAL_ATU_DATA_STATE_UNUSED); mutex_unlock(&ps->smi_mutex); return ret; } -static int _mv88e6xxx_atu_getnext(struct dsa_switch *ds, u16 fid, +static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_priv_state *ps, u16 fid, struct mv88e6xxx_atu_entry *entry) { struct mv88e6xxx_atu_entry next = { 0 }; @@ -2117,19 +2073,19 @@ static int _mv88e6xxx_atu_getnext(struct dsa_switch *ds, u16 fid, next.fid = fid; - ret = _mv88e6xxx_atu_wait(ds); + ret = _mv88e6xxx_atu_wait(ps); if (ret < 0) return ret; - ret = _mv88e6xxx_atu_cmd(ds, fid, GLOBAL_ATU_OP_GET_NEXT_DB); + ret = _mv88e6xxx_atu_cmd(ps, fid, GLOBAL_ATU_OP_GET_NEXT_DB); if (ret < 0) return ret; - ret = _mv88e6xxx_atu_mac_read(ds, next.mac); + ret = _mv88e6xxx_atu_mac_read(ps, next.mac); if (ret < 0) return ret; - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_ATU_DATA); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_ATU_DATA); if (ret < 0) return ret; @@ -2154,8 +2110,8 @@ static int _mv88e6xxx_atu_getnext(struct dsa_switch *ds, u16 fid, return 0; } -static int _mv88e6xxx_port_fdb_dump_one(struct dsa_switch *ds, u16 fid, u16 vid, - int port, +static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_priv_state *ps, + u16 fid, u16 vid, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { @@ -2164,12 +2120,12 @@ static int _mv88e6xxx_port_fdb_dump_one(struct dsa_switch *ds, u16 fid, u16 vid, }; int err; - err = _mv88e6xxx_atu_mac_write(ds, addr.mac); + err = _mv88e6xxx_atu_mac_write(ps, addr.mac); if (err) return err; do { - err = _mv88e6xxx_atu_getnext(ds, fid, &addr); + err = _mv88e6xxx_atu_getnext(ps, fid, &addr); if (err) break; @@ -2209,28 +2165,28 @@ int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, mutex_lock(&ps->smi_mutex); /* Dump port's default Filtering Information Database (VLAN ID 0) */ - err = _mv88e6xxx_port_fid_get(ds, port, &fid); + err = _mv88e6xxx_port_fid_get(ps, port, &fid); if (err) goto unlock; - err = _mv88e6xxx_port_fdb_dump_one(ds, fid, 0, port, fdb, cb); + err = _mv88e6xxx_port_fdb_dump_one(ps, fid, 0, port, fdb, cb); if (err) goto unlock; /* Dump VLANs' Filtering Information Databases */ - err = _mv88e6xxx_vtu_vid_write(ds, vlan.vid); + err = _mv88e6xxx_vtu_vid_write(ps, vlan.vid); if (err) goto unlock; do { - err = _mv88e6xxx_vtu_getnext(ds, &vlan); + err = _mv88e6xxx_vtu_getnext(ps, &vlan); if (err) break; if (!vlan.valid) break; - err = _mv88e6xxx_port_fdb_dump_one(ds, vlan.fid, vlan.vid, port, + err = _mv88e6xxx_port_fdb_dump_one(ps, vlan.fid, vlan.vid, port, fdb, cb); if (err) break; @@ -2246,39 +2202,21 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u16 fid; int i, err; mutex_lock(&ps->smi_mutex); - /* Get or create the bridge FID and assign it to the port */ - for (i = 0; i < ps->num_ports; ++i) - if (ps->ports[i].bridge_dev == bridge) - break; - - if (i < ps->num_ports) - err = _mv88e6xxx_port_fid_get(ds, i, &fid); - else - err = _mv88e6xxx_fid_new(ds, &fid); - if (err) - goto unlock; - - err = _mv88e6xxx_port_fid_set(ds, port, fid); - if (err) - goto unlock; - /* Assign the bridge and remap each port's VLANTable */ ps->ports[port].bridge_dev = bridge; - for (i = 0; i < ps->num_ports; ++i) { + for (i = 0; i < ps->info->num_ports; ++i) { if (ps->ports[i].bridge_dev == bridge) { - err = _mv88e6xxx_port_based_vlan_map(ds, i); + err = _mv88e6xxx_port_based_vlan_map(ps, i); if (err) break; } } -unlock: mutex_unlock(&ps->smi_mutex); return err; @@ -2288,22 +2226,16 @@ void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct net_device *bridge = ps->ports[port].bridge_dev; - u16 fid; int i; mutex_lock(&ps->smi_mutex); - /* Give the port a fresh Filtering Information Database */ - if (_mv88e6xxx_fid_new(ds, &fid) || - _mv88e6xxx_port_fid_set(ds, port, fid)) - netdev_warn(ds->ports[port], "failed to assign a new FID\n"); - /* Unassign the bridge and remap each port's VLANTable */ ps->ports[port].bridge_dev = NULL; - for (i = 0; i < ps->num_ports; ++i) + for (i = 0; i < ps->info->num_ports; ++i) if (i == port || ps->ports[i].bridge_dev == bridge) - if (_mv88e6xxx_port_based_vlan_map(ds, i)) + if (_mv88e6xxx_port_based_vlan_map(ps, i)) netdev_warn(ds->ports[i], "failed to remap\n"); mutex_unlock(&ps->smi_mutex); @@ -2320,59 +2252,60 @@ static void mv88e6xxx_bridge_work(struct work_struct *work) mutex_lock(&ps->smi_mutex); - for (port = 0; port < ps->num_ports; ++port) + for (port = 0; port < ps->info->num_ports; ++port) if (test_and_clear_bit(port, ps->port_state_update_mask) && - _mv88e6xxx_port_state(ds, port, ps->ports[port].state)) - netdev_warn(ds->ports[port], "failed to update state to %s\n", + _mv88e6xxx_port_state(ps, port, ps->ports[port].state)) + netdev_warn(ds->ports[port], + "failed to update state to %s\n", mv88e6xxx_port_state_names[ps->ports[port].state]); mutex_unlock(&ps->smi_mutex); } -static int _mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, - int reg, int val) +static int _mv88e6xxx_phy_page_write(struct mv88e6xxx_priv_state *ps, + int port, int page, int reg, int val) { int ret; - ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); + ret = _mv88e6xxx_phy_write_indirect(ps, port, 0x16, page); if (ret < 0) goto restore_page_0; - ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val); + ret = _mv88e6xxx_phy_write_indirect(ps, port, reg, val); restore_page_0: - _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); + _mv88e6xxx_phy_write_indirect(ps, port, 0x16, 0x0); return ret; } -static int _mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, - int reg) +static int _mv88e6xxx_phy_page_read(struct mv88e6xxx_priv_state *ps, + int port, int page, int reg) { int ret; - ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); + ret = _mv88e6xxx_phy_write_indirect(ps, port, 0x16, page); if (ret < 0) goto restore_page_0; - ret = _mv88e6xxx_phy_read_indirect(ds, port, reg); + ret = _mv88e6xxx_phy_read_indirect(ps, port, reg); restore_page_0: - _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); + _mv88e6xxx_phy_write_indirect(ps, port, 0x16, 0x0); return ret; } -static int mv88e6xxx_power_on_serdes(struct dsa_switch *ds) +static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_priv_state *ps) { int ret; - ret = _mv88e6xxx_phy_page_read(ds, REG_FIBER_SERDES, PAGE_FIBER_SERDES, + ret = _mv88e6xxx_phy_page_read(ps, REG_FIBER_SERDES, PAGE_FIBER_SERDES, MII_BMCR); if (ret < 0) return ret; if (ret & BMCR_PDOWN) { ret &= ~BMCR_PDOWN; - ret = _mv88e6xxx_phy_page_write(ds, REG_FIBER_SERDES, + ret = _mv88e6xxx_phy_page_write(ps, REG_FIBER_SERDES, PAGE_FIBER_SERDES, MII_BMCR, ret); } @@ -2388,24 +2321,24 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) mutex_lock(&ps->smi_mutex); - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || - mv88e6xxx_6065_family(ds) || mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6185_family(ps) || mv88e6xxx_6095_family(ps) || + mv88e6xxx_6065_family(ps) || mv88e6xxx_6320_family(ps)) { /* MAC Forcing register: don't force link, speed, * duplex or flow control state to any particular * values on physical ports, but force the CPU port * and all DSA ports to their maximum bandwidth and * full duplex. */ - reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); + reg = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_PCS_CTRL); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP | PORT_PCS_CTRL_DUPLEX_FULL | PORT_PCS_CTRL_FORCE_DUPLEX; - if (mv88e6xxx_6065_family(ds)) + if (mv88e6xxx_6065_family(ps)) reg |= PORT_PCS_CTRL_100; else reg |= PORT_PCS_CTRL_1000; @@ -2413,7 +2346,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PCS_CTRL, reg); if (ret) goto abort; @@ -2434,19 +2367,19 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * forwarding of unknown unicasts and multicasts. */ reg = 0; - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds)) + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6095_family(ps) || mv88e6xxx_6065_family(ps) || + mv88e6xxx_6185_family(ps) || mv88e6xxx_6320_family(ps)) reg = PORT_CONTROL_IGMP_MLD_SNOOP | PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | PORT_CONTROL_STATE_FORWARDING; if (dsa_is_cpu_port(ds, port)) { - if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) + if (mv88e6xxx_6095_family(ps) || mv88e6xxx_6185_family(ps)) reg |= PORT_CONTROL_DSA_TAG; - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6320_family(ps)) { if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; else @@ -2455,20 +2388,20 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6095_family(ps) || mv88e6xxx_6065_family(ps) || + mv88e6xxx_6185_family(ps) || mv88e6xxx_6320_family(ps)) { if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) reg |= PORT_CONTROL_EGRESS_ADD_TAG; } } if (dsa_is_dsa_port(ds, port)) { - if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) + if (mv88e6xxx_6095_family(ps) || mv88e6xxx_6185_family(ps)) reg |= PORT_CONTROL_DSA_TAG; - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6320_family(ps)) { reg |= PORT_CONTROL_FRAME_MODE_DSA; } @@ -2477,7 +2410,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL, reg); if (ret) goto abort; @@ -2486,15 +2419,15 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. */ - if (mv88e6xxx_6352_family(ds)) { - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); + if (mv88e6xxx_6352_family(ps)) { + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_STATUS); if (ret < 0) goto abort; ret &= PORT_STATUS_CMODE_MASK; if ((ret == PORT_STATUS_CMODE_100BASE_X) || (ret == PORT_STATUS_CMODE_1000BASE_X) || (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_power_on_serdes(ds); + ret = mv88e6xxx_power_on_serdes(ps); if (ret < 0) goto abort; } @@ -2507,17 +2440,17 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * copy of all transmitted/received frames on this port to the CPU. */ reg = 0; - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6095_family(ds) || mv88e6xxx_6320_family(ds) || - mv88e6xxx_6185_family(ds)) + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6095_family(ps) || mv88e6xxx_6320_family(ps) || + mv88e6xxx_6185_family(ps)) reg = PORT_CONTROL_2_MAP_DA; - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6320_family(ds)) + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6320_family(ps)) reg |= PORT_CONTROL_2_JUMBO_10240; - if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) { + if (mv88e6xxx_6095_family(ps) || mv88e6xxx_6185_family(ps)) { /* Set the upstream port this port should use */ reg |= dsa_upstream_port(ds); /* enable forwarding of unknown multicast addresses to @@ -2530,7 +2463,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL_2, reg); if (ret) goto abort; @@ -2542,28 +2475,28 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * the other bits clear. */ reg = 1 << port; - /* Disable learning for DSA and CPU ports */ - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) - reg = PORT_ASSOC_VECTOR_LOCKED_PORT; + /* Disable learning for CPU port */ + if (dsa_is_cpu_port(ds, port)) + reg = 0; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_ASSOC_VECTOR, reg); + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_ASSOC_VECTOR, reg); if (ret) goto abort; /* Egress rate control 2: disable egress rate control. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_RATE_CONTROL_2, + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_RATE_CONTROL_2, 0x0000); if (ret) goto abort; - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6320_family(ps)) { /* Do not limit the period of time that this port can * be paused for by the remote end or the period of * time that this port can pause the remote end. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PAUSE_CTRL, 0x0000); if (ret) goto abort; @@ -2572,12 +2505,12 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * address database entries that this port is allowed * to use. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_ATU_CONTROL, 0x0000); /* Priority Override: disable DA, SA and VTU priority * override. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PRI_OVERRIDE, 0x0000); if (ret) goto abort; @@ -2585,14 +2518,14 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_ETH_TYPE, ETH_P_EDSA); if (ret) goto abort; /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_TAG_REGMAP_0123, 0x3210); if (ret) goto abort; @@ -2600,18 +2533,18 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_TAG_REGMAP_4567, 0x7654); if (ret) goto abort; } - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || - mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6185_family(ps) || mv88e6xxx_6095_family(ps) || + mv88e6xxx_6320_family(ps)) { /* Rate Control: disable ingress rate limiting. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_RATE_CONTROL, 0x0001); if (ret) goto abort; @@ -2620,26 +2553,26 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_1, 0x0000); + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL_1, 0x0000); if (ret) goto abort; - /* Port based VLAN map: give each port its own address + /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - ret = _mv88e6xxx_port_fid_set(ds, port, port + 1); + ret = _mv88e6xxx_port_fid_set(ps, port, 0); if (ret) goto abort; - ret = _mv88e6xxx_port_based_vlan_map(ds, port); + ret = _mv88e6xxx_port_based_vlan_map(ps, port); if (ret) goto abort; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN, + ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_DEFAULT_VLAN, 0x0000); abort: mutex_unlock(&ps->smi_mutex); @@ -2652,7 +2585,7 @@ int mv88e6xxx_setup_ports(struct dsa_switch *ds) int ret; int i; - for (i = 0; i < ps->num_ports; i++) { + for (i = 0; i < ps->info->num_ports; i++) { ret = mv88e6xxx_setup_port(ds, i); if (ret < 0) return ret; @@ -2660,15 +2593,10 @@ int mv88e6xxx_setup_ports(struct dsa_switch *ds) return 0; } -int mv88e6xxx_setup_common(struct dsa_switch *ds) +int mv88e6xxx_setup_common(struct mv88e6xxx_priv_state *ps) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - ps->ds = ds; mutex_init(&ps->smi_mutex); - ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0; - INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work); return 0; @@ -2677,42 +2605,67 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds) int mv88e6xxx_setup_global(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; + int err; int i; + mutex_lock(&ps->smi_mutex); /* Set the default address aging time to 5 minutes, and * enable address learn messages to be sent to all message * ports. */ - REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, - 0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_ATU_CONTROL, + 0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL); + if (err) + goto unlock; /* Configure the IP ToS mapping registers. */ - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); + if (err) + goto unlock; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + if (err) + goto unlock; /* Configure the IEEE 802.1p priority mapping register. */ - REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + if (err) + goto unlock; /* Send all frames with destination addresses matching * 01:80:c2:00:00:0x to the CPU port. */ - REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff); + if (err) + goto unlock; /* Ignore removed tag data on doubly tagged packets, disable * flow control messages, force flow control priority to the * highest, and send all special multicast frames to the CPU * port at the highest priority. */ - REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, - 0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 | - GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, + 0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 | + GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI); + if (err) + goto unlock; /* Program the DSA routing table. */ for (i = 0; i < 32; i++) { @@ -2722,91 +2675,126 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) i != ds->index && i < ds->dst->pd->nr_chips) nexthop = ds->pd->rtable[i] & 0x1f; - REG_WRITE(REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, - GLOBAL2_DEVICE_MAPPING_UPDATE | - (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | - nexthop); + err = _mv88e6xxx_reg_write( + ps, REG_GLOBAL2, + GLOBAL2_DEVICE_MAPPING, + GLOBAL2_DEVICE_MAPPING_UPDATE | + (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | nexthop); + if (err) + goto unlock; } /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MASK, - 0x8000 | (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) | - ((1 << ps->num_ports) - 1)); + for (i = 0; i < 8; i++) { + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, + 0x8000 | + (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) | + ((1 << ps->info->num_ports) - 1)); + if (err) + goto unlock; + } /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, - GLOBAL2_TRUNK_MAPPING_UPDATE | - (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); - - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6320_family(ds)) { + for (i = 0; i < 16; i++) { + err = _mv88e6xxx_reg_write( + ps, REG_GLOBAL2, + GLOBAL2_TRUNK_MAPPING, + GLOBAL2_TRUNK_MAPPING_UPDATE | + (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); + if (err) + goto unlock; + } + + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6320_family(ps)) { /* Send all frames with destination addresses matching * 01:80:c2:00:00:2x to the CPU port. */ - REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, + GLOBAL2_MGMT_EN_2X, 0xffff); + if (err) + goto unlock; /* Initialise cross-chip port VLAN table to reset * defaults. */ - REG_WRITE(REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, + GLOBAL2_PVT_ADDR, 0x9000); + if (err) + goto unlock; /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, - 0x8000 | (i << 8)); + for (i = 0; i < 16; i++) { + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, + GLOBAL2_PRIO_OVERRIDE, + 0x8000 | (i << 8)); + if (err) + goto unlock; + } } - if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || - mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || - mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || - mv88e6xxx_6320_family(ds)) { + if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || + mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || + mv88e6xxx_6185_family(ps) || mv88e6xxx_6095_family(ps) || + mv88e6xxx_6320_family(ps)) { /* Disable ingress rate limiting by resetting all * ingress rate limit registers to their initial * state. */ - for (i = 0; i < ps->num_ports; i++) - REG_WRITE(REG_GLOBAL2, GLOBAL2_INGRESS_OP, - 0x9000 | (i << 8)); + for (i = 0; i < ps->info->num_ports; i++) { + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, + GLOBAL2_INGRESS_OP, + 0x9000 | (i << 8)); + if (err) + goto unlock; + } } /* Clear the statistics counters for all ports */ - REG_WRITE(REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_FLUSH_ALL); + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_FLUSH_ALL); + if (err) + goto unlock; /* Wait for the flush to complete. */ - mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_stats_wait(ds); - if (ret < 0) + err = _mv88e6xxx_stats_wait(ps); + if (err < 0) goto unlock; /* Clear all ATU entries */ - ret = _mv88e6xxx_atu_flush(ds, 0, true); - if (ret < 0) + err = _mv88e6xxx_atu_flush(ps, 0, true); + if (err < 0) goto unlock; /* Clear all the VTU and STU entries */ - ret = _mv88e6xxx_vtu_stu_flush(ds); + err = _mv88e6xxx_vtu_stu_flush(ps); unlock: mutex_unlock(&ps->smi_mutex); - return ret; + return err; } -int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active) +int mv88e6xxx_switch_reset(struct mv88e6xxx_priv_state *ps, bool ppu_active) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 is_reset = (ppu_active ? 0x8800 : 0xc800); - struct gpio_desc *gpiod = ds->pd->reset; + struct gpio_desc *gpiod = ps->ds->pd->reset; unsigned long timeout; int ret; int i; + mutex_lock(&ps->smi_mutex); + /* Set all ports to the disabled state. */ - for (i = 0; i < ps->num_ports; i++) { - ret = REG_READ(REG_PORT(i), PORT_CONTROL); - REG_WRITE(REG_PORT(i), PORT_CONTROL, ret & 0xfffc); + for (i = 0; i < ps->info->num_ports; i++) { + ret = _mv88e6xxx_reg_read(ps, REG_PORT(i), PORT_CONTROL); + if (ret < 0) + goto unlock; + + ret = _mv88e6xxx_reg_write(ps, REG_PORT(i), PORT_CONTROL, + ret & 0xfffc); + if (ret) + goto unlock; } /* Wait for transmit queues to drain. */ @@ -2825,22 +2813,31 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active) * through global registers 0x18 and 0x19. */ if (ppu_active) - REG_WRITE(REG_GLOBAL, 0x04, 0xc000); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x04, 0xc000); else - REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x04, 0xc400); + if (ret) + goto unlock; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = REG_READ(REG_GLOBAL, 0x00); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, 0x00); + if (ret < 0) + goto unlock; + if ((ret & is_reset) == is_reset) break; usleep_range(1000, 2000); } if (time_after(jiffies, timeout)) - return -ETIMEDOUT; + ret = -ETIMEDOUT; + else + ret = 0; +unlock: + mutex_unlock(&ps->smi_mutex); - return 0; + return ret; } int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg) @@ -2849,7 +2846,7 @@ int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg) int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_page_read(ds, port, page, reg); + ret = _mv88e6xxx_phy_page_read(ps, port, page, reg); mutex_unlock(&ps->smi_mutex); return ret; @@ -2862,17 +2859,16 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_page_write(ds, port, page, reg, val); + ret = _mv88e6xxx_phy_page_write(ps, port, page, reg, val); mutex_unlock(&ps->smi_mutex); return ret; } -static int mv88e6xxx_port_to_phy_addr(struct dsa_switch *ds, int port) +static int mv88e6xxx_port_to_phy_addr(struct mv88e6xxx_priv_state *ps, + int port) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (port >= 0 && port < ps->num_ports) + if (port >= 0 && port < ps->info->num_ports) return port; return -EINVAL; } @@ -2881,14 +2877,14 @@ int mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = mv88e6xxx_port_to_phy_addr(ds, port); + int addr = mv88e6xxx_port_to_phy_addr(ps, port); int ret; if (addr < 0) - return addr; + return 0xffff; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_read(ds, addr, regnum); + ret = _mv88e6xxx_phy_read(ps, addr, regnum); mutex_unlock(&ps->smi_mutex); return ret; } @@ -2897,14 +2893,14 @@ int mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = mv88e6xxx_port_to_phy_addr(ds, port); + int addr = mv88e6xxx_port_to_phy_addr(ps, port); int ret; if (addr < 0) - return addr; + return 0xffff; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write(ds, addr, regnum, val); + ret = _mv88e6xxx_phy_write(ps, addr, regnum, val); mutex_unlock(&ps->smi_mutex); return ret; } @@ -2913,14 +2909,14 @@ int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = mv88e6xxx_port_to_phy_addr(ds, port); + int addr = mv88e6xxx_port_to_phy_addr(ps, port); int ret; if (addr < 0) - return addr; + return 0xffff; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_read_indirect(ds, addr, regnum); + ret = _mv88e6xxx_phy_read_indirect(ps, addr, regnum); mutex_unlock(&ps->smi_mutex); return ret; } @@ -2930,14 +2926,14 @@ mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int port, int regnum, u16 val) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = mv88e6xxx_port_to_phy_addr(ds, port); + int addr = mv88e6xxx_port_to_phy_addr(ps, port); int ret; if (addr < 0) return addr; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write_indirect(ds, addr, regnum, val); + ret = _mv88e6xxx_phy_write_indirect(ps, addr, regnum, val); mutex_unlock(&ps->smi_mutex); return ret; } @@ -2954,44 +2950,45 @@ static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp) mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x6); + ret = _mv88e6xxx_phy_write(ps, 0x0, 0x16, 0x6); if (ret < 0) goto error; /* Enable temperature sensor */ - ret = _mv88e6xxx_phy_read(ds, 0x0, 0x1a); + ret = _mv88e6xxx_phy_read(ps, 0x0, 0x1a); if (ret < 0) goto error; - ret = _mv88e6xxx_phy_write(ds, 0x0, 0x1a, ret | (1 << 5)); + ret = _mv88e6xxx_phy_write(ps, 0x0, 0x1a, ret | (1 << 5)); if (ret < 0) goto error; /* Wait for temperature to stabilize */ usleep_range(10000, 12000); - val = _mv88e6xxx_phy_read(ds, 0x0, 0x1a); + val = _mv88e6xxx_phy_read(ps, 0x0, 0x1a); if (val < 0) { ret = val; goto error; } /* Disable temperature sensor */ - ret = _mv88e6xxx_phy_write(ds, 0x0, 0x1a, ret & ~(1 << 5)); + ret = _mv88e6xxx_phy_write(ps, 0x0, 0x1a, ret & ~(1 << 5)); if (ret < 0) goto error; *temp = ((val & 0x1f) - 5) * 5; error: - _mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x0); + _mv88e6xxx_phy_write(ps, 0x0, 0x16, 0x0); mutex_unlock(&ps->smi_mutex); return ret; } static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) { - int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int phy = mv88e6xxx_6320_family(ps) ? 3 : 0; int ret; *temp = 0; @@ -3007,7 +3004,9 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) { - if (mv88e6xxx_6320_family(ds) || mv88e6xxx_6352_family(ds)) + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + if (mv88e6xxx_6320_family(ps) || mv88e6xxx_6352_family(ps)) return mv88e63xx_get_temp(ds, temp); return mv88e61xx_get_temp(ds, temp); @@ -3015,10 +3014,11 @@ int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) { - int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int phy = mv88e6xxx_6320_family(ps) ? 3 : 0; int ret; - if (!mv88e6xxx_6320_family(ds) && !mv88e6xxx_6352_family(ds)) + if (!mv88e6xxx_6320_family(ps) && !mv88e6xxx_6352_family(ps)) return -EOPNOTSUPP; *temp = 0; @@ -3034,10 +3034,11 @@ int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) { - int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int phy = mv88e6xxx_6320_family(ps) ? 3 : 0; int ret; - if (!mv88e6xxx_6320_family(ds) && !mv88e6xxx_6352_family(ds)) + if (!mv88e6xxx_6320_family(ps) && !mv88e6xxx_6352_family(ps)) return -EOPNOTSUPP; ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26); @@ -3050,10 +3051,11 @@ int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { - int phy = mv88e6xxx_6320_family(ds) ? 3 : 0; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int phy = mv88e6xxx_6320_family(ps) ? 3 : 0; int ret; - if (!mv88e6xxx_6320_family(ds) && !mv88e6xxx_6352_family(ds)) + if (!mv88e6xxx_6320_family(ps) && !mv88e6xxx_6352_family(ps)) return -EOPNOTSUPP; *alarm = false; @@ -3068,61 +3070,60 @@ int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) } #endif /* CONFIG_NET_DSA_HWMON */ -static char *mv88e6xxx_lookup_name(struct mii_bus *bus, int sw_addr, - const struct mv88e6xxx_switch_id *table, - unsigned int num) +static const struct mv88e6xxx_info * +mv88e6xxx_lookup_info(unsigned int prod_num, const struct mv88e6xxx_info *table, + unsigned int num) { - int i, ret; - - if (!bus) - return NULL; - - ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), PORT_SWITCH_ID); - if (ret < 0) - return NULL; + int i; - /* Look up the exact switch ID */ for (i = 0; i < num; ++i) - if (table[i].id == ret) - return table[i].name; - - /* Look up only the product number */ - for (i = 0; i < num; ++i) { - if (table[i].id == (ret & PORT_SWITCH_ID_PROD_NUM_MASK)) { - dev_warn(&bus->dev, - "unknown revision %d, using base switch 0x%x\n", - ret & PORT_SWITCH_ID_REV_MASK, - ret & PORT_SWITCH_ID_PROD_NUM_MASK); - return table[i].name; - } - } + if (table[i].prod_num == prod_num) + return &table[i]; return NULL; } -char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, - int sw_addr, void **priv, - const struct mv88e6xxx_switch_id *table, - unsigned int num) +const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, + int sw_addr, void **priv, + const struct mv88e6xxx_info *table, + unsigned int num) { + const struct mv88e6xxx_info *info; struct mv88e6xxx_priv_state *ps; - struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); - char *name; + struct mii_bus *bus; + const char *name; + int id, prod_num, rev; + bus = dsa_host_dev_to_mii_bus(host_dev); if (!bus) return NULL; - name = mv88e6xxx_lookup_name(bus, sw_addr, table, num); - if (name) { - ps = devm_kzalloc(dsa_dev, sizeof(*ps), GFP_KERNEL); - if (!ps) - return NULL; - *priv = ps; - ps->bus = dsa_host_dev_to_mii_bus(host_dev); - if (!ps->bus) - return NULL; - ps->sw_addr = sw_addr; - } + id = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), PORT_SWITCH_ID); + if (id < 0) + return NULL; + + prod_num = (id & 0xfff0) >> 4; + rev = id & 0x000f; + + info = mv88e6xxx_lookup_info(prod_num, table, num); + if (!info) + return NULL; + + name = info->name; + + ps = devm_kzalloc(dsa_dev, sizeof(*ps), GFP_KERNEL); + if (!ps) + return NULL; + + ps->bus = bus; + ps->sw_addr = sw_addr; + ps->info = info; + + *priv = ps; + + dev_info(&ps->bus->dev, "switch 0x%x probed: %s, revision %u\n", + prod_num, name, rev); + return name; } diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 5d27decc85cb..4f455d219859 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -68,52 +68,23 @@ #define PORT_PCS_CTRL_UNFORCED 0x03 #define PORT_PAUSE_CTRL 0x02 #define PORT_SWITCH_ID 0x03 -#define PORT_SWITCH_ID_PROD_NUM_MASK 0xfff0 -#define PORT_SWITCH_ID_REV_MASK 0x000f -#define PORT_SWITCH_ID_6031 0x0310 -#define PORT_SWITCH_ID_6035 0x0350 -#define PORT_SWITCH_ID_6046 0x0480 -#define PORT_SWITCH_ID_6061 0x0610 -#define PORT_SWITCH_ID_6065 0x0650 -#define PORT_SWITCH_ID_6085 0x04a0 -#define PORT_SWITCH_ID_6092 0x0970 -#define PORT_SWITCH_ID_6095 0x0950 -#define PORT_SWITCH_ID_6096 0x0980 -#define PORT_SWITCH_ID_6097 0x0990 -#define PORT_SWITCH_ID_6108 0x1070 -#define PORT_SWITCH_ID_6121 0x1040 -#define PORT_SWITCH_ID_6122 0x1050 -#define PORT_SWITCH_ID_6123 0x1210 -#define PORT_SWITCH_ID_6123_A1 0x1212 -#define PORT_SWITCH_ID_6123_A2 0x1213 -#define PORT_SWITCH_ID_6131 0x1060 -#define PORT_SWITCH_ID_6131_B2 0x1066 -#define PORT_SWITCH_ID_6152 0x1a40 -#define PORT_SWITCH_ID_6155 0x1a50 -#define PORT_SWITCH_ID_6161 0x1610 -#define PORT_SWITCH_ID_6161_A1 0x1612 -#define PORT_SWITCH_ID_6161_A2 0x1613 -#define PORT_SWITCH_ID_6165 0x1650 -#define PORT_SWITCH_ID_6165_A1 0x1652 -#define PORT_SWITCH_ID_6165_A2 0x1653 -#define PORT_SWITCH_ID_6171 0x1710 -#define PORT_SWITCH_ID_6172 0x1720 -#define PORT_SWITCH_ID_6175 0x1750 -#define PORT_SWITCH_ID_6176 0x1760 -#define PORT_SWITCH_ID_6182 0x1a60 -#define PORT_SWITCH_ID_6185 0x1a70 -#define PORT_SWITCH_ID_6240 0x2400 -#define PORT_SWITCH_ID_6320 0x1150 -#define PORT_SWITCH_ID_6320_A1 0x1151 -#define PORT_SWITCH_ID_6320_A2 0x1152 -#define PORT_SWITCH_ID_6321 0x3100 -#define PORT_SWITCH_ID_6321_A1 0x3101 -#define PORT_SWITCH_ID_6321_A2 0x3102 -#define PORT_SWITCH_ID_6350 0x3710 -#define PORT_SWITCH_ID_6351 0x3750 -#define PORT_SWITCH_ID_6352 0x3520 -#define PORT_SWITCH_ID_6352_A0 0x3521 -#define PORT_SWITCH_ID_6352_A1 0x3522 +#define PORT_SWITCH_ID_PROD_NUM_6085 0x04a +#define PORT_SWITCH_ID_PROD_NUM_6095 0x095 +#define PORT_SWITCH_ID_PROD_NUM_6131 0x106 +#define PORT_SWITCH_ID_PROD_NUM_6320 0x115 +#define PORT_SWITCH_ID_PROD_NUM_6123 0x121 +#define PORT_SWITCH_ID_PROD_NUM_6161 0x161 +#define PORT_SWITCH_ID_PROD_NUM_6165 0x165 +#define PORT_SWITCH_ID_PROD_NUM_6171 0x171 +#define PORT_SWITCH_ID_PROD_NUM_6172 0x172 +#define PORT_SWITCH_ID_PROD_NUM_6175 0x175 +#define PORT_SWITCH_ID_PROD_NUM_6176 0x176 +#define PORT_SWITCH_ID_PROD_NUM_6185 0x1a7 +#define PORT_SWITCH_ID_PROD_NUM_6240 0x240 +#define PORT_SWITCH_ID_PROD_NUM_6321 0x310 +#define PORT_SWITCH_ID_PROD_NUM_6352 0x352 +#define PORT_SWITCH_ID_PROD_NUM_6350 0x371 +#define PORT_SWITCH_ID_PROD_NUM_6351 0x375 #define PORT_CONTROL 0x04 #define PORT_CONTROL_USE_CORE_TAG BIT(15) #define PORT_CONTROL_DROP_ON_LOCK BIT(14) @@ -367,9 +338,24 @@ #define MV88E6XXX_N_FID 4096 -struct mv88e6xxx_switch_id { - u16 id; - char *name; +enum mv88e6xxx_family { + MV88E6XXX_FAMILY_NONE, + MV88E6XXX_FAMILY_6065, /* 6031 6035 6061 6065 */ + MV88E6XXX_FAMILY_6095, /* 6092 6095 */ + MV88E6XXX_FAMILY_6097, /* 6046 6085 6096 6097 */ + MV88E6XXX_FAMILY_6165, /* 6123 6161 6165 */ + MV88E6XXX_FAMILY_6185, /* 6108 6121 6122 6131 6152 6155 6182 6185 */ + MV88E6XXX_FAMILY_6320, /* 6320 6321 */ + MV88E6XXX_FAMILY_6351, /* 6171 6175 6350 6351 */ + MV88E6XXX_FAMILY_6352, /* 6172 6176 6240 6352 */ +}; + +struct mv88e6xxx_info { + enum mv88e6xxx_family family; + u16 prod_num; + const char *name; + unsigned int num_databases; + unsigned int num_ports; }; struct mv88e6xxx_atu_entry { @@ -397,9 +383,14 @@ struct mv88e6xxx_priv_port { }; struct mv88e6xxx_priv_state { + const struct mv88e6xxx_info *info; + /* The dsa_switch this private structure is related to */ struct dsa_switch *ds; + /* The device this structure is associated to */ + struct device *dev; + /* When using multi-chip addressing, this mutex protects * access to the indirect access registers. (In single-chip * mode, this mutex is effectively useless.) @@ -438,9 +429,6 @@ struct mv88e6xxx_priv_state { */ struct mutex eeprom_mutex; - int id; /* switch product id */ - int num_ports; /* number of switch ports */ - struct mv88e6xxx_priv_port ports[DSA_MAX_PORTS]; DECLARE_BITMAP(port_state_update_mask, DSA_MAX_PORTS); @@ -461,17 +449,18 @@ struct mv88e6xxx_hw_stat { enum stat_type type; }; -int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active); -char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, - int sw_addr, void **priv, - const struct mv88e6xxx_switch_id *table, - unsigned int num); +int mv88e6xxx_switch_reset(struct mv88e6xxx_priv_state *ps, bool ppu_active); +const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, + int sw_addr, void **priv, + const struct mv88e6xxx_info *table, + unsigned int num); int mv88e6xxx_setup_ports(struct dsa_switch *ds); -int mv88e6xxx_setup_common(struct dsa_switch *ds); +int mv88e6xxx_setup_common(struct mv88e6xxx_priv_state *ps); int mv88e6xxx_setup_global(struct dsa_switch *ds); -int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); -int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); +int mv88e6xxx_reg_read(struct mv88e6xxx_priv_state *ps, int addr, int reg); +int mv88e6xxx_reg_write(struct mv88e6xxx_priv_state *ps, int addr, + int reg, u16 val); int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); int mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum); @@ -479,7 +468,7 @@ int mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val); int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum); int mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int port, int regnum, u16 val); -void mv88e6xxx_ppu_state_init(struct dsa_switch *ds); +void mv88e6xxx_ppu_state_init(struct mv88e6xxx_priv_state *ps); int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum); int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, int regnum, u16 val); @@ -542,25 +531,4 @@ extern struct dsa_switch_driver mv88e6123_switch_driver; extern struct dsa_switch_driver mv88e6352_switch_driver; extern struct dsa_switch_driver mv88e6171_switch_driver; -#define REG_READ(addr, reg) \ - ({ \ - int __ret; \ - \ - __ret = mv88e6xxx_reg_read(ds, addr, reg); \ - if (__ret < 0) \ - return __ret; \ - __ret; \ - }) - -#define REG_WRITE(addr, reg, val) \ - ({ \ - int __ret; \ - \ - __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \ - if (__ret < 0) \ - return __ret; \ - }) - - - #endif diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index 66d0b73c39c0..8e7575571531 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -543,11 +543,13 @@ int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) static int outs; unsigned long flags; - if (!TX_BUFFS_AVAIL) - return NETDEV_TX_LOCKED; - netif_stop_queue(dev); + if (!TX_BUFFS_AVAIL) { + dev_consume_skb_any(skb); + return NETDEV_TX_OK; + } + skblen = skb->len; #ifdef DEBUG_DRIVER diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 56139184b801..2a18d34d2610 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -547,10 +547,8 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb, local_irq_save(flags); - if (!lance_tx_buffs_avail(lp)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + if (!lance_tx_buffs_avail(lp)) + goto out_free; #ifdef DEBUG /* dump the packet */ @@ -573,6 +571,7 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb, /* Kick the lance: transmit now */ ll->rdp = LE_C0_INEA | LE_C0_TDMD; + out_free: dev_kfree_skb(skb); local_irq_restore(flags); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 39e081a70f5b..457f74500242 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -824,7 +824,7 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata, return -EINVAL; phy = get_phy_device(mdio, phy_id, false); - if (!phy || IS_ERR(phy)) + if (IS_ERR(phy)) return -EIO; ret = phy_device_register(phy); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 8d4c1ad2fc60..409152b21191 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -973,6 +973,17 @@ static enum xgene_ring_owner xgene_derive_ring_owner(struct xgene_enet_pdata *p) return owner; } +static u8 xgene_start_cpu_bufnum(struct xgene_enet_pdata *pdata) +{ + struct device *dev = &pdata->pdev->dev; + u32 cpu_bufnum; + int ret; + + ret = device_property_read_u32(dev, "channel", &cpu_bufnum); + + return (!ret) ? cpu_bufnum : pdata->cpu_bufnum; +} + static int xgene_enet_create_desc_rings(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); @@ -981,13 +992,15 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) struct xgene_enet_desc_ring *buf_pool = NULL; enum xgene_ring_owner owner; dma_addr_t dma_exp_bufs; - u8 cpu_bufnum = pdata->cpu_bufnum; + u8 cpu_bufnum; u8 eth_bufnum = pdata->eth_bufnum; u8 bp_bufnum = pdata->bp_bufnum; u16 ring_num = pdata->ring_num; u16 ring_id; int i, ret, size; + cpu_bufnum = xgene_start_cpu_bufnum(pdata); + for (i = 0; i < pdata->rxq_cnt; i++) { /* allocate rx descriptor ring */ owner = xgene_derive_ring_owner(pdata); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index b9203d928938..c46b489ce9b4 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -488,7 +488,7 @@ struct atl1c_tpd_ring { dma_addr_t dma; /* descriptor ring physical address */ u16 size; /* descriptor ring length in bytes */ u16 count; /* number of descriptors in the ring */ - u16 next_to_use; /* this is protectd by adapter->tx_lock */ + u16 next_to_use; atomic_t next_to_clean; struct atl1c_buffer *buffer_info; }; @@ -542,7 +542,6 @@ struct atl1c_adapter { u16 link_duplex; spinlock_t mdio_lock; - spinlock_t tx_lock; atomic_t irq_sem; struct work_struct common_task; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index d0084d4d1a9b..a3200ea6d765 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -821,7 +821,6 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter) atl1c_set_rxbufsize(adapter, adapter->netdev); atomic_set(&adapter->irq_sem, 1); spin_lock_init(&adapter->mdio_lock); - spin_lock_init(&adapter->tx_lock); set_bit(__AT_DOWN, &adapter->flags); return 0; @@ -2206,7 +2205,6 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct atl1c_adapter *adapter = netdev_priv(netdev); - unsigned long flags; u16 tpd_req = 1; struct atl1c_tpd_desc *tpd; enum atl1c_trans_queue type = atl1c_trans_normal; @@ -2217,16 +2215,10 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, } tpd_req = atl1c_cal_tpd_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) { - if (netif_msg_pktdata(adapter)) - dev_info(&adapter->pdev->dev, "tx locked\n"); - return NETDEV_TX_LOCKED; - } if (atl1c_tpd_avail(adapter, type) < tpd_req) { /* no enough descriptor, just stop queue */ netif_stop_queue(netdev); - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_BUSY; } @@ -2234,7 +2226,6 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, /* do TSO and check sum */ if (atl1c_tso_csum(adapter, skb, &tpd, type) != 0) { - spin_unlock_irqrestore(&adapter->tx_lock, flags); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -2257,12 +2248,10 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, "tx-skb droppted due to dma error\n"); /* roll back tpd/buffer */ atl1c_tx_rollback(adapter, tpd, type); - spin_unlock_irqrestore(&adapter->tx_lock, flags); dev_kfree_skb_any(skb); } else { netdev_sent_queue(adapter->netdev, skb->len); atl1c_tx_queue(adapter, skb, tpd, type); - spin_unlock_irqrestore(&adapter->tx_lock, flags); } return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e.h b/drivers/net/ethernet/atheros/atl1e/atl1e.h index 0212dac7e23a..632bb843aed6 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e.h +++ b/drivers/net/ethernet/atheros/atl1e/atl1e.h @@ -442,7 +442,6 @@ struct atl1e_adapter { u16 link_duplex; spinlock_t mdio_lock; - spinlock_t tx_lock; atomic_t irq_sem; struct work_struct reset_task; diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 59a03a193e83..974713b19ab6 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -648,7 +648,6 @@ static int atl1e_sw_init(struct atl1e_adapter *adapter) atomic_set(&adapter->irq_sem, 1); spin_lock_init(&adapter->mdio_lock); - spin_lock_init(&adapter->tx_lock); set_bit(__AT_DOWN, &adapter->flags); @@ -1866,7 +1865,6 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct atl1e_adapter *adapter = netdev_priv(netdev); - unsigned long flags; u16 tpd_req = 1; struct atl1e_tpd_desc *tpd; @@ -1880,13 +1878,10 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } tpd_req = atl1e_cal_tdp_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) - return NETDEV_TX_LOCKED; if (atl1e_tpd_avail(adapter) < tpd_req) { /* no enough descriptor, just stop queue */ netif_stop_queue(netdev); - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_BUSY; } @@ -1910,7 +1905,6 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, /* do TSO and check sum */ if (atl1e_tso_csum(adapter, skb, tpd) != 0) { - spin_unlock_irqrestore(&adapter->tx_lock, flags); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1921,10 +1915,7 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, } atl1e_tx_queue(adapter, tpd_req, tpd); - - netdev->trans_start = jiffies; /* NETIF_F_LLTX driver :( */ out: - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_OK; } @@ -2285,8 +2276,7 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_HW_VLAN_CTAG_RX; - netdev->features = netdev->hw_features | NETIF_F_LLTX | - NETIF_F_HW_VLAN_CTAG_TX; + netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_TX; /* not enabled by default */ netdev->hw_features |= NETIF_F_RXALL | NETIF_F_RXFCS; return 0; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 8f76f4558a88..2ff465848b65 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1412,7 +1412,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -EIO; - netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX; netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); /* Init PHY as early as possible due to power saving issue */ diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 993c780bdfab..30b0c2895a56 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -831,7 +831,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) rdma_writel(priv, priv->rx_c_index, RDMA_CONS_INDEX); if (work_done < budget) { - napi_complete(napi); + napi_complete_done(napi, work_done); /* re-enable RX interrupts */ intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE); } @@ -873,7 +873,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (likely(napi_schedule_prep(&priv->napi))) { /* disable RX interrupts */ intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE); - __napi_schedule(&priv->napi); + __napi_schedule_irqoff(&priv->napi); } } @@ -916,7 +916,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id) if (likely(napi_schedule_prep(&txr->napi))) { intrl2_1_mask_set(priv, BIT(ring)); - __napi_schedule(&txr->napi); + __napi_schedule_irqoff(&txr->napi); } } diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 99b30a952b38..38db2e4d7d54 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1572,6 +1572,11 @@ static int bgmac_probe(struct bcma_device *core) dev_warn(&core->dev, "Using random MAC: %pM\n", mac); } + /* This (reset &) enable is not preset in specs or reference driver but + * Broadcom does it in arch PCI code when enabling fake PCI device. + */ + bcma_core_enable(core, 0); + /* Allocation and references */ net_dev = alloc_etherdev(sizeof(*bgmac)); if (!net_dev) diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 4fbb093e0d84..9a03c142b742 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -199,9 +199,9 @@ #define BGMAC_CMDCFG_TAI 0x00000200 #define BGMAC_CMDCFG_HD 0x00000400 /* Set if in half duplex mode */ #define BGMAC_CMDCFG_HD_SHIFT 10 -#define BGMAC_CMDCFG_SR_REV0 0x00000800 /* Set to reset mode, for other revs */ -#define BGMAC_CMDCFG_SR_REV4 0x00002000 /* Set to reset mode, only for core rev 4 */ -#define BGMAC_CMDCFG_SR(rev) ((rev == 4) ? BGMAC_CMDCFG_SR_REV4 : BGMAC_CMDCFG_SR_REV0) +#define BGMAC_CMDCFG_SR_REV0 0x00000800 /* Set to reset mode, for core rev 0-3 */ +#define BGMAC_CMDCFG_SR_REV4 0x00002000 /* Set to reset mode, for core rev >= 4 */ +#define BGMAC_CMDCFG_SR(rev) ((rev >= 4) ? BGMAC_CMDCFG_SR_REV4 : BGMAC_CMDCFG_SR_REV0) #define BGMAC_CMDCFG_ML 0x00008000 /* Set to activate mac loopback mode */ #define BGMAC_CMDCFG_AE 0x00400000 #define BGMAC_CMDCFG_CFE 0x00800000 diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 8150c74f054a..fbff226369ac 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -878,7 +878,11 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, else p = (char *)priv; p += s->stat_offset; - data[i] = *(u32 *)p; + if (sizeof(unsigned long) != sizeof(u32) && + s->stat_sizeof == sizeof(unsigned long)) + data[i] = *(unsigned long *)p; + else + data[i] = *(u32 *)p; } } diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 967951582e03..d20539a6d162 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1011,10 +1011,11 @@ static int bgx_init_of_phy(struct bgx *bgx) } lmac++; - if (lmac == MAX_LMAC_PER_BGX) + if (lmac == MAX_LMAC_PER_BGX) { + of_node_put(node); break; + } } - of_node_put(node); return 0; defer: diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 526ea74e82d9..86f467a2c485 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, struct cmdQ *q = &sge->cmdQ[qid]; unsigned int credits, pidx, genbit, count, use_sched_skb = 0; - if (!spin_trylock(&q->lock)) - return NETDEV_TX_LOCKED; + spin_lock(&q->lock); reclaim_completed_tx(sge, q); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 984a3cc26f86..b4fceb92479f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -324,7 +324,9 @@ struct adapter_params { unsigned int sf_fw_start; /* start of FW image in flash */ unsigned int fw_vers; + unsigned int bs_vers; /* bootstrap version */ unsigned int tp_vers; + unsigned int er_vers; /* expansion ROM version */ u8 api_vers[7]; unsigned short mtus[NMTUS]; @@ -357,6 +359,34 @@ struct sge_idma_monitor_state { unsigned int idma_warn[2]; /* time to warning in HZ */ }; +/* Firmware Mailbox Command/Reply log. All values are in Host-Endian format. + * The access and execute times are signed in order to accommodate negative + * error returns. + */ +struct mbox_cmd { + u64 cmd[MBOX_LEN / 8]; /* a Firmware Mailbox Command/Reply */ + u64 timestamp; /* OS-dependent timestamp */ + u32 seqno; /* sequence number */ + s16 access; /* time (ms) to access mailbox */ + s16 execute; /* time (ms) to execute */ +}; + +struct mbox_cmd_log { + unsigned int size; /* number of entries in the log */ + unsigned int cursor; /* next position in the log to write */ + u32 seqno; /* next sequence number */ + /* variable length mailbox command log starts here */ +}; + +/* Given a pointer to a Firmware Mailbox Command Log and a log entry index, + * return a pointer to the specified entry. + */ +static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log, + unsigned int entry_idx) +{ + return &((struct mbox_cmd *)&(log)[1])[entry_idx]; +} + #include "t4fw_api.h" #define FW_VERSION(chip) ( \ @@ -394,6 +424,7 @@ struct link_config { unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ unsigned char link_ok; /* link up? */ + unsigned char link_down_rc; /* link down reason */ }; #define FW_LEN16(fw_struct) FW_CMD_LEN16_V(sizeof(fw_struct) / 16) @@ -731,6 +762,7 @@ struct adapter { u32 t4_bar0; struct pci_dev *pdev; struct device *pdev_dev; + const char *name; unsigned int mbox; unsigned int pf; unsigned int flags; @@ -776,6 +808,10 @@ struct adapter { struct work_struct db_drop_task; bool tid_release_task_busy; + /* support for mailbox command/reply logging */ +#define T4_OS_LOG_MBOX_CMDS 256 + struct mbox_cmd_log *mbox_log; + struct dentry *debugfs_root; bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ bool trace_rss; /* 1 implies that different RSS flit per filter is @@ -1306,6 +1342,7 @@ int t4_fl_pkt_align(struct adapter *adap); unsigned int t4_flash_cfg_addr(struct adapter *adapter); int t4_check_fw_version(struct adapter *adap); int t4_get_fw_version(struct adapter *adapter, u32 *vers); +int t4_get_bs_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); int t4_get_exprom_version(struct adapter *adapter, u32 *vers); int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, @@ -1329,6 +1366,8 @@ int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_init_rss_mode(struct adapter *adap, int mbox); +int t4_init_portinfo(struct port_info *pi, int mbox, + int port, int pf, int vf, u8 mac[]); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); void t4_fatal_err(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, @@ -1451,6 +1490,9 @@ int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, u16 *valp); int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, u16 val); +int t4_iq_stop(struct adapter *adap, unsigned int mbox, unsigned int pf, + unsigned int vf, unsigned int iqtype, unsigned int iqid, + unsigned int fl0id, unsigned int fl1id); int t4_iq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int iqtype, unsigned int iqid, unsigned int fl0id, unsigned int fl1id); @@ -1461,6 +1503,7 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox); +void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl); int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl); void t4_db_full(struct adapter *adapter); void t4_db_dropped(struct adapter *adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 052c660aca80..6ee2ed30626b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -253,7 +253,7 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, { const union fw_port_dcb *fwdcb = &pcmd->u.dcb; int port = FW_PORT_CMD_PORTID_G(be32_to_cpu(pcmd->op_to_portid)); - struct net_device *dev = adap->port[port]; + struct net_device *dev = adap->port[adap->chan_map[port]]; struct port_info *pi = netdev_priv(dev); struct port_dcb_info *dcb = &pi->dcb; int dcb_type = pcmd->u.dcb.pgid.type; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 0bb41e9b9b1c..91fb50850fff 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -1152,6 +1152,104 @@ static const struct file_operations devlog_fops = { .release = seq_release_private }; +/* Show Firmware Mailbox Command/Reply Log + * + * Note that we don't do any locking when dumping the Firmware Mailbox Log so + * it's possible that we can catch things during a log update and therefore + * see partially corrupted log entries. But it's probably Good Enough(tm). + * If we ever decide that we want to make sure that we're dumping a coherent + * log, we'd need to perform locking in the mailbox logging and in + * mboxlog_open() where we'd need to grab the entire mailbox log in one go + * like we do for the Firmware Device Log. + */ +static int mboxlog_show(struct seq_file *seq, void *v) +{ + struct adapter *adapter = seq->private; + struct mbox_cmd_log *log = adapter->mbox_log; + struct mbox_cmd *entry; + int entry_idx, i; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, + "%10s %15s %5s %5s %s\n", + "Seq#", "Tstamp", "Atime", "Etime", + "Command/Reply"); + return 0; + } + + entry_idx = log->cursor + ((uintptr_t)v - 2); + if (entry_idx >= log->size) + entry_idx -= log->size; + entry = mbox_cmd_log_entry(log, entry_idx); + + /* skip over unused entries */ + if (entry->timestamp == 0) + return 0; + + seq_printf(seq, "%10u %15llu %5d %5d", + entry->seqno, entry->timestamp, + entry->access, entry->execute); + for (i = 0; i < MBOX_LEN / 8; i++) { + u64 flit = entry->cmd[i]; + u32 hi = (u32)(flit >> 32); + u32 lo = (u32)flit; + + seq_printf(seq, " %08x %08x", hi, lo); + } + seq_puts(seq, "\n"); + return 0; +} + +static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos) +{ + struct adapter *adapter = seq->private; + struct mbox_cmd_log *log = adapter->mbox_log; + + return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL); +} + +static void *mboxlog_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN; +} + +static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return mboxlog_get_idx(seq, *pos); +} + +static void mboxlog_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations mboxlog_seq_ops = { + .start = mboxlog_start, + .next = mboxlog_next, + .stop = mboxlog_stop, + .show = mboxlog_show +}; + +static int mboxlog_open(struct inode *inode, struct file *file) +{ + int res = seq_open(file, &mboxlog_seq_ops); + + if (!res) { + struct seq_file *seq = file->private_data; + + seq->private = inode->i_private; + } + return res; +} + +static const struct file_operations mboxlog_fops = { + .owner = THIS_MODULE, + .open = mboxlog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int mbox_show(struct seq_file *seq, void *v) { static const char * const owner[] = { "none", "FW", "driver", @@ -1572,6 +1670,7 @@ static const struct file_operations flash_debugfs_fops = { .owner = THIS_MODULE, .open = mem_open, .read = flash_read, + .llseek = default_llseek, }; static inline void tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask) @@ -3128,6 +3227,7 @@ int t4_setup_debugfs(struct adapter *adap) { "cim_qcfg", &cim_qcfg_fops, S_IRUSR, 0 }, { "clk", &clk_debugfs_fops, S_IRUSR, 0 }, { "devlog", &devlog_fops, S_IRUSR, 0 }, + { "mboxlog", &mboxlog_fops, S_IRUSR, 0 }, { "mbox0", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 0 }, { "mbox1", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 1 }, { "mbox2", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 2 }, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a1e329ec24cd..d7f40436f319 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -337,6 +337,17 @@ void t4_os_portmod_changed(const struct adapter *adap, int port_id) netdev_info(dev, "port module unplugged\n"); else if (pi->mod_type < ARRAY_SIZE(mod_str)) netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]); + else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) + netdev_info(dev, "%s: unsupported port module inserted\n", + dev->name); + else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) + netdev_info(dev, "%s: unknown port module inserted\n", + dev->name); + else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR) + netdev_info(dev, "%s: transceiver module error\n", dev->name); + else + netdev_info(dev, "%s: unknown module type %d inserted\n", + dev->name, pi->mod_type); } int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */ @@ -504,7 +515,7 @@ EXPORT_SYMBOL(cxgb4_dcb_enabled); static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) { int port = FW_PORT_CMD_PORTID_G(ntohl(pcmd->op_to_portid)); - struct net_device *dev = adap->port[port]; + struct net_device *dev = adap->port[adap->chan_map[port]]; int old_dcb_enabled = cxgb4_dcb_enabled(dev); int new_dcb_enabled; @@ -634,7 +645,8 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, action == FW_PORT_ACTION_GET_PORT_INFO) { int port = FW_PORT_CMD_PORTID_G( be32_to_cpu(pcmd->op_to_portid)); - struct net_device *dev = q->adap->port[port]; + struct net_device *dev = + q->adap->port[q->adap->chan_map[port]]; int state_input = ((pcmd->u.info.dcbxdis_pkd & FW_PORT_CMD_DCBXDIS_F) ? CXGB4_DCB_INPUT_FW_DISABLED @@ -3738,7 +3750,10 @@ static int adap_init0(struct adapter *adap) * is excessively mismatched relative to the driver.) */ t4_get_fw_version(adap, &adap->params.fw_vers); + t4_get_bs_version(adap, &adap->params.bs_vers); t4_get_tp_version(adap, &adap->params.tp_vers); + t4_get_exprom_version(adap, &adap->params.er_vers); + ret = t4_check_fw_version(adap); /* If firmware is too old (not supported by driver) force an update. */ if (ret) @@ -4652,6 +4667,68 @@ static void cxgb4_check_pcie_caps(struct adapter *adap) "suggested for optimal performance.\n"); } +/* Dump basic information about the adapter */ +static void print_adapter_info(struct adapter *adapter) +{ + /* Device information */ + dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n", + adapter->params.vpd.id, + CHELSIO_CHIP_RELEASE(adapter->params.chip)); + dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n", + adapter->params.vpd.sn, adapter->params.vpd.pn); + + /* Firmware Version */ + if (!adapter->params.fw_vers) + dev_warn(adapter->pdev_dev, "No firmware loaded\n"); + else + dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers)); + + /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap + * Firmware, so dev_info() is more appropriate here.) + */ + if (!adapter->params.bs_vers) + dev_info(adapter->pdev_dev, "No bootstrap loaded\n"); + else + dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers)); + + /* TP Microcode Version */ + if (!adapter->params.tp_vers) + dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n"); + else + dev_info(adapter->pdev_dev, + "TP Microcode version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers)); + + /* Expansion ROM version */ + if (!adapter->params.er_vers) + dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n"); + else + dev_info(adapter->pdev_dev, + "Expansion ROM version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers)); + + /* Software/Hardware configuration */ + dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n", + is_offload(adapter) ? "R" : "", + ((adapter->flags & USING_MSIX) ? "MSI-X" : + (adapter->flags & USING_MSI) ? "MSI" : ""), + is_offload(adapter) ? "Offload" : "non-Offload"); +} + static void print_port_info(const struct net_device *dev) { char buf[80]; @@ -4679,14 +4756,8 @@ static void print_port_info(const struct net_device *dev) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); - netdev_info(dev, "Chelsio %s rev %d %s %sNIC %s\n", - adap->params.vpd.id, - CHELSIO_CHIP_RELEASE(adap->params.chip), buf, - is_offload(adap) ? "R" : "", - (adap->flags & USING_MSIX) ? " MSI-X" : - (adap->flags & USING_MSI) ? " MSI" : ""); - netdev_info(dev, "S/N: %s, P/N: %s\n", - adap->params.vpd.sn, adap->params.vpd.pn); + netdev_info(dev, "%s: Chelsio %s (%s) %s\n", + dev->name, adap->params.vpd.id, adap->name, buf); } static void enable_pcie_relaxed_ordering(struct pci_dev *dev) @@ -4838,12 +4909,23 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; } + adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + + (sizeof(struct mbox_cmd) * + T4_OS_LOG_MBOX_CMDS), + GFP_KERNEL); + if (!adapter->mbox_log) { + err = -ENOMEM; + goto out_free_adapter; + } + adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS; + /* PCI device has been enabled */ adapter->flags |= DEV_ENABLED; adapter->regs = regs; adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; + adapter->name = pci_name(pdev); adapter->mbox = func; adapter->pf = func; adapter->msg_enable = dflt_msg_enable; @@ -5074,6 +5156,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (is_offload(adapter)) attach_ulds(adapter); + print_adapter_info(adapter); + sriov: #ifdef CONFIG_PCI_IOV if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) @@ -5093,6 +5177,7 @@ sriov: if (adapter->workq) destroy_workqueue(adapter->workq); + kfree(adapter->mbox_log); kfree(adapter); out_unmap_bar0: iounmap(regs); @@ -5159,6 +5244,7 @@ static void remove_one(struct pci_dev *pdev) adapter->flags &= ~DEV_ENABLED; } pci_release_regions(pdev); + kfree(adapter->mbox_log); synchronize_rcu(); kfree(adapter); } else diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 13b144bcf725..bad253beb8c8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2981,18 +2981,34 @@ void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q) void t4_free_sge_resources(struct adapter *adap) { int i; - struct sge_eth_rxq *eq = adap->sge.ethrxq; - struct sge_eth_txq *etq = adap->sge.ethtxq; + struct sge_eth_rxq *eq; + struct sge_eth_txq *etq; + + /* stop all Rx queues in order to start them draining */ + for (i = 0; i < adap->sge.ethqsets; i++) { + eq = &adap->sge.ethrxq[i]; + if (eq->rspq.desc) + t4_iq_stop(adap, adap->mbox, adap->pf, 0, + FW_IQ_TYPE_FL_INT_CAP, + eq->rspq.cntxt_id, + eq->fl.size ? eq->fl.cntxt_id : 0xffff, + 0xffff); + } /* clean up Ethernet Tx/Rx queues */ - for (i = 0; i < adap->sge.ethqsets; i++, eq++, etq++) { + for (i = 0; i < adap->sge.ethqsets; i++) { + eq = &adap->sge.ethrxq[i]; if (eq->rspq.desc) free_rspq_fl(adap, &eq->rspq, eq->fl.size ? &eq->fl : NULL); + + etq = &adap->sge.ethtxq[i]; if (etq->q.desc) { t4_eth_eq_free(adap, adap->mbox, adap->pf, 0, etq->q.cntxt_id); + __netif_tx_lock_bh(etq->txq); free_tx_desc(adap, &etq->q, etq->q.in_use, true); + __netif_tx_unlock_bh(etq->txq); kfree(etq->q.sdesc); free_txq(adap, &etq->q); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index cc1736bece0f..49bcbf16c9ca 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -224,18 +224,34 @@ static void fw_asrt(struct adapter *adap, u32 mbox_addr) be32_to_cpu(asrt.u.assert.x), be32_to_cpu(asrt.u.assert.y)); } -static void dump_mbox(struct adapter *adap, int mbox, u32 data_reg) +/** + * t4_record_mbox - record a Firmware Mailbox Command/Reply in the log + * @adapter: the adapter + * @cmd: the Firmware Mailbox Command or Reply + * @size: command length in bytes + * @access: the time (ms) needed to access the Firmware Mailbox + * @execute: the time (ms) the command spent being executed + */ +static void t4_record_mbox(struct adapter *adapter, + const __be64 *cmd, unsigned int size, + int access, int execute) { - dev_err(adap->pdev_dev, - "mbox %d: %llx %llx %llx %llx %llx %llx %llx %llx\n", mbox, - (unsigned long long)t4_read_reg64(adap, data_reg), - (unsigned long long)t4_read_reg64(adap, data_reg + 8), - (unsigned long long)t4_read_reg64(adap, data_reg + 16), - (unsigned long long)t4_read_reg64(adap, data_reg + 24), - (unsigned long long)t4_read_reg64(adap, data_reg + 32), - (unsigned long long)t4_read_reg64(adap, data_reg + 40), - (unsigned long long)t4_read_reg64(adap, data_reg + 48), - (unsigned long long)t4_read_reg64(adap, data_reg + 56)); + struct mbox_cmd_log *log = adapter->mbox_log; + struct mbox_cmd *entry; + int i; + + entry = mbox_cmd_log_entry(log, log->cursor++); + if (log->cursor == log->size) + log->cursor = 0; + + for (i = 0; i < size / 8; i++) + entry->cmd[i] = be64_to_cpu(cmd[i]); + while (i < MBOX_LEN / 8) + entry->cmd[i++] = 0; + entry->timestamp = jiffies; + entry->seqno = log->seqno++; + entry->access = access; + entry->execute = execute; } /** @@ -268,12 +284,15 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, 1, 1, 3, 5, 10, 10, 20, 50, 100, 200 }; + u16 access = 0; + u16 execute = 0; u32 v; u64 res; - int i, ms, delay_idx; + int i, ms, delay_idx, ret; const __be64 *p = cmd; u32 data_reg = PF_REG(mbox, CIM_PF_MAILBOX_DATA_A); u32 ctl_reg = PF_REG(mbox, CIM_PF_MAILBOX_CTRL_A); + __be64 cmd_rpl[MBOX_LEN / 8]; if ((size & 15) || size > MBOX_LEN) return -EINVAL; @@ -289,9 +308,14 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++) v = MBOWNER_G(t4_read_reg(adap, ctl_reg)); - if (v != MBOX_OWNER_DRV) - return v ? -EBUSY : -ETIMEDOUT; + if (v != MBOX_OWNER_DRV) { + ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT; + t4_record_mbox(adap, cmd, MBOX_LEN, access, ret); + return ret; + } + /* Copy in the new mailbox command and send it on its way ... */ + t4_record_mbox(adap, cmd, MBOX_LEN, access, 0); for (i = 0; i < size; i += 8) t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p++)); @@ -317,26 +341,31 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, continue; } - res = t4_read_reg64(adap, data_reg); + get_mbox_rpl(adap, cmd_rpl, MBOX_LEN / 8, data_reg); + res = be64_to_cpu(cmd_rpl[0]); + if (FW_CMD_OP_G(res >> 32) == FW_DEBUG_CMD) { fw_asrt(adap, data_reg); res = FW_CMD_RETVAL_V(EIO); } else if (rpl) { - get_mbox_rpl(adap, rpl, size / 8, data_reg); + memcpy(rpl, cmd_rpl, size); } - if (FW_CMD_RETVAL_G((int)res)) - dump_mbox(adap, mbox, data_reg); t4_write_reg(adap, ctl_reg, 0); + + execute = i + ms; + t4_record_mbox(adap, cmd_rpl, + MBOX_LEN, access, execute); return -FW_CMD_RETVAL_G((int)res); } } - dump_mbox(adap, mbox, data_reg); + ret = -ETIMEDOUT; + t4_record_mbox(adap, cmd, MBOX_LEN, access, ret); dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n", *(const u8 *)cmd, mbox); t4_report_fw_error(adap); - return -ETIMEDOUT; + return ret; } int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, @@ -2557,6 +2586,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) } #define EEPROM_STAT_ADDR 0x7bfc +#define VPD_SIZE 0x800 #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 @@ -2594,6 +2624,15 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) if (!vpd) return -ENOMEM; + /* We have two VPD data structures stored in the adapter VPD area. + * By default, Linux calculates the size of the VPD area by traversing + * the first VPD area at offset 0x0, so we need to tell the OS what + * our real VPD size is. + */ + ret = pci_set_vpd_size(adapter->pdev, VPD_SIZE); + if (ret < 0) + goto out; + /* Card information normally starts at VPD_BASE but early cards had * it at 0. */ @@ -2927,6 +2966,20 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers) } /** + * t4_get_bs_version - read the firmware bootstrap version + * @adapter: the adapter + * @vers: where to place the version + * + * Reads the FW Bootstrap version from flash. + */ +int t4_get_bs_version(struct adapter *adapter, u32 *vers) +{ + return t4_read_flash(adapter, FLASH_FWBOOTSTRAP_START + + offsetof(struct fw_hdr, fw_ver), 1, + vers, 0); +} + +/** * t4_get_tp_version - read the TP microcode version * @adapter: the adapter * @vers: where to place the version @@ -6940,6 +6993,39 @@ int t4_identify_port(struct adapter *adap, unsigned int mbox, unsigned int viid, } /** + * t4_iq_stop - stop an ingress queue and its FLs + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @pf: the PF owning the queues + * @vf: the VF owning the queues + * @iqtype: the ingress queue type (FW_IQ_TYPE_FL_INT_CAP, etc.) + * @iqid: ingress queue id + * @fl0id: FL0 queue id or 0xffff if no attached FL0 + * @fl1id: FL1 queue id or 0xffff if no attached FL1 + * + * Stops an ingress queue and its associated FLs, if any. This causes + * any current or future data/messages destined for these queues to be + * tossed. + */ +int t4_iq_stop(struct adapter *adap, unsigned int mbox, unsigned int pf, + unsigned int vf, unsigned int iqtype, unsigned int iqid, + unsigned int fl0id, unsigned int fl1id) +{ + struct fw_iq_cmd c; + + memset(&c, 0, sizeof(c)); + c.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F | + FW_CMD_EXEC_F | FW_IQ_CMD_PFN_V(pf) | + FW_IQ_CMD_VFN_V(vf)); + c.alloc_to_len16 = cpu_to_be32(FW_IQ_CMD_IQSTOP_F | FW_LEN16(c)); + c.type_to_iqandstindex = cpu_to_be32(FW_IQ_CMD_TYPE_V(iqtype)); + c.iqid = cpu_to_be16(iqid); + c.fl0id = cpu_to_be16(fl0id); + c.fl1id = cpu_to_be16(fl1id); + return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); +} + +/** * t4_iq_free - free an ingress queue and its FLs * @adap: the adapter * @mbox: mailbox to use for the FW command @@ -7046,52 +7132,122 @@ int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, } /** - * t4_handle_fw_rpl - process a FW reply message + * t4_link_down_rc_str - return a string for a Link Down Reason Code * @adap: the adapter + * @link_down_rc: Link Down Reason Code + * + * Returns a string representation of the Link Down Reason Code. + */ +static const char *t4_link_down_rc_str(unsigned char link_down_rc) +{ + static const char * const reason[] = { + "Link Down", + "Remote Fault", + "Auto-negotiation Failure", + "Reserved", + "Insufficient Airflow", + "Unable To Determine Reason", + "No RX Signal Detected", + "Reserved", + }; + + if (link_down_rc >= ARRAY_SIZE(reason)) + return "Bad Reason Code"; + + return reason[link_down_rc]; +} + +/** + * t4_handle_get_port_info - process a FW reply message + * @pi: the port info * @rpl: start of the FW message * - * Processes a FW message, such as link state change messages. + * Processes a GET_PORT_INFO FW reply message. + */ +void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) +{ + const struct fw_port_cmd *p = (const void *)rpl; + struct adapter *adap = pi->adapter; + + /* link/module state change message */ + int speed = 0, fc = 0; + struct link_config *lc; + u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype); + int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0; + u32 mod = FW_PORT_CMD_MODTYPE_G(stat); + + if (stat & FW_PORT_CMD_RXPAUSE_F) + fc |= PAUSE_RX; + if (stat & FW_PORT_CMD_TXPAUSE_F) + fc |= PAUSE_TX; + if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M)) + speed = 100; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G)) + speed = 1000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) + speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) + speed = 40000; + + lc = &pi->link_cfg; + + if (mod != pi->mod_type) { + pi->mod_type = mod; + t4_os_portmod_changed(adap, pi->port_id); + } + if (link_ok != lc->link_ok || speed != lc->speed || + fc != lc->fc) { /* something changed */ + if (!link_ok && lc->link_ok) { + unsigned char rc = FW_PORT_CMD_LINKDNRC_G(stat); + + lc->link_down_rc = rc; + dev_warn(adap->pdev_dev, + "Port %d link down, reason: %s\n", + pi->port_id, t4_link_down_rc_str(rc)); + } + lc->link_ok = link_ok; + lc->speed = speed; + lc->fc = fc; + lc->supported = be16_to_cpu(p->u.info.pcap); + t4_os_link_changed(adap, pi->port_id, link_ok); + } +} + +/** + * t4_handle_fw_rpl - process a FW reply message + * @adap: the adapter + * @rpl: start of the FW message + * + * Processes a FW message, such as link state change messages. */ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl) { u8 opcode = *(const u8 *)rpl; - if (opcode == FW_PORT_CMD) { /* link/module state change message */ - int speed = 0, fc = 0; - const struct fw_port_cmd *p = (void *)rpl; + /* This might be a port command ... this simplifies the following + * conditionals ... We can get away with pre-dereferencing + * action_to_len16 because it's in the first 16 bytes and all messages + * will be at least that long. + */ + const struct fw_port_cmd *p = (const void *)rpl; + unsigned int action = + FW_PORT_CMD_ACTION_G(be32_to_cpu(p->action_to_len16)); + + if (opcode == FW_PORT_CMD && action == FW_PORT_ACTION_GET_PORT_INFO) { + int i; int chan = FW_PORT_CMD_PORTID_G(be32_to_cpu(p->op_to_portid)); - int port = adap->chan_map[chan]; - struct port_info *pi = adap2pinfo(adap, port); - struct link_config *lc = &pi->link_cfg; - u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype); - int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0; - u32 mod = FW_PORT_CMD_MODTYPE_G(stat); - - if (stat & FW_PORT_CMD_RXPAUSE_F) - fc |= PAUSE_RX; - if (stat & FW_PORT_CMD_TXPAUSE_F) - fc |= PAUSE_TX; - if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M)) - speed = 100; - else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G)) - speed = 1000; - else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) - speed = 10000; - else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) - speed = 40000; - - if (link_ok != lc->link_ok || speed != lc->speed || - fc != lc->fc) { /* something changed */ - lc->link_ok = link_ok; - lc->speed = speed; - lc->fc = fc; - lc->supported = be16_to_cpu(p->u.info.pcap); - t4_os_link_changed(adap, port, link_ok); - } - if (mod != pi->mod_type) { - pi->mod_type = mod; - t4_os_portmod_changed(adap, port); + struct port_info *pi = NULL; + + for_each_port(adap, i) { + pi = adap2pinfo(adap, i); + if (pi->tx_chan == chan) + break; } + + t4_handle_get_port_info(pi, rpl); + } else { + dev_warn(adap->pdev_dev, "Unknown firmware reply %d\n", opcode); + return -EINVAL; } return 0; } @@ -7611,61 +7767,74 @@ int t4_init_rss_mode(struct adapter *adap, int mbox) return 0; } -int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) +/** + * t4_init_portinfo - allocate a virtual interface amd initialize port_info + * @pi: the port_info + * @mbox: mailbox to use for the FW command + * @port: physical port associated with the VI + * @pf: the PF owning the VI + * @vf: the VF owning the VI + * @mac: the MAC address of the VI + * + * Allocates a virtual interface for the given physical port. If @mac is + * not %NULL it contains the MAC address of the VI as assigned by FW. + * @mac should be large enough to hold an Ethernet address. + * Returns < 0 on error. + */ +int t4_init_portinfo(struct port_info *pi, int mbox, + int port, int pf, int vf, u8 mac[]) { - u8 addr[6]; - int ret, i, j = 0; + int ret; struct fw_port_cmd c; - struct fw_rss_vi_config_cmd rvc; + unsigned int rss_size; memset(&c, 0, sizeof(c)); - memset(&rvc, 0, sizeof(rvc)); + c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_PORT_CMD_PORTID_V(port)); + c.action_to_len16 = cpu_to_be32( + FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) | + FW_LEN16(c)); + ret = t4_wr_mbox(pi->adapter, mbox, &c, sizeof(c), &c); + if (ret) + return ret; + + ret = t4_alloc_vi(pi->adapter, mbox, port, pf, vf, 1, mac, &rss_size); + if (ret < 0) + return ret; + + pi->viid = ret; + pi->tx_chan = port; + pi->lport = port; + pi->rss_size = rss_size; + + ret = be32_to_cpu(c.u.info.lstatus_to_modtype); + pi->mdio_addr = (ret & FW_PORT_CMD_MDIOCAP_F) ? + FW_PORT_CMD_MDIOADDR_G(ret) : -1; + pi->port_type = FW_PORT_CMD_PTYPE_G(ret); + pi->mod_type = FW_PORT_MOD_TYPE_NA; + + init_link_config(&pi->link_cfg, be16_to_cpu(c.u.info.pcap)); + return 0; +} + +int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) +{ + u8 addr[6]; + int ret, i, j = 0; for_each_port(adap, i) { - unsigned int rss_size; - struct port_info *p = adap2pinfo(adap, i); + struct port_info *pi = adap2pinfo(adap, i); while ((adap->params.portvec & (1 << j)) == 0) j++; - c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) | - FW_CMD_REQUEST_F | FW_CMD_READ_F | - FW_PORT_CMD_PORTID_V(j)); - c.action_to_len16 = cpu_to_be32( - FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) | - FW_LEN16(c)); - ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); + ret = t4_init_portinfo(pi, mbox, j, pf, vf, addr); if (ret) return ret; - ret = t4_alloc_vi(adap, mbox, j, pf, vf, 1, addr, &rss_size); - if (ret < 0) - return ret; - - p->viid = ret; - p->tx_chan = j; - p->lport = j; - p->rss_size = rss_size; memcpy(adap->port[i]->dev_addr, addr, ETH_ALEN); adap->port[i]->dev_port = j; - - ret = be32_to_cpu(c.u.info.lstatus_to_modtype); - p->mdio_addr = (ret & FW_PORT_CMD_MDIOCAP_F) ? - FW_PORT_CMD_MDIOADDR_G(ret) : -1; - p->port_type = FW_PORT_CMD_PTYPE_G(ret); - p->mod_type = FW_PORT_MOD_TYPE_NA; - - rvc.op_to_viid = - cpu_to_be32(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | - FW_CMD_REQUEST_F | FW_CMD_READ_F | - FW_RSS_VI_CONFIG_CMD_VIID(p->viid)); - rvc.retval_len16 = cpu_to_be32(FW_LEN16(rvc)); - ret = t4_wr_mbox(adap, mbox, &rvc, sizeof(rvc), &rvc); - if (ret) - return ret; - p->rss_mode = be32_to_cpu(rvc.u.basicvirtual.defaultq_to_udpen); - - init_link_config(&p->link_cfg, be16_to_cpu(c.u.info.pcap)); j++; } return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 2fc60e83a7a1..7f59ca458431 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -220,6 +220,13 @@ enum { FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC), FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS), + /* Location of bootstrap firmware image in FLASH. + */ + FLASH_FWBOOTSTRAP_START_SEC = 27, + FLASH_FWBOOTSTRAP_NSECS = 1, + FLASH_FWBOOTSTRAP_START = FLASH_START(FLASH_FWBOOTSTRAP_START_SEC), + FLASH_FWBOOTSTRAP_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FWBOOTSTRAP_NSECS), + /* * iSCSI persistent/crash information. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 7ad6d4e75b2a..392d6644fdd8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -2510,6 +2510,11 @@ struct fw_port_cmd { #define FW_PORT_CMD_PTYPE_G(x) \ (((x) >> FW_PORT_CMD_PTYPE_S) & FW_PORT_CMD_PTYPE_M) +#define FW_PORT_CMD_LINKDNRC_S 5 +#define FW_PORT_CMD_LINKDNRC_M 0x7 +#define FW_PORT_CMD_LINKDNRC_G(x) \ + (((x) >> FW_PORT_CMD_LINKDNRC_S) & FW_PORT_CMD_LINKDNRC_M) + #define FW_PORT_CMD_MODTYPE_S 0 #define FW_PORT_CMD_MODTYPE_M 0x1f #define FW_PORT_CMD_MODTYPE_V(x) ((x) << FW_PORT_CMD_MODTYPE_S) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 4a707c32d76f..734dd776c22f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -387,6 +387,10 @@ struct adapter { /* various locks */ spinlock_t stats_lock; + /* support for mailbox command/reply logging */ +#define T4VF_OS_LOG_MBOX_CMDS 256 + struct mbox_cmd_log *mbox_log; + /* list of MAC addresses in MPS Hash */ struct list_head mac_hlist; }; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 730fec73d5a6..04fc6f6d1e25 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1704,6 +1704,105 @@ static const struct ethtool_ops cxgb4vf_ethtool_ops = { */ /* + * Show Firmware Mailbox Command/Reply Log + * + * Note that we don't do any locking when dumping the Firmware Mailbox Log so + * it's possible that we can catch things during a log update and therefore + * see partially corrupted log entries. But i9t's probably Good Enough(tm). + * If we ever decide that we want to make sure that we're dumping a coherent + * log, we'd need to perform locking in the mailbox logging and in + * mboxlog_open() where we'd need to grab the entire mailbox log in one go + * like we do for the Firmware Device Log. But as stated above, meh ... + */ +static int mboxlog_show(struct seq_file *seq, void *v) +{ + struct adapter *adapter = seq->private; + struct mbox_cmd_log *log = adapter->mbox_log; + struct mbox_cmd *entry; + int entry_idx, i; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, + "%10s %15s %5s %5s %s\n", + "Seq#", "Tstamp", "Atime", "Etime", + "Command/Reply"); + return 0; + } + + entry_idx = log->cursor + ((uintptr_t)v - 2); + if (entry_idx >= log->size) + entry_idx -= log->size; + entry = mbox_cmd_log_entry(log, entry_idx); + + /* skip over unused entries */ + if (entry->timestamp == 0) + return 0; + + seq_printf(seq, "%10u %15llu %5d %5d", + entry->seqno, entry->timestamp, + entry->access, entry->execute); + for (i = 0; i < MBOX_LEN / 8; i++) { + u64 flit = entry->cmd[i]; + u32 hi = (u32)(flit >> 32); + u32 lo = (u32)flit; + + seq_printf(seq, " %08x %08x", hi, lo); + } + seq_puts(seq, "\n"); + return 0; +} + +static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos) +{ + struct adapter *adapter = seq->private; + struct mbox_cmd_log *log = adapter->mbox_log; + + return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL); +} + +static void *mboxlog_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN; +} + +static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return mboxlog_get_idx(seq, *pos); +} + +static void mboxlog_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations mboxlog_seq_ops = { + .start = mboxlog_start, + .next = mboxlog_next, + .stop = mboxlog_stop, + .show = mboxlog_show +}; + +static int mboxlog_open(struct inode *inode, struct file *file) +{ + int res = seq_open(file, &mboxlog_seq_ops); + + if (!res) { + struct seq_file *seq = file->private_data; + + seq->private = inode->i_private; + } + return res; +} + +static const struct file_operations mboxlog_fops = { + .owner = THIS_MODULE, + .open = mboxlog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* * Show SGE Queue Set information. We display QPL Queues Sets per line. */ #define QPL 4 @@ -2122,6 +2221,7 @@ struct cxgb4vf_debugfs_entry { }; static struct cxgb4vf_debugfs_entry debugfs_files[] = { + { "mboxlog", S_IRUGO, &mboxlog_fops }, { "sge_qinfo", S_IRUGO, &sge_qinfo_debugfs_fops }, { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops }, { "resources", S_IRUGO, &resources_proc_fops }, @@ -2664,6 +2764,16 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; + adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + + (sizeof(struct mbox_cmd) * + T4VF_OS_LOG_MBOX_CMDS), + GFP_KERNEL); + if (!adapter->mbox_log) { + err = -ENOMEM; + goto err_free_adapter; + } + adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS; + /* * Initialize SMP data synchronization resources. */ @@ -2913,6 +3023,7 @@ err_unmap_bar0: iounmap(adapter->regs); err_free_adapter: + kfree(adapter->mbox_log); kfree(adapter); err_release_regions: @@ -2982,6 +3093,7 @@ static void cxgb4vf_pci_remove(struct pci_dev *pdev) iounmap(adapter->regs); if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); + kfree(adapter->mbox_log); kfree(adapter); } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 9b40a85cc1e4..438374a05791 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -36,6 +36,7 @@ #ifndef __T4VF_COMMON_H__ #define __T4VF_COMMON_H__ +#include "../cxgb4/t4_hw.h" #include "../cxgb4/t4fw_api.h" #define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision)) @@ -227,6 +228,34 @@ struct adapter_params { u8 nports; /* # of Ethernet "ports" */ }; +/* Firmware Mailbox Command/Reply log. All values are in Host-Endian format. + * The access and execute times are signed in order to accommodate negative + * error returns. + */ +struct mbox_cmd { + u64 cmd[MBOX_LEN / 8]; /* a Firmware Mailbox Command/Reply */ + u64 timestamp; /* OS-dependent timestamp */ + u32 seqno; /* sequence number */ + s16 access; /* time (ms) to access mailbox */ + s16 execute; /* time (ms) to execute */ +}; + +struct mbox_cmd_log { + unsigned int size; /* number of entries in the log */ + unsigned int cursor; /* next position in the log to write */ + u32 seqno; /* next sequence number */ + /* variable length mailbox command log starts here */ +}; + +/* Given a pointer to a Firmware Mailbox Command Log and a log entry index, + * return a pointer to the specified entry. + */ +static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log, + unsigned int entry_idx) +{ + return &((struct mbox_cmd *)&(log)[1])[entry_idx]; +} + #include "adapter.h" #ifndef PCI_VENDOR_ID_CHELSIO diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index fed83d88fc4e..955ff7c61f1b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -76,21 +76,33 @@ static void get_mbox_rpl(struct adapter *adapter, __be64 *rpl, int size, *rpl++ = cpu_to_be64(t4_read_reg64(adapter, mbox_data)); } -/* - * Dump contents of mailbox with a leading tag. +/** + * t4vf_record_mbox - record a Firmware Mailbox Command/Reply in the log + * @adapter: the adapter + * @cmd: the Firmware Mailbox Command or Reply + * @size: command length in bytes + * @access: the time (ms) needed to access the Firmware Mailbox + * @execute: the time (ms) the command spent being executed */ -static void dump_mbox(struct adapter *adapter, const char *tag, u32 mbox_data) +static void t4vf_record_mbox(struct adapter *adapter, const __be64 *cmd, + int size, int access, int execute) { - dev_err(adapter->pdev_dev, - "mbox %s: %llx %llx %llx %llx %llx %llx %llx %llx\n", tag, - (unsigned long long)t4_read_reg64(adapter, mbox_data + 0), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 8), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 16), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 24), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 32), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 40), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 48), - (unsigned long long)t4_read_reg64(adapter, mbox_data + 56)); + struct mbox_cmd_log *log = adapter->mbox_log; + struct mbox_cmd *entry; + int i; + + entry = mbox_cmd_log_entry(log, log->cursor++); + if (log->cursor == log->size) + log->cursor = 0; + + for (i = 0; i < size / 8; i++) + entry->cmd[i] = be64_to_cpu(cmd[i]); + while (i < MBOX_LEN / 8) + entry->cmd[i++] = 0; + entry->timestamp = jiffies; + entry->seqno = log->seqno++; + entry->access = access; + entry->execute = execute; } /** @@ -120,10 +132,13 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, 1, 1, 3, 5, 10, 10, 20, 50, 100 }; + u16 access = 0, execute = 0; u32 v, mbox_data; - int i, ms, delay_idx; + int i, ms, delay_idx, ret; const __be64 *p; u32 mbox_ctl = T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL; + u32 cmd_op = FW_CMD_OP_G(be32_to_cpu(((struct fw_cmd_hdr *)cmd)->hi)); + __be64 cmd_rpl[MBOX_LEN / 8]; /* In T6, mailbox size is changed to 128 bytes to avoid * invalidating the entire prefetch buffer. @@ -148,8 +163,11 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, v = MBOWNER_G(t4_read_reg(adapter, mbox_ctl)); for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++) v = MBOWNER_G(t4_read_reg(adapter, mbox_ctl)); - if (v != MBOX_OWNER_DRV) - return v == MBOX_OWNER_FW ? -EBUSY : -ETIMEDOUT; + if (v != MBOX_OWNER_DRV) { + ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT; + t4vf_record_mbox(adapter, cmd, size, access, ret); + return ret; + } /* * Write the command array into the Mailbox Data register array and @@ -164,6 +182,8 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, * Data registers before doing the write to the VF Mailbox Control * register. */ + if (cmd_op != FW_VI_STATS_CMD) + t4vf_record_mbox(adapter, cmd, size, access, 0); for (i = 0, p = cmd; i < size; i += 8) t4_write_reg64(adapter, mbox_data + i, be64_to_cpu(*p++)); t4_read_reg(adapter, mbox_data); /* flush write */ @@ -209,31 +229,33 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, * We return the (negated) firmware command return * code (this depends on FW_SUCCESS == 0). */ + get_mbox_rpl(adapter, cmd_rpl, size, mbox_data); /* return value in low-order little-endian word */ - v = t4_read_reg(adapter, mbox_data); - if (FW_CMD_RETVAL_G(v)) - dump_mbox(adapter, "FW Error", mbox_data); + v = be64_to_cpu(cmd_rpl[0]); if (rpl) { /* request bit in high-order BE word */ WARN_ON((be32_to_cpu(*(const __be32 *)cmd) & FW_CMD_REQUEST_F) == 0); - get_mbox_rpl(adapter, rpl, size, mbox_data); + memcpy(rpl, cmd_rpl, size); WARN_ON((be32_to_cpu(*(__be32 *)rpl) & FW_CMD_REQUEST_F) != 0); } t4_write_reg(adapter, mbox_ctl, MBOWNER_V(MBOX_OWNER_NONE)); + execute = i + ms; + if (cmd_op != FW_VI_STATS_CMD) + t4vf_record_mbox(adapter, cmd_rpl, size, access, + execute); return -FW_CMD_RETVAL_G(v); } } - /* - * We timed out. Return the error ... - */ - dump_mbox(adapter, "FW Timeout", mbox_data); - return -ETIMEDOUT; + /* We timed out. Return the error ... */ + ret = -ETIMEDOUT; + t4vf_record_mbox(adapter, cmd, size, access, ret); + return ret; } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b2182d3ba3cc..f15560a06718 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2740,6 +2740,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXCSUM; netdev->features |= netdev->hw_features; + netdev->vlan_features |= netdev->features; #ifdef CONFIG_RFS_ACCEL netdev->hw_features |= NETIF_F_NTUPLE; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 3acde3b9b767..d88fbab378aa 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1465,7 +1465,7 @@ de4x5_queue_pkt(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); if (!lp->tx_enable) /* Cannot send for now */ - return NETDEV_TX_LOCKED; + goto tx_err; /* ** Clean out the TX ring asynchronously to interrupts - sometimes the @@ -1478,7 +1478,7 @@ de4x5_queue_pkt(struct sk_buff *skb, struct net_device *dev) /* Test if cache is already locked - requeue skb if so */ if (test_and_set_bit(0, (void *)&lp->cache.lock) && !lp->interrupt) - return NETDEV_TX_LOCKED; + goto tx_err; /* Transmit descriptor ring full or stale skb */ if (netif_queue_stopped(dev) || (u_long) lp->tx_skb[lp->tx_new] > 1) { @@ -1519,6 +1519,9 @@ de4x5_queue_pkt(struct sk_buff *skb, struct net_device *dev) lp->cache.lock = 0; return NETDEV_TX_OK; +tx_err: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } /* diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 536686476369..ed98ef1ecac3 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4890,11 +4890,13 @@ static int be_resume(struct be_adapter *adapter) if (status) return status; - if (netif_running(netdev)) { + rtnl_lock(); + if (netif_running(netdev)) status = be_open(netdev); - if (status) - return status; - } + rtnl_unlock(); + + if (status) + return status; netif_device_attach(netdev); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 08243c2ff4b4..bfa10c3da35f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2058,8 +2058,8 @@ static void fec_enet_mii_remove(struct fec_enet_private *fep) } } -static int fec_enet_get_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) +static int fec_enet_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct fec_enet_private *fep = netdev_priv(ndev); struct phy_device *phydev = fep->phy_dev; @@ -2067,11 +2067,11 @@ static int fec_enet_get_settings(struct net_device *ndev, if (!phydev) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } -static int fec_enet_set_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) +static int fec_enet_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct fec_enet_private *fep = netdev_priv(ndev); struct phy_device *phydev = fep->phy_dev; @@ -2079,7 +2079,7 @@ static int fec_enet_set_settings(struct net_device *ndev, if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } static void fec_enet_get_drvinfo(struct net_device *ndev, @@ -2562,8 +2562,6 @@ fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) } static const struct ethtool_ops fec_enet_ethtool_ops = { - .get_settings = fec_enet_get_settings, - .set_settings = fec_enet_set_settings, .get_drvinfo = fec_enet_get_drvinfo, .get_regs_len = fec_enet_get_regs_len, .get_regs = fec_enet_get_regs, @@ -2583,6 +2581,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .set_tunable = fec_enet_set_tunable, .get_wol = fec_enet_get_wol, .set_wol = fec_enet_set_wol, + .get_link_ksettings = fec_enet_get_link_ksettings, + .set_link_ksettings = fec_enet_set_link_ksettings, }; static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 159142272afb..7a757e88c89a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -29,25 +29,6 @@ static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle) return vf_cb->mac_cb; } -/** - * hns_ae_map_eport_to_dport - translate enet port id to dsaf port id - * @port_id: enet port id - *: debug port 0-1, service port 2 -7 (dsaf mode only 2) - * return: dsaf port id - *: service ports 0 - 5, debug port 6-7 - **/ -static int hns_ae_map_eport_to_dport(u32 port_id) -{ - int port_index; - - if (port_id < DSAF_DEBUG_NW_NUM) - port_index = port_id + DSAF_SERVICE_PORT_NUM_PER_DSAF; - else - port_index = port_id - DSAF_DEBUG_NW_NUM; - - return port_index; -} - static struct dsaf_device *hns_ae_get_dsaf_dev(struct hnae_ae_dev *dev) { return container_of(dev, struct dsaf_device, ae_dev); @@ -56,50 +37,35 @@ static struct dsaf_device *hns_ae_get_dsaf_dev(struct hnae_ae_dev *dev) static struct hns_ppe_cb *hns_get_ppe_cb(struct hnae_handle *handle) { int ppe_index; - int ppe_common_index; struct ppe_common_cb *ppe_comm; struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); - if (vf_cb->port_index < DSAF_SERVICE_PORT_NUM_PER_DSAF) { - ppe_index = vf_cb->port_index; - ppe_common_index = 0; - } else { - ppe_index = 0; - ppe_common_index = - vf_cb->port_index - DSAF_SERVICE_PORT_NUM_PER_DSAF + 1; - } - ppe_comm = vf_cb->dsaf_dev->ppe_common[ppe_common_index]; + ppe_comm = vf_cb->dsaf_dev->ppe_common[0]; + ppe_index = vf_cb->port_index; + return &ppe_comm->ppe_cb[ppe_index]; } static int hns_ae_get_q_num_per_vf( struct dsaf_device *dsaf_dev, int port) { - int common_idx = hns_dsaf_get_comm_idx_by_port(port); - - return dsaf_dev->rcb_common[common_idx]->max_q_per_vf; + return dsaf_dev->rcb_common[0]->max_q_per_vf; } static int hns_ae_get_vf_num_per_port( struct dsaf_device *dsaf_dev, int port) { - int common_idx = hns_dsaf_get_comm_idx_by_port(port); - - return dsaf_dev->rcb_common[common_idx]->max_vfn; + return dsaf_dev->rcb_common[0]->max_vfn; } static struct ring_pair_cb *hns_ae_get_base_ring_pair( struct dsaf_device *dsaf_dev, int port) { - int common_idx = hns_dsaf_get_comm_idx_by_port(port); - struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[common_idx]; + struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[0]; int q_num = rcb_comm->max_q_per_vf; int vf_num = rcb_comm->max_vfn; - if (common_idx == HNS_DSAF_COMM_SERVICE_NW_IDX) - return &rcb_comm->ring_pair_cb[port * q_num * vf_num]; - else - return &rcb_comm->ring_pair_cb[0]; + return &rcb_comm->ring_pair_cb[port * q_num * vf_num]; } static struct ring_pair_cb *hns_ae_get_ring_pair(struct hnae_queue *q) @@ -110,7 +76,6 @@ static struct ring_pair_cb *hns_ae_get_ring_pair(struct hnae_queue *q) struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, u32 port_id) { - int port_idx; int vfnum_per_port; int qnum_per_vf; int i; @@ -120,11 +85,10 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, struct hnae_vf_cb *vf_cb; dsaf_dev = hns_ae_get_dsaf_dev(dev); - port_idx = hns_ae_map_eport_to_dport(port_id); - ring_pair_cb = hns_ae_get_base_ring_pair(dsaf_dev, port_idx); - vfnum_per_port = hns_ae_get_vf_num_per_port(dsaf_dev, port_idx); - qnum_per_vf = hns_ae_get_q_num_per_vf(dsaf_dev, port_idx); + ring_pair_cb = hns_ae_get_base_ring_pair(dsaf_dev, port_id); + vfnum_per_port = hns_ae_get_vf_num_per_port(dsaf_dev, port_id); + qnum_per_vf = hns_ae_get_q_num_per_vf(dsaf_dev, port_id); vf_cb = kzalloc(sizeof(*vf_cb) + qnum_per_vf * sizeof(struct hnae_queue *), GFP_KERNEL); @@ -163,14 +127,14 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, } vf_cb->dsaf_dev = dsaf_dev; - vf_cb->port_index = port_idx; - vf_cb->mac_cb = &dsaf_dev->mac_cb[port_idx]; + vf_cb->port_index = port_id; + vf_cb->mac_cb = dsaf_dev->mac_cb[port_id]; ae_handle->phy_if = vf_cb->mac_cb->phy_if; ae_handle->phy_node = vf_cb->mac_cb->phy_node; ae_handle->if_support = vf_cb->mac_cb->if_support; ae_handle->port_type = vf_cb->mac_cb->mac_type; - ae_handle->dport_id = port_idx; + ae_handle->dport_id = port_id; return ae_handle; vf_id_err: @@ -320,11 +284,8 @@ static void hns_ae_reset(struct hnae_handle *handle) struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); if (vf_cb->mac_cb->mac_type == HNAE_PORT_DEBUG) { - u8 ppe_common_index = - vf_cb->port_index - DSAF_SERVICE_PORT_NUM_PER_DSAF + 1; - hns_mac_reset(vf_cb->mac_cb); - hns_ppe_reset_common(vf_cb->dsaf_dev, ppe_common_index); + hns_ppe_reset_common(vf_cb->dsaf_dev, 0); } } @@ -703,7 +664,7 @@ void hns_ae_update_led_status(struct hnae_handle *handle) assert(handle); mac_cb = hns_get_mac_cb(handle); - if (!mac_cb->cpld_vaddr) + if (!mac_cb->cpld_ctrl) return; hns_set_led_opt(mac_cb); } @@ -723,7 +684,6 @@ int hns_ae_cpld_set_led_id(struct hnae_handle *handle, void hns_ae_get_regs(struct hnae_handle *handle, void *data) { u32 *p = data; - u32 rcb_com_idx; int i; struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); @@ -731,8 +691,7 @@ void hns_ae_get_regs(struct hnae_handle *handle, void *data) hns_ppe_get_regs(ppe_cb, p); p += hns_ppe_get_regs_count(); - rcb_com_idx = hns_dsaf_get_comm_idx_by_port(vf_cb->port_index); - hns_rcb_get_common_regs(vf_cb->dsaf_dev->rcb_common[rcb_com_idx], p); + hns_rcb_get_common_regs(vf_cb->dsaf_dev->rcb_common[0], p); p += hns_rcb_get_common_regs_count(); for (i = 0; i < handle->q_num; i++) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 10c367d20955..611581fccf2a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -7,18 +7,19 @@ * (at your option) any later version. */ -#include <linux/module.h> -#include <linux/kernel.h> #include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/phy_fixed.h> #include <linux/interrupt.h> -#include <linux/platform_device.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/netdevice.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/phy_fixed.h> +#include <linux/platform_device.h> -#include "hns_dsaf_misc.h" #include "hns_dsaf_main.h" +#include "hns_dsaf_misc.h" #include "hns_dsaf_rcb.h" #define MAC_EN_FLAG_V 0xada0328 @@ -81,17 +82,6 @@ static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb) } } -int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt) -{ - if (!mac_cb->cpld_vaddr) - return -ENODEV; - - *sfp_prsnt = !dsaf_read_b((u8 *)mac_cb->cpld_vaddr - + MAC_SFP_PORT_OFFSET); - - return 0; -} - void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) { struct mac_driver *mac_ctrl_drv; @@ -168,10 +158,9 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, u8 vmid, u8 *port_num) { u8 tmp_port; - u32 comm_idx; if (mac_cb->dsaf_dev->dsaf_mode <= DSAF_MODE_ENABLE) { - if (mac_cb->mac_id != DSAF_MAX_PORT_NUM_PER_CHIP) { + if (mac_cb->mac_id != DSAF_MAX_PORT_NUM) { dev_err(mac_cb->dev, "input invalid,%s mac%d vmid%d !\n", mac_cb->dsaf_dev->ae_dev.name, @@ -179,7 +168,7 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, return -EINVAL; } } else if (mac_cb->dsaf_dev->dsaf_mode < DSAF_MODE_MAX) { - if (mac_cb->mac_id >= DSAF_MAX_PORT_NUM_PER_CHIP) { + if (mac_cb->mac_id >= DSAF_MAX_PORT_NUM) { dev_err(mac_cb->dev, "input invalid,%s mac%d vmid%d!\n", mac_cb->dsaf_dev->ae_dev.name, @@ -192,9 +181,7 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, return -EINVAL; } - comm_idx = hns_dsaf_get_comm_idx_by_port(mac_cb->mac_id); - - if (vmid >= mac_cb->dsaf_dev->rcb_common[comm_idx]->max_vfn) { + if (vmid >= mac_cb->dsaf_dev->rcb_common[0]->max_vfn) { dev_err(mac_cb->dev, "input invalid,%s mac%d vmid%d !\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vmid); return -EINVAL; @@ -234,7 +221,7 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, } /** - *hns_mac_get_inner_port_num - change vf mac address + *hns_mac_change_vf_addr - change vf mac address *@mac_cb: mac device *@vmid: vmid *@addr:mac address @@ -249,7 +236,7 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, struct mac_entry_idx *old_entry; old_entry = &mac_cb->addr_entry_idx[vmid]; - if (dsaf_dev) { + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); mac_entry.in_vlan_id = old_entry->vlan_id; mac_entry.in_port_num = mac_cb->mac_id; @@ -289,7 +276,7 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb, struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; struct dsaf_drv_mac_single_dest_entry mac_entry; - if (dsaf_dev && addr) { + if (!HNS_DSAF_IS_DEBUG(dsaf_dev) && addr) { memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); mac_entry.in_vlan_id = 0;/*vlan_id;*/ mac_entry.in_port_num = mac_cb->mac_id; @@ -380,7 +367,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, if (mac_cb->mac_type == HNAE_PORT_DEBUG) return 0; - if (dsaf_dev) { + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); mac_entry.in_vlan_id = vlan_id; mac_entry.in_port_num = mac_cb->mac_id; @@ -418,7 +405,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) uc_mac_entry = &mac_cb->addr_entry_idx[vmid]; - if (dsaf_dev) { + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); mac_entry.in_vlan_id = uc_mac_entry->vlan_id; mac_entry.in_port_num = mac_cb->mac_id; @@ -651,14 +638,18 @@ free_mac_drv: } /** - *mac_free_dev - get mac information from device node + *hns_mac_get_info - get mac information from device node *@mac_cb: mac device *@np:device node - *@mac_mode_idx:mac mode index + * return: 0 --success, negative --fail */ -static void hns_mac_get_info(struct hns_mac_cb *mac_cb, - struct device_node *np, u32 mac_mode_idx) +static int hns_mac_get_info(struct hns_mac_cb *mac_cb) { + struct device_node *np = mac_cb->dev->of_node; + struct regmap *syscon; + struct of_phandle_args cpld_args; + u32 ret; + mac_cb->link = false; mac_cb->half_duplex = false; mac_cb->speed = mac_phy_to_speed[mac_cb->phy_if]; @@ -674,12 +665,73 @@ static void hns_mac_get_info(struct hns_mac_cb *mac_cb, mac_cb->max_frm = MAC_DEFAULT_MTU; mac_cb->tx_pause_frm_time = MAC_DEFAULT_PAUSE_TIME; - - /* Get the rest of the PHY information */ - mac_cb->phy_node = of_parse_phandle(np, "phy-handle", mac_cb->mac_id); + mac_cb->port_rst_off = mac_cb->mac_id; + mac_cb->port_mode_off = 0; + + /* if the dsaf node doesn't contain a port subnode, get phy-handle + * from dsaf node + */ + if (!mac_cb->fw_port) { + mac_cb->phy_node = of_parse_phandle(np, "phy-handle", + mac_cb->mac_id); + if (mac_cb->phy_node) + dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n", + mac_cb->mac_id, mac_cb->phy_node->name); + return 0; + } + if (!is_of_node(mac_cb->fw_port)) + return -EINVAL; + /* parse property from port subnode in dsaf */ + mac_cb->phy_node = of_parse_phandle(to_of_node(mac_cb->fw_port), + "phy-handle", 0); if (mac_cb->phy_node) dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n", mac_cb->mac_id, mac_cb->phy_node->name); + syscon = syscon_node_to_regmap( + of_parse_phandle(to_of_node(mac_cb->fw_port), + "serdes-syscon", 0)); + if (IS_ERR_OR_NULL(syscon)) { + dev_err(mac_cb->dev, "serdes-syscon is needed!\n"); + return -EINVAL; + } + mac_cb->serdes_ctrl = syscon; + + ret = fwnode_property_read_u32(mac_cb->fw_port, + "port-rst-offset", + &mac_cb->port_rst_off); + if (ret) { + dev_dbg(mac_cb->dev, + "mac%d port-rst-offset not found, use default value.\n", + mac_cb->mac_id); + } + + ret = fwnode_property_read_u32(mac_cb->fw_port, + "port-mode-offset", + &mac_cb->port_mode_off); + if (ret) { + dev_dbg(mac_cb->dev, + "mac%d port-mode-offset not found, use default value.\n", + mac_cb->mac_id); + } + + ret = of_parse_phandle_with_fixed_args(to_of_node(mac_cb->fw_port), + "cpld-syscon", 1, 0, &cpld_args); + if (ret) { + dev_dbg(mac_cb->dev, "mac%d no cpld-syscon found.\n", + mac_cb->mac_id); + mac_cb->cpld_ctrl = NULL; + } else { + syscon = syscon_node_to_regmap(cpld_args.np); + if (IS_ERR_OR_NULL(syscon)) { + dev_dbg(mac_cb->dev, "no cpld-syscon found!\n"); + mac_cb->cpld_ctrl = NULL; + } else { + mac_cb->cpld_ctrl = syscon; + mac_cb->cpld_ctrl_reg = cpld_args.args[0]; + } + } + + return 0; } /** @@ -709,40 +761,31 @@ u8 __iomem *hns_mac_get_vaddr(struct dsaf_device *dsaf_dev, return base + 0x40000 + mac_id * 0x4000 - mac_mode_idx * 0x20000; else - return mac_cb->serdes_vaddr + 0x1000 - + (mac_id - DSAF_SERVICE_PORT_NUM_PER_DSAF) * 0x100000; + return dsaf_dev->ppe_base + 0x1000; } /** * hns_mac_get_cfg - get mac cfg from dtb or acpi table * @dsaf_dev: dsa fabric device struct pointer - * @mac_idx: mac index - * retuen 0 - success , negative --fail + * @mac_cb: mac control block + * return 0 - success , negative --fail */ -int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, int mac_idx) +int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, struct hns_mac_cb *mac_cb) { int ret; u32 mac_mode_idx; - struct hns_mac_cb *mac_cb = &dsaf_dev->mac_cb[mac_idx]; mac_cb->dsaf_dev = dsaf_dev; mac_cb->dev = dsaf_dev->dev; - mac_cb->mac_id = mac_idx; mac_cb->sys_ctl_vaddr = dsaf_dev->sc_base; mac_cb->serdes_vaddr = dsaf_dev->sds_base; - if (dsaf_dev->cpld_base && - mac_idx < DSAF_SERVICE_PORT_NUM_PER_DSAF) { - mac_cb->cpld_vaddr = dsaf_dev->cpld_base + - mac_cb->mac_id * CPLD_ADDR_PORT_OFFSET; - cpld_led_reset(mac_cb); - } mac_cb->sfp_prsnt = 0; mac_cb->txpkt_for_led = 0; mac_cb->rxpkt_for_led = 0; - if (mac_idx < DSAF_SERVICE_PORT_NUM_PER_DSAF) + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) mac_cb->mac_type = HNAE_PORT_SERVICE; else mac_cb->mac_type = HNAE_PORT_DEBUG; @@ -758,53 +801,100 @@ int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, int mac_idx) } mac_mode_idx = (u32)ret; - hns_mac_get_info(mac_cb, mac_cb->dev->of_node, mac_mode_idx); + ret = hns_mac_get_info(mac_cb); + if (ret) + return ret; + cpld_led_reset(mac_cb); mac_cb->vaddr = hns_mac_get_vaddr(dsaf_dev, mac_cb, mac_mode_idx); return 0; } +static int hns_mac_get_max_port_num(struct dsaf_device *dsaf_dev) +{ + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return 1; + else + return DSAF_MAX_PORT_NUM; +} + /** * hns_mac_init - init mac * @dsaf_dev: dsa fabric device struct pointer - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ int hns_mac_init(struct dsaf_device *dsaf_dev) { - int i; + bool found = false; int ret; - size_t size; + u32 port_id; + int max_port_num = hns_mac_get_max_port_num(dsaf_dev); struct hns_mac_cb *mac_cb; + struct fwnode_handle *child; - size = sizeof(struct hns_mac_cb) * DSAF_MAX_PORT_NUM_PER_CHIP; - dsaf_dev->mac_cb = devm_kzalloc(dsaf_dev->dev, size, GFP_KERNEL); - if (!dsaf_dev->mac_cb) - return -ENOMEM; + device_for_each_child_node(dsaf_dev->dev, child) { + ret = fwnode_property_read_u32(child, "reg", &port_id); + if (ret) { + dev_err(dsaf_dev->dev, + "get reg fail, ret=%d!\n", ret); + return ret; + } + if (port_id >= max_port_num) { + dev_err(dsaf_dev->dev, + "reg(%u) out of range!\n", port_id); + return -EINVAL; + } + mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb), + GFP_KERNEL); + if (!mac_cb) + return -ENOMEM; + mac_cb->fw_port = child; + mac_cb->mac_id = (u8)port_id; + dsaf_dev->mac_cb[port_id] = mac_cb; + found = true; + } - for (i = 0; i < DSAF_MAX_PORT_NUM_PER_CHIP; i++) { - ret = hns_mac_get_cfg(dsaf_dev, i); - if (ret) - goto free_mac_cb; + /* if don't get any port subnode from dsaf node + * will init all port then, this is compatible with the old dts + */ + if (!found) { + for (port_id = 0; port_id < max_port_num; port_id++) { + mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb), + GFP_KERNEL); + if (!mac_cb) + return -ENOMEM; + + mac_cb->mac_id = port_id; + dsaf_dev->mac_cb[port_id] = mac_cb; + } + } + /* init mac_cb for all port */ + for (port_id = 0; port_id < max_port_num; port_id++) { + mac_cb = dsaf_dev->mac_cb[port_id]; + if (!mac_cb) + continue; - mac_cb = &dsaf_dev->mac_cb[i]; + ret = hns_mac_get_cfg(dsaf_dev, mac_cb); + if (ret) + return ret; ret = hns_mac_init_ex(mac_cb); if (ret) - goto free_mac_cb; + return ret; } return 0; - -free_mac_cb: - dsaf_dev->mac_cb = NULL; - - return ret; } void hns_mac_uninit(struct dsaf_device *dsaf_dev) { - cpld_led_reset(dsaf_dev->mac_cb); - dsaf_dev->mac_cb = NULL; + int i; + int max_port_num = hns_mac_get_max_port_num(dsaf_dev); + + for (i = 0; i < max_port_num; i++) { + cpld_led_reset(dsaf_dev->mac_cb[i]); + dsaf_dev->mac_cb[i] = NULL; + } } int hns_mac_config_mac_loopback(struct hns_mac_cb *mac_cb, @@ -892,7 +982,7 @@ void hns_set_led_opt(struct hns_mac_cb *mac_cb) int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb, enum hnae_led_state status) { - if (!mac_cb || !mac_cb->cpld_vaddr) + if (!mac_cb || !mac_cb->cpld_ctrl) return 0; return cpld_set_led_id(mac_cb, status); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 823b6e78c8aa..97ce9a750aaf 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -10,9 +10,10 @@ #ifndef _HNS_DSAF_MAC_H #define _HNS_DSAF_MAC_H -#include <linux/phy.h> -#include <linux/kernel.h> #include <linux/if_vlan.h> +#include <linux/kernel.h> +#include <linux/phy.h> +#include <linux/regmap.h> #include "hns_dsaf_main.h" struct dsaf_device; @@ -310,10 +311,15 @@ struct hns_mac_cb { struct device *dev; struct dsaf_device *dsaf_dev; struct mac_priv priv; + struct fwnode_handle *fw_port; u8 __iomem *vaddr; - u8 __iomem *cpld_vaddr; u8 __iomem *sys_ctl_vaddr; u8 __iomem *serdes_vaddr; + struct regmap *serdes_ctrl; + struct regmap *cpld_ctrl; + u32 cpld_ctrl_reg; + u32 port_rst_off; + u32 port_mode_off; struct mac_entry_idx addr_entry_idx[DSAF_MAX_VM_NUM]; u8 sfp_prsnt; u8 cpld_led_value; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 8439f6d8e360..1c2ddb25e776 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -7,27 +7,29 @@ * (at your option) any later version. */ -#include <linux/module.h> -#include <linux/kernel.h> +#include <linux/device.h> #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> #include <linux/netdevice.h> -#include <linux/platform_device.h> +#include <linux/mfd/syscon.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> -#include <linux/device.h> +#include <linux/platform_device.h> #include <linux/vmalloc.h> +#include "hns_dsaf_mac.h" #include "hns_dsaf_main.h" -#include "hns_dsaf_rcb.h" #include "hns_dsaf_ppe.h" -#include "hns_dsaf_mac.h" +#include "hns_dsaf_rcb.h" const char *g_dsaf_mode_match[DSAF_MODE_MAX] = { [DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf", [DSAF_MODE_DISABLE_6PORT_0VM] = "6port-16rss", [DSAF_MODE_DISABLE_6PORT_16VM] = "6port-16vf", + [DSAF_MODE_DISABLE_SP] = "single-port", }; int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) @@ -35,8 +37,13 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) int ret, i; u32 desc_num; u32 buf_size; + u32 reset_offset = 0; + u32 res_idx = 0; const char *mode_str; + struct regmap *syscon; + struct resource *res; struct device_node *np = dsaf_dev->dev->of_node; + struct platform_device *pdev = to_platform_device(dsaf_dev->dev); if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1")) dsaf_dev->dsaf_ver = AE_VERSION_1; @@ -73,42 +80,68 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) else dsaf_dev->dsaf_tc_mode = HRD_DSAF_4TC_MODE; - dsaf_dev->sc_base = of_iomap(np, 0); - if (!dsaf_dev->sc_base) { - dev_err(dsaf_dev->dev, - "%s of_iomap 0 fail!\n", dsaf_dev->ae_dev.name); - ret = -ENOMEM; - goto unmap_base_addr; - } + syscon = syscon_node_to_regmap( + of_parse_phandle(np, "subctrl-syscon", 0)); + if (IS_ERR_OR_NULL(syscon)) { + res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); + if (!res) { + dev_err(dsaf_dev->dev, "subctrl info is needed!\n"); + return -ENOMEM; + } + dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev, res); + if (!dsaf_dev->sc_base) { + dev_err(dsaf_dev->dev, "subctrl can not map!\n"); + return -ENOMEM; + } - dsaf_dev->sds_base = of_iomap(np, 1); - if (!dsaf_dev->sds_base) { - dev_err(dsaf_dev->dev, - "%s of_iomap 1 fail!\n", dsaf_dev->ae_dev.name); - ret = -ENOMEM; - goto unmap_base_addr; + res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); + if (!res) { + dev_err(dsaf_dev->dev, "serdes-ctrl info is needed!\n"); + return -ENOMEM; + } + dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev, res); + if (!dsaf_dev->sds_base) { + dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n"); + return -ENOMEM; + } + } else { + dsaf_dev->sub_ctrl = syscon; } - dsaf_dev->ppe_base = of_iomap(np, 2); - if (!dsaf_dev->ppe_base) { - dev_err(dsaf_dev->dev, - "%s of_iomap 2 fail!\n", dsaf_dev->ae_dev.name); - ret = -ENOMEM; - goto unmap_base_addr; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ppe-base"); + if (!res) { + res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); + if (!res) { + dev_err(dsaf_dev->dev, "ppe-base info is needed!\n"); + return -ENOMEM; + } } - - dsaf_dev->io_base = of_iomap(np, 3); - if (!dsaf_dev->io_base) { - dev_err(dsaf_dev->dev, - "%s of_iomap 3 fail!\n", dsaf_dev->ae_dev.name); - ret = -ENOMEM; - goto unmap_base_addr; + dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res); + if (!dsaf_dev->ppe_base) { + dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n"); + return -ENOMEM; + } + dsaf_dev->ppe_paddr = res->start; + + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "dsaf-base"); + if (!res) { + res = platform_get_resource(pdev, IORESOURCE_MEM, + res_idx); + if (!res) { + dev_err(dsaf_dev->dev, + "dsaf-base info is needed!\n"); + return -ENOMEM; + } + } + dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res); + if (!dsaf_dev->io_base) { + dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n"); + return -ENOMEM; + } } - dsaf_dev->cpld_base = of_iomap(np, 4); - if (!dsaf_dev->cpld_base) - dev_dbg(dsaf_dev->dev, "NO CPLD ADDR"); - ret = of_property_read_u32(np, "desc-num", &desc_num); if (ret < 0 || desc_num < HNS_DSAF_MIN_DESC_CNT || desc_num > HNS_DSAF_MAX_DESC_CNT) { @@ -118,6 +151,13 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } dsaf_dev->desc_num = desc_num; + ret = of_property_read_u32(np, "reset-field-offset", &reset_offset); + if (ret < 0) { + dev_dbg(dsaf_dev->dev, + "get reset-field-offset fail, ret=%d!\r\n", ret); + } + dsaf_dev->reset_offset = reset_offset; + ret = of_property_read_u32(np, "buf-size", &buf_size); if (ret < 0) { dev_err(dsaf_dev->dev, @@ -149,8 +189,6 @@ unmap_base_addr: iounmap(dsaf_dev->sds_base); if (dsaf_dev->sc_base) iounmap(dsaf_dev->sc_base); - if (dsaf_dev->cpld_base) - iounmap(dsaf_dev->cpld_base); return ret; } @@ -167,9 +205,6 @@ static void hns_dsaf_free_cfg(struct dsaf_device *dsaf_dev) if (dsaf_dev->sc_base) iounmap(dsaf_dev->sc_base); - - if (dsaf_dev->cpld_base) - iounmap(dsaf_dev->cpld_base); } /** @@ -217,9 +252,7 @@ static void hns_dsaf_mix_def_qid_cfg(struct dsaf_device *dsaf_dev) u32 q_id, q_num_per_port; u32 i; - hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode, - HNS_DSAF_COMM_SERVICE_NW_IDX, - &max_vfn, &max_q_per_vf); + hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode, &max_vfn, &max_q_per_vf); q_num_per_port = max_vfn * max_q_per_vf; for (i = 0, q_id = 0; i < DSAF_SERVICE_NW_NUM; i++) { @@ -239,9 +272,7 @@ static void hns_dsaf_inner_qid_cfg(struct dsaf_device *dsaf_dev) if (AE_IS_VER1(dsaf_dev->dsaf_ver)) return; - hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode, - HNS_DSAF_COMM_SERVICE_NW_IDX, - &max_vfn, &max_q_per_vf); + hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode, &max_vfn, &max_q_per_vf); q_num_per_port = max_vfn * max_q_per_vf; for (mac_id = 0, q_id = 0; mac_id < DSAF_SERVICE_NW_NUM; mac_id++) { @@ -712,13 +743,15 @@ static void hns_dsaf_tbl_tcam_data_ucast_pul( void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en) { - dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, DSAF_CFG_MIX_MODE_S, !!en); + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) + dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, + DSAF_CFG_MIX_MODE_S, !!en); } void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en) { if (AE_IS_VER1(dsaf_dev->dsaf_ver) || - dsaf_dev->mac_cb[mac_id].mac_type == HNAE_PORT_DEBUG) + dsaf_dev->mac_cb[mac_id]->mac_type == HNAE_PORT_DEBUG) return; dsaf_set_dev_bit(dsaf_dev, DSAFV2_SERDES_LBK_0_REG + 4 * mac_id, @@ -1307,6 +1340,9 @@ static int hns_dsaf_init(struct dsaf_device *dsaf_dev) u32 i; int ret; + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return 0; + ret = hns_dsaf_init_hw(dsaf_dev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index e8eedc571296..f0502ba0a677 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -41,6 +41,7 @@ struct hns_mac_cb; #define DSAF_STATIC_NUM 28 #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) +#define HNS_DSAF_IS_DEBUG(dev) (dev->dsaf_mode == DSAF_MODE_DISABLE_SP) enum hal_dsaf_mode { HRD_DSAF_NO_DSAF_MODE = 0x0, @@ -117,6 +118,7 @@ enum dsaf_mode { DSAF_MODE_ENABLE_32VM, /**< en DSAF-mode, support 32 VM */ DSAF_MODE_ENABLE_128VM, /**< en DSAF-mode, support 128 VM */ DSAF_MODE_ENABLE, /**< before is enable DSAF mode*/ + DSAF_MODE_DISABLE_SP, /* <non-dsaf, single port mode */ DSAF_MODE_DISABLE_FIX, /**< non-dasf, fixed to queue*/ DSAF_MODE_DISABLE_2PORT_8VM, /**< non-dasf, 2port 8VM */ DSAF_MODE_DISABLE_2PORT_16VM, /**< non-dasf, 2port 16VM */ @@ -275,10 +277,12 @@ struct dsaf_device { u8 __iomem *sds_base; u8 __iomem *ppe_base; u8 __iomem *io_base; - u8 __iomem *cpld_base; + struct regmap *sub_ctrl; + phys_addr_t ppe_paddr; u32 desc_num; /* desc num per queue*/ u32 buf_size; /* ring buffer size */ + u32 reset_offset; /* reset field offset in sub sysctrl */ int buf_size_type; /* ring buffer size-type */ enum dsaf_mode dsaf_mode; /* dsaf mode */ enum hal_dsaf_mode dsaf_en; @@ -287,7 +291,7 @@ struct dsaf_device { struct ppe_common_cb *ppe_common[DSAF_COMM_DEV_NUM]; struct rcb_common_cb *rcb_common[DSAF_COMM_DEV_NUM]; - struct hns_mac_cb *mac_cb; + struct hns_mac_cb *mac_cb[DSAF_MAX_PORT_NUM]; struct dsaf_hw_stats hw_stats[DSAF_NODE_NUM]; struct dsaf_int_stat int_stat; @@ -359,14 +363,6 @@ static inline void hns_dsaf_tbl_line_addr_cfg(struct dsaf_device *dsaf_dev, tab_line_addr); } -static inline int hns_dsaf_get_comm_idx_by_port(int port) -{ - if ((port < DSAF_COMM_CHN) || (port == DSAF_MAX_PORT_NUM_PER_CHIP)) - return 0; - else - return (port - DSAF_COMM_CHN + 1); -} - static inline struct hnae_vf_cb *hns_ae_get_vf_cb( struct hnae_handle *handle) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index e69b02287c44..a837bb9e3839 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -7,10 +7,30 @@ * (at your option) any later version. */ -#include "hns_dsaf_misc.h" #include "hns_dsaf_mac.h" -#include "hns_dsaf_reg.h" +#include "hns_dsaf_misc.h" #include "hns_dsaf_ppe.h" +#include "hns_dsaf_reg.h" + +static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val) +{ + if (dsaf_dev->sub_ctrl) + dsaf_write_syscon(dsaf_dev->sub_ctrl, reg, val); + else + dsaf_write_reg(dsaf_dev->sc_base, reg, val); +} + +static u32 dsaf_read_sub(struct dsaf_device *dsaf_dev, u32 reg) +{ + u32 ret; + + if (dsaf_dev->sub_ctrl) + ret = dsaf_read_syscon(dsaf_dev->sub_ctrl, reg); + else + ret = dsaf_read_reg(dsaf_dev->sc_base, reg); + + return ret; +} void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status, u16 speed, int data) @@ -22,8 +42,8 @@ void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status, pr_err("sfp_led_opt mac_dev is null!\n"); return; } - if (!mac_cb->cpld_vaddr) { - dev_err(mac_cb->dev, "mac_id=%d, cpld_vaddr is null !\n", + if (!mac_cb->cpld_ctrl) { + dev_err(mac_cb->dev, "mac_id=%d, cpld syscon is null !\n", mac_cb->mac_id); return; } @@ -40,21 +60,24 @@ void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status, dsaf_set_bit(value, DSAF_LED_DATA_B, data); if (value != mac_cb->cpld_led_value) { - dsaf_write_b(mac_cb->cpld_vaddr, value); + dsaf_write_syscon(mac_cb->cpld_ctrl, + mac_cb->cpld_ctrl_reg, value); mac_cb->cpld_led_value = value; } } else { - dsaf_write_b(mac_cb->cpld_vaddr, CPLD_LED_DEFAULT_VALUE); + dsaf_write_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg, + CPLD_LED_DEFAULT_VALUE); mac_cb->cpld_led_value = CPLD_LED_DEFAULT_VALUE; } } void cpld_led_reset(struct hns_mac_cb *mac_cb) { - if (!mac_cb || !mac_cb->cpld_vaddr) + if (!mac_cb || !mac_cb->cpld_ctrl) return; - dsaf_write_b(mac_cb->cpld_vaddr, CPLD_LED_DEFAULT_VALUE); + dsaf_write_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg, + CPLD_LED_DEFAULT_VALUE); mac_cb->cpld_led_value = CPLD_LED_DEFAULT_VALUE; } @@ -63,15 +86,19 @@ int cpld_set_led_id(struct hns_mac_cb *mac_cb, { switch (status) { case HNAE_LED_ACTIVE: - mac_cb->cpld_led_value = dsaf_read_b(mac_cb->cpld_vaddr); + mac_cb->cpld_led_value = + dsaf_read_syscon(mac_cb->cpld_ctrl, + mac_cb->cpld_ctrl_reg); dsaf_set_bit(mac_cb->cpld_led_value, DSAF_LED_ANCHOR_B, CPLD_LED_ON_VALUE); - dsaf_write_b(mac_cb->cpld_vaddr, mac_cb->cpld_led_value); + dsaf_write_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg, + mac_cb->cpld_led_value); return 2; case HNAE_LED_INACTIVE: dsaf_set_bit(mac_cb->cpld_led_value, DSAF_LED_ANCHOR_B, CPLD_LED_DEFAULT_VALUE); - dsaf_write_b(mac_cb->cpld_vaddr, mac_cb->cpld_led_value); + dsaf_write_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg, + mac_cb->cpld_led_value); break; default: break; @@ -95,10 +122,8 @@ void hns_dsaf_rst(struct dsaf_device *dsaf_dev, u32 val) nt_reg_addr = DSAF_SUB_SC_NT_RESET_DREQ_REG; } - dsaf_write_reg(dsaf_dev->sc_base, xbar_reg_addr, - RESET_REQ_OR_DREQ); - dsaf_write_reg(dsaf_dev->sc_base, nt_reg_addr, - RESET_REQ_OR_DREQ); + dsaf_write_sub(dsaf_dev, xbar_reg_addr, RESET_REQ_OR_DREQ); + dsaf_write_sub(dsaf_dev, nt_reg_addr, RESET_REQ_OR_DREQ); } void hns_dsaf_xge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val) @@ -110,14 +135,14 @@ void hns_dsaf_xge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val) return; reg_val |= RESET_REQ_OR_DREQ; - reg_val |= 0x2082082 << port; + reg_val |= 0x2082082 << dsaf_dev->mac_cb[port]->port_rst_off; if (val == 0) reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG; else reg_addr = DSAF_SUB_SC_XGE_RESET_DREQ_REG; - dsaf_write_reg(dsaf_dev->sc_base, reg_addr, reg_val); + dsaf_write_sub(dsaf_dev, reg_addr, reg_val); } void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, @@ -129,68 +154,63 @@ void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, if (port >= DSAF_XGE_NUM) return; - reg_val |= XGMAC_TRX_CORE_SRST_M << port; + reg_val |= XGMAC_TRX_CORE_SRST_M + << dsaf_dev->mac_cb[port]->port_rst_off; if (val == 0) reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG; else reg_addr = DSAF_SUB_SC_XGE_RESET_DREQ_REG; - dsaf_write_reg(dsaf_dev->sc_base, reg_addr, reg_val); + dsaf_write_sub(dsaf_dev, reg_addr, reg_val); } void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val) { u32 reg_val_1; u32 reg_val_2; + u32 port_rst_off; if (port >= DSAF_GE_NUM) return; - if (port < DSAF_SERVICE_NW_NUM) { + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { reg_val_1 = 0x1 << port; + port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off; /* there is difference between V1 and V2 in register.*/ if (AE_IS_VER1(dsaf_dev->dsaf_ver)) - reg_val_2 = 0x1041041 << port; + reg_val_2 = 0x1041041 << port_rst_off; else - reg_val_2 = 0x2082082 << port; + reg_val_2 = 0x2082082 << port_rst_off; if (val == 0) { - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_GE_RESET_REQ1_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG, reg_val_1); - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_GE_RESET_REQ0_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ0_REG, reg_val_2); } else { - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_GE_RESET_DREQ0_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_DREQ0_REG, reg_val_2); - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_GE_RESET_DREQ1_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_DREQ1_REG, reg_val_1); } } else { - reg_val_1 = 0x15540 << (port - 6); - reg_val_2 = 0x100 << (port - 6); + reg_val_1 = 0x15540 << dsaf_dev->reset_offset; + reg_val_2 = 0x100 << dsaf_dev->reset_offset; if (val == 0) { - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_GE_RESET_REQ1_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG, reg_val_1); - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_PPE_RESET_REQ_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_PPE_RESET_REQ_REG, reg_val_2); } else { - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_GE_RESET_DREQ1_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_DREQ1_REG, reg_val_1); - dsaf_write_reg(dsaf_dev->sc_base, - DSAF_SUB_SC_PPE_RESET_DREQ_REG, + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_PPE_RESET_DREQ_REG, reg_val_2); } } @@ -201,24 +221,23 @@ void hns_ppe_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val) u32 reg_val = 0; u32 reg_addr; - reg_val |= RESET_REQ_OR_DREQ << port; + reg_val |= RESET_REQ_OR_DREQ << dsaf_dev->mac_cb[port]->port_rst_off; if (val == 0) reg_addr = DSAF_SUB_SC_PPE_RESET_REQ_REG; else reg_addr = DSAF_SUB_SC_PPE_RESET_DREQ_REG; - dsaf_write_reg(dsaf_dev->sc_base, reg_addr, reg_val); + dsaf_write_sub(dsaf_dev, reg_addr, reg_val); } void hns_ppe_com_srst(struct ppe_common_cb *ppe_common, u32 val) { - int comm_index = ppe_common->comm_index; struct dsaf_device *dsaf_dev = ppe_common->dsaf_dev; u32 reg_val; u32 reg_addr; - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { reg_val = RESET_REQ_OR_DREQ; if (val == 0) reg_addr = DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG; @@ -226,7 +245,7 @@ void hns_ppe_com_srst(struct ppe_common_cb *ppe_common, u32 val) reg_addr = DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG; } else { - reg_val = 0x100 << (comm_index - 1); + reg_val = 0x100 << dsaf_dev->reset_offset; if (val == 0) reg_addr = DSAF_SUB_SC_PPE_RESET_REQ_REG; @@ -234,7 +253,7 @@ void hns_ppe_com_srst(struct ppe_common_cb *ppe_common, u32 val) reg_addr = DSAF_SUB_SC_PPE_RESET_DREQ_REG; } - dsaf_write_reg(dsaf_dev->sc_base, reg_addr, reg_val); + dsaf_write_sub(dsaf_dev, reg_addr, reg_val); } /** @@ -246,36 +265,45 @@ phy_interface_t hns_mac_get_phy_if(struct hns_mac_cb *mac_cb) { u32 mode; u32 reg; - u32 shift; bool is_ver1 = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver); - void __iomem *sys_ctl_vaddr = mac_cb->sys_ctl_vaddr; int mac_id = mac_cb->mac_id; - phy_interface_t phy_if = PHY_INTERFACE_MODE_NA; + phy_interface_t phy_if; - if (is_ver1 && (mac_id >= 6 && mac_id <= 7)) { - phy_if = PHY_INTERFACE_MODE_SGMII; - } else if (mac_id >= 0 && mac_id <= 3) { - reg = is_ver1 ? HNS_MAC_HILINK4_REG : HNS_MAC_HILINK4V2_REG; - mode = dsaf_read_reg(sys_ctl_vaddr, reg); - /* mac_id 0, 1, 2, 3 ---> hilink4 lane 0, 1, 2, 3 */ - shift = is_ver1 ? 0 : mac_id; - if (dsaf_get_bit(mode, shift)) - phy_if = PHY_INTERFACE_MODE_XGMII; + if (is_ver1) { + if (HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev)) + return PHY_INTERFACE_MODE_SGMII; + + if (mac_id >= 0 && mac_id <= 3) + reg = HNS_MAC_HILINK4_REG; else - phy_if = PHY_INTERFACE_MODE_SGMII; - } else if (mac_id >= 4 && mac_id <= 7) { - reg = is_ver1 ? HNS_MAC_HILINK3_REG : HNS_MAC_HILINK3V2_REG; - mode = dsaf_read_reg(sys_ctl_vaddr, reg); - /* mac_id 4, 5, 6, 7 ---> hilink3 lane 2, 3, 0, 1 */ - shift = is_ver1 ? 0 : mac_id <= 5 ? mac_id - 2 : mac_id - 6; - if (dsaf_get_bit(mode, shift)) - phy_if = PHY_INTERFACE_MODE_XGMII; + reg = HNS_MAC_HILINK3_REG; + } else{ + if (!HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev) && mac_id <= 3) + reg = HNS_MAC_HILINK4V2_REG; else - phy_if = PHY_INTERFACE_MODE_SGMII; + reg = HNS_MAC_HILINK3V2_REG; } + + mode = dsaf_read_sub(mac_cb->dsaf_dev, reg); + if (dsaf_get_bit(mode, mac_cb->port_mode_off)) + phy_if = PHY_INTERFACE_MODE_XGMII; + else + phy_if = PHY_INTERFACE_MODE_SGMII; + return phy_if; } +int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt) +{ + if (!mac_cb->cpld_ctrl) + return -ENODEV; + + *sfp_prsnt = !dsaf_read_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg + + MAC_SFP_PORT_OFFSET); + + return 0; +} + /** * hns_mac_config_sds_loopback - set loop back for serdes * @mac_cb: mac control block @@ -312,7 +340,14 @@ int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, u8 en) pr_info("no sfp in this eth\n"); } - dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, !!en); + if (mac_cb->serdes_ctrl) { + u32 origin = dsaf_read_syscon(mac_cb->serdes_ctrl, reg_offset); + + dsaf_set_field(origin, 1ull << 10, 10, !!en); + dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin); + } else { + dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, !!en); + } return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index ab27b3b14ca3..8cd151a5245e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -61,22 +61,10 @@ void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, } } -static void __iomem *hns_ppe_common_get_ioaddr( - struct ppe_common_cb *ppe_common) +static void __iomem * +hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common) { - void __iomem *base_addr; - - int idx = ppe_common->comm_index; - - if (idx == HNS_DSAF_COMM_SERVICE_NW_IDX) - base_addr = ppe_common->dsaf_dev->ppe_base - + PPE_COMMON_REG_OFFSET; - else - base_addr = ppe_common->dsaf_dev->sds_base - + (idx - 1) * HNS_DSAF_DEBUG_NW_REG_OFFSET - + PPE_COMMON_REG_OFFSET; - - return base_addr; + return ppe_common->dsaf_dev->ppe_base + PPE_COMMON_REG_OFFSET; } /** @@ -90,7 +78,7 @@ int hns_ppe_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index) struct ppe_common_cb *ppe_common; int ppe_num; - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) ppe_num = HNS_PPE_SERVICE_NW_ENGINE_NUM; else ppe_num = HNS_PPE_DEBUG_NW_ENGINE_NUM; @@ -103,7 +91,7 @@ int hns_ppe_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index) ppe_common->ppe_num = ppe_num; ppe_common->dsaf_dev = dsaf_dev; ppe_common->comm_index = comm_index; - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) + if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) ppe_common->ppe_mode = PPE_COMMON_MODE_SERVICE; else ppe_common->ppe_mode = PPE_COMMON_MODE_DEBUG; @@ -124,32 +112,8 @@ void hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index) static void __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, int ppe_idx) { - void __iomem *base_addr; - int common_idx = ppe_common->comm_index; - - if (ppe_common->ppe_mode == PPE_COMMON_MODE_SERVICE) { - base_addr = ppe_common->dsaf_dev->ppe_base + - ppe_idx * PPE_REG_OFFSET; - - } else { - base_addr = ppe_common->dsaf_dev->sds_base + - (common_idx - 1) * HNS_DSAF_DEBUG_NW_REG_OFFSET; - } - return base_addr; -} - -static int hns_ppe_get_port(struct ppe_common_cb *ppe_common, int idx) -{ - int port; - - if (ppe_common->ppe_mode == PPE_COMMON_MODE_SERVICE) - port = idx; - else - port = HNS_PPE_SERVICE_NW_ENGINE_NUM - + ppe_common->comm_index - 1; - - return port; + return ppe_common->dsaf_dev->ppe_base + ppe_idx * PPE_REG_OFFSET; } static void hns_ppe_get_cfg(struct ppe_common_cb *ppe_common) @@ -164,7 +128,6 @@ static void hns_ppe_get_cfg(struct ppe_common_cb *ppe_common) ppe_cb->next = NULL; ppe_cb->ppe_common_cb = ppe_common; ppe_cb->index = i; - ppe_cb->port = hns_ppe_get_port(ppe_common, i); ppe_cb->io_base = hns_ppe_get_iobase(ppe_common, i); ppe_cb->virq = 0; } @@ -318,7 +281,7 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en) static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) { struct ppe_common_cb *ppe_common_cb = ppe_cb->ppe_common_cb; - u32 port = ppe_cb->port; + u32 port = ppe_cb->index; struct dsaf_device *dsaf_dev = ppe_common_cb->dsaf_dev; int i; @@ -377,7 +340,8 @@ void hns_ppe_uninit_ex(struct ppe_common_cb *ppe_common) u32 i; for (i = 0; i < ppe_common->ppe_num; i++) { - hns_ppe_uninit_hw(&ppe_common->ppe_cb[i]); + if (ppe_common->dsaf_dev->mac_cb[i]) + hns_ppe_uninit_hw(&ppe_common->ppe_cb[i]); memset(&ppe_common->ppe_cb[i], 0, sizeof(struct hns_ppe_cb)); } } @@ -410,8 +374,11 @@ void hns_ppe_reset_common(struct dsaf_device *dsaf_dev, u8 ppe_common_index) if (ret) return; - for (i = 0; i < ppe_common->ppe_num; i++) - hns_ppe_init_hw(&ppe_common->ppe_cb[i]); + for (i = 0; i < ppe_common->ppe_num; i++) { + /* We only need to initiate ppe when the port exists */ + if (dsaf_dev->mac_cb[i]) + hns_ppe_init_hw(&ppe_common->ppe_cb[i]); + } ret = hns_rcb_common_init_hw(dsaf_dev->rcb_common[ppe_common_index]); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index e9c0ec2fa0dd..9d8e643e8aa6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -80,7 +80,6 @@ struct hns_ppe_cb { struct hns_ppe_hw_stats hw_stats; u8 index; /* index in a ppe common device */ - u8 port; /* port id in dsaf */ void __iomem *io_base; int virq; u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 28ee26e5c478..4ef6d23d998e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -270,7 +270,7 @@ static void hns_rcb_set_port_timeout( static int hns_rcb_common_get_port_num(struct rcb_common_cb *rcb_common) { - if (rcb_common->comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) + if (!HNS_DSAF_IS_DEBUG(rcb_common->dsaf_dev)) return HNS_RCB_SERVICE_NW_ENGINE_NUM; else return HNS_RCB_DEBUG_NW_ENGINE_NUM; @@ -430,36 +430,20 @@ static void hns_rcb_ring_pair_get_cfg(struct ring_pair_cb *ring_pair_cb) static int hns_rcb_get_port_in_comm( struct rcb_common_cb *rcb_common, int ring_idx) { - int comm_index = rcb_common->comm_index; - int port; - int q_num; - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { - q_num = (int)rcb_common->max_q_per_vf * rcb_common->max_vfn; - port = ring_idx / q_num; - } else { - port = 0; /* config debug-ports port_id_in_comm to 0*/ - } - - return port; + return ring_idx / (rcb_common->max_q_per_vf * rcb_common->max_vfn); } #define SERVICE_RING_IRQ_IDX(v1) \ ((v1) ? HNS_SERVICE_RING_IRQ_IDX : HNSV2_SERVICE_RING_IRQ_IDX) -#define DEBUG_RING_IRQ_IDX(v1) \ - ((v1) ? HNS_DEBUG_RING_IRQ_IDX : HNSV2_DEBUG_RING_IRQ_IDX) -#define DEBUG_RING_IRQ_OFFSET(v1) \ - ((v1) ? HNS_DEBUG_RING_IRQ_OFFSET : HNSV2_DEBUG_RING_IRQ_OFFSET) static int hns_rcb_get_base_irq_idx(struct rcb_common_cb *rcb_common) { - int comm_index = rcb_common->comm_index; bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver); - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) + if (!HNS_DSAF_IS_DEBUG(rcb_common->dsaf_dev)) return SERVICE_RING_IRQ_IDX(is_ver1); else - return DEBUG_RING_IRQ_IDX(is_ver1) + - (comm_index - 1) * DEBUG_RING_IRQ_OFFSET(is_ver1); + return HNS_DEBUG_RING_IRQ_IDX; } #define RCB_COMM_BASE_TO_RING_BASE(base, ringid)\ @@ -549,7 +533,7 @@ int hns_rcb_set_coalesce_usecs( return 0; if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) { - if (rcb_common->comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { + if (!HNS_DSAF_IS_DEBUG(rcb_common->dsaf_dev)) { dev_err(rcb_common->dsaf_dev->dev, "error: not support coalesce_usecs setting!\n"); return -EINVAL; @@ -601,113 +585,82 @@ int hns_rcb_set_coalesced_frames( *@max_vfn : max vfn number *@max_q_per_vf:max ring number per vm */ -void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, int comm_index, - u16 *max_vfn, u16 *max_q_per_vf) +void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, u16 *max_vfn, + u16 *max_q_per_vf) { - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { - switch (dsaf_mode) { - case DSAF_MODE_DISABLE_6PORT_0VM: - *max_vfn = 1; - *max_q_per_vf = 16; - break; - case DSAF_MODE_DISABLE_FIX: - *max_vfn = 1; - *max_q_per_vf = 1; - break; - case DSAF_MODE_DISABLE_2PORT_64VM: - *max_vfn = 64; - *max_q_per_vf = 1; - break; - case DSAF_MODE_DISABLE_6PORT_16VM: - *max_vfn = 16; - *max_q_per_vf = 1; - break; - default: - *max_vfn = 1; - *max_q_per_vf = 16; - break; - } - } else { + switch (dsaf_mode) { + case DSAF_MODE_DISABLE_6PORT_0VM: + *max_vfn = 1; + *max_q_per_vf = 16; + break; + case DSAF_MODE_DISABLE_FIX: + case DSAF_MODE_DISABLE_SP: *max_vfn = 1; *max_q_per_vf = 1; + break; + case DSAF_MODE_DISABLE_2PORT_64VM: + *max_vfn = 64; + *max_q_per_vf = 1; + break; + case DSAF_MODE_DISABLE_6PORT_16VM: + *max_vfn = 16; + *max_q_per_vf = 1; + break; + default: + *max_vfn = 1; + *max_q_per_vf = 16; + break; } } -int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev, int comm_index) +int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev) { - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { - switch (dsaf_dev->dsaf_mode) { - case DSAF_MODE_ENABLE_FIX: - return 1; - - case DSAF_MODE_DISABLE_FIX: - return 6; - - case DSAF_MODE_ENABLE_0VM: - return 32; - - case DSAF_MODE_DISABLE_6PORT_0VM: - case DSAF_MODE_ENABLE_16VM: - case DSAF_MODE_DISABLE_6PORT_2VM: - case DSAF_MODE_DISABLE_6PORT_16VM: - case DSAF_MODE_DISABLE_6PORT_4VM: - case DSAF_MODE_ENABLE_8VM: - return 96; - - case DSAF_MODE_DISABLE_2PORT_16VM: - case DSAF_MODE_DISABLE_2PORT_8VM: - case DSAF_MODE_ENABLE_32VM: - case DSAF_MODE_DISABLE_2PORT_64VM: - case DSAF_MODE_ENABLE_128VM: - return 128; - - default: - dev_warn(dsaf_dev->dev, - "get ring num fail,use default!dsaf_mode=%d\n", - dsaf_dev->dsaf_mode); - return 128; - } - } else { + switch (dsaf_dev->dsaf_mode) { + case DSAF_MODE_ENABLE_FIX: + case DSAF_MODE_DISABLE_SP: return 1; + + case DSAF_MODE_DISABLE_FIX: + return 6; + + case DSAF_MODE_ENABLE_0VM: + return 32; + + case DSAF_MODE_DISABLE_6PORT_0VM: + case DSAF_MODE_ENABLE_16VM: + case DSAF_MODE_DISABLE_6PORT_2VM: + case DSAF_MODE_DISABLE_6PORT_16VM: + case DSAF_MODE_DISABLE_6PORT_4VM: + case DSAF_MODE_ENABLE_8VM: + return 96; + + case DSAF_MODE_DISABLE_2PORT_16VM: + case DSAF_MODE_DISABLE_2PORT_8VM: + case DSAF_MODE_ENABLE_32VM: + case DSAF_MODE_DISABLE_2PORT_64VM: + case DSAF_MODE_ENABLE_128VM: + return 128; + + default: + dev_warn(dsaf_dev->dev, + "get ring num fail,use default!dsaf_mode=%d\n", + dsaf_dev->dsaf_mode); + return 128; } } -void __iomem *hns_rcb_common_get_vaddr(struct dsaf_device *dsaf_dev, - int comm_index) +void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) { - void __iomem *base_addr; - - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) - base_addr = dsaf_dev->ppe_base + RCB_COMMON_REG_OFFSET; - else - base_addr = dsaf_dev->sds_base - + (comm_index - 1) * HNS_DSAF_DEBUG_NW_REG_OFFSET - + RCB_COMMON_REG_OFFSET; + struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev; - return base_addr; + return dsaf_dev->ppe_base + RCB_COMMON_REG_OFFSET; } -static phys_addr_t hns_rcb_common_get_paddr(struct dsaf_device *dsaf_dev, - int comm_index) +static phys_addr_t hns_rcb_common_get_paddr(struct rcb_common_cb *rcb_common) { - struct device_node *np = dsaf_dev->dev->of_node; - phys_addr_t phy_addr; - const __be32 *tmp_addr; - u64 addr_offset = 0; - u64 size = 0; - int index = 0; - - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { - index = 2; - addr_offset = RCB_COMMON_REG_OFFSET; - } else { - index = 1; - addr_offset = (comm_index - 1) * HNS_DSAF_DEBUG_NW_REG_OFFSET + - RCB_COMMON_REG_OFFSET; - } - tmp_addr = of_get_address(np, index, &size, NULL); - phy_addr = of_translate_address(np, tmp_addr); - return phy_addr + addr_offset; + struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev; + + return dsaf_dev->ppe_paddr + RCB_COMMON_REG_OFFSET; } int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, @@ -717,7 +670,7 @@ int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, enum dsaf_mode dsaf_mode = dsaf_dev->dsaf_mode; u16 max_vfn; u16 max_q_per_vf; - int ring_num = hns_rcb_get_ring_num(dsaf_dev, comm_index); + int ring_num = hns_rcb_get_ring_num(dsaf_dev); rcb_common = devm_kzalloc(dsaf_dev->dev, sizeof(*rcb_common) + @@ -732,12 +685,12 @@ int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, rcb_common->desc_num = dsaf_dev->desc_num; - hns_rcb_get_queue_mode(dsaf_mode, comm_index, &max_vfn, &max_q_per_vf); + hns_rcb_get_queue_mode(dsaf_mode, &max_vfn, &max_q_per_vf); rcb_common->max_vfn = max_vfn; rcb_common->max_q_per_vf = max_q_per_vf; - rcb_common->io_base = hns_rcb_common_get_vaddr(dsaf_dev, comm_index); - rcb_common->phy_base = hns_rcb_common_get_paddr(dsaf_dev, comm_index); + rcb_common->io_base = hns_rcb_common_get_vaddr(rcb_common); + rcb_common->phy_base = hns_rcb_common_get_paddr(rcb_common); dsaf_dev->rcb_common[comm_index] = rcb_common; return 0; @@ -932,7 +885,7 @@ void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data) { u32 *regs = data; bool is_ver1 = AE_IS_VER1(rcb_com->dsaf_dev->dsaf_ver); - bool is_dbg = (rcb_com->comm_index != HNS_DSAF_COMM_SERVICE_NW_IDX); + bool is_dbg = HNS_DSAF_IS_DEBUG(rcb_com->dsaf_dev); u32 reg_tmp; u32 reg_num_tmp; u32 i = 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index eb61014ad615..bd54dac82ee0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -111,7 +111,7 @@ void hns_rcb_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index); int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common); void hns_rcb_start(struct hnae_queue *q, u32 val); void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common); -void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, int comm_index, +void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, u16 *max_vfn, u16 *max_q_per_vf); void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 7ff195e60b02..7c3b5103d151 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -10,25 +10,20 @@ #ifndef _DSAF_REG_H_ #define _DSAF_REG_H_ -#define HNS_DEBUG_RING_IRQ_IDX 55 -#define HNS_SERVICE_RING_IRQ_IDX 59 -#define HNS_DEBUG_RING_IRQ_OFFSET 2 -#define HNSV2_DEBUG_RING_IRQ_IDX 409 -#define HNSV2_SERVICE_RING_IRQ_IDX 25 -#define HNSV2_DEBUG_RING_IRQ_OFFSET 9 - -#define DSAF_MAX_PORT_NUM_PER_CHIP 8 -#define DSAF_SERVICE_PORT_NUM_PER_DSAF 6 -#define DSAF_MAX_VM_NUM 128 - -#define DSAF_COMM_DEV_NUM 3 -#define DSAF_PPE_INODE_BASE 6 -#define HNS_DSAF_COMM_SERVICE_NW_IDX 0 +#include <linux/regmap.h> +#define HNS_DEBUG_RING_IRQ_IDX 0 +#define HNS_SERVICE_RING_IRQ_IDX 59 +#define HNSV2_SERVICE_RING_IRQ_IDX 25 + +#define DSAF_MAX_PORT_NUM 6 +#define DSAF_MAX_VM_NUM 128 + +#define DSAF_COMM_DEV_NUM 1 +#define DSAF_PPE_INODE_BASE 6 #define DSAF_DEBUG_NW_NUM 2 #define DSAF_SERVICE_NW_NUM 6 #define DSAF_COMM_CHN DSAF_SERVICE_NW_NUM #define DSAF_GE_NUM ((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM)) -#define DSAF_PORT_NUM ((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM)) #define DSAF_XGE_NUM DSAF_SERVICE_NW_NUM #define DSAF_PORT_TYPE_NUM 3 #define DSAF_NODE_NUM 18 @@ -994,6 +989,19 @@ static inline u32 dsaf_read_reg(u8 __iomem *base, u32 reg) return readl(reg_addr + reg); } +static inline void dsaf_write_syscon(struct regmap *base, u32 reg, u32 value) +{ + regmap_write(base, reg, value); +} + +static inline u32 dsaf_read_syscon(struct regmap *base, u32 reg) +{ + unsigned int val; + + regmap_read(base, reg, &val); + return val; +} + #define dsaf_read_dev(a, reg) \ dsaf_read_reg((a)->io_base, (reg)) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 687204b780b0..e47aff250b15 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1873,6 +1873,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) struct net_device *ndev; struct hns_nic_priv *priv; struct device_node *node = dev->of_node; + u32 port_id; int ret; ndev = alloc_etherdev_mq(sizeof(struct hns_nic_priv), NIC_MAX_Q_PER_VF); @@ -1896,10 +1897,18 @@ static int hns_nic_dev_probe(struct platform_device *pdev) dev_err(dev, "not find ae-handle\n"); goto out_read_prop_fail; } - - ret = of_property_read_u32(node, "port-id", &priv->port_id); - if (ret) - goto out_read_prop_fail; + /* try to find port-idx-in-ae first */ + ret = of_property_read_u32(node, "port-idx-in-ae", &port_id); + if (ret) { + /* only for old code compatible */ + ret = of_property_read_u32(node, "port-id", &port_id); + if (ret) + goto out_read_prop_fail; + /* for old dts, we need to caculate the port offset */ + port_id = port_id < HNS_SRV_OFFSET ? port_id + HNS_DEBUG_OFFSET + : port_id - HNS_SRV_OFFSET; + } + priv->port_id = port_id; hns_init_mac_addr(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index c68ab3d34fc2..337efa582bac 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -18,6 +18,9 @@ #include "hnae.h" +#define HNS_DEBUG_OFFSET 6 +#define HNS_SRV_OFFSET 2 + enum hns_nic_state { NIC_STATE_TESTING = 0, NIC_STATE_RESETTING, diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile index b006ff66d028..cac645329cea 100644 --- a/drivers/net/ethernet/intel/fm10k/Makefile +++ b/drivers/net/ethernet/intel/fm10k/Makefile @@ -1,7 +1,7 @@ ################################################################################ # -# Intel Ethernet Switch Host Interface Driver -# Copyright(c) 2013 - 2015 Intel Corporation. +# Intel(R) Ethernet Switch Host Interface Driver +# Copyright(c) 2013 - 2016 Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, @@ -22,7 +22,7 @@ ################################################################################ # -# Makefile for the Intel(R) FM10000 Ethernet Switch Host Interface driver +# Makefile for the Intel(R) Ethernet Switch Host Interface Driver # obj-$(CONFIG_FM10K) += fm10k.o @@ -30,7 +30,6 @@ obj-$(CONFIG_FM10K) += fm10k.o fm10k-y := fm10k_main.o \ fm10k_common.o \ fm10k_pci.o \ - fm10k_ptp.o \ fm10k_netdev.o \ fm10k_ethtool.o \ fm10k_pf.o \ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 9c7fafef7cf6..fcf106e545c5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -27,9 +27,6 @@ #include <linux/rtnetlink.h> #include <linux/if_vlan.h> #include <linux/pci.h> -#include <linux/net_tstamp.h> -#include <linux/clocksource.h> -#include <linux/ptp_clock_kernel.h> #include "fm10k_pf.h" #include "fm10k_vf.h" @@ -333,6 +330,7 @@ struct fm10k_intfc { unsigned long last_reset; unsigned long link_down_event; bool host_ready; + bool lport_map_failed; u32 reta[FM10K_RETA_SIZE]; u32 rssrk[FM10K_RSSRK_SIZE]; @@ -342,22 +340,8 @@ struct fm10k_intfc { #ifdef CONFIG_DEBUG_FS struct dentry *dbg_intfc; - #endif /* CONFIG_DEBUG_FS */ - struct ptp_clock_info ptp_caps; - struct ptp_clock *ptp_clock; - - struct sk_buff_head ts_tx_skb_queue; - u32 tx_hwtstamp_timeouts; - struct hwtstamp_config ts_config; - /* We are unable to actually adjust the clock beyond the frequency - * value. Once the clock is started there is no resetting it. As - * such we maintain a separate offset from the actual hardware clock - * to allow for offset adjustment. - */ - s64 ptp_adjust; - rwlock_t systime_lock; #ifdef CONFIG_DCB u8 pfc_en; #endif @@ -546,21 +530,6 @@ static inline void fm10k_dbg_init(void) {} static inline void fm10k_dbg_exit(void) {} #endif /* CONFIG_DEBUG_FS */ -/* Time Stamping */ -void fm10k_systime_to_hwtstamp(struct fm10k_intfc *interface, - struct skb_shared_hwtstamps *hwtstamp, - u64 systime); -void fm10k_ts_tx_enqueue(struct fm10k_intfc *interface, struct sk_buff *skb); -void fm10k_ts_tx_hwtstamp(struct fm10k_intfc *interface, __le16 dglort, - u64 systime); -void fm10k_ts_reset(struct fm10k_intfc *interface); -void fm10k_ts_init(struct fm10k_intfc *interface); -void fm10k_ts_tx_subtask(struct fm10k_intfc *interface); -void fm10k_ptp_register(struct fm10k_intfc *interface); -void fm10k_ptp_unregister(struct fm10k_intfc *interface); -int fm10k_get_ts_config(struct net_device *netdev, struct ifreq *ifr); -int fm10k_set_ts_config(struct net_device *netdev, struct ifreq *ifr); - /* DCB */ #ifdef CONFIG_DCB void fm10k_dcbnl_set_ops(struct net_device *dev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index 6cfae6ac04ea..5bbf19cfe29b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h index 45e4e5b1f20a..50f71e997448 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c index 2be4361839db..db4bd8bf9722 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 5d6137faf7d1..5116fd043630 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index a23748777b1b..9c0d87503977 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -77,19 +77,6 @@ static const struct fm10k_stats fm10k_gstrings_global_stats[] = { FM10K_STAT("mac_rules_avail", hw.swapi.mac.avail), FM10K_STAT("tx_hang_count", tx_timeout_count), - - FM10K_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), -}; - -static const struct fm10k_stats fm10k_gstrings_debug_stats[] = { - FM10K_STAT("hw_sm_mbx_full", hw_sm_mbx_full), - FM10K_STAT("hw_csum_tx_good", hw_csum_tx_good), - FM10K_STAT("hw_csum_rx_good", hw_csum_rx_good), - FM10K_STAT("rx_switch_errors", rx_switch_errors), - FM10K_STAT("rx_drops", rx_drops), - FM10K_STAT("rx_pp_errors", rx_pp_errors), - FM10K_STAT("rx_link_errors", rx_link_errors), - FM10K_STAT("rx_length_errors", rx_length_errors), }; static const struct fm10k_stats fm10k_gstrings_pf_stats[] = { @@ -121,13 +108,21 @@ static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = { FM10K_MBX_STAT("mbx_rx_mbmem_pushed", rx_mbmem_pushed), }; +#define FM10K_QUEUE_STAT(_name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(struct fm10k_ring, _stat), \ + .stat_offset = offsetof(struct fm10k_ring, _stat) \ +} + +static const struct fm10k_stats fm10k_gstrings_queue_stats[] = { + FM10K_QUEUE_STAT("packets", stats.packets), + FM10K_QUEUE_STAT("bytes", stats.bytes), +}; + #define FM10K_GLOBAL_STATS_LEN ARRAY_SIZE(fm10k_gstrings_global_stats) -#define FM10K_DEBUG_STATS_LEN ARRAY_SIZE(fm10k_gstrings_debug_stats) #define FM10K_PF_STATS_LEN ARRAY_SIZE(fm10k_gstrings_pf_stats) #define FM10K_MBX_STATS_LEN ARRAY_SIZE(fm10k_gstrings_mbx_stats) - -#define FM10K_QUEUE_STATS_LEN(_n) \ - ((_n) * 2 * (sizeof(struct fm10k_queue_stats) / sizeof(u64))) +#define FM10K_QUEUE_STATS_LEN ARRAY_SIZE(fm10k_gstrings_queue_stats) #define FM10K_STATIC_STATS_LEN (FM10K_GLOBAL_STATS_LEN + \ FM10K_NETDEV_STATS_LEN + \ @@ -145,12 +140,10 @@ enum fm10k_self_test_types { }; enum { - FM10K_PRV_FLAG_DEBUG_STATS, FM10K_PRV_FLAG_LEN, }; static const char fm10k_prv_flags[FM10K_PRV_FLAG_LEN][ETH_GSTRING_LEN] = { - "debug-statistics", }; static void fm10k_add_stat_strings(char **p, const char *prefix, @@ -169,7 +162,6 @@ static void fm10k_add_stat_strings(char **p, const char *prefix, static void fm10k_get_stat_strings(struct net_device *dev, u8 *data) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_iov_data *iov_data = interface->iov_data; char *p = (char *)data; unsigned int i; @@ -179,10 +171,6 @@ static void fm10k_get_stat_strings(struct net_device *dev, u8 *data) fm10k_add_stat_strings(&p, "", fm10k_gstrings_global_stats, FM10K_GLOBAL_STATS_LEN); - if (interface->flags & FM10K_FLAG_DEBUG_STATS) - fm10k_add_stat_strings(&p, "", fm10k_gstrings_debug_stats, - FM10K_DEBUG_STATS_LEN); - fm10k_add_stat_strings(&p, "", fm10k_gstrings_mbx_stats, FM10K_MBX_STATS_LEN); @@ -190,26 +178,18 @@ static void fm10k_get_stat_strings(struct net_device *dev, u8 *data) fm10k_add_stat_strings(&p, "", fm10k_gstrings_pf_stats, FM10K_PF_STATS_LEN); - if ((interface->flags & FM10K_FLAG_DEBUG_STATS) && iov_data) { - for (i = 0; i < iov_data->num_vfs; i++) { - char prefix[ETH_GSTRING_LEN]; + for (i = 0; i < interface->hw.mac.max_queues; i++) { + char prefix[ETH_GSTRING_LEN]; - snprintf(prefix, ETH_GSTRING_LEN, "vf_%u_", i); - fm10k_add_stat_strings(&p, prefix, - fm10k_gstrings_mbx_stats, - FM10K_MBX_STATS_LEN); - } - } + snprintf(prefix, ETH_GSTRING_LEN, "tx_queue_%u_", i); + fm10k_add_stat_strings(&p, prefix, + fm10k_gstrings_queue_stats, + FM10K_QUEUE_STATS_LEN); - for (i = 0; i < interface->hw.mac.max_queues; i++) { - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + snprintf(prefix, ETH_GSTRING_LEN, "rx_queue_%u_", i); + fm10k_add_stat_strings(&p, prefix, + fm10k_gstrings_queue_stats, + FM10K_QUEUE_STATS_LEN); } } @@ -236,7 +216,6 @@ static void fm10k_get_strings(struct net_device *dev, static int fm10k_get_sset_count(struct net_device *dev, int sset) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_iov_data *iov_data = interface->iov_data; struct fm10k_hw *hw = &interface->hw; int stats_len = FM10K_STATIC_STATS_LEN; @@ -244,19 +223,11 @@ static int fm10k_get_sset_count(struct net_device *dev, int sset) case ETH_SS_TEST: return FM10K_TEST_LEN; case ETH_SS_STATS: - stats_len += FM10K_QUEUE_STATS_LEN(hw->mac.max_queues); + stats_len += hw->mac.max_queues * 2 * FM10K_QUEUE_STATS_LEN; if (hw->mac.type != fm10k_mac_vf) stats_len += FM10K_PF_STATS_LEN; - if (interface->flags & FM10K_FLAG_DEBUG_STATS) { - stats_len += FM10K_DEBUG_STATS_LEN; - - if (iov_data) - stats_len += FM10K_MBX_STATS_LEN * - iov_data->num_vfs; - } - return stats_len; case ETH_SS_PRIV_FLAGS: return FM10K_PRV_FLAG_LEN; @@ -272,9 +243,10 @@ static void fm10k_add_ethtool_stats(u64 **data, void *pointer, unsigned int i; char *p; - /* simply skip forward if we were not given a valid pointer */ if (!pointer) { - *data += size; + /* memory is not zero allocated so we have to clear it */ + for (i = 0; i < size; i++) + *((*data)++) = 0; return; } @@ -304,11 +276,9 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats __always_unused *stats, u64 *data) { - const int stat_count = sizeof(struct fm10k_queue_stats) / sizeof(u64); struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_iov_data *iov_data = interface->iov_data; struct net_device_stats *net_stats = &netdev->stats; - int i, j; + int i; fm10k_update_stats(interface); @@ -318,11 +288,6 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev, fm10k_add_ethtool_stats(&data, interface, fm10k_gstrings_global_stats, FM10K_GLOBAL_STATS_LEN); - if (interface->flags & FM10K_FLAG_DEBUG_STATS) - fm10k_add_ethtool_stats(&data, interface, - fm10k_gstrings_debug_stats, - FM10K_DEBUG_STATS_LEN); - fm10k_add_ethtool_stats(&data, &interface->hw.mbx, fm10k_gstrings_mbx_stats, FM10K_MBX_STATS_LEN); @@ -333,33 +298,18 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev, FM10K_PF_STATS_LEN); } - if ((interface->flags & FM10K_FLAG_DEBUG_STATS) && iov_data) { - for (i = 0; i < iov_data->num_vfs; i++) { - struct fm10k_vf_info *vf_info; - - vf_info = &iov_data->vf_info[i]; - - fm10k_add_ethtool_stats(&data, &vf_info->mbx, - fm10k_gstrings_mbx_stats, - FM10K_MBX_STATS_LEN); - } - } - for (i = 0; i < interface->hw.mac.max_queues; i++) { struct fm10k_ring *ring; - u64 *queue_stat; ring = interface->tx_ring[i]; - if (ring) - queue_stat = (u64 *)&ring->stats; - for (j = 0; j < stat_count; j++) - *(data++) = ring ? queue_stat[j] : 0; + fm10k_add_ethtool_stats(&data, ring, + fm10k_gstrings_queue_stats, + FM10K_QUEUE_STATS_LEN); ring = interface->rx_ring[i]; - if (ring) - queue_stat = (u64 *)&ring->stats; - for (j = 0; j < stat_count; j++) - *(data++) = ring ? queue_stat[j] : 0; + fm10k_add_ethtool_stats(&data, ring, + fm10k_gstrings_queue_stats, + FM10K_QUEUE_STATS_LEN); } } @@ -1003,27 +953,14 @@ static void fm10k_self_test(struct net_device *dev, static u32 fm10k_get_priv_flags(struct net_device *netdev) { - struct fm10k_intfc *interface = netdev_priv(netdev); - u32 priv_flags = 0; - - if (interface->flags & FM10K_FLAG_DEBUG_STATS) - priv_flags |= BIT(FM10K_PRV_FLAG_DEBUG_STATS); - - return priv_flags; + return 0; } static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags) { - struct fm10k_intfc *interface = netdev_priv(netdev); - if (priv_flags >= BIT(FM10K_PRV_FLAG_LEN)) return -EINVAL; - if (priv_flags & BIT(FM10K_PRV_FLAG_DEBUG_STATS)) - interface->flags |= FM10K_FLAG_DEBUG_STATS; - else - interface->flags &= ~FM10K_FLAG_DEBUG_STATS; - return 0; } @@ -1034,15 +971,29 @@ u32 fm10k_get_reta_size(struct net_device __always_unused *netdev) void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir) { + u16 rss_i = interface->ring_feature[RING_F_RSS].indices; struct fm10k_hw *hw = &interface->hw; - int i; + u32 table[4]; + int i, j; /* record entries to reta table */ - for (i = 0; i < FM10K_RETA_SIZE; i++, indir += 4) { - u32 reta = indir[0] | - (indir[1] << 8) | - (indir[2] << 16) | - (indir[3] << 24); + for (i = 0; i < FM10K_RETA_SIZE; i++) { + u32 reta, n; + + /* generate a new table if we weren't given one */ + for (j = 0; j < 4; j++) { + if (indir) + n = indir[i + j]; + else + n = ethtool_rxfh_indir_default(i + j, rss_i); + + table[j] = n; + } + + reta = table[0] | + (table[1] << 8) | + (table[2] << 16) | + (table[3] << 24); if (interface->reta[i] == reta) continue; @@ -1201,31 +1152,6 @@ static int fm10k_set_channels(struct net_device *dev, return fm10k_setup_tc(dev, netdev_get_num_tc(dev)); } -static int fm10k_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) -{ - struct fm10k_intfc *interface = netdev_priv(dev); - - info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; - - if (interface->ptp_clock) - info->phc_index = ptp_clock_index(interface->ptp_clock); - else - info->phc_index = -1; - - info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); - - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); - - return 0; -} - static const struct ethtool_ops fm10k_ethtool_ops = { .get_strings = fm10k_get_strings, .get_sset_count = fm10k_get_sset_count, @@ -1253,7 +1179,6 @@ static const struct ethtool_ops fm10k_ethtool_ops = { .set_rxfh = fm10k_set_rssh, .get_channels = fm10k_get_channels, .set_channels = fm10k_set_channels, - .get_ts_info = fm10k_get_ts_info, }; void fm10k_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index bbf7c4bac303..47f0743ec03b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 0b465394f88a..0e166e9c90c8 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,15 +29,15 @@ #include "fm10k.h" #define DRV_VERSION "0.19.3-k" +#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; -static const char fm10k_driver_string[] = - "Intel(R) Ethernet Switch Host Interface Driver"; +static const char fm10k_driver_string[] = DRV_SUMMARY; static const char fm10k_copyright[] = - "Copyright (c) 2013 Intel Corporation."; + "Copyright (c) 2013 - 2016 Intel Corporation."; MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); -MODULE_DESCRIPTION("Intel(R) Ethernet Switch Host Interface Driver"); +MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); @@ -424,19 +424,6 @@ static inline void fm10k_rx_hash(struct fm10k_ring *ring, PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); } -static void fm10k_rx_hwtstamp(struct fm10k_ring *rx_ring, - union fm10k_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct fm10k_intfc *interface = rx_ring->q_vector->interface; - - FM10K_CB(skb)->tstamp = rx_desc->q.timestamp; - - if (unlikely(interface->flags & FM10K_FLAG_RX_TS_ENABLED)) - fm10k_systime_to_hwtstamp(interface, skb_hwtstamps(skb), - le64_to_cpu(rx_desc->q.timestamp)); -} - static void fm10k_type_trans(struct fm10k_ring *rx_ring, union fm10k_rx_desc __maybe_unused *rx_desc, struct sk_buff *skb) @@ -486,8 +473,6 @@ static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring, fm10k_rx_checksum(rx_ring, rx_desc, skb); - fm10k_rx_hwtstamp(rx_ring, rx_desc, skb); - FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan; skb_record_rx_queue(skb, rx_ring->queue_index); @@ -835,6 +820,8 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, struct ipv6hdr *ipv6; u8 *raw; } network_hdr; + u8 *transport_hdr; + __be16 frag_off; __be16 protocol; u8 l4_hdr = 0; @@ -852,9 +839,11 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, goto no_csum; } network_hdr.raw = skb_inner_network_header(skb); + transport_hdr = skb_inner_transport_header(skb); } else { protocol = vlan_get_protocol(skb); network_hdr.raw = skb_network_header(skb); + transport_hdr = skb_transport_header(skb); } switch (protocol) { @@ -863,15 +852,17 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, break; case htons(ETH_P_IPV6): l4_hdr = network_hdr.ipv6->nexthdr; + if (likely((transport_hdr - network_hdr.raw) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, network_hdr.raw - skb->data + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but ip version=%x!\n", - protocol); - } - tx_ring->tx_stats.csum_err++; - goto no_csum; + break; } switch (l4_hdr) { @@ -884,9 +875,10 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, version=%d l4 proto=%x\n", + protocol, l4_hdr); } + skb_checksum_help(skb); tx_ring->tx_stats.csum_err++; goto no_csum; } @@ -912,11 +904,6 @@ static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags) /* set type for advanced descriptor with frame checksum insertion */ u32 desc_flags = 0; - /* set timestamping bits */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - likely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) - desc_flags |= FM10K_TXD_FLAG_TIME; - /* set checksum offload bits */ desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM, FM10K_TXD_FLAG_CSUM); @@ -1198,9 +1185,10 @@ void fm10k_tx_timeout_reset(struct fm10k_intfc *interface) * fm10k_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: structure containing interrupt and ring information * @tx_ring: tx ring to clean + * @napi_budget: Used to determine if we are in netpoll **/ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector, - struct fm10k_ring *tx_ring) + struct fm10k_ring *tx_ring, int napi_budget) { struct fm10k_intfc *interface = q_vector->interface; struct fm10k_tx_buffer *tx_buffer; @@ -1238,7 +1226,7 @@ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector, total_packets += tx_buffer->gso_segs; /* free the skb */ - dev_consume_skb_any(tx_buffer->skb); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -1449,8 +1437,10 @@ static int fm10k_poll(struct napi_struct *napi, int budget) int per_ring_budget, work_done = 0; bool clean_complete = true; - fm10k_for_each_ring(ring, q_vector->tx) - clean_complete &= fm10k_clean_tx_irq(q_vector, ring); + fm10k_for_each_ring(ring, q_vector->tx) { + if (!fm10k_clean_tx_irq(q_vector, ring, budget)) + clean_complete = false; + } /* Handle case where we are called by netpoll with a budget of 0 */ if (budget <= 0) @@ -1468,7 +1458,8 @@ static int fm10k_poll(struct napi_struct *napi, int budget) int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget); work_done += work; - clean_complete &= !!(work < per_ring_budget); + if (work >= per_ring_budget) + clean_complete = false; } /* If all work not completed, return budget and keep polling */ @@ -1943,8 +1934,7 @@ static void fm10k_assign_rings(struct fm10k_intfc *interface) static void fm10k_init_reta(struct fm10k_intfc *interface) { u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices; - struct net_device *netdev = interface->netdev; - u32 reta, *indir; + u32 reta; /* If the Rx flow indirection table has been configured manually, we * need to maintain it when possible. @@ -1969,16 +1959,7 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) } repopulate_reta: - indir = kcalloc(fm10k_get_reta_size(netdev), - sizeof(indir[0]), GFP_KERNEL); - - /* generate redirection table using the default kernel policy */ - for (i = 0; i < fm10k_get_reta_size(netdev); i++) - indir[i] = ethtool_rxfh_indir_default(i, rss_i); - - fm10k_write_reta(interface, indir); - - kfree(indir); + fm10k_write_reta(interface, NULL); } /** diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index 98202c3d591c..c9dfa6564fcf 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h index 245a0a3dc32e..b7dbc8a84c05 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 1d0f0583222c..2a08d3f5b6df 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -243,9 +243,6 @@ void fm10k_clean_all_tx_rings(struct fm10k_intfc *interface) for (i = 0; i < interface->num_tx_queues; i++) fm10k_clean_tx_ring(interface->tx_ring[i]); - - /* remove any stale timestamp buffers and free them */ - skb_queue_purge(&interface->ts_tx_skb_queue); } /** @@ -660,10 +657,6 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) __skb_put(skb, pad_len); } - /* prepare packet for hardware time stamping */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - fm10k_ts_tx_enqueue(interface, skb); - if (r_idx >= interface->num_tx_queues) r_idx %= interface->num_tx_queues; @@ -884,7 +877,7 @@ static int __fm10k_uc_sync(struct net_device *dev, return -EADDRNOTAVAIL; /* update table with current entries */ - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 0; + for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { err = hw->mac.ops.update_uc_addr(hw, glort, addr, @@ -947,7 +940,7 @@ static int __fm10k_mc_sync(struct net_device *dev, u16 vid, glort = interface->glort; /* update table with current entries */ - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 0; + for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { hw->mac.ops.update_mc_addr(hw, glort, addr, vid, sync); @@ -1002,11 +995,8 @@ static void fm10k_set_rx_mode(struct net_device *dev) } /* synchronize all of the addresses */ - if (xcast_mode != FM10K_XCAST_MODE_PROMISC) { - __dev_uc_sync(dev, fm10k_uc_sync, fm10k_uc_unsync); - if (xcast_mode != FM10K_XCAST_MODE_ALLMULTI) - __dev_mc_sync(dev, fm10k_mc_sync, fm10k_mc_unsync); - } + __dev_uc_sync(dev, fm10k_uc_sync, fm10k_uc_unsync); + __dev_mc_sync(dev, fm10k_mc_sync, fm10k_mc_unsync); fm10k_mbx_unlock(interface); } @@ -1044,7 +1034,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) hw->mac.ops.update_vlan(hw, 0, 0, true); /* update table with current entries */ - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 0; + for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { hw->mac.ops.update_vlan(hw, vid, 0, true); @@ -1056,11 +1046,8 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) hw->mac.ops.update_xcast_mode(hw, glort, xcast_mode); /* synchronize all of the addresses */ - if (xcast_mode != FM10K_XCAST_MODE_PROMISC) { - __dev_uc_sync(netdev, fm10k_uc_sync, fm10k_uc_unsync); - if (xcast_mode != FM10K_XCAST_MODE_ALLMULTI) - __dev_mc_sync(netdev, fm10k_mc_sync, fm10k_mc_unsync); - } + __dev_uc_sync(netdev, fm10k_uc_sync, fm10k_uc_unsync); + __dev_mc_sync(netdev, fm10k_mc_sync, fm10k_mc_unsync); fm10k_mbx_unlock(interface); @@ -1213,18 +1200,6 @@ static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto, return fm10k_setup_tc(dev, tc->tc); } -static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCGHWTSTAMP: - return fm10k_get_ts_config(netdev, ifr); - case SIOCSHWTSTAMP: - return fm10k_set_ts_config(netdev, ifr); - default: - return -EOPNOTSUPP; - } -} - static void fm10k_assign_l2_accel(struct fm10k_intfc *interface, struct fm10k_l2_accel *l2_accel) { @@ -1402,7 +1377,6 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_get_vf_config = fm10k_ndo_get_vf_config, .ndo_add_vxlan_port = fm10k_add_vxlan_port, .ndo_del_vxlan_port = fm10k_del_vxlan_port, - .ndo_do_ioctl = fm10k_ioctl, .ndo_dfwd_add_station = fm10k_dfwd_add_station, .ndo_dfwd_del_station = fm10k_dfwd_del_station, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index f0992950e228..206a466999ed 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -209,9 +209,6 @@ static void fm10k_reinit(struct fm10k_intfc *interface) netdev->features |= NETIF_F_HW_VLAN_CTAG_RX; } - /* reset clock */ - fm10k_ts_reset(interface); - err = netif_running(netdev) ? fm10k_open(netdev) : 0; if (err) goto err_open; @@ -559,7 +556,6 @@ static void fm10k_service_task(struct work_struct *work) /* tasks only run when interface is up */ fm10k_watchdog_subtask(interface); fm10k_check_hang_subtask(interface); - fm10k_ts_tx_subtask(interface); /* release lock on service events to allow scheduling next event */ fm10k_service_event_complete(interface); @@ -1204,25 +1200,6 @@ static s32 fm10k_mbx_mac_addr(struct fm10k_hw *hw, u32 **results, return 0; } -static s32 fm10k_1588_msg_vf(struct fm10k_hw *hw, u32 **results, - struct fm10k_mbx_info __always_unused *mbx) -{ - struct fm10k_intfc *interface; - u64 timestamp; - s32 err; - - err = fm10k_tlv_attr_get_u64(results[FM10K_1588_MSG_TIMESTAMP], - ×tamp); - if (err) - return err; - - interface = container_of(hw, struct fm10k_intfc, hw); - - fm10k_ts_tx_hwtstamp(interface, 0, timestamp); - - return 0; -} - /* generic error handler for mailbox issues */ static s32 fm10k_mbx_error(struct fm10k_hw *hw, u32 **results, struct fm10k_mbx_info __always_unused *mbx) @@ -1243,7 +1220,6 @@ static const struct fm10k_msg_data vf_mbx_data[] = { FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test), FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_mbx_mac_addr), FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf), - FM10K_VF_MSG_1588_HANDLER(fm10k_1588_msg_vf), FM10K_TLV_MSG_ERROR_HANDLER(fm10k_mbx_error), }; @@ -1287,11 +1263,40 @@ static s32 fm10k_lport_map(struct fm10k_hw *hw, u32 **results, u32 dglort_map = hw->mac.dglort_map; s32 err; + interface = container_of(hw, struct fm10k_intfc, hw); + + err = fm10k_msg_err_pf(hw, results, mbx); + if (!err && hw->swapi.status) { + /* force link down for a reasonable delay */ + interface->link_down_event = jiffies + (2 * HZ); + set_bit(__FM10K_LINK_DOWN, &interface->state); + + /* reset dglort_map back to no config */ + hw->mac.dglort_map = FM10K_DGLORTMAP_NONE; + + fm10k_service_event_schedule(interface); + + /* prevent overloading kernel message buffer */ + if (interface->lport_map_failed) + return 0; + + interface->lport_map_failed = true; + + if (hw->swapi.status == FM10K_MSG_ERR_PEP_NOT_SCHEDULED) + dev_warn(&interface->pdev->dev, + "cannot obtain link because the host interface is configured for a PCIe host interface bandwidth of zero\n"); + dev_warn(&interface->pdev->dev, + "request logical port map failed: %d\n", + hw->swapi.status); + + return 0; + } + err = fm10k_msg_lport_map_pf(hw, results, mbx); if (err) return err; - interface = container_of(hw, struct fm10k_intfc, hw); + interface->lport_map_failed = false; /* we need to reset if port count was just updated */ if (dglort_map != hw->mac.dglort_map) @@ -1341,68 +1346,6 @@ static s32 fm10k_update_pvid(struct fm10k_hw *hw, u32 **results, return 0; } -static s32 fm10k_1588_msg_pf(struct fm10k_hw *hw, u32 **results, - struct fm10k_mbx_info __always_unused *mbx) -{ - struct fm10k_swapi_1588_timestamp timestamp; - struct fm10k_iov_data *iov_data; - struct fm10k_intfc *interface; - u16 sglort, vf_idx; - s32 err; - - err = fm10k_tlv_attr_get_le_struct( - results[FM10K_PF_ATTR_ID_1588_TIMESTAMP], - ×tamp, sizeof(timestamp)); - if (err) - return err; - - interface = container_of(hw, struct fm10k_intfc, hw); - - if (timestamp.dglort) { - fm10k_ts_tx_hwtstamp(interface, timestamp.dglort, - le64_to_cpu(timestamp.egress)); - return 0; - } - - /* either dglort or sglort must be set */ - if (!timestamp.sglort) - return FM10K_ERR_PARAM; - - /* verify GLORT is at least one of the ones we own */ - sglort = le16_to_cpu(timestamp.sglort); - if (!fm10k_glort_valid_pf(hw, sglort)) - return FM10K_ERR_PARAM; - - if (sglort == interface->glort) { - fm10k_ts_tx_hwtstamp(interface, 0, - le64_to_cpu(timestamp.ingress)); - return 0; - } - - /* if there is no iov_data then there is no mailbox to process */ - if (!ACCESS_ONCE(interface->iov_data)) - return FM10K_ERR_PARAM; - - rcu_read_lock(); - - /* notify VF if this timestamp belongs to it */ - iov_data = interface->iov_data; - vf_idx = (hw->mac.dglort_map & FM10K_DGLORTMAP_NONE) - sglort; - - if (!iov_data || vf_idx >= iov_data->num_vfs) { - err = FM10K_ERR_PARAM; - goto err_unlock; - } - - err = hw->iov.ops.report_timestamp(hw, &iov_data->vf_info[vf_idx], - le64_to_cpu(timestamp.ingress)); - -err_unlock: - rcu_read_unlock(); - - return err; -} - static const struct fm10k_msg_data pf_mbx_data[] = { FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf), FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf), @@ -1410,7 +1353,6 @@ static const struct fm10k_msg_data pf_mbx_data[] = { FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf), FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf), FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_update_pvid), - FM10K_PF_MSG_1588_TIMESTAMP_HANDLER(fm10k_1588_msg_pf), FM10K_TLV_MSG_ERROR_HANDLER(fm10k_mbx_error), }; @@ -1789,18 +1731,9 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, return -EIO; } - /* assign BAR 4 resources for use with PTP */ - if (fm10k_read_reg(hw, FM10K_CTRL) & FM10K_CTRL_BAR4_ALLOWED) - interface->sw_addr = ioremap(pci_resource_start(pdev, 4), - pci_resource_len(pdev, 4)); - hw->sw_addr = interface->sw_addr; - /* initialize DCBNL interface */ fm10k_dcbnl_set_ops(netdev); - /* Intitialize timestamp data */ - fm10k_ts_init(interface); - /* set default ring sizes */ interface->tx_ring_count = FM10K_DEFAULT_TXD; interface->rx_ring_count = FM10K_DEFAULT_RXD; @@ -2018,9 +1951,6 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* kick off service timer now, even when interface is down */ mod_timer(&interface->service_timer, (HZ * 2) + jiffies); - /* Register PTP interface */ - fm10k_ptp_register(interface); - /* print warning for non-optimal configurations */ fm10k_slot_warn(interface); @@ -2077,9 +2007,6 @@ static void fm10k_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); - /* cleanup timestamp handling */ - fm10k_ptp_unregister(interface); - /* release VFs */ fm10k_iov_disable(pdev); @@ -2152,9 +2079,6 @@ static int fm10k_resume(struct pci_dev *pdev) /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); - /* reset clock */ - fm10k_ts_reset(interface); - rtnl_lock(); err = fm10k_init_queueing_scheme(interface); @@ -2271,6 +2195,8 @@ static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev, if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; + rtnl_lock(); + if (netif_running(netdev)) fm10k_close(netdev); @@ -2279,7 +2205,7 @@ static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev, /* free interrupts */ fm10k_clear_queueing_scheme(interface); - pci_disable_device(pdev); + rtnl_unlock(); /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -2349,27 +2275,31 @@ static void fm10k_io_resume(struct pci_dev *pdev) /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); + rtnl_lock(); + err = fm10k_init_queueing_scheme(interface); if (err) { dev_err(&interface->pdev->dev, "init_queueing_scheme failed: %d\n", err); - return; + goto unlock; } /* reassociate interrupts */ fm10k_mbx_request_irq(interface); - /* reset clock */ - fm10k_ts_reset(interface); - + rtnl_lock(); if (netif_running(netdev)) err = fm10k_open(netdev); + rtnl_unlock(); /* final check of hardware state before registering the interface */ err = err ? : fm10k_hw_ready(interface); if (!err) netif_device_attach(netdev); + +unlock: + rtnl_unlock(); } static const struct pci_error_handlers fm10k_err_handler = { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index ecc99f9d2cce..dc75507c9926 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -219,8 +219,8 @@ static s32 fm10k_update_vlan_pf(struct fm10k_hw *hw, u32 vid, u8 vsi, bool set) /* VLAN multi-bit write: * The multi-bit write has several parts to it. - * 3 2 1 0 - * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * 24 16 8 0 + * 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | RSVD0 | Length |C|RSVD0| VLAN ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -488,6 +488,10 @@ static s32 fm10k_update_lport_state_pf(struct fm10k_hw *hw, u16 glort, if (!fm10k_glort_valid_pf(hw, glort)) return FM10K_ERR_PARAM; + /* reset multicast mode if deleting lport */ + if (!enable) + fm10k_update_xcast_mode_pf(hw, glort, FM10K_XCAST_MODE_NONE); + /* construct the lport message from the 2 pieces of data we have */ lport_msg = ((u32)count << 16) | glort; @@ -864,9 +868,13 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0); fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0); - /* determine correct default VLAN ID */ + /* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is + * used here to indicate to the VF that it will not have privilege to + * write VLAN_TABLE. All policy is enforced on the PF but this allows + * the VF to correctly report errors to userspace rqeuests. + */ if (vf_info->pf_vid) - vf_vid = vf_info->pf_vid | FM10K_VLAN_CLEAR; + vf_vid = vf_info->pf_vid | FM10K_VLAN_OVERRIDE; else vf_vid = vf_info->sw_vid; @@ -1140,19 +1148,6 @@ static void fm10k_iov_update_stats_pf(struct fm10k_hw *hw, fm10k_update_hw_stats_q(hw, q, idx, qpp); } -static s32 fm10k_iov_report_timestamp_pf(struct fm10k_hw *hw, - struct fm10k_vf_info *vf_info, - u64 timestamp) -{ - u32 msg[4]; - - /* generate port state response to notify VF it is not ready */ - fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_1588); - fm10k_tlv_attr_put_u64(msg, FM10K_1588_MSG_TIMESTAMP, timestamp); - - return vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg); -} - /** * fm10k_iov_msg_msix_pf - Message handler for MSI-X request from VF * @hw: Pointer to hardware structure @@ -1223,18 +1218,32 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, if (err) return err; - /* verify upper 16 bits are zero */ - if (vid >> 16) - return FM10K_ERR_PARAM; - set = !(vid & FM10K_VLAN_CLEAR); vid &= ~FM10K_VLAN_CLEAR; - err = fm10k_iov_select_vid(vf_info, (u16)vid); - if (err < 0) - return err; + /* if the length field has been set, this is a multi-bit + * update request. For multi-bit requests, simply disallow + * them when the pf_vid has been set. In this case, the PF + * should have already cleared the VLAN_TABLE, and if we + * allowed them, it could allow a rogue VF to receive traffic + * on a VLAN it was not assigned. In the single-bit case, we + * need to modify requests for VLAN 0 to use the default PF or + * SW vid when assigned. + */ - vid = err; + if (vid >> 16) { + /* prevent multi-bit requests when PF has + * administratively set the VLAN for this VF + */ + if (vf_info->pf_vid) + return FM10K_ERR_PARAM; + } else { + err = fm10k_iov_select_vid(vf_info, (u16)vid); + if (err < 0) + return err; + + vid = err; + } /* update VSI info for VF in regards to VLAN table */ err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set); @@ -1633,6 +1642,8 @@ out: /* This structure defines the attibutes to be parsed below */ const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[] = { + FM10K_TLV_ATTR_LE_STRUCT(FM10K_PF_ATTR_ID_ERR, + sizeof(struct fm10k_swapi_error)), FM10K_TLV_ATTR_U32(FM10K_PF_ATTR_ID_LPORT_MAP), FM10K_TLV_ATTR_LAST }; @@ -1773,89 +1784,6 @@ s32 fm10k_msg_err_pf(struct fm10k_hw *hw, u32 **results, return 0; } -const struct fm10k_tlv_attr fm10k_1588_timestamp_msg_attr[] = { - FM10K_TLV_ATTR_LE_STRUCT(FM10K_PF_ATTR_ID_1588_TIMESTAMP, - sizeof(struct fm10k_swapi_1588_timestamp)), - FM10K_TLV_ATTR_LAST -}; - -/* currently there is no shared 1588 timestamp handler */ - -/** - * fm10k_adjust_systime_pf - Adjust systime frequency - * @hw: pointer to hardware structure - * @ppb: adjustment rate in parts per billion - * - * This function will adjust the SYSTIME_CFG register contained in BAR 4 - * if this function is supported for BAR 4 access. The adjustment amount - * is based on the parts per billion value provided and adjusted to a - * value based on parts per 2^48 clock cycles. - * - * If adjustment is not supported or the requested value is too large - * we will return an error. - **/ -static s32 fm10k_adjust_systime_pf(struct fm10k_hw *hw, s32 ppb) -{ - u64 systime_adjust; - - /* if sw_addr is not set we don't have switch register access */ - if (!hw->sw_addr) - return ppb ? FM10K_ERR_PARAM : 0; - - /* we must convert the value from parts per billion to parts per - * 2^48 cycles. In addition I have opted to only use the 30 most - * significant bits of the adjustment value as the 8 least - * significant bits are located in another register and represent - * a value significantly less than a part per billion, the result - * of dropping the 8 least significant bits is that the adjustment - * value is effectively multiplied by 2^8 when we write it. - * - * As a result of all this the math for this breaks down as follows: - * ppb / 10^9 == adjust * 2^8 / 2^48 - * If we solve this for adjust, and simplify it comes out as: - * ppb * 2^31 / 5^9 == adjust - */ - systime_adjust = (ppb < 0) ? -ppb : ppb; - systime_adjust <<= 31; - do_div(systime_adjust, 1953125); - - /* verify the requested adjustment value is in range */ - if (systime_adjust > FM10K_SW_SYSTIME_ADJUST_MASK) - return FM10K_ERR_PARAM; - - if (ppb > 0) - systime_adjust |= FM10K_SW_SYSTIME_ADJUST_DIR_POSITIVE; - - fm10k_write_sw_reg(hw, FM10K_SW_SYSTIME_ADJUST, (u32)systime_adjust); - - return 0; -} - -/** - * fm10k_read_systime_pf - Reads value of systime registers - * @hw: pointer to the hardware structure - * - * Function reads the content of 2 registers, combined to represent a 64 bit - * value measured in nanosecods. In order to guarantee the value is accurate - * we check the 32 most significant bits both before and after reading the - * 32 least significant bits to verify they didn't change as we were reading - * the registers. - **/ -static u64 fm10k_read_systime_pf(struct fm10k_hw *hw) -{ - u32 systime_l, systime_h, systime_tmp; - - systime_h = fm10k_read_reg(hw, FM10K_SYSTIME + 1); - - do { - systime_tmp = systime_h; - systime_l = fm10k_read_reg(hw, FM10K_SYSTIME); - systime_h = fm10k_read_reg(hw, FM10K_SYSTIME + 1); - } while (systime_tmp != systime_h); - - return ((u64)systime_h << 32) | systime_l; -} - static const struct fm10k_msg_data fm10k_msg_data_pf[] = { FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf), FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf), @@ -1885,8 +1813,6 @@ static const struct fm10k_mac_ops mac_ops_pf = { .set_dma_mask = fm10k_set_dma_mask_pf, .get_fault = fm10k_get_fault_pf, .get_host_state = fm10k_get_host_state_pf, - .adjust_systime = fm10k_adjust_systime_pf, - .read_systime = fm10k_read_systime_pf, }; static const struct fm10k_iov_ops iov_ops_pf = { @@ -1898,7 +1824,6 @@ static const struct fm10k_iov_ops iov_ops_pf = { .set_lport = fm10k_iov_set_lport_pf, .reset_lport = fm10k_iov_reset_lport_pf, .update_stats = fm10k_iov_update_stats_pf, - .report_timestamp = fm10k_iov_report_timestamp_pf, }; static s32 fm10k_get_invariants_pf(struct fm10k_hw *hw) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h index b2d96b45ca3c..3336d3c10760 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -42,8 +42,6 @@ enum fm10k_pf_tlv_msg_id_v1 { FM10K_PF_MSG_ID_UPDATE_FLOW = 0x503, FM10K_PF_MSG_ID_DELETE_FLOW = 0x504, FM10K_PF_MSG_ID_SET_FLOW_STATE = 0x505, - FM10K_PF_MSG_ID_GET_1588_INFO = 0x506, - FM10K_PF_MSG_ID_1588_TIMESTAMP = 0x701, }; enum fm10k_pf_tlv_attr_id_v1 { @@ -61,7 +59,6 @@ enum fm10k_pf_tlv_attr_id_v1 { FM10K_PF_ATTR_ID_DELETE_FLOW = 0x0B, FM10K_PF_ATTR_ID_PORT = 0x0C, FM10K_PF_ATTR_ID_UPDATE_PVID = 0x0D, - FM10K_PF_ATTR_ID_1588_TIMESTAMP = 0x10, }; #define FM10K_MSG_LPORT_MAP_GLORT_SHIFT 0 @@ -74,6 +71,8 @@ enum fm10k_pf_tlv_attr_id_v1 { #define FM10K_MSG_UPDATE_PVID_PVID_SHIFT 16 #define FM10K_MSG_UPDATE_PVID_PVID_SIZE 16 +#define FM10K_MSG_ERR_PEP_NOT_SCHEDULED 280 + /* The following data structures are overlayed directly onto TLV mailbox * messages, and must not break 4 byte alignment. Ensure the structures line * up correctly as per their TLV definition. @@ -100,13 +99,6 @@ struct fm10k_swapi_error { struct fm10k_global_table_data ffu; } __aligned(4) __packed; -struct fm10k_swapi_1588_timestamp { - __le64 egress; - __le64 ingress; - __le16 dglort; - __le16 sglort; -} __aligned(4) __packed; - s32 fm10k_msg_lport_map_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); extern const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[]; #define FM10K_PF_MSG_LPORT_MAP_HANDLER(func) \ @@ -122,11 +114,6 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[]; #define FM10K_PF_MSG_ERR_HANDLER(msg, func) \ FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func) -extern const struct fm10k_tlv_attr fm10k_1588_timestamp_msg_attr[]; -#define FM10K_PF_MSG_1588_TIMESTAMP_HANDLER(func) \ - FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_1588_TIMESTAMP, \ - fm10k_1588_timestamp_msg_attr, func) - s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ptp.c b/drivers/net/ethernet/intel/fm10k/fm10k_ptp.c deleted file mode 100644 index 1c1ccade6538..000000000000 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ptp.c +++ /dev/null @@ -1,462 +0,0 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ - -#include <linux/ptp_classify.h> -#include <linux/ptp_clock_kernel.h> - -#include "fm10k.h" - -#define FM10K_TS_TX_TIMEOUT (HZ * 15) - -void fm10k_systime_to_hwtstamp(struct fm10k_intfc *interface, - struct skb_shared_hwtstamps *hwtstamp, - u64 systime) -{ - unsigned long flags; - - read_lock_irqsave(&interface->systime_lock, flags); - systime += interface->ptp_adjust; - read_unlock_irqrestore(&interface->systime_lock, flags); - - hwtstamp->hwtstamp = ns_to_ktime(systime); -} - -static struct sk_buff *fm10k_ts_tx_skb(struct fm10k_intfc *interface, - __le16 dglort) -{ - struct sk_buff_head *list = &interface->ts_tx_skb_queue; - struct sk_buff *skb; - - skb_queue_walk(list, skb) { - if (FM10K_CB(skb)->fi.w.dglort == dglort) - return skb; - } - - return NULL; -} - -void fm10k_ts_tx_enqueue(struct fm10k_intfc *interface, struct sk_buff *skb) -{ - struct sk_buff_head *list = &interface->ts_tx_skb_queue; - struct sk_buff *clone; - unsigned long flags; - - /* create clone for us to return on the Tx path */ - clone = skb_clone_sk(skb); - if (!clone) - return; - - FM10K_CB(clone)->ts_tx_timeout = jiffies + FM10K_TS_TX_TIMEOUT; - spin_lock_irqsave(&list->lock, flags); - - /* attempt to locate any buffers with the same dglort, - * if none are present then insert skb in tail of list - */ - skb = fm10k_ts_tx_skb(interface, FM10K_CB(clone)->fi.w.dglort); - if (!skb) { - skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; - __skb_queue_tail(list, clone); - } - - spin_unlock_irqrestore(&list->lock, flags); - - /* if list is already has one then we just free the clone */ - if (skb) - dev_kfree_skb(clone); -} - -void fm10k_ts_tx_hwtstamp(struct fm10k_intfc *interface, __le16 dglort, - u64 systime) -{ - struct skb_shared_hwtstamps shhwtstamps; - struct sk_buff_head *list = &interface->ts_tx_skb_queue; - struct sk_buff *skb; - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - - /* attempt to locate and pull the sk_buff out of the list */ - skb = fm10k_ts_tx_skb(interface, dglort); - if (skb) - __skb_unlink(skb, list); - - spin_unlock_irqrestore(&list->lock, flags); - - /* if not found do nothing */ - if (!skb) - return; - - /* timestamp the sk_buff and free out copy */ - fm10k_systime_to_hwtstamp(interface, &shhwtstamps, systime); - skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); -} - -void fm10k_ts_tx_subtask(struct fm10k_intfc *interface) -{ - struct sk_buff_head *list = &interface->ts_tx_skb_queue; - struct sk_buff *skb, *tmp; - unsigned long flags; - - /* If we're down or resetting, just bail */ - if (test_bit(__FM10K_DOWN, &interface->state) || - test_bit(__FM10K_RESETTING, &interface->state)) - return; - - spin_lock_irqsave(&list->lock, flags); - - /* walk though the list and flush any expired timestamp packets */ - skb_queue_walk_safe(list, skb, tmp) { - if (!time_is_after_jiffies(FM10K_CB(skb)->ts_tx_timeout)) - continue; - __skb_unlink(skb, list); - kfree_skb(skb); - interface->tx_hwtstamp_timeouts++; - } - - spin_unlock_irqrestore(&list->lock, flags); -} - -static u64 fm10k_systime_read(struct fm10k_intfc *interface) -{ - struct fm10k_hw *hw = &interface->hw; - - return hw->mac.ops.read_systime(hw); -} - -void fm10k_ts_reset(struct fm10k_intfc *interface) -{ - s64 ns = ktime_to_ns(ktime_get_real()); - unsigned long flags; - - /* reinitialize the clock */ - write_lock_irqsave(&interface->systime_lock, flags); - interface->ptp_adjust = fm10k_systime_read(interface) - ns; - write_unlock_irqrestore(&interface->systime_lock, flags); -} - -void fm10k_ts_init(struct fm10k_intfc *interface) -{ - /* Initialize lock protecting systime access */ - rwlock_init(&interface->systime_lock); - - /* Initialize skb queue for pending timestamp requests */ - skb_queue_head_init(&interface->ts_tx_skb_queue); - - /* reset the clock to current kernel time */ - fm10k_ts_reset(interface); -} - -/** - * fm10k_get_ts_config - get current hardware timestamping configuration - * @netdev: network interface device structure - * @ifreq: ioctl data - * - * This function returns the current timestamping settings. Rather than - * attempt to deconstruct registers to fill in the values, simply keep a copy - * of the old settings around, and return a copy when requested. - */ -int fm10k_get_ts_config(struct net_device *netdev, struct ifreq *ifr) -{ - struct fm10k_intfc *interface = netdev_priv(netdev); - struct hwtstamp_config *config = &interface->ts_config; - - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? - -EFAULT : 0; -} - -/** - * fm10k_set_ts_config - control hardware time stamping - * @netdev: network interface device structure - * @ifreq: ioctl data - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't cause any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". - * - * Since hardware always timestamps Path delay packets when timestamping V2 - * packets, regardless of the type specified in the register, only use V2 - * Event mode. This more accurately tells the user what the hardware is going - * to do anyways. - */ -int fm10k_set_ts_config(struct net_device *netdev, struct ifreq *ifr) -{ - struct fm10k_intfc *interface = netdev_priv(netdev); - struct hwtstamp_config ts_config; - - if (copy_from_user(&ts_config, ifr->ifr_data, sizeof(ts_config))) - return -EFAULT; - - /* reserved for future extensions */ - if (ts_config.flags) - return -EINVAL; - - switch (ts_config.tx_type) { - case HWTSTAMP_TX_OFF: - break; - case HWTSTAMP_TX_ON: - /* we likely need some check here to see if this is supported */ - break; - default: - return -ERANGE; - } - - switch (ts_config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - interface->flags &= ~FM10K_FLAG_RX_TS_ENABLED; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_ALL: - interface->flags |= FM10K_FLAG_RX_TS_ENABLED; - ts_config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - return -ERANGE; - } - - /* save these settings for future reference */ - interface->ts_config = ts_config; - - return copy_to_user(ifr->ifr_data, &ts_config, sizeof(ts_config)) ? - -EFAULT : 0; -} - -static int fm10k_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) -{ - struct fm10k_intfc *interface; - struct fm10k_hw *hw; - int err; - - interface = container_of(ptp, struct fm10k_intfc, ptp_caps); - hw = &interface->hw; - - err = hw->mac.ops.adjust_systime(hw, ppb); - - /* the only error we should see is if the value is out of range */ - return (err == FM10K_ERR_PARAM) ? -ERANGE : err; -} - -static int fm10k_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) -{ - struct fm10k_intfc *interface; - unsigned long flags; - - interface = container_of(ptp, struct fm10k_intfc, ptp_caps); - - write_lock_irqsave(&interface->systime_lock, flags); - interface->ptp_adjust += delta; - write_unlock_irqrestore(&interface->systime_lock, flags); - - return 0; -} - -static int fm10k_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) -{ - struct fm10k_intfc *interface; - unsigned long flags; - u64 now; - - interface = container_of(ptp, struct fm10k_intfc, ptp_caps); - - read_lock_irqsave(&interface->systime_lock, flags); - now = fm10k_systime_read(interface) + interface->ptp_adjust; - read_unlock_irqrestore(&interface->systime_lock, flags); - - *ts = ns_to_timespec64(now); - - return 0; -} - -static int fm10k_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) -{ - struct fm10k_intfc *interface; - unsigned long flags; - u64 ns = timespec64_to_ns(ts); - - interface = container_of(ptp, struct fm10k_intfc, ptp_caps); - - write_lock_irqsave(&interface->systime_lock, flags); - interface->ptp_adjust = fm10k_systime_read(interface) - ns; - write_unlock_irqrestore(&interface->systime_lock, flags); - - return 0; -} - -static int fm10k_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int __always_unused on) -{ - struct ptp_clock_time *t = &rq->perout.period; - struct fm10k_intfc *interface; - struct fm10k_hw *hw; - u64 period; - u32 step; - - /* we can only support periodic output */ - if (rq->type != PTP_CLK_REQ_PEROUT) - return -EINVAL; - - /* verify the requested channel is there */ - if (rq->perout.index >= ptp->n_per_out) - return -EINVAL; - - /* we cannot enforce start time as there is no - * mechanism for that in the hardware, we can only control - * the period. - */ - - /* we cannot support periods greater than 4 seconds due to reg limit */ - if (t->sec > 4 || t->sec < 0) - return -ERANGE; - - interface = container_of(ptp, struct fm10k_intfc, ptp_caps); - hw = &interface->hw; - - /* we simply cannot support the operation if we don't have BAR4 */ - if (!hw->sw_addr) - return -ENOTSUPP; - - /* convert to unsigned 64b ns, verify we can put it in a 32b register */ - period = t->sec * 1000000000LL + t->nsec; - - /* determine the minimum size for period */ - step = 2 * (fm10k_read_reg(hw, FM10K_SYSTIME_CFG) & - FM10K_SYSTIME_CFG_STEP_MASK); - - /* verify the value is in range supported by hardware */ - if ((period && (period < step)) || (period > U32_MAX)) - return -ERANGE; - - /* notify hardware of request to being sending pulses */ - fm10k_write_sw_reg(hw, FM10K_SW_SYSTIME_PULSE(rq->perout.index), - (u32)period); - - return 0; -} - -static struct ptp_pin_desc fm10k_ptp_pd[2] = { - { - .name = "IEEE1588_PULSE0", - .index = 0, - .func = PTP_PF_PEROUT, - .chan = 0 - }, - { - .name = "IEEE1588_PULSE1", - .index = 1, - .func = PTP_PF_PEROUT, - .chan = 1 - } -}; - -static int fm10k_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, - enum ptp_pin_function func, unsigned int chan) -{ - /* verify the requested pin is there */ - if (pin >= ptp->n_pins || !ptp->pin_config) - return -EINVAL; - - /* enforce locked channels, no changing them */ - if (chan != ptp->pin_config[pin].chan) - return -EINVAL; - - /* we want to keep the functions locked as well */ - if (func != ptp->pin_config[pin].func) - return -EINVAL; - - return 0; -} - -void fm10k_ptp_register(struct fm10k_intfc *interface) -{ - struct ptp_clock_info *ptp_caps = &interface->ptp_caps; - struct device *dev = &interface->pdev->dev; - struct ptp_clock *ptp_clock; - - snprintf(ptp_caps->name, sizeof(ptp_caps->name), - "%s", interface->netdev->name); - ptp_caps->owner = THIS_MODULE; - /* This math is simply the inverse of the math in - * fm10k_adjust_systime_pf applied to an adjustment value - * of 2^30 - 1 which is the maximum value of the register: - * max_ppb == ((2^30 - 1) * 5^9) / 2^31 - */ - ptp_caps->max_adj = 976562; - ptp_caps->adjfreq = fm10k_ptp_adjfreq; - ptp_caps->adjtime = fm10k_ptp_adjtime; - ptp_caps->gettime64 = fm10k_ptp_gettime; - ptp_caps->settime64 = fm10k_ptp_settime; - - /* provide pins if BAR4 is accessible */ - if (interface->sw_addr) { - /* enable periodic outputs */ - ptp_caps->n_per_out = 2; - ptp_caps->enable = fm10k_ptp_enable; - - /* enable clock pins */ - ptp_caps->verify = fm10k_ptp_verify; - ptp_caps->n_pins = 2; - ptp_caps->pin_config = fm10k_ptp_pd; - } - - ptp_clock = ptp_clock_register(ptp_caps, dev); - if (IS_ERR(ptp_clock)) { - ptp_clock = NULL; - dev_err(dev, "ptp_clock_register failed\n"); - } else { - dev_info(dev, "registered PHC device %s\n", ptp_caps->name); - } - - interface->ptp_clock = ptp_clock; -} - -void fm10k_ptp_unregister(struct fm10k_intfc *interface) -{ - struct ptp_clock *ptp_clock = interface->ptp_clock; - struct device *dev = &interface->pdev->dev; - - if (!ptp_clock) - return; - - interface->ptp_clock = NULL; - - ptp_clock_unregister(ptp_clock); - dev_info(dev, "removed PHC %s\n", interface->ptp_caps.name); -} diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c index b999897e50d8..f8e87bf086b9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -481,7 +481,8 @@ static s32 fm10k_tlv_attr_validate(u32 *attr, * up into an array of pointers stored in results. The function will * return FM10K_ERR_PARAM on any input or message error, * FM10K_NOT_IMPLEMENTED for any attribute that is outside of the array - * and 0 on success. + * and 0 on success. Any attributes not found in tlv_attr will be silently + * ignored. **/ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results, const struct fm10k_tlv_attr *tlv_attr) @@ -518,14 +519,15 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results, while (offset < len) { attr_id = *attr & FM10K_TLV_ID_MASK; - if (attr_id < FM10K_TLV_RESULTS_MAX) - err = fm10k_tlv_attr_validate(attr, tlv_attr); - else - err = FM10K_NOT_IMPLEMENTED; + if (attr_id >= FM10K_TLV_RESULTS_MAX) + return FM10K_NOT_IMPLEMENTED; - if (err < 0) + err = fm10k_tlv_attr_validate(attr, tlv_attr); + if (err == FM10K_NOT_IMPLEMENTED) + ; /* silently ignore non-implemented attributes */ + else if (err) return err; - if (!err) + else results[attr_id] = attr; /* update offset */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h index e1845e0a17d8..a1f1027fe184 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 5c0533054c5f..b8bc06183720 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -225,11 +225,6 @@ struct fm10k_hw; #define FM10K_STATS_LOOPBACK_DROP 0x3806 #define FM10K_STATS_NODESC_DROP 0x3807 -/* Timesync registers */ -#define FM10K_SYSTIME 0x3814 -#define FM10K_SYSTIME_CFG 0x3818 -#define FM10K_SYSTIME_CFG_STEP_MASK 0x0000000F - /* PCIe state registers */ #define FM10K_PHYADDR 0x381C @@ -355,6 +350,7 @@ struct fm10k_hw; #define FM10K_VLAN_TABLE_VSI_MAX 64 #define FM10K_VLAN_LENGTH_SHIFT 16 #define FM10K_VLAN_CLEAR BIT(15) +#define FM10K_VLAN_OVERRIDE FM10K_VLAN_CLEAR #define FM10K_VLAN_ALL \ ((FM10K_VLAN_TABLE_VID_MAX - 1) << FM10K_VLAN_LENGTH_SHIFT) @@ -381,12 +377,6 @@ struct fm10k_hw; #define FM10K_VFSYSTIME 0x00040 #define FM10K_VFITR(_n) ((_n) + 0x00060) -/* Registers contained in BAR 4 for Switch management */ -#define FM10K_SW_SYSTIME_ADJUST 0x0224D -#define FM10K_SW_SYSTIME_ADJUST_MASK 0x3FFFFFFF -#define FM10K_SW_SYSTIME_ADJUST_DIR_POSITIVE 0x80000000 -#define FM10K_SW_SYSTIME_PULSE(_n) ((_n) + 0x02252) - enum fm10k_int_source { fm10k_int_mailbox = 0, fm10k_int_pcie_fault = 1, @@ -550,8 +540,6 @@ struct fm10k_mac_ops { struct fm10k_dglort_cfg *); void (*set_dma_mask)(struct fm10k_hw *, u64); s32 (*get_fault)(struct fm10k_hw *, int, struct fm10k_fault *); - s32 (*adjust_systime)(struct fm10k_hw *, s32 ppb); - u64 (*read_systime)(struct fm10k_hw *); }; enum fm10k_mac_type { @@ -643,7 +631,6 @@ struct fm10k_iov_ops { s32 (*set_lport)(struct fm10k_hw *, struct fm10k_vf_info *, u16, u8); void (*reset_lport)(struct fm10k_hw *, struct fm10k_vf_info *); void (*update_stats)(struct fm10k_hw *, struct fm10k_hw_stats_q *, u16); - s32 (*report_timestamp)(struct fm10k_hw *, struct fm10k_vf_info *, u64); }; struct fm10k_iov_info { @@ -667,7 +654,6 @@ struct fm10k_info { struct fm10k_hw { u32 __iomem *hw_addr; - u32 __iomem *sw_addr; void *back; struct fm10k_mac_info mac; struct fm10k_bus_info bus; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index 91f8d7311f3b..3b06685ea63b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2015 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -188,7 +188,7 @@ static s32 fm10k_update_vlan_vf(struct fm10k_hw *hw, u32 vid, u8 vsi, bool set) if (vsi) return FM10K_ERR_PARAM; - /* verify upper 4 bits of vid and length are 0 */ + /* clever trick to verify reserved bits in both vid and length */ if ((vid << 16 | vid) >> 28) return FM10K_ERR_PARAM; @@ -228,7 +228,7 @@ s32 fm10k_msg_mac_vlan_vf(struct fm10k_hw *hw, u32 **results, ether_addr_copy(hw->mac.perm_addr, perm_addr); hw->mac.default_vid = vid & (FM10K_VLAN_TABLE_VID_MAX - 1); - hw->mac.vlan_override = !!(vid & FM10K_VLAN_CLEAR); + hw->mac.vlan_override = !!(vid & FM10K_VLAN_OVERRIDE); return 0; } @@ -451,13 +451,6 @@ static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw, u16 glort, u8 mode) return mbx->ops.enqueue_tx(hw, mbx, msg); } -const struct fm10k_tlv_attr fm10k_1588_msg_attr[] = { - FM10K_TLV_ATTR_U64(FM10K_1588_MSG_TIMESTAMP), - FM10K_TLV_ATTR_LAST -}; - -/* currently there is no shared 1588 timestamp handler */ - /** * fm10k_update_hw_stats_vf - Updates hardware related statistics of VF * @hw: pointer to hardware structure @@ -509,52 +502,6 @@ static s32 fm10k_configure_dglort_map_vf(struct fm10k_hw *hw, return 0; } -/** - * fm10k_adjust_systime_vf - Adjust systime frequency - * @hw: pointer to hardware structure - * @ppb: adjustment rate in parts per billion - * - * This function takes an adjustment rate in parts per billion and will - * verify that this value is 0 as the VF cannot support adjusting the - * systime clock. - * - * If the ppb value is non-zero the return is ERR_PARAM else success - **/ -static s32 fm10k_adjust_systime_vf(struct fm10k_hw *hw, s32 ppb) -{ - /* The VF cannot adjust the clock frequency, however it should - * already have a syntonic clock with whichever host interface is - * running as the master for the host interface clock domain so - * there should be not frequency adjustment necessary. - */ - return ppb ? FM10K_ERR_PARAM : 0; -} - -/** - * fm10k_read_systime_vf - Reads value of systime registers - * @hw: pointer to the hardware structure - * - * Function reads the content of 2 registers, combined to represent a 64 bit - * value measured in nanoseconds. In order to guarantee the value is accurate - * we check the 32 most significant bits both before and after reading the - * 32 least significant bits to verify they didn't change as we were reading - * the registers. - **/ -static u64 fm10k_read_systime_vf(struct fm10k_hw *hw) -{ - u32 systime_l, systime_h, systime_tmp; - - systime_h = fm10k_read_reg(hw, FM10K_VFSYSTIME + 1); - - do { - systime_tmp = systime_h; - systime_l = fm10k_read_reg(hw, FM10K_VFSYSTIME); - systime_h = fm10k_read_reg(hw, FM10K_VFSYSTIME + 1); - } while (systime_tmp != systime_h); - - return ((u64)systime_h << 32) | systime_l; -} - static const struct fm10k_msg_data fm10k_msg_data_vf[] = { FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test), FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf), @@ -579,8 +526,6 @@ static const struct fm10k_mac_ops mac_ops_vf = { .rebind_hw_stats = fm10k_rebind_hw_stats_vf, .configure_dglort_map = fm10k_configure_dglort_map_vf, .get_host_state = fm10k_get_host_state_generic, - .adjust_systime = fm10k_adjust_systime_vf, - .read_systime = fm10k_read_systime_vf, }; static s32 fm10k_get_invariants_vf(struct fm10k_hw *hw) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h index c4439f1313a0..2662f33c0c71 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h @@ -1,5 +1,5 @@ -/* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. +/* Intel(R) Ethernet Switch Host Interface Driver + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,7 +29,6 @@ enum fm10k_vf_tlv_msg_id { FM10K_VF_MSG_ID_MSIX, FM10K_VF_MSG_ID_MAC_VLAN, FM10K_VF_MSG_ID_LPORT_STATE, - FM10K_VF_MSG_ID_1588, FM10K_VF_MSG_ID_MAX, }; @@ -49,11 +48,6 @@ enum fm10k_tlv_lport_state_attr_id { FM10K_LPORT_STATE_MSG_MAX }; -enum fm10k_tlv_1588_attr_id { - FM10K_1588_MSG_TIMESTAMP, - FM10K_1588_MSG_MAX -}; - #define FM10K_VF_MSG_MSIX_HANDLER(func) \ FM10K_MSG_HANDLER(FM10K_VF_MSG_ID_MSIX, NULL, func) @@ -70,9 +64,5 @@ extern const struct fm10k_tlv_attr fm10k_lport_state_msg_attr[]; FM10K_MSG_HANDLER(FM10K_VF_MSG_ID_LPORT_STATE, \ fm10k_lport_state_msg_attr, func) -extern const struct fm10k_tlv_attr fm10k_1588_msg_attr[]; -#define FM10K_VF_MSG_1588_HANDLER(func) \ - FM10K_MSG_HANDLER(FM10K_VF_MSG_ID_1588, fm10k_1588_msg_attr, func) - extern const struct fm10k_info fm10k_vf_info; #endif /* _FM10K_VF_H */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d25b3be5ba89..00c473874f01 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -202,6 +202,7 @@ struct i40e_lump_tracking { #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) #define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) +#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, @@ -553,7 +554,7 @@ struct i40e_vsi { u16 num_queue_pairs; /* Used tx and rx pairs */ u16 num_desc; enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ - u16 vf_id; /* Virtual function ID for SRIOV VSIs */ + s16 vf_id; /* Virtual function ID for SRIOV VSIs */ struct i40e_tc_configuration tc_config; struct i40e_aqc_vsi_properties_data info; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 43bb4139d896..738b42a44f20 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -617,10 +617,6 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) hw->nvm_release_on_done = false; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; - ret_code = i40e_aq_set_hmc_resource_profile(hw, - I40E_HMC_PROFILE_DEFAULT, - 0, - NULL); ret_code = 0; /* success! */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 8d5c65ab6267..eacbe7430b48 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -78,17 +78,17 @@ struct i40e_aq_desc { #define I40E_AQ_FLAG_EI_SHIFT 14 #define I40E_AQ_FLAG_FE_SHIFT 15 -#define I40E_AQ_FLAG_DD (1 << I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */ -#define I40E_AQ_FLAG_CMP (1 << I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */ -#define I40E_AQ_FLAG_ERR (1 << I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */ -#define I40E_AQ_FLAG_VFE (1 << I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */ -#define I40E_AQ_FLAG_LB (1 << I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */ -#define I40E_AQ_FLAG_RD (1 << I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */ -#define I40E_AQ_FLAG_VFC (1 << I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */ -#define I40E_AQ_FLAG_BUF (1 << I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ -#define I40E_AQ_FLAG_SI (1 << I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */ -#define I40E_AQ_FLAG_EI (1 << I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */ -#define I40E_AQ_FLAG_FE (1 << I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */ +#define I40E_AQ_FLAG_DD BIT(I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */ +#define I40E_AQ_FLAG_CMP BIT(I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */ +#define I40E_AQ_FLAG_ERR BIT(I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */ +#define I40E_AQ_FLAG_VFE BIT(I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */ +#define I40E_AQ_FLAG_LB BIT(I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */ +#define I40E_AQ_FLAG_RD BIT(I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */ +#define I40E_AQ_FLAG_VFC BIT(I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */ +#define I40E_AQ_FLAG_BUF BIT(I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ +#define I40E_AQ_FLAG_SI BIT(I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */ +#define I40E_AQ_FLAG_EI BIT(I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */ +#define I40E_AQ_FLAG_FE BIT(I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */ /* error codes */ enum i40e_admin_queue_err { @@ -205,10 +205,6 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, - /* hmc */ - i40e_aqc_opc_query_hmc_resource_profile = 0x0500, - i40e_aqc_opc_set_hmc_resource_profile = 0x0501, - /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, i40e_aqc_opc_set_phy_config = 0x0601, @@ -429,6 +425,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 #define I40E_AQ_CAP_ID_WSR_PROT 0x0064 +#define I40E_AQ_CAP_ID_NVM_MGMT 0x0080 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -1585,27 +1582,6 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); -/* Get and set the active HMC resource profile and status. - * (direct 0x0500) and (direct 0x0501) - */ -struct i40e_aq_get_set_hmc_resource_profile { - u8 pm_profile; - u8 pe_vf_enabled; - u8 reserved[14]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); - -enum i40e_aq_hmc_profile { - /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ - I40E_HMC_PROFILE_DEFAULT = 1, - I40E_HMC_PROFILE_FAVOR_VF = 2, - I40E_HMC_PROFILE_EQUAL = 3, -}; - -#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_PM_MASK 0xF -#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_COUNT_MASK 0x3F - /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ @@ -1652,11 +1628,11 @@ enum i40e_aq_phy_type { enum i40e_aq_link_speed { I40E_LINK_SPEED_UNKNOWN = 0, - I40E_LINK_SPEED_100MB = (1 << I40E_LINK_SPEED_100MB_SHIFT), - I40E_LINK_SPEED_1GB = (1 << I40E_LINK_SPEED_1000MB_SHIFT), - I40E_LINK_SPEED_10GB = (1 << I40E_LINK_SPEED_10GB_SHIFT), - I40E_LINK_SPEED_40GB = (1 << I40E_LINK_SPEED_40GB_SHIFT), - I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT) + I40E_LINK_SPEED_100MB = BIT(I40E_LINK_SPEED_100MB_SHIFT), + I40E_LINK_SPEED_1GB = BIT(I40E_LINK_SPEED_1000MB_SHIFT), + I40E_LINK_SPEED_10GB = BIT(I40E_LINK_SPEED_10GB_SHIFT), + I40E_LINK_SPEED_40GB = BIT(I40E_LINK_SPEED_40GB_SHIFT), + I40E_LINK_SPEED_20GB = BIT(I40E_LINK_SPEED_20GB_SHIFT) }; struct i40e_aqc_module_desc { @@ -1927,9 +1903,9 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write); /* Used for 0x0704 as well as for 0x0705 commands */ #define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT 1 #define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK \ - (1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT) + BIT(I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT) #define I40E_AQ_ANVM_FEATURE 0 -#define I40E_AQ_ANVM_IMMEDIATE_FIELD (1 << FEATURE_OR_IMMEDIATE_SHIFT) +#define I40E_AQ_ANVM_IMMEDIATE_FIELD BIT(FEATURE_OR_IMMEDIATE_SHIFT) struct i40e_aqc_nvm_config_data_feature { __le16 feature_id; #define I40E_AQ_ANVM_FEATURE_OPTION_OEM_ONLY 0x01 @@ -2226,13 +2202,11 @@ I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_cee_dcb_cfg_resp); */ struct i40e_aqc_lldp_set_local_mib { #define SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT 0 -#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK (1 << SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT) -#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK (1 << \ - SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT) +#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK BIT(SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT) #define SET_LOCAL_MIB_AC_TYPE_LOCAL_MIB 0x0 #define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT (1) -#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_MASK (1 << \ - SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT) +#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_MASK \ + BIT(SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT) #define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS 0x1 u8 type; u8 reserved0; @@ -2250,7 +2224,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_set_local_mib); struct i40e_aqc_lldp_stop_start_specific_agent { #define I40E_AQC_START_SPECIFIC_AGENT_SHIFT 0 #define I40E_AQC_START_SPECIFIC_AGENT_MASK \ - (1 << I40E_AQC_START_SPECIFIC_AGENT_SHIFT) + BIT(I40E_AQC_START_SPECIFIC_AGENT_SHIFT) u8 command; u8 reserved[15]; }; @@ -2303,7 +2277,7 @@ struct i40e_aqc_del_udp_tunnel_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion); struct i40e_aqc_get_set_rss_key { -#define I40E_AQC_SET_RSS_KEY_VSI_VALID (0x1 << 15) +#define I40E_AQC_SET_RSS_KEY_VSI_VALID BIT(15) #define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0 #define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \ I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) @@ -2323,14 +2297,13 @@ struct i40e_aqc_get_set_rss_key_data { I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data); struct i40e_aqc_get_set_rss_lut { -#define I40E_AQC_SET_RSS_LUT_VSI_VALID (0x1 << 15) +#define I40E_AQC_SET_RSS_LUT_VSI_VALID BIT(15) #define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0 #define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \ I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) __le16 vsi_id; #define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0 -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK (0x1 << \ - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) +#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK BIT(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) #define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0 #define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1 diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index f3c1d8890cbb..4a934e14574d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -61,6 +61,7 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: + case I40E_DEV_ID_QSFP_I_X722: hw->mac.type = I40E_MAC_X722; break; default: @@ -2038,6 +2039,76 @@ i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, } /** + * i40e_aq_set_vsi_mc_promisc_on_vlan + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @vid: The VLAN tag filter - capture any multicast packet with this VLAN tag + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, + u16 vid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_MULTICAST); + cmd->seid = cpu_to_le16(seid); + cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_aq_set_vsi_uc_promisc_on_vlan + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @vid: The VLAN tag filter - capture any unicast packet with this VLAN tag + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, + u16 vid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); + cmd->seid = cpu_to_le16(seid); + cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_set_vsi_broadcast * @hw: pointer to the hw struct * @seid: vsi number @@ -2667,10 +2738,7 @@ i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, u16 *rules_used, u16 *rules_free) { /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */ - if (rule_type != I40E_AQC_MIRROR_RULE_TYPE_VLAN) { - if (!rule_id) - return I40E_ERR_PARAM; - } else { + if (rule_type == I40E_AQC_MIRROR_RULE_TYPE_VLAN) { /* count and mr_list shall be valid for rule_type INGRESS VLAN * mirroring. For other rule_type, count and rule_type should * not matter. @@ -2787,36 +2855,6 @@ i40e_status i40e_aq_debug_write_register(struct i40e_hw *hw, } /** - * i40e_aq_set_hmc_resource_profile - * @hw: pointer to the hw struct - * @profile: type of profile the HMC is to be set as - * @pe_vf_enabled_count: the number of PE enabled VFs the system has - * @cmd_details: pointer to command details structure or NULL - * - * set the HMC profile of the device. - **/ -i40e_status i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw, - enum i40e_aq_hmc_profile profile, - u8 pe_vf_enabled_count, - struct i40e_asq_cmd_details *cmd_details) -{ - struct i40e_aq_desc desc; - struct i40e_aq_get_set_hmc_resource_profile *cmd = - (struct i40e_aq_get_set_hmc_resource_profile *)&desc.params.raw; - i40e_status status; - - i40e_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_set_hmc_resource_profile); - - cmd->pm_profile = (u8)profile; - cmd->pe_vf_enabled = pe_vf_enabled_count; - - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); - - return status; -} - -/** * i40e_aq_request_resource * @hw: pointer to the hw struct * @resource: resource id @@ -3138,6 +3176,12 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, p->wr_csr_prot = (u64)number; p->wr_csr_prot |= (u64)logical_id << 32; break; + case I40E_AQ_CAP_ID_NVM_MGMT: + if (number & I40E_NVM_MGMT_SEC_REV_DISABLED) + p->sec_rev_disabled = true; + if (number & I40E_NVM_MGMT_UPDATE_DISABLED) + p->update_disabled = true; + break; default: break; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index dd4457d29e98..d701861c6e1e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -45,6 +45,7 @@ #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_QSFP_I_X722 0x37D4 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 8a83d4514812..8e56c43c4104 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2506,7 +2506,6 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, if (!vsi) return -EINVAL; - pf = vsi->back; if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) @@ -2564,15 +2563,18 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; if (ntohl(fsp->m_ext.data[1])) { - if (ntohl(fsp->h_ext.data[1]) >= pf->num_alloc_vfs) { - netif_info(pf, drv, vsi->netdev, "Invalid VF id\n"); + vf_id = ntohl(fsp->h_ext.data[1]); + if (vf_id >= pf->num_alloc_vfs) { + netif_info(pf, drv, vsi->netdev, + "Invalid VF id %d\n", vf_id); goto free_input; } - vf_id = ntohl(fsp->h_ext.data[1]); /* Find vsi id from vf id and override dest vsi */ input->dest_vsi = pf->vf[vf_id].lan_vsi_id; if (input->q_index >= pf->vf[vf_id].num_queue_pairs) { - netif_info(pf, drv, vsi->netdev, "Invalid queue id\n"); + netif_info(pf, drv, vsi->netdev, + "Invalid queue id %d for VF %d\n", + input->q_index, vf_id); goto free_input; } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0b071cea305d..8e6c0f2487d7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -46,7 +46,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 5 -#define DRV_VERSION_BUILD 5 +#define DRV_VERSION_BUILD 10 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -91,6 +91,7 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_I_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0}, /* required last entry */ @@ -397,24 +398,6 @@ static void i40e_tx_timeout(struct net_device *netdev) } /** - * i40e_release_rx_desc - Store the new tail and head values - * @rx_ring: ring to bump - * @val: new head index - **/ -static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) -{ - rx_ring->next_to_use = val; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - writel(val, rx_ring->tail); -} - -/** * i40e_get_vsi_stats_struct - Get System Network Statistics * @vsi: the VSI we care about * @@ -2098,6 +2081,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } + /* if the VF is not trusted do not do promisc */ + if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { + clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); + goto out; + } + /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; @@ -8082,24 +8071,45 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; + u16 vf_id = vsi->vf_id; u8 i; /* Fill out hash function seed */ if (seed) { u32 *seed_dw = (u32 *)seed; - for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + if (vsi->type == I40E_VSI_MAIN) { + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), + seed_dw[i]); + } else if (vsi->type == I40E_VSI_SRIOV) { + for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, + I40E_VFQF_HKEY1(i, vf_id), + seed_dw[i]); + } else { + dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n"); + } } if (lut) { u32 *lut_dw = (u32 *)lut; - if (lut_size != I40E_HLUT_ARRAY_SIZE) - return -EINVAL; - - for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); + if (vsi->type == I40E_VSI_MAIN) { + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); + } else if (vsi->type == I40E_VSI_SRIOV) { + if (lut_size != I40E_VF_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, + I40E_VFQF_HLUT1(i, vf_id), + lut_dw[i]); + } else { + dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n"); + } } i40e_flush(hw); @@ -9111,40 +9121,44 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np = netdev_priv(netdev); np->vsi = vsi; - netdev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_GRE | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | + netdev->hw_enc_features |= NETIF_F_SG | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_HIGHDMA | + NETIF_F_SOFT_FEATURES | + NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_TSO6 | + NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_IPIP | + NETIF_F_GSO_SIT | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL | + NETIF_F_SCTP_CRC | + NETIF_F_RXHASH | + NETIF_F_RXCSUM | 0; - netdev->features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_SCTP_CRC | - NETIF_F_HIGHDMA | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO_ECN | - NETIF_F_TSO6 | - NETIF_F_RXCSUM | - NETIF_F_RXHASH | - 0; + if (!(pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE)) + netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; + + /* record features VLANs can make use of */ + netdev->vlan_features |= netdev->hw_enc_features | + NETIF_F_TSO_MANGLEID; if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->features |= NETIF_F_NTUPLE; - if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) - netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= NETIF_F_NTUPLE; - /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; + netdev->hw_features |= netdev->hw_enc_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + + netdev->features |= netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); @@ -9183,12 +9197,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); - /* vlan gets same features (except vlan offload) - * after any tweaks for specific VSI types - */ - netdev->vlan_features = netdev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER); + netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; /* Setup netdev TC information */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index f2cea3d25de3..954efe3118db 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -693,10 +693,10 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, /* early check for status command and debug msgs */ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", i40e_nvm_update_state_str[upd_cmd], hw->nvmupd_state, - hw->nvm_release_on_done, + hw->nvm_release_on_done, hw->nvm_wait_opcode, cmd->command, cmd->config, cmd->offset, cmd->data_size); if (upd_cmd == I40E_NVMUPD_INVALID) { @@ -710,7 +710,18 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, * going into the state machine */ if (upd_cmd == I40E_NVMUPD_STATUS) { + if (!cmd->data_size) { + *perrno = -EFAULT; + return I40E_ERR_BUF_TOO_SHORT; + } + bytes[0] = hw->nvmupd_state; + + if (cmd->data_size >= 4) { + bytes[1] = 0; + *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; + } + return 0; } @@ -729,6 +740,14 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, case I40E_NVMUPD_STATE_INIT_WAIT: case I40E_NVMUPD_STATE_WRITE_WAIT: + /* if we need to stop waiting for an event, clear + * the wait info and return before doing anything else + */ + if (cmd->offset == 0xffff) { + i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode); + return 0; + } + status = I40E_ERR_NOT_READY; *perrno = -EBUSY; break; @@ -800,6 +819,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, i40e_release_nvm(hw); } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_erase; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -816,6 +836,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, i40e_release_nvm(hw); } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -828,10 +849,12 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); - if (status) + if (status) { i40e_release_nvm(hw); - else + } else { + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT; + } } break; @@ -850,6 +873,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, i40e_release_nvm(hw); } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -940,8 +964,10 @@ retry: switch (upd_cmd) { case I40E_NVMUPD_WRITE_CON: status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); - if (!status) + if (!status) { + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT; + } break; case I40E_NVMUPD_WRITE_LCB: @@ -954,6 +980,7 @@ retry: hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } break; @@ -967,6 +994,7 @@ retry: -EIO; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } else { + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT; } break; @@ -981,6 +1009,7 @@ retry: hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } break; @@ -1036,14 +1065,14 @@ retry: **/ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) { - if (opcode == i40e_aqc_opc_nvm_erase || - opcode == i40e_aqc_opc_nvm_update) { + if (opcode == hw->nvm_wait_opcode) { i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: clearing wait on opcode 0x%04x\n", opcode); if (hw->nvm_release_on_done) { i40e_release_nvm(hw); hw->nvm_release_on_done = false; } + hw->nvm_wait_opcode = 0; switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT_WAIT: @@ -1220,6 +1249,12 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw, *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); } + /* should we wait for a followup event? */ + if (cmd->offset) { + hw->nvm_wait_opcode = cmd->offset; + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; + } + return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 134035f53f2c..4c8977c805df 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -133,6 +133,14 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, + u16 vid, + struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, + u16 vid, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, u16 seid, bool enable, struct i40e_asq_cmd_details *cmd_details); @@ -228,10 +236,6 @@ i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_dcb_updated(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); -i40e_status i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw, - enum i40e_aq_hmc_profile profile, - u8 pe_vf_enabled_count, - struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_config_switch_comp_bw_limit(struct i40e_hw *hw, u16 seid, u16 credit, u8 max_bw, struct i40e_asq_cmd_details *cmd_details); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 565ca7c835bc..a1b878abd5b0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -158,9 +158,10 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now, then = ns_to_timespec64(delta); + struct timespec64 now, then; unsigned long flags; + then = ns_to_timespec64(delta); spin_lock_irqsave(&pf->tmreg_lock, flags); i40e_ptp_read(pf, &now); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 29ffed27e5a9..2765d7efdd9c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1370,7 +1370,8 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, { struct i40e_q_vector *q_vector = rx_ring->q_vector; - if (vlan_tag & VLAN_VID_MASK) + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&q_vector->napi, skb); @@ -2299,9 +2300,16 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) ip.v6->payload_len = 0; } - if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPIP | + SKB_GSO_SIT | + SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) { - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { + l4.udp->len = 0; + /* determine offset of outer transport header */ l4_offset = l4.hdr - skb->data; @@ -2442,13 +2450,6 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, &l4_proto, &frag_off); } - /* compute outer L3 header size */ - tunnel |= ((l4.hdr - ip.hdr) / 4) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; - - /* switch IP header pointer from outer to inner header */ - ip.hdr = skb_inner_network_header(skb); - /* define outer transport */ switch (l4_proto) { case IPPROTO_UDP: @@ -2459,6 +2460,11 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, tunnel |= I40E_TXD_CTX_GRE_TUNNELING; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; + l4.hdr = skb_inner_network_header(skb); + break; default: if (*tx_flags & I40E_TX_FLAGS_TSO) return -1; @@ -2467,12 +2473,20 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, return 0; } + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + /* compute tunnel header size */ tunnel |= ((ip.hdr - l4.hdr) / 2) << I40E_TXD_CTX_QW0_NATLEN_SHIFT; /* indicate if we need to offload outer UDP header */ if ((*tx_flags & I40E_TX_FLAGS_TSO) && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; @@ -2600,35 +2614,34 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * __i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40e_chk_linearize - Check if there are more than 8 buffers per packet * @skb: send buffer * - * Note: Our HW can't scatter-gather more than 8 fragments to build - * a packet on the wire and so we need to figure out the cases where we - * need to linearize the skb. + * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. **/ bool __i40e_chk_linearize(struct sk_buff *skb) { const struct skb_frag_struct *frag, *stale; - int gso_size, nr_frags, sum; - - /* check to see if TSO is enabled, if so we may get a repreive */ - gso_size = skb_shinfo(skb)->gso_size; - if (unlikely(!gso_size)) - return true; + int nr_frags, sum; - /* no need to check if number of frags is less than 8 */ + /* no need to check if number of frags is less than 7 */ nr_frags = skb_shinfo(skb)->nr_frags; - if (nr_frags < I40E_MAX_BUFFER_TXD) + if (nr_frags < (I40E_MAX_BUFFER_TXD - 1)) return false; /* We need to walk through the list and validate that each group * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the first or last 6 since the first - * 6 cannot inherit any data from a descriptor before them, and the - * last 6 cannot inherit any data from a descriptor after them. + * to perform such validation on the last 6 since the last 6 cannot + * inherit any data from a descriptor after them. */ - nr_frags -= I40E_MAX_BUFFER_TXD - 1; + nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; /* Initialize size to the negative value of gso_size minus 1. We @@ -2637,21 +2650,21 @@ bool __i40e_chk_linearize(struct sk_buff *skb) * descriptors for a single transmit as the header and previous * fragment are already consuming 2 descriptors. */ - sum = 1 - gso_size; + sum = 1 - skb_shinfo(skb)->gso_size; - /* Add size of frags 1 through 5 to create our initial sum */ - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ stale = &skb_shinfo(skb)->frags[0]; for (;;) { - sum += skb_frag_size(++frag); + sum += skb_frag_size(frag++); /* if sum is negative we failed to make sufficient progress */ if (sum < 0) @@ -2661,7 +2674,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (!--nr_frags) break; - sum -= skb_frag_size(++stale); + sum -= skb_frag_size(stale++); } return false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 77ccdde56c0c..6b2b1913527d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -442,11 +442,15 @@ static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) **/ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) { - /* we can only support up to 8 data buffers for a single send */ - if (likely(count <= I40E_MAX_BUFFER_TXD)) + /* Both TSO and single send will work if count is less than 8 */ + if (likely(count < I40E_MAX_BUFFER_TXD)) return false; - return __i40e_chk_linearize(skb); + if (skb_is_gso(skb)) + return __i40e_chk_linearize(skb); + + /* we can support up to 8 data buffers for a single send */ + return count != I40E_MAX_BUFFER_TXD; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 793036b259e5..bd5f13bef83c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -36,7 +36,7 @@ #include "i40e_devids.h" /* I40E_MASK is a macro used on 32 bit registers */ -#define I40E_MASK(mask, shift) (mask << shift) +#define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) #define I40E_MAX_VSI_QP 16 #define I40E_MAX_VF_VSI 3 @@ -275,6 +275,11 @@ struct i40e_hw_capabilities { #define I40E_FLEX10_STATUS_DCC_ERROR 0x1 #define I40E_FLEX10_STATUS_VC_MODE 0x2 + bool sec_rev_disabled; + bool update_disabled; +#define I40E_NVM_MGMT_SEC_REV_DISABLED 0x1 +#define I40E_NVM_MGMT_UPDATE_DISABLED 0x2 + bool mgmt_cem; bool ieee_1588; bool iwarp; @@ -550,6 +555,7 @@ struct i40e_hw { struct i40e_aq_desc nvm_wb_desc; struct i40e_virt_mem nvm_buff; bool nvm_release_on_done; + u16 nvm_wait_opcode; /* HMC info */ struct i40e_hmc_info hmc; /* HMC info struct */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 30f8cbe6b54b..6b9db7983693 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -48,7 +48,7 @@ static void i40e_vc_vf_broadcast(struct i40e_pf *pf, int i; for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { - int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; + int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id; /* Not all vfs are enabled so skip the ones that are not */ if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) @@ -74,7 +74,7 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; struct i40e_link_status *ls = &pf->hw.phy.link_info; - int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; + int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id; pfe.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = I40E_PF_EVENT_SEVERITY_INFO; @@ -141,7 +141,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) return; - abs_vf_id = vf->vf_id + vf->pf->hw.func_caps.vf_base_id; + abs_vf_id = vf->vf_id + (int)vf->pf->hw.func_caps.vf_base_id; pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING; pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM; @@ -860,7 +860,11 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; - set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); + + if (vf->trusted) + set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); + else + clear_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); /* store the total qps number for the runtime * VF req validation @@ -1348,12 +1352,16 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states); } - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { - if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_offload_flags |= - I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ; + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { + vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF; } else { - vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG; + if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) && + (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ; + else + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG; } if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) { @@ -1382,6 +1390,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; + vfres->rss_key_size = I40E_HKEY_ARRAY_SIZE; + vfres->rss_lut_size = I40E_VF_HLUT_ARRAY_SIZE; + if (vf->lan_vsi_idx) { vfres->vsi_res[0].vsi_id = vf->lan_vsi_id; vfres->vsi_res[0].vsi_type = I40E_VSI_SRIOV; @@ -1420,6 +1431,25 @@ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf) } /** + * i40e_getnum_vf_vsi_vlan_filters + * @vsi: pointer to the vsi + * + * called to get the number of VLANs offloaded on this VF + **/ +static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + int num_vlans = 0; + + list_for_each_entry(f, &vsi->mac_filter_list, list) { + if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) + num_vlans++; + } + + return num_vlans; +} + +/** * i40e_vc_config_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1435,22 +1465,122 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, (struct i40e_virtchnl_promisc_info *)msg; struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; - struct i40e_vsi *vsi; + struct i40e_mac_filter *f; + i40e_status aq_ret = 0; bool allmulti = false; - i40e_status aq_ret; + struct i40e_vsi *vsi; + bool alluni = false; + int aq_err = 0; vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || - (vsi->type != I40E_VSI_FCOE)) { + !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + dev_err(&pf->pdev->dev, + "VF %d doesn't meet requirements to enter promiscuous mode\n", + vf->vf_id); aq_ret = I40E_ERR_PARAM; goto error_param; } + /* Multicast promiscuous handling*/ if (info->flags & I40E_FLAG_VF_MULTICAST_PROMISC) allmulti = true; - aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, - allmulti, NULL); + + if (vf->port_vlan_id) { + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid, + allmulti, + vf->port_vlan_id, + NULL); + } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { + list_for_each_entry(f, &vsi->mac_filter_list, list) { + if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) + continue; + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, + vsi->seid, + allmulti, + f->vlan, + NULL); + aq_err = pf->hw.aq.asq_last_status; + if (aq_ret) { + dev_err(&pf->pdev->dev, + "Could not add VLAN %d to multicast promiscuous domain err %s aq_err %s\n", + f->vlan, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + break; + } + } + } else { + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, + allmulti, NULL); + aq_err = pf->hw.aq.asq_last_status; + if (aq_ret) { + dev_err(&pf->pdev->dev, + "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n", + vf->vf_id, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + goto error_param_int; + } + } + + if (!aq_ret) { + dev_info(&pf->pdev->dev, + "VF %d successfully set multicast promiscuous mode\n", + vf->vf_id); + if (allmulti) + set_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states); + else + clear_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states); + } + + if (info->flags & I40E_FLAG_VF_UNICAST_PROMISC) + alluni = true; + if (vf->port_vlan_id) { + aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, vsi->seid, + alluni, + vf->port_vlan_id, + NULL); + } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { + list_for_each_entry(f, &vsi->mac_filter_list, list) { + aq_ret = 0; + if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) + aq_ret = + i40e_aq_set_vsi_uc_promisc_on_vlan(hw, + vsi->seid, + alluni, + f->vlan, + NULL); + aq_err = pf->hw.aq.asq_last_status; + if (aq_ret) + dev_err(&pf->pdev->dev, + "Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n", + f->vlan, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + } + } else { + aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, + allmulti, NULL); + aq_err = pf->hw.aq.asq_last_status; + if (aq_ret) + dev_err(&pf->pdev->dev, + "VF %d failed to set unicast promiscuous mode %8.8x err %s aq_err %s\n", + vf->vf_id, info->flags, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + } + +error_param_int: + if (!aq_ret) { + dev_info(&pf->pdev->dev, + "VF %d successfully set unicast promiscuous mode\n", + vf->vf_id); + if (alluni) + set_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states); + else + clear_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states); + } error_param: /* send the response to the VF */ @@ -1701,6 +1831,10 @@ error_param: (u8 *)&stats, sizeof(stats)); } +/* If the VF is not trusted restrict the number of MAC/VLAN it can program */ +#define I40E_VC_MAX_MAC_ADDR_PER_VF 8 +#define I40E_VC_MAX_VLAN_PER_VF 8 + /** * i40e_check_vf_permission * @vf: pointer to the VF info @@ -1721,15 +1855,22 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr) dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr); ret = I40E_ERR_INVALID_MAC_ADDR; } else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) && + !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) { /* If the host VMM administrator has set the VF MAC address * administratively via the ndo_set_vf_mac command then deny * permission to the VF to add or delete unicast MAC addresses. + * Unless the VF is privileged and then it can do whatever. * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ dev_err(&pf->pdev->dev, - "VF attempting to override administratively set MAC address\nPlease reload the VF driver to resume normal operation\n"); + "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n"); + ret = -EPERM; + } else if ((vf->num_mac >= I40E_VC_MAX_MAC_ADDR_PER_VF) && + !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(&pf->pdev->dev, + "VF is not trusted, switch the VF to trusted to add more functionality\n"); ret = -EPERM; } return ret; @@ -1754,7 +1895,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) int i; if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -1793,6 +1933,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) ret = I40E_ERR_PARAM; spin_unlock_bh(&vsi->mac_filter_list_lock); goto error_param; + } else { + vf->num_mac++; } } spin_unlock_bh(&vsi->mac_filter_list_lock); @@ -1828,7 +1970,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) int i; if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -1852,6 +1993,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_list_lock); goto error_param; + } else { + vf->num_mac--; } spin_unlock_bh(&vsi->mac_filter_list_lock); @@ -1886,8 +2029,13 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_status aq_ret = 0; int i; + if ((vf->num_vlan >= I40E_VC_MAX_VLAN_PER_VF) && + !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(&pf->pdev->dev, + "VF is not trusted, switch the VF to trusted to add more VLAN addresses\n"); + goto error_param; + } if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -1911,6 +2059,19 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < vfl->num_elements; i++) { /* add new VLAN filter */ int ret = i40e_vsi_add_vlan(vsi, vfl->vlan_id[i]); + if (!ret) + vf->num_vlan++; + + if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states)) + i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid, + true, + vfl->vlan_id[i], + NULL); + if (test_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states)) + i40e_aq_set_vsi_mc_promisc_on_vlan(&pf->hw, vsi->seid, + true, + vfl->vlan_id[i], + NULL); if (ret) dev_err(&pf->pdev->dev, @@ -1942,7 +2103,6 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) int i; if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -1963,6 +2123,19 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < vfl->num_elements; i++) { int ret = i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]); + if (!ret) + vf->num_vlan--; + + if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states)) + i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid, + false, + vfl->vlan_id[i], + NULL); + if (test_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states)) + i40e_aq_set_vsi_mc_promisc_on_vlan(&pf->hw, vsi->seid, + false, + vfl->vlan_id[i], + NULL); if (ret) dev_err(&pf->pdev->dev, @@ -2042,6 +2215,135 @@ error_param: } /** + * i40e_vc_config_rss_key + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * Configure the VF's RSS key + **/ +static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg, u16 msglen) +{ + struct i40e_virtchnl_rss_key *vrk = + (struct i40e_virtchnl_rss_key *)msg; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + u16 vsi_id = vrk->vsi_id; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + !i40e_vc_isvalid_vsi_id(vf, vsi_id) || + (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + vsi = pf->vsi[vf->lan_vsi_idx]; + aq_ret = i40e_config_rss(vsi, vrk->key, NULL, 0); +err: + /* send the response to the VF */ + return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY, + aq_ret); +} + +/** + * i40e_vc_config_rss_lut + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * Configure the VF's RSS LUT + **/ +static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg, u16 msglen) +{ + struct i40e_virtchnl_rss_lut *vrl = + (struct i40e_virtchnl_rss_lut *)msg; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + u16 vsi_id = vrl->vsi_id; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + !i40e_vc_isvalid_vsi_id(vf, vsi_id) || + (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + vsi = pf->vsi[vf->lan_vsi_idx]; + aq_ret = i40e_config_rss(vsi, NULL, vrl->lut, I40E_VF_HLUT_ARRAY_SIZE); + /* send the response to the VF */ +err: + return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT, + aq_ret); +} + +/** + * i40e_vc_get_rss_hena + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * Return the RSS HENA bits allowed by the hardware + **/ +static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen) +{ + struct i40e_virtchnl_rss_hena *vrh = NULL; + struct i40e_pf *pf = vf->pf; + i40e_status aq_ret = 0; + int len = 0; + + if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + len = sizeof(struct i40e_virtchnl_rss_hena); + + vrh = kzalloc(len, GFP_KERNEL); + if (!vrh) { + aq_ret = I40E_ERR_NO_MEMORY; + len = 0; + goto err; + } + vrh->hena = i40e_pf_get_default_rss_hena(pf); +err: + /* send the response back to the VF */ + aq_ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS, + aq_ret, (u8 *)vrh, len); + return aq_ret; +} + +/** + * i40e_vc_set_rss_hena + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * Set the RSS HENA bits for the VF + **/ +static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen) +{ + struct i40e_virtchnl_rss_hena *vrh = + (struct i40e_virtchnl_rss_hena *)msg; + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)vrh->hena); + i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(1, vf->vf_id), + (u32)(vrh->hena >> 32)); + + /* send the response to the VF */ +err: + return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_SET_RSS_HENA, + aq_ret); +} + +/** * i40e_vc_validate_vf_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2054,7 +2356,7 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen) { bool err_msg_format = false; - int valid_len; + int valid_len = 0; /* Check if VF is disabled. */ if (test_bit(I40E_VF_STAT_DISABLED, &vf->vf_states)) @@ -2066,13 +2368,10 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode, valid_len = sizeof(struct i40e_virtchnl_version_info); break; case I40E_VIRTCHNL_OP_RESET_VF: - valid_len = 0; break; case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: if (VF_IS_V11(vf)) valid_len = sizeof(u32); - else - valid_len = 0; break; case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE: valid_len = sizeof(struct i40e_virtchnl_txq_info); @@ -2162,6 +2461,35 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode, sizeof(struct i40e_virtchnl_iwarp_qv_info)); } break; + case I40E_VIRTCHNL_OP_CONFIG_RSS_KEY: + valid_len = sizeof(struct i40e_virtchnl_rss_key); + if (msglen >= valid_len) { + struct i40e_virtchnl_rss_key *vrk = + (struct i40e_virtchnl_rss_key *)msg; + if (vrk->key_len != I40E_HKEY_ARRAY_SIZE) { + err_msg_format = true; + break; + } + valid_len += vrk->key_len - 1; + } + break; + case I40E_VIRTCHNL_OP_CONFIG_RSS_LUT: + valid_len = sizeof(struct i40e_virtchnl_rss_lut); + if (msglen >= valid_len) { + struct i40e_virtchnl_rss_lut *vrl = + (struct i40e_virtchnl_rss_lut *)msg; + if (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) { + err_msg_format = true; + break; + } + valid_len += vrl->lut_entries - 1; + } + break; + case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: + break; + case I40E_VIRTCHNL_OP_SET_RSS_HENA: + valid_len = sizeof(struct i40e_virtchnl_rss_hena); + break; /* These are always errors coming from the VF. */ case I40E_VIRTCHNL_OP_EVENT: case I40E_VIRTCHNL_OP_UNKNOWN: @@ -2188,11 +2516,11 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode, * called from the common aeq/arq handler to * process request from VF **/ -int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, +int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen) { struct i40e_hw *hw = &pf->hw; - unsigned int local_vf_id = vf_id - hw->func_caps.vf_base_id; + int local_vf_id = vf_id - (s16)hw->func_caps.vf_base_id; struct i40e_vf *vf; int ret; @@ -2260,6 +2588,19 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP: ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false); break; + case I40E_VIRTCHNL_OP_CONFIG_RSS_KEY: + ret = i40e_vc_config_rss_key(vf, msg, msglen); + break; + case I40E_VIRTCHNL_OP_CONFIG_RSS_LUT: + ret = i40e_vc_config_rss_lut(vf, msg, msglen); + break; + case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: + ret = i40e_vc_get_rss_hena(vf, msg, msglen); + break; + case I40E_VIRTCHNL_OP_SET_RSS_HENA: + ret = i40e_vc_set_rss_hena(vf, msg, msglen); + break; + case I40E_VIRTCHNL_OP_UNKNOWN: default: dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", @@ -2281,9 +2622,10 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, **/ int i40e_vc_process_vflr_event(struct i40e_pf *pf) { - u32 reg, reg_idx, bit_idx, vf_id; struct i40e_hw *hw = &pf->hw; + u32 reg, reg_idx, bit_idx; struct i40e_vf *vf; + int vf_id; if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state)) return 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 838cbd2299a4..875174141451 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -61,6 +61,8 @@ enum i40e_vf_states { I40E_VF_STAT_IWARPENA, I40E_VF_STAT_FCOEENA, I40E_VF_STAT_DISABLED, + I40E_VF_STAT_MC_PROMISC, + I40E_VF_STAT_UC_PROMISC, }; /* VF capabilities */ @@ -75,7 +77,7 @@ struct i40e_vf { struct i40e_pf *pf; /* VF id in the PF space */ - u16 vf_id; + s16 vf_id; /* all VF vsis connect to the same parent */ enum i40e_switch_element_types parent_type; struct i40e_virtchnl_version_info vf_ver; @@ -109,6 +111,9 @@ struct i40e_vf { bool link_forced; bool link_up; /* only valid if VF link is forced */ bool spoofchk; + u16 num_mac; + u16 num_vlan; + /* RDMA Client */ struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info; }; @@ -116,7 +121,7 @@ struct i40e_vf { void i40e_free_vfs(struct i40e_pf *pf); int i40e_pci_sriov_configure(struct pci_dev *dev, int num_vfs); int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs); -int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, +int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen); int i40e_vc_process_vflr_event(struct i40e_pf *pf); void i40e_reset_vf(struct i40e_vf *vf, bool flr); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index aad8d6277110..3114dcfa1724 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -78,17 +78,17 @@ struct i40e_aq_desc { #define I40E_AQ_FLAG_EI_SHIFT 14 #define I40E_AQ_FLAG_FE_SHIFT 15 -#define I40E_AQ_FLAG_DD (1 << I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */ -#define I40E_AQ_FLAG_CMP (1 << I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */ -#define I40E_AQ_FLAG_ERR (1 << I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */ -#define I40E_AQ_FLAG_VFE (1 << I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */ -#define I40E_AQ_FLAG_LB (1 << I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */ -#define I40E_AQ_FLAG_RD (1 << I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */ -#define I40E_AQ_FLAG_VFC (1 << I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */ -#define I40E_AQ_FLAG_BUF (1 << I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ -#define I40E_AQ_FLAG_SI (1 << I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */ -#define I40E_AQ_FLAG_EI (1 << I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */ -#define I40E_AQ_FLAG_FE (1 << I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */ +#define I40E_AQ_FLAG_DD BIT(I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */ +#define I40E_AQ_FLAG_CMP BIT(I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */ +#define I40E_AQ_FLAG_ERR BIT(I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */ +#define I40E_AQ_FLAG_VFE BIT(I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */ +#define I40E_AQ_FLAG_LB BIT(I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */ +#define I40E_AQ_FLAG_RD BIT(I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */ +#define I40E_AQ_FLAG_VFC BIT(I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */ +#define I40E_AQ_FLAG_BUF BIT(I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ +#define I40E_AQ_FLAG_SI BIT(I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */ +#define I40E_AQ_FLAG_EI BIT(I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */ +#define I40E_AQ_FLAG_FE BIT(I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */ /* error codes */ enum i40e_admin_queue_err { @@ -205,10 +205,6 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, - /* hmc */ - i40e_aqc_opc_query_hmc_resource_profile = 0x0500, - i40e_aqc_opc_set_hmc_resource_profile = 0x0501, - /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, i40e_aqc_opc_set_phy_config = 0x0601, @@ -426,6 +422,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 #define I40E_AQ_CAP_ID_WSR_PROT 0x0064 +#define I40E_AQ_CAP_ID_NVM_MGMT 0x0080 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -1582,27 +1579,6 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); -/* Get and set the active HMC resource profile and status. - * (direct 0x0500) and (direct 0x0501) - */ -struct i40e_aq_get_set_hmc_resource_profile { - u8 pm_profile; - u8 pe_vf_enabled; - u8 reserved[14]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); - -enum i40e_aq_hmc_profile { - /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ - I40E_HMC_PROFILE_DEFAULT = 1, - I40E_HMC_PROFILE_FAVOR_VF = 2, - I40E_HMC_PROFILE_EQUAL = 3, -}; - -#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_PM_MASK 0xF -#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_COUNT_MASK 0x3F - /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ @@ -1649,11 +1625,11 @@ enum i40e_aq_phy_type { enum i40e_aq_link_speed { I40E_LINK_SPEED_UNKNOWN = 0, - I40E_LINK_SPEED_100MB = (1 << I40E_LINK_SPEED_100MB_SHIFT), - I40E_LINK_SPEED_1GB = (1 << I40E_LINK_SPEED_1000MB_SHIFT), - I40E_LINK_SPEED_10GB = (1 << I40E_LINK_SPEED_10GB_SHIFT), - I40E_LINK_SPEED_40GB = (1 << I40E_LINK_SPEED_40GB_SHIFT), - I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT) + I40E_LINK_SPEED_100MB = BIT(I40E_LINK_SPEED_100MB_SHIFT), + I40E_LINK_SPEED_1GB = BIT(I40E_LINK_SPEED_1000MB_SHIFT), + I40E_LINK_SPEED_10GB = BIT(I40E_LINK_SPEED_10GB_SHIFT), + I40E_LINK_SPEED_40GB = BIT(I40E_LINK_SPEED_40GB_SHIFT), + I40E_LINK_SPEED_20GB = BIT(I40E_LINK_SPEED_20GB_SHIFT) }; struct i40e_aqc_module_desc { @@ -1924,9 +1900,9 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write); /* Used for 0x0704 as well as for 0x0705 commands */ #define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT 1 #define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK \ - (1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT) + BIT(I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT) #define I40E_AQ_ANVM_FEATURE 0 -#define I40E_AQ_ANVM_IMMEDIATE_FIELD (1 << FEATURE_OR_IMMEDIATE_SHIFT) +#define I40E_AQ_ANVM_IMMEDIATE_FIELD BIT(FEATURE_OR_IMMEDIATE_SHIFT) struct i40e_aqc_nvm_config_data_feature { __le16 feature_id; #define I40E_AQ_ANVM_FEATURE_OPTION_OEM_ONLY 0x01 @@ -2195,7 +2171,7 @@ struct i40e_aqc_del_udp_tunnel_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion); struct i40e_aqc_get_set_rss_key { -#define I40E_AQC_SET_RSS_KEY_VSI_VALID (0x1 << 15) +#define I40E_AQC_SET_RSS_KEY_VSI_VALID BIT(15) #define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0 #define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \ I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) @@ -2215,14 +2191,14 @@ struct i40e_aqc_get_set_rss_key_data { I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data); struct i40e_aqc_get_set_rss_lut { -#define I40E_AQC_SET_RSS_LUT_VSI_VALID (0x1 << 15) +#define I40E_AQC_SET_RSS_LUT_VSI_VALID BIT(15) #define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0 #define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \ I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) __le16 vsi_id; #define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0 -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK (0x1 << \ - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) +#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \ + BIT(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) #define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0 #define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 4db0c0326185..8f64204000fb 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -59,6 +59,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: + case I40E_DEV_ID_QSFP_I_X722: hw->mac.type = I40E_MAC_X722; break; case I40E_DEV_ID_X722_VF: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h index 70235706915e..d34972bab09c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h @@ -45,6 +45,7 @@ #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_QSFP_I_X722 0x37D4 #define I40E_DEV_ID_X722_VF 0x37CD #define I40E_DEV_ID_X722_VF_HV 0x37D9 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 0c912a4999db..ede8dfc189bc 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -842,7 +842,8 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, { struct i40e_q_vector *q_vector = rx_ring->q_vector; - if (vlan_tag & VLAN_VID_MASK) + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&q_vector->napi, skb); @@ -1564,9 +1565,16 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) ip.v6->payload_len = 0; } - if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPIP | + SKB_GSO_SIT | + SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) { - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { + l4.udp->len = 0; + /* determine offset of outer transport header */ l4_offset = l4.hdr - skb->data; @@ -1665,13 +1673,6 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, &l4_proto, &frag_off); } - /* compute outer L3 header size */ - tunnel |= ((l4.hdr - ip.hdr) / 4) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; - - /* switch IP header pointer from outer to inner header */ - ip.hdr = skb_inner_network_header(skb); - /* define outer transport */ switch (l4_proto) { case IPPROTO_UDP: @@ -1682,6 +1683,11 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, tunnel |= I40E_TXD_CTX_GRE_TUNNELING; *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + l4.hdr = skb_inner_network_header(skb); + break; default: if (*tx_flags & I40E_TX_FLAGS_TSO) return -1; @@ -1690,12 +1696,20 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, return 0; } + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + /* compute tunnel header size */ tunnel |= ((ip.hdr - l4.hdr) / 2) << I40E_TXD_CTX_QW0_NATLEN_SHIFT; /* indicate if we need to offload outer UDP header */ if ((*tx_flags & I40E_TX_FLAGS_TSO) && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; @@ -1800,35 +1814,34 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, } /** - * __i40evf_chk_linearize - Check if there are more than 8 fragments per packet + * __i40evf_chk_linearize - Check if there are more than 8 buffers per packet * @skb: send buffer * - * Note: Our HW can't scatter-gather more than 8 fragments to build - * a packet on the wire and so we need to figure out the cases where we - * need to linearize the skb. + * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. **/ bool __i40evf_chk_linearize(struct sk_buff *skb) { const struct skb_frag_struct *frag, *stale; - int gso_size, nr_frags, sum; - - /* check to see if TSO is enabled, if so we may get a repreive */ - gso_size = skb_shinfo(skb)->gso_size; - if (unlikely(!gso_size)) - return true; + int nr_frags, sum; - /* no need to check if number of frags is less than 8 */ + /* no need to check if number of frags is less than 7 */ nr_frags = skb_shinfo(skb)->nr_frags; - if (nr_frags < I40E_MAX_BUFFER_TXD) + if (nr_frags < (I40E_MAX_BUFFER_TXD - 1)) return false; /* We need to walk through the list and validate that each group * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the first or last 6 since the first - * 6 cannot inherit any data from a descriptor before them, and the - * last 6 cannot inherit any data from a descriptor after them. + * to perform such validation on the last 6 since the last 6 cannot + * inherit any data from a descriptor after them. */ - nr_frags -= I40E_MAX_BUFFER_TXD - 1; + nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; /* Initialize size to the negative value of gso_size minus 1. We @@ -1837,21 +1850,21 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) * descriptors for a single transmit as the header and previous * fragment are already consuming 2 descriptors. */ - sum = 1 - gso_size; + sum = 1 - skb_shinfo(skb)->gso_size; - /* Add size of frags 1 through 5 to create our initial sum */ - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); - sum += skb_frag_size(++frag); + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ stale = &skb_shinfo(skb)->frags[0]; for (;;) { - sum += skb_frag_size(++frag); + sum += skb_frag_size(frag++); /* if sum is negative we failed to make sufficient progress */ if (sum < 0) @@ -1861,7 +1874,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (!--nr_frags) break; - sum -= skb_frag_size(++stale); + sum -= skb_frag_size(stale++); } return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 84c28aa64fdf..54b52e8f7097 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -424,11 +424,15 @@ static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) **/ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) { - /* we can only support up to 8 data buffers for a single send */ - if (likely(count <= I40E_MAX_BUFFER_TXD)) + /* Both TSO and single send will work if count is less than 8 */ + if (likely(count < I40E_MAX_BUFFER_TXD)) return false; - return __i40evf_chk_linearize(skb); + if (skb_is_gso(skb)) + return __i40evf_chk_linearize(skb); + + /* we can support up to 8 data buffers for a single send */ + return count != I40E_MAX_BUFFER_TXD; } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 4a78c18e0b7b..97f96e0d9c4c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -36,7 +36,7 @@ #include "i40e_devids.h" /* I40E_MASK is a macro used on 32 bit registers */ -#define I40E_MASK(mask, shift) (mask << shift) +#define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) #define I40E_MAX_VSI_QP 16 #define I40E_MAX_VF_VSI 3 @@ -258,6 +258,11 @@ struct i40e_hw_capabilities { #define I40E_FLEX10_STATUS_DCC_ERROR 0x1 #define I40E_FLEX10_STATUS_VC_MODE 0x2 + bool sec_rev_disabled; + bool update_disabled; +#define I40E_NVM_MGMT_SEC_REV_DISABLED 0x1 +#define I40E_NVM_MGMT_UPDATE_DISABLED 0x2 + bool mgmt_cem; bool ieee_1588; bool iwarp; @@ -523,6 +528,7 @@ struct i40e_hw { struct i40e_aq_desc nvm_wb_desc; struct i40e_virt_mem nvm_buff; bool nvm_release_on_done; + u16 nvm_wait_opcode; /* HMC info */ struct i40e_hmc_info hmc; /* HMC info struct */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index e657eccd232c..25afabf999d0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -67,8 +67,6 @@ struct i40e_vsi { u16 rx_itr_setting; u16 tx_itr_setting; u16 qs_handle; - u8 *rss_hkey_user; /* User configured hash keys */ - u8 *rss_lut_user; /* User configured lookup table entries */ }; /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -222,6 +220,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11) #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12) #define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13) +#define I40EVF_FLAG_PROMISC_ON BIT(15) /* duplicates for common code */ #define I40E_FLAG_FDIR_ATR_ENABLED 0 #define I40E_FLAG_DCB_ENABLED 0 @@ -239,8 +238,15 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES BIT(6) #define I40EVF_FLAG_AQ_MAP_VECTORS BIT(7) #define I40EVF_FLAG_AQ_HANDLE_RESET BIT(8) -#define I40EVF_FLAG_AQ_CONFIGURE_RSS BIT(9) +#define I40EVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ #define I40EVF_FLAG_AQ_GET_CONFIG BIT(10) +/* Newer style, RSS done by the PF so we can ignore hardware vagaries. */ +#define I40EVF_FLAG_AQ_GET_HENA BIT(11) +#define I40EVF_FLAG_AQ_SET_HENA BIT(12) +#define I40EVF_FLAG_AQ_SET_RSS_KEY BIT(13) +#define I40EVF_FLAG_AQ_SET_RSS_LUT BIT(14) +#define I40EVF_FLAG_AQ_REQUEST_PROMISC BIT(15) +#define I40EVF_FLAG_AQ_RELEASE_PROMISC BIT(16) /* OS defined structs */ struct net_device *netdev; @@ -256,10 +262,18 @@ struct i40evf_adapter { bool netdev_registered; bool link_up; enum i40e_virtchnl_ops current_op; -#define CLIENT_ENABLED(_a) ((_a)->vf_res->vf_offload_flags & \ - I40E_VIRTCHNL_VF_OFFLOAD_IWARP) +#define CLIENT_ENABLED(_a) ((_a)->vf_res ? \ + (_a)->vf_res->vf_offload_flags & \ + I40E_VIRTCHNL_VF_OFFLOAD_IWARP : \ + 0) +/* RSS by the PF should be preferred over RSS via other methods. */ +#define RSS_PF(_a) ((_a)->vf_res->vf_offload_flags & \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) #define RSS_AQ(_a) ((_a)->vf_res->vf_offload_flags & \ I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ) +#define RSS_REG(_a) (!((_a)->vf_res->vf_offload_flags & \ + (I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ | \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF))) #define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_offload_flags & \ I40E_VIRTCHNL_VF_OFFLOAD_VLAN) struct i40e_virtchnl_vf_resource *vf_res; /* incl. all VSIs */ @@ -271,6 +285,12 @@ struct i40evf_adapter { struct i40e_eth_stats current_stats; struct i40e_vsi vsi; u32 aq_wait_count; + /* RSS stuff */ + u64 hena; + u16 rss_key_size; + u16 rss_lut_size; + u8 *rss_key; + u8 *rss_lut; }; @@ -314,11 +334,12 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter); void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags); void i40evf_request_stats(struct i40evf_adapter *adapter); void i40evf_request_reset(struct i40evf_adapter *adapter); +void i40evf_get_hena(struct i40evf_adapter *adapter); +void i40evf_set_hena(struct i40evf_adapter *adapter); +void i40evf_set_rss_key(struct i40evf_adapter *adapter); +void i40evf_set_rss_lut(struct i40evf_adapter *adapter); void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, enum i40e_virtchnl_ops v_opcode, i40e_status v_retval, u8 *msg, u16 msglen); -int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, - u16 lut_size); -int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, - u16 lut_size); +int i40evf_config_rss(struct i40evf_adapter *adapter); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index dd4430aae7fa..5a48ee07688f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -378,63 +378,6 @@ static int i40evf_set_coalesce(struct net_device *netdev, } /** - * i40e_get_rss_hash_opts - Get RSS hash Input Set for each flow type - * @adapter: board private structure - * @cmd: ethtool rxnfc command - * - * Returns Success if the flow is supported, else Invalid Input. - **/ -static int i40evf_get_rss_hash_opts(struct i40evf_adapter *adapter, - struct ethtool_rxnfc *cmd) -{ - struct i40e_hw *hw = &adapter->hw; - u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | - ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); - - /* We always hash on IP src and dest addresses */ - cmd->data = RXH_IP_SRC | RXH_IP_DST; - - switch (cmd->flow_type) { - case TCP_V4_FLOW: - if (hena & BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP)) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - break; - case UDP_V4_FLOW: - if (hena & BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP)) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - break; - - case SCTP_V4_FLOW: - case AH_ESP_V4_FLOW: - case AH_V4_FLOW: - case ESP_V4_FLOW: - case IPV4_FLOW: - break; - - case TCP_V6_FLOW: - if (hena & BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP)) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - break; - case UDP_V6_FLOW: - if (hena & BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP)) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - break; - - case SCTP_V6_FLOW: - case AH_ESP_V6_FLOW: - case AH_V6_FLOW: - case ESP_V6_FLOW: - case IPV6_FLOW: - break; - default: - cmd->data = 0; - return -EINVAL; - } - - return 0; -} - -/** * i40evf_get_rxnfc - command to get RX flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command @@ -454,7 +397,8 @@ static int i40evf_get_rxnfc(struct net_device *netdev, ret = 0; break; case ETHTOOL_GRXFH: - ret = i40evf_get_rss_hash_opts(adapter, cmd); + netdev_info(netdev, + "RSS hash info is not available to vf, use pf.\n"); break; default: break; @@ -462,145 +406,6 @@ static int i40evf_get_rxnfc(struct net_device *netdev, return ret; } - -/** - * i40evf_set_rss_hash_opt - Enable/Disable flow types for RSS hash - * @adapter: board private structure - * @cmd: ethtool rxnfc command - * - * Returns Success if the flow input set is supported. - **/ -static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, - struct ethtool_rxnfc *nfc) -{ - struct i40e_hw *hw = &adapter->hw; - u32 flags = adapter->vf_res->vf_offload_flags; - - u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | - ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); - - /* RSS does not support anything other than hashing - * to queues on src and dst IPs and ports - */ - if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) - return -EINVAL; - - /* We need at least the IP SRC and DEST fields for hashing */ - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - - switch (nfc->flow_type) { - case TCP_V4_FLOW: - if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); - - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - } else { - return -EINVAL; - } - break; - case TCP_V6_FLOW: - if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); - - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - } else { - return -EINVAL; - } - break; - case UDP_V4_FLOW: - if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); - - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - } else { - return -EINVAL; - } - break; - case UDP_V6_FLOW: - if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); - - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - } else { - return -EINVAL; - } - break; - case AH_ESP_V4_FLOW: - case AH_V4_FLOW: - case ESP_V4_FLOW: - case SCTP_V4_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) - return -EINVAL; - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); - break; - case AH_ESP_V6_FLOW: - case AH_V6_FLOW: - case ESP_V6_FLOW: - case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) - return -EINVAL; - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); - break; - case IPV4_FLOW: - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - case IPV6_FLOW: - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - default: - return -EINVAL; - } - - wr32(hw, I40E_VFQF_HENA(0), (u32)hena); - wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); - i40e_flush(hw); - - return 0; -} - -/** - * i40evf_set_rxnfc - command to set RX flow classification rules - * @netdev: network interface device structure - * @cmd: ethtool rxnfc command - * - * Returns Success if the command is supported. - **/ -static int i40evf_set_rxnfc(struct net_device *netdev, - struct ethtool_rxnfc *cmd) -{ - struct i40evf_adapter *adapter = netdev_priv(netdev); - int ret = -EOPNOTSUPP; - - switch (cmd->cmd) { - case ETHTOOL_SRXFH: - ret = i40evf_set_rss_hash_opt(adapter, cmd); - break; - default: - break; - } - - return ret; -} - /** * i40evf_get_channels: get the number of channels supported by the device * @netdev: network interface device structure @@ -624,6 +429,19 @@ static void i40evf_get_channels(struct net_device *netdev, } /** + * i40evf_get_rxfh_key_size - get the RSS hash key size + * @netdev: network interface device structure + * + * Returns the table size. + **/ +static u32 i40evf_get_rxfh_key_size(struct net_device *netdev) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + + return adapter->rss_key_size; +} + +/** * i40evf_get_rxfh_indir_size - get the rx flow hash indirection table size * @netdev: network interface device structure * @@ -631,7 +449,9 @@ static void i40evf_get_channels(struct net_device *netdev, **/ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev) { - return (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4; + struct i40evf_adapter *adapter = netdev_priv(netdev); + + return adapter->rss_lut_size; } /** @@ -646,9 +466,6 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_vsi *vsi = &adapter->vsi; - u8 *seed = NULL, *lut; - int ret; u16 i; if (hfunc) @@ -656,24 +473,13 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (!indir) return 0; - seed = key; - - lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); - if (!lut) - return -ENOMEM; - - ret = i40evf_get_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); - if (ret) - goto out; + memcpy(key, adapter->rss_key, adapter->rss_key_size); /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) - indir[i] = (u32)lut[i]; + for (i = 0; i < adapter->rss_lut_size; i++) + indir[i] = (u32)adapter->rss_lut[i]; -out: - kfree(lut); - - return ret; + return 0; } /** @@ -689,8 +495,6 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, const u8 hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_vsi *vsi = &adapter->vsi; - u8 *seed = NULL; u16 i; /* We do not allow change in unsupported parameters */ @@ -701,28 +505,14 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, return 0; if (key) { - if (!vsi->rss_hkey_user) { - vsi->rss_hkey_user = kzalloc(I40EVF_HKEY_ARRAY_SIZE, - GFP_KERNEL); - if (!vsi->rss_hkey_user) - return -ENOMEM; - } - memcpy(vsi->rss_hkey_user, key, I40EVF_HKEY_ARRAY_SIZE); - seed = vsi->rss_hkey_user; - } - if (!vsi->rss_lut_user) { - vsi->rss_lut_user = kzalloc(I40EVF_HLUT_ARRAY_SIZE, - GFP_KERNEL); - if (!vsi->rss_lut_user) - return -ENOMEM; + memcpy(adapter->rss_key, key, adapter->rss_key_size); } /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) - vsi->rss_lut_user[i] = (u8)(indir[i]); + for (i = 0; i < adapter->rss_lut_size; i++) + adapter->rss_lut[i] = (u8)(indir[i]); - return i40evf_config_rss(vsi, seed, vsi->rss_lut_user, - I40EVF_HLUT_ARRAY_SIZE); + return i40evf_config_rss(adapter); } /** @@ -789,11 +579,11 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_coalesce = i40evf_get_coalesce, .set_coalesce = i40evf_set_coalesce, .get_rxnfc = i40evf_get_rxnfc, - .set_rxnfc = i40evf_set_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, .get_rxfh = i40evf_get_rxfh, .set_rxfh = i40evf_set_rxfh, .get_channels = i40evf_get_channels, + .get_rxfh_key_size = i40evf_get_rxfh_key_size, }; /** diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 9110319a8f00..9f0bd7acc22a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 5 -#define DRV_VERSION_BUILD 5 +#define DRV_VERSION_BUILD 10 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -943,6 +943,14 @@ static void i40evf_set_rx_mode(struct net_device *netdev) bottom_of_search_loop: continue; } + + if (netdev->flags & IFF_PROMISC && + !(adapter->flags & I40EVF_FLAG_PROMISC_ON)) + adapter->aq_required |= I40EVF_FLAG_AQ_REQUEST_PROMISC; + else if (!(netdev->flags & IFF_PROMISC) && + adapter->flags & I40EVF_FLAG_PROMISC_ON) + adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_PROMISC; + clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); } @@ -1224,24 +1232,18 @@ out: } /** - * i40e_config_rss_aq - Prepare for RSS using AQ commands - * @vsi: vsi structure - * @seed: RSS hash seed - * @lut: Lookup table - * @lut_size: Lookup table size + * i40e_config_rss_aq - Configure RSS keys and lut by using AQ commands + * @adapter: board private structure * * Return 0 on success, negative on failure **/ -static int i40evf_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, - u8 *lut, u16 lut_size) +static int i40evf_config_rss_aq(struct i40evf_adapter *adapter) { - struct i40evf_adapter *adapter = vsi->back; + struct i40e_aqc_get_set_rss_key_data *rss_key = + (struct i40e_aqc_get_set_rss_key_data *)adapter->rss_key; struct i40e_hw *hw = &adapter->hw; int ret = 0; - if (!vsi->id) - return -EINVAL; - if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n", @@ -1249,198 +1251,82 @@ static int i40evf_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, return -EBUSY; } - if (seed) { - struct i40e_aqc_get_set_rss_key_data *rss_key = - (struct i40e_aqc_get_set_rss_key_data *)seed; - ret = i40evf_aq_set_rss_key(hw, vsi->id, rss_key); - if (ret) { - dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } + ret = i40evf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); + if (ret) { + dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } - if (lut) { - ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, lut, lut_size); - if (ret) { - dev_err(&adapter->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } + ret = i40evf_aq_set_rss_lut(hw, adapter->vsi.id, false, + adapter->rss_lut, adapter->rss_lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); } return ret; + } /** * i40evf_config_rss_reg - Configure RSS keys and lut by writing registers - * @vsi: Pointer to vsi structure - * @seed: RSS hash seed - * @lut: Lookup table - * @lut_size: Lookup table size + * @adapter: board private structure * * Returns 0 on success, negative on failure **/ -static int i40evf_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, - const u8 *lut, u16 lut_size) +static int i40evf_config_rss_reg(struct i40evf_adapter *adapter) { - struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; + u32 *dw; u16 i; - if (seed) { - u32 *seed_dw = (u32 *)seed; - - for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); - } + dw = (u32 *)adapter->rss_key; + for (i = 0; i <= adapter->rss_key_size / 4; i++) + wr32(hw, I40E_VFQF_HKEY(i), dw[i]); - if (lut) { - u32 *lut_dw = (u32 *)lut; + dw = (u32 *)adapter->rss_lut; + for (i = 0; i <= adapter->rss_lut_size / 4; i++) + wr32(hw, I40E_VFQF_HLUT(i), dw[i]); - if (lut_size != I40EVF_HLUT_ARRAY_SIZE) - return -EINVAL; - - for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) - wr32(hw, I40E_VFQF_HLUT(i), lut_dw[i]); - } i40e_flush(hw); return 0; } /** - * * i40evf_get_rss_aq - Get RSS keys and lut by using AQ commands - * @vsi: Pointer to vsi structure - * @seed: RSS hash seed - * @lut: Lookup table - * @lut_size: Lookup table size - * - * Return 0 on success, negative on failure - **/ -static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, - u8 *lut, u16 lut_size) -{ - struct i40evf_adapter *adapter = vsi->back; - struct i40e_hw *hw = &adapter->hw; - int ret = 0; - - if (seed) { - ret = i40evf_aq_get_rss_key(hw, vsi->id, - (struct i40e_aqc_get_set_rss_key_data *)seed); - if (ret) { - dev_err(&adapter->pdev->dev, - "Cannot get RSS key, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - - if (lut) { - ret = i40evf_aq_get_rss_lut(hw, vsi->id, false, lut, lut_size); - if (ret) { - dev_err(&adapter->pdev->dev, - "Cannot get RSS lut, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - - return ret; -} - -/** - * * i40evf_get_rss_reg - Get RSS keys and lut by reading registers - * @vsi: Pointer to vsi structure - * @seed: RSS hash seed - * @lut: Lookup table - * @lut_size: Lookup table size - * - * Returns 0 on success, negative on failure - **/ -static int i40evf_get_rss_reg(struct i40e_vsi *vsi, const u8 *seed, - const u8 *lut, u16 lut_size) -{ - struct i40evf_adapter *adapter = vsi->back; - struct i40e_hw *hw = &adapter->hw; - u16 i; - - if (seed) { - u32 *seed_dw = (u32 *)seed; - - for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) - seed_dw[i] = rd32(hw, I40E_VFQF_HKEY(i)); - } - - if (lut) { - u32 *lut_dw = (u32 *)lut; - - if (lut_size != I40EVF_HLUT_ARRAY_SIZE) - return -EINVAL; - - for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) - lut_dw[i] = rd32(hw, I40E_VFQF_HLUT(i)); - } - - return 0; -} - -/** * i40evf_config_rss - Configure RSS keys and lut - * @vsi: Pointer to vsi structure - * @seed: RSS hash seed - * @lut: Lookup table - * @lut_size: Lookup table size - * - * Returns 0 on success, negative on failure - **/ -int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, - u8 *lut, u16 lut_size) -{ - struct i40evf_adapter *adapter = vsi->back; - - if (RSS_AQ(adapter)) - return i40evf_config_rss_aq(vsi, seed, lut, lut_size); - else - return i40evf_config_rss_reg(vsi, seed, lut, lut_size); -} - -/** - * i40evf_get_rss - Get RSS keys and lut - * @vsi: Pointer to vsi structure - * @seed: RSS hash seed - * @lut: Lookup table - * @lut_size: Lookup table size + * @adapter: board private structure * * Returns 0 on success, negative on failure **/ -int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) +int i40evf_config_rss(struct i40evf_adapter *adapter) { - struct i40evf_adapter *adapter = vsi->back; - if (RSS_AQ(adapter)) - return i40evf_get_rss_aq(vsi, seed, lut, lut_size); - else - return i40evf_get_rss_reg(vsi, seed, lut, lut_size); + if (RSS_PF(adapter)) { + adapter->aq_required |= I40EVF_FLAG_AQ_SET_RSS_LUT | + I40EVF_FLAG_AQ_SET_RSS_KEY; + return 0; + } else if (RSS_AQ(adapter)) { + return i40evf_config_rss_aq(adapter); + } else { + return i40evf_config_rss_reg(adapter); + } } /** * i40evf_fill_rss_lut - Fill the lut with default values - * @lut: Lookup table to be filled with - * @rss_table_size: Lookup table size - * @rss_size: Range of queue number for hashing + * @adapter: board private structure **/ -static void i40evf_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) +static void i40evf_fill_rss_lut(struct i40evf_adapter *adapter) { u16 i; - for (i = 0; i < rss_table_size; i++) - lut[i] = i % rss_size; + for (i = 0; i < adapter->rss_lut_size; i++) + adapter->rss_lut[i] = i % adapter->num_active_queues; } /** @@ -1451,42 +1337,25 @@ static void i40evf_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) **/ static int i40evf_init_rss(struct i40evf_adapter *adapter) { - struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; - u8 seed[I40EVF_HKEY_ARRAY_SIZE]; - u64 hena; - u8 *lut; int ret; - /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ - if (adapter->vf_res->vf_offload_flags & - I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) - hena = I40E_DEFAULT_RSS_HENA_EXPANDED; - else - hena = I40E_DEFAULT_RSS_HENA; - wr32(hw, I40E_VFQF_HENA(0), (u32)hena); - wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); + if (!RSS_PF(adapter)) { + /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ + if (adapter->vf_res->vf_offload_flags & + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + adapter->hena = I40E_DEFAULT_RSS_HENA_EXPANDED; + else + adapter->hena = I40E_DEFAULT_RSS_HENA; - lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); - if (!lut) - return -ENOMEM; + wr32(hw, I40E_VFQF_HENA(0), (u32)adapter->hena); + wr32(hw, I40E_VFQF_HENA(1), (u32)(adapter->hena >> 32)); + } - /* Use user configured lut if there is one, otherwise use default */ - if (vsi->rss_lut_user) - memcpy(lut, vsi->rss_lut_user, I40EVF_HLUT_ARRAY_SIZE); - else - i40evf_fill_rss_lut(lut, I40EVF_HLUT_ARRAY_SIZE, - adapter->num_active_queues); + i40evf_fill_rss_lut(adapter); - /* Use user configured hash key if there is one, otherwise - * user default. - */ - if (vsi->rss_hkey_user) - memcpy(seed, vsi->rss_hkey_user, I40EVF_HKEY_ARRAY_SIZE); - else - netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); - ret = i40evf_config_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); - kfree(lut); + netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size); + ret = i40evf_config_rss(adapter); return ret; } @@ -1601,19 +1470,16 @@ err_set_interrupt: } /** - * i40evf_clear_rss_config_user - Clear user configurations of RSS - * @vsi: Pointer to VSI structure + * i40evf_free_rss - Free memory used by RSS structs + * @adapter: board private structure **/ -static void i40evf_clear_rss_config_user(struct i40e_vsi *vsi) +static void i40evf_free_rss(struct i40evf_adapter *adapter) { - if (!vsi) - return; + kfree(adapter->rss_key); + adapter->rss_key = NULL; - kfree(vsi->rss_hkey_user); - vsi->rss_hkey_user = NULL; - - kfree(vsi->rss_lut_user); - vsi->rss_lut_user = NULL; + kfree(adapter->rss_lut); + adapter->rss_lut = NULL; } /** @@ -1747,6 +1613,33 @@ static void i40evf_watchdog_task(struct work_struct *work) adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_RSS; goto watchdog_done; } + if (adapter->aq_required & I40EVF_FLAG_AQ_GET_HENA) { + i40evf_get_hena(adapter); + goto watchdog_done; + } + if (adapter->aq_required & I40EVF_FLAG_AQ_SET_HENA) { + i40evf_set_hena(adapter); + goto watchdog_done; + } + if (adapter->aq_required & I40EVF_FLAG_AQ_SET_RSS_KEY) { + i40evf_set_rss_key(adapter); + goto watchdog_done; + } + if (adapter->aq_required & I40EVF_FLAG_AQ_SET_RSS_LUT) { + i40evf_set_rss_lut(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_PROMISC) { + i40evf_set_promiscuous(adapter, I40E_FLAG_VF_UNICAST_PROMISC | + I40E_FLAG_VF_MULTICAST_PROMISC); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_RELEASE_PROMISC) { + i40evf_set_promiscuous(adapter, 0); + goto watchdog_done; + } if (adapter->state == __I40EVF_RUNNING) i40evf_request_stats(adapter); @@ -2325,6 +2218,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter) { struct i40e_virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; + struct i40e_vsi *vsi = &adapter->vsi; int i; /* got VF config message back from PF, now we can parse it */ @@ -2337,40 +2231,46 @@ int i40evf_process_config(struct i40evf_adapter *adapter) return -ENODEV; } - netdev->features |= NETIF_F_HIGHDMA | - NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_SCTP_CRC | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_GRE | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_RXCSUM | - NETIF_F_GRO; - - netdev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_GRE | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - - if (adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE) - netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; - - /* always clear VLAN features because they can change at every reset */ - netdev->features &= ~(I40EVF_VLAN_FEATURES); - /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; - - if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN) { - netdev->vlan_features = netdev->features; - netdev->features |= I40EVF_VLAN_FEATURES; - } + netdev->hw_enc_features |= NETIF_F_SG | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_HIGHDMA | + NETIF_F_SOFT_FEATURES | + NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_TSO6 | + NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_IPIP | + NETIF_F_GSO_SIT | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL | + NETIF_F_SCTP_CRC | + NETIF_F_RXHASH | + NETIF_F_RXCSUM | + 0; + + if (!(adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE)) + netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; + + /* record features VLANs can make use of */ + netdev->vlan_features |= netdev->hw_enc_features | + NETIF_F_TSO_MANGLEID; + + /* Write features and hw_features separately to avoid polluting + * with, or dropping, features that are set when we registgered. + */ + netdev->hw_features |= netdev->hw_enc_features; + + netdev->features |= netdev->hw_enc_features | I40EVF_VLAN_FEATURES; + netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + + /* disable VLAN features if not supported */ + if (!(vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN)) + netdev->features ^= I40EVF_VLAN_FEATURES; adapter->vsi.id = adapter->vsi_res->vsi_id; @@ -2381,8 +2281,16 @@ int i40evf_process_config(struct i40evf_adapter *adapter) ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | ITR_REG_TO_USEC(I40E_ITR_TX_DEF)); - adapter->vsi.netdev = adapter->netdev; - adapter->vsi.qs_handle = adapter->vsi_res->qset_handle; + vsi->netdev = adapter->netdev; + vsi->qs_handle = adapter->vsi_res->qset_handle; + if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { + adapter->rss_key_size = vfres->rss_key_size; + adapter->rss_lut_size = vfres->rss_lut_size; + } else { + adapter->rss_key_size = I40EVF_HKEY_ARRAY_SIZE; + adapter->rss_lut_size = I40EVF_HLUT_ARRAY_SIZE; + } + return 0; } @@ -2578,6 +2486,11 @@ static void i40evf_init_task(struct work_struct *work) set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_misc_irq_enable(adapter); + adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL); + adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL); + if (!adapter->rss_key || !adapter->rss_lut) + goto err_mem; + if (RSS_AQ(adapter)) { adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); @@ -2588,7 +2501,8 @@ static void i40evf_init_task(struct work_struct *work) restart: schedule_delayed_work(&adapter->init_task, msecs_to_jiffies(30)); return; - +err_mem: + i40evf_free_rss(adapter); err_register: i40evf_free_misc_irq(adapter); err_sw_init: @@ -2870,8 +2784,7 @@ static void i40evf_remove(struct pci_dev *pdev) flush_scheduled_work(); - /* Clear user configurations for RSS */ - i40evf_clear_rss_config_user(&adapter->vsi); + i40evf_free_rss(adapter); if (hw->aq.asq.count) i40evf_shutdown_adminq(hw); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 488e738f76c6..ba7fbc0608a6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -652,6 +652,17 @@ void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags) adapter->current_op); return; } + + if (flags) { + adapter->flags |= I40EVF_FLAG_PROMISC_ON; + adapter->aq_required &= ~I40EVF_FLAG_AQ_REQUEST_PROMISC; + dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); + } else { + adapter->flags &= ~I40EVF_FLAG_PROMISC_ON; + adapter->aq_required &= ~I40EVF_FLAG_AQ_RELEASE_PROMISC; + dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); + } + adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE; vpi.vsi_id = adapter->vsi_res->vsi_id; vpi.flags = flags; @@ -681,6 +692,115 @@ void i40evf_request_stats(struct i40evf_adapter *adapter) /* if the request failed, don't lock out others */ adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; } + +/** + * i40evf_get_hena + * @adapter: adapter structure + * + * Request hash enable capabilities from PF + **/ +void i40evf_get_hena(struct i40evf_adapter *adapter) +{ + if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot get RSS hash capabilities, command %d pending\n", + adapter->current_op); + return; + } + adapter->current_op = I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_HENA; + i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS, + NULL, 0); +} + +/** + * i40evf_set_hena + * @adapter: adapter structure + * + * Request the PF to set our RSS hash capabilities + **/ +void i40evf_set_hena(struct i40evf_adapter *adapter) +{ + struct i40e_virtchnl_rss_hena vrh; + + if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set RSS hash enable, command %d pending\n", + adapter->current_op); + return; + } + vrh.hena = adapter->hena; + adapter->current_op = I40E_VIRTCHNL_OP_SET_RSS_HENA; + adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_HENA; + i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_SET_RSS_HENA, + (u8 *)&vrh, sizeof(vrh)); +} + +/** + * i40evf_set_rss_key + * @adapter: adapter structure + * + * Request the PF to set our RSS hash key + **/ +void i40evf_set_rss_key(struct i40evf_adapter *adapter) +{ + struct i40e_virtchnl_rss_key *vrk; + int len; + + if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set RSS key, command %d pending\n", + adapter->current_op); + return; + } + len = sizeof(struct i40e_virtchnl_rss_key) + + (adapter->rss_key_size * sizeof(u8)) - 1; + vrk = kzalloc(len, GFP_KERNEL); + if (!vrk) + return; + vrk->vsi_id = adapter->vsi.id; + vrk->key_len = adapter->rss_key_size; + memcpy(vrk->key, adapter->rss_key, adapter->rss_key_size); + + adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_RSS_KEY; + adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_RSS_KEY; + i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY, + (u8 *)vrk, len); + kfree(vrk); +} + +/** + * i40evf_set_rss_lut + * @adapter: adapter structure + * + * Request the PF to set our RSS lookup table + **/ +void i40evf_set_rss_lut(struct i40evf_adapter *adapter) +{ + struct i40e_virtchnl_rss_lut *vrl; + int len; + + if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set RSS LUT, command %d pending\n", + adapter->current_op); + return; + } + len = sizeof(struct i40e_virtchnl_rss_lut) + + (adapter->rss_lut_size * sizeof(u8)) - 1; + vrl = kzalloc(len, GFP_KERNEL); + if (!vrl) + return; + vrl->vsi_id = adapter->vsi.id; + vrl->lut_entries = adapter->rss_lut_size; + memcpy(vrl->lut, adapter->rss_lut, adapter->rss_lut_size); + adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_RSS_LUT; + adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_RSS_LUT; + i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT, + (u8 *)vrl, len); + kfree(vrl); +} + /** * i40evf_request_reset * @adapter: adapter structure @@ -820,6 +940,16 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (v_opcode != adapter->current_op) return; break; + case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: { + struct i40e_virtchnl_rss_hena *vrh = + (struct i40e_virtchnl_rss_hena *)msg; + if (msglen == sizeof(*vrh)) + adapter->hena = vrh->hena; + else + dev_warn(&adapter->pdev->dev, + "Invalid message %d from PF\n", v_opcode); + } + break; default: if (v_opcode != adapter->current_op) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index d10ed62993c1..781c8787ab66 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -143,14 +143,11 @@ struct vf_data_storage { unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; - u16 default_vf_vlan_id; - u16 vlans_enabled; bool clear_to_send; bool pf_set_mac; u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; - u16 vlan_count; u8 spoofchk_enabled; bool rss_query_enabled; u8 trusted; @@ -173,7 +170,7 @@ struct vf_macvlans { }; #define IXGBE_MAX_TXD_PWR 14 -#define IXGBE_MAX_DATA_PER_TXD (1 << IXGBE_MAX_TXD_PWR) +#define IXGBE_MAX_DATA_PER_TXD (1u << IXGBE_MAX_TXD_PWR) /* Tx Descriptors needed, worst case */ #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD) @@ -623,44 +620,44 @@ struct ixgbe_adapter { * thus the additional *_CAPABLE flags. */ u32 flags; -#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1) -#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3) -#define IXGBE_FLAG_RX_1BUF_CAPABLE (u32)(1 << 4) -#define IXGBE_FLAG_RX_PS_CAPABLE (u32)(1 << 5) -#define IXGBE_FLAG_RX_PS_ENABLED (u32)(1 << 6) -#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 8) -#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 9) -#define IXGBE_FLAG_IMIR_ENABLED (u32)(1 << 10) -#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 11) -#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 12) -#define IXGBE_FLAG_VMDQ_CAPABLE (u32)(1 << 13) -#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 14) -#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 15) -#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 16) -#define IXGBE_FLAG_NEED_LINK_CONFIG (u32)(1 << 17) -#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 18) -#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 19) -#define IXGBE_FLAG_FCOE_CAPABLE (u32)(1 << 20) -#define IXGBE_FLAG_FCOE_ENABLED (u32)(1 << 21) -#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 22) -#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 23) +#define IXGBE_FLAG_MSI_ENABLED BIT(1) +#define IXGBE_FLAG_MSIX_ENABLED BIT(3) +#define IXGBE_FLAG_RX_1BUF_CAPABLE BIT(4) +#define IXGBE_FLAG_RX_PS_CAPABLE BIT(5) +#define IXGBE_FLAG_RX_PS_ENABLED BIT(6) +#define IXGBE_FLAG_DCA_ENABLED BIT(8) +#define IXGBE_FLAG_DCA_CAPABLE BIT(9) +#define IXGBE_FLAG_IMIR_ENABLED BIT(10) +#define IXGBE_FLAG_MQ_CAPABLE BIT(11) +#define IXGBE_FLAG_DCB_ENABLED BIT(12) +#define IXGBE_FLAG_VMDQ_CAPABLE BIT(13) +#define IXGBE_FLAG_VMDQ_ENABLED BIT(14) +#define IXGBE_FLAG_FAN_FAIL_CAPABLE BIT(15) +#define IXGBE_FLAG_NEED_LINK_UPDATE BIT(16) +#define IXGBE_FLAG_NEED_LINK_CONFIG BIT(17) +#define IXGBE_FLAG_FDIR_HASH_CAPABLE BIT(18) +#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE BIT(19) +#define IXGBE_FLAG_FCOE_CAPABLE BIT(20) +#define IXGBE_FLAG_FCOE_ENABLED BIT(21) +#define IXGBE_FLAG_SRIOV_CAPABLE BIT(22) +#define IXGBE_FLAG_SRIOV_ENABLED BIT(23) #define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24) #define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) u32 flags2; -#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1 << 0) -#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1) -#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE (u32)(1 << 2) -#define IXGBE_FLAG2_TEMP_SENSOR_EVENT (u32)(1 << 3) -#define IXGBE_FLAG2_SEARCH_FOR_SFP (u32)(1 << 4) -#define IXGBE_FLAG2_SFP_NEEDS_RESET (u32)(1 << 5) -#define IXGBE_FLAG2_RESET_REQUESTED (u32)(1 << 6) -#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 7) -#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 8) -#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 9) -#define IXGBE_FLAG2_PTP_PPS_ENABLED (u32)(1 << 10) -#define IXGBE_FLAG2_PHY_INTERRUPT (u32)(1 << 11) +#define IXGBE_FLAG2_RSC_CAPABLE BIT(0) +#define IXGBE_FLAG2_RSC_ENABLED BIT(1) +#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE BIT(2) +#define IXGBE_FLAG2_TEMP_SENSOR_EVENT BIT(3) +#define IXGBE_FLAG2_SEARCH_FOR_SFP BIT(4) +#define IXGBE_FLAG2_SFP_NEEDS_RESET BIT(5) +#define IXGBE_FLAG2_RESET_REQUESTED BIT(6) +#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT BIT(7) +#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP BIT(8) +#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP BIT(9) +#define IXGBE_FLAG2_PTP_PPS_ENABLED BIT(10) +#define IXGBE_FLAG2_PHY_INTERRUPT BIT(11) #define IXGBE_FLAG2_VXLAN_REREG_NEEDED BIT(12) #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) @@ -806,6 +803,8 @@ struct ixgbe_adapter { #define IXGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ u32 rss_key[IXGBE_RSS_KEY_SIZE / sizeof(u32)]; + + bool need_crosstalk_fix; }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) @@ -828,7 +827,7 @@ struct ixgbe_fdir_filter { struct hlist_node fdir_node; union ixgbe_atr_input filter; u16 sw_idx; - u16 action; + u64 action; }; enum ixgbe_state_t { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 6ecd598c6ef5..fb51be74dd4c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -792,7 +792,7 @@ mac_reset_top: } gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR); - gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6)); + gheccr &= ~(BIT(21) | BIT(18) | BIT(9) | BIT(6)); IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr); /* @@ -914,10 +914,10 @@ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); if (vlan_on) /* Turn on this VLAN id */ - bits |= (1 << bitindex); + bits |= BIT(bitindex); else /* Turn off this VLAN id */ - bits &= ~(1 << bitindex); + bits &= ~BIT(bitindex); IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits); return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 01519787324a..47afed74a54d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -1296,17 +1296,17 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl) #define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \ do { \ u32 n = (_n); \ - if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \ + if (IXGBE_ATR_COMMON_HASH_KEY & BIT(n)) \ common_hash ^= lo_hash_dword >> n; \ - else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ + else if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n)) \ bucket_hash ^= lo_hash_dword >> n; \ - else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \ + else if (IXGBE_ATR_SIGNATURE_HASH_KEY & BIT(n)) \ sig_hash ^= lo_hash_dword << (16 - n); \ - if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \ + if (IXGBE_ATR_COMMON_HASH_KEY & BIT(n + 16)) \ common_hash ^= hi_hash_dword >> n; \ - else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ + else if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n + 16)) \ bucket_hash ^= hi_hash_dword >> n; \ - else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \ + else if (IXGBE_ATR_SIGNATURE_HASH_KEY & BIT(n + 16)) \ sig_hash ^= hi_hash_dword << (16 - n); \ } while (0) @@ -1440,9 +1440,9 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, #define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \ do { \ u32 n = (_n); \ - if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ + if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n)) \ bucket_hash ^= lo_hash_dword >> n; \ - if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ + if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n + 16)) \ bucket_hash ^= hi_hash_dword >> n; \ } while (0) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 737443a015d5..902d2061ce73 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -825,8 +825,8 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) */ eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> IXGBE_EEC_SIZE_SHIFT); - eeprom->word_size = 1 << (eeprom_size + - IXGBE_EEPROM_WORD_SIZE_SHIFT); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); } if (eec & IXGBE_EEC_ADDR_SIZE) @@ -1502,7 +1502,7 @@ static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, * Mask is used to shift "count" bits of "data" out to the EEPROM * one bit at a time. Determine the starting bit based on count */ - mask = 0x01 << (count - 1); + mask = BIT(count - 1); for (i = 0; i < count; i++) { /* @@ -1991,7 +1991,7 @@ static void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr) */ vector_reg = (vector >> 5) & 0x7F; vector_bit = vector & 0x1F; - hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit); + hw->mac.mta_shadow[vector_reg] |= BIT(vector_bit); } /** @@ -2921,10 +2921,10 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) mpsar_hi = 0; } } else if (vmdq < 32) { - mpsar_lo &= ~(1 << vmdq); + mpsar_lo &= ~BIT(vmdq); IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo); } else { - mpsar_hi &= ~(1 << (vmdq - 32)); + mpsar_hi &= ~BIT(vmdq - 32); IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi); } @@ -2953,11 +2953,11 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) if (vmdq < 32) { mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); - mpsar |= 1 << vmdq; + mpsar |= BIT(vmdq); IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); } else { mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); - mpsar |= 1 << (vmdq - 32); + mpsar |= BIT(vmdq - 32); IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); } return 0; @@ -2978,11 +2978,11 @@ s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq) u32 rar = hw->mac.san_mac_rar_index; if (vmdq < 32) { - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 1 << vmdq); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), BIT(vmdq)); IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0); } else { IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0); - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 1 << (vmdq - 32)); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), BIT(vmdq - 32)); } return 0; @@ -3082,7 +3082,7 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, * bits[4-0]: which bit in the register */ regidx = vlan / 32; - vfta_delta = 1 << (vlan % 32); + vfta_delta = BIT(vlan % 32); vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx)); /* vfta_delta represents the difference between the current value @@ -3113,12 +3113,12 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, bits = IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32)); /* set the pool bit */ - bits |= 1 << (vind % 32); + bits |= BIT(vind % 32); if (vlan_on) goto vlvf_update; /* clear the pool bit */ - bits ^= 1 << (vind % 32); + bits ^= BIT(vind % 32); if (!bits && !IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + 1 - vind / 32))) { @@ -3310,43 +3310,25 @@ wwn_prefix_err: /** * ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing * @hw: pointer to hardware structure - * @enable: enable or disable switch for anti-spoofing - * @pf: Physical Function pool - do not enable anti-spoofing for the PF + * @enable: enable or disable switch for MAC anti-spoofing + * @vf: Virtual Function pool - VF Pool to set for MAC anti-spoofing * **/ -void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf) +void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf) { - int j; - int pf_target_reg = pf >> 3; - int pf_target_shift = pf % 8; - u32 pfvfspoof = 0; + int vf_target_reg = vf >> 3; + int vf_target_shift = vf % 8; + u32 pfvfspoof; if (hw->mac.type == ixgbe_mac_82598EB) return; + pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); if (enable) - pfvfspoof = IXGBE_SPOOF_MACAS_MASK; - - /* - * PFVFSPOOF register array is size 8 with 8 bits assigned to - * MAC anti-spoof enables in each register array element. - */ - for (j = 0; j < pf_target_reg; j++) - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof); - - /* - * The PF should be allowed to spoof so that it can support - * emulation mode NICs. Do not set the bits assigned to the PF - */ - pfvfspoof &= (1 << pf_target_shift) - 1; - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof); - - /* - * Remaining pools belong to the PF so they do not need to have - * anti-spoofing enabled. - */ - for (j++; j < IXGBE_PFVFSPOOF_REG_COUNT; j++) - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), 0); + pfvfspoof |= BIT(vf_target_shift); + else + pfvfspoof &= ~BIT(vf_target_shift); + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); } /** @@ -3367,9 +3349,9 @@ void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf) pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); if (enable) - pfvfspoof |= (1 << vf_target_shift); + pfvfspoof |= BIT(vf_target_shift); else - pfvfspoof &= ~(1 << vf_target_shift); + pfvfspoof &= ~BIT(vf_target_shift); IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 6f8e6a56e242..6d4c260d0cbd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -106,7 +106,7 @@ s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked); s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index); s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index); -void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf); +void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf); void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf); s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps); s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c index f8fb2acc2632..072ef3b5fc61 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c @@ -186,7 +186,7 @@ void ixgbe_dcb_unpack_pfc(struct ixgbe_dcb_config *cfg, u8 *pfc_en) for (*pfc_en = 0, tc = 0; tc < MAX_TRAFFIC_CLASS; tc++) { if (tc_config[tc].dcb_pfc != pfc_disabled) - *pfc_en |= 1 << tc; + *pfc_en |= BIT(tc); } } @@ -232,7 +232,7 @@ void ixgbe_dcb_unpack_prio(struct ixgbe_dcb_config *cfg, int direction, u8 ixgbe_dcb_get_tc_from_up(struct ixgbe_dcb_config *cfg, int direction, u8 up) { struct tc_configuration *tc_config = &cfg->tc_config[0]; - u8 prio_mask = 1 << up; + u8 prio_mask = BIT(up); u8 tc = cfg->num_tcs.pg_tcs; /* If tc is 0 then DCB is likely not enabled or supported */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c index d3ba63f9ad37..b79e93a5b699 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c @@ -210,7 +210,7 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) /* Configure PFC Tx thresholds per TC */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { - if (!(pfc_en & (1 << i))) { + if (!(pfc_en & BIT(i))) { IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0); IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0); continue; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c index b5cc989a3d23..1011d644978f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c @@ -248,7 +248,7 @@ s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc) int enabled = 0; for (j = 0; j < MAX_USER_PRIORITY; j++) { - if ((prio_tc[j] == i) && (pfc_en & (1 << j))) { + if ((prio_tc[j] == i) && (pfc_en & BIT(j))) { enabled = 1; break; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index 2707bda37418..b8fc3cfec831 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -62,7 +62,7 @@ static int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max) }; u8 up = dcb_getapp(adapter->netdev, &app); - if (up && !(up & (1 << adapter->fcoe.up))) + if (up && !(up & BIT(adapter->fcoe.up))) changes |= BIT_APP_UPCHG; #endif @@ -657,7 +657,7 @@ static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev, app->protocol == ETH_P_FCOE) { u8 app_mask = dcb_ieee_getapp_mask(dev, app); - if (app_mask & (1 << adapter->fcoe.up)) + if (app_mask & BIT(adapter->fcoe.up)) return 0; adapter->fcoe.up = app->priority; @@ -700,7 +700,7 @@ static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev, app->protocol == ETH_P_FCOE) { u8 app_mask = dcb_ieee_getapp_mask(dev, app); - if (app_mask & (1 << adapter->fcoe.up)) + if (app_mask & BIT(adapter->fcoe.up)) return 0; adapter->fcoe.up = app_mask ? diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9f76be1431b1..d3efcb4fecce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1586,7 +1586,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) /* Test each interrupt */ for (; i < 10; i++) { /* Interrupt to test */ - mask = 1 << i; + mask = BIT(i); if (!shared_int) { /* @@ -3014,14 +3014,14 @@ static int ixgbe_get_ts_info(struct net_device *dev, info->phc_index = -1; info->tx_types = - (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); + BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); info->rx_filters = - (1 << HWTSTAMP_FILTER_NONE) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | - (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); + BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); break; default: return ethtool_op_get_ts_info(dev, info); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2976df77bf14..0ef4a15bb23e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -371,6 +371,27 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) if (ixgbe_removed(reg_addr)) return IXGBE_FAILED_READ_REG; + if (unlikely(hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M)) { + struct ixgbe_adapter *adapter; + int i; + + for (i = 0; i < 200; ++i) { + value = readl(reg_addr + IXGBE_MAC_SGMII_BUSY); + if (likely(!value)) + goto writes_completed; + if (value == IXGBE_FAILED_READ_REG) { + ixgbe_remove_adapter(hw); + return IXGBE_FAILED_READ_REG; + } + udelay(5); + } + + adapter = hw->back; + e_warn(hw, "register writes incomplete %08x\n", value); + } + +writes_completed: value = readl(reg_addr + reg); if (unlikely(value == IXGBE_FAILED_READ_REG)) ixgbe_check_remove(hw, reg); @@ -2224,7 +2245,7 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) /* Populate MSIX to EITR Select */ if (adapter->num_vfs > 32) { - u32 eitrsel = (1 << (adapter->num_vfs - 32)) - 1; + u32 eitrsel = BIT(adapter->num_vfs - 32) - 1; IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, eitrsel); } @@ -2863,7 +2884,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget) if (adapter->rx_itr_setting & 1) ixgbe_set_itr(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state)) - ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx)); + ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx)); return 0; } @@ -3156,15 +3177,15 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, * currently 40. */ if (!ring->q_vector || (ring->q_vector->itr < IXGBE_100K_ITR)) - txdctl |= (1 << 16); /* WTHRESH = 1 */ + txdctl |= 1u << 16; /* WTHRESH = 1 */ else - txdctl |= (8 << 16); /* WTHRESH = 8 */ + txdctl |= 8u << 16; /* WTHRESH = 8 */ /* * Setting PTHRESH to 32 both improves performance * and avoids a TX hang with DFP enabled */ - txdctl |= (1 << 8) | /* HTHRESH = 1 */ + txdctl |= (1u << 8) | /* HTHRESH = 1 */ 32; /* PTHRESH = 32 */ /* reinitialize flowdirector state */ @@ -3716,9 +3737,9 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) return; if (rss_i > 3) - psrtype |= 2 << 29; + psrtype |= 2u << 29; else if (rss_i > 1) - psrtype |= 1 << 29; + psrtype |= 1u << 29; for_each_set_bit(pool, &adapter->fwd_bitmask, 32) IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype); @@ -3745,9 +3766,9 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0; /* Enable only the PF's pool for Tx/Rx */ - IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), (~0) << vf_shift); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), GENMASK(vf_shift, 31)); IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), (~0) << vf_shift); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), GENMASK(vf_shift, 31)); IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1); if (adapter->bridge_mode == BRIDGE_MODE_VEB) IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); @@ -3776,34 +3797,10 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); - - /* Enable MAC Anti-Spoofing */ - hw->mac.ops.set_mac_anti_spoofing(hw, (adapter->num_vfs != 0), - adapter->num_vfs); - - /* Ensure LLDP and FC is set for Ethertype Antispoofing if we will be - * calling set_ethertype_anti_spoofing for each VF in loop below - */ - if (hw->mac.ops.set_ethertype_anti_spoofing) { - IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_LLDP), - (IXGBE_ETQF_FILTER_EN | - IXGBE_ETQF_TX_ANTISPOOF | - IXGBE_ETH_P_LLDP)); - - IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FC), - (IXGBE_ETQF_FILTER_EN | - IXGBE_ETQF_TX_ANTISPOOF | - ETH_P_PAUSE)); - } - - /* For VFs that have spoof checking turned off */ for (i = 0; i < adapter->num_vfs; i++) { - if (!adapter->vfinfo[i].spoofchk_enabled) - ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i, false); - - /* enable ethertype anti spoofing if hw supports it */ - if (hw->mac.ops.set_ethertype_anti_spoofing) - hw->mac.ops.set_ethertype_anti_spoofing(hw, true, i); + /* configure spoof checking */ + ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i, + adapter->vfinfo[i].spoofchk_enabled); /* Enable/Disable RSS query feature */ ixgbe_ndo_set_vf_rss_query_en(adapter->netdev, i, @@ -3997,7 +3994,7 @@ void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid) * entry other than the PF. */ word = idx * 2 + (VMDQ_P(0) / 32); - bits = ~(1 << (VMDQ_P(0)) % 32); + bits = ~BIT(VMDQ_P(0) % 32); bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word)); /* Disable the filter so this falls into the default pool. */ @@ -4132,7 +4129,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); u32 vlvfb = IXGBE_READ_REG(hw, reg_offset); - vlvfb |= 1 << (VMDQ_P(0) % 32); + vlvfb |= BIT(VMDQ_P(0) % 32); IXGBE_WRITE_REG(hw, reg_offset, vlvfb); } @@ -4162,7 +4159,7 @@ static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset) if (vlvf) { /* record VLAN ID in VFTA */ - vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + vfta[(vid - vid_start) / 32] |= BIT(vid % 32); /* if PF is part of this then continue */ if (test_bit(vid, adapter->active_vlans)) @@ -4171,7 +4168,7 @@ static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset) /* remove PF from the pool */ word = i * 2 + VMDQ_P(0) / 32; - bits = ~(1 << (VMDQ_P(0) % 32)); + bits = ~BIT(VMDQ_P(0) % 32); bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word)); IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), bits); } @@ -4865,9 +4862,9 @@ static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter) return; if (rss_i > 3) - psrtype |= 2 << 29; + psrtype |= 2u << 29; else if (rss_i > 1) - psrtype |= 1 << 29; + psrtype |= 1u << 29; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype); } @@ -4931,7 +4928,7 @@ static void ixgbe_disable_fwd_ring(struct ixgbe_fwd_adapter *vadapter, /* shutdown specific queue receive and wait for dma to settle */ ixgbe_disable_rx_queue(adapter, rx_ring); usleep_range(10000, 20000); - ixgbe_irq_disable_queues(adapter, ((u64)1 << index)); + ixgbe_irq_disable_queues(adapter, BIT_ULL(index)); ixgbe_clean_rx_ring(rx_ring); rx_ring->l2_accel_priv = NULL; } @@ -5575,6 +5572,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) struct pci_dev *pdev = adapter->pdev; unsigned int rss, fdir; u32 fwsm; + u16 device_caps; #ifdef CONFIG_IXGBE_DCB int j; struct tc_configuration *tc; @@ -5740,6 +5738,22 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) adapter->tx_ring_count = IXGBE_DEFAULT_TXD; adapter->rx_ring_count = IXGBE_DEFAULT_RXD; + /* Cache bit indicating need for crosstalk fix */ + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + hw->mac.ops.get_device_caps(hw, &device_caps); + if (device_caps & IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR) + adapter->need_crosstalk_fix = false; + else + adapter->need_crosstalk_fix = true; + break; + default: + adapter->need_crosstalk_fix = false; + break; + } + /* set default work limits */ adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK; @@ -6631,7 +6645,7 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_q_vectors; i++) { struct ixgbe_q_vector *qv = adapter->q_vector[i]; if (qv->rx.ring || qv->tx.ring) - eics |= ((u64)1 << i); + eics |= BIT_ULL(i); } } @@ -6662,6 +6676,18 @@ static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) link_up = true; } + /* If Crosstalk fix enabled do the sanity check of making sure + * the SFP+ cage is empty. + */ + if (adapter->need_crosstalk_fix) { + u32 sfp_cage_full; + + sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & + IXGBE_ESDP_SDP2; + if (ixgbe_is_sfp(hw) && link_up && !sfp_cage_full) + link_up = false; + } + if (adapter->ixgbe_ieee_pfc) pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); @@ -7008,6 +7034,16 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; s32 err; + /* If crosstalk fix enabled verify the SFP+ cage is full */ + if (adapter->need_crosstalk_fix) { + u32 sfp_cage_full; + + sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & + IXGBE_ESDP_SDP2; + if (!sfp_cage_full) + return; + } + /* not searching for SFP so there is nothing to do here */ if (!(adapter->flags2 & IXGBE_FLAG2_SEARCH_FOR_SFP) && !(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET)) @@ -7192,10 +7228,12 @@ static void ixgbe_service_task(struct work_struct *work) return; } #ifdef CONFIG_IXGBE_VXLAN + rtnl_lock(); if (adapter->flags2 & IXGBE_FLAG2_VXLAN_REREG_NEEDED) { adapter->flags2 &= ~IXGBE_FLAG2_VXLAN_REREG_NEEDED; vxlan_get_rx_port(adapter->netdev); } + rtnl_unlock(); #endif /* CONFIG_IXGBE_VXLAN */ ixgbe_reset_subtask(adapter); ixgbe_phy_interrupt_subtask(adapter); @@ -7218,9 +7256,18 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first, u8 *hdr_len) { + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff *skb = first->skb; - u32 vlan_macip_lens, type_tucmd; - u32 mss_l4len_idx, l4len; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -7233,46 +7280,52 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, if (err < 0) return err; + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP; - if (first->protocol == htons(ETH_P_IP)) { - struct iphdr *iph = ip_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, - 0); + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + /* IP header will have to cancel out any data that + * is not a part of the outer IP header + */ + ip.v4->check = csum_fold(csum_add(lco_csum(skb), + csum_unfold(l4.tcp->check))); type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; + + ip.v4->tot_len = 0; first->tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM | IXGBE_TX_FLAGS_IPV4; - } else if (skb_is_gso_v6(skb)) { - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); + } else { + ip.v6->payload_len = 0; first->tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM; } - /* compute header lengths */ - l4len = tcp_hdrlen(skb); - *hdr_len = skb_transport_offset(skb) + l4len; + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + + /* remove payload length from inner checksum */ + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; first->bytecount += (first->gso_segs - 1) * *hdr_len; /* mss_l4len_id: use 0 as index for TSO */ - mss_l4len_idx = l4len << IXGBE_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ - vlan_macip_lens = skb_network_header_len(skb); - vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens = l4.hdr - ip.hdr; + vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, @@ -8860,17 +8913,36 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) kfree(fwd_adapter); } -#define IXGBE_MAX_TUNNEL_HDR_LEN 80 +#define IXGBE_MAX_MAC_HDR_LEN 127 +#define IXGBE_MAX_NETWORK_HDR_LEN 511 + static netdev_features_t ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - if (!skb->encapsulation) - return features; - - if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > - IXGBE_MAX_TUNNEL_HDR_LEN)) - return features & ~NETIF_F_CSUM_MASK; + unsigned int network_hdr_len, mac_hdr_len; + + /* Make certain the headers can be described by a context descriptor */ + mac_hdr_len = skb_network_header(skb) - skb->data; + if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_TSO | + NETIF_F_TSO6); + + network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); + if (unlikely(network_hdr_len > IXGBE_MAX_NETWORK_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_TSO | + NETIF_F_TSO6); + + /* We can only support IPV4 TSO in tunnels if we can mangle the + * inner IP ID field, so strip TSO if MANGLEID is not supported. + */ + if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) + features &= ~NETIF_F_TSO; return features; } @@ -9154,7 +9226,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_ioremap; } /* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */ - if (!(eec & (1 << 8))) + if (!(eec & BIT(8))) hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic; /* PHY */ @@ -9237,31 +9309,44 @@ skip_sriov: NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER; + NETIF_F_HW_CSUM; + +#define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ + NETIF_F_GSO_GRE_CSUM | \ + NETIF_F_GSO_IPIP | \ + NETIF_F_GSO_SIT | \ + NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) + + netdev->gso_partial_features = IXGBE_GSO_PARTIAL_FEATURES; + netdev->features |= NETIF_F_GSO_PARTIAL | + IXGBE_GSO_PARTIAL_FEATURES; if (hw->mac.type >= ixgbe_mac_82599EB) netdev->features |= NETIF_F_SCTP_CRC; /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; - netdev->hw_features |= NETIF_F_RXALL | + netdev->hw_features |= netdev->features | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_RXALL | NETIF_F_HW_L2FW_DOFFLOAD; if (hw->mac.type >= ixgbe_mac_82599EB) netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; - netdev->vlan_features |= NETIF_F_SG | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_HW_CSUM | - NETIF_F_SCTP_CRC; + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + /* set this bit last since it cannot be part of vlan_features */ + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->hw_enc_features |= netdev->vlan_features; netdev->mpls_features |= NETIF_F_HW_CSUM; - netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; @@ -9292,10 +9377,6 @@ skip_sriov: NETIF_F_FCOE_MTU; } #endif /* IXGBE_FCOE */ - if (pci_using_dac) { - netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_HIGHDMA; - } if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) netdev->hw_features |= NETIF_F_LRO; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c index b2125e358f7b..a0cb84381cd0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c @@ -314,8 +314,8 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number) break; } - if (vflre & (1 << vf_shift)) { - IXGBE_WRITE_REG(hw, IXGBE_VFLREC(reg_offset), (1 << vf_shift)); + if (vflre & BIT(vf_shift)) { + IXGBE_WRITE_REG(hw, IXGBE_VFLREC(reg_offset), BIT(vf_shift)); hw->mbx.stats.rsts++; return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index cdf4c3800801..cc735ec3e045 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -107,7 +107,7 @@ #define IXGBE_PE 0xE0 /* Port expander addr */ #define IXGBE_PE_OUTPUT 1 /* Output reg offset */ #define IXGBE_PE_CONFIG 3 /* Config reg offset */ -#define IXGBE_PE_BIT1 (1 << 1) +#define IXGBE_PE_BIT1 BIT(1) /* Flow control defines */ #define IXGBE_TAF_SYM_PAUSE 0x400 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index bdc8fdcc07a5..e5431bfe3339 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -396,7 +396,7 @@ static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) if (incval > 0x00FFFFFFULL) e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, - (1 << IXGBE_INCPER_SHIFT_82599) | + BIT(IXGBE_INCPER_SHIFT_82599) | ((u32)incval & 0x00FFFFFFUL)); break; default: @@ -1114,7 +1114,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) incval >>= IXGBE_INCVAL_SHIFT_82599; cc.shift -= IXGBE_INCVAL_SHIFT_82599; IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, - (1 << IXGBE_INCPER_SHIFT_82599) | incval); + BIT(IXGBE_INCPER_SHIFT_82599) | incval); break; default: /* other devices aren't supported */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index adcf00002483..c5caacdd193d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -406,7 +406,7 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, vector_reg = (vfinfo->vf_mc_hashes[i] >> 5) & 0x7F; vector_bit = vfinfo->vf_mc_hashes[i] & 0x1F; mta_reg = IXGBE_READ_REG(hw, IXGBE_MTA(vector_reg)); - mta_reg |= (1 << vector_bit); + mta_reg |= BIT(vector_bit); IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg); } vmolr |= IXGBE_VMOLR_ROMPE; @@ -433,7 +433,7 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) vector_reg = (vfinfo->vf_mc_hashes[j] >> 5) & 0x7F; vector_bit = vfinfo->vf_mc_hashes[j] & 0x1F; mta_reg = IXGBE_READ_REG(hw, IXGBE_MTA(vector_reg)); - mta_reg |= (1 << vector_bit); + mta_reg |= BIT(vector_bit); IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg); } @@ -536,9 +536,9 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* enable or disable receive depending on error */ vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); if (err) - vfre &= ~(1 << vf_shift); + vfre &= ~BIT(vf_shift); else - vfre |= 1 << vf_shift; + vfre |= BIT(vf_shift); IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), vfre); if (err) { @@ -592,8 +592,8 @@ static void ixgbe_clear_vf_vlans(struct ixgbe_adapter *adapter, u32 vf) u32 vlvfb_mask, pool_mask, i; /* create mask for VF and other pools */ - pool_mask = ~(1 << (VMDQ_P(0) % 32)); - vlvfb_mask = 1 << (vf % 32); + pool_mask = ~BIT(VMDQ_P(0) % 32); + vlvfb_mask = BIT(vf % 32); /* post increment loop, covers VLVF_ENTRIES - 1 to 0 */ for (i = IXGBE_VLVF_ENTRIES; i--;) { @@ -629,7 +629,7 @@ static void ixgbe_clear_vf_vlans(struct ixgbe_adapter *adapter, u32 vf) goto update_vlvfb; vid = vlvf & VLAN_VID_MASK; - mask = 1 << (vid % 32); + mask = BIT(vid % 32); /* clear bit from VFTA */ vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(vid / 32)); @@ -813,7 +813,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) /* enable transmit for vf */ reg = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); - reg |= 1 << vf_shift; + reg |= BIT(vf_shift); IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg); /* force drop enable for all VF Rx queues */ @@ -821,7 +821,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) /* enable receive for vf */ reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); - reg |= 1 << vf_shift; + reg |= BIT(vf_shift); /* * The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. * For more info take a look at ixgbe_set_vf_lpe @@ -837,7 +837,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) #endif /* CONFIG_FCOE */ if (pf_max_frame > ETH_FRAME_LEN) - reg &= ~(1 << vf_shift); + reg &= ~BIT(vf_shift); } IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg); @@ -846,7 +846,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) /* Enable counting of spoofed packets in the SSVPC register */ reg = IXGBE_READ_REG(hw, IXGBE_VMECM(reg_offset)); - reg |= (1 << vf_shift); + reg |= BIT(vf_shift); IXGBE_WRITE_REG(hw, IXGBE_VMECM(reg_offset), reg); /* @@ -908,8 +908,6 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); u8 tcs = netdev_get_num_tc(adapter->netdev); - struct ixgbe_hw *hw = &adapter->hw; - int err; if (adapter->vfinfo[vf].pf_vlan || tcs) { e_warn(drv, @@ -923,19 +921,7 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, if (!vid && !add) return 0; - err = ixgbe_set_vf_vlan(adapter, add, vid, vf); - if (err) - return err; - - if (adapter->vfinfo[vf].spoofchk_enabled) - hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); - - if (add) - adapter->vfinfo[vf].vlan_count++; - else if (adapter->vfinfo[vf].vlan_count) - adapter->vfinfo[vf].vlan_count--; - - return 0; + return ixgbe_set_vf_vlan(adapter, add, vid, vf); } static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, @@ -964,8 +950,11 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, * If the VF is allowed to set MAC filters then turn off * anti-spoofing to avoid false positives. */ - if (adapter->vfinfo[vf].spoofchk_enabled) - ixgbe_ndo_set_vf_spoofchk(adapter->netdev, vf, false); + if (adapter->vfinfo[vf].spoofchk_enabled) { + struct ixgbe_hw *hw = &adapter->hw; + + hw->mac.ops.set_mac_anti_spoofing(hw, false, vf); + } } err = ixgbe_set_vf_macvlan(adapter, vf, index, new_mac); @@ -1321,9 +1310,6 @@ static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf, ixgbe_set_vmvir(adapter, vlan, qos, vf); ixgbe_set_vmolr(hw, vf, false); - if (adapter->vfinfo[vf].spoofchk_enabled) - hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); - adapter->vfinfo[vf].vlan_count++; /* enable hide vlan on X550 */ if (hw->mac.type >= ixgbe_mac_X550) @@ -1356,9 +1342,6 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf) ixgbe_set_vf_vlan(adapter, true, 0, vf); ixgbe_clear_vmvir(adapter, vf); ixgbe_set_vmolr(hw, vf, true); - hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf); - if (adapter->vfinfo[vf].vlan_count) - adapter->vfinfo[vf].vlan_count--; /* disable hide VLAN on X550 */ if (hw->mac.type >= ixgbe_mac_X550) @@ -1525,27 +1508,34 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - int vf_target_reg = vf >> 3; - int vf_target_shift = vf % 8; struct ixgbe_hw *hw = &adapter->hw; - u32 regval; if (vf >= adapter->num_vfs) return -EINVAL; adapter->vfinfo[vf].spoofchk_enabled = setting; - regval = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); - regval &= ~(1 << vf_target_shift); - regval |= (setting << vf_target_shift); - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), regval); - - if (adapter->vfinfo[vf].vlan_count) { - vf_target_shift += IXGBE_SPOOF_VLANAS_SHIFT; - regval = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); - regval &= ~(1 << vf_target_shift); - regval |= (setting << vf_target_shift); - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), regval); + /* configure MAC spoofing */ + hw->mac.ops.set_mac_anti_spoofing(hw, setting, vf); + + /* configure VLAN spoofing */ + hw->mac.ops.set_vlan_anti_spoofing(hw, setting, vf); + + /* Ensure LLDP and FC is set for Ethertype Antispoofing if we will be + * calling set_ethertype_anti_spoofing for each VF in loop below + */ + if (hw->mac.ops.set_ethertype_anti_spoofing) { + IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_LLDP), + (IXGBE_ETQF_FILTER_EN | + IXGBE_ETQF_TX_ANTISPOOF | + IXGBE_ETH_P_LLDP)); + + IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FC), + (IXGBE_ETQF_FILTER_EN | + IXGBE_ETQF_TX_ANTISPOOF | + ETH_P_PAUSE)); + + hw->mac.ops.set_ethertype_anti_spoofing(hw, setting, vf); } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index ba3b837c7e9d..7af451460374 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -697,16 +697,16 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_FCDMARW 0x02420 /* FC Receive DMA RW */ #define IXGBE_FCINVST0 0x03FC0 /* FC Invalid DMA Context Status Reg 0 */ #define IXGBE_FCINVST(_i) (IXGBE_FCINVST0 + ((_i) * 4)) -#define IXGBE_FCBUFF_VALID (1 << 0) /* DMA Context Valid */ -#define IXGBE_FCBUFF_BUFFSIZE (3 << 3) /* User Buffer Size */ -#define IXGBE_FCBUFF_WRCONTX (1 << 7) /* 0: Initiator, 1: Target */ +#define IXGBE_FCBUFF_VALID BIT(0) /* DMA Context Valid */ +#define IXGBE_FCBUFF_BUFFSIZE (3u << 3) /* User Buffer Size */ +#define IXGBE_FCBUFF_WRCONTX BIT(7) /* 0: Initiator, 1: Target */ #define IXGBE_FCBUFF_BUFFCNT 0x0000ff00 /* Number of User Buffers */ #define IXGBE_FCBUFF_OFFSET 0xffff0000 /* User Buffer Offset */ #define IXGBE_FCBUFF_BUFFSIZE_SHIFT 3 #define IXGBE_FCBUFF_BUFFCNT_SHIFT 8 #define IXGBE_FCBUFF_OFFSET_SHIFT 16 -#define IXGBE_FCDMARW_WE (1 << 14) /* Write enable */ -#define IXGBE_FCDMARW_RE (1 << 15) /* Read enable */ +#define IXGBE_FCDMARW_WE BIT(14) /* Write enable */ +#define IXGBE_FCDMARW_RE BIT(15) /* Read enable */ #define IXGBE_FCDMARW_FCOESEL 0x000001ff /* FC X_ID: 11 bits */ #define IXGBE_FCDMARW_LASTSIZE 0xffff0000 /* Last User Buffer Size */ #define IXGBE_FCDMARW_LASTSIZE_SHIFT 16 @@ -723,23 +723,23 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_FCFLT 0x05108 /* FC FLT Context */ #define IXGBE_FCFLTRW 0x05110 /* FC Filter RW Control */ #define IXGBE_FCPARAM 0x051d8 /* FC Offset Parameter */ -#define IXGBE_FCFLT_VALID (1 << 0) /* Filter Context Valid */ -#define IXGBE_FCFLT_FIRST (1 << 1) /* Filter First */ +#define IXGBE_FCFLT_VALID BIT(0) /* Filter Context Valid */ +#define IXGBE_FCFLT_FIRST BIT(1) /* Filter First */ #define IXGBE_FCFLT_SEQID 0x00ff0000 /* Sequence ID */ #define IXGBE_FCFLT_SEQCNT 0xff000000 /* Sequence Count */ -#define IXGBE_FCFLTRW_RVALDT (1 << 13) /* Fast Re-Validation */ -#define IXGBE_FCFLTRW_WE (1 << 14) /* Write Enable */ -#define IXGBE_FCFLTRW_RE (1 << 15) /* Read Enable */ +#define IXGBE_FCFLTRW_RVALDT BIT(13) /* Fast Re-Validation */ +#define IXGBE_FCFLTRW_WE BIT(14) /* Write Enable */ +#define IXGBE_FCFLTRW_RE BIT(15) /* Read Enable */ /* FCoE Receive Control */ #define IXGBE_FCRXCTRL 0x05100 /* FC Receive Control */ -#define IXGBE_FCRXCTRL_FCOELLI (1 << 0) /* Low latency interrupt */ -#define IXGBE_FCRXCTRL_SAVBAD (1 << 1) /* Save Bad Frames */ -#define IXGBE_FCRXCTRL_FRSTRDH (1 << 2) /* EN 1st Read Header */ -#define IXGBE_FCRXCTRL_LASTSEQH (1 << 3) /* EN Last Header in Seq */ -#define IXGBE_FCRXCTRL_ALLH (1 << 4) /* EN All Headers */ -#define IXGBE_FCRXCTRL_FRSTSEQH (1 << 5) /* EN 1st Seq. Header */ -#define IXGBE_FCRXCTRL_ICRC (1 << 6) /* Ignore Bad FC CRC */ -#define IXGBE_FCRXCTRL_FCCRCBO (1 << 7) /* FC CRC Byte Ordering */ +#define IXGBE_FCRXCTRL_FCOELLI BIT(0) /* Low latency interrupt */ +#define IXGBE_FCRXCTRL_SAVBAD BIT(1) /* Save Bad Frames */ +#define IXGBE_FCRXCTRL_FRSTRDH BIT(2) /* EN 1st Read Header */ +#define IXGBE_FCRXCTRL_LASTSEQH BIT(3) /* EN Last Header in Seq */ +#define IXGBE_FCRXCTRL_ALLH BIT(4) /* EN All Headers */ +#define IXGBE_FCRXCTRL_FRSTSEQH BIT(5) /* EN 1st Seq. Header */ +#define IXGBE_FCRXCTRL_ICRC BIT(6) /* Ignore Bad FC CRC */ +#define IXGBE_FCRXCTRL_FCCRCBO BIT(7) /* FC CRC Byte Ordering */ #define IXGBE_FCRXCTRL_FCOEVER 0x00000f00 /* FCoE Version: 4 bits */ #define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8 /* FCoE Redirection */ @@ -1131,6 +1131,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_XPCSS 0x04290 #define IXGBE_MFLCN 0x04294 #define IXGBE_SERDESC 0x04298 +#define IXGBE_MAC_SGMII_BUSY 0x04298 #define IXGBE_MACS 0x0429C #define IXGBE_AUTOC 0x042A0 #define IXGBE_LINKS 0x042A4 @@ -1255,20 +1256,20 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ #define IXGBE_DCA_RXCTRL_CPUID_MASK_82599 0xFF000000 /* Rx CPUID Mask */ #define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */ -#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ -#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header enable */ -#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload enable */ -#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx rd Desc Relax Order */ -#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ -#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ +#define IXGBE_DCA_RXCTRL_DESC_DCA_EN BIT(5) /* DCA Rx Desc enable */ +#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN BIT(6) /* DCA Rx Desc header enable */ +#define IXGBE_DCA_RXCTRL_DATA_DCA_EN BIT(7) /* DCA Rx Desc payload enable */ +#define IXGBE_DCA_RXCTRL_DESC_RRO_EN BIT(9) /* DCA Rx rd Desc Relax Order */ +#define IXGBE_DCA_RXCTRL_DATA_WRO_EN BIT(13) /* Rx wr data Relax Order */ +#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN BIT(15) /* Rx wr header RO */ #define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ #define IXGBE_DCA_TXCTRL_CPUID_MASK_82599 0xFF000000 /* Tx CPUID Mask */ #define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */ -#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ -#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ -#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ -#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ +#define IXGBE_DCA_TXCTRL_DESC_DCA_EN BIT(5) /* DCA Tx Desc enable */ +#define IXGBE_DCA_TXCTRL_DESC_RRO_EN BIT(9) /* Tx rd Desc Relax Order */ +#define IXGBE_DCA_TXCTRL_DESC_WRO_EN BIT(11) /* Tx Desc writeback RO bit */ +#define IXGBE_DCA_TXCTRL_DATA_RRO_EN BIT(13) /* Tx rd data Relax Order */ #define IXGBE_DCA_MAX_QUEUES_82598 16 /* DCA regs only on 16 queues */ /* MSCA Bit Masks */ @@ -1747,7 +1748,7 @@ enum { #define IXGBE_ETQF_TX_ANTISPOOF 0x20000000 /* bit 29 */ #define IXGBE_ETQF_1588 0x40000000 /* bit 30 */ #define IXGBE_ETQF_FILTER_EN 0x80000000 /* bit 31 */ -#define IXGBE_ETQF_POOL_ENABLE (1 << 26) /* bit 26 */ +#define IXGBE_ETQF_POOL_ENABLE BIT(26) /* bit 26 */ #define IXGBE_ETQF_POOL_SHIFT 20 #define IXGBE_ETQS_RX_QUEUE 0x007F0000 /* bits 22:16 */ @@ -1873,20 +1874,20 @@ enum { #define IXGBE_AUTOC_1G_PMA_PMD_SHIFT 9 #define IXGBE_AUTOC_10G_PMA_PMD_MASK 0x00000180 #define IXGBE_AUTOC_10G_PMA_PMD_SHIFT 7 -#define IXGBE_AUTOC_10G_XAUI (0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_10G_KX4 (0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_10G_CX4 (0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_BX (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_KX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_SFI (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_KX_BX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_XAUI (0u << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_KX4 (1u << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_CX4 (2u << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_BX (0u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_KX (1u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_SFI (0u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_KX_BX (1u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) #define IXGBE_AUTOC2_UPPER_MASK 0xFFFF0000 #define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK 0x00030000 #define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16 -#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) -#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) -#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_KR (0u << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_XFI (1u << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_SFI (2u << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) #define IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK 0x50000000 #define IXGBE_AUTOC2_LINK_DISABLE_MASK 0x70000000 @@ -2123,6 +2124,7 @@ enum { #define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3 #define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1 #define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2 +#define IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR BIT(7) #define IXGBE_FW_LESM_PARAMETERS_PTR 0x2 #define IXGBE_FW_LESM_STATE_1 0x1 #define IXGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */ @@ -2838,15 +2840,15 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ #define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */ #define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */ -#define IXGBE_ADVTXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */ -#define IXGBE_ADVTXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */ -#define IXGBE_ADVTXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */ -#define IXGBE_ADVTXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation: End */ -#define IXGBE_ADVTXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation: Start */ -#define IXGBE_ADVTXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */ -#define IXGBE_ADVTXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */ -#define IXGBE_ADVTXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */ -#define IXGBE_ADVTXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */ +#define IXGBE_ADVTXD_FCOEF_SOF (BIT(2) << 10) /* FC SOF index */ +#define IXGBE_ADVTXD_FCOEF_PARINC (BIT(3) << 10) /* Rel_Off in F_CTL */ +#define IXGBE_ADVTXD_FCOEF_ORIE (BIT(4) << 10) /* Orientation: End */ +#define IXGBE_ADVTXD_FCOEF_ORIS (BIT(5) << 10) /* Orientation: Start */ +#define IXGBE_ADVTXD_FCOEF_EOF_N (0u << 10) /* 00: EOFn */ +#define IXGBE_ADVTXD_FCOEF_EOF_T (1u << 10) /* 01: EOFt */ +#define IXGBE_ADVTXD_FCOEF_EOF_NI (2u << 10) /* 10: EOFni */ +#define IXGBE_ADVTXD_FCOEF_EOF_A (3u << 10) /* 11: EOFa */ +#define IXGBE_ADVTXD_FCOEF_EOF_MASK (3u << 10) /* FC EOF index */ #define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ @@ -3581,7 +3583,7 @@ struct ixgbe_info { #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ BIT(5) -#define IXGBE_FUSES0_REV_MASK (3 << 6) +#define IXGBE_FUSES0_REV_MASK (3u << 6) #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) @@ -3595,25 +3597,25 @@ struct ixgbe_info { #define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520) #define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 0x9A00 : 0x5A00) -#define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B (1 << 9) -#define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS (1 << 11) +#define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B BIT(9) +#define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS BIT(11) -#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK (0x7 << 8) -#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G (2 << 8) -#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G (4 << 8) +#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK (7u << 8) +#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G (2u << 8) +#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G (4u << 8) #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN BIT(12) #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN BIT(13) -#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_FEC_REQ (1 << 14) -#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC (1 << 15) -#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX (1 << 16) -#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR (1 << 18) -#define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX (1 << 24) -#define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR (1 << 26) -#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE (1 << 29) -#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART (1 << 31) - -#define IXGBE_KRM_AN_CNTL_1_SYM_PAUSE (1 << 28) -#define IXGBE_KRM_AN_CNTL_1_ASM_PAUSE (1 << 29) +#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_FEC_REQ BIT(14) +#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC BIT(15) +#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX BIT(16) +#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR BIT(18) +#define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX BIT(24) +#define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR BIT(26) +#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE BIT(29) +#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART BIT(31) + +#define IXGBE_KRM_AN_CNTL_1_SYM_PAUSE BIT(28) +#define IXGBE_KRM_AN_CNTL_1_ASM_PAUSE BIT(29) #define IXGBE_KRM_AN_CNTL_8_LINEAR BIT(0) #define IXGBE_KRM_AN_CNTL_8_LIMITING BIT(1) @@ -3621,28 +3623,28 @@ struct ixgbe_info { #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D BIT(12) #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D BIT(19) -#define IXGBE_KRM_DSP_TXFFE_STATE_C0_EN (1 << 6) -#define IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN (1 << 15) -#define IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN (1 << 16) +#define IXGBE_KRM_DSP_TXFFE_STATE_C0_EN BIT(6) +#define IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN BIT(15) +#define IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN BIT(16) -#define IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL (1 << 4) -#define IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS (1 << 2) +#define IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL BIT(4) +#define IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS BIT(2) -#define IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK (0x3 << 16) +#define IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK (3u << 16) -#define IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN (1 << 1) -#define IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN (1 << 2) -#define IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN (1 << 3) -#define IXGBE_KRM_TX_COEFF_CTRL_1_OVRRD_EN (1 << 31) +#define IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN BIT(1) +#define IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN BIT(2) +#define IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN BIT(3) +#define IXGBE_KRM_TX_COEFF_CTRL_1_OVRRD_EN BIT(31) #define IXGBE_KX4_LINK_CNTL_1 0x4C -#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX (1 << 16) -#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX4 (1 << 17) -#define IXGBE_KX4_LINK_CNTL_1_TETH_EEE_CAP_KX (1 << 24) -#define IXGBE_KX4_LINK_CNTL_1_TETH_EEE_CAP_KX4 (1 << 25) -#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_ENABLE (1 << 29) -#define IXGBE_KX4_LINK_CNTL_1_TETH_FORCE_LINK_UP (1 << 30) -#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_RESTART (1 << 31) +#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX BIT(16) +#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX4 BIT(17) +#define IXGBE_KX4_LINK_CNTL_1_TETH_EEE_CAP_KX BIT(24) +#define IXGBE_KX4_LINK_CNTL_1_TETH_EEE_CAP_KX4 BIT(25) +#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_ENABLE BIT(29) +#define IXGBE_KX4_LINK_CNTL_1_TETH_FORCE_LINK_UP BIT(30) +#define IXGBE_KX4_LINK_CNTL_1_TETH_AN_RESTART BIT(31) #define IXGBE_SB_IOSF_INDIRECT_CTRL 0x00011144 #define IXGBE_SB_IOSF_INDIRECT_DATA 0x00011148 @@ -3658,7 +3660,7 @@ struct ixgbe_info { #define IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT 28 #define IXGBE_SB_IOSF_CTRL_TARGET_SELECT_MASK 0x7 #define IXGBE_SB_IOSF_CTRL_BUSY_SHIFT 31 -#define IXGBE_SB_IOSF_CTRL_BUSY (1 << IXGBE_SB_IOSF_CTRL_BUSY_SHIFT) +#define IXGBE_SB_IOSF_CTRL_BUSY BIT(IXGBE_SB_IOSF_CTRL_BUSY_SHIFT) #define IXGBE_SB_IOSF_TARGET_KR_PHY 0 #define IXGBE_SB_IOSF_TARGET_KX4_UNIPHY 1 #define IXGBE_SB_IOSF_TARGET_KX4_PCS0 2 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 40824d85d807..f2b1d48a16c3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -214,8 +214,8 @@ s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> IXGBE_EEC_SIZE_SHIFT); - eeprom->word_size = 1 << (eeprom_size + - IXGBE_EEPROM_WORD_SIZE_SHIFT); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); hw_dbg(hw, "Eeprom params: type = %d, size = %d\n", eeprom->type, eeprom->word_size); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index c71e93ed4451..19b75cd98682 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -335,8 +335,8 @@ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> IXGBE_EEC_SIZE_SHIFT); - eeprom->word_size = 1 << (eeprom_size + - IXGBE_EEPROM_WORD_SIZE_SHIFT); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); hw_dbg(hw, "Eeprom params: type = %d, size = %d\n", eeprom->type, eeprom->word_size); @@ -2646,9 +2646,9 @@ static void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw, pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); if (enable) - pfvfspoof |= (1 << vf_target_shift); + pfvfspoof |= BIT(vf_target_shift); else - pfvfspoof &= ~(1 << vf_target_shift); + pfvfspoof &= ~BIT(vf_target_shift); IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); } @@ -2765,7 +2765,7 @@ static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask) ixgbe_release_swfw_sync_X540(hw, hmask); if (status != IXGBE_ERR_TOKEN_RETRY) return status; - udelay(FW_PHY_TOKEN_DELAY * 1000); + msleep(FW_PHY_TOKEN_DELAY); } return status; @@ -2908,7 +2908,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = { .get_media_type = &ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, .get_wwn_prefix = NULL, - .setup_link = NULL, /* defined later */ + .setup_link = &ixgbe_setup_mac_link_X540, .get_link_capabilities = &ixgbe_get_link_capabilities_X550em, .get_bus_info = &ixgbe_get_bus_info_X550em, .setup_sfp = ixgbe_setup_sfp_modules_X550em, @@ -2932,7 +2932,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { .setup_sfp = ixgbe_setup_sfp_modules_X550em, .acquire_swfw_sync = ixgbe_acquire_swfw_sync_x550em_a, .release_swfw_sync = ixgbe_release_swfw_sync_x550em_a, - .setup_fc = ixgbe_setup_fc_generic, + .setup_fc = ixgbe_setup_fc_x550em, .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a, .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a, }; diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index 58434584b16d..74901f7ef391 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -74,7 +74,7 @@ typedef u32 ixgbe_link_speed; #define IXGBE_RXDCTL_RLPML_EN 0x00008000 /* DCA Control */ -#define IXGBE_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ +#define IXGBE_DCA_TXCTRL_TX_WB_RO_EN BIT(11) /* Tx Desc writeback RO bit */ /* PSRTYPE bit definitions */ #define IXGBE_PSRTYPE_TCPHDR 0x00000010 @@ -296,16 +296,16 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wr-bk flushing */ #define IXGBE_TXDCTL_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */ -#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* Rx Desc enable */ -#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* Rx Desc header ena */ -#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* Rx Desc payload ena */ -#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* Rx rd Desc Relax Order */ -#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ -#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ - -#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ -#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ -#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ -#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ +#define IXGBE_DCA_RXCTRL_DESC_DCA_EN BIT(5) /* Rx Desc enable */ +#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN BIT(6) /* Rx Desc header ena */ +#define IXGBE_DCA_RXCTRL_DATA_DCA_EN BIT(7) /* Rx Desc payload ena */ +#define IXGBE_DCA_RXCTRL_DESC_RRO_EN BIT(9) /* Rx rd Desc Relax Order */ +#define IXGBE_DCA_RXCTRL_DATA_WRO_EN BIT(13) /* Rx wr data Relax Order */ +#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN BIT(15) /* Rx wr header RO */ + +#define IXGBE_DCA_TXCTRL_DESC_DCA_EN BIT(5) /* DCA Tx Desc enable */ +#define IXGBE_DCA_TXCTRL_DESC_RRO_EN BIT(9) /* Tx rd Desc Relax Order */ +#define IXGBE_DCA_TXCTRL_DESC_WRO_EN BIT(11) /* Tx Desc writeback RO bit */ +#define IXGBE_DCA_TXCTRL_DATA_RRO_EN BIT(13) /* Tx rd data Relax Order */ #endif /* _IXGBEVF_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index d7aa4b203f40..508e72c5f1c2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -42,65 +42,54 @@ #define IXGBE_ALL_RAR_ENTRIES 16 +enum {NETDEV_STATS, IXGBEVF_STATS}; + struct ixgbe_stats { char stat_string[ETH_GSTRING_LEN]; - struct { - int sizeof_stat; - int stat_offset; - int base_stat_offset; - int saved_reset_offset; - }; + int type; + int sizeof_stat; + int stat_offset; }; -#define IXGBEVF_STAT(m, b, r) { \ - .sizeof_stat = FIELD_SIZEOF(struct ixgbevf_adapter, m), \ - .stat_offset = offsetof(struct ixgbevf_adapter, m), \ - .base_stat_offset = offsetof(struct ixgbevf_adapter, b), \ - .saved_reset_offset = offsetof(struct ixgbevf_adapter, r) \ +#define IXGBEVF_STAT(_name, _stat) { \ + .stat_string = _name, \ + .type = IXGBEVF_STATS, \ + .sizeof_stat = FIELD_SIZEOF(struct ixgbevf_adapter, _stat), \ + .stat_offset = offsetof(struct ixgbevf_adapter, _stat) \ } -#define IXGBEVF_ZSTAT(m) { \ - .sizeof_stat = FIELD_SIZEOF(struct ixgbevf_adapter, m), \ - .stat_offset = offsetof(struct ixgbevf_adapter, m), \ - .base_stat_offset = -1, \ - .saved_reset_offset = -1 \ +#define IXGBEVF_NETDEV_STAT(_net_stat) { \ + .stat_string = #_net_stat, \ + .type = NETDEV_STATS, \ + .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ + .stat_offset = offsetof(struct net_device_stats, _net_stat) \ } -static const struct ixgbe_stats ixgbe_gstrings_stats[] = { - {"rx_packets", IXGBEVF_STAT(stats.vfgprc, stats.base_vfgprc, - stats.saved_reset_vfgprc)}, - {"tx_packets", IXGBEVF_STAT(stats.vfgptc, stats.base_vfgptc, - stats.saved_reset_vfgptc)}, - {"rx_bytes", IXGBEVF_STAT(stats.vfgorc, stats.base_vfgorc, - stats.saved_reset_vfgorc)}, - {"tx_bytes", IXGBEVF_STAT(stats.vfgotc, stats.base_vfgotc, - stats.saved_reset_vfgotc)}, - {"tx_busy", IXGBEVF_ZSTAT(tx_busy)}, - {"tx_restart_queue", IXGBEVF_ZSTAT(restart_queue)}, - {"tx_timeout_count", IXGBEVF_ZSTAT(tx_timeout_count)}, - {"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc, - stats.saved_reset_vfmprc)}, - {"rx_csum_offload_errors", IXGBEVF_ZSTAT(hw_csum_rx_error)}, -#ifdef BP_EXTENDED_STATS - {"rx_bp_poll_yield", IXGBEVF_ZSTAT(bp_rx_yields)}, - {"rx_bp_cleaned", IXGBEVF_ZSTAT(bp_rx_cleaned)}, - {"rx_bp_misses", IXGBEVF_ZSTAT(bp_rx_missed)}, - {"tx_bp_napi_yield", IXGBEVF_ZSTAT(bp_tx_yields)}, - {"tx_bp_cleaned", IXGBEVF_ZSTAT(bp_tx_cleaned)}, - {"tx_bp_misses", IXGBEVF_ZSTAT(bp_tx_missed)}, -#endif +static struct ixgbe_stats ixgbevf_gstrings_stats[] = { + IXGBEVF_NETDEV_STAT(rx_packets), + IXGBEVF_NETDEV_STAT(tx_packets), + IXGBEVF_NETDEV_STAT(rx_bytes), + IXGBEVF_NETDEV_STAT(tx_bytes), + IXGBEVF_STAT("tx_busy", tx_busy), + IXGBEVF_STAT("tx_restart_queue", restart_queue), + IXGBEVF_STAT("tx_timeout_count", tx_timeout_count), + IXGBEVF_NETDEV_STAT(multicast), + IXGBEVF_STAT("rx_csum_offload_errors", hw_csum_rx_error), }; -#define IXGBE_QUEUE_STATS_LEN 0 -#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats) +#define IXGBEVF_QUEUE_STATS_LEN ( \ + (((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \ + ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \ + (sizeof(struct ixgbe_stats) / sizeof(u64))) +#define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats) -#define IXGBEVF_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN) +#define IXGBEVF_STATS_LEN (IXGBEVF_GLOBAL_STATS_LEN + IXGBEVF_QUEUE_STATS_LEN) static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { "Register test (offline)", "Link test (on/offline)" }; -#define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) +#define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) static int ixgbevf_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) @@ -177,7 +166,8 @@ static void ixgbevf_get_regs(struct net_device *netdev, memset(p, 0, regs_len); - regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id; + /* generate a number suitable for ethtool's register version */ + regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id; /* General Registers */ regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_VFCTRL); @@ -392,13 +382,13 @@ clear_reset: return err; } -static int ixgbevf_get_sset_count(struct net_device *dev, int stringset) +static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset) { switch (stringset) { case ETH_SS_TEST: - return IXGBE_TEST_LEN; + return IXGBEVF_TEST_LEN; case ETH_SS_STATS: - return IXGBE_GLOBAL_STATS_LEN; + return IXGBEVF_STATS_LEN; default: return -EINVAL; } @@ -408,70 +398,138 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - char *base = (char *)adapter; - int i; -#ifdef BP_EXTENDED_STATS - u64 rx_yields = 0, rx_cleaned = 0, rx_missed = 0, - tx_yields = 0, tx_cleaned = 0, tx_missed = 0; + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *net_stats; + unsigned int start; + struct ixgbevf_ring *ring; + int i, j; + char *p; - for (i = 0; i < adapter->num_rx_queues; i++) { - rx_yields += adapter->rx_ring[i]->stats.yields; - rx_cleaned += adapter->rx_ring[i]->stats.cleaned; - rx_yields += adapter->rx_ring[i]->stats.yields; - } + ixgbevf_update_stats(adapter); + net_stats = dev_get_stats(netdev, &temp); + for (i = 0; i < IXGBEVF_GLOBAL_STATS_LEN; i++) { + switch (ixgbevf_gstrings_stats[i].type) { + case NETDEV_STATS: + p = (char *)net_stats + + ixgbevf_gstrings_stats[i].stat_offset; + break; + case IXGBEVF_STATS: + p = (char *)adapter + + ixgbevf_gstrings_stats[i].stat_offset; + break; + default: + data[i] = 0; + continue; + } - for (i = 0; i < adapter->num_tx_queues; i++) { - tx_yields += adapter->tx_ring[i]->stats.yields; - tx_cleaned += adapter->tx_ring[i]->stats.cleaned; - tx_yields += adapter->tx_ring[i]->stats.yields; + data[i] = (ixgbevf_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - adapter->bp_rx_yields = rx_yields; - adapter->bp_rx_cleaned = rx_cleaned; - adapter->bp_rx_missed = rx_missed; + /* populate Tx queue data */ + for (j = 0; j < adapter->num_tx_queues; j++) { + ring = adapter->tx_ring[j]; + if (!ring) { + data[i++] = 0; + data[i++] = 0; +#ifdef BP_EXTENDED_STATS + data[i++] = 0; + data[i++] = 0; + data[i++] = 0; +#endif + continue; + } - adapter->bp_tx_yields = tx_yields; - adapter->bp_tx_cleaned = tx_cleaned; - adapter->bp_tx_missed = tx_missed; + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + i += 2; +#ifdef BP_EXTENDED_STATS + data[i] = ring->stats.yields; + data[i + 1] = ring->stats.misses; + data[i + 2] = ring->stats.cleaned; + i += 3; #endif + } - ixgbevf_update_stats(adapter); - for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) { - char *p = base + ixgbe_gstrings_stats[i].stat_offset; - char *b = base + ixgbe_gstrings_stats[i].base_stat_offset; - char *r = base + ixgbe_gstrings_stats[i].saved_reset_offset; - - if (ixgbe_gstrings_stats[i].sizeof_stat == sizeof(u64)) { - if (ixgbe_gstrings_stats[i].base_stat_offset >= 0) - data[i] = *(u64 *)p - *(u64 *)b + *(u64 *)r; - else - data[i] = *(u64 *)p; - } else { - if (ixgbe_gstrings_stats[i].base_stat_offset >= 0) - data[i] = *(u32 *)p - *(u32 *)b + *(u32 *)r; - else - data[i] = *(u32 *)p; + /* populate Rx queue data */ + for (j = 0; j < adapter->num_rx_queues; j++) { + ring = adapter->rx_ring[j]; + if (!ring) { + data[i++] = 0; + data[i++] = 0; +#ifdef BP_EXTENDED_STATS + data[i++] = 0; + data[i++] = 0; + data[i++] = 0; +#endif + continue; } + + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + i += 2; +#ifdef BP_EXTENDED_STATS + data[i] = ring->stats.yields; + data[i + 1] = ring->stats.misses; + data[i + 2] = ring->stats.cleaned; + i += 3; +#endif } } static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { + struct ixgbevf_adapter *adapter = netdev_priv(netdev); char *p = (char *)data; int i; switch (stringset) { case ETH_SS_TEST: memcpy(data, *ixgbe_gstrings_test, - IXGBE_TEST_LEN * ETH_GSTRING_LEN); + IXGBEVF_TEST_LEN * ETH_GSTRING_LEN); break; case ETH_SS_STATS: - for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) { - memcpy(p, ixgbe_gstrings_stats[i].stat_string, + for (i = 0; i < IXGBEVF_GLOBAL_STATS_LEN; i++) { + memcpy(p, ixgbevf_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + + for (i = 0; i < adapter->num_tx_queues; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; +#ifdef BP_EXTENDED_STATS + sprintf(p, "tx_queue_%u_bp_napi_yield", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bp_misses", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bp_cleaned", i); + p += ETH_GSTRING_LEN; +#endif /* BP_EXTENDED_STATS */ + } + for (i = 0; i < adapter->num_rx_queues; i++) { + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; +#ifdef BP_EXTENDED_STATS + sprintf(p, "rx_queue_%u_bp_poll_yield", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bp_misses", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bp_cleaned", i); + p += ETH_GSTRING_LEN; +#endif /* BP_EXTENDED_STATS */ + } break; } } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 5ac60eefb0cd..aa28c4fb1a43 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -166,10 +166,10 @@ struct ixgbevf_ring { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IXGBE_TX_FLAGS_CSUM (u32)(1) -#define IXGBE_TX_FLAGS_VLAN (u32)(1 << 1) -#define IXGBE_TX_FLAGS_TSO (u32)(1 << 2) -#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 3) +#define IXGBE_TX_FLAGS_CSUM BIT(0) +#define IXGBE_TX_FLAGS_VLAN BIT(1) +#define IXGBE_TX_FLAGS_TSO BIT(2) +#define IXGBE_TX_FLAGS_IPV4 BIT(3) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 @@ -422,16 +422,6 @@ struct ixgbevf_adapter { unsigned int tx_ring_count; unsigned int rx_ring_count; -#ifdef BP_EXTENDED_STATS - u64 bp_rx_yields; - u64 bp_rx_cleaned; - u64 bp_rx_missed; - - u64 bp_tx_yields; - u64 bp_tx_cleaned; - u64 bp_tx_missed; -#endif - u8 __iomem *io_addr; /* Mainly for iounmap use */ u32 link_speed; bool link_up; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 007cbe094990..319e25f29883 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1056,7 +1056,7 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && !test_bit(__IXGBEVF_REMOVING, &adapter->state)) ixgbevf_irq_enable_queues(adapter, - 1 << q_vector->v_idx); + BIT(q_vector->v_idx)); return 0; } @@ -1158,14 +1158,14 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter) } /* add q_vector eims value to global eims_enable_mask */ - adapter->eims_enable_mask |= 1 << v_idx; + adapter->eims_enable_mask |= BIT(v_idx); ixgbevf_write_eitr(q_vector); } ixgbevf_set_ivar(adapter, -1, 1, v_idx); /* setup eims_other and add value to global eims_enable_mask */ - adapter->eims_other = 1 << v_idx; + adapter->eims_other = BIT(v_idx); adapter->eims_enable_mask |= adapter->eims_other; } @@ -1589,8 +1589,8 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, txdctl |= (8 << 16); /* WTHRESH = 8 */ /* Setting PTHRESH to 32 both improves performance */ - txdctl |= (1 << 8) | /* HTHRESH = 1 */ - 32; /* PTHRESH = 32 */ + txdctl |= (1u << 8) | /* HTHRESH = 1 */ + 32; /* PTHRESH = 32 */ clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); @@ -1646,7 +1646,7 @@ static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter) IXGBE_PSRTYPE_L2HDR; if (adapter->num_rx_queues > 1) - psrtype |= 1 << 29; + psrtype |= BIT(29); IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); } @@ -2056,7 +2056,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) spin_lock_bh(&adapter->mbx_lock); while (api[idx] != ixgbe_mbox_api_unknown) { - err = ixgbevf_negotiate_api_version(hw, api[idx]); + err = hw->mac.ops.negotiate_api_version(hw, api[idx]); if (!err) break; idx++; @@ -2797,7 +2797,7 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter) struct ixgbevf_q_vector *qv = adapter->q_vector[i]; if (qv->rx.ring || qv->tx.ring) - eics |= 1 << i; + eics |= BIT(i); } /* Cause software interrupt to ensure rings are cleaned */ @@ -3272,9 +3272,18 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, struct ixgbevf_tx_buffer *first, u8 *hdr_len) { + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff *skb = first->skb; - u32 vlan_macip_lens, type_tucmd; - u32 mss_l4len_idx, l4len; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -3287,49 +3296,53 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, if (err < 0) return err; + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP; - if (first->protocol == htons(ETH_P_IP)) { - struct iphdr *iph = ip_hdr(skb); - - iph->tot_len = 0; - iph->check = 0; - tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, - 0); + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + /* IP header will have to cancel out any data that + * is not a part of the outer IP header + */ + ip.v4->check = csum_fold(csum_add(lco_csum(skb), + csum_unfold(l4.tcp->check))); type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; + + ip.v4->tot_len = 0; first->tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM | IXGBE_TX_FLAGS_IPV4; - } else if (skb_is_gso_v6(skb)) { - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); + } else { + ip.v6->payload_len = 0; first->tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM; } - /* compute header lengths */ - l4len = tcp_hdrlen(skb); - *hdr_len += l4len; - *hdr_len = skb_transport_offset(skb) + l4len; + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; - /* update GSO size and bytecount with header size */ + /* remove payload length from inner checksum */ + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + + /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; first->bytecount += (first->gso_segs - 1) * *hdr_len; /* mss_l4len_id: use 1 as index for TSO */ - mss_l4len_idx = l4len << IXGBE_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; - mss_l4len_idx |= 1 << IXGBE_ADVTXD_IDX_SHIFT; + mss_l4len_idx |= (1u << IXGBE_ADVTXD_IDX_SHIFT); /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ - vlan_macip_lens = skb_network_header_len(skb); - vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens = l4.hdr - ip.hdr; + vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, @@ -3422,7 +3435,7 @@ static void ixgbevf_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, /* use index 1 context for TSO/FSO/FCOE */ if (tx_flags & IXGBE_TX_FLAGS_TSO) - olinfo_status |= cpu_to_le32(1 << IXGBE_ADVTXD_IDX_SHIFT); + olinfo_status |= cpu_to_le32(1u << IXGBE_ADVTXD_IDX_SHIFT); /* Check Context must be set if Tx switch is enabled, which it * always is for case where virtual functions are running @@ -3870,6 +3883,40 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, return stats; } +#define IXGBEVF_MAX_MAC_HDR_LEN 127 +#define IXGBEVF_MAX_NETWORK_HDR_LEN 511 + +static netdev_features_t +ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + unsigned int network_hdr_len, mac_hdr_len; + + /* Make certain the headers can be described by a context descriptor */ + mac_hdr_len = skb_network_header(skb) - skb->data; + if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_TSO | + NETIF_F_TSO6); + + network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); + if (unlikely(network_hdr_len > IXGBEVF_MAX_NETWORK_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_TSO | + NETIF_F_TSO6); + + /* We can only support IPV4 TSO in tunnels if we can mangle the + * inner IP ID field, so strip TSO if MANGLEID is not supported. + */ + if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) + features &= ~NETIF_F_TSO; + + return features; +} + static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, @@ -3888,7 +3935,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbevf_netpoll, #endif - .ndo_features_check = passthru_features_check, + .ndo_features_check = ixgbevf_features_check, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -3999,23 +4046,31 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC; - netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER; +#define IXGBEVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ + NETIF_F_GSO_GRE_CSUM | \ + NETIF_F_GSO_IPIP | \ + NETIF_F_GSO_SIT | \ + NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) - netdev->vlan_features |= NETIF_F_SG | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_HW_CSUM | - NETIF_F_SCTP_CRC; + netdev->gso_partial_features = IXGBEVF_GSO_PARTIAL_FEATURES; + netdev->hw_features |= NETIF_F_GSO_PARTIAL | + IXGBEVF_GSO_PARTIAL_FEATURES; - netdev->mpls_features |= NETIF_F_HW_CSUM; - netdev->hw_enc_features |= NETIF_F_HW_CSUM; + netdev->features = netdev->hw_features; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= netdev->vlan_features; + + /* set this bit last since it cannot be part of vlan_features */ + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; + netdev->priv_flags |= IFF_UNICAST_FLT; if (IXGBE_REMOVED(hw->hw_addr)) { diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 4d613a4f2a7f..987ad69d4918 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -670,11 +670,11 @@ void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) } /** - * ixgbevf_negotiate_api_version - Negotiate supported API version + * ixgbevf_negotiate_api_version_vf - Negotiate supported API version * @hw: pointer to the HW structure * @api: integer containing requested API version **/ -int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) +static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api) { int err; u32 msg[3]; @@ -769,6 +769,7 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .stop_adapter = ixgbevf_stop_hw_vf, .setup_link = ixgbevf_setup_mac_link_vf, .check_link = ixgbevf_check_mac_link_vf, + .negotiate_api_version = ixgbevf_negotiate_api_version_vf, .set_rar = ixgbevf_set_rar_vf, .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_update_xcast_mode, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index ef9f7736b4dc..8e623f9327ae 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -51,6 +51,7 @@ struct ixgbe_mac_operations { s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *); s32 (*stop_adapter)(struct ixgbe_hw *); s32 (*get_bus_info)(struct ixgbe_hw *); + s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); /* Link */ s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); @@ -208,7 +209,6 @@ static inline u32 ixgbe_read_reg_array(struct ixgbe_hw *hw, u32 reg, #define IXGBE_READ_REG_ARRAY(h, r, o) ixgbe_read_reg_array(h, r, o) void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); -int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, unsigned int *default_tc); int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index f69584a9b47f..c761194bb323 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -337,7 +337,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return bitmap_iterator_count(&it) + (priv->tx_ring_num * 2) + - (priv->rx_ring_num * 2); + (priv->rx_ring_num * 3); case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; @@ -404,6 +404,7 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->bytes; + data[index++] = priv->rx_ring[i]->dropped; } spin_unlock_bh(&priv->stats_lock); @@ -477,6 +478,8 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_dropped", i); } break; case ETH_SS_PRIV_FLAGS: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b4b258c8ca47..8bd143dda95d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1856,6 +1856,7 @@ static void mlx4_en_restart(struct work_struct *work) en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); + rtnl_lock(); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev, 1); @@ -1863,6 +1864,7 @@ static void mlx4_en_restart(struct work_struct *work) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); + rtnl_unlock(); } static void mlx4_en_clear_stats(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 3904b5fc0b7c..20b6c2e678b8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -158,6 +158,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) u64 in_mod = reset << 8 | port; int err; int i, counter_index; + unsigned long sw_rx_dropped = 0; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) @@ -180,6 +181,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) for (i = 0; i < priv->rx_ring_num; i++) { stats->rx_packets += priv->rx_ring[i]->packets; stats->rx_bytes += priv->rx_ring[i]->bytes; + sw_rx_dropped += priv->rx_ring[i]->dropped; priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok; priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none; priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete; @@ -236,7 +238,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) &mlx4_en_stats->MCAST_prio_1, NUM_PRIORITIES); stats->collisions = 0; - stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); + stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP) + + sw_rx_dropped; stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 86bcfe510e4e..b723e3bcab39 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -61,7 +61,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, gfp_t gfp = _gfp; if (order) - gfp |= __GFP_COMP | __GFP_NOWARN; + gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC; page = alloc_pages(gfp, order); if (likely(page)) break; @@ -126,7 +126,9 @@ out: dma_unmap_page(priv->ddev, page_alloc[i].dma, page_alloc[i].page_size, PCI_DMA_FROMDEVICE); page = page_alloc[i].page; - set_page_count(page, 1); + /* Revert changes done by mlx4_alloc_pages */ + page_ref_sub(page, page_alloc[i].page_size / + priv->frag_info[i].frag_stride - 1); put_page(page); } } @@ -176,7 +178,9 @@ out: dma_unmap_page(priv->ddev, page_alloc->dma, page_alloc->page_size, PCI_DMA_FROMDEVICE); page = page_alloc->page; - set_page_count(page, 1); + /* Revert changes done by mlx4_alloc_pages */ + page_ref_sub(page, page_alloc->page_size / + priv->frag_info[i].frag_stride - 1); put_page(page); page_alloc->page = NULL; } @@ -939,7 +943,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); if (!skb) { - priv->stats.rx_dropped++; + ring->dropped++; goto next; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index c0d7b7296236..a386f047c1af 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -405,7 +405,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; - u64 timestamp = 0; int done = 0; int budget = priv->tx_work_limit; u32 last_nr_txbb; @@ -445,9 +444,12 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { + u64 timestamp = 0; + txbbs_skipped += last_nr_txbb; ring_index = (ring_index + last_nr_txbb) & size_mask; - if (ring->tx_info[ring_index].ts_requested) + + if (unlikely(ring->tx_info[ring_index].ts_requested)) timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 358f7230da58..12c77a70abdb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3172,6 +3172,34 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap return 0; } +static int mlx4_pci_enable_device(struct mlx4_dev *dev) +{ + struct pci_dev *pdev = dev->persist->pdev; + int err = 0; + + mutex_lock(&dev->persist->pci_status_mutex); + if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->persist->pci_status_mutex); + + return err; +} + +static void mlx4_pci_disable_device(struct mlx4_dev *dev) +{ + struct pci_dev *pdev = dev->persist->pdev; + + mutex_lock(&dev->persist->pci_status_mutex); + if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->persist->pci_status_mutex); +} + static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int total_vfs, int *nvfs, struct mlx4_priv *priv, int reset_flow) @@ -3582,7 +3610,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); - err = pci_enable_device(pdev); + err = mlx4_pci_enable_device(&priv->dev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); return err; @@ -3715,7 +3743,7 @@ err_release_regions: pci_release_regions(pdev); err_disable_pdev: - pci_disable_device(pdev); + mlx4_pci_disable_device(&priv->dev); pci_set_drvdata(pdev, NULL); return err; } @@ -3775,6 +3803,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->pci_dev_data = id->driver_data; mutex_init(&dev->persist->device_state_mutex); mutex_init(&dev->persist->interface_state_mutex); + mutex_init(&dev->persist->pci_status_mutex); ret = devlink_register(devlink, &pdev->dev); if (ret) @@ -3923,7 +3952,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) } pci_release_regions(pdev); - pci_disable_device(pdev); + mlx4_pci_disable_device(dev); devlink_unregister(devlink); kfree(dev->persist); devlink_free(devlink); @@ -4042,7 +4071,7 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; - pci_disable_device(pdev); + mlx4_pci_disable_device(persist->dev); return PCI_ERS_RESULT_NEED_RESET; } @@ -4050,45 +4079,53 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; - struct mlx4_priv *priv = mlx4_priv(dev); - int ret; - int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; - int total_vfs; + int err; mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); - ret = pci_enable_device(pdev); - if (ret) { - mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret); + err = mlx4_pci_enable_device(dev); + if (err) { + mlx4_err(dev, "Can not re-enable device, err=%d\n", err); return PCI_ERS_RESULT_DISCONNECT; } pci_set_master(pdev); pci_restore_state(pdev); pci_save_state(pdev); + return PCI_ERS_RESULT_RECOVERED; +} +static void mlx4_pci_resume(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int total_vfs; + int err; + + mlx4_err(dev, "%s was called\n", __func__); total_vfs = dev->persist->num_vfs; memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mutex_lock(&persist->interface_state_mutex); if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { - ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, + err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, priv, 1); - if (ret) { - mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", - __func__, ret); + if (err) { + mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n", + __func__, err); goto end; } - ret = restore_current_port_types(dev, dev->persist-> + err = restore_current_port_types(dev, dev->persist-> curr_port_type, dev->persist-> curr_port_poss_type); - if (ret) - mlx4_err(dev, "could not restore original port types (%d)\n", ret); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", err); } end: mutex_unlock(&persist->interface_state_mutex); - return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static void mlx4_shutdown(struct pci_dev *pdev) @@ -4105,6 +4142,7 @@ static void mlx4_shutdown(struct pci_dev *pdev) static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, + .resume = mlx4_pci_resume, }; static struct pci_driver mlx4_driver = { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index ef9683101ead..c9d7fc5159f2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -586,6 +586,8 @@ struct mlx4_mfunc_master_ctx { struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1]; int init_port_ref[MLX4_MAX_PORTS + 1]; u16 max_mtu[MLX4_MAX_PORTS + 1]; + u8 pptx; + u8 pprx; int disable_mcast_ref[MLX4_MAX_PORTS + 1]; struct mlx4_resource_tracker res_tracker; struct workqueue_struct *comm_wq; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d12ab6a73344..63b1aeae2c03 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -323,6 +323,7 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; + unsigned long dropped; int hwtstamp_rx_filter; cpumask_var_t affinity_mask; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 211c65087997..087b23b320cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1317,6 +1317,19 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, } gen_context->mtu = cpu_to_be16(master->max_mtu[port]); + /* Slave cannot change Global Pause configuration */ + if (slave != mlx4_master_func_num(dev) && + ((gen_context->pptx != master->pptx) || + (gen_context->pprx != master->pprx))) { + gen_context->pptx = master->pptx; + gen_context->pprx = master->pprx; + mlx4_warn(dev, + "denying Global Pause change for slave:%d\n", + slave); + } else { + master->pptx = gen_context->pptx; + master->pprx = gen_context->pprx; + } break; case MLX4_SET_PORT_GID_TABLE: /* change to MULTIPLE entries: number of guest's gids diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4fc45ee0c5d1..679e18ffb3a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -9,3 +9,4 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_txrx.o en_clock.o vxlan.o en_tc.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o +mlx5_core-$(CONFIG_RFS_ACCEL) += en_arfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 879e6276c473..34523c48444e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -46,6 +46,9 @@ #include <linux/rhashtable.h> #include "wq.h" #include "mlx5_core.h" +#include "en_stats.h" + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) #define MLX5E_MAX_NUM_TC 8 @@ -57,12 +60,33 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 + +#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES) +#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE) +#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES +\ + MLX5_MPWRQ_LOG_STRIDE_SIZE) +#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ + MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) +#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ + MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \ + BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)) +#define MLX5_UMR_ALIGN (2048) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) + #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) @@ -73,230 +97,63 @@ #define MLX5E_NUM_MAIN_GROUPS 9 +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +static inline int mlx5_min_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + } +} + +static inline int mlx5_max_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + } +} + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_wqe_data_seg data; +}; + #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif -static const char vport_strings[][ETH_GSTRING_LEN] = { - /* vport statistics */ - "rx_packets", - "rx_bytes", - "tx_packets", - "tx_bytes", - "rx_error_packets", - "rx_error_bytes", - "tx_error_packets", - "tx_error_bytes", - "rx_unicast_packets", - "rx_unicast_bytes", - "tx_unicast_packets", - "tx_unicast_bytes", - "rx_multicast_packets", - "rx_multicast_bytes", - "tx_multicast_packets", - "tx_multicast_bytes", - "rx_broadcast_packets", - "rx_broadcast_bytes", - "tx_broadcast_packets", - "tx_broadcast_bytes", - - /* SW counters */ - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "lro_packets", - "lro_bytes", - "rx_csum_good", - "rx_csum_none", - "rx_csum_sw", - "tx_csum_offload", - "tx_csum_inner", - "tx_queue_stopped", - "tx_queue_wake", - "tx_queue_dropped", - "rx_wqe_err", -}; - -struct mlx5e_vport_stats { - /* HW counters */ - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - u64 rx_error_packets; - u64 rx_error_bytes; - u64 tx_error_packets; - u64 tx_error_bytes; - u64 rx_unicast_packets; - u64 rx_unicast_bytes; - u64 tx_unicast_packets; - u64 tx_unicast_bytes; - u64 rx_multicast_packets; - u64 rx_multicast_bytes; - u64 tx_multicast_packets; - u64 tx_multicast_bytes; - u64 rx_broadcast_packets; - u64 rx_broadcast_bytes; - u64 tx_broadcast_packets; - u64 tx_broadcast_bytes; - - /* SW counters */ - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 lro_packets; - u64 lro_bytes; - u64 rx_csum_good; - u64 rx_csum_none; - u64 rx_csum_sw; - u64 tx_csum_offload; - u64 tx_csum_inner; - u64 tx_queue_stopped; - u64 tx_queue_wake; - u64 tx_queue_dropped; - u64 rx_wqe_err; - -#define NUM_VPORT_COUNTERS 35 -}; - -static const char pport_strings[][ETH_GSTRING_LEN] = { - /* IEEE802.3 counters */ - "frames_tx", - "frames_rx", - "check_seq_err", - "alignment_err", - "octets_tx", - "octets_received", - "multicast_xmitted", - "broadcast_xmitted", - "multicast_rx", - "broadcast_rx", - "in_range_len_errors", - "out_of_range_len", - "too_long_errors", - "symbol_err", - "mac_control_tx", - "mac_control_rx", - "unsupported_op_rx", - "pause_ctrl_rx", - "pause_ctrl_tx", - - /* RFC2863 counters */ - "in_octets", - "in_ucast_pkts", - "in_discards", - "in_errors", - "in_unknown_protos", - "out_octets", - "out_ucast_pkts", - "out_discards", - "out_errors", - "in_multicast_pkts", - "in_broadcast_pkts", - "out_multicast_pkts", - "out_broadcast_pkts", - - /* RFC2819 counters */ - "drop_events", - "octets", - "pkts", - "broadcast_pkts", - "multicast_pkts", - "crc_align_errors", - "undersize_pkts", - "oversize_pkts", - "fragments", - "jabbers", - "collisions", - "p64octets", - "p65to127octets", - "p128to255octets", - "p256to511octets", - "p512to1023octets", - "p1024to1518octets", - "p1519to2047octets", - "p2048to4095octets", - "p4096to8191octets", - "p8192to10239octets", -}; - -#define NUM_IEEE_802_3_COUNTERS 19 -#define NUM_RFC_2863_COUNTERS 13 -#define NUM_RFC_2819_COUNTERS 21 -#define NUM_PPORT_COUNTERS (NUM_IEEE_802_3_COUNTERS + \ - NUM_RFC_2863_COUNTERS + \ - NUM_RFC_2819_COUNTERS) - -struct mlx5e_pport_stats { - __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS]; - __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS]; - __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; -}; - -static const char rq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "csum_none", - "csum_sw", - "lro_packets", - "lro_bytes", - "wqe_err" -}; - -struct mlx5e_rq_stats { - u64 packets; - u64 bytes; - u64 csum_none; - u64 csum_sw; - u64 lro_packets; - u64 lro_bytes; - u64 wqe_err; -#define NUM_RQ_STATS 7 -}; - -static const char sq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "csum_offload_inner", - "nop", - "csum_offload_none", - "stopped", - "wake", - "dropped", -}; - -struct mlx5e_sq_stats { - /* commonly accessed in data path */ - u64 packets; - u64 bytes; - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 csum_offload_inner; - u64 nop; - /* less likely accessed in data path */ - u64 csum_offload_none; - u64 stopped; - u64 wake; - u64 dropped; -#define NUM_SQ_STATS 12 -}; - -struct mlx5e_stats { - struct mlx5e_vport_stats vport; - struct mlx5e_pport_stats pport; -}; - struct mlx5e_params { u8 log_sq_size; + u8 rq_wq_type; u8 log_rq_size; u16 num_channels; u8 num_tc; @@ -311,6 +168,7 @@ struct mlx5e_params { u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + bool vlan_strip_disable; #ifdef CONFIG_MLX5_CORE_EN_DCB struct ieee_ets ets; #endif @@ -331,6 +189,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, + MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, }; struct mlx5e_cq { @@ -347,28 +206,75 @@ struct mlx5e_cq { struct mlx5_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, + u16 ix); + +struct mlx5e_dma_info { + struct page *page; + dma_addr_t addr; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; u32 wqe_sz; struct sk_buff **skb; + struct mlx5e_mpw_info *wqe_info; + __be32 mkey_be; + __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_alloc_wqe alloc_wqe; unsigned long state; int ix; /* control */ struct mlx5_wq_ctrl wq_ctrl; + u8 wq_type; u32 rqn; struct mlx5e_channel *channel; struct mlx5e_priv *priv; } ____cacheline_aligned_in_smp; +struct mlx5e_umr_dma_info { + __be64 *mtt; + __be64 *mtt_no_align; + dma_addr_t mtt_addr; + struct mlx5e_dma_info *dma_info; +}; + +struct mlx5e_mpw_info { + union { + struct mlx5e_dma_info dma_info; + struct mlx5e_umr_dma_info umr; + }; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; + + void (*dma_pre_sync)(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len); + void (*add_skb_frag)(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, u32 len); + void (*copy_skb_header)(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen); + void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); +}; + struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; @@ -391,6 +297,11 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; +struct mlx5e_ico_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + struct mlx5e_sq { /* data path */ @@ -432,6 +343,7 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; + struct mlx5e_ico_wqe_info *ico_wqe_info; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -448,6 +360,7 @@ struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_sq icosq; /* internal control operations */ struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -474,42 +387,42 @@ enum mlx5e_traffic_types { MLX5E_TT_IPV6, MLX5E_TT_ANY, MLX5E_NUM_TT, + MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; -#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY) +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, +}; -enum mlx5e_rqt_ix { - MLX5E_INDIRECTION_RQT, - MLX5E_SINGLE_RQ_RQT, - MLX5E_NUM_RQT, +struct mlx5e_vxlan_db { + spinlock_t lock; /* protect vxlan table */ + struct radix_tree_root tree; }; -struct mlx5e_eth_addr_info { +struct mlx5e_l2_rule { u8 addr[ETH_ALEN + 2]; - u32 tt_vec; - struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; + struct mlx5_flow_rule *rule; }; -#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) - -struct mlx5e_eth_addr_db { - struct hlist_head netdev_uc[MLX5E_ETH_ADDR_HASH_SIZE]; - struct hlist_head netdev_mc[MLX5E_ETH_ADDR_HASH_SIZE]; - struct mlx5e_eth_addr_info broadcast; - struct mlx5e_eth_addr_info allmulti; - struct mlx5e_eth_addr_info promisc; - bool broadcast_enabled; - bool allmulti_enabled; - bool promisc_enabled; +struct mlx5e_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; }; -enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLE, - MLX5E_STATE_OPENED, - MLX5E_STATE_DESTROYING, +#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) + +struct mlx5e_tc_table { + struct mlx5_flow_table *t; + + struct rhashtable_params ht_params; + struct rhashtable ht; }; -struct mlx5e_vlan_db { +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; struct mlx5_flow_rule *untagged_rule; @@ -517,29 +430,74 @@ struct mlx5e_vlan_db { bool filter_disabled; }; -struct mlx5e_vxlan_db { - spinlock_t lock; /* protect vxlan table */ - struct radix_tree_root tree; +struct mlx5e_l2_table { + struct mlx5e_flow_table ft; + struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; + struct mlx5e_l2_rule broadcast; + struct mlx5e_l2_rule allmulti; + struct mlx5e_l2_rule promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; }; -struct mlx5e_flow_table { - int num_groups; - struct mlx5_flow_table *t; - struct mlx5_flow_group **g; +/* L3/L4 traffic type classifier */ +struct mlx5e_ttc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_rule *rules[MLX5E_NUM_TT]; }; -struct mlx5e_tc_flow_table { - struct mlx5_flow_table *t; +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_rule *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; - struct rhashtable_params ht_params; - struct rhashtable ht; +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + struct list_head rules; + int last_filter_id; + struct workqueue_struct *wq; +}; + +/* NIC prio FTS */ +enum { + MLX5E_VLAN_FT_LEVEL = 0, + MLX5E_L2_FT_LEVEL, + MLX5E_TTC_FT_LEVEL, + MLX5E_ARFS_FT_LEVEL }; -struct mlx5e_flow_tables { - struct mlx5_flow_namespace *ns; - struct mlx5e_tc_flow_table tc; - struct mlx5e_flow_table vlan; - struct mlx5e_flow_table main; +struct mlx5e_flow_steering { + struct mlx5_flow_namespace *ns; + struct mlx5e_tc_table tc; + struct mlx5e_vlan_table vlan; + struct mlx5e_l2_table l2; + struct mlx5e_ttc_table ttc; + struct mlx5e_arfs_tables arfs; +}; + +struct mlx5e_direct_tir { + u32 tirn; + u32 rqtn; +}; + +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO }; struct mlx5e_priv { @@ -554,16 +512,16 @@ struct mlx5e_priv { u32 pdn; u32 tdn; struct mlx5_core_mkey mkey; + struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; - u32 rqtn[MLX5E_NUM_RQT]; - u32 tirn[MLX5E_NUM_TT]; + u32 indir_rqtn; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_direct_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; - struct mlx5e_flow_tables fts; - struct mlx5e_eth_addr_db eth_addr; - struct mlx5e_vlan_db vlan; + struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; struct mlx5e_params params; @@ -575,18 +533,7 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; -}; - -#define MLX5E_NET_IP_ALIGN 2 - -struct mlx5e_tx_wqe { - struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; -}; - -struct mlx5e_rx_wqe { - struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data; + u16 q_counter; }; enum mlx5e_link_mode { @@ -609,7 +556,7 @@ enum mlx5e_link_mode { MLX5E_100GBASE_KR4 = 22, MLX5E_100GBASE_LR4 = 23, MLX5E_100BASE_TX = 24, - MLX5E_100BASE_T = 25, + MLX5E_1000BASE_T = 25, MLX5E_10GBASE_T = 26, MLX5E_25GBASE_CR = 27, MLX5E_25GBASE_KR = 28, @@ -631,14 +578,35 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); +void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_update_stats(struct mlx5e_priv *priv); -int mlx5e_create_flow_tables(struct mlx5e_priv *priv); -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv); -void mlx5e_init_eth_addr(struct mlx5e_priv *priv); +int mlx5e_create_flow_steering(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); +void mlx5e_init_l2_addr(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, @@ -655,16 +623,19 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5e_tx_wqe *wqe, int bf_sz) + struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; @@ -678,9 +649,9 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, */ wmb(); if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz); + __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); else - mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); /* flush the write-combining mapped buffer */ wmb(); @@ -701,12 +672,43 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) MLX5E_MAX_NUM_CHANNELS); } +static inline int mlx5e_get_mtt_octw(int npages) +{ + return ALIGN(npages, 8) / 2; +} + extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); #endif +#ifndef CONFIG_RFS_ACCEL +static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} + +static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + return -ENOTSUPP; +} + +static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + return -ENOTSUPP; +} +#else +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv); +int mlx5e_arfs_enable(struct mlx5e_priv *priv); +int mlx5e_arfs_disable(struct mlx5e_priv *priv); +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#endif + u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c new file mode 100644 index 000000000000..b4ae0fe15878 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/hash.h> +#include <linux/mlx5/fs.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "en.h" + +struct arfs_tuple { + __be16 etype; + u8 ip_proto; + union { + __be32 src_ipv4; + struct in6_addr src_ipv6; + }; + union { + __be32 dst_ipv4; + struct in6_addr dst_ipv6; + }; + __be16 src_port; + __be16 dst_port; +}; + +struct arfs_rule { + struct mlx5e_priv *priv; + struct work_struct arfs_work; + struct mlx5_flow_rule *rule; + struct hlist_node hlist; + int rxq; + /* Flow ID passed to ndo_rx_flow_steer */ + int flow_id; + /* Filter ID returned by ndo_rx_flow_steer */ + int filter_id; + struct arfs_tuple tuple; +}; + +#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \ + for (i = 0; i < ARFS_NUM_TYPES; i++) \ + mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j) + +#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \ + for (j = 0; j < ARFS_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) + +static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +{ + switch (type) { + case ARFS_IPV4_TCP: + return MLX5E_TT_IPV4_TCP; + case ARFS_IPV4_UDP: + return MLX5E_TT_IPV4_UDP; + case ARFS_IPV6_TCP: + return MLX5E_TT_IPV6_TCP; + case ARFS_IPV6_UDP: + return MLX5E_TT_IPV6_UDP; + default: + return -EINVAL; + } +} + +static int arfs_disable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + u32 *tirn = priv->indir_tirn; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.tir_num = tirn[i]; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to bypass the aRFS tables*/ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed\n", + __func__); + return err; + } + } + return 0; +} + +static void arfs_del_rules(struct mlx5e_priv *priv); + +int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + arfs_del_rules(priv); + + return arfs_disable(priv); +} + +int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.ft = priv->fs.arfs.arfs_tables[i].ft.t; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to point on the aRFS FTs */ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed err=%d\n", + __func__, err); + arfs_disable(priv); + return err; + } + } + return 0; +} + +static void arfs_destroy_table(struct arfs_table *arfs_t) +{ + mlx5_del_flow_rule(arfs_t->default_rule); + mlx5e_destroy_flow_table(&arfs_t->ft); +} + +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +{ + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return; + + arfs_del_rules(priv); + destroy_workqueue(priv->fs.arfs.wq); + for (i = 0; i < ARFS_NUM_TYPES; i++) { + if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t)) + arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]); + } +} + +static int arfs_add_default_rule(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + u32 *tirn = priv->indir_tirn; + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + switch (type) { + case ARFS_IPV4_TCP: + dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + break; + case ARFS_IPV4_UDP: + dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + break; + case ARFS_IPV6_TCP: + dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + break; + case ARFS_IPV6_UDP: + dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + break; + default: + err = -EINVAL; + goto out; + } + + arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable, + match_criteria, match_value, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + if (IS_ERR(arfs_t->default_rule)) { + err = PTR_ERR(arfs_t->default_rule); + arfs_t->default_rule = NULL; + netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", + __func__, type); + } +out: + kvfree(match_criteria); + kvfree(match_value); + return err; +} + +#define MLX5E_ARFS_NUM_GROUPS 2 +#define MLX5E_ARFS_GROUP1_SIZE BIT(12) +#define MLX5E_ARFS_GROUP2_SIZE BIT(0) +#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\ + MLX5E_ARFS_GROUP2_SIZE) +static int arfs_create_groups(struct mlx5e_flow_table *ft, + enum arfs_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + in = mlx5_vzalloc(inlen); + if (!in || !ft->g) { + kvfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + case ARFS_IPV4_UDP: + case ARFS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport); + break; + default: + err = -EINVAL; + goto out; + } + + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV4_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ARFS_IPV6_TCP: + case ARFS_IPV6_UDP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto out; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int arfs_create_table(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = arfs_create_groups(ft, type); + if (err) + goto err; + + err = arfs_add_default_rule(priv, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + int err = 0; + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return 0; + + spin_lock_init(&priv->fs.arfs.arfs_lock); + INIT_LIST_HEAD(&priv->fs.arfs.rules); + priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!priv->fs.arfs.wq) + return -ENOMEM; + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + err = arfs_create_table(priv, i); + if (err) + goto err; + } + return 0; +err: + mlx5e_arfs_destroy_tables(priv); + return err; +} + +#define MLX5E_ARFS_EXPIRY_QUOTA 60 + +static void arfs_may_expire_flow(struct mlx5e_priv *priv) +{ + struct arfs_rule *arfs_rule; + struct hlist_node *htmp; + int quota = 0; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; + if (!work_pending(&arfs_rule->arfs_work) && + rps_may_expire_flow(priv->netdev, + arfs_rule->rxq, arfs_rule->flow_id, + arfs_rule->filter_id)) { + hlist_del_init(&arfs_rule->hlist); + hlist_add_head(&arfs_rule->hlist, &del_list); + } + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { + if (arfs_rule->rule) + mlx5_del_flow_rule(arfs_rule->rule); + hlist_del(&arfs_rule->hlist); + kfree(arfs_rule); + } +} + +static void arfs_del_rules(struct mlx5e_priv *priv) +{ + struct hlist_node *htmp; + struct arfs_rule *rule; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + hlist_del_init(&rule->hlist); + hlist_add_head(&rule->hlist, &del_list); + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { + cancel_work_sync(&rule->arfs_work); + if (rule->rule) + mlx5_del_flow_rule(rule->rule); + hlist_del(&rule->hlist); + kfree(rule); + } +} + +static struct hlist_head * +arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, + __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + + bucket_idx = hash_long(l, ARFS_HASH_SHIFT); + + return &arfs_t->rules_hash[bucket_idx]; +} + +static u8 arfs_get_ip_proto(const struct sk_buff *skb) +{ + return (skb->protocol == htons(ETH_P_IP)) ? + ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; +} + +static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, + u8 ip_proto, __be16 etype) +{ + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV4_TCP]; + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV4_UDP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV6_TCP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV6_UDP]; + + return NULL; +} + +static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_tuple *tuple = &arfs_rule->tuple; + struct mlx5_flow_rule *rule = NULL; + struct mlx5_flow_destination dest; + struct arfs_table *arfs_table; + u8 match_criteria_enable = 0; + struct mlx5_flow_table *ft; + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ntohs(tuple->etype)); + arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); + if (!arfs_table) { + err = -EINVAL; + goto out; + } + + ft = arfs_table->ft.t; + if (tuple->ip_proto == IPPROTO_TCP) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport, + ntohs(tuple->src_port)); + } else { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.udp_dport); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.udp_sport); + MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport, + ntohs(tuple->src_port)); + } + if (tuple->etype == htons(ETH_P_IP)) { + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &tuple->src_ipv4, + 4); + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &tuple->dst_ipv4, + 4); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &tuple->src_ipv6, + 16); + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &tuple->dst_ipv6, + 16); + memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; + rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, err); + } + +out: + kvfree(match_criteria); + kvfree(match_value); + return err ? ERR_PTR(err) : rule; +} + +static void arfs_modify_rule_rq(struct mlx5e_priv *priv, + struct mlx5_flow_rule *rule, u16 rxq) +{ + struct mlx5_flow_destination dst; + int err = 0; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst.tir_num = priv->direct_tir[rxq].tirn; + err = mlx5_modify_rule_destination(rule, &dst); + if (err) + netdev_warn(priv->netdev, + "Failed to modfiy aRFS rule destination to rq=%d\n", rxq); +} + +static void arfs_handle_work(struct work_struct *work) +{ + struct arfs_rule *arfs_rule = container_of(work, + struct arfs_rule, + arfs_work); + struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5_flow_rule *rule; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + spin_lock_bh(&priv->fs.arfs.arfs_lock); + hlist_del(&arfs_rule->hlist); + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + mutex_unlock(&priv->state_lock); + kfree(arfs_rule); + goto out; + } + mutex_unlock(&priv->state_lock); + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); + if (IS_ERR(rule)) + goto out; + arfs_rule->rule = rule; + } else { + arfs_modify_rule_rq(priv, arfs_rule->rule, + arfs_rule->rxq); + } +out: + arfs_may_expire_flow(priv); +} + +/* return L4 destination port from ip4/6 packets */ +static __be16 arfs_get_dst_port(const struct sk_buff *skb) +{ + char *transport_header; + + transport_header = skb_transport_header(skb); + if (arfs_get_ip_proto(skb) == IPPROTO_TCP) + return ((struct tcphdr *)transport_header)->dest; + return ((struct udphdr *)transport_header)->dest; +} + +/* return L4 source port from ip4/6 packets */ +static __be16 arfs_get_src_port(const struct sk_buff *skb) +{ + char *transport_header; + + transport_header = skb_transport_header(skb); + if (arfs_get_ip_proto(skb) == IPPROTO_TCP) + return ((struct tcphdr *)transport_header)->source; + return ((struct udphdr *)transport_header)->source; +} + +static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, + struct arfs_table *arfs_t, + const struct sk_buff *skb, + u16 rxq, u32 flow_id) +{ + struct arfs_rule *rule; + struct arfs_tuple *tuple; + + rule = kzalloc(sizeof(*rule), GFP_ATOMIC); + if (!rule) + return NULL; + + rule->priv = priv; + rule->rxq = rxq; + INIT_WORK(&rule->arfs_work, arfs_handle_work); + + tuple = &rule->tuple; + tuple->etype = skb->protocol; + if (tuple->etype == htons(ETH_P_IP)) { + tuple->src_ipv4 = ip_hdr(skb)->saddr; + tuple->dst_ipv4 = ip_hdr(skb)->daddr; + } else { + memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + sizeof(struct in6_addr)); + } + tuple->ip_proto = arfs_get_ip_proto(skb); + tuple->src_port = arfs_get_src_port(skb); + tuple->dst_port = arfs_get_dst_port(skb); + + rule->flow_id = flow_id; + rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; + + hlist_add_head(&rule->hlist, + arfs_hash_bucket(arfs_t, tuple->src_port, + tuple->dst_port)); + return rule; +} + +static bool arfs_cmp_ips(struct arfs_tuple *tuple, + const struct sk_buff *skb) +{ + if (tuple->etype == htons(ETH_P_IP) && + tuple->src_ipv4 == ip_hdr(skb)->saddr && + tuple->dst_ipv4 == ip_hdr(skb)->daddr) + return true; + if (tuple->etype == htons(ETH_P_IPV6) && + (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr))) && + (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + sizeof(struct in6_addr)))) + return true; + return false; +} + +static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, + const struct sk_buff *skb) +{ + struct arfs_rule *arfs_rule; + struct hlist_head *head; + __be16 src_port = arfs_get_src_port(skb); + __be16 dst_port = arfs_get_dst_port(skb); + + head = arfs_hash_bucket(arfs_t, src_port, dst_port); + hlist_for_each_entry(arfs_rule, head, hlist) { + if (arfs_rule->tuple.src_port == src_port && + arfs_rule->tuple.dst_port == dst_port && + arfs_cmp_ips(&arfs_rule->tuple, skb)) { + return arfs_rule; + } + } + + return NULL; +} + +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_table *arfs_t; + struct arfs_rule *arfs_rule; + + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + + arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); + if (!arfs_t) + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); + arfs_rule = arfs_find_rule(arfs_t, skb); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index) { + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; + } + arfs_rule->rxq = rxq_index; + } else { + arfs_rule = arfs_alloc_rule(priv, arfs_t, skb, + rxq_index, flow_id); + if (!arfs_rule) { + spin_unlock_bh(&arfs->arfs_lock); + return -ENOMEM; + } + } + queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work); + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 3036f279a8fd..b2db180ae2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -174,8 +174,14 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + int i; pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause); + pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause); + } return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 68834b715f6c..534d99e2f9c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -138,10 +138,10 @@ static const struct { [MLX5E_100BASE_TX] = { .speed = 100, }, - [MLX5E_100BASE_T] = { - .supported = SUPPORTED_100baseT_Full, - .advertised = ADVERTISED_100baseT_Full, - .speed = 100, + [MLX5E_1000BASE_T] = { + .supported = SUPPORTED_1000baseT_Full, + .advertised = ADVERTISED_1000baseT_Full, + .speed = 1000, }, [MLX5E_10GBASE_T] = { .supported = SUPPORTED_10000baseT_Full, @@ -165,26 +165,112 @@ static const struct { }, }; +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) +#define MLX5E_NUM_RQ_STATS(priv) \ + (NUM_RQ_STATS * priv->params.num_channels * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_SQ_STATS(priv) \ + (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv)) + static int mlx5e_get_sset_count(struct net_device *dev, int sset) { struct mlx5e_priv *priv = netdev_priv(dev); switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + - priv->params.num_channels * NUM_RQ_STATS + - priv->params.num_channels * priv->params.num_tc * - NUM_SQ_STATS; + return NUM_SW_COUNTERS + + MLX5E_NUM_Q_CNTRS(priv) + + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + MLX5E_NUM_RQ_STATS(priv) + + MLX5E_NUM_SQ_STATS(priv) + + MLX5E_NUM_PFC_COUNTERS(priv); /* fallthrough */ default: return -EOPNOTSUPP; } } +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + int i, j, tc, prio, idx = 0; + unsigned long pfc_combined; + + /* SW counters */ + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name); + + /* Q counters */ + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name); + + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_stats_desc[i].name); + + /* PPORT counters */ + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_802_3_stats_desc[i].name); + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2863_stats_desc[i].name); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2819_stats_desc[i].name); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", + prio, + pport_per_prio_traffic_stats_desc[i].name); + } + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", + prio, pport_per_prio_pfc_stats_desc[i].name); + } + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, + rq_stats_desc[j].name); + + for (tc = 0; tc < priv->params.num_tc; tc++) + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "tx%d_%s", + priv->channeltc_to_txq_map[i][tc], + sq_stats_desc[j].name); +} + static void mlx5e_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - int i, j, tc, idx = 0; struct mlx5e_priv *priv = netdev_priv(dev); switch (stringset) { @@ -195,30 +281,7 @@ static void mlx5e_get_strings(struct net_device *dev, break; case ETH_SS_STATS: - /* VPORT counters */ - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_strings[i]); - - /* PPORT counters */ - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_strings[i]); - - /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - "rx%d_%s", i, rq_stats_strings[j]); - - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + - (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_strings[j]); + mlx5e_fill_stats_strings(priv, data); break; } } @@ -227,7 +290,8 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); - int i, j, tc, idx = 0; + int i, j, tc, prio, idx = 0; + unsigned long pfc_combined; if (!data) return; @@ -237,33 +301,68 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_stats_desc, i); + + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = ((u64 *)&priv->stats.vport)[i]; + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->rq.stats)[j]; + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + rq_stats_desc, j); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + sq_stats_desc, j); } static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->params.log_rq_size; param->tx_pending = 1 << priv->params.log_sq_size; @@ -274,6 +373,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; + int rq_wq_type = priv->params.rq_wq_type; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; @@ -289,16 +389,16 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + 1 << mlx5_min_log_rq_size(rq_wq_type)); return -EINVAL; } - if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + 1 << mlx5_max_log_rq_size(rq_wq_type)); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { @@ -316,8 +416,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(param->rx_pending); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = min_t(u16, param->rx_pending - 1, - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && @@ -357,6 +456,7 @@ static int mlx5e_set_channels(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); int ncv = mlx5e_get_max_num_channels(priv->mdev); unsigned int count = ch->combined_count; + bool arfs_enabled; bool was_opened; int err = 0; @@ -385,13 +485,27 @@ static int mlx5e_set_channels(struct net_device *dev, if (was_opened) mlx5e_close_locked(dev); + arfs_enabled = dev->features & NETIF_F_NTUPLE; + if (arfs_enabled) + mlx5e_arfs_disable(priv); + priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (was_opened) err = mlx5e_open_locked(dev); + if (err) + goto out; + + if (arfs_enabled) { + err = mlx5e_arfs_enable(priv); + if (err) + netdev_err(dev, "%s: mlx5e_arfs_enable failed: %d\n", + __func__, err); + } +out: mutex_unlock(&priv->state_lock); return err; @@ -727,9 +841,8 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) MLX5_SET(modify_tir_in, in, bitmask.hash, 1); mlx5e_build_tir_ctx_hash(tirc, priv); - for (i = 0; i < MLX5E_NUM_TT; i++) - if (IS_HASHING_TT(i)) - mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen); + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5_core_modify_tir(mdev, priv->indir_tirn[i], in, inlen); } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -751,9 +864,11 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); if (indir) { + u32 rqtn = priv->indir_rqtn; + memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); - mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); } if (key) @@ -1036,6 +1151,108 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4]; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return 0; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1060,6 +1277,9 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, .get_wol = mlx5e_get_wol, .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index d00a24203410..b32740092854 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -37,7 +37,10 @@ #include <linux/mlx5/fs.h> #include "en.h" -#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type); +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai); enum { MLX5E_FULLMATCH = 0, @@ -58,21 +61,21 @@ enum { MLX5E_ACTION_DEL = 2, }; -struct mlx5e_eth_addr_hash_node { +struct mlx5e_l2_hash_node { struct hlist_node hlist; u8 action; - struct mlx5e_eth_addr_info ai; + struct mlx5e_l2_rule ai; }; -static inline int mlx5e_hash_eth_addr(u8 *addr) +static inline int mlx5e_hash_l2(u8 *addr) { return addr[5]; } -static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr) { - struct mlx5e_eth_addr_hash_node *hn; - int ix = mlx5e_hash_eth_addr(addr); + struct mlx5e_l2_hash_node *hn; + int ix = mlx5e_hash_l2(addr); int found = 0; hlist_for_each_entry(hn, &hash[ix], hlist) @@ -96,371 +99,12 @@ static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) hlist_add_head(&hn->hlist, &hash[ix]); } -static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) { hlist_del(&hn->hlist); kfree(hn); } -static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai) -{ - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); - - if (ai->tt_vec & BIT(MLX5E_TT_ANY)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); -} - -static int mlx5e_get_eth_addr_type(u8 *addr) -{ - if (is_unicast_ether_addr(addr)) - return MLX5E_UC; - - if ((addr[0] == 0x01) && - (addr[1] == 0x00) && - (addr[2] == 0x5e) && - !(addr[3] & 0x80)) - return MLX5E_MC_IPV4; - - if ((addr[0] == 0x33) && - (addr[1] == 0x33)) - return MLX5E_MC_IPV6; - - return MLX5E_MC_OTHER; -} - -static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) -{ - int eth_addr_type; - u32 ret; - - switch (type) { - case MLX5E_FULLMATCH: - eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); - switch (eth_addr_type) { - case MLX5E_UC: - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - case MLX5E_MC_IPV4: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV4) | - 0; - break; - - case MLX5E_MC_IPV6: - ret = - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV6) | - 0; - break; - - case MLX5E_MC_OTHER: - ret = - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - break; - - case MLX5E_ALLMULTI: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - default: /* MLX5E_PROMISC */ - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - return ret; -} - -static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, - int type, u32 *mc, u32 *mv) -{ - struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - struct mlx5_flow_rule **rule_p; - struct mlx5_flow_table *ft = priv->fts.main.t; - u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, - outer_headers.dmac_47_16); - u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, - outer_headers.dmac_47_16); - u32 *tirn = priv->tirn; - u32 tt_vec; - int err = 0; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - - switch (type) { - case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - eth_broadcast_addr(mc_dmac); - ether_addr_copy(mv_dmac, ai->addr); - break; - - case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - mc_dmac[0] = 0x01; - mv_dmac[0] = 0x01; - break; - - case MLX5E_PROMISC: - break; - } - - tt_vec = mlx5e_get_tt_vec(ai, type); - - if (tt_vec & BIT(MLX5E_TT_ANY)) { - rule_p = &ai->ft_rule[MLX5E_TT_ANY]; - dest.tir_num = tirn[MLX5E_TT_ANY]; - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_ANY); - } - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - - if (tt_vec & BIT(MLX5E_TT_IPV4)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; - dest.tir_num = tirn[MLX5E_TT_IPV4]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; - dest.tir_num = tirn[MLX5E_TT_IPV6]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6); - } - - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); - - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; - dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; - dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); - } - - return 0; - -err_del_ai: - err = PTR_ERR(*rule_p); - *rule_p = NULL; - mlx5e_del_eth_addr_from_flow_table(priv, ai); - - return err; -} - -static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type) -{ - u32 *match_criteria; - u32 *match_value; - int err = 0; - - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_eth_addr_rule_out; - } - - err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria, - match_value); - -add_eth_addr_rule_out: - kvfree(match_criteria); - kvfree(match_value); - - return err; -} - static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) { struct net_device *ndev = priv->netdev; @@ -472,7 +116,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -489,7 +133,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -514,28 +158,28 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, u32 *mc, u32 *mv) { - struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fts.main.t; + dest.ft = priv->fs.l2.ft.t; match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - rule_p = &priv->vlan.untagged_rule; + rule_p = &priv->fs.vlan.untagged_rule; break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - rule_p = &priv->vlan.any_vlan_rule; + rule_p = &priv->fs.vlan.any_vlan_rule; MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - rule_p = &priv->vlan.active_vlans_rule[vid]; + rule_p = &priv->fs.vlan.active_vlans_rule[vid]; MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); @@ -589,22 +233,22 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - if (priv->vlan.untagged_rule) { - mlx5_del_flow_rule(priv->vlan.untagged_rule); - priv->vlan.untagged_rule = NULL; + if (priv->fs.vlan.untagged_rule) { + mlx5_del_flow_rule(priv->fs.vlan.untagged_rule); + priv->fs.vlan.untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - if (priv->vlan.any_vlan_rule) { - mlx5_del_flow_rule(priv->vlan.any_vlan_rule); - priv->vlan.any_vlan_rule = NULL; + if (priv->fs.vlan.any_vlan_rule) { + mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule); + priv->fs.vlan.any_vlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5e_vport_context_update_vlans(priv); - if (priv->vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]); - priv->vlan.active_vlans_rule[vid] = NULL; + if (priv->fs.vlan.active_vlans_rule[vid]) { + mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]); + priv->fs.vlan.active_vlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -613,10 +257,10 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { - if (!priv->vlan.filter_disabled) + if (!priv->fs.vlan.filter_disabled) return; - priv->vlan.filter_disabled = false; + priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); @@ -624,10 +268,10 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) { - if (priv->vlan.filter_disabled) + if (priv->fs.vlan.filter_disabled) return; - priv->vlan.filter_disabled = true; + priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); @@ -638,7 +282,7 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - set_bit(vid, priv->vlan.active_vlans); + set_bit(vid, priv->fs.vlan.active_vlans); return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); } @@ -648,7 +292,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->vlan.active_vlans); + clear_bit(vid, priv->fs.vlan.active_vlans); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); @@ -656,21 +300,21 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) -static void mlx5e_execute_action(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_hash_node *hn) +static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, + struct mlx5e_l2_hash_node *hn) { switch (hn->action) { case MLX5E_ACTION_ADD: - mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: - mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); - mlx5e_del_eth_addr_from_hash(hn); + mlx5e_del_l2_flow_rule(priv, &hn->ai); + mlx5e_del_l2_from_hash(hn); break; } } @@ -682,14 +326,14 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) netif_addr_lock_bh(netdev); - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, - priv->netdev->dev_addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, + priv->netdev->dev_addr); netdev_for_each_uc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr); netdev_for_each_mc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr); netif_addr_unlock_bh(netdev); } @@ -699,17 +343,17 @@ static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); struct net_device *ndev = priv->netdev; - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_head *addr_list; struct hlist_node *tmp; int i = 0; int hi; - addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; if (is_uc) /* Make sure our own address is pushed first */ ether_addr_copy(addr_array[i++], ndev->dev_addr); - else if (priv->eth_addr.broadcast_enabled) + else if (priv->fs.l2.broadcast_enabled) ether_addr_copy(addr_array[i++], ndev->broadcast); mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { @@ -725,7 +369,7 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int list_type) { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; u8 (*addr_array)[ETH_ALEN] = NULL; struct hlist_head *addr_list; struct hlist_node *tmp; @@ -734,12 +378,12 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int err; int hi; - size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0); max_size = is_uc ? 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); - addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) size++; @@ -770,7 +414,7 @@ out: static void mlx5e_vport_context_update(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct mlx5e_l2_table *ea = &priv->fs.l2; mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); @@ -781,26 +425,26 @@ static void mlx5e_vport_context_update(struct mlx5e_priv *priv) static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - mlx5e_execute_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) + mlx5e_execute_l2_action(priv, hn); - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - mlx5e_execute_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) + mlx5e_execute_l2_action(priv, hn); } static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) hn->action = MLX5E_ACTION_DEL; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) hn->action = MLX5E_ACTION_DEL; if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) @@ -814,7 +458,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, set_rx_mode_work); - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct mlx5e_l2_table *ea = &priv->fs.l2; struct net_device *ndev = priv->netdev; bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -830,27 +474,27 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; if (enable_promisc) { - mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->vlan.filter_disabled) + mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->fs.vlan.filter_disabled) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); } if (enable_allmulti) - mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) - mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); mlx5e_handle_netdev_addr(priv); if (disable_broadcast) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + mlx5e_del_l2_flow_rule(priv, &ea->broadcast); if (disable_allmulti) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { - if (!priv->vlan.filter_disabled) + if (!priv->fs.vlan.filter_disabled) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); - mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + mlx5e_del_l2_flow_rule(priv, &ea->promisc); } ea->promisc_enabled = promisc_enabled; @@ -872,224 +516,454 @@ static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) ft->num_groups = 0; } -void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +void mlx5e_init_l2_addr(struct mlx5e_priv *priv) { - ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); + ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast); } -#define MLX5E_MAIN_GROUP0_SIZE BIT(3) -#define MLX5E_MAIN_GROUP1_SIZE BIT(1) -#define MLX5E_MAIN_GROUP2_SIZE BIT(0) -#define MLX5E_MAIN_GROUP3_SIZE BIT(14) -#define MLX5E_MAIN_GROUP4_SIZE BIT(13) -#define MLX5E_MAIN_GROUP5_SIZE BIT(11) -#define MLX5E_MAIN_GROUP6_SIZE BIT(2) -#define MLX5E_MAIN_GROUP7_SIZE BIT(1) -#define MLX5E_MAIN_GROUP8_SIZE BIT(0) -#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ - MLX5E_MAIN_GROUP1_SIZE +\ - MLX5E_MAIN_GROUP2_SIZE +\ - MLX5E_MAIN_GROUP3_SIZE +\ - MLX5E_MAIN_GROUP4_SIZE +\ - MLX5E_MAIN_GROUP5_SIZE +\ - MLX5E_MAIN_GROUP6_SIZE +\ - MLX5E_MAIN_GROUP7_SIZE +\ - MLX5E_MAIN_GROUP8_SIZE) - -static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in, - int inlen) +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) { - u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria.outer_headers.dmac_47_16); + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i])) { + mlx5_del_flow_rule(ttc->rules[i]); + ttc->rules[i] = NULL; + } + } +} + +static struct { + u16 etype; + u8 proto; +} ttc_rules[] = { + [MLX5E_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5E_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5E_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, + u8 proto) +{ + struct mlx5_flow_rule *rule; + u8 match_criteria_enable = 0; + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + + if (proto) { + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, proto); + } + if (etype) { + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rule(ft, match_criteria_enable, + match_criteria, match_value, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dest); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } +out: + kvfree(match_criteria); + kvfree(match_value); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_rule **rules; + struct mlx5_flow_table *ft; + int tt; int err; + + ttc = &priv->fs.ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->indir_tirn[tt]; + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +#define MLX5E_TTC_NUM_GROUPS 3 +#define MLX5E_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ + MLX5E_TTC_GROUP2_SIZE +\ + MLX5E_TTC_GROUP3_SIZE) +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; int ix = 0; + u32 *in; + int err; + u8 *mc; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP0_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; + ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = mlx5_vzalloc(inlen); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP1_SIZE; + ix += MLX5E_TTC_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; - memset(in, 0, inlen); + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP2_SIZE; + ix += MLX5E_TTC_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; + /* Any Group */ memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - eth_broadcast_addr(dmac); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP3_SIZE; + ix += MLX5E_TTC_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - eth_broadcast_addr(dmac); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP4_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; + kvfree(in); + return 0; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - eth_broadcast_addr(dmac); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP5_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - dmac[0] = 0x01; + return err; +} + +static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + +static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_ttc_table_rules(priv); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai) +{ + if (!IS_ERR_OR_NULL(ai->rule)) { + mlx5_del_flow_rule(ai->rule); + ai->rule = NULL; + } +} + +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type) +{ + struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + u32 *match_criteria; + u32 *match_value; + int err = 0; + u8 *mc_dmac; + u8 *mv_dmac; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_l2_rule_out; + } + + mc_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + mv_dmac = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.ttc.ft.t; + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + ai->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR(ai->rule)) { + netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", + __func__, mv_dmac); + err = PTR_ERR(ai->rule); + ai->rule = NULL; + } + +add_l2_rule_out: + kvfree(match_criteria); + kvfree(match_value); + + return err; +} + +#define MLX5E_NUM_L2_GROUPS 3 +#define MLX5E_L2_GROUP1_SIZE BIT(0) +#define MLX5E_L2_GROUP2_SIZE BIT(15) +#define MLX5E_L2_GROUP3_SIZE BIT(0) +#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP3_SIZE) +static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &l2_table->ft; + int ix = 0; + u8 *mc_dmac; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = mlx5_vzalloc(inlen); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + /* Flow Group for promiscuous */ MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP6_SIZE; + ix += MLX5E_L2_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; - memset(in, 0, inlen); + /* Flow Group for full match */ + eth_broadcast_addr(mc_dmac); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP7_SIZE; + ix += MLX5E_L2_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - dmac[0] = 0x01; + /* Flow Group for allmulti */ + eth_zero_addr(mc_dmac); + mc_dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP8_SIZE; + ix += MLX5E_L2_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; + kvfree(in); return 0; err_destroy_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); + kvfree(in); return err; } -static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv) { - u32 *in; - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int err; - - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - err = __mlx5e_create_main_groups(ft, in, inlen); - - kvfree(in); - return err; + mlx5e_destroy_flow_table(&priv->fs.l2.ft); } -static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +static int mlx5e_create_l2_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fts.main; + struct mlx5e_l2_table *l2_table = &priv->fs.l2; + struct mlx5e_flow_table *ft = &l2_table->ft; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return err; } - ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) { - err = -ENOMEM; - goto err_destroy_main_flow_table; - } - err = mlx5e_create_main_groups(ft); + err = mlx5e_create_l2_table_groups(l2_table); if (err) - goto err_free_g; - return 0; + goto err_destroy_flow_table; -err_free_g: - kfree(ft->g); + return 0; -err_destroy_main_flow_table: +err_destroy_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return err; } -static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) -{ - mlx5e_destroy_groups(ft); - kfree(ft->g); - mlx5_destroy_flow_table(ft->t); - ft->t = NULL; -} - -static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) -{ - mlx5e_destroy_flow_table(&priv->fts.main); -} - #define MLX5E_NUM_VLAN_GROUPS 2 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(1) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE) -static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in, - int inlen) +static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) { int err; int ix = 0; @@ -1128,7 +1002,7 @@ err_destroy_groups: return err; } -static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) +static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1138,19 +1012,20 @@ static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) if (!in) return -ENOMEM; - err = __mlx5e_create_vlan_groups(ft, in, inlen); + err = __mlx5e_create_vlan_table_groups(ft, in, inlen); kvfree(in); return err; } -static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fts.vlan; + struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1160,65 +1035,90 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; - goto err_destroy_vlan_flow_table; + goto err_destroy_vlan_table; } - err = mlx5e_create_vlan_groups(ft); + err = mlx5e_create_vlan_table_groups(ft); if (err) goto err_free_g; + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + goto err_destroy_vlan_flow_groups; + return 0; +err_destroy_vlan_flow_groups: + mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); - -err_destroy_vlan_flow_table: +err_destroy_vlan_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return err; } -static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { - mlx5e_destroy_flow_table(&priv->fts.vlan); + mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } -int mlx5e_create_flow_tables(struct mlx5e_priv *priv) +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) { int err; - priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); - if (!priv->fts.ns) + if (!priv->fs.ns) return -EINVAL; - err = mlx5e_create_vlan_flow_table(priv); - if (err) - return err; + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } - err = mlx5e_create_main_flow_table(priv); - if (err) - goto err_destroy_vlan_flow_table; + err = mlx5e_create_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_main_flow_table; + err = mlx5e_create_l2_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n", + err); + goto err_destroy_ttc_table; + } + + err = mlx5e_create_vlan_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n", + err); + goto err_destroy_l2_table; + } return 0; -err_destroy_main_flow_table: - mlx5e_destroy_main_flow_table(priv); -err_destroy_vlan_flow_table: - mlx5e_destroy_vlan_flow_table(priv); +err_destroy_l2_table: + mlx5e_destroy_l2_table(priv); +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(priv); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); return err; } -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - mlx5e_destroy_main_flow_table(priv); - mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_vlan_table(priv); + mlx5e_destroy_l2_table(priv); + mlx5e_destroy_ttc_table(priv); + mlx5e_arfs_destroy_tables(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e0adb604f461..4ccfc1ac62c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -48,6 +48,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; u16 max_inline; + bool icosq; }; struct mlx5e_cq_param { @@ -59,8 +60,10 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; }; static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -88,82 +91,15 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_pport_stats *s = &priv->stats.pport; - u32 *in; - u32 *out; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - in = mlx5_vzalloc(sz); - out = mlx5_vzalloc(sz); - if (!in || !out) - goto free_out; - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->IEEE_802_3_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->IEEE_802_3_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2863_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2863_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2819_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2819_counters)); - -free_out: - kvfree(in); - kvfree(out); -} - -void mlx5e_update_stats(struct mlx5e_priv *priv) +static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_sw_stats *s = &priv->stats.sw; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; - u32 *out; - int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u64 tx_offload_none; + u64 tx_offload_none = 0; int i, j; - out = mlx5_vzalloc(outlen); - if (!out) - return; - - /* Collect firts the SW counters and then HW for consistency */ - s->rx_packets = 0; - s->rx_bytes = 0; - s->tx_packets = 0; - s->tx_bytes = 0; - s->tso_packets = 0; - s->tso_bytes = 0; - s->tso_inner_packets = 0; - s->tso_inner_bytes = 0; - s->tx_queue_stopped = 0; - s->tx_queue_wake = 0; - s->tx_queue_dropped = 0; - s->tx_csum_inner = 0; - tx_offload_none = 0; - s->lro_packets = 0; - s->lro_bytes = 0; - s->rx_csum_none = 0; - s->rx_csum_sw = 0; - s->rx_wqe_err = 0; + memset(s, 0, sizeof(*s)); for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -173,7 +109,11 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_sw += rq_stats->csum_sw; + s->rx_csum_inner += rq_stats->csum_inner; s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; + s->rx_mpwqe_frag += rq_stats->mpwqe_frag; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -192,7 +132,23 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) } } - /* HW counters */ + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; + s->rx_csum_good = s->rx_packets - s->rx_csum_none - + s->rx_csum_sw; + + s->link_down_events = MLX5_GET(ppcnt_reg, + priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); +} + +static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + struct mlx5_core_dev *mdev = priv->mdev; + memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, @@ -202,56 +158,69 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) memset(out, 0, outlen); - if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + u32 *in; + + in = mlx5_vzalloc(sz); + if (!in) goto free_out; -#define MLX5_GET_CTR(p, x) \ - MLX5_GET64(query_vport_counter_out, p, x) - - s->rx_error_packets = - MLX5_GET_CTR(out, received_errors.packets); - s->rx_error_bytes = - MLX5_GET_CTR(out, received_errors.octets); - s->tx_error_packets = - MLX5_GET_CTR(out, transmit_errors.packets); - s->tx_error_bytes = - MLX5_GET_CTR(out, transmit_errors.octets); - - s->rx_unicast_packets = - MLX5_GET_CTR(out, received_eth_unicast.packets); - s->rx_unicast_bytes = - MLX5_GET_CTR(out, received_eth_unicast.octets); - s->tx_unicast_packets = - MLX5_GET_CTR(out, transmitted_eth_unicast.packets); - s->tx_unicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_unicast.octets); - - s->rx_multicast_packets = - MLX5_GET_CTR(out, received_eth_multicast.packets); - s->rx_multicast_bytes = - MLX5_GET_CTR(out, received_eth_multicast.octets); - s->tx_multicast_packets = - MLX5_GET_CTR(out, transmitted_eth_multicast.packets); - s->tx_multicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_multicast.octets); - - s->rx_broadcast_packets = - MLX5_GET_CTR(out, received_eth_broadcast.packets); - s->rx_broadcast_bytes = - MLX5_GET_CTR(out, received_eth_broadcast.octets); - s->tx_broadcast_packets = - MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); - s->tx_broadcast_bytes = - MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + MLX5_SET(ppcnt_reg, in, local_port, 1); - /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; - s->rx_csum_good = s->rx_packets - s->rx_csum_none - - s->rx_csum_sw; + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } - mlx5e_update_pport_counters(priv); free_out: - kvfree(out); + kvfree(in); +} + +static void mlx5e_update_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + + if (!priv->q_counter) + return; + + mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, + &qcnt->rx_out_of_buffer); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_update_q_counter(priv); + mlx5e_update_vport_counters(priv); + mlx5e_update_pport_counters(priv); + mlx5e_update_sw_counters(priv); } static void mlx5e_update_stats_work(struct work_struct *work) @@ -310,6 +279,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 byte_count; int wq_sz; int err; int i; @@ -324,32 +294,54 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; + rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + + rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE; + byte_count = rq->wqe_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; + + rq->wqe_sz = (priv->params.lro_en) ? + priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); + byte_count = rq->wqe_sz; + byte_count |= MLX5_HW_START_PADDING; + } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; - wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = - cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + wqe->data.byte_count = cpu_to_be32(byte_count); } + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = &priv->tstamp; rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->mkey_be = c->mkey_be; + rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); return 0; @@ -361,7 +353,14 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - kfree(rq->skb); + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kfree(rq->wqe_info); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + kfree(rq->skb); + } + mlx5_wq_destroy(&rq->wq_ctrl); } @@ -390,6 +389,7 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -404,7 +404,8 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) return err; } -static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, + int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -432,6 +433,36 @@ static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) return err; } +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + static void mlx5e_disable_rq(struct mlx5e_rq *rq) { mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); @@ -458,6 +489,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { + struct mlx5e_sq *sq = &c->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; int err; err = mlx5e_create_rq(c, param, rq); @@ -468,12 +501,15 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; - err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); - mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */ + + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -490,7 +526,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); while (!mlx5_wq_ll_is_empty(&rq->wq)) msleep(20); @@ -539,7 +575,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - int txq_ix; int err; err = mlx5_alloc_map_uar(mdev, &sq->uar, true); @@ -567,8 +602,24 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + if (param->icosq) { + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * + wq_sz, + GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!sq->ico_wqe_info) { + err = -ENOMEM; + goto err_free_sq_db; + } + } else { + int txq_ix; + + txq_ix = c->ix + tc * priv->params.num_channels; + sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + priv->txq_to_sq_map[txq_ix] = sq; + } sq->pdev = c->pdev; sq->tstamp = &priv->tstamp; @@ -577,10 +628,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->tc = tc; sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; sq->bf_budget = MLX5E_SQ_BF_BUDGET; - priv->txq_to_sq_map[txq_ix] = sq; return 0; +err_free_sq_db: + mlx5e_free_sq_db(sq); + err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -595,6 +648,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; + kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -623,10 +677,10 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -701,9 +755,11 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_disable_sq; - set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); + if (sq->txq) { + set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + } return 0; @@ -724,15 +780,19 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ - netif_tx_disable_queue(sq->txq); + if (sq->txq) { + clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&sq->channel->napi); + netif_tx_disable_queue(sq->txq); - /* ensure hw is notified of all pending wqes */ - if (mlx5e_sq_has_room_for(sq, 1)) - mlx5e_send_nop(sq, true); + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq, true); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + } - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); while (sq->cc != sq->pc) /* wait till sq is empty */ msleep(20); @@ -986,10 +1046,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0); if (err) goto err_napi_del; + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_close_icosq_cq; + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts); @@ -998,10 +1062,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, napi_enable(&c->napi); - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_close_icosq; + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; @@ -1014,6 +1082,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_close_sqs: mlx5e_close_sqs(c); +err_close_icosq: + mlx5e_close_sq(&c->icosq); + err_disable_napi: napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); @@ -1021,6 +1092,9 @@ err_disable_napi: err_close_tx_cqs: mlx5e_close_tx_cqs(c); +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + err_napi_del: netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1033,9 +1107,11 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); + mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1050,11 +1126,23 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + MLX5_MPWRQ_LOG_NUM_STRIDES - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, + MLX5_MPWRQ_LOG_STRIDE_SIZE - 6); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + } + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; @@ -1069,17 +1157,27 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + param->max_inline = priv->params.tx_max_inline; } @@ -1095,8 +1193,18 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; + u8 log_cq_size; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = priv->params.log_rq_size + + MLX5_MPWRQ_LOG_NUM_STRIDES; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + log_cq_size = priv->params.log_rq_size; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); mlx5e_build_common_cq_param(priv, param); } @@ -1106,25 +1214,52 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + u8 log_wq_size) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); mlx5e_build_common_cq_param(priv, param); } -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_channel_param *cparam) +static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param, + u8 log_wq_size) { - memset(cparam, 0, sizeof(*cparam)); + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); + + param->icosq = true; +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { - struct mlx5e_channel_param cparam; + struct mlx5e_channel_param *cparam; int nch = priv->params.num_channels; int err = -ENOMEM; int i; @@ -1136,12 +1271,15 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, sizeof(struct mlx5e_sq *), GFP_KERNEL); - if (!priv->channel || !priv->txq_to_sq_map) + cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + + if (!priv->channel || !priv->txq_to_sq_map || !cparam) goto err_free_txq_to_sq_map; - mlx5e_build_channel_param(priv, &cparam); + mlx5e_build_channel_param(priv, cparam); + for (i = 0; i < nch; i++) { - err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); + err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]); if (err) goto err_close_channels; } @@ -1152,6 +1290,7 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) goto err_close_channels; } + kfree(cparam); return 0; err_close_channels: @@ -1161,6 +1300,7 @@ err_close_channels: err_free_txq_to_sq_map: kfree(priv->txq_to_sq_map); kfree(priv->channel); + kfree(cparam); return err; } @@ -1200,48 +1340,36 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { int ix = i; + u32 rqn; if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); ix = priv->params.indirection_rqt[ix]; - MLX5_SET(rqtc, rqtc, rq_num[i], - test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn); + rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn; + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); } } -static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, void *rqtc, - enum mlx5e_rqt_ix rqt_ix) +static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, + int ix) { + u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn; - switch (rqt_ix) { - case MLX5E_INDIRECTION_RQT: - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - - break; - - default: /* MLX5E_SINGLE_RQ_RQT */ - MLX5_SET(rqtc, rqtc, rq_num[0], - test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[0]->rq.rqn : - priv->drop_rq.rqn); - - break; - } + MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) { struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; void *rqtc; int inlen; - int sz; int err; - - sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; + u32 *in; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); @@ -1253,26 +1381,73 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + if (sz > 1) /* RSS */ + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + else + mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, &priv->rqtn[rqt_ix]); + err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); kvfree(in); + return err; +} + +static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +{ + mlx5_core_destroy_rqt(priv->mdev, rqtn); +} + +static int mlx5e_create_rqts(struct mlx5e_priv *priv) +{ + int nch = mlx5e_get_max_num_channels(priv->mdev); + u32 *rqtn; + int err; + int ix; + + /* Indirect RQT */ + rqtn = &priv->indir_rqtn; + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); + if (err) + return err; + + /* Direct RQTs */ + for (ix = 0; ix < nch; ix++) { + rqtn = &priv->direct_tir[ix].rqtn; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + if (err) + goto err_destroy_rqts; + } + + return 0; + +err_destroy_rqts: + for (ix--; ix >= 0; ix--) + mlx5e_destroy_rqt(priv, priv->direct_tir[ix].rqtn); + + mlx5e_destroy_rqt(priv, priv->indir_rqtn); return err; } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) +{ + int nch = mlx5e_get_max_num_channels(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5e_destroy_rqt(priv, priv->direct_tir[i].rqtn); + + mlx5e_destroy_rqt(priv, priv->indir_rqtn); +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) { struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; void *rqtc; int inlen; - int sz; + u32 *in; int err; - sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (!in) @@ -1281,27 +1456,31 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - - mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + if (sz > 1) /* RSS */ + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + else + mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - err = mlx5_core_modify_rqt(mdev, priv->rqtn[rqt_ix], in, inlen); + err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) -{ - mlx5_core_destroy_rqt(priv->mdev, priv->rqtn[rqt_ix]); -} - static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) { - mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); - mlx5e_redirect_rqt(priv, MLX5E_SINGLE_RQ_RQT); + u32 rqtn; + int ix; + + rqtn = priv->indir_rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + for (ix = 0; ix < priv->params.num_channels; ix++) { + rqtn = priv->direct_tir[ix].rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, ix); + } } static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) @@ -1346,6 +1525,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) int inlen; int err; int tt; + int ix; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1357,23 +1537,32 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) mlx5e_build_tir_ctx_lro(tirc, priv); - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5_core_modify_tir(mdev, priv->indir_tirn[tt], in, + inlen); + if (err) + goto free_in; + } + + for (ix = 0; ix < mlx5e_get_max_num_channels(mdev); ix++) { + err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, + in, inlen); if (err) - break; + goto free_in; } +free_in: kvfree(in); return err; } -static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, - u32 tirn) +static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) { void *in; int inlen; int err; + int i; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1382,46 +1571,70 @@ static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + err = mlx5_core_modify_tir(priv->mdev, priv->indir_tirn[i], in, + inlen); + if (err) + return err; + } + + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5_core_modify_tir(priv->mdev, + priv->direct_tir[i].tirn, in, + inlen); + if (err) + return err; + } kvfree(in); - return err; + return 0; } -static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) +static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { + struct mlx5_core_dev *mdev = priv->mdev; + u16 hw_mtu = MLX5E_SW2HW_MTU(mtu); int err; - int i; - for (i = 0; i < MLX5E_NUM_TT; i++) { - err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, - priv->tirn[i]); - if (err) - return err; - } + err = mlx5_set_port_mtu(mdev, hw_mtu, 1); + if (err) + return err; + /* Update vport context MTU */ + mlx5_modify_nic_vport_mtu(mdev, hw_mtu); return 0; } +static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u16 hw_mtu = 0; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err || !hw_mtu) /* fallback to port oper mtu */ + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + *mtu = MLX5E_HW2SW_MTU(hw_mtu); +} + static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - int hw_mtu; + u16 mtu; int err; - err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1); + err = mlx5e_set_mtu(priv, netdev->mtu); if (err) return err; - mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); - - if (MLX5E_HW2SW_MTU(hw_mtu) != netdev->mtu) - netdev_warn(netdev, "%s: Port MTU %d is different than netdev mtu %d\n", - __func__, MLX5E_HW2SW_MTU(hw_mtu), netdev->mtu); + mlx5e_query_mtu(priv, &mtu); + if (mtu != netdev->mtu) + netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", + __func__, mtu, netdev->mtu); - netdev->mtu = MLX5E_HW2SW_MTU(hw_mtu); + netdev->mtu = mtu; return 0; } @@ -1478,6 +1691,9 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_redirect_rqts(priv); mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); +#ifdef CONFIG_RFS_ACCEL + priv->netdev->rx_cpu_rmap = priv->mdev->rmap; +#endif schedule_delayed_work(&priv->update_stats_work, 0); @@ -1685,7 +1901,8 @@ static void mlx5e_destroy_tises(struct mlx5e_priv *priv) mlx5e_destroy_tis(priv, tc); } -static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + enum mlx5e_traffic_types tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); @@ -1706,19 +1923,8 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - - switch (tt) { - case MLX5E_TT_ANY: - MLX5_SET(tirc, tirc, indirect_table, - priv->rqtn[MLX5E_SINGLE_RQ_RQT]); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); - break; - default: - MLX5_SET(tirc, tirc, indirect_table, - priv->rqtn[MLX5E_INDIRECTION_RQT]); - mlx5e_build_tir_ctx_hash(tirc, priv); - break; - } + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqtn); + mlx5e_build_tir_ctx_hash(tirc, priv); switch (tt) { case MLX5E_TT_IPV4_TCP: @@ -1798,64 +2004,107 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; + default: + WARN_ONCE(true, + "mlx5e_build_indir_tir_ctx: bad traffic type!\n"); } } -static int mlx5e_create_tir(struct mlx5e_priv *priv, int tt) +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + u32 rqtn) { - struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; + MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + + mlx5e_build_tir_ctx_lro(tirc, priv); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +static int mlx5e_create_tirs(struct mlx5e_priv *priv) +{ + int nch = mlx5e_get_max_num_channels(priv->mdev); void *tirc; int inlen; + u32 *tirn; int err; + u32 *in; + int ix; + int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + /* indirect tirs */ + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, inlen); + tirn = &priv->indir_tirn[tt]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_indir_tir_ctx(priv, tirc, tt); + err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + if (err) + goto err_destroy_tirs; + } - mlx5e_build_tir_ctx(priv, tirc, tt); + /* direct tirs */ + for (ix = 0; ix < nch; ix++) { + memset(in, 0, inlen); + tirn = &priv->direct_tir[ix].tirn; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_direct_tir_ctx(priv, tirc, + priv->direct_tir[ix].rqtn); + err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + if (err) + goto err_destroy_ch_tirs; + } - err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); + kvfree(in); + + return 0; + +err_destroy_ch_tirs: + for (ix--; ix >= 0; ix--) + mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[ix].tirn); + +err_destroy_tirs: + for (tt--; tt >= 0; tt--) + mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[tt]); kvfree(in); return err; } -static void mlx5e_destroy_tir(struct mlx5e_priv *priv, int tt) +static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) { - mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); + int nch = mlx5e_get_max_num_channels(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[i].tirn); + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[i]); } -static int mlx5e_create_tirs(struct mlx5e_priv *priv) +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) { - int err; + int err = 0; int i; - for (i = 0; i < MLX5E_NUM_TT; i++) { - err = mlx5e_create_tir(priv, i); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); if (err) - goto err_destroy_tirs; + return err; } return 0; - -err_destroy_tirs: - for (i--; i >= 0; i--) - mlx5e_destroy_tir(priv, i); - - return err; -} - -static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) -{ - int i; - - for (i = 0; i < MLX5E_NUM_TT; i++) - mlx5e_destroy_tir(priv, i); } static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) @@ -1914,19 +2163,37 @@ static struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; - - stats->rx_packets = vstats->rx_packets; - stats->rx_bytes = vstats->rx_bytes; - stats->tx_packets = vstats->tx_packets; - stats->tx_bytes = vstats->tx_bytes; - stats->multicast = vstats->rx_multicast_packets + - vstats->tx_multicast_packets; - stats->tx_errors = vstats->tx_error_packets; - stats->rx_errors = vstats->rx_error_packets; - stats->tx_dropped = vstats->tx_queue_dropped; - stats->rx_crc_errors = 0; - stats->rx_length_errors = 0; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + + stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + stats->tx_dropped = sstats->tx_queue_dropped; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors); + stats->rx_crc_errors = + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); + stats->tx_carrier_errors = + PPORT_802_3_GET(pstats, a_symbol_error_during_carrier); + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = + VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); return stats; } @@ -1955,66 +2222,175 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr) return 0; } -static int mlx5e_set_features(struct net_device *netdev, - netdev_features_t features) +#define MLX5E_SET_FEATURE(netdev, feature, enable) \ + do { \ + if (enable) \ + netdev->features |= feature; \ + else \ + netdev->features &= ~feature; \ + } while (0) + +typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); + +static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - int err = 0; - netdev_features_t changes = features ^ netdev->features; + bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + int err; mutex_lock(&priv->state_lock); - if (changes & NETIF_F_LRO) { - bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (was_opened) - mlx5e_close_locked(priv->netdev); - - priv->params.lro_en = !!(features & NETIF_F_LRO); - err = mlx5e_modify_tirs_lro(priv); - if (err) - mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", - err); + if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) + mlx5e_close_locked(priv->netdev); - if (was_opened) - err = mlx5e_open_locked(priv->netdev); + priv->params.lro_en = enable; + err = mlx5e_modify_tirs_lro(priv); + if (err) { + netdev_err(netdev, "lro modify failed, %d\n", err); + priv->params.lro_en = !enable; } + if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) + mlx5e_open_locked(priv->netdev); + mutex_unlock(&priv->state_lock); - if (changes & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (features & NETIF_F_HW_VLAN_CTAG_FILTER) - mlx5e_enable_vlan_filter(priv); - else - mlx5e_disable_vlan_filter(priv); - } + return err; +} - if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) && - mlx5e_tc_num_filters(priv)) { +static int set_feature_vlan_filter(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (enable) + mlx5e_enable_vlan_filter(priv); + else + mlx5e_disable_vlan_filter(priv); + + return 0; +} + +static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!enable && mlx5e_tc_num_filters(priv)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } + return 0; +} + +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + priv->params.vlan_strip_disable = !enable; + err = mlx5e_modify_rqs_vsd(priv, !enable); + if (err) + priv->params.vlan_strip_disable = enable; + + mutex_unlock(&priv->state_lock); + + return err; +} + +#ifdef CONFIG_RFS_ACCEL +static int set_feature_arfs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (enable) + err = mlx5e_arfs_enable(priv); + else + err = mlx5e_arfs_disable(priv); + return err; } +#endif + +static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t wanted_features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) +{ + netdev_features_t changes = wanted_features ^ netdev->features; + bool enable = !!(wanted_features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s feature 0x%llx failed err %d\n", + enable ? "Enable" : "Disable", feature, err); + return err; + } + + MLX5E_SET_FEATURE(netdev, feature, enable); + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + int err; + + err = mlx5e_handle_feature(netdev, features, NETIF_F_LRO, + set_feature_lro); + err |= mlx5e_handle_feature(netdev, features, + NETIF_F_HW_VLAN_CTAG_FILTER, + set_feature_vlan_filter); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, + set_feature_tc_num_filters); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, + set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, + set_feature_rx_vlan); +#ifdef CONFIG_RFS_ACCEL + err |= mlx5e_handle_feature(netdev, features, NETIF_F_NTUPLE, + set_feature_arfs); +#endif + + return err ? -EINVAL : 0; +} + +#define MXL5_HW_MIN_MTU 64 +#define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN) static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; bool was_opened; - int max_mtu; + u16 max_mtu; + u16 min_mtu; int err = 0; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); max_mtu = MLX5E_HW2SW_MTU(max_mtu); + min_mtu = MLX5E_HW2SW_MTU(MXL5E_MIN_MTU); - if (new_mtu > max_mtu) { + if (new_mtu > max_mtu || new_mtu < min_mtu) { netdev_err(netdev, - "%s: Bad MTU (%d) > (%d) Max\n", - __func__, new_mtu, max_mtu); + "%s: Bad MTU (%d), valid range is: [%d..%d]\n", + __func__, new_mtu, min_mtu, max_mtu); return -EINVAL; } @@ -2205,6 +2581,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2224,6 +2603,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_add_vxlan_port = mlx5e_add_vxlan_port, .ndo_del_vxlan_port = mlx5e_del_vxlan_port, .ndo_features_check = mlx5e_features_check, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, .ndo_get_vf_config = mlx5e_get_vf_config, @@ -2283,15 +2665,33 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) } #endif -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels) { + int node = mdev->priv.numa_node; + int node_num_of_cores; int i; + if (node == -1) + node = first_online_node; + + node_num_of_cores = cpumask_weight(cpumask_of_node(node)); + + if (node_num_of_cores) + num_channels = min_t(int, num_channels, node_num_of_cores); + for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) @@ -2300,8 +2700,21 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.log_rq_size = - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.lro_en = true; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); priv->params.rx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_pkts = @@ -2311,15 +2724,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.min_rx_wqes = - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, num_channels); priv->params.lro_wqe_sz = @@ -2356,6 +2767,8 @@ static void mlx5e_build_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + bool fcs_supported; + bool fcs_enabled; SET_NETDEV_DEV(netdev, &mdev->pdev->dev); @@ -2399,16 +2812,28 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + netdev->features = netdev->hw_features; if (!priv->params.lro_en) netdev->features &= ~NETIF_F_LRO; + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && FT_CAP(identified_miss_table_mode) && - FT_CAP(flow_table_modify)) - priv->netdev->hw_features |= NETIF_F_HW_TC; + FT_CAP(flow_table_modify)) { + netdev->hw_features |= NETIF_F_HW_TC; +#ifdef CONFIG_RFS_ACCEL + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + } netdev->features |= NETIF_F_HIGHDMA; @@ -2442,6 +2867,61 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, return err; } +static void mlx5e_create_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err); + priv->q_counter = 0; + } +} + +static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) +{ + if (!priv->q_counter) + return; + + mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); +} + +static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *mkc; + int inlen = sizeof(*in); + u64 npages = + mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + mkc = &in->seg; + mkc->status = MLX5_MKEY_STATUS_FREE; + mkc->flags = MLX5_PERM_UMR_EN | + MLX5_PERM_LOCAL_READ | + MLX5_PERM_LOCAL_WRITE | + MLX5_ACCESS_MODE_MTT; + + mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc->flags_pd = cpu_to_be32(priv->pdn); + mkc->len = cpu_to_be64(npages << PAGE_SHIFT); + mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); + mkc->log2_page_size = PAGE_SHIFT; + + err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, + NULL, NULL); + + kvfree(in); + + return err; +} + static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) { struct net_device *netdev; @@ -2491,10 +2971,16 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5e_create_umr_mkey(priv); + if (err) { + mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); + goto err_destroy_mkey; + } + err = mlx5e_create_tises(priv); if (err) { mlx5_core_warn(mdev, "create tises failed, %d\n", err); - goto err_destroy_mkey; + goto err_destroy_umr_mkey; } err = mlx5e_open_drop_rq(priv); @@ -2503,37 +2989,33 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_destroy_tises; } - err = mlx5e_create_rqt(priv, MLX5E_INDIRECTION_RQT); + err = mlx5e_create_rqts(priv); if (err) { - mlx5_core_warn(mdev, "create rqt(INDIR) failed, %d\n", err); + mlx5_core_warn(mdev, "create rqts failed, %d\n", err); goto err_close_drop_rq; } - err = mlx5e_create_rqt(priv, MLX5E_SINGLE_RQ_RQT); - if (err) { - mlx5_core_warn(mdev, "create rqt(SINGLE) failed, %d\n", err); - goto err_destroy_rqt_indir; - } - err = mlx5e_create_tirs(priv); if (err) { mlx5_core_warn(mdev, "create tirs failed, %d\n", err); - goto err_destroy_rqt_single; + goto err_destroy_rqts; } - err = mlx5e_create_flow_tables(priv); + err = mlx5e_create_flow_steering(priv); if (err) { - mlx5_core_warn(mdev, "create flow tables failed, %d\n", err); + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); goto err_destroy_tirs; } - mlx5e_init_eth_addr(priv); + mlx5e_create_q_counter(priv); + + mlx5e_init_l2_addr(priv); mlx5e_vxlan_init(priv); err = mlx5e_tc_init(priv); if (err) - goto err_destroy_flow_tables; + goto err_dealloc_q_counters; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); @@ -2545,8 +3027,11 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_tc_cleanup; } - if (mlx5e_vxlan_allowed(mdev)) + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); vxlan_get_rx_port(netdev); + rtnl_unlock(); + } mlx5e_enable_async_events(priv); schedule_work(&priv->set_rx_mode_work); @@ -2556,17 +3041,15 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) err_tc_cleanup: mlx5e_tc_cleanup(priv); -err_destroy_flow_tables: - mlx5e_destroy_flow_tables(priv); +err_dealloc_q_counters: + mlx5e_destroy_q_counter(priv); + mlx5e_destroy_flow_steering(priv); err_destroy_tirs: mlx5e_destroy_tirs(priv); -err_destroy_rqt_single: - mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); - -err_destroy_rqt_indir: - mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); +err_destroy_rqts: + mlx5e_destroy_rqts(priv); err_close_drop_rq: mlx5e_close_drop_rq(priv); @@ -2574,6 +3057,9 @@ err_close_drop_rq: err_destroy_tises: mlx5e_destroy_tises(priv); +err_destroy_umr_mkey: + mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); + err_destroy_mkey: mlx5_core_destroy_mkey(mdev, &priv->mkey); @@ -2602,20 +3088,32 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) schedule_work(&priv->set_rx_mode_work); mlx5e_disable_async_events(priv); flush_scheduled_work(); - unregister_netdev(netdev); + if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { + netif_device_detach(netdev); + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + } else { + unregister_netdev(netdev); + } + mlx5e_tc_cleanup(priv); mlx5e_vxlan_cleanup(priv); - mlx5e_destroy_flow_tables(priv); + mlx5e_destroy_q_counter(priv); + mlx5e_destroy_flow_steering(priv); mlx5e_destroy_tirs(priv); - mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); - mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_destroy_rqts(priv); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); - free_netdev(netdev); + + if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) + free_netdev(netdev); } static void *mlx5e_get_netdev(void *vpriv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 58d4e2f962c3..23adfe2fcba9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,13 +42,12 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; } -static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct sk_buff *skb; dma_addr_t dma_addr; - skb = netdev_alloc_skb(rq->netdev, rq->wqe_sz); + skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); if (unlikely(!skb)) return -ENOMEM; @@ -62,10 +61,9 @@ static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) goto err_free_skb; - skb_reserve(skb, MLX5E_NET_IP_ALIGN); - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); + wqe->data.addr = cpu_to_be64(dma_addr); + wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -77,18 +75,386 @@ err_free_skb: return -ENOMEM; } +static inline void +mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, + len, DMA_FROM_DEVICE); +} + +static inline void +mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + /* No dma pre sync for fragmented MPWQE */ +} + +static inline void +mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE); + + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + &wi->dma_info.page[page_idx], frag_offset, + len, truesize); +} + +static inline void +mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE); + + dma_sync_single_for_cpu(pdev, + wi->umr.dma_info[page_idx].addr + frag_offset, + len, DMA_FROM_DEVICE); + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + wi->umr.dma_info[page_idx].page, frag_offset, + len, truesize); +} + +static inline void +mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + struct page *page = &wi->dma_info.page[page_idx]; + + skb_copy_to_linear_data(skb, page_address(page) + offset, + ALIGN(headlen, sizeof(long))); +} + +static inline void +mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; + unsigned int len; + + /* Aligning len to sizeof(long) optimizes memcpy performance */ + len = ALIGN(headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, 0, + page_address(dma_info->page) + offset, + len); +#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE) + if (unlikely(offset + headlen > PAGE_SIZE)) { + dma_info++; + headlen_pg = len; + len = ALIGN(headlen - headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, headlen_pg, + page_address(dma_info->page), + len); + } +#endif +} + +static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix) +{ + return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + +static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix); + + memset(wqe, 0, sizeof(*wqe)); + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->umr_mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *wqe; + u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); + u16 pi; + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); + } + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + mlx5e_build_umr_wqe(rq, sq, wqe, ix); + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; + sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->pc += num_wqebbs; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + int i) +{ + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return -ENOMEM; + + wi->umr.dma_info[i].page = page; + wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { + put_page(page); + return -ENOMEM; + } + wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); + + return 0; +} + +static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT; + int i; + + wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * + MLX5_MPWRQ_PAGES_PER_WQE, + GFP_ATOMIC); + if (unlikely(!wi->umr.dma_info)) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, + GFP_ATOMIC); + if (unlikely(!wi->umr.mtt_no_align)) + goto err_free_umr; + + wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) + goto err_free_mtt; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + goto err_unmap; + atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->umr.dma_info[i].page->_count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; + wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; + wqe->data.lkey = rq->umr_mkey_be; + wqe->data.addr = cpu_to_be64(dma_offset); + + return 0; + +err_unmap: + while (--i >= 0) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->umr.dma_info[i].page->_count); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + +err_free_mtt: + kfree(wi->umr.mtt_no_align); + +err_free_umr: + kfree(wi->umr.dma_info); + +err_out: + return -ENOMEM; +} + +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], + &wi->umr.dma_info[i].page->_count); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + kfree(wi->umr.mtt_no_align); + kfree(wi->umr.dma_info); +} + +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + rq->stats.mpwqe_frag++; + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + gfp_t gfp_mask; + int i; + + gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; + wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + MLX5_MPWRQ_WQE_PAGE_ORDER); + if (unlikely(!wi->dma_info.page)) + return -ENOMEM; + + wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, + rq->wqe_sz, PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { + put_page(wi->dma_info.page); + return -ENOMEM; + } + + /* We split the high-order page into order-0 ones and manage their + * reference counter to minimize the memory held by small skb fragments + */ + split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->dma_info.page[i]._count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; + wi->free_wqe = mlx5e_free_rx_linear_mpwqe; + wqe->data.lkey = rq->mkey_be; + wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + + return 0; +} + +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int i; + + dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, + PCI_DMA_FROMDEVICE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], + &wi->dma_info.page[i]._count); + put_page(&wi->dma_info.page[i]); + } +} + +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + int err; + + err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); + if (unlikely(err)) { + err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; + } + + return 0; +} + +#define RQ_CANNOT_POST(rq) \ + (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ + test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; - if (unlikely(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state))) + if (unlikely(RQ_CANNOT_POST(rq))) return false; while (!mlx5_wq_ll_is_full(wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + int err; - if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, wq->head))) + err = rq->alloc_wqe(rq, wqe, wq->head); + if (unlikely(err)) { + if (err != -EBUSY) + rq->stats.buff_alloc_err++; break; + } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); } @@ -101,7 +467,8 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !mlx5_wq_ll_is_full(wq); } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); @@ -112,7 +479,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETH_HLEN; + u16 tot_len = cqe_bcnt - ETH_HLEN; if (eth->h_proto == htons(ETH_P_IP)) { tcp = (struct tcphdr *)(skb->data + ETH_HLEN + @@ -176,35 +543,43 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (likely(is_first_ethertype_ip(skb))) { + return; + } + + if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); rq->stats.csum_sw++; - } else { - goto csum_none; + return; } - return; - + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + rq->stats.csum_inner++; + } + return; + } csum_none: skb->ip_summed = CHECKSUM_NONE; rq->stats.csum_none++; } static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; - skb_put(skb, cqe_bcnt); - lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe); + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; @@ -213,10 +588,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(mlx5e_rx_hw_stamp(tstamp))) mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); - mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); - - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) @@ -227,6 +598,141 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + skb->protocol = eth_type_trans(skb, netdev); +} + +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + napi_gro_receive(rq->cq.napi, skb); +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + skb = rq->skb[wqe_counter]; + prefetch(skb->data); + rq->skb[wqe_counter] = NULL; + + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + dev_kfree_skb(skb); + goto wq_ll_pop; + } + + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_put(skb, cqe_bcnt); + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_mpw_info *wi, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix * MLX5_MPWRQ_STRIDE_SIZE; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_page_idx = page_idx; + u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + u32 frag_offset = head_offset + headlen; + u16 byte_cnt = cqe_bcnt - headlen; + +#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE) + if (unlikely(frag_offset >= PAGE_SIZE)) { + page_idx++; + frag_offset -= PAGE_SIZE; + } +#endif + wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); + + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + + wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + page_idx++; + } + /* copy header */ + wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, + headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; +} + +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + rq->stats.mpwqe_filler++; + goto mpwrq_cqe_out; + } + + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, + sizeof(long))); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + goto mpwrq_cqe_out; + } + + prefetch(skb->data); + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES)) + return; + + wi->free_wqe(rq, wi); + mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) @@ -235,44 +741,14 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) int work_done; for (work_done = 0; work_done < budget; work_done++) { - struct mlx5e_rx_wqe *wqe; - struct mlx5_cqe64 *cqe; - struct sk_buff *skb; - __be16 wqe_counter_be; - u16 wqe_counter; + struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq); - cqe = mlx5e_get_cqe(cq); if (!cqe) break; mlx5_cqwq_pop(&cq->wq); - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; - - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { - rq->stats.wqe_err++; - dev_kfree_skb(skb); - goto wq_ll_pop; - } - - mlx5e_build_rx_skb(cqe, rq, skb); - rq->stats.packets++; - rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); - napi_gro_receive(cq->napi, skb); - -wq_ll_pop: - mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, - &wqe->next.next_wqe_index); + rq->handle_rx_cqe(rq, cqe); } mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 000000000000..115752b53d85 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) + +struct counter_desc { + char name[ETH_GSTRING_LEN]; + int offset; /* Byte offset */ +}; + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 lro_packets; + u64 lro_bytes; + u64 rx_csum_good; + u64 rx_csum_none; + u64 rx_csum_sw; + u64 rx_csum_inner; + u64 tx_csum_offload; + u64 tx_csum_inner; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 rx_wqe_err; + u64 rx_mpwqe_filler; + u64 rx_mpwqe_frag; + u64 rx_buff_alloc_err; + + /* Special handling counters */ + u64 link_down_events; +}; + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) }, +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; +}; + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_error_packets", + VPORT_COUNTER_OFF(received_errors.packets) }, + { "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) }, + { "tx_vport_error_packets", + VPORT_COUNTER_OFF(transmit_errors.packets) }, + { "tx_vport_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) }, + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, +}; + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define PPORT_PER_PRIO_GET(pstats, prio, c) \ + MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define NUM_PPORT_PRIO 8 + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +static const struct counter_desc pport_802_3_stats_desc[] = { + { "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) }, + { "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "alignment_err", PPORT_802_3_OFF(a_alignment_errors) }, + { "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "octets_received", PPORT_802_3_OFF(a_octets_received_ok) }, + { "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "unsupported_op_rx", + PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "pause_ctrl_tx", + PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +static const struct counter_desc pport_2863_stats_desc[] = { + { "in_octets", PPORT_2863_OFF(if_in_octets) }, + { "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) }, + { "in_discards", PPORT_2863_OFF(if_in_discards) }, + { "in_errors", PPORT_2863_OFF(if_in_errors) }, + { "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) }, + { "out_octets", PPORT_2863_OFF(if_out_octets) }, + { "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) }, + { "out_discards", PPORT_2863_OFF(if_out_discards) }, + { "out_errors", PPORT_2863_OFF(if_out_errors) }, + { "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) }, + { "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) }, + { "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) }, + { "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) }, +}; + +static const struct counter_desc pport_2819_stats_desc[] = { + { "drop_events", PPORT_2819_OFF(ether_stats_drop_events) }, + { "octets", PPORT_2819_OFF(ether_stats_octets) }, + { "pkts", PPORT_2819_OFF(ether_stats_pkts) }, + { "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) }, + { "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) }, + { "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) }, + { "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) }, + { "fragments", PPORT_2819_OFF(ether_stats_fragments) }, + { "jabbers", PPORT_2819_OFF(ether_stats_jabbers) }, + { "collisions", PPORT_2819_OFF(ether_stats_collisions) }, + { "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "p1024to1518octets", + PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "p1519to2047octets", + PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "p2048to4095octets", + PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "p4096to8191octets", + PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "p8192to10239octets", + PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + { "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_sw; + u64 csum_inner; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; + u64 mpwqe_filler; + u64 mpwqe_frag; + u64 buff_alloc_err; +}; + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_offload_inner; + u64 nop; + /* less likely accessed in data path */ + u64 csum_offload_none; + u64 stopped; + u64 wake; + u64 dropped; +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ + ARRAY_SIZE(pport_per_prio_traffic_stats_desc) +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ + ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_COUNTERS (NUM_PPORT_802_3_COUNTERS + \ + NUM_PPORT_2863_COUNTERS + \ + NUM_PPORT_2819_COUNTERS + \ + NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ + NUM_PPORT_PRIO) +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; +}; + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b3de09f13425..ef017c0decdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -46,8 +46,8 @@ struct mlx5e_tc_flow { struct mlx5_flow_rule *rule; }; -#define MLX5E_TC_FLOW_TABLE_NUM_ENTRIES 1024 -#define MLX5E_TC_FLOW_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_NUM_ENTRIES 1024 +#define MLX5E_TC_TABLE_NUM_GROUPS 4 static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, u32 *match_c, u32 *match_v, @@ -55,33 +55,35 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, { struct mlx5_flow_destination dest = { .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, - {.ft = priv->fts.vlan.t}, + {.ft = priv->fs.vlan.ft.t}, }; struct mlx5_flow_rule *rule; bool table_created = false; - if (IS_ERR_OR_NULL(priv->fts.tc.t)) { - priv->fts.tc.t = - mlx5_create_auto_grouped_flow_table(priv->fts.ns, 0, - MLX5E_TC_FLOW_TABLE_NUM_ENTRIES, - MLX5E_TC_FLOW_TABLE_NUM_GROUPS); - if (IS_ERR(priv->fts.tc.t)) { + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + priv->fs.tc.t = + mlx5_create_auto_grouped_flow_table(priv->fs.ns, + MLX5E_TC_PRIO, + MLX5E_TC_TABLE_NUM_ENTRIES, + MLX5E_TC_TABLE_NUM_GROUPS, + 0); + if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); - return ERR_CAST(priv->fts.tc.t); + return ERR_CAST(priv->fs.tc.t); } table_created = true; } - rule = mlx5_add_flow_rule(priv->fts.tc.t, MLX5_MATCH_OUTER_HEADERS, + rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS, match_c, match_v, action, flow_tag, action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL); if (IS_ERR(rule) && table_created) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; } return rule; @@ -93,8 +95,8 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5_del_flow_rule(rule); if (!mlx5e_tc_num_filters(priv)) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; } } @@ -310,7 +312,7 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; u32 *match_c; u32 *match_v; int err = 0; @@ -376,7 +378,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { struct mlx5e_tc_flow *flow; - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); @@ -401,7 +403,7 @@ static const struct rhashtable_params mlx5e_tc_flow_ht_params = { int mlx5e_tc_init(struct mlx5e_priv *priv) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; tc->ht_params = mlx5e_tc_flow_ht_params; return rhashtable_init(&tc->ht, &tc->ht_params); @@ -418,12 +420,12 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) void mlx5e_tc_cleanup(struct mlx5e_priv *priv) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv); - if (!IS_ERR_OR_NULL(priv->fts.tc.t)) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + if (!IS_ERR_OR_NULL(tc->t)) { + mlx5_destroy_flow_table(tc->t); + tc->t = NULL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index d677428dc10f..a4f17b974d62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -45,7 +45,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { - return atomic_read(&priv->fts.tc.ht.nelems); + return atomic_read(&priv->fs.tc.ht.nelems); } #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1ffc7cb6f78c..229ab16fb8d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -54,10 +54,11 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) sq->skb[pi] = NULL; sq->pc++; + sq->stats.nop++; if (notify_hw) { cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, 0); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } } @@ -309,7 +310,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, bf_sz); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); } /* fill sq edge with nops to avoid wqe wrap around */ @@ -387,7 +388,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->wqe_info[ci]; if (unlikely(!skb)) { /* nop */ - sq->stats.nop++; sqcc++; continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bb4395aceeb..c38781fa567d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,6 +49,60 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) return cqe; } +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +{ + struct mlx5_wq_cyc *wq; + struct mlx5_cqe64 *cqe; + struct mlx5e_sq *sq; + u16 sqcc; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; + + sq = container_of(cq, struct mlx5e_sq, cq); + wq = &sq->wq; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + do { + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + + mlx5_cqwq_pop(&cq->wq); + sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + break; + } + + switch (icowi->opcode) { + case MLX5_OPCODE_NOP: + break; + case MLX5_OPCODE_UMR: + mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + break; + default: + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); + } + + } while ((cqe = mlx5e_get_cqe(cq))); + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -64,6 +118,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) @@ -80,6 +137,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&c->icosq.cq); return work_done; } @@ -89,7 +147,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); - barrier(); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bc3d9f8a75c1..ff91bb5e1c43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -401,7 +401,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size); + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); if (IS_ERR_OR_NULL(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5121be4675d1..4d78d5a48af3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,18 +40,18 @@ #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) -#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ +#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\ ...) {.type = FS_TYPE_PRIO,\ .min_ft_level = min_level_val,\ - .max_ft = max_ft_val,\ + .num_levels = num_levels_val,\ .num_leaf_prios = num_prios_val,\ .caps = caps_val,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ - ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ +#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\ + ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ __VA_ARGS__)\ #define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ @@ -67,17 +67,20 @@ #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ .caps = (long[]) {__VA_ARGS__} } -#define LEFTOVERS_MAX_FT 1 +#define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 -#define BY_PASS_PRIO_MAX_FT 1 -#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ - LEFTOVERS_MAX_FT) -#define KERNEL_MAX_FT 3 -#define KERNEL_NUM_PRIOS 2 -#define KENREL_MIN_LEVEL 2 +#define BY_PASS_PRIO_NUM_LEVELS 1 +#define BY_PASS_MIN_LEVEL (KERNEL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_NUM_PRIOS) -#define ANCHOR_MAX_FT 1 +/* Vlan, mac, ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +#define KERNEL_NIC_NUM_PRIOS 1 +/* One more level for tc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) + +#define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) struct node_caps { @@ -92,7 +95,7 @@ static struct init_tree_node { int min_ft_level; int num_leaf_prios; int prio; - int max_ft; + int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, .ar_size = 4, @@ -102,17 +105,20 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.modify_root), FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), - ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), - ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(1, 1), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), FS_CAP(flow_table_properties_nic_receive.modify_root), FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), - ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), } }; @@ -222,19 +228,6 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static unsigned int find_next_free_level(struct fs_prio *prio) -{ - if (!list_empty(&prio->node.children)) { - struct mlx5_flow_table *ft; - - ft = list_last_entry(&prio->node.children, - struct mlx5_flow_table, - node.list); - return ft->level + 1; - } - return prio->start_level; -} - static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) { unsigned int i; @@ -615,8 +608,8 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } -static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest) +int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; @@ -693,9 +686,23 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table return err; } +static void list_add_flow_table(struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct list_head *prev = &prio->node.children; + struct mlx5_flow_table *iter; + + fs_for_each_ft(iter, prio) { + if (iter->level > ft->level) + break; + prev = &iter->node.list; + } + list_add(&ft->node.list, prev); +} + struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int max_fte) + int prio, int max_fte, + u32 level) { struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; @@ -716,12 +723,15 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, err = -EINVAL; goto unlock_root; } - if (fs_prio->num_ft == fs_prio->max_ft) { + if (level >= fs_prio->num_levels) { err = -ENOSPC; goto unlock_root; } - - ft = alloc_flow_table(find_next_free_level(fs_prio), + /* The level is related to the + * priority level range. + */ + level += fs_prio->start_level; + ft = alloc_flow_table(level, roundup_pow_of_two(max_fte), root->table_type); if (!ft) { @@ -742,7 +752,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, goto destroy_ft; lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); - list_add_tail(&ft->node.list, &fs_prio->node.children); + list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); @@ -759,14 +769,15 @@ unlock_root: struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - int max_num_groups) + int max_num_groups, + u32 level) { struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level); if (IS_ERR(ft)) return ft; @@ -1065,31 +1076,18 @@ unlock_fg: return rule; } -static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u8 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static bool dest_is_valid(struct mlx5_flow_destination *dest, + u32 action, + struct mlx5_flow_table *ft) { - struct mlx5_flow_rule *rule; - struct mlx5_flow_group *g; + if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return true; - g = create_autogroup(ft, match_criteria_enable, match_criteria); - if (IS_ERR(g)) - return (void *)g; - - rule = add_rule_fg(g, match_value, - action, flow_tag, dest); - if (IS_ERR(rule)) { - /* Remove assumes refcount > 0 and autogroup creates a group - * with a refcount = 0. - */ - tree_get_node(&g->node); - tree_remove_node(&g->node); - } - return rule; + if (!dest || ((dest->type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && + (dest->ft->level <= ft->level))) + return false; + return true; } static struct mlx5_flow_rule * @@ -1104,7 +1102,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; - if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest) + if (!dest_is_valid(dest, action, ft)) return ERR_PTR(-EINVAL); nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); @@ -1119,8 +1117,23 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria, - match_value, action, flow_tag, dest); + g = create_autogroup(ft, match_criteria_enable, match_criteria); + if (IS_ERR(g)) { + rule = (void *)g; + goto unlock; + } + + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + if (IS_ERR(rule)) { + /* Remove assumes refcount > 0 and autogroup creates a group + * with a refcount = 0. + */ + unlock_ref_node(&ft->node); + tree_get_node(&g->node); + tree_remove_node(&g->node); + return rule; + } unlock: unlock_ref_node(&ft->node); return rule; @@ -1288,7 +1301,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, { struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; int prio; - static struct fs_prio *fs_prio; + struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; if (!root_ns) @@ -1323,7 +1336,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, EXPORT_SYMBOL(mlx5_get_flow_namespace); static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned prio, int max_ft) + unsigned int prio, int num_levels) { struct fs_prio *fs_prio; @@ -1334,7 +1347,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, fs_prio->node.type = FS_TYPE_PRIO; tree_init_node(&fs_prio->node, 1, NULL); tree_add_node(&fs_prio->node, &ns->node); - fs_prio->max_ft = max_ft; + fs_prio->num_levels = num_levels; fs_prio->prio = prio; list_add_tail(&fs_prio->node.list, &ns->node.children); @@ -1365,14 +1378,14 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ns; } -static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node - *prio_metadata) +static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, + struct init_tree_node *prio_metadata) { struct fs_prio *fs_prio; int i; for (i = 0; i < prio_metadata->num_leaf_prios; i++) { - fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); } @@ -1399,7 +1412,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, - int index) + int prio) { int max_ft_level = MLX5_CAP_FLOWTABLE(dev, flow_table_properties_nic_receive. @@ -1417,8 +1430,8 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, fs_get_obj(fs_ns, fs_parent_node); if (init_node->num_leaf_prios) - return create_leaf_prios(fs_ns, init_node); - fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); + return create_leaf_prios(fs_ns, prio, init_node); + fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); base = &fs_prio->node; @@ -1431,11 +1444,16 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, } else { return -EINVAL; } + prio = 0; for (i = 0; i < init_node->ar_size; i++) { err = init_root_tree_recursive(dev, &init_node->children[i], - base, init_node, i); + base, init_node, prio); if (err) return err; + if (init_node->children[i].type == FS_TYPE_PRIO && + init_node->children[i].num_leaf_prios) { + prio += init_node->children[i].num_leaf_prios; + } } return 0; @@ -1491,9 +1509,9 @@ static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) struct fs_prio *prio; fs_for_each_prio(prio, ns) { - /* This updates prio start_level and max_ft */ + /* This updates prio start_level and num_levels */ set_prio_attrs_in_prio(prio, acc_level); - acc_level += prio->max_ft; + acc_level += prio->num_levels; } return acc_level; } @@ -1505,11 +1523,11 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) prio->start_level = acc_level; fs_for_each_ns(ns, prio) - /* This updates start_level and max_ft of ns's priority descendants */ + /* This updates start_level and num_levels of ns's priority descendants */ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); - if (!prio->max_ft) - prio->max_ft = acc_level_ns - prio->start_level; - WARN_ON(prio->max_ft < acc_level_ns - prio->start_level); + if (!prio->num_levels) + prio->num_levels = acc_level_ns - prio->start_level; + WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); } static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) @@ -1520,12 +1538,13 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) fs_for_each_prio(prio, ns) { set_prio_attrs_in_prio(prio, start_level); - start_level += prio->max_ft; + start_level += prio->num_levels; } } #define ANCHOR_PRIO 0 #define ANCHOR_SIZE 1 +#define ANCHOR_LEVEL 0 static int create_anchor_flow_table(struct mlx5_core_dev *dev) { @@ -1535,7 +1554,7 @@ static int create_anchor_flow_table(struct mlx5_core_dev ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); if (IS_ERR(ft)) { mlx5_core_err(dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index f37a6248a27b..d607e564f454 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -107,7 +107,7 @@ struct fs_fte { /* Type of children is mlx5_flow_table/namespace */ struct fs_prio { struct fs_node node; - unsigned int max_ft; + unsigned int num_levels; unsigned int start_level; unsigned int prio; unsigned int num_ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3f3b2fae4991..6feef7fb9d6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -48,6 +48,9 @@ #include <linux/kmod.h> #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -665,6 +668,12 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); @@ -691,6 +700,11 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_vec = table->num_comp_vectors; nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!dev->rmap) + return -ENOMEM; +#endif for (i = 0; i < ncomp_vec; i++) { eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -698,6 +712,10 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(dev->rmap, + dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector); +#endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, @@ -966,7 +984,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) int err; mutex_lock(&dev->intf_state_mutex); - if (dev->interface_state == MLX5_INTERFACE_STATE_UP) { + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", __func__); goto out; @@ -1133,7 +1151,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (err) pr_info("failed request module on %s\n", MLX5_IB_MOD); - dev->interface_state = MLX5_INTERFACE_STATE_UP; + clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: mutex_unlock(&dev->intf_state_mutex); @@ -1207,7 +1226,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } mutex_lock(&dev->intf_state_mutex); - if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { + if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); goto out; @@ -1241,7 +1260,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_cmd_cleanup(dev); out: - dev->interface_state = MLX5_INTERFACE_STATE_DOWN; + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); mutex_unlock(&dev->intf_state_mutex); return err; } @@ -1452,6 +1472,18 @@ static const struct pci_error_handlers mlx5_err_handler = { .resume = mlx5_pci_resume }; +static void shutdown(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + + dev_info(&pdev->dev, "Shutdown was called\n"); + /* Notify mlx5 clients that the kernel is being shut down */ + set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); + mlx5_unload_one(dev, priv); + mlx5_pci_disable_device(dev); +} + static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ @@ -1459,6 +1491,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5 */ + { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ { 0, } }; @@ -1469,6 +1503,7 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, + .shutdown = shutdown, .err_handler = &mlx5_err_handler, .sriov_configure = mlx5_core_sriov_configure, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index ae378c575deb..3e35611b19c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -115,6 +115,19 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, } EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + u32 in[MLX5_ST_SZ_DW(mlcr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask) { @@ -247,8 +260,8 @@ int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); -static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, - int *max_mtu, int *oper_mtu, u8 port) +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, + u16 *max_mtu, u16 *oper_mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -268,7 +281,7 @@ static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); } -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -283,20 +296,96 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) } EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port) { mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port) { mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); +static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)]; + int module_mapping; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmlp_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping); + *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK; + + return 0; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + u32 in[MLX5_ST_SZ_DW(mcia_reg)]; + int module_num; + u16 i2c_addr; + int status; + int err; + void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + err = mlx5_query_module_num(dev, &module_num); + if (err) + return err; + + memset(in, 0, sizeof(in)); + size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + + if (offset < MLX5_EEPROM_PAGE_LENGTH && + offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + + i2c_addr = MLX5_I2C_ADDR_LOW; + if (offset >= MLX5_EEPROM_PAGE_LENGTH) { + i2c_addr = MLX5_I2C_ADDR_HIGH; + offset -= MLX5_EEPROM_PAGE_LENGTH; + } + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, device_address, offset); + MLX5_SET(mcia_reg, in, size, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + memcpy(data, ptr, size); + + return size; +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) { @@ -607,3 +696,52 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) return err; } EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index def289375ecb..b720a274220d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -538,3 +538,71 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); + +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); + +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); + +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, clear, reset); + MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer) +{ + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); + if (!err) + *out_of_buffer = MLX5_GET(query_q_counter_out, out, + out_of_buffer); + + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bd518405859e..b69dadcfb897 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -196,6 +196,46 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address); +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out, outlen); + if (!err) + *mtu = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.mtu); + + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu); + +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); + int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, u32 vport, enum mlx5_list_type list_type, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 3958195526d1..b0a0b01bb4ef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -44,7 +44,7 @@ #include <linux/seq_file.h> #include <linux/u64_stats_sync.h> #include <linux/netdevice.h> -#include <linux/wait.h> +#include <linux/completion.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/types.h> @@ -55,6 +55,7 @@ #include <linux/mutex.h> #include <linux/rcupdate.h> #include <linux/slab.h> +#include <linux/workqueue.h> #include <asm/byteorder.h> #include <net/devlink.h> @@ -73,6 +74,8 @@ static const char mlxsw_core_driver_name[] = "mlxsw_core"; static struct dentry *mlxsw_core_dbg_root; +static struct workqueue_struct *mlxsw_wq; + struct mlxsw_core_pcpu_stats { u64 trap_rx_packets[MLXSW_TRAP_ID_MAX]; u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX]; @@ -93,11 +96,9 @@ struct mlxsw_core { struct list_head rx_listener_list; struct list_head event_listener_list; struct { - struct sk_buff *resp_skb; - u64 tid; - wait_queue_head_t wait; - bool trans_active; - struct mutex lock; /* One EMAD transaction at a time. */ + atomic64_t tid; + struct list_head trans_list; + spinlock_t trans_list_lock; /* protects trans_list writes */ bool use_emad; } emad; struct mlxsw_core_pcpu_stats __percpu *pcpu_stats; @@ -290,7 +291,7 @@ static void mlxsw_emad_pack_reg_tlv(char *reg_tlv, static void mlxsw_emad_pack_op_tlv(char *op_tlv, const struct mlxsw_reg_info *reg, enum mlxsw_core_reg_access_type type, - struct mlxsw_core *mlxsw_core) + u64 tid) { mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP); mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN); @@ -306,7 +307,7 @@ static void mlxsw_emad_pack_op_tlv(char *op_tlv, MLXSW_EMAD_OP_TLV_METHOD_WRITE); mlxsw_emad_op_tlv_class_set(op_tlv, MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS); - mlxsw_emad_op_tlv_tid_set(op_tlv, mlxsw_core->emad.tid); + mlxsw_emad_op_tlv_tid_set(op_tlv, tid); } static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb) @@ -328,7 +329,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type, - struct mlxsw_core *mlxsw_core) + u64 tid) { char *buf; @@ -339,7 +340,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, mlxsw_emad_pack_reg_tlv(buf, reg, payload); buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)); - mlxsw_emad_pack_op_tlv(buf, reg, type, mlxsw_core); + mlxsw_emad_pack_op_tlv(buf, reg, type, tid); mlxsw_emad_construct_eth_hdr(skb); } @@ -376,58 +377,16 @@ static bool mlxsw_emad_is_resp(const struct sk_buff *skb) return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE); } -#define MLXSW_EMAD_TIMEOUT_MS 200 - -static int __mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - int err; - int ret; - - mlxsw_core->emad.trans_active = true; - - err = mlxsw_core_skb_transmit(mlxsw_core, skb, tx_info); - if (err) { - dev_err(mlxsw_core->bus_info->dev, "Failed to transmit EMAD (tid=%llx)\n", - mlxsw_core->emad.tid); - dev_kfree_skb(skb); - goto trans_inactive_out; - } - - ret = wait_event_timeout(mlxsw_core->emad.wait, - !(mlxsw_core->emad.trans_active), - msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS)); - if (!ret) { - dev_warn(mlxsw_core->bus_info->dev, "EMAD timed-out (tid=%llx)\n", - mlxsw_core->emad.tid); - err = -EIO; - goto trans_inactive_out; - } - - return 0; - -trans_inactive_out: - mlxsw_core->emad.trans_active = false; - return err; -} - -static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, - char *op_tlv) +static int mlxsw_emad_process_status(char *op_tlv, + enum mlxsw_emad_op_tlv_status *p_status) { - enum mlxsw_emad_op_tlv_status status; - u64 tid; - - status = mlxsw_emad_op_tlv_status_get(op_tlv); - tid = mlxsw_emad_op_tlv_tid_get(op_tlv); + *p_status = mlxsw_emad_op_tlv_status_get(op_tlv); - switch (status) { + switch (*p_status) { case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: return 0; case MLXSW_EMAD_OP_TLV_STATUS_BUSY: case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: - dev_warn(mlxsw_core->bus_info->dev, "Reg access status again (tid=%llx,status=%x(%s))\n", - tid, status, mlxsw_emad_op_tlv_status_str(status)); return -EAGAIN; case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: @@ -438,70 +397,150 @@ static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: default: - dev_err(mlxsw_core->bus_info->dev, "Reg access status failed (tid=%llx,status=%x(%s))\n", - tid, status, mlxsw_emad_op_tlv_status_str(status)); return -EIO; } } -static int mlxsw_emad_process_status_skb(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb) +static int +mlxsw_emad_process_status_skb(struct sk_buff *skb, + enum mlxsw_emad_op_tlv_status *p_status) +{ + return mlxsw_emad_process_status(mlxsw_emad_op_tlv(skb), p_status); +} + +struct mlxsw_reg_trans { + struct list_head list; + struct list_head bulk_list; + struct mlxsw_core *core; + struct sk_buff *tx_skb; + struct mlxsw_tx_info tx_info; + struct delayed_work timeout_dw; + unsigned int retries; + u64 tid; + struct completion completion; + atomic_t active; + mlxsw_reg_trans_cb_t *cb; + unsigned long cb_priv; + const struct mlxsw_reg_info *reg; + enum mlxsw_core_reg_access_type type; + int err; + enum mlxsw_emad_op_tlv_status emad_status; + struct rcu_head rcu; +}; + +#define MLXSW_EMAD_TIMEOUT_MS 200 + +static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { - return mlxsw_emad_process_status(mlxsw_core, mlxsw_emad_op_tlv(skb)); + unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); + + mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + struct mlxsw_reg_trans *trans) { - struct sk_buff *trans_skb; - int n_retry; + struct sk_buff *skb; int err; - n_retry = 0; -retry: - /* We copy the EMAD to a new skb, since we might need - * to retransmit it in case of failure. - */ - trans_skb = skb_copy(skb, GFP_KERNEL); - if (!trans_skb) { - err = -ENOMEM; - goto out; + skb = skb_copy(trans->tx_skb, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + atomic_set(&trans->active, 1); + err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info); + if (err) { + dev_kfree_skb(skb); + return err; } + mlxsw_emad_trans_timeout_schedule(trans); + return 0; +} - err = __mlxsw_emad_transmit(mlxsw_core, trans_skb, tx_info); - if (!err) { - struct sk_buff *resp_skb = mlxsw_core->emad.resp_skb; +static void mlxsw_emad_trans_finish(struct mlxsw_reg_trans *trans, int err) +{ + struct mlxsw_core *mlxsw_core = trans->core; + + dev_kfree_skb(trans->tx_skb); + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + trans->err = err; + complete(&trans->completion); +} + +static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans) +{ + int err; - err = mlxsw_emad_process_status_skb(mlxsw_core, resp_skb); - if (err) - dev_kfree_skb(resp_skb); - if (!err || err != -EAGAIN) - goto out; + if (trans->retries < MLXSW_EMAD_MAX_RETRY) { + trans->retries++; + err = mlxsw_emad_transmit(trans->core, trans); + if (err == 0) + return; + } else { + err = -EIO; } - if (n_retry++ < MLXSW_EMAD_MAX_RETRY) - goto retry; + mlxsw_emad_trans_finish(trans, err); +} -out: - dev_kfree_skb(skb); - mlxsw_core->emad.tid++; - return err; +static void mlxsw_emad_trans_timeout_work(struct work_struct *work) +{ + struct mlxsw_reg_trans *trans = container_of(work, + struct mlxsw_reg_trans, + timeout_dw.work); + + if (!atomic_dec_and_test(&trans->active)) + return; + + mlxsw_emad_transmit_retry(trans->core, trans); } +static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans, + struct sk_buff *skb) +{ + int err; + + if (!atomic_dec_and_test(&trans->active)) + return; + + err = mlxsw_emad_process_status_skb(skb, &trans->emad_status); + if (err == -EAGAIN) { + mlxsw_emad_transmit_retry(mlxsw_core, trans); + } else { + if (err == 0) { + char *op_tlv = mlxsw_emad_op_tlv(skb); + + if (trans->cb) + trans->cb(mlxsw_core, + mlxsw_emad_reg_payload(op_tlv), + trans->reg->len, trans->cb_priv); + } + mlxsw_emad_trans_finish(trans, err); + } +} + +/* called with rcu read lock held */ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, void *priv) { struct mlxsw_core *mlxsw_core = priv; + struct mlxsw_reg_trans *trans; - if (mlxsw_emad_is_resp(skb) && - mlxsw_core->emad.trans_active && - mlxsw_emad_get_tid(skb) == mlxsw_core->emad.tid) { - mlxsw_core->emad.resp_skb = skb; - mlxsw_core->emad.trans_active = false; - wake_up(&mlxsw_core->emad.wait); - } else { - dev_kfree_skb(skb); + if (!mlxsw_emad_is_resp(skb)) + goto free_skb; + + list_for_each_entry_rcu(trans, &mlxsw_core->emad.trans_list, list) { + if (mlxsw_emad_get_tid(skb) == trans->tid) { + mlxsw_emad_process_response(mlxsw_core, trans, skb); + break; + } } + +free_skb: + dev_kfree_skb(skb); } static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = { @@ -528,18 +567,19 @@ static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core) static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { + u64 tid; int err; /* Set the upper 32 bits of the transaction ID field to a random * number. This allows us to discard EMADs addressed to other * devices. */ - get_random_bytes(&mlxsw_core->emad.tid, 4); - mlxsw_core->emad.tid = mlxsw_core->emad.tid << 32; + get_random_bytes(&tid, 4); + tid <<= 32; + atomic64_set(&mlxsw_core->emad.tid, tid); - init_waitqueue_head(&mlxsw_core->emad.wait); - mlxsw_core->emad.trans_active = false; - mutex_init(&mlxsw_core->emad.lock); + INIT_LIST_HEAD(&mlxsw_core->emad.trans_list); + spin_lock_init(&mlxsw_core->emad.trans_list_lock); err = mlxsw_core_rx_listener_register(mlxsw_core, &mlxsw_emad_rx_listener, @@ -597,6 +637,59 @@ static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, return skb; } +static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + struct mlxsw_reg_trans *trans, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv, u64 tid) +{ + struct sk_buff *skb; + int err; + + dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", + trans->tid, reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + skb = mlxsw_emad_alloc(mlxsw_core, reg->len); + if (!skb) + return -ENOMEM; + + list_add_tail(&trans->bulk_list, bulk_list); + trans->core = mlxsw_core; + trans->tx_skb = skb; + trans->tx_info.local_port = MLXSW_PORT_CPU_PORT; + trans->tx_info.is_emad = true; + INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work); + trans->tid = tid; + init_completion(&trans->completion); + trans->cb = cb; + trans->cb_priv = cb_priv; + trans->reg = reg; + trans->type = type; + + mlxsw_emad_construct(skb, reg, payload, type, trans->tid); + mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info); + + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + err = mlxsw_emad_transmit(mlxsw_core, trans); + if (err) + goto err_out; + return 0; + +err_out: + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + list_del(&trans->bulk_list); + dev_kfree_skb(trans->tx_skb); + return err; +} + /***************** * Core functions *****************/ @@ -686,24 +779,6 @@ static const struct file_operations mlxsw_core_rx_stats_dbg_ops = { .llseek = seq_lseek }; -static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, - const char *buf, size_t size) -{ - __be32 *m = (__be32 *) buf; - int i; - int count = size / sizeof(__be32); - - for (i = count - 1; i >= 0; i--) - if (m[i]) - break; - i++; - count = i ? i : 1; - for (i = 0; i < count; i += 4) - dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", - i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), - be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); -} - int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) { spin_lock(&mlxsw_core_driver_list_lock); @@ -816,9 +891,168 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink, return mlxsw_core->driver->port_unsplit(mlxsw_core, port_index); } +static int +mlxsw_devlink_sb_pool_get(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_get(mlxsw_core, sb_index, + pool_index, pool_info); +} + +static int +mlxsw_devlink_sb_pool_set(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index, + pool_index, size, threshold_type); +} + +static void *__dl_port(struct devlink_port *devlink_port) +{ + return container_of(devlink_port, struct mlxsw_core_port, devlink_port); +} + +static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_threshold); +} + +static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index, + pool_index, threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index, + tc_index, pool_type, + pool_index, threshold); +} + +static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_snapshot) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_snapshot(mlxsw_core, sb_index); +} + +static int mlxsw_devlink_sb_occ_max_clear(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_max_clear) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_max_clear(mlxsw_core, sb_index); +} + +static int +mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_port_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_cur, p_max); +} + +static int +mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_tc_port_bind_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port, + sb_index, tc_index, + pool_type, p_cur, p_max); +} + static const struct devlink_ops mlxsw_devlink_ops = { - .port_split = mlxsw_devlink_port_split, - .port_unsplit = mlxsw_devlink_port_unsplit, + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, + .sb_pool_get = mlxsw_devlink_sb_pool_get, + .sb_pool_set = mlxsw_devlink_sb_pool_set, + .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get, + .sb_port_pool_set = mlxsw_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_devlink_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, }; int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, @@ -1102,56 +1336,112 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); +static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) +{ + return atomic64_inc_return(&mlxsw_core->emad.tid); +} + static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, - enum mlxsw_core_reg_access_type type) + enum mlxsw_core_reg_access_type type, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv) { + u64 tid = mlxsw_core_tid_get(mlxsw_core); + struct mlxsw_reg_trans *trans; int err; - char *op_tlv; - struct sk_buff *skb; - struct mlxsw_tx_info tx_info = { - .local_port = MLXSW_PORT_CPU_PORT, - .is_emad = true, - }; - skb = mlxsw_emad_alloc(mlxsw_core, reg->len); - if (!skb) + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) return -ENOMEM; - mlxsw_emad_construct(skb, reg, payload, type, mlxsw_core); - mlxsw_core->driver->txhdr_construct(skb, &tx_info); + err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans, + bulk_list, cb, cb_priv, tid); + if (err) { + kfree(trans); + return err; + } + return 0; +} - dev_dbg(mlxsw_core->bus_info->dev, "EMAD send (tid=%llx)\n", - mlxsw_core->emad.tid); - mlxsw_core_buf_dump_dbg(mlxsw_core, skb->data, skb->len); +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_QUERY, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_query); - err = mlxsw_emad_transmit(mlxsw_core, skb, &tx_info); - if (!err) { - op_tlv = mlxsw_emad_op_tlv(mlxsw_core->emad.resp_skb); - memcpy(payload, mlxsw_emad_reg_payload(op_tlv), - reg->len); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_write); - dev_dbg(mlxsw_core->bus_info->dev, "EMAD recv (tid=%llx)\n", - mlxsw_core->emad.tid - 1); - mlxsw_core_buf_dump_dbg(mlxsw_core, - mlxsw_core->emad.resp_skb->data, - mlxsw_core->emad.resp_skb->len); +static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) +{ + struct mlxsw_core *mlxsw_core = trans->core; + int err; - dev_kfree_skb(mlxsw_core->emad.resp_skb); - } + wait_for_completion(&trans->completion); + cancel_delayed_work_sync(&trans->timeout_dw); + err = trans->err; + if (trans->retries) + dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n", + trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid); + if (err) + dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n", + trans->tid, trans->reg->id, + mlxsw_reg_id_str(trans->reg->id), + mlxsw_core_reg_access_type_str(trans->type), + trans->emad_status, + mlxsw_emad_op_tlv_status_str(trans->emad_status)); + + list_del(&trans->bulk_list); + kfree_rcu(trans, rcu); return err; } +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list) +{ + struct mlxsw_reg_trans *trans; + struct mlxsw_reg_trans *tmp; + int sum_err = 0; + int err; + + list_for_each_entry_safe(trans, tmp, bulk_list, bulk_list) { + err = mlxsw_reg_trans_wait(trans); + if (err && sum_err == 0) + sum_err = err; /* first error to be returned */ + } + return sum_err; +} +EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait); + static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type) { + enum mlxsw_emad_op_tlv_status status; int err, n_retry; char *in_mbox, *out_mbox, *tmp; + dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + in_mbox = mlxsw_cmd_mbox_alloc(); if (!in_mbox) return -ENOMEM; @@ -1162,7 +1452,8 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, goto free_in_mbox; } - mlxsw_emad_pack_op_tlv(in_mbox, reg, type, mlxsw_core); + mlxsw_emad_pack_op_tlv(in_mbox, reg, type, + mlxsw_core_tid_get(mlxsw_core)); tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); mlxsw_emad_pack_reg_tlv(tmp, reg, payload); @@ -1170,60 +1461,61 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, retry: err = mlxsw_cmd_access_reg(mlxsw_core, in_mbox, out_mbox); if (!err) { - err = mlxsw_emad_process_status(mlxsw_core, out_mbox); - if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) - goto retry; + err = mlxsw_emad_process_status(out_mbox, &status); + if (err) { + if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) + goto retry; + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access status failed (status=%x(%s))\n", + status, mlxsw_emad_op_tlv_status_str(status)); + } } if (!err) memcpy(payload, mlxsw_emad_reg_payload(out_mbox), reg->len); - mlxsw_core->emad.tid++; mlxsw_cmd_mbox_free(out_mbox); free_in_mbox: mlxsw_cmd_mbox_free(in_mbox); + if (err) + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access failed (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); return err; } +static void mlxsw_core_reg_access_cb(struct mlxsw_core *mlxsw_core, + char *payload, size_t payload_len, + unsigned long cb_priv) +{ + char *orig_payload = (char *) cb_priv; + + memcpy(orig_payload, payload, payload_len); +} + static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type) { - u64 cur_tid; + LIST_HEAD(bulk_list); int err; - if (mutex_lock_interruptible(&mlxsw_core->emad.lock)) { - dev_err(mlxsw_core->bus_info->dev, "Reg access interrupted (reg_id=%x(%s),type=%s)\n", - reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - return -EINTR; - } - - cur_tid = mlxsw_core->emad.tid; - dev_dbg(mlxsw_core->bus_info->dev, "Reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", - cur_tid, reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - /* During initialization EMAD interface is not available to us, * so we default to command interface. We switch to EMAD interface * after setting the appropriate traps. */ if (!mlxsw_core->emad.use_emad) - err = mlxsw_core_reg_access_cmd(mlxsw_core, reg, - payload, type); - else - err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + return mlxsw_core_reg_access_cmd(mlxsw_core, reg, payload, type); + err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + payload, type, &bulk_list, + mlxsw_core_reg_access_cb, + (unsigned long) payload); if (err) - dev_err(mlxsw_core->bus_info->dev, "Reg access failed (tid=%llx,reg_id=%x(%s),type=%s)\n", - cur_tid, reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - - mutex_unlock(&mlxsw_core->emad.lock); - return err; + return err; + return mlxsw_reg_trans_bulk_wait(&bulk_list); } int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, @@ -1374,6 +1666,24 @@ void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port) } EXPORT_SYMBOL(mlxsw_core_port_fini); +static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, + const char *buf, size_t size) +{ + __be32 *m = (__be32 *) buf; + int i; + int count = size / sizeof(__be32); + + for (i = count - 1; i >= 0; i--) + if (m[i]) + break; + i++; + count = i ? i : 1; + for (i = 0; i < count; i += 4) + dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", + i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), + be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); +} + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, @@ -1416,17 +1726,35 @@ int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, } EXPORT_SYMBOL(mlxsw_cmd_exec); +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay) +{ + return queue_delayed_work(mlxsw_wq, dwork, delay); +} +EXPORT_SYMBOL(mlxsw_core_schedule_dw); + static int __init mlxsw_core_module_init(void) { - mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); - if (!mlxsw_core_dbg_root) + int err; + + mlxsw_wq = create_workqueue(mlxsw_core_driver_name); + if (!mlxsw_wq) return -ENOMEM; + mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); + if (!mlxsw_core_dbg_root) { + err = -ENOMEM; + goto err_debugfs_create_dir; + } return 0; + +err_debugfs_create_dir: + destroy_workqueue(mlxsw_wq); + return err; } static void __exit mlxsw_core_module_exit(void) { debugfs_remove_recursive(mlxsw_core_dbg_root); + destroy_workqueue(mlxsw_wq); } module_init(mlxsw_core_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index f3cebef9c31c..436bc49df6ab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -43,6 +43,7 @@ #include <linux/gfp.h> #include <linux/types.h> #include <linux/skbuff.h> +#include <linux/workqueue.h> #include <net/devlink.h> #include "trap.h" @@ -108,6 +109,19 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_event_listener *el, void *priv); +typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, + size_t payload_len, unsigned long cb_priv); + +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list); + int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, @@ -137,11 +151,22 @@ struct mlxsw_core_port { struct devlink_port devlink_port; }; +static inline void * +mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port) +{ + /* mlxsw_core_port is ensured to always be the first field in driver + * port structure. + */ + return mlxsw_core_port; +} + int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, struct mlxsw_core_port *mlxsw_core_port, u8 local_port, struct net_device *dev, bool split, u32 split_group); void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port); +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -200,6 +225,37 @@ struct mlxsw_driver { int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count); int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port); + int (*sb_pool_get)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*sb_pool_set)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); + int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); + int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); + int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); u8 txhdr_len; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 57e4a6337ae3..1977e7a5c530 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3566,6 +3566,10 @@ MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2); */ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); +/* shared max_buff limits for dynamic threshold for SBCM, SBPM */ +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1 +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14 + /* reg_sbcm_max_buff * When the pool associated to the port-pg/tclass is configured to * static, Maximum buffer size for the limiter configured in cells. @@ -3632,6 +3636,27 @@ MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4); */ MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2); +/* reg_sbpm_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24); + +/* reg_sbpm_clr + * Clear Max Buffer Occupancy + * When this bit is set, max_buff_occupancy field is cleared (and a + * new max value is tracked from the time the clear was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1); + +/* reg_sbpm_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24); + /* reg_sbpm_min_buff * Minimum buffer size for the limiter, in cells. * Access: RW @@ -3652,17 +3677,25 @@ MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24); MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24); static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool, - enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbxx_dir dir, bool clr, u32 min_buff, u32 max_buff) { MLXSW_REG_ZERO(sbpm, payload); mlxsw_reg_sbpm_local_port_set(payload, local_port); mlxsw_reg_sbpm_pool_set(payload, pool); mlxsw_reg_sbpm_dir_set(payload, dir); + mlxsw_reg_sbpm_clr_set(payload, clr); mlxsw_reg_sbpm_min_buff_set(payload, min_buff); mlxsw_reg_sbpm_max_buff_set(payload, max_buff); } +static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = mlxsw_reg_sbpm_buff_occupancy_get(payload); + *p_max_buff_occupancy = mlxsw_reg_sbpm_max_buff_occupancy_get(payload); +} + /* SBMM - Shared Buffer Multicast Management Register * -------------------------------------------------- * The SBMM register configures and retrieves the shared buffer allocation @@ -3718,6 +3751,104 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff, mlxsw_reg_sbmm_pool_set(payload, pool); } +/* SBSR - Shared Buffer Status Register + * ------------------------------------ + * The SBSR register retrieves the shared buffer occupancy according to + * Port-Pool. Note that this register enables reading a large amount of data. + * It is the user's responsibility to limit the amount of data to ensure the + * response can match the maximum transfer unit. In case the response exceeds + * the maximum transport unit, it will be truncated with no special notice. + */ +#define MLXSW_REG_SBSR_ID 0xB005 +#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */ +#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */ +#define MLXSW_REG_SBSR_REC_MAX_COUNT 120 +#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \ + MLXSW_REG_SBSR_REC_LEN * \ + MLXSW_REG_SBSR_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_sbsr = { + .id = MLXSW_REG_SBSR_ID, + .len = MLXSW_REG_SBSR_LEN, +}; + +/* reg_sbsr_clr + * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy + * field is cleared (and a new max value is tracked from the time the clear + * was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1); + +/* reg_sbsr_ingress_port_mask + * Bit vector for all ingress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1); + +/* reg_sbsr_pg_buff_mask + * Bit vector for all switch priority groups. + * Indicates which of the priorities (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other priority + * does not change. + * Range is 0..cap_max_pg_buffers - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1); + +/* reg_sbsr_egress_port_mask + * Bit vector for all egress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1); + +/* reg_sbsr_tclass_mask + * Bit vector for all traffic classes. + * Indicates which of the traffic classes (for which the relevant bit is + * set) are affected by the set operation. Configuration of any other + * traffic class does not change. + * Range is 0..cap_max_tclass - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1); + +static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr) +{ + MLXSW_REG_ZERO(sbsr, payload); + mlxsw_reg_sbsr_clr_set(payload, clr); +} + +/* reg_sbsr_rec_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false); + +/* reg_sbsr_rec_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false); + +static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index, + u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = + mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index); + *p_max_buff_occupancy = + mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index); +} + static inline const char *mlxsw_reg_id_str(u16 reg_id) { switch (reg_id) { @@ -3813,6 +3944,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SBPM"; case MLXSW_REG_SBMM_ID: return "SBMM"; + case MLXSW_REG_SBSR_ID: + return "SBSR"; default: return "*UNKNOWN*"; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 19b3c144abc6..681afe1a3802 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2434,6 +2434,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_switchdev_init: err_lag_init: + mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -2448,6 +2449,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); @@ -2491,16 +2493,26 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { }; static struct mlxsw_driver mlxsw_sp_driver = { - .kind = MLXSW_DEVICE_KIND_SPECTRUM, - .owner = THIS_MODULE, - .priv_size = sizeof(struct mlxsw_sp), - .init = mlxsw_sp_init, - .fini = mlxsw_sp_fini, - .port_split = mlxsw_sp_port_split, - .port_unsplit = mlxsw_sp_port_unsplit, - .txhdr_construct = mlxsw_sp_txhdr_construct, - .txhdr_len = MLXSW_TXHDR_LEN, - .profile = &mlxsw_sp_config_profile, + .kind = MLXSW_DEVICE_KIND_SPECTRUM, + .owner = THIS_MODULE, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp_config_profile, }; static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 361b0c270b56..e2c022d3e2f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -65,6 +65,7 @@ #define MLXSW_SP_BYTES_PER_CELL 96 #define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) +#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. @@ -117,6 +118,40 @@ static inline bool mlxsw_sp_fid_is_vfid(u16 fid) return fid >= MLXSW_SP_VFID_BASE; } +struct mlxsw_sp_sb_pr { + enum mlxsw_reg_sbpr_mode mode; + u32 size; +}; + +struct mlxsw_cp_sb_occ { + u32 cur; + u32 max; +}; + +struct mlxsw_sp_sb_cm { + u32 min_buff; + u32 max_buff; + u8 pool; + struct mlxsw_cp_sb_occ occ; +}; + +struct mlxsw_sp_sb_pm { + u32 min_buff; + u32 max_buff; + struct mlxsw_cp_sb_occ occ; +}; + +#define MLXSW_SP_SB_POOL_COUNT 4 +#define MLXSW_SP_SB_TC_COUNT 8 + +struct mlxsw_sp_sb { + struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; + struct { + struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; + } ports[MLXSW_PORT_MAX_PORTS]; +}; + struct mlxsw_sp { struct { struct list_head list; @@ -147,6 +182,7 @@ struct mlxsw_sp { struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_sp_sb sb; }; static inline struct mlxsw_sp_upper * @@ -277,7 +313,39 @@ enum mlxsw_sp_flood_table { }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index f58b1d3a619a..a3720a0fad7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -36,36 +36,138 @@ #include <linux/types.h> #include <linux/dcbnl.h> #include <linux/if_ether.h> +#include <linux/list.h> #include "spectrum.h" #include "core.h" #include "port.h" #include "reg.h" -struct mlxsw_sp_pb { - u8 index; - u16 size; -}; +static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, + u8 pool, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.prs[dir][pool]; +} -#define MLXSW_SP_PB(_index, _size) \ - { \ - .index = _index, \ - .size = _size, \ +static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 pg_buff, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.ports[local_port].cms[dir][pg_buff]; +} + +static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 pool, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.ports[local_port].pms[dir][pool]; +} + +static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbpr_mode mode, u32 size) +{ + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + struct mlxsw_sp_sb_pr *pr; + int err; + + mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + if (err) + return err; + + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + pr->mode = mode; + pr->size = size; + return 0; +} + +static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff, u8 pool) +{ + char sbcm_pl[MLXSW_REG_SBCM_LEN]; + int err; + + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir, + min_buff, max_buff, pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + if (err) + return err; + if (pg_buff < MLXSW_SP_SB_TC_COUNT) { + struct mlxsw_sp_sb_cm *cm; + + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); + cm->min_buff = min_buff; + cm->max_buff = max_buff; + cm->pool = pool; } + return 0; +} + +static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + int err; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, + min_buff, max_buff); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl); + if (err) + return err; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + pm->min_buff = min_buff; + pm->max_buff = max_buff; + return 0; +} + +static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + struct list_head *bulk_list) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, NULL, 0); +} + +static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbpm_pl, size_t sbpm_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp_sb_pm *pm = (struct mlxsw_sp_sb_pm *) cb_priv; + + mlxsw_reg_sbpm_unpack(sbpm_pl, &pm->occ.cur, &pm->occ.max); +} -static const struct mlxsw_sp_pb mlxsw_sp_pbs[] = { - MLXSW_SP_PB(0, 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN)), - MLXSW_SP_PB(1, 0), - MLXSW_SP_PB(2, 0), - MLXSW_SP_PB(3, 0), - MLXSW_SP_PB(4, 0), - MLXSW_SP_PB(5, 0), - MLXSW_SP_PB(6, 0), - MLXSW_SP_PB(7, 0), - MLXSW_SP_PB(9, 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU)), +static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + struct list_head *bulk_list) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, + mlxsw_sp_sb_pm_occ_query_cb, + (unsigned long) pm); +} + +static const u16 mlxsw_sp_pbs[] = { + [0] = 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN), + [9] = 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU), }; #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) +#define MLXSW_SP_PB_UNUSED 8 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { @@ -75,10 +177,9 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { - const struct mlxsw_sp_pb *pb; - - pb = &mlxsw_sp_pbs[i]; - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pb->index, pb->size); + if (i == MLXSW_SP_PB_UNUSED) + continue; + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, mlxsw_sp_pbs[i]); } mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); @@ -108,181 +209,174 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); } -struct mlxsw_sp_sb_pool { - u8 pool; - enum mlxsw_reg_sbxx_dir dir; - enum mlxsw_reg_sbpr_mode mode; - u32 size; -}; - -#define MLXSW_SP_SB_POOL_INGRESS_SIZE \ +#define MLXSW_SP_SB_PR_INGRESS_SIZE \ (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) -#define MLXSW_SP_SB_POOL_EGRESS_SIZE \ +#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP_SB_PR_EGRESS_SIZE \ (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) -#define MLXSW_SP_SB_POOL(_pool, _dir, _mode, _size) \ - { \ - .pool = _pool, \ - .dir = _dir, \ - .mode = _mode, \ - .size = _size, \ +#define MLXSW_SP_SB_PR(_mode, _size) \ + { \ + .mode = _mode, \ + .size = _size, \ } -#define MLXSW_SP_SB_POOL_INGRESS(_pool, _size) \ - MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBXX_DIR_INGRESS, \ - MLXSW_REG_SBPR_MODE_DYNAMIC, _size) - -#define MLXSW_SP_SB_POOL_EGRESS(_pool, _size) \ - MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBXX_DIR_EGRESS, \ - MLXSW_REG_SBPR_MODE_DYNAMIC, _size) - -static const struct mlxsw_sp_sb_pool mlxsw_sp_sb_pools[] = { - MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_INGRESS_SIZE)), - MLXSW_SP_SB_POOL_INGRESS(1, 0), - MLXSW_SP_SB_POOL_INGRESS(2, 0), - MLXSW_SP_SB_POOL_INGRESS(3, 0), - MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_EGRESS_SIZE)), - MLXSW_SP_SB_POOL_EGRESS(1, 0), - MLXSW_SP_SB_POOL_EGRESS(2, 0), - MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_EGRESS_SIZE)), +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_MNG_SIZE)), +}; + +#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) + +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_EGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), }; -#define MLXSW_SP_SB_POOLS_LEN ARRAY_SIZE(mlxsw_sp_sb_pools) +#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress) -static int mlxsw_sp_sb_pools_init(struct mlxsw_sp *mlxsw_sp) +static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_pr *prs, + size_t prs_len) { - char sbpr_pl[MLXSW_REG_SBPR_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_POOLS_LEN; i++) { - const struct mlxsw_sp_sb_pool *pool; + for (i = 0; i < prs_len; i++) { + const struct mlxsw_sp_sb_pr *pr; - pool = &mlxsw_sp_sb_pools[i]; - mlxsw_reg_sbpr_pack(sbpr_pl, pool->pool, pool->dir, - pool->mode, pool->size); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + pr = &prs[i]; + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, + pr->mode, pr->size); if (err) return err; } return 0; } -struct mlxsw_sp_sb_cm { - union { - u8 pg; - u8 tc; - } u; - enum mlxsw_reg_sbxx_dir dir; - u32 min_buff; - u32 max_buff; - u8 pool; -}; +static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_prs_ingress, + MLXSW_SP_SB_PRS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_prs_egress, + MLXSW_SP_SB_PRS_EGRESS_LEN); +} -#define MLXSW_SP_SB_CM(_pg_tc, _dir, _min_buff, _max_buff, _pool) \ - { \ - .u.pg = _pg_tc, \ - .dir = _dir, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - .pool = _pool, \ +#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ } -#define MLXSW_SP_SB_CM_INGRESS(_pg, _min_buff, _max_buff) \ - MLXSW_SP_SB_CM(_pg, MLXSW_REG_SBXX_DIR_INGRESS, \ - _min_buff, _max_buff, 0) - -#define MLXSW_SP_SB_CM_EGRESS(_tc, _min_buff, _max_buff) \ - MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBXX_DIR_EGRESS, \ - _min_buff, _max_buff, 0) - -#define MLXSW_SP_CPU_PORT_SB_CM_EGRESS(_tc) \ - MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBXX_DIR_EGRESS, 104, 2, 3) - -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = { - MLXSW_SP_SB_CM_INGRESS(0, MLXSW_SP_BYTES_TO_CELLS(10000), 8), - MLXSW_SP_SB_CM_INGRESS(1, 0, 0), - MLXSW_SP_SB_CM_INGRESS(2, 0, 0), - MLXSW_SP_SB_CM_INGRESS(3, 0, 0), - MLXSW_SP_SB_CM_INGRESS(4, 0, 0), - MLXSW_SP_SB_CM_INGRESS(5, 0, 0), - MLXSW_SP_SB_CM_INGRESS(6, 0, 0), - MLXSW_SP_SB_CM_INGRESS(7, 0, 0), - MLXSW_SP_SB_CM_INGRESS(9, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff), - MLXSW_SP_SB_CM_EGRESS(0, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(1, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(2, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(3, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(4, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(5, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(6, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(7, MLXSW_SP_BYTES_TO_CELLS(1500), 9), - MLXSW_SP_SB_CM_EGRESS(8, 0, 0), - MLXSW_SP_SB_CM_EGRESS(9, 0, 0), - MLXSW_SP_SB_CM_EGRESS(10, 0, 0), - MLXSW_SP_SB_CM_EGRESS(11, 0, 0), - MLXSW_SP_SB_CM_EGRESS(12, 0, 0), - MLXSW_SP_SB_CM_EGRESS(13, 0, 0), - MLXSW_SP_SB_CM_EGRESS(14, 0, 0), - MLXSW_SP_SB_CM_EGRESS(15, 0, 0), - MLXSW_SP_SB_CM_EGRESS(16, 1, 0xff), +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 8, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(20000), 1, 3), }; -#define MLXSW_SP_SB_CMS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms) +#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) + +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(1, 0xff, 0), +}; + +#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) + +#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(0), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(1), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(2), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(3), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(4), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(5), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(6), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(7), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(8), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(9), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(10), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(11), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(12), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(13), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(14), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(15), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(16), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(17), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(18), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(19), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(20), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(21), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(22), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(23), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(24), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(25), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(26), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(27), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(28), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(29), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(30), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(31), + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, }; #define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) -static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, - const struct mlxsw_sp_sb_cm *cms, - size_t cms_len) +static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_cm *cms, + size_t cms_len) { - char sbcm_pl[MLXSW_REG_SBCM_LEN]; int i; int err; for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; + if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) + continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; - mlxsw_reg_sbcm_pack(sbcm_pl, local_port, cm->u.pg, cm->dir, - cm->min_buff, cm->max_buff, cm->pool); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, + cm->min_buff, cm->max_buff, + cm->pool); if (err) return err; } @@ -291,105 +385,120 @@ static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) { - return mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, mlxsw_sp_sb_cms, - MLXSW_SP_SB_CMS_LEN); + int err; + + err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_cms_ingress, + MLXSW_SP_SB_CMS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_cms_egress, + MLXSW_SP_SB_CMS_EGRESS_LEN); } static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) { - return mlxsw_sp_sb_cms_init(mlxsw_sp, 0, mlxsw_sp_cpu_port_sb_cms, - MLXSW_SP_CPU_PORT_SB_MCS_LEN); + return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_cpu_port_sb_cms, + MLXSW_SP_CPU_PORT_SB_MCS_LEN); } -struct mlxsw_sp_sb_pm { - u8 pool; - enum mlxsw_reg_sbxx_dir dir; - u32 min_buff; - u32 max_buff; +#define MLXSW_SP_SB_PM(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + } + +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = { + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), }; -#define MLXSW_SP_SB_PM(_pool, _dir, _min_buff, _max_buff) \ - { \ - .pool = _pool, \ - .dir = _dir, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - } +#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress) -#define MLXSW_SP_SB_PM_INGRESS(_pool, _min_buff, _max_buff) \ - MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBXX_DIR_INGRESS, \ - _min_buff, _max_buff) - -#define MLXSW_SP_SB_PM_EGRESS(_pool, _min_buff, _max_buff) \ - MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBXX_DIR_EGRESS, \ - _min_buff, _max_buff) - -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { - MLXSW_SP_SB_PM_INGRESS(0, 0, 0xff), - MLXSW_SP_SB_PM_INGRESS(1, 0, 0), - MLXSW_SP_SB_PM_INGRESS(2, 0, 0), - MLXSW_SP_SB_PM_INGRESS(3, 0, 0), - MLXSW_SP_SB_PM_EGRESS(0, 0, 7), - MLXSW_SP_SB_PM_EGRESS(1, 0, 0), - MLXSW_SP_SB_PM_EGRESS(2, 0, 0), - MLXSW_SP_SB_PM_EGRESS(3, 0, 0), +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = { + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), }; -#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) +#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress) -static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_pm *pms, + size_t pms_len) { - char sbpm_pl[MLXSW_REG_SBPM_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { + for (i = 0; i < pms_len; i++) { const struct mlxsw_sp_sb_pm *pm; - pm = &mlxsw_sp_sb_pms[i]; - mlxsw_reg_sbpm_pack(sbpm_pl, mlxsw_sp_port->local_port, - pm->pool, pm->dir, - pm->min_buff, pm->max_buff); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sbpm), sbpm_pl); + pm = &pms[i]; + err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir, + pm->min_buff, pm->max_buff); if (err) return err; } return 0; } +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_pms_ingress, + MLXSW_SP_SB_PMS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_pms_egress, + MLXSW_SP_SB_PMS_EGRESS_LEN); +} + struct mlxsw_sp_sb_mm { - u8 prio; u32 min_buff; u32 max_buff; u8 pool; }; -#define MLXSW_SP_SB_MM(_prio, _min_buff, _max_buff, _pool) \ - { \ - .prio = _prio, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - .pool = _pool, \ +#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(0, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(1, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(2, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(3, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(4, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(5, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(6, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(7, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(8, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(9, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(10, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(11, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(12, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(13, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(14, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -404,7 +513,7 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) const struct mlxsw_sp_sb_mm *mc; mc = &mlxsw_sp_sb_mms[i]; - mlxsw_reg_sbmm_pack(sbmm_pl, mc->prio, mc->min_buff, + mlxsw_reg_sbmm_pack(sbmm_pl, i, mc->min_buff, mc->max_buff, mc->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) @@ -413,19 +522,32 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } +#define MLXSW_SP_SB_SIZE (16 * 1024 * 1024) + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { int err; - err = mlxsw_sp_sb_pools_init(mlxsw_sp); + err = mlxsw_sp_sb_prs_init(mlxsw_sp); if (err) return err; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); if (err) return err; err = mlxsw_sp_sb_mms_init(mlxsw_sp); + if (err) + return err; + return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, + MLXSW_SP_SB_SIZE, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_TC_COUNT, + MLXSW_SP_SB_TC_COUNT); +} - return err; +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) +{ + devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); } int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) @@ -442,3 +564,394 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } + +static u8 pool_get(u16 pool_index) +{ + return pool_index % MLXSW_SP_SB_POOL_COUNT; +} + +static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir) +{ + u16 pool_index; + + pool_index = pool; + if (dir == MLXSW_REG_SBXX_DIR_EGRESS) + pool_index += MLXSW_SP_SB_POOL_COUNT; + return pool_index; +} + +static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index) +{ + return pool_index < MLXSW_SP_SB_POOL_COUNT ? + MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS; +} + +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + pool_info->pool_type = dir; + pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); + pool_info->threshold_type = pr->mode; + return 0; +} + +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + enum mlxsw_reg_sbpr_mode mode = threshold_type; + u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); +} + +#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ + +static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, u32 max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) + return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + return MLXSW_SP_CELLS_TO_BYTES(max_buff); +} + +static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, u32 threshold, + u32 *p_max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) { + int val; + + val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN || + val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX) + return -EINVAL; + *p_max_buff = val; + } else { + *p_max_buff = MLXSW_SP_BYTES_TO_CELLS(threshold); + } + return 0; +} + +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir, + pm->max_buff); + return 0; +} + +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + u32 max_buff; + int err; + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + threshold, &max_buff); + if (err) + return err; + + return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir, + 0, max_buff); +} + +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, + cm->max_buff); + *p_pool_index = pool_index_get(cm->pool, pool_type); + return 0; +} + +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + u8 pool = pool_index; + u32 max_buff; + int err; + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + threshold, &max_buff); + if (err) + return err; + + if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) { + if (pool < MLXSW_SP_SB_POOL_COUNT) + return -EINVAL; + pool -= MLXSW_SP_SB_POOL_COUNT; + } else if (pool >= MLXSW_SP_SB_POOL_COUNT) { + return -EINVAL; + } + return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, + 0, max_buff, pool); +} + +#define MASKED_COUNT_MAX \ + (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2)) + +struct mlxsw_sp_sb_sr_occ_query_cb_ctx { + u8 masked_count; + u8 local_port_1; +}; + +static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbsr_pl, size_t sbsr_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + u8 masked_count; + u8 local_port; + int rec_index = 0; + struct mlxsw_sp_sb_cm *cm; + int i; + + memcpy(&cb_ctx, &cb_priv, sizeof(cb_ctx)); + + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } +} + +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + unsigned long cb_priv; + LIST_HEAD(bulk_list); + char *sbsr_pl; + u8 masked_count; + u8 local_port_1; + u8 local_port = 0; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + +next_batch: + local_port++; + local_port_1 = local_port; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, false); + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + } + for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS, + &bulk_list); + if (err) + goto out; + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + cb_ctx.masked_count = masked_count; + cb_ctx.local_port_1 = local_port_1; + memcpy(&cb_priv, &cb_ctx, sizeof(cb_ctx)); + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, mlxsw_sp_sb_sr_occ_query_cb, + cb_priv); + if (err) + goto out; + if (local_port < MLXSW_PORT_MAX_PORTS) + goto next_batch; + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + LIST_HEAD(bulk_list); + char *sbsr_pl; + unsigned int masked_count; + u8 local_port = 0; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + +next_batch: + local_port++; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, true); + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + } + for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS, + &bulk_list); + if (err) + goto out; + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, NULL, 0); + if (err) + goto out; + if (local_port < MLXSW_PORT_MAX_PORTS) + goto next_batch; + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool, dir); + + *p_cur = MLXSW_SP_CELLS_TO_BYTES(pm->occ.cur); + *p_max = MLXSW_SP_CELLS_TO_BYTES(pm->occ.max); + return 0; +} + +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_cur = MLXSW_SP_CELLS_TO_BYTES(cm->occ.cur); + *p_max = MLXSW_SP_CELLS_TO_BYTES(cm->occ.max); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index e1c74efff51a..fb9efb84f13b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1430,8 +1430,8 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) { - schedule_delayed_work(&mlxsw_sp->fdb_notify.dw, - msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); + mlxsw_core_schedule_dw(&mlxsw_sp->fdb_notify.dw, + msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); } static void mlxsw_sp_fdb_notify_work(struct work_struct *work) diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 86ea17e7ba7b..7066954c39d6 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -28,11 +28,12 @@ #include <linux/skbuff.h> #include <linux/delay.h> #include <linux/spi/spi.h> +#include <linux/of_net.h> #include "enc28j60_hw.h" #define DRV_NAME "enc28j60" -#define DRV_VERSION "1.01" +#define DRV_VERSION "1.02" #define SPI_OPLEN 1 @@ -89,22 +90,26 @@ spi_read_buf(struct enc28j60_net *priv, int len, u8 *data) { u8 *rx_buf = priv->spi_transfer_buf + 4; u8 *tx_buf = priv->spi_transfer_buf; - struct spi_transfer t = { + struct spi_transfer tx = { .tx_buf = tx_buf, + .len = SPI_OPLEN, + }; + struct spi_transfer rx = { .rx_buf = rx_buf, - .len = SPI_OPLEN + len, + .len = len, }; struct spi_message msg; int ret; tx_buf[0] = ENC28J60_READ_BUF_MEM; - tx_buf[1] = tx_buf[2] = tx_buf[3] = 0; /* don't care */ spi_message_init(&msg); - spi_message_add_tail(&t, &msg); + spi_message_add_tail(&tx, &msg); + spi_message_add_tail(&rx, &msg); + ret = spi_sync(priv->spi, &msg); if (ret == 0) { - memcpy(data, &rx_buf[SPI_OPLEN], len); + memcpy(data, rx_buf, len); ret = msg.status; } if (ret && netif_msg_drv(priv)) @@ -1544,6 +1549,7 @@ static int enc28j60_probe(struct spi_device *spi) { struct net_device *dev; struct enc28j60_net *priv; + const void *macaddr; int ret = 0; if (netif_msg_drv(&debug)) @@ -1575,7 +1581,12 @@ static int enc28j60_probe(struct spi_device *spi) ret = -EIO; goto error_irq; } - eth_hw_addr_random(dev); + + macaddr = of_get_mac_address(spi->dev.of_node); + if (macaddr) + ether_addr_copy(dev->dev_addr, macaddr); + else + eth_hw_addr_random(dev); enc28j60_set_hw_macaddr(dev); /* Board setup must set the relevant edge trigger type; @@ -1630,9 +1641,16 @@ static int enc28j60_remove(struct spi_device *spi) return 0; } +static const struct of_device_id enc28j60_dt_ids[] = { + { .compatible = "microchip,enc28j60" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, enc28j60_dt_ids); + static struct spi_driver enc28j60_driver = { .driver = { - .name = DRV_NAME, + .name = DRV_NAME, + .of_match_table = enc28j60_dt_ids, }, .probe = enc28j60_probe, .remove = enc28j60_remove, diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 9ba975853ec6..2874dffe77de 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -4021,7 +4021,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags = 0; u16 vlan_tag = 0; struct fifo_info *fifo = NULL; - int do_spin_lock = 1; int offload_type; int enable_per_list_interrupt = 0; struct config_param *config = &sp->config; @@ -4074,7 +4073,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) queue += sp->udp_fifo_idx; if (skb->len > 1024) enable_per_list_interrupt = 1; - do_spin_lock = 0; } } } @@ -4084,12 +4082,7 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) [skb->priority & (MAX_TX_FIFOS - 1)]; fifo = &mac_control->fifos[queue]; - if (do_spin_lock) - spin_lock_irqsave(&fifo->tx_lock, flags); - else { - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags))) - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&fifo->tx_lock, flags); if (sp->config.multiq) { if (__netif_subqueue_stopped(dev, fifo->fifo_no)) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 3d53fcf323eb..e744acc18ef4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -59,8 +59,8 @@ netdev_warn((nn)->netdev, fmt, ## args); \ } while (0) -/* Max time to wait for NFP to respond on updates (in ms) */ -#define NFP_NET_POLL_TIMEOUT 5000 +/* Max time to wait for NFP to respond on updates (in seconds) */ +#define NFP_NET_POLL_TIMEOUT 5 /* Bar allocation */ #define NFP_NET_CRTL_BAR 0 @@ -447,6 +447,10 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @shared_name: Name for shared interrupt * @me_freq_mhz: ME clock_freq (MHz) * @reconfig_lock: Protects HW reconfiguration request regs/machinery + * @reconfig_posted: Pending reconfig bits coming from async sources + * @reconfig_timer_active: Timer for reading reconfiguration results is pending + * @reconfig_sync_present: Some thread is performing synchronous reconfig + * @reconfig_timer: Timer for async reading of reconfig results * @link_up: Is the link up? * @link_status_lock: Protects @link_up and ensures atomicity with BAR reading * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter @@ -531,6 +535,10 @@ struct nfp_net { spinlock_t link_status_lock; spinlock_t reconfig_lock; + u32 reconfig_posted; + bool reconfig_timer_active; + bool reconfig_sync_present; + struct timer_list reconfig_timer; u32 rx_coalesce_usecs; u32 rx_coalesce_max_frames; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 0bdff390c958..fa47c14c743a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -80,6 +80,116 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, put_unaligned_le32(reg, fw_ver); } +/* Firmware reconfig + * + * Firmware reconfig may take a while so we have two versions of it - + * synchronous and asynchronous (posted). All synchronous callers are holding + * RTNL so we don't have to worry about serializing them. + */ +static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update) +{ + nn_writel(nn, NFP_NET_CFG_UPDATE, update); + /* ensure update is written before pinging HW */ + nn_pci_flush(nn); + nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); +} + +/* Pass 0 as update to run posted reconfigs. */ +static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update) +{ + update |= nn->reconfig_posted; + nn->reconfig_posted = 0; + + nfp_net_reconfig_start(nn, update); + + nn->reconfig_timer_active = true; + mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ); +} + +static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check) +{ + u32 reg; + + reg = nn_readl(nn, NFP_NET_CFG_UPDATE); + if (reg == 0) + return true; + if (reg & NFP_NET_CFG_UPDATE_ERR) { + nn_err(nn, "Reconfig error: 0x%08x\n", reg); + return true; + } else if (last_check) { + nn_err(nn, "Reconfig timeout: 0x%08x\n", reg); + return true; + } + + return false; +} + +static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline) +{ + bool timed_out = false; + + /* Poll update field, waiting for NFP to ack the config */ + while (!nfp_net_reconfig_check_done(nn, timed_out)) { + msleep(1); + timed_out = time_is_before_eq_jiffies(deadline); + } + + if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR) + return -EIO; + + return timed_out ? -EIO : 0; +} + +static void nfp_net_reconfig_timer(unsigned long data) +{ + struct nfp_net *nn = (void *)data; + + spin_lock_bh(&nn->reconfig_lock); + + nn->reconfig_timer_active = false; + + /* If sync caller is present it will take over from us */ + if (nn->reconfig_sync_present) + goto done; + + /* Read reconfig status and report errors */ + nfp_net_reconfig_check_done(nn, true); + + if (nn->reconfig_posted) + nfp_net_reconfig_start_async(nn, 0); +done: + spin_unlock_bh(&nn->reconfig_lock); +} + +/** + * nfp_net_reconfig_post() - Post async reconfig request + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Record FW reconfiguration request. Reconfiguration will be kicked off + * whenever reconfiguration machinery is idle. Multiple requests can be + * merged together! + */ +static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update) +{ + spin_lock_bh(&nn->reconfig_lock); + + /* Sync caller will kick off async reconf when it's done, just post */ + if (nn->reconfig_sync_present) { + nn->reconfig_posted |= update; + goto done; + } + + /* Opportunistically check if the previous command is done */ + if (!nn->reconfig_timer_active || + nfp_net_reconfig_check_done(nn, false)) + nfp_net_reconfig_start_async(nn, update); + else + nn->reconfig_posted |= update; +done: + spin_unlock_bh(&nn->reconfig_lock); +} + /** * nfp_net_reconfig() - Reconfigure the firmware * @nn: NFP Net device to reconfigure @@ -93,35 +203,45 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, */ int nfp_net_reconfig(struct nfp_net *nn, u32 update) { - int cnt, ret = 0; - u32 new; + bool cancelled_timer = false; + u32 pre_posted_requests; + int ret; spin_lock_bh(&nn->reconfig_lock); - nn_writel(nn, NFP_NET_CFG_UPDATE, update); - /* ensure update is written before pinging HW */ - nn_pci_flush(nn); - nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); + nn->reconfig_sync_present = true; - /* Poll update field, waiting for NFP to ack the config */ - for (cnt = 0; ; cnt++) { - new = nn_readl(nn, NFP_NET_CFG_UPDATE); - if (new == 0) - break; - if (new & NFP_NET_CFG_UPDATE_ERR) { - nn_err(nn, "Reconfig error: 0x%08x\n", new); - ret = -EIO; - break; - } else if (cnt >= NFP_NET_POLL_TIMEOUT) { - nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n", - update, cnt); - ret = -EIO; - break; - } - mdelay(1); + if (nn->reconfig_timer_active) { + del_timer(&nn->reconfig_timer); + nn->reconfig_timer_active = false; + cancelled_timer = true; + } + pre_posted_requests = nn->reconfig_posted; + nn->reconfig_posted = 0; + + spin_unlock_bh(&nn->reconfig_lock); + + if (cancelled_timer) + nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires); + + /* Run the posted reconfigs which were issued before we started */ + if (pre_posted_requests) { + nfp_net_reconfig_start(nn, pre_posted_requests); + nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); } + nfp_net_reconfig_start(nn, update); + ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); + + spin_lock_bh(&nn->reconfig_lock); + + if (nn->reconfig_posted) + nfp_net_reconfig_start_async(nn, 0); + + nn->reconfig_sync_present = false; + spin_unlock_bh(&nn->reconfig_lock); + return ret; } @@ -1298,23 +1418,25 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); + /* < meta_len > + * <-- [rx_offset] --> + * --------------------------------------------------------- + * | [XX] | metadata | packet | XXXX | + * --------------------------------------------------------- + * <---------------- data_len ---------------> + * + * The rx_offset is fixed for all packets, the meta_len can vary + * on a packet by packet basis. If rx_offset is set to zero + * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the + * buffer and is immediately followed by the packet (no [XX]). + */ meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; data_len = le16_to_cpu(rxd->rxd.data_len); - if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) { - dev_kfree_skb_any(skb); - continue; - } - - if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) { - /* The packet data starts after the metadata */ + if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) skb_reserve(skb, meta_len); - } else { - /* The packet data starts at a fixed offset */ + else skb_reserve(skb, nn->rx_offset); - } - - /* Adjust the SKB for the dynamic meta data pre-pended */ skb_put(skb, data_len - meta_len); nfp_net_set_hash(nn->netdev, skb, rxd); @@ -2094,8 +2216,7 @@ static void nfp_net_set_rx_mode(struct net_device *netdev) return; nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); - if (nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN)) - return; + nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN); nn->ctrl = new_ctrl; } @@ -2403,7 +2524,7 @@ static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port) be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 | be16_to_cpu(nn->vxlan_ports[i])); - nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN); + nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_VXLAN); } /** @@ -2549,6 +2670,9 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, spin_lock_init(&nn->reconfig_lock); spin_lock_init(&nn->link_status_lock); + setup_timer(&nn->reconfig_timer, + nfp_net_reconfig_timer, (unsigned long)nn); + return nn; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 8692003aeed8..ad6c4e31cedd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -81,14 +81,10 @@ /** * @NFP_NET_TXR_MAX: Maximum number of TX rings - * @NFP_NET_TXR_MASK: Mask for TX rings * @NFP_NET_RXR_MAX: Maximum number of RX rings - * @NFP_NET_RXR_MASK: Mask for RX rings */ #define NFP_NET_TXR_MAX 64 -#define NFP_NET_TXR_MASK (NFP_NET_TXR_MAX - 1) #define NFP_NET_RXR_MAX 64 -#define NFP_NET_RXR_MASK (NFP_NET_RXR_MAX - 1) /** * Read/Write config words (0x0000 - 0x002c) @@ -152,9 +148,9 @@ * @NFP_NET_CFG_VERSION: Firmware version number * @NFP_NET_CFG_STS: Status * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) - * @NFP_NET_MAX_TXRINGS: Maximum number of TX rings - * @NFP_NET_MAX_RXRINGS: Maximum number of RX rings - * @NFP_NET_MAX_MTU: Maximum support MTU + * @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings + * @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings + * @NFP_NET_CFG_MAX_MTU: Maximum support MTU * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) * diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index f86a1f13d27b..f7c9a5bc4aa3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -187,7 +187,7 @@ static const struct file_operations nfp_tx_q_fops = { void nfp_net_debugfs_adapter_add(struct nfp_net *nn) { - static struct dentry *queues, *tx, *rx; + struct dentry *queues, *tx, *rx; char int_name[16]; int i; @@ -200,7 +200,7 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn) /* Create queue debugging sub-tree */ queues = debugfs_create_dir("queue", nn->debugfs_dir); - if (IS_ERR_OR_NULL(nn->debugfs_dir)) + if (IS_ERR_OR_NULL(queues)) return; rx = debugfs_create_dir("rx", queues); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index 2a55d6d53ee6..8d710a3b4db0 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -481,7 +481,6 @@ struct pch_gbe_buffer { /** * struct pch_gbe_tx_ring - tx ring information - * @tx_lock: spinlock structs * @desc: pointer to the descriptor ring memory * @dma: physical address of the descriptor ring * @size: length of descriptor ring in bytes @@ -491,7 +490,6 @@ struct pch_gbe_buffer { * @buffer_info: array of buffer information structs */ struct pch_gbe_tx_ring { - spinlock_t tx_lock; struct pch_gbe_tx_desc *desc; dma_addr_t dma; unsigned int size; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3b98b263bad0..3cd87a41ac92 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1640,7 +1640,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, cleaned_count); if (cleaned_count > 0) { /*skip this if nothing cleaned*/ /* Recover from running out of Tx resources in xmit_frame */ - spin_lock(&tx_ring->tx_lock); + netif_tx_lock(adapter->netdev); if (unlikely(cleaned && (netif_queue_stopped(adapter->netdev)))) { netif_wake_queue(adapter->netdev); @@ -1652,7 +1652,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, netdev_dbg(adapter->netdev, "next_to_clean : %d\n", tx_ring->next_to_clean); - spin_unlock(&tx_ring->tx_lock); + netif_tx_unlock(adapter->netdev); } return cleaned; } @@ -1805,7 +1805,6 @@ int pch_gbe_setup_tx_resources(struct pch_gbe_adapter *adapter, tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - spin_lock_init(&tx_ring->tx_lock); for (desNo = 0; desNo < tx_ring->count; desNo++) { tx_desc = PCH_GBE_TX_DESC(*tx_ring, desNo); @@ -2135,15 +2134,9 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; - unsigned long flags; - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { - /* Collision - tell upper layer to requeue */ - return NETDEV_TX_LOCKED; - } if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) { netif_stop_queue(netdev); - spin_unlock_irqrestore(&tx_ring->tx_lock, flags); netdev_dbg(netdev, "Return : BUSY next_to use : 0x%08x next_to clean : 0x%08x\n", tx_ring->next_to_use, tx_ring->next_to_clean); @@ -2152,7 +2145,6 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* CRC,ITAG no support */ pch_gbe_tx_queue(adapter, tx_ring, skb); - spin_unlock_irqrestore(&tx_ring->tx_lock, flags); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index ddcfcab034c2..c0a11b5158e7 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -103,4 +103,25 @@ config QEDE depends on QED ---help--- This enables the support for ... + +config QEDE_VXLAN + bool "Virtual eXtensible Local Area Network support" + default n + depends on QEDE && VXLAN && !(QEDE=y && VXLAN=m) + ---help--- + This enables hardware offload support for VXLAN protocol over + qede module. Say Y here if you want to enable hardware offload + support for Virtual eXtensible Local Area Network (VXLAN) + in the driver. + +config QEDE_GENEVE + bool "Generic Network Virtualization Encapsulation (GENEVE) support" + depends on QEDE && GENEVE && !(QEDE=y && GENEVE=m) + ---help--- + This allows one to create GENEVE virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. GENEVE is often used + to tunnel virtual network infrastructure in virtualized environments. + Say Y here if you want to enable hardware offload support for + Generic Network Virtualization Encapsulation (GENEVE) in the driver. + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 5c2fd57236fe..aafa6692e62f 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_QED) := qed.o qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ - qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o + qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ + qed_selftest.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 0f0d2d1d77e5..cceac3272cce 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -32,6 +32,8 @@ extern const struct qed_common_ops qed_common_ops_pass; #define NAME_SIZE 16 #define VER_SIZE 16 +#define QED_WFQ_UNIT 100 + /* cau states */ enum qed_coalescing_mode { QED_COAL_MODE_DISABLE, @@ -74,6 +76,51 @@ struct qed_rt_data { bool *b_valid; }; +enum qed_tunn_mode { + QED_MODE_L2GENEVE_TUNN, + QED_MODE_IPGENEVE_TUNN, + QED_MODE_L2GRE_TUNN, + QED_MODE_IPGRE_TUNN, + QED_MODE_VXLAN_TUNN, +}; + +enum qed_tunn_clss { + QED_TUNN_CLSS_MAC_VLAN, + QED_TUNN_CLSS_MAC_VNI, + QED_TUNN_CLSS_INNER_MAC_VLAN, + QED_TUNN_CLSS_INNER_MAC_VNI, + MAX_QED_TUNN_CLSS, +}; + +struct qed_tunn_start_params { + unsigned long tunn_mode; + u16 vxlan_udp_port; + u16 geneve_udp_port; + u8 update_vxlan_udp_port; + u8 update_geneve_udp_port; + u8 tunn_clss_vxlan; + u8 tunn_clss_l2geneve; + u8 tunn_clss_ipgeneve; + u8 tunn_clss_l2gre; + u8 tunn_clss_ipgre; +}; + +struct qed_tunn_update_params { + unsigned long tunn_mode_update_mask; + unsigned long tunn_mode; + u16 vxlan_udp_port; + u16 geneve_udp_port; + u8 update_rx_pf_clss; + u8 update_tx_pf_clss; + u8 update_vxlan_udp_port; + u8 update_geneve_udp_port; + u8 tunn_clss_vxlan; + u8 tunn_clss_l2geneve; + u8 tunn_clss_ipgeneve; + u8 tunn_clss_l2gre; + u8 tunn_clss_ipgre; +}; + /* The PCI personality is not quite synonymous to protocol ID: * 1. All personalities need CORE connections * 2. The Ethernet personality may support also the RoCE protocol @@ -192,6 +239,12 @@ struct qed_dmae_info { struct dmae_cmd *p_dmae_cmd; }; +struct qed_wfq_data { + /* when feature is configured for at least 1 vport */ + u32 min_speed; + bool configured; +}; + struct qed_qm_info { struct init_qm_pq_params *qm_pq_params; struct init_qm_vport_params *qm_vport_params; @@ -212,6 +265,7 @@ struct qed_qm_info { bool vport_wfq_en; u8 pf_wfq; u32 pf_rl; + struct qed_wfq_data *wfq_data; }; struct storm_stats { @@ -430,6 +484,7 @@ struct qed_dev { u8 num_hwfns; struct qed_hwfn hwfns[MAX_HWFNS_PER_DEVICE]; + unsigned long tunn_mode; u32 drv_type; struct qed_eth_stats *reset_stats; @@ -480,6 +535,8 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, #define PURE_LB_TC 8 +void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate); + #define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) /* Other Linux specific common definitions */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b7d100f6bd6f..b500c86d7d06 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -105,6 +105,8 @@ static void qed_qm_info_free(struct qed_hwfn *p_hwfn) qm_info->qm_vport_params = NULL; kfree(qm_info->qm_port_params); qm_info->qm_port_params = NULL; + kfree(qm_info->wfq_data); + qm_info->wfq_data = NULL; } void qed_resc_free(struct qed_dev *cdev) @@ -175,6 +177,11 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn) if (!qm_info->qm_port_params) goto alloc_err; + qm_info->wfq_data = kcalloc(num_vports, sizeof(*qm_info->wfq_data), + GFP_KERNEL); + if (!qm_info->wfq_data) + goto alloc_err; + vport_id = (u8)RESC_START(p_hwfn, QED_VPORT); /* First init per-TC PQs */ @@ -213,18 +220,19 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn) qm_info->start_vport = (u8)RESC_START(p_hwfn, QED_VPORT); + for (i = 0; i < qm_info->num_vports; i++) + qm_info->qm_vport_params[i].vport_wfq = 1; + qm_info->pf_wfq = 0; qm_info->pf_rl = 0; qm_info->vport_rl_en = 1; + qm_info->vport_wfq_en = 1; return 0; alloc_err: DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n"); - kfree(qm_info->qm_pq_params); - kfree(qm_info->qm_vport_params); - kfree(qm_info->qm_port_params); - + qed_qm_info_free(p_hwfn); return -ENOMEM; } @@ -558,6 +566,7 @@ static int qed_hw_init_port(struct qed_hwfn *p_hwfn, static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_tunn_start_params *p_tunn, int hw_mode, bool b_hw_start, enum qed_int_mode int_mode, @@ -574,7 +583,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, p_hwfn->qm_info.pf_wfq = p_info->bandwidth_min; /* Update rate limit once we'll actually have a link */ - p_hwfn->qm_info.pf_rl = 100; + p_hwfn->qm_info.pf_rl = 100000; } qed_cxt_hw_init_pf(p_hwfn); @@ -625,7 +634,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, qed_int_igu_enable(p_hwfn, p_ptt, int_mode); /* send function start command */ - rc = qed_sp_pf_start(p_hwfn, p_hwfn->cdev->mf_mode); + rc = qed_sp_pf_start(p_hwfn, p_tunn, p_hwfn->cdev->mf_mode); if (rc) DP_NOTICE(p_hwfn, "Function start ramrod failed\n"); } @@ -672,6 +681,7 @@ static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn, } int qed_hw_init(struct qed_dev *cdev, + struct qed_tunn_start_params *p_tunn, bool b_hw_start, enum qed_int_mode int_mode, bool allow_npar_tx_switch, @@ -724,7 +734,7 @@ int qed_hw_init(struct qed_dev *cdev, /* Fall into */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: rc = qed_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt, - p_hwfn->hw_info.hw_mode, + p_tunn, p_hwfn->hw_info.hw_mode, b_hw_start, int_mode, allow_npar_tx_switch); break; @@ -1593,3 +1603,312 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, return 0; } + +/* Calculate final WFQ values for all vports and configure them. + * After this configuration each vport will have + * approx min rate = min_pf_rate * (vport_wfq / QED_WFQ_UNIT) + */ +static void qed_configure_wfq_for_all_vports(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 min_pf_rate) +{ + struct init_qm_vport_params *vport_params; + int i; + + vport_params = p_hwfn->qm_info.qm_vport_params; + + for (i = 0; i < p_hwfn->qm_info.num_vports; i++) { + u32 wfq_speed = p_hwfn->qm_info.wfq_data[i].min_speed; + + vport_params[i].vport_wfq = (wfq_speed * QED_WFQ_UNIT) / + min_pf_rate; + qed_init_vport_wfq(p_hwfn, p_ptt, + vport_params[i].first_tx_pq_id, + vport_params[i].vport_wfq); + } +} + +static void qed_init_wfq_default_param(struct qed_hwfn *p_hwfn, + u32 min_pf_rate) + +{ + int i; + + for (i = 0; i < p_hwfn->qm_info.num_vports; i++) + p_hwfn->qm_info.qm_vport_params[i].vport_wfq = 1; +} + +static void qed_disable_wfq_for_all_vports(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 min_pf_rate) +{ + struct init_qm_vport_params *vport_params; + int i; + + vport_params = p_hwfn->qm_info.qm_vport_params; + + for (i = 0; i < p_hwfn->qm_info.num_vports; i++) { + qed_init_wfq_default_param(p_hwfn, min_pf_rate); + qed_init_vport_wfq(p_hwfn, p_ptt, + vport_params[i].first_tx_pq_id, + vport_params[i].vport_wfq); + } +} + +/* This function performs several validations for WFQ + * configuration and required min rate for a given vport + * 1. req_rate must be greater than one percent of min_pf_rate. + * 2. req_rate should not cause other vports [not configured for WFQ explicitly] + * rates to get less than one percent of min_pf_rate. + * 3. total_req_min_rate [all vports min rate sum] shouldn't exceed min_pf_rate. + */ +static int qed_init_wfq_param(struct qed_hwfn *p_hwfn, + u16 vport_id, u32 req_rate, + u32 min_pf_rate) +{ + u32 total_req_min_rate = 0, total_left_rate = 0, left_rate_per_vp = 0; + int non_requested_count = 0, req_count = 0, i, num_vports; + + num_vports = p_hwfn->qm_info.num_vports; + + /* Accounting for the vports which are configured for WFQ explicitly */ + for (i = 0; i < num_vports; i++) { + u32 tmp_speed; + + if ((i != vport_id) && + p_hwfn->qm_info.wfq_data[i].configured) { + req_count++; + tmp_speed = p_hwfn->qm_info.wfq_data[i].min_speed; + total_req_min_rate += tmp_speed; + } + } + + /* Include current vport data as well */ + req_count++; + total_req_min_rate += req_rate; + non_requested_count = num_vports - req_count; + + if (req_rate < min_pf_rate / QED_WFQ_UNIT) { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Vport [%d] - Requested rate[%d Mbps] is less than one percent of configured PF min rate[%d Mbps]\n", + vport_id, req_rate, min_pf_rate); + return -EINVAL; + } + + if (num_vports > QED_WFQ_UNIT) { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Number of vports is greater than %d\n", + QED_WFQ_UNIT); + return -EINVAL; + } + + if (total_req_min_rate > min_pf_rate) { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Total requested min rate for all vports[%d Mbps] is greater than configured PF min rate[%d Mbps]\n", + total_req_min_rate, min_pf_rate); + return -EINVAL; + } + + total_left_rate = min_pf_rate - total_req_min_rate; + + left_rate_per_vp = total_left_rate / non_requested_count; + if (left_rate_per_vp < min_pf_rate / QED_WFQ_UNIT) { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Non WFQ configured vports rate [%d Mbps] is less than one percent of configured PF min rate[%d Mbps]\n", + left_rate_per_vp, min_pf_rate); + return -EINVAL; + } + + p_hwfn->qm_info.wfq_data[vport_id].min_speed = req_rate; + p_hwfn->qm_info.wfq_data[vport_id].configured = true; + + for (i = 0; i < num_vports; i++) { + if (p_hwfn->qm_info.wfq_data[i].configured) + continue; + + p_hwfn->qm_info.wfq_data[i].min_speed = left_rate_per_vp; + } + + return 0; +} + +static int __qed_configure_vp_wfq_on_link_change(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 min_pf_rate) +{ + bool use_wfq = false; + int rc = 0; + u16 i; + + /* Validate all pre configured vports for wfq */ + for (i = 0; i < p_hwfn->qm_info.num_vports; i++) { + u32 rate; + + if (!p_hwfn->qm_info.wfq_data[i].configured) + continue; + + rate = p_hwfn->qm_info.wfq_data[i].min_speed; + use_wfq = true; + + rc = qed_init_wfq_param(p_hwfn, i, rate, min_pf_rate); + if (rc) { + DP_NOTICE(p_hwfn, + "WFQ validation failed while configuring min rate\n"); + break; + } + } + + if (!rc && use_wfq) + qed_configure_wfq_for_all_vports(p_hwfn, p_ptt, min_pf_rate); + else + qed_disable_wfq_for_all_vports(p_hwfn, p_ptt, min_pf_rate); + + return rc; +} + +/* API to configure WFQ from mcp link change */ +void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate) +{ + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + __qed_configure_vp_wfq_on_link_change(p_hwfn, + p_hwfn->p_dpc_ptt, + min_pf_rate); + } +} + +int __qed_configure_pf_max_bandwidth(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_mcp_link_state *p_link, + u8 max_bw) +{ + int rc = 0; + + p_hwfn->mcp_info->func_info.bandwidth_max = max_bw; + + if (!p_link->line_speed && (max_bw != 100)) + return rc; + + p_link->speed = (p_link->line_speed * max_bw) / 100; + p_hwfn->qm_info.pf_rl = p_link->speed; + + /* Since the limiter also affects Tx-switched traffic, we don't want it + * to limit such traffic in case there's no actual limit. + * In that case, set limit to imaginary high boundary. + */ + if (max_bw == 100) + p_hwfn->qm_info.pf_rl = 100000; + + rc = qed_init_pf_rl(p_hwfn, p_ptt, p_hwfn->rel_pf_id, + p_hwfn->qm_info.pf_rl); + + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Configured MAX bandwidth to be %08x Mb/sec\n", + p_link->speed); + + return rc; +} + +/* Main API to configure PF max bandwidth where bw range is [1 - 100] */ +int qed_configure_pf_max_bandwidth(struct qed_dev *cdev, u8 max_bw) +{ + int i, rc = -EINVAL; + + if (max_bw < 1 || max_bw > 100) { + DP_NOTICE(cdev, "PF max bw valid range is [1-100]\n"); + return rc; + } + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + struct qed_hwfn *p_lead = QED_LEADING_HWFN(cdev); + struct qed_mcp_link_state *p_link; + struct qed_ptt *p_ptt; + + p_link = &p_lead->mcp_info->link_output; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EBUSY; + + rc = __qed_configure_pf_max_bandwidth(p_hwfn, p_ptt, + p_link, max_bw); + + qed_ptt_release(p_hwfn, p_ptt); + + if (rc) + break; + } + + return rc; +} + +int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_mcp_link_state *p_link, + u8 min_bw) +{ + int rc = 0; + + p_hwfn->mcp_info->func_info.bandwidth_min = min_bw; + p_hwfn->qm_info.pf_wfq = min_bw; + + if (!p_link->line_speed) + return rc; + + p_link->min_pf_rate = (p_link->line_speed * min_bw) / 100; + + rc = qed_init_pf_wfq(p_hwfn, p_ptt, p_hwfn->rel_pf_id, min_bw); + + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Configured MIN bandwidth to be %d Mb/sec\n", + p_link->min_pf_rate); + + return rc; +} + +/* Main API to configure PF min bandwidth where bw range is [1-100] */ +int qed_configure_pf_min_bandwidth(struct qed_dev *cdev, u8 min_bw) +{ + int i, rc = -EINVAL; + + if (min_bw < 1 || min_bw > 100) { + DP_NOTICE(cdev, "PF min bw valid range is [1-100]\n"); + return rc; + } + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + struct qed_hwfn *p_lead = QED_LEADING_HWFN(cdev); + struct qed_mcp_link_state *p_link; + struct qed_ptt *p_ptt; + + p_link = &p_lead->mcp_info->link_output; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EBUSY; + + rc = __qed_configure_pf_min_bandwidth(p_hwfn, p_ptt, + p_link, min_bw); + if (rc) { + qed_ptt_release(p_hwfn, p_ptt); + return rc; + } + + if (p_link->min_pf_rate) { + u32 min_rate = p_link->min_pf_rate; + + rc = __qed_configure_vp_wfq_on_link_change(p_hwfn, + p_ptt, + min_rate); + } + + qed_ptt_release(p_hwfn, p_ptt); + } + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index d6c7ddf4f4d4..6aac3f855aa1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -62,6 +62,7 @@ void qed_resc_setup(struct qed_dev *cdev); * @brief qed_hw_init - * * @param cdev + * @param p_tunn * @param b_hw_start * @param int_mode - interrupt mode [msix, inta, etc.] to use. * @param allow_npar_tx_switch - npar tx switching to be used @@ -72,6 +73,7 @@ void qed_resc_setup(struct qed_dev *cdev); * @return int */ int qed_hw_init(struct qed_dev *cdev, + struct qed_tunn_start_params *p_tunn, bool b_hw_start, enum qed_int_mode int_mode, bool allow_npar_tx_switch, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index a368f5e71d95..c4fae71bed11 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -46,7 +46,7 @@ enum common_ramrod_cmd_id { COMMON_RAMROD_PF_STOP /* PF Function Stop Ramrod */, COMMON_RAMROD_RESERVED, COMMON_RAMROD_RESERVED2, - COMMON_RAMROD_RESERVED3, + COMMON_RAMROD_PF_UPDATE, COMMON_RAMROD_EMPTY, MAX_COMMON_RAMROD_CMD_ID }; @@ -626,6 +626,42 @@ struct pf_start_ramrod_data { u8 reserved0[4]; }; +/* tunnel configuration */ +struct pf_update_tunnel_config { + u8 update_rx_pf_clss; + u8 update_tx_pf_clss; + u8 set_vxlan_udp_port_flg; + u8 set_geneve_udp_port_flg; + u8 tx_enable_vxlan; + u8 tx_enable_l2geneve; + u8 tx_enable_ipgeneve; + u8 tx_enable_l2gre; + u8 tx_enable_ipgre; + u8 tunnel_clss_vxlan; + u8 tunnel_clss_l2geneve; + u8 tunnel_clss_ipgeneve; + u8 tunnel_clss_l2gre; + u8 tunnel_clss_ipgre; + __le16 vxlan_udp_port; + __le16 geneve_udp_port; + __le16 reserved[3]; +}; + +struct pf_update_ramrod_data { + u32 reserved[2]; + u32 reserved_1[6]; + struct pf_update_tunnel_config tunnel_config; +}; + +/* Tunnel classification scheme */ +enum tunnel_clss { + TUNNEL_CLSS_MAC_VLAN = 0, + TUNNEL_CLSS_MAC_VNI, + TUNNEL_CLSS_INNER_MAC_VLAN, + TUNNEL_CLSS_INNER_MAC_VNI, + MAX_TUNNEL_CLSS +}; + enum ports_mode { ENGX2_PORTX1 /* 2 engines x 1 port */, ENGX2_PORTX2 /* 2 engines x 2 ports */, @@ -1603,6 +1639,19 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, u16 start_pq, u16 num_pqs); +void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 dest_port); +void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool vxlan_enable); +void qed_set_gre_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool eth_gre_enable, + bool ip_gre_enable); +void qed_set_geneve_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 dest_port); +void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool eth_geneve_enable, + bool ip_geneve_enable); + /* Ystorm flow control mode. Use enum fw_flow_ctrl_mode */ #define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) #define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size) @@ -3788,7 +3837,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_SET_LLDP 0x24000000 #define DRV_MSG_CODE_SET_DCBX 0x25000000 - +#define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 #define DRV_MSG_CODE_INITIATE_FLR 0x02000000 @@ -3808,6 +3857,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_PHY_CORE_WRITE 0x000e0000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -3865,6 +3915,18 @@ struct public_drv_mb { #define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 #define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 +#define DRV_MB_PARAM_BIST_UNKNOWN_TEST 0 +#define DRV_MB_PARAM_BIST_REGISTER_TEST 1 +#define DRV_MB_PARAM_BIST_CLOCK_TEST 2 + +#define DRV_MB_PARAM_BIST_RC_UNKNOWN 0 +#define DRV_MB_PARAM_BIST_RC_PASSED 1 +#define DRV_MB_PARAM_BIST_RC_FAILED 2 +#define DRV_MB_PARAM_BIST_RC_INVALID_PARAMETER 3 + +#define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT 0 +#define DRV_MB_PARAM_BIST_TEST_INDEX_MASK 0x000000FF + u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_DRV_LOAD_ENGINE 0x10100000 @@ -5067,4 +5129,8 @@ struct hw_set_image { struct hw_set_info hw_sets[1]; }; +int qed_init_pf_wfq(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + u8 pf_id, u16 pf_wfq); +int qed_init_vport_wfq(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + u16 first_tx_pq_id[NUM_OF_TCS], u16 vport_wfq); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index f55ebdc3c832..e8a3b9da59b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -712,6 +712,21 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn, return 0; } +int qed_init_pf_wfq(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 pf_id, u16 pf_wfq) +{ + u32 inc_val = QM_WFQ_INC_VAL(pf_wfq); + + if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, "Invalid PF WFQ weight configuration"); + return -1; + } + + qed_wr(p_hwfn, p_ptt, QM_REG_WFQPFWEIGHT + pf_id * 4, inc_val); + return 0; +} + int qed_init_pf_rl(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 pf_id, @@ -732,6 +747,31 @@ int qed_init_pf_rl(struct qed_hwfn *p_hwfn, return 0; } +int qed_init_vport_wfq(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 first_tx_pq_id[NUM_OF_TCS], + u16 vport_wfq) +{ + u32 inc_val = QM_WFQ_INC_VAL(vport_wfq); + u8 tc; + + if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, "Invalid VPORT WFQ weight configuration"); + return -1; + } + + for (tc = 0; tc < NUM_OF_TCS; tc++) { + u16 vport_pq_id = first_tx_pq_id[tc]; + + if (vport_pq_id != QM_INVALID_PQ_ID) + qed_wr(p_hwfn, p_ptt, + QM_REG_WFQVPWEIGHT + vport_pq_id * 4, + inc_val); + } + + return 0; +} + int qed_init_vport_rl(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 vport_id, @@ -788,3 +828,130 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, return true; } + +static void +qed_set_tunnel_type_enable_bit(unsigned long *var, int bit, bool enable) +{ + if (enable) + set_bit(bit, var); + else + clear_bit(bit, var); +} + +#define PRS_ETH_TUNN_FIC_FORMAT -188897008 + +void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 dest_port) +{ + qed_wr(p_hwfn, p_ptt, PRS_REG_VXLAN_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, NIG_REG_VXLAN_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, PBF_REG_VXLAN_PORT, dest_port); +} + +void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool vxlan_enable) +{ + unsigned long reg_val = 0; + u8 shift; + + reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN); + shift = PRS_REG_ENCAPSULATION_TYPE_EN_VXLAN_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, vxlan_enable); + + qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val); + + if (reg_val) + qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0, + PRS_ETH_TUNN_FIC_FORMAT); + + reg_val = qed_rd(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE); + shift = NIG_REG_ENC_TYPE_ENABLE_VXLAN_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, vxlan_enable); + + qed_wr(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE, reg_val); + + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_VXLAN_EN, + vxlan_enable ? 1 : 0); +} + +void qed_set_gre_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + bool eth_gre_enable, bool ip_gre_enable) +{ + unsigned long reg_val = 0; + u8 shift; + + reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN); + shift = PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, eth_gre_enable); + + shift = PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, ip_gre_enable); + qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val); + if (reg_val) + qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0, + PRS_ETH_TUNN_FIC_FORMAT); + + reg_val = qed_rd(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE); + shift = NIG_REG_ENC_TYPE_ENABLE_ETH_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, eth_gre_enable); + + shift = NIG_REG_ENC_TYPE_ENABLE_IP_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, ip_gre_enable); + qed_wr(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE, reg_val); + + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_GRE_ETH_EN, + eth_gre_enable ? 1 : 0); + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_GRE_IP_EN, + ip_gre_enable ? 1 : 0); +} + +void qed_set_geneve_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 dest_port) +{ + qed_wr(p_hwfn, p_ptt, PRS_REG_NGE_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, PBF_REG_NGE_PORT, dest_port); +} + +void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool eth_geneve_enable, + bool ip_geneve_enable) +{ + unsigned long reg_val = 0; + u8 shift; + + reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN); + shift = PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GENEVE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, eth_geneve_enable); + + shift = PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GENEVE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, ip_geneve_enable); + + qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val); + if (reg_val) + qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0, + PRS_ETH_TUNN_FIC_FORMAT); + + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_ETH_ENABLE, + eth_geneve_enable ? 1 : 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_IP_ENABLE, ip_geneve_enable ? 1 : 0); + + /* comp ver */ + reg_val = (ip_geneve_enable || eth_geneve_enable) ? 1 : 0; + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_COMP_VER, reg_val); + qed_wr(p_hwfn, p_ptt, PBF_REG_NGE_COMP_VER, reg_val); + qed_wr(p_hwfn, p_ptt, PRS_REG_NGE_COMP_VER, reg_val); + + /* EDPM with geneve tunnel not supported in BB_B0 */ + if (QED_IS_BB_B0(p_hwfn->cdev)) + return; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_NGE_ETH_EN, + eth_geneve_enable ? 1 : 0); + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_NGE_IP_EN, + ip_geneve_enable ? 1 : 0); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 5005497ee23e..31e1d510a991 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1415,16 +1415,16 @@ static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, sizeof(port_stats)); p_stats->rx_64_byte_packets += port_stats.pmm.r64; - p_stats->rx_127_byte_packets += port_stats.pmm.r127; - p_stats->rx_255_byte_packets += port_stats.pmm.r255; - p_stats->rx_511_byte_packets += port_stats.pmm.r511; - p_stats->rx_1023_byte_packets += port_stats.pmm.r1023; - p_stats->rx_1518_byte_packets += port_stats.pmm.r1518; - p_stats->rx_1522_byte_packets += port_stats.pmm.r1522; - p_stats->rx_2047_byte_packets += port_stats.pmm.r2047; - p_stats->rx_4095_byte_packets += port_stats.pmm.r4095; - p_stats->rx_9216_byte_packets += port_stats.pmm.r9216; - p_stats->rx_16383_byte_packets += port_stats.pmm.r16383; + p_stats->rx_65_to_127_byte_packets += port_stats.pmm.r127; + p_stats->rx_128_to_255_byte_packets += port_stats.pmm.r255; + p_stats->rx_256_to_511_byte_packets += port_stats.pmm.r511; + p_stats->rx_512_to_1023_byte_packets += port_stats.pmm.r1023; + p_stats->rx_1024_to_1518_byte_packets += port_stats.pmm.r1518; + p_stats->rx_1519_to_1522_byte_packets += port_stats.pmm.r1522; + p_stats->rx_1519_to_2047_byte_packets += port_stats.pmm.r2047; + p_stats->rx_2048_to_4095_byte_packets += port_stats.pmm.r4095; + p_stats->rx_4096_to_9216_byte_packets += port_stats.pmm.r9216; + p_stats->rx_9217_to_16383_byte_packets += port_stats.pmm.r16383; p_stats->rx_crc_errors += port_stats.pmm.rfcs; p_stats->rx_mac_crtl_frames += port_stats.pmm.rxcf; p_stats->rx_pause_frames += port_stats.pmm.rxpf; @@ -1884,6 +1884,36 @@ static int qed_stop_txq(struct qed_dev *cdev, return 0; } +static int qed_tunn_configure(struct qed_dev *cdev, + struct qed_tunn_params *tunn_params) +{ + struct qed_tunn_update_params tunn_info; + int i, rc; + + memset(&tunn_info, 0, sizeof(tunn_info)); + if (tunn_params->update_vxlan_port == 1) { + tunn_info.update_vxlan_udp_port = 1; + tunn_info.vxlan_udp_port = tunn_params->vxlan_port; + } + + if (tunn_params->update_geneve_port == 1) { + tunn_info.update_geneve_udp_port = 1; + tunn_info.geneve_udp_port = tunn_params->geneve_port; + } + + for_each_hwfn(cdev, i) { + struct qed_hwfn *hwfn = &cdev->hwfns[i]; + + rc = qed_sp_pf_update_tunn_cfg(hwfn, &tunn_info, + QED_SPQ_MODE_EBLOCK, NULL); + + if (rc) + return rc; + } + + return 0; +} + static int qed_configure_filter_rx_mode(struct qed_dev *cdev, enum qed_filter_rx_mode_type type) { @@ -2026,6 +2056,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .fastpath_stop = &qed_fastpath_stop, .eth_cqe_completion = &qed_fp_cqe_completion, .get_vport_stats = &qed_get_vport_stats, + .tunn_config = &qed_tunn_configure, }; const struct qed_eth_ops *qed_get_eth_ops(void) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c31d485f72d6..1b758bdec587 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -28,6 +28,7 @@ #include "qed_dev_api.h" #include "qed_mcp.h" #include "qed_hw.h" +#include "qed_selftest.h" static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -744,6 +745,7 @@ static void qed_update_pf_params(struct qed_dev *cdev, static int qed_slowpath_start(struct qed_dev *cdev, struct qed_slowpath_params *params) { + struct qed_tunn_start_params tunn_info; struct qed_mcp_drv_version drv_version; const u8 *data = NULL; struct qed_hwfn *hwfn; @@ -776,7 +778,19 @@ static int qed_slowpath_start(struct qed_dev *cdev, /* Start the slowpath */ data = cdev->firmware->data; - rc = qed_hw_init(cdev, true, cdev->int_params.out.int_mode, + memset(&tunn_info, 0, sizeof(tunn_info)); + tunn_info.tunn_mode |= 1 << QED_MODE_VXLAN_TUNN | + 1 << QED_MODE_L2GRE_TUNN | + 1 << QED_MODE_IPGRE_TUNN | + 1 << QED_MODE_L2GENEVE_TUNN | + 1 << QED_MODE_IPGENEVE_TUNN; + + tunn_info.tunn_clss_vxlan = QED_TUNN_CLSS_MAC_VLAN; + tunn_info.tunn_clss_l2gre = QED_TUNN_CLSS_MAC_VLAN; + tunn_info.tunn_clss_ipgre = QED_TUNN_CLSS_MAC_VLAN; + + rc = qed_hw_init(cdev, &tunn_info, true, + cdev->int_params.out.int_mode, true, data); if (rc) goto err2; @@ -902,6 +916,11 @@ static u32 qed_sb_release(struct qed_dev *cdev, return rc; } +static bool qed_can_link_change(struct qed_dev *cdev) +{ + return true; +} + static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) { @@ -944,6 +963,39 @@ static int qed_set_link(struct qed_dev *cdev, } if (params->override_flags & QED_LINK_OVERRIDE_SPEED_FORCED_SPEED) link_params->speed.forced_speed = params->forced_speed; + if (params->override_flags & QED_LINK_OVERRIDE_PAUSE_CONFIG) { + if (params->pause_config & QED_LINK_PAUSE_AUTONEG_ENABLE) + link_params->pause.autoneg = true; + else + link_params->pause.autoneg = false; + if (params->pause_config & QED_LINK_PAUSE_RX_ENABLE) + link_params->pause.forced_rx = true; + else + link_params->pause.forced_rx = false; + if (params->pause_config & QED_LINK_PAUSE_TX_ENABLE) + link_params->pause.forced_tx = true; + else + link_params->pause.forced_tx = false; + } + if (params->override_flags & QED_LINK_OVERRIDE_LOOPBACK_MODE) { + switch (params->loopback_mode) { + case QED_LINK_LOOPBACK_INT_PHY: + link_params->loopback_mode = PMM_LOOPBACK_INT_PHY; + break; + case QED_LINK_LOOPBACK_EXT_PHY: + link_params->loopback_mode = PMM_LOOPBACK_EXT_PHY; + break; + case QED_LINK_LOOPBACK_EXT: + link_params->loopback_mode = PMM_LOOPBACK_EXT; + break; + case QED_LINK_LOOPBACK_MAC: + link_params->loopback_mode = PMM_LOOPBACK_MAC; + break; + default: + link_params->loopback_mode = PMM_LOOPBACK_NONE; + break; + } + } rc = qed_mcp_set_link(hwfn, ptt, params->link_up); @@ -1150,7 +1202,15 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } +struct qed_selftest_ops qed_selftest_ops_pass = { + .selftest_memory = &qed_selftest_memory, + .selftest_interrupt = &qed_selftest_interrupt, + .selftest_register = &qed_selftest_register, + .selftest_clock = &qed_selftest_clock, +}; + const struct qed_common_ops qed_common_ops_pass = { + .selftest = &qed_selftest_ops_pass, .probe = &qed_probe, .remove = &qed_remove, .set_power_state = &qed_set_power_state, @@ -1164,6 +1224,7 @@ const struct qed_common_ops qed_common_ops_pass = { .sb_release = &qed_sb_release, .simd_handler_config = &qed_simd_handler_config, .simd_handler_clean = &qed_simd_handler_clean, + .can_link_change = &qed_can_link_change, .set_link = &qed_set_link, .get_link = &qed_get_current_link, .drain = &qed_drain, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index b89c9a8e1655..2f8309d772c8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -472,6 +472,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, bool b_reset) { struct qed_mcp_link_state *p_link; + u8 max_bw, min_bw; u32 status = 0; p_link = &p_hwfn->mcp_info->link_output; @@ -527,17 +528,20 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, p_link->speed = 0; } - /* Correct speed according to bandwidth allocation */ - if (p_hwfn->mcp_info->func_info.bandwidth_max && p_link->speed) { - p_link->speed = p_link->speed * - p_hwfn->mcp_info->func_info.bandwidth_max / - 100; - qed_init_pf_rl(p_hwfn, p_ptt, p_hwfn->rel_pf_id, - p_link->speed); - DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, - "Configured MAX bandwidth to be %08x Mb/sec\n", - p_link->speed); - } + if (p_link->link_up && p_link->speed) + p_link->line_speed = p_link->speed; + else + p_link->line_speed = 0; + + max_bw = p_hwfn->mcp_info->func_info.bandwidth_max; + min_bw = p_hwfn->mcp_info->func_info.bandwidth_min; + + /* Max bandwidth configuration */ + __qed_configure_pf_max_bandwidth(p_hwfn, p_ptt, p_link, max_bw); + + /* Min bandwidth configuration */ + __qed_configure_pf_min_bandwidth(p_hwfn, p_ptt, p_link, min_bw); + qed_configure_vp_wfq_on_link_change(p_hwfn->cdev, p_link->min_pf_rate); p_link->an = !!(status & LINK_STATUS_AUTO_NEGOTIATE_ENABLED); p_link->an_complete = !!(status & @@ -648,6 +652,77 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, return 0; } +static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn, + struct public_func *p_shmem_info) +{ + struct qed_mcp_function_info *p_info; + + p_info = &p_hwfn->mcp_info->func_info; + + p_info->bandwidth_min = (p_shmem_info->config & + FUNC_MF_CFG_MIN_BW_MASK) >> + FUNC_MF_CFG_MIN_BW_SHIFT; + if (p_info->bandwidth_min < 1 || p_info->bandwidth_min > 100) { + DP_INFO(p_hwfn, + "bandwidth minimum out of bounds [%02x]. Set to 1\n", + p_info->bandwidth_min); + p_info->bandwidth_min = 1; + } + + p_info->bandwidth_max = (p_shmem_info->config & + FUNC_MF_CFG_MAX_BW_MASK) >> + FUNC_MF_CFG_MAX_BW_SHIFT; + if (p_info->bandwidth_max < 1 || p_info->bandwidth_max > 100) { + DP_INFO(p_hwfn, + "bandwidth maximum out of bounds [%02x]. Set to 100\n", + p_info->bandwidth_max); + p_info->bandwidth_max = 100; + } +} + +static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct public_func *p_data, + int pfid) +{ + u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, + PUBLIC_FUNC); + u32 mfw_path_offsize = qed_rd(p_hwfn, p_ptt, addr); + u32 func_addr = SECTION_ADDR(mfw_path_offsize, pfid); + u32 i, size; + + memset(p_data, 0, sizeof(*p_data)); + + size = min_t(u32, sizeof(*p_data), + QED_SECTION_SIZE(mfw_path_offsize)); + for (i = 0; i < size / sizeof(u32); i++) + ((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt, + func_addr + (i << 2)); + return size; +} + +static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_mcp_function_info *p_info; + struct public_func shmem_info; + u32 resp = 0, param = 0; + + qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, + MCP_PF_ID(p_hwfn)); + + qed_read_pf_bandwidth(p_hwfn, &shmem_info); + + p_info = &p_hwfn->mcp_info->func_info; + + qed_configure_pf_min_bandwidth(p_hwfn->cdev, p_info->bandwidth_min); + qed_configure_pf_max_bandwidth(p_hwfn->cdev, p_info->bandwidth_max); + + /* Acknowledge the MFW */ + qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp, + ¶m); +} + int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { @@ -679,6 +754,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE: qed_mcp_handle_transceiver_change(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_BW_UPDATE: + qed_mcp_update_bw(p_hwfn, p_ptt); + break; default: DP_NOTICE(p_hwfn, "Unimplemented MFW message %d\n", i); rc = -EINVAL; @@ -758,28 +836,6 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, return 0; } -static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct public_func *p_data, - int pfid) -{ - u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, - PUBLIC_FUNC); - u32 mfw_path_offsize = qed_rd(p_hwfn, p_ptt, addr); - u32 func_addr = SECTION_ADDR(mfw_path_offsize, pfid); - u32 i, size; - - memset(p_data, 0, sizeof(*p_data)); - - size = min_t(u32, sizeof(*p_data), - QED_SECTION_SIZE(mfw_path_offsize)); - for (i = 0; i < size / sizeof(u32); i++) - ((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt, - func_addr + (i << 2)); - - return size; -} - static int qed_mcp_get_shmem_proto(struct qed_hwfn *p_hwfn, struct public_func *p_info, @@ -818,26 +874,7 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, return -EINVAL; } - - info->bandwidth_min = (shmem_info.config & - FUNC_MF_CFG_MIN_BW_MASK) >> - FUNC_MF_CFG_MIN_BW_SHIFT; - if (info->bandwidth_min < 1 || info->bandwidth_min > 100) { - DP_INFO(p_hwfn, - "bandwidth minimum out of bounds [%02x]. Set to 1\n", - info->bandwidth_min); - info->bandwidth_min = 1; - } - - info->bandwidth_max = (shmem_info.config & - FUNC_MF_CFG_MAX_BW_MASK) >> - FUNC_MF_CFG_MAX_BW_SHIFT; - if (info->bandwidth_max < 1 || info->bandwidth_max > 100) { - DP_INFO(p_hwfn, - "bandwidth maximum out of bounds [%02x]. Set to 100\n", - info->bandwidth_max); - info->bandwidth_max = 100; - } + qed_read_pf_bandwidth(p_hwfn, &shmem_info); if (shmem_info.mac_upper || shmem_info.mac_lower) { info->mac[0] = (u8)(shmem_info.mac_upper >> 8); @@ -938,9 +975,10 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, p_drv_version = &union_data.drv_version; p_drv_version->version = p_ver->version; + for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) { val = cpu_to_be32(p_ver->name[i]); - *(u32 *)&p_drv_version->name[i * sizeof(u32)] = val; + *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val; } memset(&mb_params, 0, sizeof(mb_params)); @@ -979,3 +1017,45 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, return rc; } + +int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 drv_mb_param = 0, rsp, param; + int rc = 0; + + drv_mb_param = (DRV_MB_PARAM_BIST_REGISTER_TEST << + DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT); + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST, + drv_mb_param, &rsp, ¶m); + + if (rc) + return rc; + + if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) || + (param != DRV_MB_PARAM_BIST_RC_PASSED)) + rc = -EAGAIN; + + return rc; +} + +int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 drv_mb_param, rsp, param; + int rc = 0; + + drv_mb_param = (DRV_MB_PARAM_BIST_CLOCK_TEST << + DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT); + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST, + drv_mb_param, &rsp, ¶m); + + if (rc) + return rc; + + if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) || + (param != DRV_MB_PARAM_BIST_RC_PASSED)) + rc = -EAGAIN; + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 50917a2131a5..5f218eed0541 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -40,7 +40,15 @@ struct qed_mcp_link_capabilities { struct qed_mcp_link_state { bool link_up; - u32 speed; /* In Mb/s */ + u32 min_pf_rate; + + /* Actual link speed in Mb/s */ + u32 line_speed; + + /* PF max speed in Mb/s, deduced from line_speed + * according to PF max bandwidth configuration. + */ + u32 speed; bool full_duplex; bool an; @@ -237,6 +245,28 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_led_mode mode); +/** + * @brief Bist register test + * + * @param p_hwfn - hw function + * @param p_ptt - PTT required for register access + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + +/** + * @brief Bist clock test + * + * @param p_hwfn - hw function + * @param p_ptt - PTT required for register access + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields @@ -388,5 +418,14 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, * @return true iff MFW is running and mcp_info is initialized */ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn); - +int qed_configure_pf_min_bandwidth(struct qed_dev *cdev, u8 min_bw); +int qed_configure_pf_max_bandwidth(struct qed_dev *cdev, u8 max_bw); +int __qed_configure_pf_max_bandwidth(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_mcp_link_state *p_link, + u8 max_bw); +int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_mcp_link_state *p_link, + u8 min_bw); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index c15b1622e636..bf4d7ccd56bb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -427,4 +427,37 @@ 0x2aae60UL #define PGLUE_B_REG_PF_BAR1_SIZE \ 0x2aae64UL +#define PRS_REG_ENCAPSULATION_TYPE_EN 0x1f0730UL +#define PRS_REG_GRE_PROTOCOL 0x1f0734UL +#define PRS_REG_VXLAN_PORT 0x1f0738UL +#define PRS_REG_OUTPUT_FORMAT_4_0 0x1f099cUL +#define NIG_REG_ENC_TYPE_ENABLE 0x501058UL + +#define NIG_REG_ENC_TYPE_ENABLE_ETH_OVER_GRE_ENABLE (0x1 << 0) +#define NIG_REG_ENC_TYPE_ENABLE_ETH_OVER_GRE_ENABLE_SHIFT 0 +#define NIG_REG_ENC_TYPE_ENABLE_IP_OVER_GRE_ENABLE (0x1 << 1) +#define NIG_REG_ENC_TYPE_ENABLE_IP_OVER_GRE_ENABLE_SHIFT 1 +#define NIG_REG_ENC_TYPE_ENABLE_VXLAN_ENABLE (0x1 << 2) +#define NIG_REG_ENC_TYPE_ENABLE_VXLAN_ENABLE_SHIFT 2 + +#define NIG_REG_VXLAN_PORT 0x50105cUL +#define PBF_REG_VXLAN_PORT 0xd80518UL +#define PBF_REG_NGE_PORT 0xd8051cUL +#define PRS_REG_NGE_PORT 0x1f086cUL +#define NIG_REG_NGE_PORT 0x508b38UL + +#define DORQ_REG_L2_EDPM_TUNNEL_GRE_ETH_EN 0x10090cUL +#define DORQ_REG_L2_EDPM_TUNNEL_GRE_IP_EN 0x100910UL +#define DORQ_REG_L2_EDPM_TUNNEL_VXLAN_EN 0x100914UL +#define DORQ_REG_L2_EDPM_TUNNEL_NGE_IP_EN 0x10092cUL +#define DORQ_REG_L2_EDPM_TUNNEL_NGE_ETH_EN 0x100930UL + +#define NIG_REG_NGE_IP_ENABLE 0x508b28UL +#define NIG_REG_NGE_ETH_ENABLE 0x508b2cUL +#define NIG_REG_NGE_COMP_VER 0x508b30UL +#define PBF_REG_NGE_COMP_VER 0xd80524UL +#define PRS_REG_NGE_COMP_VER 0x1f0878UL + +#define QM_REG_WFQPFWEIGHT 0x2f4e80UL +#define QM_REG_WFQVPWEIGHT 0x2fa000UL #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c new file mode 100644 index 000000000000..a342bfe4280d --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c @@ -0,0 +1,76 @@ +#include "qed.h" +#include "qed_dev_api.h" +#include "qed_mcp.h" +#include "qed_sp.h" + +int qed_selftest_memory(struct qed_dev *cdev) +{ + int rc = 0, i; + + for_each_hwfn(cdev, i) { + rc = qed_sp_heartbeat_ramrod(&cdev->hwfns[i]); + if (rc) + return rc; + } + + return rc; +} + +int qed_selftest_interrupt(struct qed_dev *cdev) +{ + int rc = 0, i; + + for_each_hwfn(cdev, i) { + rc = qed_sp_heartbeat_ramrod(&cdev->hwfns[i]); + if (rc) + return rc; + } + + return rc; +} + +int qed_selftest_register(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn; + struct qed_ptt *p_ptt; + int rc = 0, i; + + /* although performed by MCP, this test is per engine */ + for_each_hwfn(cdev, i) { + p_hwfn = &cdev->hwfns[i]; + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "failed to acquire ptt\n"); + return -EBUSY; + } + rc = qed_mcp_bist_register_test(p_hwfn, p_ptt); + qed_ptt_release(p_hwfn, p_ptt); + if (rc) + break; + } + + return rc; +} + +int qed_selftest_clock(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn; + struct qed_ptt *p_ptt; + int rc = 0, i; + + /* although performed by MCP, this test is per engine */ + for_each_hwfn(cdev, i) { + p_hwfn = &cdev->hwfns[i]; + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "failed to acquire ptt\n"); + return -EBUSY; + } + rc = qed_mcp_bist_clock_test(p_hwfn, p_ptt); + qed_ptt_release(p_hwfn, p_ptt); + if (rc) + break; + } + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h new file mode 100644 index 000000000000..50eb0b49950f --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h @@ -0,0 +1,40 @@ +#ifndef _QED_SELFTEST_API_H +#define _QED_SELFTEST_API_H +#include <linux/types.h> + +/** + * @brief qed_selftest_memory - Perform memory test + * + * @param cdev + * + * @return int + */ +int qed_selftest_memory(struct qed_dev *cdev); + +/** + * @brief qed_selftest_interrupt - Perform interrupt test + * + * @param cdev + * + * @return int + */ +int qed_selftest_interrupt(struct qed_dev *cdev); + +/** + * @brief qed_selftest_register - Perform register test + * + * @param cdev + * + * @return int + */ +int qed_selftest_register(struct qed_dev *cdev); + +/** + * @brief qed_selftest_clock - Perform clock test + * + * @param cdev + * + * @return int + */ +int qed_selftest_clock(struct qed_dev *cdev); +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index d39f914b66ee..eec137f40895 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -52,6 +52,7 @@ int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn, union ramrod_data { struct pf_start_ramrod_data pf_start; + struct pf_update_ramrod_data pf_update; struct rx_queue_start_ramrod_data rx_queue_start; struct rx_queue_update_ramrod_data rx_queue_update; struct rx_queue_stop_ramrod_data rx_queue_stop; @@ -338,12 +339,14 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, * to the internal RAM of the UStorm by the Function Start Ramrod. * * @param p_hwfn + * @param p_tunn * @param mode * * @return int */ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, + struct qed_tunn_start_params *p_tunn, enum qed_mf_mode mode); /** @@ -362,4 +365,18 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, int qed_sp_pf_stop(struct qed_hwfn *p_hwfn); +int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_tunn, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data); +/** + * @brief qed_sp_heartbeat_ramrod - Send empty Ramrod + * + * @param p_hwfn + * + * @return int + */ + +int qed_sp_heartbeat_ramrod(struct qed_hwfn *p_hwfn); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 1c06c37d4c3d..9f9bc10d0f6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -87,7 +87,217 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, return 0; } +static enum tunnel_clss qed_tunn_get_clss_type(u8 type) +{ + switch (type) { + case QED_TUNN_CLSS_MAC_VLAN: + return TUNNEL_CLSS_MAC_VLAN; + case QED_TUNN_CLSS_MAC_VNI: + return TUNNEL_CLSS_MAC_VNI; + case QED_TUNN_CLSS_INNER_MAC_VLAN: + return TUNNEL_CLSS_INNER_MAC_VLAN; + case QED_TUNN_CLSS_INNER_MAC_VNI: + return TUNNEL_CLSS_INNER_MAC_VNI; + default: + return TUNNEL_CLSS_MAC_VLAN; + } +} + +static void +qed_tunn_set_pf_fix_tunn_mode(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_src, + struct pf_update_tunnel_config *p_tunn_cfg) +{ + unsigned long cached_tunn_mode = p_hwfn->cdev->tunn_mode; + unsigned long update_mask = p_src->tunn_mode_update_mask; + unsigned long tunn_mode = p_src->tunn_mode; + unsigned long new_tunn_mode = 0; + + if (test_bit(QED_MODE_L2GRE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_L2GRE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_L2GRE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_L2GRE_TUNN, &new_tunn_mode); + } + + if (test_bit(QED_MODE_IPGRE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_IPGRE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_IPGRE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_IPGRE_TUNN, &new_tunn_mode); + } + + if (test_bit(QED_MODE_VXLAN_TUNN, &update_mask)) { + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + __set_bit(QED_MODE_VXLAN_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_VXLAN_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_VXLAN_TUNN, &new_tunn_mode); + } + + if (p_src->update_geneve_udp_port) { + p_tunn_cfg->set_geneve_udp_port_flg = 1; + p_tunn_cfg->geneve_udp_port = + cpu_to_le16(p_src->geneve_udp_port); + } + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_L2GENEVE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_L2GENEVE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_L2GENEVE_TUNN, &new_tunn_mode); + } + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_IPGENEVE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_IPGENEVE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_IPGENEVE_TUNN, &new_tunn_mode); + } + + p_src->tunn_mode = new_tunn_mode; +} + +static void +qed_tunn_set_pf_update_params(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_src, + struct pf_update_tunnel_config *p_tunn_cfg) +{ + unsigned long tunn_mode = p_src->tunn_mode; + enum tunnel_clss type; + + qed_tunn_set_pf_fix_tunn_mode(p_hwfn, p_src, p_tunn_cfg); + p_tunn_cfg->update_rx_pf_clss = p_src->update_rx_pf_clss; + p_tunn_cfg->update_tx_pf_clss = p_src->update_tx_pf_clss; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_vxlan); + p_tunn_cfg->tunnel_clss_vxlan = type; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2gre); + p_tunn_cfg->tunnel_clss_l2gre = type; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgre); + p_tunn_cfg->tunnel_clss_ipgre = type; + + if (p_src->update_vxlan_udp_port) { + p_tunn_cfg->set_vxlan_udp_port_flg = 1; + p_tunn_cfg->vxlan_udp_port = cpu_to_le16(p_src->vxlan_udp_port); + } + + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2gre = 1; + + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgre = 1; + + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_vxlan = 1; + + if (p_src->update_geneve_udp_port) { + p_tunn_cfg->set_geneve_udp_port_flg = 1; + p_tunn_cfg->geneve_udp_port = + cpu_to_le16(p_src->geneve_udp_port); + } + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2geneve = 1; + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgeneve = 1; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2geneve); + p_tunn_cfg->tunnel_clss_l2geneve = type; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgeneve); + p_tunn_cfg->tunnel_clss_ipgeneve = type; +} + +static void qed_set_hw_tunn_mode(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + unsigned long tunn_mode) +{ + u8 l2gre_enable = 0, ipgre_enable = 0, vxlan_enable = 0; + u8 l2geneve_enable = 0, ipgeneve_enable = 0; + + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + l2gre_enable = 1; + + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + ipgre_enable = 1; + + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + vxlan_enable = 1; + + qed_set_gre_enable(p_hwfn, p_ptt, l2gre_enable, ipgre_enable); + qed_set_vxlan_enable(p_hwfn, p_ptt, vxlan_enable); + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + l2geneve_enable = 1; + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + ipgeneve_enable = 1; + + qed_set_geneve_enable(p_hwfn, p_ptt, l2geneve_enable, + ipgeneve_enable); +} + +static void +qed_tunn_set_pf_start_params(struct qed_hwfn *p_hwfn, + struct qed_tunn_start_params *p_src, + struct pf_start_tunnel_config *p_tunn_cfg) +{ + unsigned long tunn_mode; + enum tunnel_clss type; + + if (!p_src) + return; + + tunn_mode = p_src->tunn_mode; + type = qed_tunn_get_clss_type(p_src->tunn_clss_vxlan); + p_tunn_cfg->tunnel_clss_vxlan = type; + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2gre); + p_tunn_cfg->tunnel_clss_l2gre = type; + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgre); + p_tunn_cfg->tunnel_clss_ipgre = type; + + if (p_src->update_vxlan_udp_port) { + p_tunn_cfg->set_vxlan_udp_port_flg = 1; + p_tunn_cfg->vxlan_udp_port = cpu_to_le16(p_src->vxlan_udp_port); + } + + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2gre = 1; + + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgre = 1; + + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_vxlan = 1; + + if (p_src->update_geneve_udp_port) { + p_tunn_cfg->set_geneve_udp_port_flg = 1; + p_tunn_cfg->geneve_udp_port = + cpu_to_le16(p_src->geneve_udp_port); + } + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2geneve = 1; + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgeneve = 1; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2geneve); + p_tunn_cfg->tunnel_clss_l2geneve = type; + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgeneve); + p_tunn_cfg->tunnel_clss_ipgeneve = type; +} + int qed_sp_pf_start(struct qed_hwfn *p_hwfn, + struct qed_tunn_start_params *p_tunn, enum qed_mf_mode mode) { struct pf_start_ramrod_data *p_ramrod = NULL; @@ -143,6 +353,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr, p_hwfn->p_consq->chain.pbl.p_phys_table); + qed_tunn_set_pf_start_params(p_hwfn, p_tunn, + &p_ramrod->tunnel_config); p_hwfn->hw_info.personality = PERSONALITY_ETH; DP_VERBOSE(p_hwfn, QED_MSG_SPQ, @@ -153,6 +365,49 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } +/* Set pf update ramrod command params */ +int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_tunn, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = comp_mode; + init_data.p_comp_data = p_comp_data; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + COMMON_RAMROD_PF_UPDATE, PROTOCOLID_COMMON, + &init_data); + if (rc) + return rc; + + qed_tunn_set_pf_update_params(p_hwfn, p_tunn, + &p_ent->ramrod.pf_update.tunnel_config); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + return rc; + + if (p_tunn->update_vxlan_udp_port) + qed_set_vxlan_dest_port(p_hwfn, p_hwfn->p_main_ptt, + p_tunn->vxlan_udp_port); + if (p_tunn->update_geneve_udp_port) + qed_set_geneve_dest_port(p_hwfn, p_hwfn->p_main_ptt, + p_tunn->geneve_udp_port); + + qed_set_hw_tunn_mode(p_hwfn, p_hwfn->p_main_ptt, p_tunn->tunn_mode); + p_hwfn->cdev->tunn_mode = p_tunn->tunn_mode; + + return rc; +} + int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; @@ -173,3 +428,24 @@ int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) return qed_spq_post(p_hwfn, p_ent, NULL); } + +int qed_sp_heartbeat_ramrod(struct qed_hwfn *p_hwfn) +{ + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + COMMON_RAMROD_EMPTY, PROTOCOLID_COMMON, + &init_data); + if (rc) + return rc; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 41c418909a5c..ff3ac0caad5b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -59,16 +59,16 @@ struct qede_stats { /* port */ u64 rx_64_byte_packets; - u64 rx_127_byte_packets; - u64 rx_255_byte_packets; - u64 rx_511_byte_packets; - u64 rx_1023_byte_packets; - u64 rx_1518_byte_packets; - u64 rx_1522_byte_packets; - u64 rx_2047_byte_packets; - u64 rx_4095_byte_packets; - u64 rx_9216_byte_packets; - u64 rx_16383_byte_packets; + u64 rx_65_to_127_byte_packets; + u64 rx_128_to_255_byte_packets; + u64 rx_256_to_511_byte_packets; + u64 rx_512_to_1023_byte_packets; + u64 rx_1024_to_1518_byte_packets; + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; @@ -169,6 +169,8 @@ struct qede_dev { bool accept_any_vlan; struct delayed_work sp_task; unsigned long sp_flags; + u16 vxlan_dst_port; + u16 geneve_dst_port; }; enum QEDE_STATE { @@ -288,8 +290,11 @@ struct qede_fastpath { #define QEDE_CSUM_ERROR BIT(0) #define QEDE_CSUM_UNNECESSARY BIT(1) +#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2) -#define QEDE_SP_RX_MODE 1 +#define QEDE_SP_RX_MODE 1 +#define QEDE_SP_VXLAN_PORT_CONFIG 2 +#define QEDE_SP_GENEVE_PORT_CONFIG 3 union qede_reload_args { u16 mtu; @@ -303,6 +308,10 @@ void qede_reload(struct qede_dev *edev, union qede_reload_args *args); int qede_change_mtu(struct net_device *dev, int new_mtu); void qede_fill_by_demand_stats(struct qede_dev *edev); +bool qede_has_rx_work(struct qede_rx_queue *rxq); +int qede_txq_has_work(struct qede_tx_queue *txq); +void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev, + u8 count); #define RX_RING_SIZE_POW 13 #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f0982f163670..0d04f163ae45 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -9,6 +9,7 @@ #include <linux/version.h> #include <linux/types.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/string.h> #include <linux/pci.h> @@ -27,6 +28,9 @@ #define QEDE_RQSTAT_STRING(stat_name) (#stat_name) #define QEDE_RQSTAT(stat_name) \ {QEDE_RQSTAT_OFFSET(stat_name), QEDE_RQSTAT_STRING(stat_name)} + +#define QEDE_SELFTEST_POLL_COUNT 100 + static const struct { u64 offset; char string[ETH_GSTRING_LEN]; @@ -59,16 +63,16 @@ static const struct { QEDE_STAT(tx_bcast_pkts), QEDE_PF_STAT(rx_64_byte_packets), - QEDE_PF_STAT(rx_127_byte_packets), - QEDE_PF_STAT(rx_255_byte_packets), - QEDE_PF_STAT(rx_511_byte_packets), - QEDE_PF_STAT(rx_1023_byte_packets), - QEDE_PF_STAT(rx_1518_byte_packets), - QEDE_PF_STAT(rx_1522_byte_packets), - QEDE_PF_STAT(rx_2047_byte_packets), - QEDE_PF_STAT(rx_4095_byte_packets), - QEDE_PF_STAT(rx_9216_byte_packets), - QEDE_PF_STAT(rx_16383_byte_packets), + QEDE_PF_STAT(rx_65_to_127_byte_packets), + QEDE_PF_STAT(rx_128_to_255_byte_packets), + QEDE_PF_STAT(rx_256_to_511_byte_packets), + QEDE_PF_STAT(rx_512_to_1023_byte_packets), + QEDE_PF_STAT(rx_1024_to_1518_byte_packets), + QEDE_PF_STAT(rx_1519_to_1522_byte_packets), + QEDE_PF_STAT(rx_1519_to_2047_byte_packets), + QEDE_PF_STAT(rx_2048_to_4095_byte_packets), + QEDE_PF_STAT(rx_4096_to_9216_byte_packets), + QEDE_PF_STAT(rx_9217_to_16383_byte_packets), QEDE_PF_STAT(tx_64_byte_packets), QEDE_PF_STAT(tx_65_to_127_byte_packets), QEDE_PF_STAT(tx_128_to_255_byte_packets), @@ -116,6 +120,32 @@ static const struct { #define QEDE_NUM_STATS ARRAY_SIZE(qede_stats_arr) +enum { + QEDE_PRI_FLAG_CMT, + QEDE_PRI_FLAG_LEN, +}; + +static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = { + "Coupled-Function", +}; + +enum qede_ethtool_tests { + QEDE_ETHTOOL_INT_LOOPBACK, + QEDE_ETHTOOL_INTERRUPT_TEST, + QEDE_ETHTOOL_MEMORY_TEST, + QEDE_ETHTOOL_REGISTER_TEST, + QEDE_ETHTOOL_CLOCK_TEST, + QEDE_ETHTOOL_TEST_MAX +}; + +static const char qede_tests_str_arr[QEDE_ETHTOOL_TEST_MAX][ETH_GSTRING_LEN] = { + "Internal loopback (offline)", + "Interrupt (online)\t", + "Memory (online)\t\t", + "Register (online)\t", + "Clock (online)\t\t", +}; + static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) { int i, j, k; @@ -139,6 +169,14 @@ static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf) case ETH_SS_STATS: qede_get_strings_stats(edev, buf); break; + case ETH_SS_PRIV_FLAGS: + memcpy(buf, qede_private_arr, + ETH_GSTRING_LEN * QEDE_PRI_FLAG_LEN); + break; + case ETH_SS_TEST: + memcpy(buf, qede_tests_str_arr, + ETH_GSTRING_LEN * QEDE_ETHTOOL_TEST_MAX); + break; default: DP_VERBOSE(edev, QED_MSG_DEBUG, "Unsupported stringset 0x%08x\n", stringset); @@ -177,7 +215,10 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) switch (stringset) { case ETH_SS_STATS: return num_stats + QEDE_NUM_RQSTATS; - + case ETH_SS_PRIV_FLAGS: + return QEDE_PRI_FLAG_LEN; + case ETH_SS_TEST: + return QEDE_ETHTOOL_TEST_MAX; default: DP_VERBOSE(edev, QED_MSG_DEBUG, "Unsupported stringset 0x%08x\n", stringset); @@ -185,6 +226,13 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) } } +static u32 qede_get_priv_flags(struct net_device *dev) +{ + struct qede_dev *edev = netdev_priv(dev); + + return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT; +} + static int qede_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct qede_dev *edev = netdev_priv(dev); @@ -217,9 +265,9 @@ static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct qed_link_params params; u32 speed; - if (!edev->dev_info.common.is_mf_default) { + if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { DP_INFO(edev, - "Link parameters can not be changed in non-default mode\n"); + "Link settings are not allowed to be changed\n"); return -EOPNOTSUPP; } @@ -328,6 +376,12 @@ static int qede_nway_reset(struct net_device *dev) struct qed_link_output current_link; struct qed_link_params link_params; + if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { + DP_INFO(edev, + "Link settings are not allowed to be changed\n"); + return -EOPNOTSUPP; + } + if (!netif_running(dev)) return 0; @@ -428,9 +482,9 @@ static int qede_set_pauseparam(struct net_device *dev, struct qed_link_params params; struct qed_link_output current_link; - if (!edev->dev_info.common.is_mf_default) { + if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { DP_INFO(edev, - "Pause parameters can not be updated in non-default mode\n"); + "Pause settings are not allowed to be changed\n"); return -EOPNOTSUPP; } @@ -799,6 +853,267 @@ static int qede_set_rxfh(struct net_device *dev, const u32 *indir, return 0; } +/* This function enables the interrupt generation and the NAPI on the device */ +static void qede_netif_start(struct qede_dev *edev) +{ + int i; + + if (!netif_running(edev->ndev)) + return; + + for_each_rss(i) { + /* Update and reenable interrupts */ + qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_ENABLE, 1); + napi_enable(&edev->fp_array[i].napi); + } +} + +/* This function disables the NAPI and the interrupt generation on the device */ +static void qede_netif_stop(struct qede_dev *edev) +{ + int i; + + for_each_rss(i) { + napi_disable(&edev->fp_array[i].napi); + /* Disable interrupts */ + qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_DISABLE, 0); + } +} + +static int qede_selftest_transmit_traffic(struct qede_dev *edev, + struct sk_buff *skb) +{ + struct qede_tx_queue *txq = &edev->fp_array[0].txqs[0]; + struct eth_tx_1st_bd *first_bd; + dma_addr_t mapping; + int i, idx, val; + + /* Fill the entry in the SW ring and the BDs in the FW ring */ + idx = txq->sw_tx_prod & NUM_TX_BDS_MAX; + txq->sw_tx_ring[idx].skb = skb; + first_bd = qed_chain_produce(&txq->tx_pbl); + memset(first_bd, 0, sizeof(*first_bd)); + val = 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT; + first_bd->data.bd_flags.bitfields = val; + + /* Map skb linear data for DMA and set in the first BD */ + mapping = dma_map_single(&edev->pdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { + DP_NOTICE(edev, "SKB mapping failed\n"); + return -ENOMEM; + } + BD_SET_UNMAP_ADDR_LEN(first_bd, mapping, skb_headlen(skb)); + + /* update the first BD with the actual num BDs */ + first_bd->data.nbds = 1; + txq->sw_tx_prod++; + /* 'next page' entries are counted in the producer value */ + val = cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl)); + txq->tx_db.data.bd_prod = val; + + /* wmb makes sure that the BDs data is updated before updating the + * producer, otherwise FW may read old data from the BDs. + */ + wmb(); + barrier(); + writel(txq->tx_db.raw, txq->doorbell_addr); + + /* mmiowb is needed to synchronize doorbell writes from more than one + * processor. It guarantees that the write arrives to the device before + * the queue lock is released and another start_xmit is called (possibly + * on another CPU). Without this barrier, the next doorbell can bypass + * this doorbell. This is applicable to IA64/Altix systems. + */ + mmiowb(); + + for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) { + if (qede_txq_has_work(txq)) + break; + usleep_range(100, 200); + } + + if (!qede_txq_has_work(txq)) { + DP_NOTICE(edev, "Tx completion didn't happen\n"); + return -1; + } + + first_bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl); + dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE); + txq->sw_tx_cons++; + txq->sw_tx_ring[idx].skb = NULL; + + return 0; +} + +static int qede_selftest_receive_traffic(struct qede_dev *edev) +{ + struct qede_rx_queue *rxq = edev->fp_array[0].rxq; + u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len; + struct eth_fast_path_rx_reg_cqe *fp_cqe; + struct sw_rx_data *sw_rx_data; + union eth_rx_cqe *cqe; + u8 *data_ptr; + int i; + + /* The packet is expected to receive on rx-queue 0 even though RSS is + * enabled. This is because the queue 0 is configured as the default + * queue and that the loopback traffic is not IP. + */ + for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) { + if (qede_has_rx_work(rxq)) + break; + usleep_range(100, 200); + } + + if (!qede_has_rx_work(rxq)) { + DP_NOTICE(edev, "Failed to receive the traffic\n"); + return -1; + } + + hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); + sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); + + /* Memory barrier to prevent the CPU from doing speculative reads of CQE + * / BD before reading hw_comp_cons. If the CQE is read before it is + * written by FW, then FW writes CQE and SB, and then the CPU reads the + * hw_comp_cons, it will use an old CQE. + */ + rmb(); + + /* Get the CQE from the completion ring */ + cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); + + /* Get the data from the SW ring */ + sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; + sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; + fp_cqe = &cqe->fast_path_regular; + len = le16_to_cpu(fp_cqe->len_on_first_bd); + data_ptr = (u8 *)(page_address(sw_rx_data->data) + + fp_cqe->placement_offset + sw_rx_data->page_offset); + for (i = ETH_HLEN; i < len; i++) + if (data_ptr[i] != (unsigned char)(i & 0xff)) { + DP_NOTICE(edev, "Loopback test failed\n"); + qede_recycle_rx_bd_ring(rxq, edev, 1); + return -1; + } + + qede_recycle_rx_bd_ring(rxq, edev, 1); + + return 0; +} + +static int qede_selftest_run_loopback(struct qede_dev *edev, u32 loopback_mode) +{ + struct qed_link_params link_params; + struct sk_buff *skb = NULL; + int rc = 0, i; + u32 pkt_size; + u8 *packet; + + if (!netif_running(edev->ndev)) { + DP_NOTICE(edev, "Interface is down\n"); + return -EINVAL; + } + + qede_netif_stop(edev); + + /* Bring up the link in Loopback mode */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = true; + link_params.override_flags = QED_LINK_OVERRIDE_LOOPBACK_MODE; + link_params.loopback_mode = loopback_mode; + edev->ops->common->set_link(edev->cdev, &link_params); + + /* Wait for loopback configuration to apply */ + msleep_interruptible(500); + + /* prepare the loopback packet */ + pkt_size = edev->ndev->mtu + ETH_HLEN; + + skb = netdev_alloc_skb(edev->ndev, pkt_size); + if (!skb) { + DP_INFO(edev, "Can't allocate skb\n"); + rc = -ENOMEM; + goto test_loopback_exit; + } + packet = skb_put(skb, pkt_size); + ether_addr_copy(packet, edev->ndev->dev_addr); + ether_addr_copy(packet + ETH_ALEN, edev->ndev->dev_addr); + memset(packet + (2 * ETH_ALEN), 0x77, (ETH_HLEN - (2 * ETH_ALEN))); + for (i = ETH_HLEN; i < pkt_size; i++) + packet[i] = (unsigned char)(i & 0xff); + + rc = qede_selftest_transmit_traffic(edev, skb); + if (rc) + goto test_loopback_exit; + + rc = qede_selftest_receive_traffic(edev); + if (rc) + goto test_loopback_exit; + + DP_VERBOSE(edev, NETIF_MSG_RX_STATUS, "Loopback test successful\n"); + +test_loopback_exit: + dev_kfree_skb(skb); + + /* Bring up the link in Normal mode */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = true; + link_params.override_flags = QED_LINK_OVERRIDE_LOOPBACK_MODE; + link_params.loopback_mode = QED_LINK_LOOPBACK_NONE; + edev->ops->common->set_link(edev->cdev, &link_params); + + /* Wait for loopback configuration to apply */ + msleep_interruptible(500); + + qede_netif_start(edev); + + return rc; +} + +static void qede_self_test(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, QED_MSG_DEBUG, + "Self-test command parameters: offline = %d, external_lb = %d\n", + (etest->flags & ETH_TEST_FL_OFFLINE), + (etest->flags & ETH_TEST_FL_EXTERNAL_LB) >> 2); + + memset(buf, 0, sizeof(u64) * QEDE_ETHTOOL_TEST_MAX); + + if (etest->flags & ETH_TEST_FL_OFFLINE) { + if (qede_selftest_run_loopback(edev, + QED_LINK_LOOPBACK_INT_PHY)) { + buf[QEDE_ETHTOOL_INT_LOOPBACK] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } + } + + if (edev->ops->common->selftest->selftest_interrupt(edev->cdev)) { + buf[QEDE_ETHTOOL_INTERRUPT_TEST] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } + + if (edev->ops->common->selftest->selftest_memory(edev->cdev)) { + buf[QEDE_ETHTOOL_MEMORY_TEST] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } + + if (edev->ops->common->selftest->selftest_register(edev->cdev)) { + buf[QEDE_ETHTOOL_REGISTER_TEST] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } + + if (edev->ops->common->selftest->selftest_clock(edev->cdev)) { + buf[QEDE_ETHTOOL_CLOCK_TEST] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } +} + static const struct ethtool_ops qede_ethtool_ops = { .get_settings = qede_get_settings, .set_settings = qede_set_settings, @@ -814,6 +1129,7 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_strings = qede_get_strings, .set_phys_id = qede_set_phys_id, .get_ethtool_stats = qede_get_ethtool_stats, + .get_priv_flags = qede_get_priv_flags, .get_sset_count = qede_get_sset_count, .get_rxnfc = qede_get_rxnfc, .set_rxnfc = qede_set_rxnfc, @@ -823,6 +1139,7 @@ static const struct ethtool_ops qede_ethtool_ops = { .set_rxfh = qede_set_rxfh, .get_channels = qede_get_channels, .set_channels = qede_set_channels, + .self_test = qede_self_test, }; void qede_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 457caad2e752..82d85ccc9ed1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -24,7 +24,12 @@ #include <linux/netdev_features.h> #include <linux/udp.h> #include <linux/tcp.h> +#ifdef CONFIG_QEDE_VXLAN #include <net/vxlan.h> +#endif +#ifdef CONFIG_QEDE_GENEVE +#include <net/geneve.h> +#endif #include <linux/ip.h> #include <net/ipv6.h> #include <net/tcp.h> @@ -310,6 +315,9 @@ static u32 qede_xmit_type(struct qede_dev *edev, (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6)) *ipv6_ext = 1; + if (skb->encapsulation) + rc |= XMIT_ENC; + if (skb_is_gso(skb)) rc |= XMIT_LSO; @@ -371,6 +379,16 @@ static int map_frag_to_bd(struct qede_dev *edev, return 0; } +static u16 qede_get_skb_hlen(struct sk_buff *skb, bool is_encap_pkt) +{ + if (is_encap_pkt) + return (skb_inner_transport_header(skb) + + inner_tcp_hdrlen(skb) - skb->data); + else + return (skb_transport_header(skb) + + tcp_hdrlen(skb) - skb->data); +} + /* +2 for 1st BD for headers and 2nd BD for headlen (if required) */ #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) static bool qede_pkt_req_lin(struct qede_dev *edev, struct sk_buff *skb, @@ -381,8 +399,7 @@ static bool qede_pkt_req_lin(struct qede_dev *edev, struct sk_buff *skb, if (xmit_type & XMIT_LSO) { int hlen; - hlen = skb_transport_header(skb) + - tcp_hdrlen(skb) - skb->data; + hlen = qede_get_skb_hlen(skb, xmit_type & XMIT_ENC); /* linear payload would require its own BD */ if (skb_headlen(skb) > hlen) @@ -490,7 +507,18 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, first_bd->data.bd_flags.bitfields |= 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT; - first_bd->data.bitfields |= cpu_to_le16(temp); + if (xmit_type & XMIT_ENC) { + first_bd->data.bd_flags.bitfields |= + 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT; + } else { + /* In cases when OS doesn't indicate for inner offloads + * when packet is tunnelled, we need to override the HW + * tunnel configuration so that packets are treated as + * regular non tunnelled packets and no inner offloads + * are done by the hardware. + */ + first_bd->data.bitfields |= cpu_to_le16(temp); + } /* If the packet is IPv6 with extension header, indicate that * to FW and pass few params, since the device cracker doesn't @@ -506,10 +534,15 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, third_bd->data.lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size); - first_bd->data.bd_flags.bitfields |= - 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT; - hlen = skb_transport_header(skb) + - tcp_hdrlen(skb) - skb->data; + if (unlikely(xmit_type & XMIT_ENC)) { + first_bd->data.bd_flags.bitfields |= + 1 << ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT; + hlen = qede_get_skb_hlen(skb, true); + } else { + first_bd->data.bd_flags.bitfields |= + 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT; + hlen = qede_get_skb_hlen(skb, false); + } /* @@@TBD - if will not be removed need to check */ third_bd->data.bitfields |= @@ -635,7 +668,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } -static int qede_txq_has_work(struct qede_tx_queue *txq) +int qede_txq_has_work(struct qede_tx_queue *txq) { u16 hw_bd_cons; @@ -718,7 +751,7 @@ static int qede_tx_int(struct qede_dev *edev, return 0; } -static bool qede_has_rx_work(struct qede_rx_queue *rxq) +bool qede_has_rx_work(struct qede_rx_queue *rxq) { u16 hw_comp_cons, sw_comp_cons; @@ -741,6 +774,12 @@ static bool qede_has_tx_work(struct qede_fastpath *fp) return false; } +static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq) +{ + qed_chain_consume(&rxq->rx_bd_ring); + rxq->sw_rx_cons++; +} + /* This function reuses the buffer(from an offset) from * consumer index to producer index in the bd ring */ @@ -764,6 +803,21 @@ static inline void qede_reuse_page(struct qede_dev *edev, curr_cons->data = NULL; } +/* In case of allocation failures reuse buffers + * from consumer index to produce buffers for firmware + */ +void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, + struct qede_dev *edev, u8 count) +{ + struct sw_rx_data *curr_cons; + + for (; count > 0; count--) { + curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX]; + qede_reuse_page(edev, rxq, curr_cons); + qede_rx_bd_ring_consume(rxq); + } +} + static inline int qede_realloc_rx_buffer(struct qede_dev *edev, struct qede_rx_queue *rxq, struct sw_rx_data *curr_cons) @@ -772,8 +826,14 @@ static inline int qede_realloc_rx_buffer(struct qede_dev *edev, curr_cons->page_offset += rxq->rx_buf_seg_size; if (curr_cons->page_offset == PAGE_SIZE) { - if (unlikely(qede_alloc_rx_buffer(edev, rxq))) + if (unlikely(qede_alloc_rx_buffer(edev, rxq))) { + /* Since we failed to allocate new buffer + * current buffer can be used again. + */ + curr_cons->page_offset -= rxq->rx_buf_seg_size; + return -ENOMEM; + } dma_unmap_page(&edev->pdev->dev, curr_cons->mapping, PAGE_SIZE, DMA_FROM_DEVICE); @@ -843,6 +903,9 @@ static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag) if (csum_flag & QEDE_CSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (csum_flag & QEDE_TUNN_CSUM_UNNECESSARY) + skb->csum_level = 1; } static inline void qede_skb_receive(struct qede_dev *edev, @@ -892,7 +955,10 @@ static int qede_fill_frag_skb(struct qede_dev *edev, len_on_bd); if (unlikely(qede_realloc_rx_buffer(edev, rxq, current_bd))) { - tpa_info->agg_state = QEDE_AGG_STATE_ERROR; + /* Incr page ref count to reuse on allocation failure + * so that it doesn't get freed while freeing SKB. + */ + atomic_inc(¤t_bd->data->_count); goto out; } @@ -906,6 +972,8 @@ static int qede_fill_frag_skb(struct qede_dev *edev, return 0; out: + tpa_info->agg_state = QEDE_AGG_STATE_ERROR; + qede_recycle_rx_bd_ring(rxq, edev, 1); return -ENOMEM; } @@ -957,8 +1025,9 @@ static void qede_tpa_start(struct qede_dev *edev, tpa_info->skb = netdev_alloc_skb(edev->ndev, le16_to_cpu(cqe->len_on_first_bd)); if (unlikely(!tpa_info->skb)) { + DP_NOTICE(edev, "Failed to allocate SKB for gro\n"); tpa_info->agg_state = QEDE_AGG_STATE_ERROR; - return; + goto cons_buf; } skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd)); @@ -981,6 +1050,7 @@ static void qede_tpa_start(struct qede_dev *edev, /* This is needed in order to enable forwarding support */ qede_set_gro_params(edev, tpa_info->skb, cqe); +cons_buf: /* We still need to handle bd_len_list to consume buffers */ if (likely(cqe->ext_bd_len_list[0])) qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index, le16_to_cpu(cqe->ext_bd_len_list[0])); @@ -998,7 +1068,6 @@ static void qede_gro_ip_csum(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th; - skb_set_network_header(skb, 0); skb_set_transport_header(skb, sizeof(struct iphdr)); th = tcp_hdr(skb); @@ -1013,7 +1082,6 @@ static void qede_gro_ipv6_csum(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); struct tcphdr *th; - skb_set_network_header(skb, 0); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); th = tcp_hdr(skb); @@ -1028,8 +1096,21 @@ static void qede_gro_receive(struct qede_dev *edev, struct sk_buff *skb, u16 vlan_tag) { + /* FW can send a single MTU sized packet from gro flow + * due to aggregation timeout/last segment etc. which + * is not expected to be a gro packet. If a skb has zero + * frags then simply push it in the stack as non gso skb. + */ + if (unlikely(!skb->data_len)) { + skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->gso_size = 0; + goto send_skb; + } + #ifdef CONFIG_INET if (skb_shinfo(skb)->gso_size) { + skb_set_network_header(skb, 0); + switch (skb->protocol) { case htons(ETH_P_IP): qede_gro_ip_csum(skb); @@ -1044,6 +1125,8 @@ static void qede_gro_receive(struct qede_dev *edev, } } #endif + +send_skb: skb_record_rx_queue(skb, fp->rss_id); qede_skb_receive(edev, fp, skb, vlan_tag); } @@ -1132,13 +1215,47 @@ err: tpa_info->skb = NULL; } -static u8 qede_check_csum(u16 flag) +static bool qede_tunn_exist(u16 flag) +{ + return !!(flag & (PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK << + PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT)); +} + +static u8 qede_check_tunn_csum(u16 flag) +{ + u16 csum_flag = 0; + u8 tcsum = 0; + + if (flag & (PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK << + PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT)) + csum_flag |= PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK << + PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT; + + if (flag & (PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK << + PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)) { + csum_flag |= PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK << + PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT; + tcsum = QEDE_TUNN_CSUM_UNNECESSARY; + } + + csum_flag |= PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK << + PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT | + PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK << + PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT; + + if (csum_flag & flag) + return QEDE_CSUM_ERROR; + + return QEDE_CSUM_UNNECESSARY | tcsum; +} + +static u8 qede_check_notunn_csum(u16 flag) { u16 csum_flag = 0; u8 csum = 0; - if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK << - PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag) { + if (flag & (PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK << + PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)) { csum_flag |= PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK << PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT; csum = QEDE_CSUM_UNNECESSARY; @@ -1153,6 +1270,14 @@ static u8 qede_check_csum(u16 flag) return csum; } +static u8 qede_check_csum(u16 flag) +{ + if (!qede_tunn_exist(flag)) + return qede_check_notunn_csum(flag); + else + return qede_check_tunn_csum(flag); +} + static int qede_rx_int(struct qede_fastpath *fp, int budget) { struct qede_dev *edev = fp->edev; @@ -1235,17 +1360,17 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n", sw_comp_cons, parse_flag); rxq->rx_hw_errors++; - qede_reuse_page(edev, rxq, sw_rx_data); - goto next_rx; + qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num); + goto next_cqe; } skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); if (unlikely(!skb)) { DP_NOTICE(edev, "Build_skb failed, dropping incoming packet\n"); - qede_reuse_page(edev, rxq, sw_rx_data); + qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num); rxq->rx_alloc_errors++; - goto next_rx; + goto next_cqe; } /* Copy data into SKB */ @@ -1279,11 +1404,22 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) if (unlikely(qede_realloc_rx_buffer(edev, rxq, sw_rx_data))) { DP_ERR(edev, "Failed to allocate rx buffer\n"); + /* Incr page ref count to reuse on allocation + * failure so that it doesn't get freed while + * freeing SKB. + */ + + atomic_inc(&sw_rx_data->data->_count); rxq->rx_alloc_errors++; + qede_recycle_rx_bd_ring(rxq, edev, + fp_cqe->bd_num); + dev_kfree_skb_any(skb); goto next_cqe; } } + qede_rx_bd_ring_consume(rxq); + if (fp_cqe->bd_num != 1) { u16 pkt_len = le16_to_cpu(fp_cqe->pkt_len); u8 num_frags; @@ -1294,18 +1430,27 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) num_frags--) { u16 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size : pkt_len; + if (unlikely(!cur_size)) { + DP_ERR(edev, + "Still got %d BDs for mapping jumbo, but length became 0\n", + num_frags); + qede_recycle_rx_bd_ring(rxq, edev, + num_frags); + dev_kfree_skb_any(skb); + goto next_cqe; + } - WARN_ONCE(!cur_size, - "Still got %d BDs for mapping jumbo, but length became 0\n", - num_frags); - - if (unlikely(qede_alloc_rx_buffer(edev, rxq))) + if (unlikely(qede_alloc_rx_buffer(edev, rxq))) { + qede_recycle_rx_bd_ring(rxq, edev, + num_frags); + dev_kfree_skb_any(skb); goto next_cqe; + } - rxq->sw_rx_cons++; sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; - qed_chain_consume(&rxq->rx_bd_ring); + qede_rx_bd_ring_consume(rxq); + dma_unmap_page(&edev->pdev->dev, sw_rx_data->mapping, PAGE_SIZE, DMA_FROM_DEVICE); @@ -1321,7 +1466,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) pkt_len -= cur_size; } - if (pkt_len) + if (unlikely(pkt_len)) DP_ERR(edev, "Mapped all BDs of jumbo, but still have %d bytes\n", pkt_len); @@ -1340,10 +1485,6 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) skb_record_rx_queue(skb, fp->rss_id); qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag)); - - qed_chain_consume(&rxq->rx_bd_ring); -next_rx: - rxq->sw_rx_cons++; next_rx_only: rx_pkt++; @@ -1497,16 +1638,25 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->stats.coalesced_bytes = stats.tpa_coalesced_bytes; edev->stats.rx_64_byte_packets = stats.rx_64_byte_packets; - edev->stats.rx_127_byte_packets = stats.rx_127_byte_packets; - edev->stats.rx_255_byte_packets = stats.rx_255_byte_packets; - edev->stats.rx_511_byte_packets = stats.rx_511_byte_packets; - edev->stats.rx_1023_byte_packets = stats.rx_1023_byte_packets; - edev->stats.rx_1518_byte_packets = stats.rx_1518_byte_packets; - edev->stats.rx_1522_byte_packets = stats.rx_1522_byte_packets; - edev->stats.rx_2047_byte_packets = stats.rx_2047_byte_packets; - edev->stats.rx_4095_byte_packets = stats.rx_4095_byte_packets; - edev->stats.rx_9216_byte_packets = stats.rx_9216_byte_packets; - edev->stats.rx_16383_byte_packets = stats.rx_16383_byte_packets; + edev->stats.rx_65_to_127_byte_packets = stats.rx_65_to_127_byte_packets; + edev->stats.rx_128_to_255_byte_packets = + stats.rx_128_to_255_byte_packets; + edev->stats.rx_256_to_511_byte_packets = + stats.rx_256_to_511_byte_packets; + edev->stats.rx_512_to_1023_byte_packets = + stats.rx_512_to_1023_byte_packets; + edev->stats.rx_1024_to_1518_byte_packets = + stats.rx_1024_to_1518_byte_packets; + edev->stats.rx_1519_to_1522_byte_packets = + stats.rx_1519_to_1522_byte_packets; + edev->stats.rx_1519_to_2047_byte_packets = + stats.rx_1519_to_2047_byte_packets; + edev->stats.rx_2048_to_4095_byte_packets = + stats.rx_2048_to_4095_byte_packets; + edev->stats.rx_4096_to_9216_byte_packets = + stats.rx_4096_to_9216_byte_packets; + edev->stats.rx_9217_to_16383_byte_packets = + stats.rx_9217_to_16383_byte_packets; edev->stats.rx_crc_errors = stats.rx_crc_errors; edev->stats.rx_mac_crtl_frames = stats.rx_mac_crtl_frames; edev->stats.rx_pause_frames = stats.rx_pause_frames; @@ -1821,6 +1971,76 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) edev->accept_any_vlan = false; } +#ifdef CONFIG_QEDE_VXLAN +static void qede_add_vxlan_port(struct net_device *dev, + sa_family_t sa_family, __be16 port) +{ + struct qede_dev *edev = netdev_priv(dev); + u16 t_port = ntohs(port); + + if (edev->vxlan_dst_port) + return; + + edev->vxlan_dst_port = t_port; + + DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d", t_port); + + set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); +} + +static void qede_del_vxlan_port(struct net_device *dev, + sa_family_t sa_family, __be16 port) +{ + struct qede_dev *edev = netdev_priv(dev); + u16 t_port = ntohs(port); + + if (t_port != edev->vxlan_dst_port) + return; + + edev->vxlan_dst_port = 0; + + DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d", t_port); + + set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); +} +#endif + +#ifdef CONFIG_QEDE_GENEVE +static void qede_add_geneve_port(struct net_device *dev, + sa_family_t sa_family, __be16 port) +{ + struct qede_dev *edev = netdev_priv(dev); + u16 t_port = ntohs(port); + + if (edev->geneve_dst_port) + return; + + edev->geneve_dst_port = t_port; + + DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d", t_port); + set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); +} + +static void qede_del_geneve_port(struct net_device *dev, + sa_family_t sa_family, __be16 port) +{ + struct qede_dev *edev = netdev_priv(dev); + u16 t_port = ntohs(port); + + if (t_port != edev->geneve_dst_port) + return; + + edev->geneve_dst_port = 0; + + DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d", t_port); + set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); +} +#endif + static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, @@ -1832,6 +2052,14 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid, .ndo_get_stats64 = qede_get_stats64, +#ifdef CONFIG_QEDE_VXLAN + .ndo_add_vxlan_port = qede_add_vxlan_port, + .ndo_del_vxlan_port = qede_del_vxlan_port, +#endif +#ifdef CONFIG_QEDE_GENEVE + .ndo_add_geneve_port = qede_add_geneve_port, + .ndo_del_geneve_port = qede_del_geneve_port, +#endif }; /* ------------------------------------------------------------------------- @@ -1904,6 +2132,14 @@ static void qede_init_ndev(struct qede_dev *edev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + /* Encap features*/ + hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_TSO_ECN; + ndev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO_ECN | + NETIF_F_TSO6 | NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM; + ndev->vlan_features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM | NETIF_F_HIGHDMA; ndev->features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM | @@ -2004,6 +2240,8 @@ static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + struct qed_dev *cdev = edev->cdev; + mutex_lock(&edev->qede_lock); if (edev->state == QEDE_STATE_OPEN) { @@ -2011,6 +2249,24 @@ static void qede_sp_task(struct work_struct *work) qede_config_rx_mode(edev->ndev); } + if (test_and_clear_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags)) { + struct qed_tunn_params tunn_params; + + memset(&tunn_params, 0, sizeof(tunn_params)); + tunn_params.update_vxlan_port = 1; + tunn_params.vxlan_port = edev->vxlan_dst_port; + qed_ops->tunn_config(cdev, &tunn_params); + } + + if (test_and_clear_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags)) { + struct qed_tunn_params tunn_params; + + memset(&tunn_params, 0, sizeof(tunn_params)); + tunn_params.update_geneve_port = 1; + tunn_params.geneve_port = edev->geneve_dst_port; + qed_ops->tunn_config(cdev, &tunn_params); + } + mutex_unlock(&edev->qede_lock); } @@ -2248,7 +2504,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; struct sw_rx_data *replace_buf = &tpa_info->replace_buf; - if (replace_buf) { + if (replace_buf->data) { dma_unmap_page(&edev->pdev->dev, dma_unmap_addr(replace_buf, mapping), PAGE_SIZE, DMA_FROM_DEVICE); @@ -2368,7 +2624,7 @@ err: static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) { - int i, rc, size, num_allocated; + int i, rc, size; rxq->num_rx_buffers = edev->q_num_rx_buffers; @@ -2385,6 +2641,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rxq->sw_rx_ring = kzalloc(size, GFP_KERNEL); if (!rxq->sw_rx_ring) { DP_ERR(edev, "Rx buffers ring allocation failed\n"); + rc = -ENOMEM; goto err; } @@ -2412,26 +2669,16 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, /* Allocate buffers for the Rx ring */ for (i = 0; i < rxq->num_rx_buffers; i++) { rc = qede_alloc_rx_buffer(edev, rxq); - if (rc) - break; - } - num_allocated = i; - if (!num_allocated) { - DP_ERR(edev, "Rx buffers allocation failed\n"); - goto err; - } else if (num_allocated < rxq->num_rx_buffers) { - DP_NOTICE(edev, - "Allocated less buffers than desired (%d allocated)\n", - num_allocated); + if (rc) { + DP_ERR(edev, + "Rx buffers allocation failed at index %d\n", i); + goto err; + } } - qede_alloc_sge_mem(edev, rxq); - - return 0; - + rc = qede_alloc_sge_mem(edev, rxq); err: - qede_free_mem_rxq(edev, rxq); - return -ENOMEM; + return rc; } static void qede_free_mem_txq(struct qede_dev *edev, @@ -2514,10 +2761,8 @@ static int qede_alloc_mem_fp(struct qede_dev *edev, } return 0; - err: - qede_free_mem_fp(edev, fp); - return -ENOMEM; + return rc; } static void qede_free_mem_load(struct qede_dev *edev) @@ -2540,22 +2785,13 @@ static int qede_alloc_mem_load(struct qede_dev *edev) struct qede_fastpath *fp = &edev->fp_array[rss_id]; rc = qede_alloc_mem_fp(edev, fp); - if (rc) - break; - } - - if (rss_id != QEDE_RSS_CNT(edev)) { - /* Failed allocating memory for all the queues */ - if (!rss_id) { + if (rc) { DP_ERR(edev, - "Failed to allocate memory for the leading queue\n"); - rc = -ENOMEM; - } else { - DP_NOTICE(edev, - "Failed to allocate memory for all of RSS queues\n Desired: %d queues, allocated: %d queues\n", - QEDE_RSS_CNT(edev), rss_id); + "Failed to allocate memory for fastpath - rss id = %d\n", + rss_id); + qede_free_mem_load(edev); + return rc; } - edev->num_rss = rss_id; } return 0; @@ -3149,12 +3385,24 @@ void qede_reload(struct qede_dev *edev, static int qede_open(struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); + int rc; netif_carrier_off(ndev); edev->ops->common->set_power_state(edev->cdev, PCI_D0); - return qede_load(edev, QEDE_LOAD_NORMAL); + rc = qede_load(edev, QEDE_LOAD_NORMAL); + + if (rc) + return rc; + +#ifdef CONFIG_QEDE_VXLAN + vxlan_get_rx_port(ndev); +#endif +#ifdef CONFIG_QEDE_GENEVE + geneve_get_rx_port(ndev); +#endif + return 0; } static int qede_close(struct net_device *ndev) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 55007f1e6bbc..caf6ddb7ea76 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -37,8 +37,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 3 -#define _QLCNIC_LINUX_SUBVERSION 63 -#define QLCNIC_LINUX_VERSIONID "5.3.63" +#define _QLCNIC_LINUX_SUBVERSION 64 +#define QLCNIC_LINUX_VERSIONID "5.3.64" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 1205f6f9c941..1c29105b6c36 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -3952,8 +3952,14 @@ static pci_ers_result_t qlcnic_82xx_io_error_detected(struct pci_dev *pdev, static pci_ers_result_t qlcnic_82xx_io_slot_reset(struct pci_dev *pdev) { - return qlcnic_attach_func(pdev) ? PCI_ERS_RESULT_DISCONNECT : - PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t res; + + rtnl_lock(); + res = qlcnic_attach_func(pdev) ? PCI_ERS_RESULT_DISCONNECT : + PCI_ERS_RESULT_RECOVERED; + rtnl_unlock(); + + return res; } static void qlcnic_82xx_io_resume(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index b28e73ea2c25..83d72106471c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4687,7 +4687,7 @@ static int ql_init_device(struct pci_dev *pdev, struct net_device *ndev, /* * Set up the operating parameters. */ - qdev->workqueue = create_singlethread_workqueue(ndev->name); + qdev->workqueue = alloc_ordered_workqueue(ndev->name, WQ_MEM_RECLAIM); INIT_DELAYED_WORK(&qdev->asic_reset_work, ql_asic_reset_work); INIT_DELAYED_WORK(&qdev->mpi_reset_work, ql_mpi_reset_work); INIT_DELAYED_WORK(&qdev->mpi_work, ql_mpi_work); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0f1b314f4d64..238b56feb1ce 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1854,6 +1854,9 @@ static int ravb_set_gti(struct net_device *ndev) rate = clk_get_rate(clk); clk_put(clk); + if (!rate) + return -EINVAL; + inc = 1000000000ULL << 20; do_div(inc, rate); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 004e2d7560fd..07e29638299f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -482,7 +482,7 @@ static void sh_eth_chip_reset(struct net_device *ndev) struct sh_eth_private *mdp = netdev_priv(ndev); /* reset device */ - sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); + sh_eth_tsu_write(mdp, ARSTR_ARST, ARSTR); mdelay(1); } @@ -540,7 +540,7 @@ static void sh_eth_chip_reset_r8a7740(struct net_device *ndev) struct sh_eth_private *mdp = netdev_priv(ndev); /* reset device */ - sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); + sh_eth_tsu_write(mdp, ARSTR_ARST, ARSTR); mdelay(1); sh_eth_select_mii(ndev); @@ -735,7 +735,7 @@ static void sh_eth_chip_reset_giga(struct net_device *ndev) } /* reset device */ - iowrite32(ARSTR_ARSTR, (void *)(SH_GIGA_ETH_BASE + 0x1800)); + iowrite32(ARSTR_ARST, (void *)(SH_GIGA_ETH_BASE + 0x1800)); mdelay(1); /* restore MAHR and MALR */ @@ -899,7 +899,7 @@ static int sh_eth_check_reset(struct net_device *ndev) int cnt = 100; while (cnt > 0) { - if (!(sh_eth_read(ndev, EDMR) & 0x3)) + if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER)) break; mdelay(1); cnt--; @@ -1229,7 +1229,7 @@ ring_free: return -ENOMEM; } -static int sh_eth_dev_init(struct net_device *ndev, bool start) +static int sh_eth_dev_init(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); int ret; @@ -1279,10 +1279,8 @@ static int sh_eth_dev_init(struct net_device *ndev, bool start) RFLR); sh_eth_modify(ndev, EESR, 0, 0); - if (start) { - mdp->irq_enabled = true; - sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR); - } + mdp->irq_enabled = true; + sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR); /* PAUSE Prohibition */ sh_eth_write(ndev, ECMR_ZPF | (mdp->duplex ? ECMR_DM : 0) | @@ -1295,8 +1293,7 @@ static int sh_eth_dev_init(struct net_device *ndev, bool start) sh_eth_write(ndev, mdp->cd->ecsr_value, ECSR); /* E-MAC Interrupt Enable register */ - if (start) - sh_eth_write(ndev, mdp->cd->ecsipr_value, ECSIPR); + sh_eth_write(ndev, mdp->cd->ecsipr_value, ECSIPR); /* Set MAC address */ update_mac_address(ndev); @@ -1309,10 +1306,8 @@ static int sh_eth_dev_init(struct net_device *ndev, bool start) if (mdp->cd->tpauser) sh_eth_write(ndev, TPAUSER_UNLIMITED, TPAUSER); - if (start) { - /* Setting the Rx mode will start the Rx process. */ - sh_eth_write(ndev, EDRRR_R, EDRRR); - } + /* Setting the Rx mode will start the Rx process. */ + sh_eth_write(ndev, EDRRR_R, EDRRR); return ret; } @@ -2194,17 +2189,13 @@ static int sh_eth_set_ringparam(struct net_device *ndev, __func__); return ret; } - ret = sh_eth_dev_init(ndev, false); + ret = sh_eth_dev_init(ndev); if (ret < 0) { netdev_err(ndev, "%s: sh_eth_dev_init failed.\n", __func__); return ret; } - mdp->irq_enabled = true; - sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR); - /* Setting the Rx mode will start the Rx process. */ - sh_eth_write(ndev, EDRRR_R, EDRRR); netif_device_attach(ndev); } @@ -2250,7 +2241,7 @@ static int sh_eth_open(struct net_device *ndev) goto out_free_irq; /* device init */ - ret = sh_eth_dev_init(ndev, true); + ret = sh_eth_dev_init(ndev); if (ret) goto out_free_irq; @@ -2303,7 +2294,7 @@ static void sh_eth_tx_timeout(struct net_device *ndev) } /* device init */ - sh_eth_dev_init(ndev, true); + sh_eth_dev_init(ndev); netif_start_queue(ndev); } diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index 8fa4ef3a7fdd..c62380e34a1d 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -394,7 +394,7 @@ enum RPADIR_BIT { #define DEFAULT_FDR_INIT 0x00000707 /* ARSTR */ -enum ARSTR_BIT { ARSTR_ARSTR = 0x00000001, }; +enum ARSTR_BIT { ARSTR_ARST = 0x00000001, }; /* TSU_FWEN0 */ enum TSU_FWEN0_BIT { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index f0d797ab74d8..784eb53361b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -34,6 +34,9 @@ #define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003 #define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010 +#define SYSMGR_FPGAGRP_MODULE_REG 0x00000028 +#define SYSMGR_FPGAGRP_MODULE_EMAC 0x00000004 + #define EMAC_SPLITTER_CTRL_REG 0x0 #define EMAC_SPLITTER_CTRL_SPEED_MASK 0x3 #define EMAC_SPLITTER_CTRL_SPEED_10 0x2 @@ -46,7 +49,6 @@ struct socfpga_dwmac { u32 reg_shift; struct device *dev; struct regmap *sys_mgr_base_addr; - struct reset_control *stmmac_rst; void __iomem *splitter_base; bool f2h_ptp_ref_clk; }; @@ -89,15 +91,6 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * struct device_node *np_splitter; struct resource res_splitter; - dwmac->stmmac_rst = devm_reset_control_get(dev, - STMMAC_RESOURCE_NAME); - if (IS_ERR(dwmac->stmmac_rst)) { - dev_info(dev, "Could not get reset control!\n"); - if (PTR_ERR(dwmac->stmmac_rst) == -EPROBE_DEFER) - return -EPROBE_DEFER; - dwmac->stmmac_rst = NULL; - } - dwmac->interface = of_get_phy_mode(np); sys_mgr_base_addr = syscon_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon"); @@ -148,7 +141,7 @@ static int socfpga_dwmac_setup(struct socfpga_dwmac *dwmac) int phymode = dwmac->interface; u32 reg_offset = dwmac->reg_offset; u32 reg_shift = dwmac->reg_shift; - u32 ctrl, val; + u32 ctrl, val, module; switch (phymode) { case PHY_INTERFACE_MODE_RGMII: @@ -175,39 +168,39 @@ static int socfpga_dwmac_setup(struct socfpga_dwmac *dwmac) ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift); ctrl |= val << reg_shift; - if (dwmac->f2h_ptp_ref_clk) + if (dwmac->f2h_ptp_ref_clk) { ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); - else + regmap_read(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, + &module); + module |= (SYSMGR_FPGAGRP_MODULE_EMAC << (reg_shift / 2)); + regmap_write(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, + module); + } else { ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2)); + } regmap_write(sys_mgr_base_addr, reg_offset, ctrl); - return 0; -} -static void socfpga_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct socfpga_dwmac *dwmac = priv; - - /* On socfpga platform exit, assert and hold reset to the - * enet controller - the default state after a hard reset. - */ - if (dwmac->stmmac_rst) - reset_control_assert(dwmac->stmmac_rst); + return 0; } static int socfpga_dwmac_init(struct platform_device *pdev, void *priv) { - struct socfpga_dwmac *dwmac = priv; + struct socfpga_dwmac *dwmac = priv; struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *stpriv = NULL; int ret = 0; - if (ndev) - stpriv = netdev_priv(ndev); + if (!ndev) + return -EINVAL; + + stpriv = netdev_priv(ndev); + if (!stpriv) + return -EINVAL; /* Assert reset to the enet controller before changing the phy mode */ - if (dwmac->stmmac_rst) - reset_control_assert(dwmac->stmmac_rst); + if (stpriv->stmmac_rst) + reset_control_assert(stpriv->stmmac_rst); /* Setup the phy mode in the system manager registers according to * devicetree configuration @@ -217,8 +210,8 @@ static int socfpga_dwmac_init(struct platform_device *pdev, void *priv) /* Deassert reset for the phy configuration to be sampled by * the enet controller, and operation to start in requested mode */ - if (dwmac->stmmac_rst) - reset_control_deassert(dwmac->stmmac_rst); + if (stpriv->stmmac_rst) + reset_control_deassert(stpriv->stmmac_rst); /* Before the enet controller is suspended, the phy is suspended. * This causes the phy clock to be gated. The enet controller is @@ -235,7 +228,7 @@ static int socfpga_dwmac_init(struct platform_device *pdev, void *priv) * control register 0, and can be modified by the phy driver * framework. */ - if (stpriv && stpriv->phydev) + if (stpriv->phydev) phy_resume(stpriv->phydev); return ret; @@ -267,22 +260,15 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) return ret; } - ret = socfpga_dwmac_setup(dwmac); - if (ret) { - dev_err(dev, "couldn't setup SoC glue (%d)\n", ret); - return ret; - } - plat_dat->bsp_priv = dwmac; plat_dat->init = socfpga_dwmac_init; - plat_dat->exit = socfpga_dwmac_exit; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; - ret = socfpga_dwmac_init(pdev, plat_dat->bsp_priv); - if (ret) - return ret; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (!ret) + ret = socfpga_dwmac_init(pdev, dwmac); - return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + return ret; } static const struct of_device_id socfpga_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 06704ca6f9ca..3f83c369f56c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -37,6 +37,18 @@ #define MII_BUSY 0x00000001 #define MII_WRITE 0x00000002 +/* GMAC4 defines */ +#define MII_GMAC4_GOC_SHIFT 2 +#define MII_GMAC4_WRITE (1 << MII_GMAC4_GOC_SHIFT) +#define MII_GMAC4_READ (3 << MII_GMAC4_GOC_SHIFT) + +#define MII_PHY_ADDR_GMAC4_SHIFT 21 +#define MII_PHY_ADDR_GMAC4_MASK GENMASK(25, 21) +#define MII_PHY_REG_GMAC4_SHIFT 16 +#define MII_PHY_REG_GMAC4_MASK GENMASK(20, 16) +#define MII_CSR_CLK_GMAC4_SHIFT 8 +#define MII_CSR_CLK_GMAC4_MASK GENMASK(11, 8) + static int stmmac_mdio_busy_wait(void __iomem *ioaddr, unsigned int mii_addr) { unsigned long curr; @@ -124,6 +136,80 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, } /** + * stmmac_mdio_read_gmac4 + * @bus: points to the mii_bus structure + * @phyaddr: MII addr reg bits 25-21 + * @phyreg: MII addr reg bits 20-16 + * Description: it reads data from the MII register of GMAC4 from within + * the phy device. + */ +static int stmmac_mdio_read_gmac4(struct mii_bus *bus, int phyaddr, int phyreg) +{ + struct net_device *ndev = bus->priv; + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int mii_address = priv->hw->mii.addr; + unsigned int mii_data = priv->hw->mii.data; + int data; + u32 value = (((phyaddr << MII_PHY_ADDR_GMAC4_SHIFT) & + (MII_PHY_ADDR_GMAC4_MASK)) | + ((phyreg << MII_PHY_REG_GMAC4_SHIFT) & + (MII_PHY_REG_GMAC4_MASK))) | MII_GMAC4_READ; + + value |= MII_BUSY | ((priv->clk_csr & MII_CSR_CLK_GMAC4_MASK) + << MII_CSR_CLK_GMAC4_SHIFT); + + if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address)) + return -EBUSY; + + writel(value, priv->ioaddr + mii_address); + + if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address)) + return -EBUSY; + + /* Read the data from the MII data register */ + data = (int)readl(priv->ioaddr + mii_data); + + return data; +} + +/** + * stmmac_mdio_write_gmac4 + * @bus: points to the mii_bus structure + * @phyaddr: MII addr reg bits 25-21 + * @phyreg: MII addr reg bits 20-16 + * @phydata: phy data + * Description: it writes the data into the MII register of GMAC4 from within + * the device. + */ +static int stmmac_mdio_write_gmac4(struct mii_bus *bus, int phyaddr, int phyreg, + u16 phydata) +{ + struct net_device *ndev = bus->priv; + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int mii_address = priv->hw->mii.addr; + unsigned int mii_data = priv->hw->mii.data; + + u32 value = (((phyaddr << MII_PHY_ADDR_GMAC4_SHIFT) & + (MII_PHY_ADDR_GMAC4_MASK)) | + ((phyreg << MII_PHY_REG_GMAC4_SHIFT) & + (MII_PHY_REG_GMAC4_MASK))) | MII_GMAC4_WRITE; + + value |= MII_BUSY | ((priv->clk_csr & MII_CSR_CLK_GMAC4_MASK) + << MII_CSR_CLK_GMAC4_SHIFT); + + /* Wait until any existing MII operation is complete */ + if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address)) + return -EBUSY; + + /* Set the MII address register to write */ + writel(phydata, priv->ioaddr + mii_data); + writel(value, priv->ioaddr + mii_address); + + /* Wait until any existing MII operation is complete */ + return stmmac_mdio_busy_wait(priv->ioaddr, mii_address); +} + +/** * stmmac_mdio_reset * @bus: points to the mii_bus structure * Description: reset the MII bus @@ -180,9 +266,11 @@ int stmmac_mdio_reset(struct mii_bus *bus) /* This is a workaround for problems with the STE101P PHY. * It doesn't complete its reset until at least one clock cycle - * on MDC, so perform a dummy mdio read. + * on MDC, so perform a dummy mdio read. To be upadted for GMAC4 + * if needed. */ - writel(0, priv->ioaddr + mii_address); + if (!priv->plat->has_gmac4) + writel(0, priv->ioaddr + mii_address); #endif return 0; } @@ -217,8 +305,14 @@ int stmmac_mdio_register(struct net_device *ndev) #endif new_bus->name = "stmmac"; - new_bus->read = &stmmac_mdio_read; - new_bus->write = &stmmac_mdio_write; + if (priv->plat->has_gmac4) { + new_bus->read = &stmmac_mdio_read_gmac4; + new_bus->write = &stmmac_mdio_write_gmac4; + } else { + new_bus->read = &stmmac_mdio_read; + new_bus->write = &stmmac_mdio_write; + } + new_bus->reset = &stmmac_mdio_reset; snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", new_bus->name, priv->plat->bus_id); diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 14c9d1baa85c..2524a69db318 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1610,7 +1610,6 @@ static inline int bdx_tx_space(struct bdx_priv *priv) * o NETDEV_TX_BUSY Cannot transmit packet, try later * Usually a bug, means queue start/stop flow control is broken in * the driver. Note: the driver must NOT put the skb in its DMA ring. - * o NETDEV_TX_LOCKED Locking failed, please retry quickly. */ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, struct net_device *ndev) @@ -1630,12 +1629,7 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, ENTER; local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n", - BDX_DRV_NAME, ndev->name); - return NETDEV_TX_LOCKED; - } + spin_lock(&priv->tx_lock); /* build tx descriptor */ BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */ diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 54bcc3851b7e..0fa75a86b1c1 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1244,12 +1244,12 @@ static int cpsw_ndo_open(struct net_device *ndev) int i, ret; u32 reg; + pm_runtime_get_sync(&priv->pdev->dev); + if (!cpsw_common_res_usage_state(priv)) cpsw_intr_disable(priv); netif_carrier_off(ndev); - pm_runtime_get_sync(&priv->pdev->dev); - reg = priv->version; dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n", diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 5d9abedd6b75..58d58f002559 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1878,8 +1878,6 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) pdata->hw_ram_addr = auxdata->hw_ram_addr; } - pdev->dev.platform_data = pdata; - return pdata; } @@ -2101,6 +2099,7 @@ static int davinci_emac_remove(struct platform_device *pdev) cpdma_ctlr_destroy(priv->dma); unregister_netdev(ndev); + pm_runtime_disable(&pdev->dev); free_netdev(ndev); return 0; diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 520cf50a3d5a..01a77145a0fa 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1314,7 +1314,8 @@ static int tsi108_open(struct net_device *dev) data->txring = dma_zalloc_coherent(NULL, txring_size, &data->txdma, GFP_KERNEL); if (!data->txring) { - pci_free_consistent(0, rxring_size, data->rxring, data->rxdma); + pci_free_consistent(NULL, rxring_size, data->rxring, + data->rxdma); return -ENOMEM; } diff --git a/drivers/net/ethernet/wiznet/Kconfig b/drivers/net/ethernet/wiznet/Kconfig index f98b91d21f33..1981e88c18dc 100644 --- a/drivers/net/ethernet/wiznet/Kconfig +++ b/drivers/net/ethernet/wiznet/Kconfig @@ -69,4 +69,18 @@ config WIZNET_BUS_ANY Performance may decrease compared to explicitly selected bus mode. endchoice +config WIZNET_W5100_SPI + tristate "WIZnet W5100/W5200/W5500 Ethernet support for SPI mode" + depends on WIZNET_BUS_ANY && WIZNET_W5100 + depends on SPI + ---help--- + In SPI mode host system accesses registers using SPI protocol + (mode 0) on the SPI bus. + + Performance decreases compared to other bus interface mode. + In W5100 SPI mode, burst READ/WRITE processing are not provided. + + To compile this driver as a module, choose M here: the module + will be called w5100-spi. + endif # NET_VENDOR_WIZNET diff --git a/drivers/net/ethernet/wiznet/Makefile b/drivers/net/ethernet/wiznet/Makefile index c614535227e8..1e05e1a84208 100644 --- a/drivers/net/ethernet/wiznet/Makefile +++ b/drivers/net/ethernet/wiznet/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_WIZNET_W5100) += w5100.o +obj-$(CONFIG_WIZNET_W5100_SPI) += w5100-spi.o obj-$(CONFIG_WIZNET_W5300) += w5300.o diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c new file mode 100644 index 000000000000..b868e458d0b5 --- /dev/null +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -0,0 +1,464 @@ +/* + * Ethernet driver for the WIZnet W5100/W5200/W5500 chip. + * + * Copyright (C) 2016 Akinobu Mita <akinobu.mita@gmail.com> + * + * Licensed under the GPL-2 or later. + * + * Datasheet: + * http://www.wiznet.co.kr/wp-content/uploads/wiznethome/Chip/W5100/Document/W5100_Datasheet_v1.2.6.pdf + * http://wiznethome.cafe24.com/wp-content/uploads/wiznethome/Chip/W5200/Documents/W5200_DS_V140E.pdf + * http://wizwiki.net/wiki/lib/exe/fetch.php?media=products:w5500:w5500_ds_v106e_141230.pdf + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/spi/spi.h> + +#include "w5100.h" + +#define W5100_SPI_WRITE_OPCODE 0xf0 +#define W5100_SPI_READ_OPCODE 0x0f + +static int w5100_spi_read(struct net_device *ndev, u32 addr) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[3] = { W5100_SPI_READ_OPCODE, addr >> 8, addr & 0xff }; + u8 data; + int ret; + + ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, 1); + + return ret ? ret : data; +} + +static int w5100_spi_write(struct net_device *ndev, u32 addr, u8 data) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[4] = { W5100_SPI_WRITE_OPCODE, addr >> 8, addr & 0xff, data}; + + return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0); +} + +static int w5100_spi_read16(struct net_device *ndev, u32 addr) +{ + u16 data; + int ret; + + ret = w5100_spi_read(ndev, addr); + if (ret < 0) + return ret; + data = ret << 8; + ret = w5100_spi_read(ndev, addr + 1); + + return ret < 0 ? ret : data | ret; +} + +static int w5100_spi_write16(struct net_device *ndev, u32 addr, u16 data) +{ + int ret; + + ret = w5100_spi_write(ndev, addr, data >> 8); + if (ret) + return ret; + + return w5100_spi_write(ndev, addr + 1, data & 0xff); +} + +static int w5100_spi_readbulk(struct net_device *ndev, u32 addr, u8 *buf, + int len) +{ + int i; + + for (i = 0; i < len; i++) { + int ret = w5100_spi_read(ndev, addr + i); + + if (ret < 0) + return ret; + buf[i] = ret; + } + + return 0; +} + +static int w5100_spi_writebulk(struct net_device *ndev, u32 addr, const u8 *buf, + int len) +{ + int i; + + for (i = 0; i < len; i++) { + int ret = w5100_spi_write(ndev, addr + i, buf[i]); + + if (ret) + return ret; + } + + return 0; +} + +static const struct w5100_ops w5100_spi_ops = { + .may_sleep = true, + .chip_id = W5100, + .read = w5100_spi_read, + .write = w5100_spi_write, + .read16 = w5100_spi_read16, + .write16 = w5100_spi_write16, + .readbulk = w5100_spi_readbulk, + .writebulk = w5100_spi_writebulk, +}; + +#define W5200_SPI_WRITE_OPCODE 0x80 + +struct w5200_spi_priv { + /* Serialize access to cmd_buf */ + struct mutex cmd_lock; + + /* DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u8 cmd_buf[4] ____cacheline_aligned; +}; + +static struct w5200_spi_priv *w5200_spi_priv(struct net_device *ndev) +{ + return w5100_ops_priv(ndev); +} + +static int w5200_spi_init(struct net_device *ndev) +{ + struct w5200_spi_priv *spi_priv = w5200_spi_priv(ndev); + + mutex_init(&spi_priv->cmd_lock); + + return 0; +} + +static int w5200_spi_read(struct net_device *ndev, u32 addr) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[4] = { addr >> 8, addr & 0xff, 0, 1 }; + u8 data; + int ret; + + ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, 1); + + return ret ? ret : data; +} + +static int w5200_spi_write(struct net_device *ndev, u32 addr, u8 data) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[5] = { addr >> 8, addr & 0xff, W5200_SPI_WRITE_OPCODE, 1, data }; + + return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0); +} + +static int w5200_spi_read16(struct net_device *ndev, u32 addr) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[4] = { addr >> 8, addr & 0xff, 0, 2 }; + __be16 data; + int ret; + + ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, sizeof(data)); + + return ret ? ret : be16_to_cpu(data); +} + +static int w5200_spi_write16(struct net_device *ndev, u32 addr, u16 data) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[6] = { + addr >> 8, addr & 0xff, + W5200_SPI_WRITE_OPCODE, 2, + data >> 8, data & 0xff + }; + + return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0); +} + +static int w5200_spi_readbulk(struct net_device *ndev, u32 addr, u8 *buf, + int len) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + struct w5200_spi_priv *spi_priv = w5200_spi_priv(ndev); + struct spi_transfer xfer[] = { + { + .tx_buf = spi_priv->cmd_buf, + .len = sizeof(spi_priv->cmd_buf), + }, + { + .rx_buf = buf, + .len = len, + }, + }; + int ret; + + mutex_lock(&spi_priv->cmd_lock); + + spi_priv->cmd_buf[0] = addr >> 8; + spi_priv->cmd_buf[1] = addr; + spi_priv->cmd_buf[2] = len >> 8; + spi_priv->cmd_buf[3] = len; + ret = spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer)); + + mutex_unlock(&spi_priv->cmd_lock); + + return ret; +} + +static int w5200_spi_writebulk(struct net_device *ndev, u32 addr, const u8 *buf, + int len) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + struct w5200_spi_priv *spi_priv = w5200_spi_priv(ndev); + struct spi_transfer xfer[] = { + { + .tx_buf = spi_priv->cmd_buf, + .len = sizeof(spi_priv->cmd_buf), + }, + { + .tx_buf = buf, + .len = len, + }, + }; + int ret; + + mutex_lock(&spi_priv->cmd_lock); + + spi_priv->cmd_buf[0] = addr >> 8; + spi_priv->cmd_buf[1] = addr; + spi_priv->cmd_buf[2] = W5200_SPI_WRITE_OPCODE | (len >> 8); + spi_priv->cmd_buf[3] = len; + ret = spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer)); + + mutex_unlock(&spi_priv->cmd_lock); + + return ret; +} + +static const struct w5100_ops w5200_ops = { + .may_sleep = true, + .chip_id = W5200, + .read = w5200_spi_read, + .write = w5200_spi_write, + .read16 = w5200_spi_read16, + .write16 = w5200_spi_write16, + .readbulk = w5200_spi_readbulk, + .writebulk = w5200_spi_writebulk, + .init = w5200_spi_init, +}; + +#define W5500_SPI_BLOCK_SELECT(addr) (((addr) >> 16) & 0x1f) +#define W5500_SPI_READ_CONTROL(addr) (W5500_SPI_BLOCK_SELECT(addr) << 3) +#define W5500_SPI_WRITE_CONTROL(addr) \ + ((W5500_SPI_BLOCK_SELECT(addr) << 3) | BIT(2)) + +struct w5500_spi_priv { + /* Serialize access to cmd_buf */ + struct mutex cmd_lock; + + /* DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u8 cmd_buf[3] ____cacheline_aligned; +}; + +static struct w5500_spi_priv *w5500_spi_priv(struct net_device *ndev) +{ + return w5100_ops_priv(ndev); +} + +static int w5500_spi_init(struct net_device *ndev) +{ + struct w5500_spi_priv *spi_priv = w5500_spi_priv(ndev); + + mutex_init(&spi_priv->cmd_lock); + + return 0; +} + +static int w5500_spi_read(struct net_device *ndev, u32 addr) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[3] = { + addr >> 8, + addr, + W5500_SPI_READ_CONTROL(addr) + }; + u8 data; + int ret; + + ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, 1); + + return ret ? ret : data; +} + +static int w5500_spi_write(struct net_device *ndev, u32 addr, u8 data) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[4] = { + addr >> 8, + addr, + W5500_SPI_WRITE_CONTROL(addr), + data + }; + + return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0); +} + +static int w5500_spi_read16(struct net_device *ndev, u32 addr) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[3] = { + addr >> 8, + addr, + W5500_SPI_READ_CONTROL(addr) + }; + __be16 data; + int ret; + + ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, sizeof(data)); + + return ret ? ret : be16_to_cpu(data); +} + +static int w5500_spi_write16(struct net_device *ndev, u32 addr, u16 data) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + u8 cmd[5] = { + addr >> 8, + addr, + W5500_SPI_WRITE_CONTROL(addr), + data >> 8, + data + }; + + return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0); +} + +static int w5500_spi_readbulk(struct net_device *ndev, u32 addr, u8 *buf, + int len) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + struct w5500_spi_priv *spi_priv = w5500_spi_priv(ndev); + struct spi_transfer xfer[] = { + { + .tx_buf = spi_priv->cmd_buf, + .len = sizeof(spi_priv->cmd_buf), + }, + { + .rx_buf = buf, + .len = len, + }, + }; + int ret; + + mutex_lock(&spi_priv->cmd_lock); + + spi_priv->cmd_buf[0] = addr >> 8; + spi_priv->cmd_buf[1] = addr; + spi_priv->cmd_buf[2] = W5500_SPI_READ_CONTROL(addr); + ret = spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer)); + + mutex_unlock(&spi_priv->cmd_lock); + + return ret; +} + +static int w5500_spi_writebulk(struct net_device *ndev, u32 addr, const u8 *buf, + int len) +{ + struct spi_device *spi = to_spi_device(ndev->dev.parent); + struct w5500_spi_priv *spi_priv = w5500_spi_priv(ndev); + struct spi_transfer xfer[] = { + { + .tx_buf = spi_priv->cmd_buf, + .len = sizeof(spi_priv->cmd_buf), + }, + { + .tx_buf = buf, + .len = len, + }, + }; + int ret; + + mutex_lock(&spi_priv->cmd_lock); + + spi_priv->cmd_buf[0] = addr >> 8; + spi_priv->cmd_buf[1] = addr; + spi_priv->cmd_buf[2] = W5500_SPI_WRITE_CONTROL(addr); + ret = spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer)); + + mutex_unlock(&spi_priv->cmd_lock); + + return ret; +} + +static const struct w5100_ops w5500_ops = { + .may_sleep = true, + .chip_id = W5500, + .read = w5500_spi_read, + .write = w5500_spi_write, + .read16 = w5500_spi_read16, + .write16 = w5500_spi_write16, + .readbulk = w5500_spi_readbulk, + .writebulk = w5500_spi_writebulk, + .init = w5500_spi_init, +}; + +static int w5100_spi_probe(struct spi_device *spi) +{ + const struct spi_device_id *id = spi_get_device_id(spi); + const struct w5100_ops *ops; + int priv_size; + + switch (id->driver_data) { + case W5100: + ops = &w5100_spi_ops; + priv_size = 0; + break; + case W5200: + ops = &w5200_ops; + priv_size = sizeof(struct w5200_spi_priv); + break; + case W5500: + ops = &w5500_ops; + priv_size = sizeof(struct w5500_spi_priv); + break; + default: + return -EINVAL; + } + + return w5100_probe(&spi->dev, ops, priv_size, NULL, spi->irq, -EINVAL); +} + +static int w5100_spi_remove(struct spi_device *spi) +{ + return w5100_remove(&spi->dev); +} + +static const struct spi_device_id w5100_spi_ids[] = { + { "w5100", W5100 }, + { "w5200", W5200 }, + { "w5500", W5500 }, + {} +}; +MODULE_DEVICE_TABLE(spi, w5100_spi_ids); + +static struct spi_driver w5100_spi_driver = { + .driver = { + .name = "w5100", + .pm = &w5100_pm_ops, + }, + .probe = w5100_spi_probe, + .remove = w5100_spi_remove, + .id_table = w5100_spi_ids, +}; +module_spi_driver(w5100_spi_driver); + +MODULE_DESCRIPTION("WIZnet W5100/W5200/W5500 Ethernet driver for SPI mode"); +MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 8b282d0b169c..8ed0c7735ee3 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -27,6 +27,8 @@ #include <linux/irq.h> #include <linux/gpio.h> +#include "w5100.h" + #define DRV_NAME "w5100" #define DRV_VERSION "2012-04-04" @@ -36,7 +38,7 @@ MODULE_ALIAS("platform:"DRV_NAME); MODULE_LICENSE("GPL"); /* - * Registers + * W5100/W5200/W5500 common registers */ #define W5100_COMMON_REGS 0x0000 #define W5100_MR 0x0000 /* Mode Register */ @@ -46,55 +48,114 @@ MODULE_LICENSE("GPL"); #define MR_IND 0x01 /* Indirect mode */ #define W5100_SHAR 0x0009 /* Source MAC address */ #define W5100_IR 0x0015 /* Interrupt Register */ -#define W5100_IMR 0x0016 /* Interrupt Mask Register */ -#define IR_S0 0x01 /* S0 interrupt */ -#define W5100_RTR 0x0017 /* Retry Time-value Register */ -#define RTR_DEFAULT 2000 /* =0x07d0 (2000) */ -#define W5100_RMSR 0x001a /* Receive Memory Size */ -#define W5100_TMSR 0x001b /* Transmit Memory Size */ #define W5100_COMMON_REGS_LEN 0x0040 -#define W5100_S0_REGS 0x0400 -#define W5100_S0_MR 0x0400 /* S0 Mode Register */ +#define W5100_Sn_MR 0x0000 /* Sn Mode Register */ +#define W5100_Sn_CR 0x0001 /* Sn Command Register */ +#define W5100_Sn_IR 0x0002 /* Sn Interrupt Register */ +#define W5100_Sn_SR 0x0003 /* Sn Status Register */ +#define W5100_Sn_TX_FSR 0x0020 /* Sn Transmit free memory size */ +#define W5100_Sn_TX_RD 0x0022 /* Sn Transmit memory read pointer */ +#define W5100_Sn_TX_WR 0x0024 /* Sn Transmit memory write pointer */ +#define W5100_Sn_RX_RSR 0x0026 /* Sn Receive free memory size */ +#define W5100_Sn_RX_RD 0x0028 /* Sn Receive memory read pointer */ + +#define S0_REGS(priv) ((priv)->s0_regs) + +#define W5100_S0_MR(priv) (S0_REGS(priv) + W5100_Sn_MR) #define S0_MR_MACRAW 0x04 /* MAC RAW mode (promiscuous) */ #define S0_MR_MACRAW_MF 0x44 /* MAC RAW mode (filtered) */ -#define W5100_S0_CR 0x0401 /* S0 Command Register */ +#define W5100_S0_CR(priv) (S0_REGS(priv) + W5100_Sn_CR) #define S0_CR_OPEN 0x01 /* OPEN command */ #define S0_CR_CLOSE 0x10 /* CLOSE command */ #define S0_CR_SEND 0x20 /* SEND command */ #define S0_CR_RECV 0x40 /* RECV command */ -#define W5100_S0_IR 0x0402 /* S0 Interrupt Register */ +#define W5100_S0_IR(priv) (S0_REGS(priv) + W5100_Sn_IR) #define S0_IR_SENDOK 0x10 /* complete sending */ #define S0_IR_RECV 0x04 /* receiving data */ -#define W5100_S0_SR 0x0403 /* S0 Status Register */ +#define W5100_S0_SR(priv) (S0_REGS(priv) + W5100_Sn_SR) #define S0_SR_MACRAW 0x42 /* mac raw mode */ -#define W5100_S0_TX_FSR 0x0420 /* S0 Transmit free memory size */ -#define W5100_S0_TX_RD 0x0422 /* S0 Transmit memory read pointer */ -#define W5100_S0_TX_WR 0x0424 /* S0 Transmit memory write pointer */ -#define W5100_S0_RX_RSR 0x0426 /* S0 Receive free memory size */ -#define W5100_S0_RX_RD 0x0428 /* S0 Receive memory read pointer */ +#define W5100_S0_TX_FSR(priv) (S0_REGS(priv) + W5100_Sn_TX_FSR) +#define W5100_S0_TX_RD(priv) (S0_REGS(priv) + W5100_Sn_TX_RD) +#define W5100_S0_TX_WR(priv) (S0_REGS(priv) + W5100_Sn_TX_WR) +#define W5100_S0_RX_RSR(priv) (S0_REGS(priv) + W5100_Sn_RX_RSR) +#define W5100_S0_RX_RD(priv) (S0_REGS(priv) + W5100_Sn_RX_RD) + #define W5100_S0_REGS_LEN 0x0040 +/* + * W5100 and W5200 common registers + */ +#define W5100_IMR 0x0016 /* Interrupt Mask Register */ +#define IR_S0 0x01 /* S0 interrupt */ +#define W5100_RTR 0x0017 /* Retry Time-value Register */ +#define RTR_DEFAULT 2000 /* =0x07d0 (2000) */ + +/* + * W5100 specific register and memory + */ +#define W5100_RMSR 0x001a /* Receive Memory Size */ +#define W5100_TMSR 0x001b /* Transmit Memory Size */ + +#define W5100_S0_REGS 0x0400 + #define W5100_TX_MEM_START 0x4000 -#define W5100_TX_MEM_END 0x5fff -#define W5100_TX_MEM_MASK 0x1fff +#define W5100_TX_MEM_SIZE 0x2000 #define W5100_RX_MEM_START 0x6000 -#define W5100_RX_MEM_END 0x7fff -#define W5100_RX_MEM_MASK 0x1fff +#define W5100_RX_MEM_SIZE 0x2000 + +/* + * W5200 specific register and memory + */ +#define W5200_S0_REGS 0x4000 + +#define W5200_Sn_RXMEM_SIZE(n) (0x401e + (n) * 0x0100) /* Sn RX Memory Size */ +#define W5200_Sn_TXMEM_SIZE(n) (0x401f + (n) * 0x0100) /* Sn TX Memory Size */ + +#define W5200_TX_MEM_START 0x8000 +#define W5200_TX_MEM_SIZE 0x4000 +#define W5200_RX_MEM_START 0xc000 +#define W5200_RX_MEM_SIZE 0x4000 + +/* + * W5500 specific register and memory + * + * W5500 register and memory are organized by multiple blocks. Each one is + * selected by 16bits offset address and 5bits block select bits. So we + * encode it into 32bits address. (lower 16bits is offset address and + * upper 16bits is block select bits) + */ +#define W5500_SIMR 0x0018 /* Socket Interrupt Mask Register */ +#define W5500_RTR 0x0019 /* Retry Time-value Register */ + +#define W5500_S0_REGS 0x10000 + +#define W5500_Sn_RXMEM_SIZE(n) \ + (0x1001e + (n) * 0x40000) /* Sn RX Memory Size */ +#define W5500_Sn_TXMEM_SIZE(n) \ + (0x1001f + (n) * 0x40000) /* Sn TX Memory Size */ + +#define W5500_TX_MEM_START 0x20000 +#define W5500_TX_MEM_SIZE 0x04000 +#define W5500_RX_MEM_START 0x30000 +#define W5500_RX_MEM_SIZE 0x04000 /* * Device driver private data structure */ + struct w5100_priv { - void __iomem *base; - spinlock_t reg_lock; - bool indirect; - u8 (*read)(struct w5100_priv *priv, u16 addr); - void (*write)(struct w5100_priv *priv, u16 addr, u8 data); - u16 (*read16)(struct w5100_priv *priv, u16 addr); - void (*write16)(struct w5100_priv *priv, u16 addr, u16 data); - void (*readbuf)(struct w5100_priv *priv, u16 addr, u8 *buf, int len); - void (*writebuf)(struct w5100_priv *priv, u16 addr, u8 *buf, int len); + const struct w5100_ops *ops; + + /* Socket 0 register offset address */ + u32 s0_regs; + /* Socket 0 TX buffer offset address and size */ + u32 s0_tx_buf; + u16 s0_tx_buf_size; + /* Socket 0 RX buffer offset address and size */ + u32 s0_rx_buf; + u16 s0_rx_buf_size; + int irq; int link_irq; int link_gpio; @@ -103,71 +164,142 @@ struct w5100_priv { struct net_device *ndev; bool promisc; u32 msg_enable; + + struct workqueue_struct *xfer_wq; + struct work_struct rx_work; + struct sk_buff *tx_skb; + struct work_struct tx_work; + struct work_struct setrx_work; + struct work_struct restart_work; }; +static inline bool is_w5200(struct w5100_priv *priv) +{ + return priv->ops->chip_id == W5200; +} + /************************************************************************ * * Lowlevel I/O functions * ***********************************************************************/ +struct w5100_mmio_priv { + void __iomem *base; + /* Serialize access in indirect address mode */ + spinlock_t reg_lock; +}; + +static inline struct w5100_mmio_priv *w5100_mmio_priv(struct net_device *dev) +{ + return w5100_ops_priv(dev); +} + +static inline void __iomem *w5100_mmio(struct net_device *ndev) +{ + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); + + return mmio_priv->base; +} + /* * In direct address mode host system can directly access W5100 registers * after mapping to Memory-Mapped I/O space. * * 0x8000 bytes are required for memory space. */ -static inline u8 w5100_read_direct(struct w5100_priv *priv, u16 addr) +static inline int w5100_read_direct(struct net_device *ndev, u32 addr) { - return ioread8(priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT)); + return ioread8(w5100_mmio(ndev) + (addr << CONFIG_WIZNET_BUS_SHIFT)); } -static inline void w5100_write_direct(struct w5100_priv *priv, - u16 addr, u8 data) +static inline int __w5100_write_direct(struct net_device *ndev, u32 addr, + u8 data) { - iowrite8(data, priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT)); + iowrite8(data, w5100_mmio(ndev) + (addr << CONFIG_WIZNET_BUS_SHIFT)); + + return 0; } -static u16 w5100_read16_direct(struct w5100_priv *priv, u16 addr) +static inline int w5100_write_direct(struct net_device *ndev, u32 addr, u8 data) +{ + __w5100_write_direct(ndev, addr, data); + mmiowb(); + + return 0; +} + +static int w5100_read16_direct(struct net_device *ndev, u32 addr) { u16 data; - data = w5100_read_direct(priv, addr) << 8; - data |= w5100_read_direct(priv, addr + 1); + data = w5100_read_direct(ndev, addr) << 8; + data |= w5100_read_direct(ndev, addr + 1); return data; } -static void w5100_write16_direct(struct w5100_priv *priv, u16 addr, u16 data) +static int w5100_write16_direct(struct net_device *ndev, u32 addr, u16 data) { - w5100_write_direct(priv, addr, data >> 8); - w5100_write_direct(priv, addr + 1, data); + __w5100_write_direct(ndev, addr, data >> 8); + __w5100_write_direct(ndev, addr + 1, data); + mmiowb(); + + return 0; } -static void w5100_readbuf_direct(struct w5100_priv *priv, - u16 offset, u8 *buf, int len) +static int w5100_readbulk_direct(struct net_device *ndev, u32 addr, u8 *buf, + int len) { - u16 addr = W5100_RX_MEM_START + (offset & W5100_RX_MEM_MASK); int i; - for (i = 0; i < len; i++, addr++) { - if (unlikely(addr > W5100_RX_MEM_END)) - addr = W5100_RX_MEM_START; - *buf++ = w5100_read_direct(priv, addr); - } + for (i = 0; i < len; i++, addr++) + *buf++ = w5100_read_direct(ndev, addr); + + return 0; } -static void w5100_writebuf_direct(struct w5100_priv *priv, - u16 offset, u8 *buf, int len) +static int w5100_writebulk_direct(struct net_device *ndev, u32 addr, + const u8 *buf, int len) { - u16 addr = W5100_TX_MEM_START + (offset & W5100_TX_MEM_MASK); int i; - for (i = 0; i < len; i++, addr++) { - if (unlikely(addr > W5100_TX_MEM_END)) - addr = W5100_TX_MEM_START; - w5100_write_direct(priv, addr, *buf++); - } + for (i = 0; i < len; i++, addr++) + __w5100_write_direct(ndev, addr, *buf++); + + mmiowb(); + + return 0; } +static int w5100_mmio_init(struct net_device *ndev) +{ + struct platform_device *pdev = to_platform_device(ndev->dev.parent); + struct w5100_priv *priv = netdev_priv(ndev); + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); + struct resource *mem; + + spin_lock_init(&mmio_priv->reg_lock); + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mmio_priv->base = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(mmio_priv->base)) + return PTR_ERR(mmio_priv->base); + + netdev_info(ndev, "at 0x%llx irq %d\n", (u64)mem->start, priv->irq); + + return 0; +} + +static const struct w5100_ops w5100_mmio_direct_ops = { + .chip_id = W5100, + .read = w5100_read_direct, + .write = w5100_write_direct, + .read16 = w5100_read16_direct, + .write16 = w5100_write16_direct, + .readbulk = w5100_readbulk_direct, + .writebulk = w5100_writebulk_direct, + .init = w5100_mmio_init, +}; + /* * In indirect address mode host system indirectly accesses registers by * using Indirect Mode Address Register (IDM_AR) and Indirect Mode Data @@ -179,139 +311,290 @@ static void w5100_writebuf_direct(struct w5100_priv *priv, #define W5100_IDM_AR 0x01 /* Indirect Mode Address Register */ #define W5100_IDM_DR 0x03 /* Indirect Mode Data Register */ -static u8 w5100_read_indirect(struct w5100_priv *priv, u16 addr) +static int w5100_read_indirect(struct net_device *ndev, u32 addr) { + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); unsigned long flags; u8 data; - spin_lock_irqsave(&priv->reg_lock, flags); - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); - data = w5100_read_direct(priv, W5100_IDM_DR); - spin_unlock_irqrestore(&priv->reg_lock, flags); + spin_lock_irqsave(&mmio_priv->reg_lock, flags); + w5100_write16_direct(ndev, W5100_IDM_AR, addr); + data = w5100_read_direct(ndev, W5100_IDM_DR); + spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); return data; } -static void w5100_write_indirect(struct w5100_priv *priv, u16 addr, u8 data) +static int w5100_write_indirect(struct net_device *ndev, u32 addr, u8 data) { + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); unsigned long flags; - spin_lock_irqsave(&priv->reg_lock, flags); - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); - w5100_write_direct(priv, W5100_IDM_DR, data); - mmiowb(); - spin_unlock_irqrestore(&priv->reg_lock, flags); + spin_lock_irqsave(&mmio_priv->reg_lock, flags); + w5100_write16_direct(ndev, W5100_IDM_AR, addr); + w5100_write_direct(ndev, W5100_IDM_DR, data); + spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); + + return 0; } -static u16 w5100_read16_indirect(struct w5100_priv *priv, u16 addr) +static int w5100_read16_indirect(struct net_device *ndev, u32 addr) { + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); unsigned long flags; u16 data; - spin_lock_irqsave(&priv->reg_lock, flags); - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); - data = w5100_read_direct(priv, W5100_IDM_DR) << 8; - data |= w5100_read_direct(priv, W5100_IDM_DR); - spin_unlock_irqrestore(&priv->reg_lock, flags); + spin_lock_irqsave(&mmio_priv->reg_lock, flags); + w5100_write16_direct(ndev, W5100_IDM_AR, addr); + data = w5100_read_direct(ndev, W5100_IDM_DR) << 8; + data |= w5100_read_direct(ndev, W5100_IDM_DR); + spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); return data; } -static void w5100_write16_indirect(struct w5100_priv *priv, u16 addr, u16 data) +static int w5100_write16_indirect(struct net_device *ndev, u32 addr, u16 data) { + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); unsigned long flags; - spin_lock_irqsave(&priv->reg_lock, flags); - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); - w5100_write_direct(priv, W5100_IDM_DR, data >> 8); - w5100_write_direct(priv, W5100_IDM_DR, data); - mmiowb(); - spin_unlock_irqrestore(&priv->reg_lock, flags); + spin_lock_irqsave(&mmio_priv->reg_lock, flags); + w5100_write16_direct(ndev, W5100_IDM_AR, addr); + __w5100_write_direct(ndev, W5100_IDM_DR, data >> 8); + w5100_write_direct(ndev, W5100_IDM_DR, data); + spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); + + return 0; } -static void w5100_readbuf_indirect(struct w5100_priv *priv, - u16 offset, u8 *buf, int len) +static int w5100_readbulk_indirect(struct net_device *ndev, u32 addr, u8 *buf, + int len) { - u16 addr = W5100_RX_MEM_START + (offset & W5100_RX_MEM_MASK); + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); unsigned long flags; int i; - spin_lock_irqsave(&priv->reg_lock, flags); - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); + spin_lock_irqsave(&mmio_priv->reg_lock, flags); + w5100_write16_direct(ndev, W5100_IDM_AR, addr); + + for (i = 0; i < len; i++) + *buf++ = w5100_read_direct(ndev, W5100_IDM_DR); - for (i = 0; i < len; i++, addr++) { - if (unlikely(addr > W5100_RX_MEM_END)) { - addr = W5100_RX_MEM_START; - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); - } - *buf++ = w5100_read_direct(priv, W5100_IDM_DR); - } mmiowb(); - spin_unlock_irqrestore(&priv->reg_lock, flags); + spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); + + return 0; } -static void w5100_writebuf_indirect(struct w5100_priv *priv, - u16 offset, u8 *buf, int len) +static int w5100_writebulk_indirect(struct net_device *ndev, u32 addr, + const u8 *buf, int len) { - u16 addr = W5100_TX_MEM_START + (offset & W5100_TX_MEM_MASK); + struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev); unsigned long flags; int i; - spin_lock_irqsave(&priv->reg_lock, flags); - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); + spin_lock_irqsave(&mmio_priv->reg_lock, flags); + w5100_write16_direct(ndev, W5100_IDM_AR, addr); + + for (i = 0; i < len; i++) + __w5100_write_direct(ndev, W5100_IDM_DR, *buf++); - for (i = 0; i < len; i++, addr++) { - if (unlikely(addr > W5100_TX_MEM_END)) { - addr = W5100_TX_MEM_START; - w5100_write16_direct(priv, W5100_IDM_AR, addr); - mmiowb(); - } - w5100_write_direct(priv, W5100_IDM_DR, *buf++); - } mmiowb(); - spin_unlock_irqrestore(&priv->reg_lock, flags); + spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); + + return 0; +} + +static int w5100_reset_indirect(struct net_device *ndev) +{ + w5100_write_direct(ndev, W5100_MR, MR_RST); + mdelay(5); + w5100_write_direct(ndev, W5100_MR, MR_PB | MR_AI | MR_IND); + + return 0; } +static const struct w5100_ops w5100_mmio_indirect_ops = { + .chip_id = W5100, + .read = w5100_read_indirect, + .write = w5100_write_indirect, + .read16 = w5100_read16_indirect, + .write16 = w5100_write16_indirect, + .readbulk = w5100_readbulk_indirect, + .writebulk = w5100_writebulk_indirect, + .init = w5100_mmio_init, + .reset = w5100_reset_indirect, +}; + #if defined(CONFIG_WIZNET_BUS_DIRECT) -#define w5100_read w5100_read_direct -#define w5100_write w5100_write_direct -#define w5100_read16 w5100_read16_direct -#define w5100_write16 w5100_write16_direct -#define w5100_readbuf w5100_readbuf_direct -#define w5100_writebuf w5100_writebuf_direct + +static int w5100_read(struct w5100_priv *priv, u32 addr) +{ + return w5100_read_direct(priv->ndev, addr); +} + +static int w5100_write(struct w5100_priv *priv, u32 addr, u8 data) +{ + return w5100_write_direct(priv->ndev, addr, data); +} + +static int w5100_read16(struct w5100_priv *priv, u32 addr) +{ + return w5100_read16_direct(priv->ndev, addr); +} + +static int w5100_write16(struct w5100_priv *priv, u32 addr, u16 data) +{ + return w5100_write16_direct(priv->ndev, addr, data); +} + +static int w5100_readbulk(struct w5100_priv *priv, u32 addr, u8 *buf, int len) +{ + return w5100_readbulk_direct(priv->ndev, addr, buf, len); +} + +static int w5100_writebulk(struct w5100_priv *priv, u32 addr, const u8 *buf, + int len) +{ + return w5100_writebulk_direct(priv->ndev, addr, buf, len); +} #elif defined(CONFIG_WIZNET_BUS_INDIRECT) -#define w5100_read w5100_read_indirect -#define w5100_write w5100_write_indirect -#define w5100_read16 w5100_read16_indirect -#define w5100_write16 w5100_write16_indirect -#define w5100_readbuf w5100_readbuf_indirect -#define w5100_writebuf w5100_writebuf_indirect + +static int w5100_read(struct w5100_priv *priv, u32 addr) +{ + return w5100_read_indirect(priv->ndev, addr); +} + +static int w5100_write(struct w5100_priv *priv, u32 addr, u8 data) +{ + return w5100_write_indirect(priv->ndev, addr, data); +} + +static int w5100_read16(struct w5100_priv *priv, u32 addr) +{ + return w5100_read16_indirect(priv->ndev, addr); +} + +static int w5100_write16(struct w5100_priv *priv, u32 addr, u16 data) +{ + return w5100_write16_indirect(priv->ndev, addr, data); +} + +static int w5100_readbulk(struct w5100_priv *priv, u32 addr, u8 *buf, int len) +{ + return w5100_readbulk_indirect(priv->ndev, addr, buf, len); +} + +static int w5100_writebulk(struct w5100_priv *priv, u32 addr, const u8 *buf, + int len) +{ + return w5100_writebulk_indirect(priv->ndev, addr, buf, len); +} #else /* CONFIG_WIZNET_BUS_ANY */ -#define w5100_read priv->read -#define w5100_write priv->write -#define w5100_read16 priv->read16 -#define w5100_write16 priv->write16 -#define w5100_readbuf priv->readbuf -#define w5100_writebuf priv->writebuf + +static int w5100_read(struct w5100_priv *priv, u32 addr) +{ + return priv->ops->read(priv->ndev, addr); +} + +static int w5100_write(struct w5100_priv *priv, u32 addr, u8 data) +{ + return priv->ops->write(priv->ndev, addr, data); +} + +static int w5100_read16(struct w5100_priv *priv, u32 addr) +{ + return priv->ops->read16(priv->ndev, addr); +} + +static int w5100_write16(struct w5100_priv *priv, u32 addr, u16 data) +{ + return priv->ops->write16(priv->ndev, addr, data); +} + +static int w5100_readbulk(struct w5100_priv *priv, u32 addr, u8 *buf, int len) +{ + return priv->ops->readbulk(priv->ndev, addr, buf, len); +} + +static int w5100_writebulk(struct w5100_priv *priv, u32 addr, const u8 *buf, + int len) +{ + return priv->ops->writebulk(priv->ndev, addr, buf, len); +} + #endif +static int w5100_readbuf(struct w5100_priv *priv, u16 offset, u8 *buf, int len) +{ + u32 addr; + int remain = 0; + int ret; + const u32 mem_start = priv->s0_rx_buf; + const u16 mem_size = priv->s0_rx_buf_size; + + offset %= mem_size; + addr = mem_start + offset; + + if (offset + len > mem_size) { + remain = (offset + len) % mem_size; + len = mem_size - offset; + } + + ret = w5100_readbulk(priv, addr, buf, len); + if (ret || !remain) + return ret; + + return w5100_readbulk(priv, mem_start, buf + len, remain); +} + +static int w5100_writebuf(struct w5100_priv *priv, u16 offset, const u8 *buf, + int len) +{ + u32 addr; + int ret; + int remain = 0; + const u32 mem_start = priv->s0_tx_buf; + const u16 mem_size = priv->s0_tx_buf_size; + + offset %= mem_size; + addr = mem_start + offset; + + if (offset + len > mem_size) { + remain = (offset + len) % mem_size; + len = mem_size - offset; + } + + ret = w5100_writebulk(priv, addr, buf, len); + if (ret || !remain) + return ret; + + return w5100_writebulk(priv, mem_start, buf + len, remain); +} + +static int w5100_reset(struct w5100_priv *priv) +{ + if (priv->ops->reset) + return priv->ops->reset(priv->ndev); + + w5100_write(priv, W5100_MR, MR_RST); + mdelay(5); + w5100_write(priv, W5100_MR, MR_PB); + + return 0; +} + static int w5100_command(struct w5100_priv *priv, u16 cmd) { - unsigned long timeout = jiffies + msecs_to_jiffies(100); + unsigned long timeout; - w5100_write(priv, W5100_S0_CR, cmd); - mmiowb(); + w5100_write(priv, W5100_S0_CR(priv), cmd); + + timeout = jiffies + msecs_to_jiffies(100); - while (w5100_read(priv, W5100_S0_CR) != 0) { + while (w5100_read(priv, W5100_S0_CR(priv)) != 0) { if (time_after(jiffies, timeout)) return -EIO; cpu_relax(); @@ -323,47 +606,116 @@ static int w5100_command(struct w5100_priv *priv, u16 cmd) static void w5100_write_macaddr(struct w5100_priv *priv) { struct net_device *ndev = priv->ndev; - int i; - for (i = 0; i < ETH_ALEN; i++) - w5100_write(priv, W5100_SHAR + i, ndev->dev_addr[i]); - mmiowb(); + w5100_writebulk(priv, W5100_SHAR, ndev->dev_addr, ETH_ALEN); } -static void w5100_hw_reset(struct w5100_priv *priv) +static void w5100_socket_intr_mask(struct w5100_priv *priv, u8 mask) { - w5100_write_direct(priv, W5100_MR, MR_RST); - mmiowb(); - mdelay(5); - w5100_write_direct(priv, W5100_MR, priv->indirect ? - MR_PB | MR_AI | MR_IND : - MR_PB); - mmiowb(); - w5100_write(priv, W5100_IMR, 0); - w5100_write_macaddr(priv); + u32 imr; + if (priv->ops->chip_id == W5500) + imr = W5500_SIMR; + else + imr = W5100_IMR; + + w5100_write(priv, imr, mask); +} + +static void w5100_enable_intr(struct w5100_priv *priv) +{ + w5100_socket_intr_mask(priv, IR_S0); +} + +static void w5100_disable_intr(struct w5100_priv *priv) +{ + w5100_socket_intr_mask(priv, 0); +} + +static void w5100_memory_configure(struct w5100_priv *priv) +{ /* Configure 16K of internal memory * as 8K RX buffer and 8K TX buffer */ w5100_write(priv, W5100_RMSR, 0x03); w5100_write(priv, W5100_TMSR, 0x03); - mmiowb(); +} + +static void w5200_memory_configure(struct w5100_priv *priv) +{ + int i; + + /* Configure internal RX memory as 16K RX buffer and + * internal TX memory as 16K TX buffer + */ + w5100_write(priv, W5200_Sn_RXMEM_SIZE(0), 0x10); + w5100_write(priv, W5200_Sn_TXMEM_SIZE(0), 0x10); + + for (i = 1; i < 8; i++) { + w5100_write(priv, W5200_Sn_RXMEM_SIZE(i), 0); + w5100_write(priv, W5200_Sn_TXMEM_SIZE(i), 0); + } +} + +static void w5500_memory_configure(struct w5100_priv *priv) +{ + int i; + + /* Configure internal RX memory as 16K RX buffer and + * internal TX memory as 16K TX buffer + */ + w5100_write(priv, W5500_Sn_RXMEM_SIZE(0), 0x10); + w5100_write(priv, W5500_Sn_TXMEM_SIZE(0), 0x10); + + for (i = 1; i < 8; i++) { + w5100_write(priv, W5500_Sn_RXMEM_SIZE(i), 0); + w5100_write(priv, W5500_Sn_TXMEM_SIZE(i), 0); + } +} + +static int w5100_hw_reset(struct w5100_priv *priv) +{ + u32 rtr; + + w5100_reset(priv); + + w5100_disable_intr(priv); + w5100_write_macaddr(priv); + + switch (priv->ops->chip_id) { + case W5100: + w5100_memory_configure(priv); + rtr = W5100_RTR; + break; + case W5200: + w5200_memory_configure(priv); + rtr = W5100_RTR; + break; + case W5500: + w5500_memory_configure(priv); + rtr = W5500_RTR; + break; + default: + return -EINVAL; + } + + if (w5100_read16(priv, rtr) != RTR_DEFAULT) + return -ENODEV; + + return 0; } static void w5100_hw_start(struct w5100_priv *priv) { - w5100_write(priv, W5100_S0_MR, priv->promisc ? + w5100_write(priv, W5100_S0_MR(priv), priv->promisc ? S0_MR_MACRAW : S0_MR_MACRAW_MF); - mmiowb(); w5100_command(priv, S0_CR_OPEN); - w5100_write(priv, W5100_IMR, IR_S0); - mmiowb(); + w5100_enable_intr(priv); } static void w5100_hw_close(struct w5100_priv *priv) { - w5100_write(priv, W5100_IMR, 0); - mmiowb(); + w5100_disable_intr(priv); w5100_command(priv, S0_CR_CLOSE); } @@ -412,20 +764,17 @@ static int w5100_get_regs_len(struct net_device *ndev) } static void w5100_get_regs(struct net_device *ndev, - struct ethtool_regs *regs, void *_buf) + struct ethtool_regs *regs, void *buf) { struct w5100_priv *priv = netdev_priv(ndev); - u8 *buf = _buf; - u16 i; regs->version = 1; - for (i = 0; i < W5100_COMMON_REGS_LEN; i++) - *buf++ = w5100_read(priv, W5100_COMMON_REGS + i); - for (i = 0; i < W5100_S0_REGS_LEN; i++) - *buf++ = w5100_read(priv, W5100_S0_REGS + i); + w5100_readbulk(priv, W5100_COMMON_REGS, buf, W5100_COMMON_REGS_LEN); + buf += W5100_COMMON_REGS_LEN; + w5100_readbulk(priv, S0_REGS(priv), buf, W5100_S0_REGS_LEN); } -static void w5100_tx_timeout(struct net_device *ndev) +static void w5100_restart(struct net_device *ndev) { struct w5100_priv *priv = netdev_priv(ndev); @@ -437,70 +786,134 @@ static void w5100_tx_timeout(struct net_device *ndev) netif_wake_queue(ndev); } -static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev) +static void w5100_restart_work(struct work_struct *work) +{ + struct w5100_priv *priv = container_of(work, struct w5100_priv, + restart_work); + + w5100_restart(priv->ndev); +} + +static void w5100_tx_timeout(struct net_device *ndev) { struct w5100_priv *priv = netdev_priv(ndev); - u16 offset; - netif_stop_queue(ndev); + if (priv->ops->may_sleep) + schedule_work(&priv->restart_work); + else + w5100_restart(ndev); +} - offset = w5100_read16(priv, W5100_S0_TX_WR); +static void w5100_tx_skb(struct net_device *ndev, struct sk_buff *skb) +{ + struct w5100_priv *priv = netdev_priv(ndev); + u16 offset; + + offset = w5100_read16(priv, W5100_S0_TX_WR(priv)); w5100_writebuf(priv, offset, skb->data, skb->len); - w5100_write16(priv, W5100_S0_TX_WR, offset + skb->len); - mmiowb(); + w5100_write16(priv, W5100_S0_TX_WR(priv), offset + skb->len); ndev->stats.tx_bytes += skb->len; ndev->stats.tx_packets++; dev_kfree_skb(skb); w5100_command(priv, S0_CR_SEND); +} + +static void w5100_tx_work(struct work_struct *work) +{ + struct w5100_priv *priv = container_of(work, struct w5100_priv, + tx_work); + struct sk_buff *skb = priv->tx_skb; + + priv->tx_skb = NULL; + + if (WARN_ON(!skb)) + return; + w5100_tx_skb(priv->ndev, skb); +} + +static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev) +{ + struct w5100_priv *priv = netdev_priv(ndev); + + netif_stop_queue(ndev); + + if (priv->ops->may_sleep) { + WARN_ON(priv->tx_skb); + priv->tx_skb = skb; + queue_work(priv->xfer_wq, &priv->tx_work); + } else { + w5100_tx_skb(ndev, skb); + } return NETDEV_TX_OK; } -static int w5100_napi_poll(struct napi_struct *napi, int budget) +static struct sk_buff *w5100_rx_skb(struct net_device *ndev) { - struct w5100_priv *priv = container_of(napi, struct w5100_priv, napi); - struct net_device *ndev = priv->ndev; + struct w5100_priv *priv = netdev_priv(ndev); struct sk_buff *skb; - int rx_count; u16 rx_len; u16 offset; u8 header[2]; + u16 rx_buf_len = w5100_read16(priv, W5100_S0_RX_RSR(priv)); - for (rx_count = 0; rx_count < budget; rx_count++) { - u16 rx_buf_len = w5100_read16(priv, W5100_S0_RX_RSR); - if (rx_buf_len == 0) - break; + if (rx_buf_len == 0) + return NULL; - offset = w5100_read16(priv, W5100_S0_RX_RD); - w5100_readbuf(priv, offset, header, 2); - rx_len = get_unaligned_be16(header) - 2; - - skb = netdev_alloc_skb_ip_align(ndev, rx_len); - if (unlikely(!skb)) { - w5100_write16(priv, W5100_S0_RX_RD, - offset + rx_buf_len); - w5100_command(priv, S0_CR_RECV); - ndev->stats.rx_dropped++; - return -ENOMEM; - } + offset = w5100_read16(priv, W5100_S0_RX_RD(priv)); + w5100_readbuf(priv, offset, header, 2); + rx_len = get_unaligned_be16(header) - 2; - skb_put(skb, rx_len); - w5100_readbuf(priv, offset + 2, skb->data, rx_len); - w5100_write16(priv, W5100_S0_RX_RD, offset + 2 + rx_len); - mmiowb(); + skb = netdev_alloc_skb_ip_align(ndev, rx_len); + if (unlikely(!skb)) { + w5100_write16(priv, W5100_S0_RX_RD(priv), offset + rx_buf_len); w5100_command(priv, S0_CR_RECV); - skb->protocol = eth_type_trans(skb, ndev); + ndev->stats.rx_dropped++; + return NULL; + } + + skb_put(skb, rx_len); + w5100_readbuf(priv, offset + 2, skb->data, rx_len); + w5100_write16(priv, W5100_S0_RX_RD(priv), offset + 2 + rx_len); + w5100_command(priv, S0_CR_RECV); + skb->protocol = eth_type_trans(skb, ndev); + + ndev->stats.rx_packets++; + ndev->stats.rx_bytes += rx_len; + + return skb; +} + +static void w5100_rx_work(struct work_struct *work) +{ + struct w5100_priv *priv = container_of(work, struct w5100_priv, + rx_work); + struct sk_buff *skb; + + while ((skb = w5100_rx_skb(priv->ndev))) + netif_rx_ni(skb); + + w5100_enable_intr(priv); +} + +static int w5100_napi_poll(struct napi_struct *napi, int budget) +{ + struct w5100_priv *priv = container_of(napi, struct w5100_priv, napi); + int rx_count; + + for (rx_count = 0; rx_count < budget; rx_count++) { + struct sk_buff *skb = w5100_rx_skb(priv->ndev); - netif_receive_skb(skb); - ndev->stats.rx_packets++; - ndev->stats.rx_bytes += rx_len; + if (skb) + netif_receive_skb(skb); + else + break; } if (rx_count < budget) { napi_complete(napi); - w5100_write(priv, W5100_IMR, IR_S0); - mmiowb(); + w5100_enable_intr(priv); } return rx_count; @@ -511,11 +924,10 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance) struct net_device *ndev = ndev_instance; struct w5100_priv *priv = netdev_priv(ndev); - int ir = w5100_read(priv, W5100_S0_IR); + int ir = w5100_read(priv, W5100_S0_IR(priv)); if (!ir) return IRQ_NONE; - w5100_write(priv, W5100_S0_IR, ir); - mmiowb(); + w5100_write(priv, W5100_S0_IR(priv), ir); if (ir & S0_IR_SENDOK) { netif_dbg(priv, tx_done, ndev, "tx done\n"); @@ -523,11 +935,12 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance) } if (ir & S0_IR_RECV) { - if (napi_schedule_prep(&priv->napi)) { - w5100_write(priv, W5100_IMR, 0); - mmiowb(); + w5100_disable_intr(priv); + + if (priv->ops->may_sleep) + queue_work(priv->xfer_wq, &priv->rx_work); + else if (napi_schedule_prep(&priv->napi)) __napi_schedule(&priv->napi); - } } return IRQ_HANDLED; @@ -551,6 +964,14 @@ static irqreturn_t w5100_detect_link(int irq, void *ndev_instance) return IRQ_HANDLED; } +static void w5100_setrx_work(struct work_struct *work) +{ + struct w5100_priv *priv = container_of(work, struct w5100_priv, + setrx_work); + + w5100_hw_start(priv); +} + static void w5100_set_rx_mode(struct net_device *ndev) { struct w5100_priv *priv = netdev_priv(ndev); @@ -558,7 +979,11 @@ static void w5100_set_rx_mode(struct net_device *ndev) if (priv->promisc != set_promisc) { priv->promisc = set_promisc; - w5100_hw_start(priv); + + if (priv->ops->may_sleep) + schedule_work(&priv->setrx_work); + else + w5100_hw_start(priv); } } @@ -620,91 +1045,96 @@ static const struct net_device_ops w5100_netdev_ops = { .ndo_change_mtu = eth_change_mtu, }; -static int w5100_hw_probe(struct platform_device *pdev) +static int w5100_mmio_probe(struct platform_device *pdev) { struct wiznet_platform_data *data = dev_get_platdata(&pdev->dev); - struct net_device *ndev = platform_get_drvdata(pdev); - struct w5100_priv *priv = netdev_priv(ndev); - const char *name = netdev_name(ndev); + u8 *mac_addr = NULL; struct resource *mem; - int mem_size; + const struct w5100_ops *ops; int irq; - int ret; - if (data && is_valid_ether_addr(data->mac_addr)) { - memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN); - } else { - eth_hw_addr_random(ndev); - } + if (data && is_valid_ether_addr(data->mac_addr)) + mac_addr = data->mac_addr; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->base = devm_ioremap_resource(&pdev->dev, mem); - if (IS_ERR(priv->base)) - return PTR_ERR(priv->base); - - mem_size = resource_size(mem); - - spin_lock_init(&priv->reg_lock); - priv->indirect = mem_size < W5100_BUS_DIRECT_SIZE; - if (priv->indirect) { - priv->read = w5100_read_indirect; - priv->write = w5100_write_indirect; - priv->read16 = w5100_read16_indirect; - priv->write16 = w5100_write16_indirect; - priv->readbuf = w5100_readbuf_indirect; - priv->writebuf = w5100_writebuf_indirect; - } else { - priv->read = w5100_read_direct; - priv->write = w5100_write_direct; - priv->read16 = w5100_read16_direct; - priv->write16 = w5100_write16_direct; - priv->readbuf = w5100_readbuf_direct; - priv->writebuf = w5100_writebuf_direct; - } - - w5100_hw_reset(priv); - if (w5100_read16(priv, W5100_RTR) != RTR_DEFAULT) - return -ENODEV; + if (resource_size(mem) < W5100_BUS_DIRECT_SIZE) + ops = &w5100_mmio_indirect_ops; + else + ops = &w5100_mmio_direct_ops; irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; - ret = request_irq(irq, w5100_interrupt, - IRQ_TYPE_LEVEL_LOW, name, ndev); - if (ret < 0) - return ret; - priv->irq = irq; - priv->link_gpio = data ? data->link_gpio : -EINVAL; - if (gpio_is_valid(priv->link_gpio)) { - char *link_name = devm_kzalloc(&pdev->dev, 16, GFP_KERNEL); - if (!link_name) - return -ENOMEM; - snprintf(link_name, 16, "%s-link", name); - priv->link_irq = gpio_to_irq(priv->link_gpio); - if (request_any_context_irq(priv->link_irq, w5100_detect_link, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - link_name, priv->ndev) < 0) - priv->link_gpio = -EINVAL; - } + return w5100_probe(&pdev->dev, ops, sizeof(struct w5100_mmio_priv), + mac_addr, irq, data ? data->link_gpio : -EINVAL); +} - netdev_info(ndev, "at 0x%llx irq %d\n", (u64)mem->start, irq); - return 0; +static int w5100_mmio_remove(struct platform_device *pdev) +{ + return w5100_remove(&pdev->dev); +} + +void *w5100_ops_priv(const struct net_device *ndev) +{ + return netdev_priv(ndev) + + ALIGN(sizeof(struct w5100_priv), NETDEV_ALIGN); } +EXPORT_SYMBOL_GPL(w5100_ops_priv); -static int w5100_probe(struct platform_device *pdev) +int w5100_probe(struct device *dev, const struct w5100_ops *ops, + int sizeof_ops_priv, u8 *mac_addr, int irq, int link_gpio) { struct w5100_priv *priv; struct net_device *ndev; int err; + size_t alloc_size; - ndev = alloc_etherdev(sizeof(*priv)); + alloc_size = sizeof(*priv); + if (sizeof_ops_priv) { + alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); + alloc_size += sizeof_ops_priv; + } + alloc_size += NETDEV_ALIGN - 1; + + ndev = alloc_etherdev(alloc_size); if (!ndev) return -ENOMEM; - SET_NETDEV_DEV(ndev, &pdev->dev); - platform_set_drvdata(pdev, ndev); + SET_NETDEV_DEV(ndev, dev); + dev_set_drvdata(dev, ndev); priv = netdev_priv(ndev); + + switch (ops->chip_id) { + case W5100: + priv->s0_regs = W5100_S0_REGS; + priv->s0_tx_buf = W5100_TX_MEM_START; + priv->s0_tx_buf_size = W5100_TX_MEM_SIZE; + priv->s0_rx_buf = W5100_RX_MEM_START; + priv->s0_rx_buf_size = W5100_RX_MEM_SIZE; + break; + case W5200: + priv->s0_regs = W5200_S0_REGS; + priv->s0_tx_buf = W5200_TX_MEM_START; + priv->s0_tx_buf_size = W5200_TX_MEM_SIZE; + priv->s0_rx_buf = W5200_RX_MEM_START; + priv->s0_rx_buf_size = W5200_RX_MEM_SIZE; + break; + case W5500: + priv->s0_regs = W5500_S0_REGS; + priv->s0_tx_buf = W5500_TX_MEM_START; + priv->s0_tx_buf_size = W5500_TX_MEM_SIZE; + priv->s0_rx_buf = W5500_RX_MEM_START; + priv->s0_rx_buf_size = W5500_RX_MEM_SIZE; + break; + default: + err = -EINVAL; + goto err_register; + } + priv->ndev = ndev; + priv->ops = ops; + priv->irq = irq; + priv->link_gpio = link_gpio; ndev->netdev_ops = &w5100_netdev_ops; ndev->ethtool_ops = &w5100_ethtool_ops; @@ -720,22 +1150,76 @@ static int w5100_probe(struct platform_device *pdev) if (err < 0) goto err_register; - err = w5100_hw_probe(pdev); - if (err < 0) - goto err_hw_probe; + priv->xfer_wq = create_workqueue(netdev_name(ndev)); + if (!priv->xfer_wq) { + err = -ENOMEM; + goto err_wq; + } + + INIT_WORK(&priv->rx_work, w5100_rx_work); + INIT_WORK(&priv->tx_work, w5100_tx_work); + INIT_WORK(&priv->setrx_work, w5100_setrx_work); + INIT_WORK(&priv->restart_work, w5100_restart_work); + + if (mac_addr) + memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + else + eth_hw_addr_random(ndev); + + if (priv->ops->init) { + err = priv->ops->init(priv->ndev); + if (err) + goto err_hw; + } + + err = w5100_hw_reset(priv); + if (err) + goto err_hw; + + if (ops->may_sleep) { + err = request_threaded_irq(priv->irq, NULL, w5100_interrupt, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + netdev_name(ndev), ndev); + } else { + err = request_irq(priv->irq, w5100_interrupt, + IRQF_TRIGGER_LOW, netdev_name(ndev), ndev); + } + if (err) + goto err_hw; + + if (gpio_is_valid(priv->link_gpio)) { + char *link_name = devm_kzalloc(dev, 16, GFP_KERNEL); + + if (!link_name) { + err = -ENOMEM; + goto err_gpio; + } + snprintf(link_name, 16, "%s-link", netdev_name(ndev)); + priv->link_irq = gpio_to_irq(priv->link_gpio); + if (request_any_context_irq(priv->link_irq, w5100_detect_link, + IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING, + link_name, priv->ndev) < 0) + priv->link_gpio = -EINVAL; + } return 0; -err_hw_probe: +err_gpio: + free_irq(priv->irq, ndev); +err_hw: + destroy_workqueue(priv->xfer_wq); +err_wq: unregister_netdev(ndev); err_register: free_netdev(ndev); return err; } +EXPORT_SYMBOL_GPL(w5100_probe); -static int w5100_remove(struct platform_device *pdev) +int w5100_remove(struct device *dev) { - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5100_priv *priv = netdev_priv(ndev); w5100_hw_reset(priv); @@ -743,16 +1227,21 @@ static int w5100_remove(struct platform_device *pdev) if (gpio_is_valid(priv->link_gpio)) free_irq(priv->link_irq, ndev); + flush_work(&priv->setrx_work); + flush_work(&priv->restart_work); + flush_workqueue(priv->xfer_wq); + destroy_workqueue(priv->xfer_wq); + unregister_netdev(ndev); free_netdev(ndev); return 0; } +EXPORT_SYMBOL_GPL(w5100_remove); #ifdef CONFIG_PM_SLEEP static int w5100_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5100_priv *priv = netdev_priv(ndev); if (netif_running(ndev)) { @@ -766,8 +1255,7 @@ static int w5100_suspend(struct device *dev) static int w5100_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5100_priv *priv = netdev_priv(ndev); if (netif_running(ndev)) { @@ -783,15 +1271,15 @@ static int w5100_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -static SIMPLE_DEV_PM_OPS(w5100_pm_ops, w5100_suspend, w5100_resume); +SIMPLE_DEV_PM_OPS(w5100_pm_ops, w5100_suspend, w5100_resume); +EXPORT_SYMBOL_GPL(w5100_pm_ops); -static struct platform_driver w5100_driver = { +static struct platform_driver w5100_mmio_driver = { .driver = { .name = DRV_NAME, .pm = &w5100_pm_ops, }, - .probe = w5100_probe, - .remove = w5100_remove, + .probe = w5100_mmio_probe, + .remove = w5100_mmio_remove, }; - -module_platform_driver(w5100_driver); +module_platform_driver(w5100_mmio_driver); diff --git a/drivers/net/ethernet/wiznet/w5100.h b/drivers/net/ethernet/wiznet/w5100.h new file mode 100644 index 000000000000..f8a16fad807b --- /dev/null +++ b/drivers/net/ethernet/wiznet/w5100.h @@ -0,0 +1,36 @@ +/* + * Ethernet driver for the WIZnet W5100 chip. + * + * Copyright (C) 2006-2008 WIZnet Co.,Ltd. + * Copyright (C) 2012 Mike Sinkovsky <msink@permonline.ru> + * + * Licensed under the GPL-2 or later. + */ + +enum { + W5100, + W5200, + W5500, +}; + +struct w5100_ops { + bool may_sleep; + int chip_id; + int (*read)(struct net_device *ndev, u32 addr); + int (*write)(struct net_device *ndev, u32 addr, u8 data); + int (*read16)(struct net_device *ndev, u32 addr); + int (*write16)(struct net_device *ndev, u32 addr, u16 data); + int (*readbulk)(struct net_device *ndev, u32 addr, u8 *buf, int len); + int (*writebulk)(struct net_device *ndev, u32 addr, const u8 *buf, + int len); + int (*reset)(struct net_device *ndev); + int (*init)(struct net_device *ndev); +}; + +void *w5100_ops_priv(const struct net_device *ndev); + +int w5100_probe(struct device *dev, const struct w5100_ops *ops, + int sizeof_ops_priv, u8 *mac_addr, int irq, int link_gpio); +int w5100_remove(struct device *dev); + +extern const struct dev_pm_ops w5100_pm_ops; diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index b103adb8d62e..0dbafedc0a34 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -179,6 +179,8 @@ void fjes_hw_setup_epbuf(struct epbuf_handler *epbh, u8 *mac_addr, u32 mtu) for (i = 0; i < EP_BUFFER_SUPPORT_VLAN_MAX; i++) info->v1i.vlan_id[i] = vlan_id[i]; + + info->v1i.rx_status |= FJES_RX_MTU_CHANGING_DONE; } void @@ -214,6 +216,7 @@ static int fjes_hw_setup(struct fjes_hw *hw) u8 mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; struct fjes_device_command_param param; struct ep_share_mem_info *buf_pair; + unsigned long flags; size_t mem_size; int result; int epidx; @@ -262,10 +265,12 @@ static int fjes_hw_setup(struct fjes_hw *hw) if (result) return result; + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, mac, fjes_support_mtu[0]); fjes_hw_setup_epbuf(&buf_pair->rx, mac, fjes_support_mtu[0]); + spin_unlock_irqrestore(&hw->rx_status_lock, flags); } } @@ -327,6 +332,7 @@ int fjes_hw_init(struct fjes_hw *hw) INIT_WORK(&hw->epstop_task, fjes_hw_epstop_task); mutex_init(&hw->hw_info.lock); + spin_lock_init(&hw->rx_status_lock); hw->max_epid = fjes_hw_get_max_epid(hw); hw->my_epid = fjes_hw_get_my_epid(hw); @@ -734,6 +740,7 @@ fjes_hw_get_partner_ep_status(struct fjes_hw *hw, int epid) void fjes_hw_raise_epstop(struct fjes_hw *hw) { enum ep_partner_status status; + unsigned long flags; int epidx; for (epidx = 0; epidx < hw->max_epid; epidx++) { @@ -753,8 +760,10 @@ void fjes_hw_raise_epstop(struct fjes_hw *hw) set_bit(epidx, &hw->hw_info.buffer_unshare_reserve_bit); set_bit(epidx, &hw->txrx_stop_req_bit); + spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[epidx].tx.info->v1i.rx_status |= FJES_RX_STOP_REQ_REQUEST; + spin_unlock_irqrestore(&hw->rx_status_lock, flags); } } @@ -810,7 +819,8 @@ bool fjes_hw_check_mtu(struct epbuf_handler *epbh, u32 mtu) { union ep_buffer_info *info = epbh->info; - return (info->v1i.frame_max == FJES_MTU_TO_FRAME_SIZE(mtu)); + return ((info->v1i.frame_max == FJES_MTU_TO_FRAME_SIZE(mtu)) && + info->v1i.rx_status & FJES_RX_MTU_CHANGING_DONE); } bool fjes_hw_check_vlan_id(struct epbuf_handler *epbh, u16 vlan_id) @@ -863,6 +873,9 @@ bool fjes_hw_epbuf_rx_is_empty(struct epbuf_handler *epbh) { union ep_buffer_info *info = epbh->info; + if (!(info->v1i.rx_status & FJES_RX_MTU_CHANGING_DONE)) + return true; + if (info->v1i.count_max == 0) return true; @@ -932,6 +945,7 @@ static void fjes_hw_update_zone_task(struct work_struct *work) struct fjes_adapter *adapter; struct net_device *netdev; + unsigned long flags; ulong unshare_bit = 0; ulong share_bit = 0; @@ -1024,8 +1038,10 @@ static void fjes_hw_update_zone_task(struct work_struct *work) continue; if (test_bit(epidx, &share_bit)) { + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&hw->ep_shm_info[epidx].tx, netdev->dev_addr, netdev->mtu); + spin_unlock_irqrestore(&hw->rx_status_lock, flags); mutex_lock(&hw->hw_info.lock); @@ -1069,10 +1085,14 @@ static void fjes_hw_update_zone_task(struct work_struct *work) mutex_unlock(&hw->hw_info.lock); - if (ret == 0) + if (ret == 0) { + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf( &hw->ep_shm_info[epidx].tx, netdev->dev_addr, netdev->mtu); + spin_unlock_irqrestore(&hw->rx_status_lock, + flags); + } } if (test_bit(epidx, &irq_bit)) { @@ -1080,9 +1100,11 @@ static void fjes_hw_update_zone_task(struct work_struct *work) REG_ICTL_MASK_TXRX_STOP_REQ); set_bit(epidx, &hw->txrx_stop_req_bit); + spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[epidx].tx. info->v1i.rx_status |= FJES_RX_STOP_REQ_REQUEST; + spin_unlock_irqrestore(&hw->rx_status_lock, flags); set_bit(epidx, &hw->hw_info.buffer_unshare_reserve_bit); } } @@ -1098,6 +1120,7 @@ static void fjes_hw_epstop_task(struct work_struct *work) { struct fjes_hw *hw = container_of(work, struct fjes_hw, epstop_task); struct fjes_adapter *adapter = (struct fjes_adapter *)hw->back; + unsigned long flags; ulong remain_bit; int epid_bit; @@ -1105,9 +1128,12 @@ static void fjes_hw_epstop_task(struct work_struct *work) while ((remain_bit = hw->epstop_req_bit)) { for (epid_bit = 0; remain_bit; remain_bit >>= 1, epid_bit++) { if (remain_bit & 1) { + spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[epid_bit]. tx.info->v1i.rx_status |= FJES_RX_STOP_REQ_DONE; + spin_unlock_irqrestore(&hw->rx_status_lock, + flags); clear_bit(epid_bit, &hw->epstop_req_bit); set_bit(epid_bit, diff --git a/drivers/net/fjes/fjes_hw.h b/drivers/net/fjes/fjes_hw.h index 6d57b89a0ee8..1445ac99d6e3 100644 --- a/drivers/net/fjes/fjes_hw.h +++ b/drivers/net/fjes/fjes_hw.h @@ -33,9 +33,9 @@ struct fjes_hw; #define EP_BUFFER_SUPPORT_VLAN_MAX 4 #define EP_BUFFER_INFO_SIZE 4096 -#define FJES_DEVICE_RESET_TIMEOUT ((17 + 1) * 3) /* sec */ -#define FJES_COMMAND_REQ_TIMEOUT (5 + 1) /* sec */ -#define FJES_COMMAND_REQ_BUFF_TIMEOUT (8 * 3) /* sec */ +#define FJES_DEVICE_RESET_TIMEOUT ((17 + 1) * 3 * 8) /* sec */ +#define FJES_COMMAND_REQ_TIMEOUT ((5 + 1) * 3 * 8) /* sec */ +#define FJES_COMMAND_REQ_BUFF_TIMEOUT (60 * 3) /* sec */ #define FJES_COMMAND_EPSTOP_WAIT_TIMEOUT (1) /* sec */ #define FJES_CMD_REQ_ERR_INFO_PARAM (0x0001) @@ -57,6 +57,7 @@ struct fjes_hw; #define FJES_RX_STOP_REQ_DONE (0x1) #define FJES_RX_STOP_REQ_REQUEST (0x2) #define FJES_RX_POLL_WORK (0x4) +#define FJES_RX_MTU_CHANGING_DONE (0x8) #define EP_BUFFER_SIZE \ (((sizeof(union ep_buffer_info) + (128 * (64 * 1024))) \ @@ -299,6 +300,8 @@ struct fjes_hw { u8 *base; struct fjes_hw_info hw_info; + + spinlock_t rx_status_lock; /* spinlock for rx_status */ }; int fjes_hw_init(struct fjes_hw *); diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 061b4af4ee62..bb7e90368f8f 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -29,7 +29,7 @@ #include "fjes.h" #define MAJ 1 -#define MIN 0 +#define MIN 1 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) #define DRV_NAME "fjes" char fjes_driver_name[] = DRV_NAME; @@ -290,6 +290,7 @@ static int fjes_close(struct net_device *netdev) { struct fjes_adapter *adapter = netdev_priv(netdev); struct fjes_hw *hw = &adapter->hw; + unsigned long flags; int epidx; netif_tx_stop_all_queues(netdev); @@ -299,13 +300,18 @@ static int fjes_close(struct net_device *netdev) napi_disable(&adapter->napi); + spin_lock_irqsave(&hw->rx_status_lock, flags); for (epidx = 0; epidx < hw->max_epid; epidx++) { if (epidx == hw->my_epid) continue; - adapter->hw.ep_shm_info[epidx].tx.info->v1i.rx_status &= - ~FJES_RX_POLL_WORK; + if (fjes_hw_get_partner_ep_status(hw, epidx) == + EP_PARTNER_SHARED) + adapter->hw.ep_shm_info[epidx] + .tx.info->v1i.rx_status &= + ~FJES_RX_POLL_WORK; } + spin_unlock_irqrestore(&hw->rx_status_lock, flags); fjes_free_irq(adapter); @@ -330,6 +336,7 @@ static int fjes_setup_resources(struct fjes_adapter *adapter) struct net_device *netdev = adapter->netdev; struct ep_share_mem_info *buf_pair; struct fjes_hw *hw = &adapter->hw; + unsigned long flags; int result; int epidx; @@ -371,8 +378,10 @@ static int fjes_setup_resources(struct fjes_adapter *adapter) buf_pair = &hw->ep_shm_info[epidx]; + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, netdev->dev_addr, netdev->mtu); + spin_unlock_irqrestore(&hw->rx_status_lock, flags); if (fjes_hw_epid_is_same_zone(hw, epidx)) { mutex_lock(&hw->hw_info.lock); @@ -402,6 +411,7 @@ static void fjes_free_resources(struct fjes_adapter *adapter) struct ep_share_mem_info *buf_pair; struct fjes_hw *hw = &adapter->hw; bool reset_flag = false; + unsigned long flags; int result; int epidx; @@ -418,8 +428,10 @@ static void fjes_free_resources(struct fjes_adapter *adapter) buf_pair = &hw->ep_shm_info[epidx]; + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, netdev->dev_addr, netdev->mtu); + spin_unlock_irqrestore(&hw->rx_status_lock, flags); clear_bit(epidx, &hw->txrx_stop_req_bit); } @@ -481,6 +493,9 @@ static void fjes_tx_stall_task(struct work_struct *work) info = adapter->hw.ep_shm_info[epid].tx.info; + if (!(info->v1i.rx_status & FJES_RX_MTU_CHANGING_DONE)) + return; + if (EP_RING_FULL(info->v1i.head, info->v1i.tail, info->v1i.count_max)) { all_queue_available = 0; @@ -549,7 +564,8 @@ static void fjes_raise_intr_rxdata_task(struct work_struct *work) if ((hw->ep_shm_info[epid].tx_status_work == FJES_TX_DELAY_SEND_PENDING) && (pstatus == EP_PARTNER_SHARED) && - !(hw->ep_shm_info[epid].rx.info->v1i.rx_status)) { + !(hw->ep_shm_info[epid].rx.info->v1i.rx_status & + FJES_RX_POLL_WORK)) { fjes_hw_raise_interrupt(hw, epid, REG_ICTL_MASK_RX_DATA); } @@ -653,7 +669,7 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) &adapter->hw.ep_shm_info[dest_epid].rx, 0)) { /* version is NOT 0 */ adapter->stats64.tx_carrier_errors += 1; - hw->ep_shm_info[my_epid].net_stats + hw->ep_shm_info[dest_epid].net_stats .tx_carrier_errors += 1; ret = NETDEV_TX_OK; @@ -661,9 +677,9 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) &adapter->hw.ep_shm_info[dest_epid].rx, netdev->mtu)) { adapter->stats64.tx_dropped += 1; - hw->ep_shm_info[my_epid].net_stats.tx_dropped += 1; + hw->ep_shm_info[dest_epid].net_stats.tx_dropped += 1; adapter->stats64.tx_errors += 1; - hw->ep_shm_info[my_epid].net_stats.tx_errors += 1; + hw->ep_shm_info[dest_epid].net_stats.tx_errors += 1; ret = NETDEV_TX_OK; } else if (vlan && @@ -694,10 +710,10 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) (long)adapter->tx_start_jiffies) >= FJES_TX_RETRY_TIMEOUT) { adapter->stats64.tx_fifo_errors += 1; - hw->ep_shm_info[my_epid].net_stats + hw->ep_shm_info[dest_epid].net_stats .tx_fifo_errors += 1; adapter->stats64.tx_errors += 1; - hw->ep_shm_info[my_epid].net_stats + hw->ep_shm_info[dest_epid].net_stats .tx_errors += 1; ret = NETDEV_TX_OK; @@ -714,10 +730,10 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } else { if (!is_multi) { adapter->stats64.tx_packets += 1; - hw->ep_shm_info[my_epid].net_stats + hw->ep_shm_info[dest_epid].net_stats .tx_packets += 1; adapter->stats64.tx_bytes += len; - hw->ep_shm_info[my_epid].net_stats + hw->ep_shm_info[dest_epid].net_stats .tx_bytes += len; } @@ -759,9 +775,12 @@ fjes_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) static int fjes_change_mtu(struct net_device *netdev, int new_mtu) { + struct fjes_adapter *adapter = netdev_priv(netdev); bool running = netif_running(netdev); - int ret = 0; - int idx; + struct fjes_hw *hw = &adapter->hw; + unsigned long flags; + int ret = -EINVAL; + int idx, epidx; for (idx = 0; fjes_support_mtu[idx] != 0; idx++) { if (new_mtu <= fjes_support_mtu[idx]) { @@ -769,19 +788,59 @@ static int fjes_change_mtu(struct net_device *netdev, int new_mtu) if (new_mtu == netdev->mtu) return 0; - if (running) - fjes_close(netdev); + ret = 0; + break; + } + } + + if (ret) + return ret; + + if (running) { + spin_lock_irqsave(&hw->rx_status_lock, flags); + for (epidx = 0; epidx < hw->max_epid; epidx++) { + if (epidx == hw->my_epid) + continue; + hw->ep_shm_info[epidx].tx.info->v1i.rx_status &= + ~FJES_RX_MTU_CHANGING_DONE; + } + spin_unlock_irqrestore(&hw->rx_status_lock, flags); + + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + cancel_work_sync(&adapter->tx_stall_task); + napi_disable(&adapter->napi); + + msleep(1000); + + netif_tx_stop_all_queues(netdev); + } + + netdev->mtu = new_mtu; - netdev->mtu = new_mtu; + if (running) { + spin_lock_irqsave(&hw->rx_status_lock, flags); + for (epidx = 0; epidx < hw->max_epid; epidx++) { + if (epidx == hw->my_epid) + continue; - if (running) - ret = fjes_open(netdev); + spin_lock_irqsave(&hw->rx_status_lock, flags); + fjes_hw_setup_epbuf(&hw->ep_shm_info[epidx].tx, + netdev->dev_addr, + netdev->mtu); - return ret; + hw->ep_shm_info[epidx].tx.info->v1i.rx_status |= + FJES_RX_MTU_CHANGING_DONE; + spin_unlock_irqrestore(&hw->rx_status_lock, flags); } + + netif_tx_wake_all_queues(netdev); + netif_carrier_on(netdev); + napi_enable(&adapter->napi); + napi_schedule(&adapter->napi); } - return -EINVAL; + return ret; } static int fjes_vlan_rx_add_vid(struct net_device *netdev, @@ -825,6 +884,7 @@ static void fjes_txrx_stop_req_irq(struct fjes_adapter *adapter, { struct fjes_hw *hw = &adapter->hw; enum ep_partner_status status; + unsigned long flags; status = fjes_hw_get_partner_ep_status(hw, src_epid); switch (status) { @@ -834,8 +894,10 @@ static void fjes_txrx_stop_req_irq(struct fjes_adapter *adapter, break; case EP_PARTNER_WAITING: if (src_epid < hw->my_epid) { + spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[src_epid].tx.info->v1i.rx_status |= FJES_RX_STOP_REQ_DONE; + spin_unlock_irqrestore(&hw->rx_status_lock, flags); clear_bit(src_epid, &hw->txrx_stop_req_bit); set_bit(src_epid, &adapter->unshare_watch_bitmask); @@ -861,14 +923,17 @@ static void fjes_stop_req_irq(struct fjes_adapter *adapter, int src_epid) { struct fjes_hw *hw = &adapter->hw; enum ep_partner_status status; + unsigned long flags; set_bit(src_epid, &hw->hw_info.buffer_unshare_reserve_bit); status = fjes_hw_get_partner_ep_status(hw, src_epid); switch (status) { case EP_PARTNER_WAITING: + spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[src_epid].tx.info->v1i.rx_status |= FJES_RX_STOP_REQ_DONE; + spin_unlock_irqrestore(&hw->rx_status_lock, flags); clear_bit(src_epid, &hw->txrx_stop_req_bit); /* fall through */ case EP_PARTNER_UNSHARE: @@ -1001,13 +1066,17 @@ static int fjes_poll(struct napi_struct *napi, int budget) size_t frame_len; void *frame; + spin_lock(&hw->rx_status_lock); for (epidx = 0; epidx < hw->max_epid; epidx++) { if (epidx == hw->my_epid) continue; - adapter->hw.ep_shm_info[epidx].tx.info->v1i.rx_status |= - FJES_RX_POLL_WORK; + if (fjes_hw_get_partner_ep_status(hw, epidx) == + EP_PARTNER_SHARED) + adapter->hw.ep_shm_info[epidx] + .tx.info->v1i.rx_status |= FJES_RX_POLL_WORK; } + spin_unlock(&hw->rx_status_lock); while (work_done < budget) { prefetch(&adapter->hw); @@ -1065,13 +1134,17 @@ static int fjes_poll(struct napi_struct *napi, int budget) if (((long)jiffies - (long)adapter->rx_last_jiffies) < 3) { napi_reschedule(napi); } else { + spin_lock(&hw->rx_status_lock); for (epidx = 0; epidx < hw->max_epid; epidx++) { if (epidx == hw->my_epid) continue; - adapter->hw.ep_shm_info[epidx] - .tx.info->v1i.rx_status &= + if (fjes_hw_get_partner_ep_status(hw, epidx) == + EP_PARTNER_SHARED) + adapter->hw.ep_shm_info[epidx].tx + .info->v1i.rx_status &= ~FJES_RX_POLL_WORK; } + spin_unlock(&hw->rx_status_lock); fjes_hw_set_irqmask(hw, REG_ICTL_MASK_RX_DATA, false); } @@ -1203,7 +1276,7 @@ static void fjes_netdev_setup(struct net_device *netdev) netdev->watchdog_timeo = FJES_TX_RETRY_INTERVAL; netdev->netdev_ops = &fjes_netdev_ops; fjes_set_ethtool_ops(netdev); - netdev->mtu = fjes_support_mtu[0]; + netdev->mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; } @@ -1240,6 +1313,7 @@ static void fjes_watch_unshare_task(struct work_struct *work) int max_epid, my_epid, epidx; int stop_req, stop_req_done; ulong unshare_watch_bitmask; + unsigned long flags; int wait_time = 0; int is_shared; int ret; @@ -1292,8 +1366,10 @@ static void fjes_watch_unshare_task(struct work_struct *work) } mutex_unlock(&hw->hw_info.lock); + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&hw->ep_shm_info[epidx].tx, netdev->dev_addr, netdev->mtu); + spin_unlock_irqrestore(&hw->rx_status_lock, flags); clear_bit(epidx, &hw->txrx_stop_req_bit); clear_bit(epidx, &unshare_watch_bitmask); @@ -1331,9 +1407,12 @@ static void fjes_watch_unshare_task(struct work_struct *work) } mutex_unlock(&hw->hw_info.lock); + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf( &hw->ep_shm_info[epidx].tx, netdev->dev_addr, netdev->mtu); + spin_unlock_irqrestore(&hw->rx_status_lock, + flags); clear_bit(epidx, &hw->txrx_stop_req_bit); clear_bit(epidx, &unshare_watch_bitmask); @@ -1341,8 +1420,11 @@ static void fjes_watch_unshare_task(struct work_struct *work) } if (test_bit(epidx, &unshare_watch_bitmask)) { + spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[epidx].tx.info->v1i.rx_status &= ~FJES_RX_STOP_REQ_DONE; + spin_unlock_irqrestore(&hw->rx_status_lock, + flags); } } } diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index a9fbf17eb256..9c40b88fabd5 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -696,16 +696,12 @@ static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr); err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); + if (unlikely(err)) goto free_rt; - } - skb = udp_tunnel_handle_offloads(skb, udp_sum); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); + err = udp_tunnel_handle_offloads(skb, udp_sum); + if (err) goto free_rt; - } gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); @@ -733,16 +729,12 @@ static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb, min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr); err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); + if (unlikely(err)) goto free_dst; - } - skb = udp_tunnel_handle_offloads(skb, udp_sum); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); + err = udp_tunnel_handle_offloads(skb, udp_sum); + if (err) goto free_dst; - } gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); @@ -937,7 +929,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, err = geneve_build_skb(rt, skb, key->tun_flags, vni, info->options_len, opts, flags, xnet); if (unlikely(err)) - goto err; + goto tx_error; tos = ip_tunnel_ecn_encap(key->tos, iip, skb); ttl = key->ttl; @@ -946,7 +938,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, err = geneve_build_skb(rt, skb, 0, geneve->vni, 0, NULL, flags, xnet); if (unlikely(err)) - goto err; + goto tx_error; tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); ttl = geneve->ttl; @@ -964,7 +956,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, tx_error: dev_kfree_skb(skb); -err: + if (err == -ELOOP) dev->stats.collisions++; else if (err == -ENETUNREACH) @@ -1026,7 +1018,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, info->options_len, opts, flags, xnet); if (unlikely(err)) - goto err; + goto tx_error; prio = ip_tunnel_ecn_encap(key->tos, iip, skb); ttl = key->ttl; @@ -1035,7 +1027,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, err = geneve6_build_skb(dst, skb, 0, geneve->vni, 0, NULL, flags, xnet); if (unlikely(err)) - goto err; + goto tx_error; prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), iip, skb); @@ -1054,7 +1046,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, tx_error: dev_kfree_skb(skb); -err: + if (err == -ELOOP) dev->stats.collisions++; else if (err == -ENETUNREACH) @@ -1180,7 +1172,7 @@ static struct device_type geneve_type = { * supply the listening GENEVE udp ports. Callers are expected * to implement the ndo_add_geneve_port. */ -void geneve_get_rx_port(struct net_device *dev) +static void geneve_push_rx_ports(struct net_device *dev) { struct net *net = dev_net(dev); struct geneve_net *gn = net_generic(net, geneve_net_id); @@ -1189,6 +1181,9 @@ void geneve_get_rx_port(struct net_device *dev) struct sock *sk; __be16 port; + if (!dev->netdev_ops->ndo_add_geneve_port) + return; + rcu_read_lock(); list_for_each_entry_rcu(gs, &gn->sock_list, list) { sk = gs->sock->sk; @@ -1198,7 +1193,6 @@ void geneve_get_rx_port(struct net_device *dev) } rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(geneve_get_rx_port); /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) @@ -1546,6 +1540,21 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); +static int geneve_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event == NETDEV_OFFLOAD_PUSH_GENEVE) + geneve_push_rx_ports(dev); + + return NOTIFY_DONE; +} + +static struct notifier_block geneve_notifier_block __read_mostly = { + .notifier_call = geneve_netdevice_event, +}; + static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); @@ -1598,11 +1607,18 @@ static int __init geneve_init_module(void) if (rc) goto out1; - rc = rtnl_link_register(&geneve_link_ops); + rc = register_netdevice_notifier(&geneve_notifier_block); if (rc) goto out2; + rc = rtnl_link_register(&geneve_link_ops); + if (rc) + goto out3; + return 0; + +out3: + unregister_netdevice_notifier(&geneve_notifier_block); out2: unregister_pernet_subsys(&geneve_net_ops); out1: @@ -1613,6 +1629,7 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { rtnl_link_unregister(&geneve_link_ops); + unregister_netdevice_notifier(&geneve_notifier_block); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 72c9f1f352b4..eb6663866c9f 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -780,8 +780,10 @@ static int baycom_send_packet(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb(skb); return NETDEV_TX_OK; } - if (bc->skb) - return NETDEV_TX_LOCKED; + if (bc->skb) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } /* strip KISS byte */ if (skb->len >= HDLCDRV_MAXFLEN+1 || skb->len < 3) { dev_kfree_skb(skb); diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 49fe59b180a8..4bad0b894e9c 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -412,8 +412,10 @@ static netdev_tx_t hdlcdrv_send_packet(struct sk_buff *skb, dev_kfree_skb(skb); return NETDEV_TX_OK; } - if (sm->skb) - return NETDEV_TX_LOCKED; + if (sm->skb) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } netif_stop_queue(dev); sm->skb = skb; return NETDEV_TX_OK; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 8b3bd8ecd1c4..6700a4dca7c8 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -202,6 +202,8 @@ int rndis_filter_receive(struct hv_device *dev, int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter); int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac); +void netvsc_switch_datapath(struct netvsc_device *nv_dev, bool vf); + #define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 @@ -641,6 +643,12 @@ struct netvsc_reconfig { u32 event; }; +struct garp_wrk { + struct work_struct dwrk; + struct net_device *netdev; + struct netvsc_device *netvsc_dev; +}; + /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ @@ -656,6 +664,7 @@ struct net_device_context { struct work_struct work; u32 msg_enable; /* debug level */ + struct garp_wrk gwrk; struct netvsc_stats __percpu *tx_stats; struct netvsc_stats __percpu *rx_stats; @@ -730,6 +739,11 @@ struct netvsc_device { u32 vf_alloc; /* Serial number of the VF to team with */ u32 vf_serial; + atomic_t open_cnt; + /* State to manage the associated VF interface. */ + bool vf_inject; + struct net_device *vf_netdev; + atomic_t vf_use_cnt; }; /* NdisInitialize message */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ec313fc08d82..eddce3cdafa8 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -33,6 +33,30 @@ #include "hyperv_net.h" +/* + * Switch the data path from the synthetic interface to the VF + * interface. + */ +void netvsc_switch_datapath(struct netvsc_device *nv_dev, bool vf) +{ + struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; + struct hv_device *dev = nv_dev->dev; + + memset(init_pkt, 0, sizeof(struct nvsp_message)); + init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH; + if (vf) + init_pkt->msg.v4_msg.active_dp.active_datapath = + NVSP_DATAPATH_VF; + else + init_pkt->msg.v4_msg.active_dp.active_datapath = + NVSP_DATAPATH_SYNTHETIC; + + vmbus_sendpacket(dev->channel, init_pkt, + sizeof(struct nvsp_message), + (unsigned long)init_pkt, + VM_PKT_DATA_INBAND, 0); +} + static struct netvsc_device *alloc_net_device(struct hv_device *device) { @@ -52,11 +76,16 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device) init_waitqueue_head(&net_device->wait_drain); net_device->start_remove = false; net_device->destroy = false; + atomic_set(&net_device->open_cnt, 0); + atomic_set(&net_device->vf_use_cnt, 0); net_device->dev = device; net_device->ndev = ndev; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; + net_device->vf_netdev = NULL; + net_device->vf_inject = false; + hv_set_drvdata(device, net_device); return net_device; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b8121eba33ff..ba3f3f3d48ef 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -610,42 +610,24 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, schedule_delayed_work(&ndev_ctx->dwork, 0); } -/* - * netvsc_recv_callback - Callback when we receive a packet from the - * "wire" on the specified device. - */ -int netvsc_recv_callback(struct hv_device *device_obj, + +static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, struct hv_netvsc_packet *packet, - void **data, struct ndis_tcp_ip_checksum_info *csum_info, - struct vmbus_channel *channel, - u16 vlan_tci) + void *data, u16 vlan_tci) { - struct net_device *net; - struct net_device_context *net_device_ctx; struct sk_buff *skb; - struct netvsc_stats *rx_stats; - net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; - if (!net || net->reg_state != NETREG_REGISTERED) { - return NVSP_STAT_FAIL; - } - net_device_ctx = netdev_priv(net); - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); - - /* Allocate a skb - TODO direct I/O to pages? */ skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); - if (unlikely(!skb)) { - ++net->stats.rx_dropped; - return NVSP_STAT_FAIL; - } + if (!skb) + return skb; /* * Copy to skb. This copy is needed here since the memory pointed by * hv_netvsc_packet cannot be deallocated */ - memcpy(skb_put(skb, packet->total_data_buflen), *data, - packet->total_data_buflen); + memcpy(skb_put(skb, packet->total_data_buflen), data, + packet->total_data_buflen); skb->protocol = eth_type_trans(skb, net); if (csum_info) { @@ -663,6 +645,75 @@ int netvsc_recv_callback(struct hv_device *device_obj, __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + return skb; +} + +/* + * netvsc_recv_callback - Callback when we receive a packet from the + * "wire" on the specified device. + */ +int netvsc_recv_callback(struct hv_device *device_obj, + struct hv_netvsc_packet *packet, + void **data, + struct ndis_tcp_ip_checksum_info *csum_info, + struct vmbus_channel *channel, + u16 vlan_tci) +{ + struct net_device *net; + struct net_device_context *net_device_ctx; + struct sk_buff *skb; + struct sk_buff *vf_skb; + struct netvsc_stats *rx_stats; + struct netvsc_device *netvsc_dev = hv_get_drvdata(device_obj); + u32 bytes_recvd = packet->total_data_buflen; + int ret = 0; + + net = netvsc_dev->ndev; + if (!net || net->reg_state != NETREG_REGISTERED) + return NVSP_STAT_FAIL; + + if (READ_ONCE(netvsc_dev->vf_inject)) { + atomic_inc(&netvsc_dev->vf_use_cnt); + if (!READ_ONCE(netvsc_dev->vf_inject)) { + /* + * We raced; just move on. + */ + atomic_dec(&netvsc_dev->vf_use_cnt); + goto vf_injection_done; + } + + /* + * Inject this packet into the VF inerface. + * On Hyper-V, multicast and brodcast packets + * are only delivered on the synthetic interface + * (after subjecting these to policy filters on + * the host). Deliver these via the VF interface + * in the guest. + */ + vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet, + csum_info, *data, vlan_tci); + if (vf_skb != NULL) { + ++netvsc_dev->vf_netdev->stats.rx_packets; + netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd; + netif_receive_skb(vf_skb); + } else { + ++net->stats.rx_dropped; + ret = NVSP_STAT_FAIL; + } + atomic_dec(&netvsc_dev->vf_use_cnt); + return ret; + } + +vf_injection_done: + net_device_ctx = netdev_priv(net); + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + + /* Allocate a skb - TODO direct I/O to pages? */ + skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); + if (unlikely(!skb)) { + ++net->stats.rx_dropped; + return NVSP_STAT_FAIL; + } skb_record_rx_queue(skb, channel-> offermsg.offer.sub_channel_index); @@ -1074,7 +1125,7 @@ static void netvsc_link_change(struct work_struct *w) netif_tx_stop_all_queues(net); event->event = RNDIS_STATUS_MEDIA_CONNECT; spin_lock_irqsave(&ndev_ctx->lock, flags); - list_add_tail(&event->list, &ndev_ctx->reconfig_events); + list_add(&event->list, &ndev_ctx->reconfig_events); spin_unlock_irqrestore(&ndev_ctx->lock, flags); reschedule = true; } @@ -1102,6 +1153,175 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } +static void netvsc_notify_peers(struct work_struct *wrk) +{ + struct garp_wrk *gwrk; + + gwrk = container_of(wrk, struct garp_wrk, dwrk); + + netdev_notify_peers(gwrk->netdev); + + atomic_dec(&gwrk->netvsc_dev->vf_use_cnt); +} + +static struct netvsc_device *get_netvsc_device(char *mac) +{ + struct net_device *dev; + struct net_device_context *netvsc_ctx = NULL; + int rtnl_locked; + + rtnl_locked = rtnl_trylock(); + + for_each_netdev(&init_net, dev) { + if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) { + if (dev->netdev_ops != &device_ops) + continue; + netvsc_ctx = netdev_priv(dev); + break; + } + } + if (rtnl_locked) + rtnl_unlock(); + + if (netvsc_ctx == NULL) + return NULL; + + return hv_get_drvdata(netvsc_ctx->device_ctx); +} + +static int netvsc_register_vf(struct net_device *vf_netdev) +{ + struct netvsc_device *netvsc_dev; + const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; + + if (eth_ops == NULL || eth_ops == ðtool_ops) + return NOTIFY_DONE; + + /* + * We will use the MAC address to locate the synthetic interface to + * associate with the VF interface. If we don't find a matching + * synthetic interface, move on. + */ + netvsc_dev = get_netvsc_device(vf_netdev->dev_addr); + if (netvsc_dev == NULL) + return NOTIFY_DONE; + + netdev_info(netvsc_dev->ndev, "VF registering: %s\n", vf_netdev->name); + /* + * Take a reference on the module. + */ + try_module_get(THIS_MODULE); + netvsc_dev->vf_netdev = vf_netdev; + return NOTIFY_OK; +} + + +static int netvsc_vf_up(struct net_device *vf_netdev) +{ + struct netvsc_device *netvsc_dev; + const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; + struct net_device_context *net_device_ctx; + + if (eth_ops == ðtool_ops) + return NOTIFY_DONE; + + netvsc_dev = get_netvsc_device(vf_netdev->dev_addr); + + if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + return NOTIFY_DONE; + + netdev_info(netvsc_dev->ndev, "VF up: %s\n", vf_netdev->name); + net_device_ctx = netdev_priv(netvsc_dev->ndev); + netvsc_dev->vf_inject = true; + + /* + * Open the device before switching data path. + */ + rndis_filter_open(net_device_ctx->device_ctx); + + /* + * notify the host to switch the data path. + */ + netvsc_switch_datapath(netvsc_dev, true); + netdev_info(netvsc_dev->ndev, "Data path switched to VF: %s\n", + vf_netdev->name); + + netif_carrier_off(netvsc_dev->ndev); + + /* + * Now notify peers. We are scheduling work to + * notify peers; take a reference to prevent + * the VF interface from vanishing. + */ + atomic_inc(&netvsc_dev->vf_use_cnt); + net_device_ctx->gwrk.netdev = vf_netdev; + net_device_ctx->gwrk.netvsc_dev = netvsc_dev; + schedule_work(&net_device_ctx->gwrk.dwrk); + + return NOTIFY_OK; +} + + +static int netvsc_vf_down(struct net_device *vf_netdev) +{ + struct netvsc_device *netvsc_dev; + struct net_device_context *net_device_ctx; + const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; + + if (eth_ops == ðtool_ops) + return NOTIFY_DONE; + + netvsc_dev = get_netvsc_device(vf_netdev->dev_addr); + + if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + return NOTIFY_DONE; + + netdev_info(netvsc_dev->ndev, "VF down: %s\n", vf_netdev->name); + net_device_ctx = netdev_priv(netvsc_dev->ndev); + netvsc_dev->vf_inject = false; + /* + * Wait for currently active users to + * drain out. + */ + + while (atomic_read(&netvsc_dev->vf_use_cnt) != 0) + udelay(50); + netvsc_switch_datapath(netvsc_dev, false); + netdev_info(netvsc_dev->ndev, "Data path switched from VF: %s\n", + vf_netdev->name); + rndis_filter_close(net_device_ctx->device_ctx); + netif_carrier_on(netvsc_dev->ndev); + /* + * Notify peers. + */ + atomic_inc(&netvsc_dev->vf_use_cnt); + net_device_ctx->gwrk.netdev = netvsc_dev->ndev; + net_device_ctx->gwrk.netvsc_dev = netvsc_dev; + schedule_work(&net_device_ctx->gwrk.dwrk); + + return NOTIFY_OK; +} + + +static int netvsc_unregister_vf(struct net_device *vf_netdev) +{ + struct netvsc_device *netvsc_dev; + const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; + + if (eth_ops == ðtool_ops) + return NOTIFY_DONE; + + netvsc_dev = get_netvsc_device(vf_netdev->dev_addr); + if (netvsc_dev == NULL) + return NOTIFY_DONE; + netdev_info(netvsc_dev->ndev, "VF unregistering: %s\n", + vf_netdev->name); + + netvsc_dev->vf_netdev = NULL; + module_put(THIS_MODULE); + return NOTIFY_OK; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { @@ -1140,6 +1360,7 @@ static int netvsc_probe(struct hv_device *dev, hv_set_drvdata(dev, net); INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); + INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers); spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); @@ -1235,19 +1456,58 @@ static struct hv_driver netvsc_drv = { .remove = netvsc_remove, }; + +/* + * On Hyper-V, every VF interface is matched with a corresponding + * synthetic interface. The synthetic interface is presented first + * to the guest. When the corresponding VF instance is registered, + * we will take care of switching the data path. + */ +static int netvsc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_REGISTER: + return netvsc_register_vf(event_dev); + case NETDEV_UNREGISTER: + return netvsc_unregister_vf(event_dev); + case NETDEV_UP: + return netvsc_vf_up(event_dev); + case NETDEV_DOWN: + return netvsc_vf_down(event_dev); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block netvsc_netdev_notifier = { + .notifier_call = netvsc_netdev_event, +}; + static void __exit netvsc_drv_exit(void) { + unregister_netdevice_notifier(&netvsc_netdev_notifier); vmbus_driver_unregister(&netvsc_drv); } static int __init netvsc_drv_init(void) { + int ret; + if (ring_size < RING_SIZE_MIN) { ring_size = RING_SIZE_MIN; pr_info("Increased ring_size to %d (min allowed)\n", ring_size); } - return vmbus_driver_register(&netvsc_drv); + ret = vmbus_driver_register(&netvsc_drv); + + if (ret) + return ret; + + register_netdevice_notifier(&netvsc_netdev_notifier); + return 0; } MODULE_LICENSE("GPL"); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index c4e1e0408433..a59cdebc9b4b 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1229,6 +1229,9 @@ int rndis_filter_open(struct hv_device *dev) if (!net_device) return -EINVAL; + if (atomic_inc_return(&net_device->open_cnt) != 1) + return 0; + return rndis_filter_open_device(net_device->extension); } @@ -1239,5 +1242,8 @@ int rndis_filter_close(struct hv_device *dev) if (!nvdev) return -EINVAL; + if (atomic_dec_return(&nvdev->open_cnt) != 0) + return 0; + return rndis_filter_close_device(nvdev->extension); } diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index cb9e9fe6d77a..9f10da60e02d 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1340,7 +1340,7 @@ static struct at86rf2xx_chip_data at86rf233_data = { .t_off_to_aack = 80, .t_off_to_tx_on = 80, .t_off_to_sleep = 35, - .t_sleep_to_off = 210, + .t_sleep_to_off = 1000, .t_frame = 4096, .t_p_ack = 545, .rssi_base_val = -91, @@ -1355,7 +1355,7 @@ static struct at86rf2xx_chip_data at86rf231_data = { .t_off_to_aack = 110, .t_off_to_tx_on = 110, .t_off_to_sleep = 35, - .t_sleep_to_off = 380, + .t_sleep_to_off = 1000, .t_frame = 4096, .t_p_ack = 545, .rssi_base_val = -91, @@ -1370,7 +1370,7 @@ static struct at86rf2xx_chip_data at86rf212_data = { .t_off_to_aack = 200, .t_off_to_tx_on = 200, .t_off_to_sleep = 35, - .t_sleep_to_off = 380, + .t_sleep_to_off = 1000, .t_frame = 4096, .t_p_ack = 545, .rssi_base_val = -100, diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index b1cd865ade2e..52c9051f3b95 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -3,6 +3,8 @@ * * Written 2013 by Werner Almesberger <werner@almesberger.net> * + * Copyright (c) 2015 - 2016 Stefan Schmidt <stefan@datenfreihafen.org> + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, version 2 @@ -472,6 +474,76 @@ atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) return -EINVAL; } +#define ATUSB_MAX_ED_LEVELS 0xF +static const s32 atusb_ed_levels[ATUSB_MAX_ED_LEVELS + 1] = { + -9100, -8900, -8700, -8500, -8300, -8100, -7900, -7700, -7500, -7300, + -7100, -6900, -6700, -6500, -6300, -6100, +}; + +static int +atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca) +{ + struct atusb *atusb = hw->priv; + u8 val; + + /* mapping 802.15.4 to driver spec */ + switch (cca->mode) { + case NL802154_CCA_ENERGY: + val = 1; + break; + case NL802154_CCA_CARRIER: + val = 2; + break; + case NL802154_CCA_ENERGY_CARRIER: + switch (cca->opt) { + case NL802154_CCA_OPT_ENERGY_CARRIER_AND: + val = 3; + break; + case NL802154_CCA_OPT_ENERGY_CARRIER_OR: + val = 0; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return atusb_write_subreg(atusb, SR_CCA_MODE, val); +} + +static int +atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) +{ + struct atusb *atusb = hw->priv; + u32 i; + + for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) { + if (hw->phy->supported.cca_ed_levels[i] == mbm) + return atusb_write_subreg(atusb, SR_CCA_ED_THRES, i); + } + + return -EINVAL; +} + +static int +atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries) +{ + struct atusb *atusb = hw->priv; + int ret; + + ret = atusb_write_subreg(atusb, SR_MIN_BE, min_be); + if (ret) + return ret; + + ret = atusb_write_subreg(atusb, SR_MAX_BE, max_be); + if (ret) + return ret; + + return atusb_write_subreg(atusb, SR_MAX_CSMA_RETRIES, retries); +} + static int atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) { @@ -508,6 +580,9 @@ static struct ieee802154_ops atusb_ops = { .stop = atusb_stop, .set_hw_addr_filt = atusb_set_hw_addr_filt, .set_txpower = atusb_set_txpower, + .set_cca_mode = atusb_set_cca_mode, + .set_cca_ed_level = atusb_set_cca_ed_level, + .set_csma_params = atusb_set_csma_params, .set_promiscuous_mode = atusb_set_promiscuous_mode, }; @@ -636,9 +711,20 @@ static int atusb_probe(struct usb_interface *interface, hw->parent = &usb_dev->dev; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | - IEEE802154_HW_PROMISCUOUS; + IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; + + hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | + WPAN_PHY_FLAG_CCA_MODE; + + hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | + BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER); + hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | + BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); + + hw->phy->supported.cca_ed_levels = atusb_ed_levels; + hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); - hw->phy->flags = WPAN_PHY_FLAG_TXPOWER; + hw->phy->cca.mode = NL802154_CCA_ENERGY; hw->phy->current_page = 0; hw->phy->current_channel = 11; /* reset default */ @@ -647,6 +733,7 @@ static int atusb_probe(struct usb_interface *interface, hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); hw->phy->transmit_power = hw->phy->supported.tx_powers[0]; ieee802154_random_extended_addr(&hw->phy->perm_extended_addr); + hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7]; atusb_command(atusb, ATUSB_RF_RESET, 0); atusb_get_and_show_chip(atusb); diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 764a2bddfaee..f446db828561 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -61,6 +61,7 @@ #define REG_TXBCON0 0x1A #define REG_TXNCON 0x1B /* Transmit Normal FIFO Control */ #define BIT_TXNTRIG BIT(0) +#define BIT_TXNSECEN BIT(1) #define BIT_TXNACKREQ BIT(2) #define REG_TXG1CON 0x1C @@ -85,10 +86,13 @@ #define REG_INTSTAT 0x31 /* Interrupt Status */ #define BIT_TXNIF BIT(0) #define BIT_RXIF BIT(3) +#define BIT_SECIF BIT(4) +#define BIT_SECIGNORE BIT(7) #define REG_INTCON 0x32 /* Interrupt Control */ #define BIT_TXNIE BIT(0) #define BIT_RXIE BIT(3) +#define BIT_SECIE BIT(4) #define REG_GPIO 0x33 /* GPIO */ #define REG_TRISGPIO 0x34 /* GPIO direction */ @@ -548,6 +552,9 @@ static void write_tx_buf_complete(void *context) u8 val = BIT_TXNTRIG; int ret; + if (ieee802154_is_secen(fc)) + val |= BIT_TXNSECEN; + if (ieee802154_is_ackreq(fc)) val |= BIT_TXNACKREQ; @@ -616,7 +623,7 @@ static int mrf24j40_start(struct ieee802154_hw *hw) /* Clear TXNIE and RXIE. Enable interrupts */ return regmap_update_bits(devrec->regmap_short, REG_INTCON, - BIT_TXNIE | BIT_RXIE, 0); + BIT_TXNIE | BIT_RXIE | BIT_SECIE, 0); } static void mrf24j40_stop(struct ieee802154_hw *hw) @@ -1025,6 +1032,11 @@ static void mrf24j40_intstat_complete(void *context) enable_irq(devrec->spi->irq); + /* Ignore Rx security decryption */ + if (intstat & BIT_SECIF) + regmap_write_async(devrec->regmap_short, REG_SECCON0, + BIT_SECIGNORE); + /* Check for TX complete */ if (intstat & BIT_TXNIF) ieee802154_xmit_complete(devrec->hw, devrec->tx_skb, false); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 57941d3f4227..1c4d395fbd49 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -113,6 +113,7 @@ static int ipvlan_init(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); const struct net_device *phy_dev = ipvlan->phy_dev; + struct ipvl_port *port = ipvlan->port; dev->state = (dev->state & ~IPVLAN_STATE_MASK) | (phy_dev->state & IPVLAN_STATE_MASK); @@ -128,6 +129,8 @@ static int ipvlan_init(struct net_device *dev) if (!ipvlan->pcpu_stats) return -ENOMEM; + port->count += 1; + return 0; } @@ -481,27 +484,21 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_IPVLAN_SLAVE; - port->count += 1; err = register_netdevice(dev); if (err < 0) - goto ipvlan_destroy_port; + return err; err = netdev_upper_dev_link(phy_dev, dev); - if (err) - goto ipvlan_destroy_port; + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); ipvlan_set_port_mode(port, mode); netif_stacked_transfer_operstate(phy_dev, dev); return 0; - -ipvlan_destroy_port: - port->count -= 1; - if (!port->count) - ipvlan_port_destroy(phy_dev); - - return err; } static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 84d3e5ca8817..3add2c4aac21 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -880,12 +880,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, macsec_skb_cb(skb)->valid = false; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) - return NULL; + return ERR_PTR(-ENOMEM); req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC); if (!req) { kfree_skb(skb); - return NULL; + return ERR_PTR(-ENOMEM); } hdr = (struct macsec_eth_header *)skb->data; @@ -905,7 +905,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { aead_request_free(req); - return NULL; + return ERR_PTR(-ENOMEM); } } else { /* integrity only: all headers + data authenticated */ @@ -921,14 +921,14 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, dev_hold(dev); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) { - return NULL; + return ERR_PTR(ret); } else if (ret != 0) { /* decryption/authentication failed * 10.6 if validateFrames is disabled, deliver anyway */ if (ret != -EBADMSG) { kfree_skb(skb); - skb = NULL; + skb = ERR_PTR(ret); } } else { macsec_skb_cb(skb)->valid = true; @@ -1146,8 +1146,10 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) secy->validate_frames != MACSEC_VALIDATE_DISABLED) skb = macsec_decrypt(skb, dev, rx_sa, sci, secy); - if (!skb) { - macsec_rxsa_put(rx_sa); + if (IS_ERR(skb)) { + /* the decrypt callback needs the reference */ + if (PTR_ERR(skb) != -EINPROGRESS) + macsec_rxsa_put(rx_sa); rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; @@ -1161,7 +1163,8 @@ deliver: macsec_extra_len(macsec_skb_cb(skb)->has_sci)); macsec_reset_skb(skb, secy->netdev); - macsec_rxsa_put(rx_sa); + if (rx_sa) + macsec_rxsa_put(rx_sa); count_rx(dev, skb->len); rcu_read_unlock(); @@ -1405,9 +1408,10 @@ static sci_t nla_get_sci(const struct nlattr *nla) return (__force sci_t)nla_get_u64(nla); } -static int nla_put_sci(struct sk_buff *skb, int attrtype, sci_t value) +static int nla_put_sci(struct sk_buff *skb, int attrtype, sci_t value, + int padattr) { - return nla_put_u64(skb, attrtype, (__force u64)value); + return nla_put_u64_64bit(skb, attrtype, (__force u64)value, padattr); } static struct macsec_tx_sa *get_txsa_from_nl(struct net *net, @@ -1622,8 +1626,9 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) } rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL); - if (init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len, - secy->icv_len)) { + if (!rx_sa || init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), + secy->key_len, secy->icv_len)) { + kfree(rx_sa); rtnl_unlock(); return -ENOMEM; } @@ -1768,6 +1773,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) tx_sa = kmalloc(sizeof(*tx_sa), GFP_KERNEL); if (!tx_sa || init_tx_sa(tx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len, secy->icv_len)) { + kfree(tx_sa); rtnl_unlock(); return -ENOMEM; } @@ -2131,16 +2137,36 @@ static int copy_rx_sc_stats(struct sk_buff *skb, sum.InPktsUnusedSA += tmp.InPktsUnusedSA; } - if (nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED, sum.InOctetsValidated) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED, sum.InOctetsDecrypted) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED, sum.InPktsUnchecked) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED, sum.InPktsDelayed) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK, sum.InPktsOK) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID, sum.InPktsInvalid) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE, sum.InPktsLate) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, sum.InPktsNotValid) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, sum.InPktsNotUsingSA) || - nla_put_u64(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, sum.InPktsUnusedSA)) + if (nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED, + sum.InOctetsValidated, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED, + sum.InOctetsDecrypted, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED, + sum.InPktsUnchecked, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED, + sum.InPktsDelayed, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK, + sum.InPktsOK, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID, + sum.InPktsInvalid, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE, + sum.InPktsLate, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, + sum.InPktsNotValid, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, + sum.InPktsNotUsingSA, + MACSEC_RXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, + sum.InPktsUnusedSA, + MACSEC_RXSC_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; @@ -2169,10 +2195,18 @@ static int copy_tx_sc_stats(struct sk_buff *skb, sum.OutOctetsEncrypted += tmp.OutOctetsEncrypted; } - if (nla_put_u64(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED, sum.OutPktsProtected) || - nla_put_u64(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, sum.OutPktsEncrypted) || - nla_put_u64(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, sum.OutOctetsProtected) || - nla_put_u64(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, sum.OutOctetsEncrypted)) + if (nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED, + sum.OutPktsProtected, + MACSEC_TXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, + sum.OutPktsEncrypted, + MACSEC_TXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, + sum.OutOctetsProtected, + MACSEC_TXSC_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, + sum.OutOctetsEncrypted, + MACSEC_TXSC_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; @@ -2205,14 +2239,30 @@ static int copy_secy_stats(struct sk_buff *skb, sum.InPktsOverrun += tmp.InPktsOverrun; } - if (nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED, sum.OutPktsUntagged) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED, sum.InPktsUntagged) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG, sum.OutPktsTooLong) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG, sum.InPktsNoTag) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG, sum.InPktsBadTag) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI, sum.InPktsUnknownSCI) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI, sum.InPktsNoSCI) || - nla_put_u64(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN, sum.InPktsOverrun)) + if (nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED, + sum.OutPktsUntagged, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED, + sum.InPktsUntagged, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG, + sum.OutPktsTooLong, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG, + sum.InPktsNoTag, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG, + sum.InPktsBadTag, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI, + sum.InPktsUnknownSCI, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI, + sum.InPktsNoSCI, + MACSEC_SECY_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN, + sum.InPktsOverrun, + MACSEC_SECY_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; @@ -2226,8 +2276,11 @@ static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb) if (!secy_nest) return 1; - if (nla_put_sci(skb, MACSEC_SECY_ATTR_SCI, secy->sci) || - nla_put_u64(skb, MACSEC_SECY_ATTR_CIPHER_SUITE, DEFAULT_CIPHER_ID) || + if (nla_put_sci(skb, MACSEC_SECY_ATTR_SCI, secy->sci, + MACSEC_SECY_ATTR_PAD) || + nla_put_u64_64bit(skb, MACSEC_SECY_ATTR_CIPHER_SUITE, + MACSEC_DEFAULT_CIPHER_ID, + MACSEC_SECY_ATTR_PAD) || nla_put_u8(skb, MACSEC_SECY_ATTR_ICV_LEN, secy->icv_len) || nla_put_u8(skb, MACSEC_SECY_ATTR_OPER, secy->operational) || nla_put_u8(skb, MACSEC_SECY_ATTR_PROTECT, secy->protect_frames) || @@ -2268,7 +2321,7 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (!hdr) return -EMSGSIZE; - rtnl_lock(); + genl_dump_check_consistent(cb, hdr, &macsec_fam); if (nla_put_u32(skb, MACSEC_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -2312,7 +2365,9 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put_u32(skb, MACSEC_SA_ATTR_PN, tx_sa->next_pn) || - nla_put_u64(skb, MACSEC_SA_ATTR_KEYID, tx_sa->key.id) || + nla_put_u64_64bit(skb, MACSEC_SA_ATTR_KEYID, + tx_sa->key.id, + MACSEC_SA_ATTR_PAD) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, tx_sa->active)) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); @@ -2353,7 +2408,8 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, } if (nla_put_u8(skb, MACSEC_RXSC_ATTR_ACTIVE, rx_sc->active) || - nla_put_sci(skb, MACSEC_RXSC_ATTR_SCI, rx_sc->sci)) { + nla_put_sci(skb, MACSEC_RXSC_ATTR_SCI, rx_sc->sci, + MACSEC_RXSC_ATTR_PAD)) { nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; @@ -2413,7 +2469,9 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put_u32(skb, MACSEC_SA_ATTR_PN, rx_sa->next_pn) || - nla_put_u64(skb, MACSEC_SA_ATTR_KEYID, rx_sa->key.id) || + nla_put_u64_64bit(skb, MACSEC_SA_ATTR_KEYID, + rx_sa->key.id, + MACSEC_SA_ATTR_PAD) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, rx_sa->active)) { nla_nest_cancel(skb, rxsa_nest); nla_nest_cancel(skb, rxsc_nest); @@ -2429,18 +2487,17 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, nla_nest_end(skb, rxsc_list); - rtnl_unlock(); - genlmsg_end(skb, hdr); return 0; nla_put_failure: - rtnl_unlock(); genlmsg_cancel(skb, hdr); return -EMSGSIZE; } +static int macsec_generation = 1; /* protected by RTNL */ + static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -2450,6 +2507,10 @@ static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb) dev_idx = cb->args[0]; d = 0; + rtnl_lock(); + + cb->seq = macsec_generation; + for_each_netdev(net, dev) { struct macsec_secy *secy; @@ -2467,6 +2528,7 @@ next: } done: + rtnl_unlock(); cb->args[0] = d; return skb->len; } @@ -2826,7 +2888,7 @@ static void macsec_free_netdev(struct net_device *dev) static void macsec_setup(struct net_device *dev) { ether_setup(dev); - dev->tx_queue_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &macsec_netdev_ops; dev->destructor = macsec_free_netdev; @@ -2920,10 +2982,14 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head) struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); + macsec_generation++; + unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); - if (list_empty(&rxd->secys)) + if (list_empty(&rxd->secys)) { netdev_rx_handler_unregister(real_dev); + kfree(rxd); + } macsec_del_dev(macsec); } @@ -2945,8 +3011,10 @@ static int register_macsec_dev(struct net_device *real_dev, err = netdev_rx_handler_register(real_dev, macsec_handle_frame, rxd); - if (err < 0) + if (err < 0) { + kfree(rxd); return err; + } } list_add_tail_rcu(&macsec->secys, &rxd->secys); @@ -3066,6 +3134,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) goto del_dev; + macsec_generation++; + dev_hold(real_dev); return 0; @@ -3079,7 +3149,7 @@ unregister: static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[]) { - u64 csid = DEFAULT_CIPHER_ID; + u64 csid = MACSEC_DEFAULT_CIPHER_ID; u8 icv_len = DEFAULT_ICV_LEN; int flag; bool es, scb, sci; @@ -3094,8 +3164,8 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); switch (csid) { - case DEFAULT_CIPHER_ID: - case DEFAULT_CIPHER_ALT: + case MACSEC_DEFAULT_CIPHER_ID: + case MACSEC_DEFAULT_CIPHER_ALT: if (icv_len < MACSEC_MIN_ICV_LEN || icv_len > MACSEC_MAX_ICV_LEN) return -EINVAL; @@ -3129,8 +3199,8 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[]) nla_get_u8(data[IFLA_MACSEC_VALIDATION]) > MACSEC_VALIDATE_MAX) return -EINVAL; - if ((data[IFLA_MACSEC_PROTECT] && - nla_get_u8(data[IFLA_MACSEC_PROTECT])) && + if ((data[IFLA_MACSEC_REPLAY_PROTECT] && + nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT])) && !data[IFLA_MACSEC_WINDOW]) return -EINVAL; @@ -3145,9 +3215,9 @@ static struct net *macsec_get_link_net(const struct net_device *dev) static size_t macsec_get_size(const struct net_device *dev) { return 0 + - nla_total_size(8) + /* SCI */ + nla_total_size_64bit(8) + /* SCI */ nla_total_size(1) + /* ICV_LEN */ - nla_total_size(8) + /* CIPHER_SUITE */ + nla_total_size_64bit(8) + /* CIPHER_SUITE */ nla_total_size(4) + /* WINDOW */ nla_total_size(1) + /* ENCODING_SA */ nla_total_size(1) + /* ENCRYPT */ @@ -3166,9 +3236,11 @@ static int macsec_fill_info(struct sk_buff *skb, struct macsec_secy *secy = &macsec_priv(dev)->secy; struct macsec_tx_sc *tx_sc = &secy->tx_sc; - if (nla_put_sci(skb, IFLA_MACSEC_SCI, secy->sci) || + if (nla_put_sci(skb, IFLA_MACSEC_SCI, secy->sci, + IFLA_MACSEC_PAD) || nla_put_u8(skb, IFLA_MACSEC_ICV_LEN, secy->icv_len) || - nla_put_u64(skb, IFLA_MACSEC_CIPHER_SUITE, DEFAULT_CIPHER_ID) || + nla_put_u64_64bit(skb, IFLA_MACSEC_CIPHER_SUITE, + MACSEC_DEFAULT_CIPHER_ID, IFLA_MACSEC_PAD) || nla_put_u8(skb, IFLA_MACSEC_ENCODING_SA, tx_sc->encoding_sa) || nla_put_u8(skb, IFLA_MACSEC_ENCRYPT, tx_sc->encrypt) || nla_put_u8(skb, IFLA_MACSEC_PROTECT, secy->protect_frames) || diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 2bcf1f321bea..cb01023eab41 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -795,6 +795,7 @@ static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); const struct net_device *lowerdev = vlan->lowerdev; + struct macvlan_port *port = vlan->port; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); @@ -812,6 +813,8 @@ static int macvlan_init(struct net_device *dev) if (!vlan->pcpu_stats) return -ENOMEM; + port->count += 1; + return 0; } @@ -1312,10 +1315,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, return err; } - port->count += 1; err = register_netdevice(dev); if (err < 0) - goto destroy_port; + return err; dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev); @@ -1330,10 +1332,6 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, unregister_netdev: unregister_netdevice(dev); -destroy_port: - port->count -= 1; - if (!port->count) - macvlan_port_destroy(lowerdev); return err; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 95394edd1ed5..74cb15a2e032 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1303,6 +1303,9 @@ static int macvtap_device_event(struct notifier_block *unused, } break; case NETDEV_UNREGISTER: + /* vlan->minor == 0 if NETDEV_REGISTER above failed */ + if (vlan->minor == 0) + break; devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(macvtap_class, devt); macvtap_free_minor(vlan); diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index fc07a8866020..9050f21e6f33 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -328,7 +328,7 @@ struct phy_device *fixed_phy_register(unsigned int irq, return ERR_PTR(ret); phy = get_phy_device(fmb->mii_bus, phy_addr, false); - if (!phy || IS_ERR(phy)) { + if (IS_ERR(phy)) { fixed_phy_del(phy_addr); return ERR_PTR(-EINVAL); } diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 751202a285a6..388f9922647b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -333,7 +333,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) struct phy_device *phydev; phydev = mdiobus_scan(bus, i); - if (IS_ERR(phydev)) { + if (IS_ERR(phydev) && (PTR_ERR(phydev) != -ENODEV)) { err = PTR_ERR(phydev); goto error; } @@ -419,7 +419,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) int err; phydev = get_phy_device(bus, addr, false); - if (IS_ERR(phydev) || phydev == NULL) + if (IS_ERR(phydev)) return phydev; /* diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 5590b9c182c9..6f221c8c2a7f 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -362,6 +362,60 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) } EXPORT_SYMBOL(phy_ethtool_sset); +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd) +{ + u8 autoneg = cmd->base.autoneg; + u8 duplex = cmd->base.duplex; + u32 speed = cmd->base.speed; + u32 advertising; + + if (cmd->base.phy_address != phydev->mdio.addr) + return -EINVAL; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + /* We make sure that we don't pass unsupported values in to the PHY */ + advertising &= phydev->supported; + + /* Verify the settings we care about. */ + if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (autoneg == AUTONEG_ENABLE && advertising == 0) + return -EINVAL; + + if (autoneg == AUTONEG_DISABLE && + ((speed != SPEED_1000 && + speed != SPEED_100 && + speed != SPEED_10) || + (duplex != DUPLEX_HALF && + duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = autoneg; + + phydev->speed = speed; + + phydev->advertising = advertising; + + if (autoneg == AUTONEG_ENABLE) + phydev->advertising |= ADVERTISED_Autoneg; + else + phydev->advertising &= ~ADVERTISED_Autoneg; + + phydev->duplex = duplex; + + phydev->mdix = cmd->base.eth_tp_mdix_ctrl; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_set); + int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) { cmd->supported = phydev->supported; @@ -385,6 +439,33 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) } EXPORT_SYMBOL(phy_ethtool_gset); +int phy_ethtool_ksettings_get(struct phy_device *phydev, + struct ethtool_link_ksettings *cmd) +{ + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + phydev->supported); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + phydev->advertising); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + phydev->lp_advertising); + + cmd->base.speed = phydev->speed; + cmd->base.duplex = phydev->duplex; + if (phydev->interface == PHY_INTERFACE_MODE_MOCA) + cmd->base.port = PORT_BNC; + else + cmd->base.port = PORT_MII; + + cmd->base.phy_address = phydev->mdio.addr; + cmd->base.autoneg = phydev->autoneg; + cmd->base.eth_tp_mdix_ctrl = phydev->mdix; + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_get); + /** * phy_mii_ioctl - generic PHY MII ioctl interface * @phydev: the phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e551f3a89cfd..e977ba931878 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -529,7 +529,7 @@ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) /* If the phy_id is mostly Fs, there is no device there */ if ((phy_id & 0x1fffffff) == 0x1fffffff) - return NULL; + return ERR_PTR(-ENODEV); return phy_device_create(bus, addr, phy_id, is_c45, &c45_ids); } @@ -1123,8 +1123,9 @@ static int genphy_config_advert(struct phy_device *phydev) */ int genphy_setup_forced(struct phy_device *phydev) { - int ctl = 0; + int ctl = phy_read(phydev, MII_BMCR); + ctl &= BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN; phydev->pause = 0; phydev->asym_pause = 0; diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index b5d50d458728..93ffedfa2994 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -441,7 +441,7 @@ static int ks8995_probe(struct spi_device *spi) return -ENOMEM; mutex_init(&ks->lock); - ks->spi = spi_dev_get(spi); + ks->spi = spi; ks->chip = &ks8995_chip[variant]; if (ks->spi->dev.of_node) { diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index f572b31a2b20..8dedafa1a95d 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -46,6 +46,7 @@ #include <linux/device.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/file.h> #include <asm/unaligned.h> #include <net/slhc_vj.h> #include <linux/atomic.h> @@ -183,6 +184,12 @@ struct channel { #endif /* CONFIG_PPP_MULTILINK */ }; +struct ppp_config { + struct file *file; + s32 unit; + bool ifname_is_set; +}; + /* * SMP locking issues: * Both the ppp.rlock and ppp.wlock locks protect the ppp.channels @@ -269,8 +276,7 @@ static void ppp_ccp_peek(struct ppp *ppp, struct sk_buff *skb, int inbound); static void ppp_ccp_closed(struct ppp *ppp); static struct compressor *find_compressor(int type); static void ppp_get_stats(struct ppp *ppp, struct ppp_stats *st); -static struct ppp *ppp_create_interface(struct net *net, int unit, - struct file *file, int *retp); +static int ppp_create_interface(struct net *net, struct file *file, int *unit); static void init_ppp_file(struct ppp_file *pf, int kind); static void ppp_destroy_interface(struct ppp *ppp); static struct ppp *ppp_find_unit(struct ppp_net *pn, int unit); @@ -282,6 +288,7 @@ static int unit_get(struct idr *p, void *ptr); static int unit_set(struct idr *p, void *ptr, int n); static void unit_put(struct idr *p, int n); static void *unit_find(struct idr *p, int n); +static void ppp_setup(struct net_device *dev); static const struct net_device_ops ppp_netdev_ops; @@ -853,12 +860,12 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, /* Create a new ppp unit */ if (get_user(unit, p)) break; - ppp = ppp_create_interface(net, unit, file, &err); - if (!ppp) + err = ppp_create_interface(net, file, &unit); + if (err < 0) break; - file->private_data = &ppp->file; + err = -EFAULT; - if (put_user(ppp->file.index, p)) + if (put_user(unit, p)) break; err = 0; break; @@ -960,6 +967,188 @@ static struct pernet_operations ppp_net_ops = { .size = sizeof(struct ppp_net), }; +static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) +{ + struct ppp_net *pn = ppp_pernet(ppp->ppp_net); + int ret; + + mutex_lock(&pn->all_ppp_mutex); + + if (unit < 0) { + ret = unit_get(&pn->units_idr, ppp); + if (ret < 0) + goto err; + } else { + /* Caller asked for a specific unit number. Fail with -EEXIST + * if unavailable. For backward compatibility, return -EEXIST + * too if idr allocation fails; this makes pppd retry without + * requesting a specific unit number. + */ + if (unit_find(&pn->units_idr, unit)) { + ret = -EEXIST; + goto err; + } + ret = unit_set(&pn->units_idr, ppp, unit); + if (ret < 0) { + /* Rewrite error for backward compatibility */ + ret = -EEXIST; + goto err; + } + } + ppp->file.index = ret; + + if (!ifname_is_set) + snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + + ret = register_netdevice(ppp->dev); + if (ret < 0) + goto err_unit; + + atomic_inc(&ppp_unit_count); + + mutex_unlock(&pn->all_ppp_mutex); + + return 0; + +err_unit: + unit_put(&pn->units_idr, ppp->file.index); +err: + mutex_unlock(&pn->all_ppp_mutex); + + return ret; +} + +static int ppp_dev_configure(struct net *src_net, struct net_device *dev, + const struct ppp_config *conf) +{ + struct ppp *ppp = netdev_priv(dev); + int indx; + int err; + + ppp->dev = dev; + ppp->ppp_net = src_net; + ppp->mru = PPP_MRU; + ppp->owner = conf->file; + + init_ppp_file(&ppp->file, INTERFACE); + ppp->file.hdrlen = PPP_HDRLEN - 2; /* don't count proto bytes */ + + for (indx = 0; indx < NUM_NP; ++indx) + ppp->npmode[indx] = NPMODE_PASS; + INIT_LIST_HEAD(&ppp->channels); + spin_lock_init(&ppp->rlock); + spin_lock_init(&ppp->wlock); +#ifdef CONFIG_PPP_MULTILINK + ppp->minseq = -1; + skb_queue_head_init(&ppp->mrq); +#endif /* CONFIG_PPP_MULTILINK */ +#ifdef CONFIG_PPP_FILTER + ppp->pass_filter = NULL; + ppp->active_filter = NULL; +#endif /* CONFIG_PPP_FILTER */ + + err = ppp_unit_register(ppp, conf->unit, conf->ifname_is_set); + if (err < 0) + return err; + + conf->file->private_data = &ppp->file; + + return 0; +} + +static const struct nla_policy ppp_nl_policy[IFLA_PPP_MAX + 1] = { + [IFLA_PPP_DEV_FD] = { .type = NLA_S32 }, +}; + +static int ppp_nl_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (!data) + return -EINVAL; + + if (!data[IFLA_PPP_DEV_FD]) + return -EINVAL; + if (nla_get_s32(data[IFLA_PPP_DEV_FD]) < 0) + return -EBADF; + + return 0; +} + +static int ppp_nl_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ppp_config conf = { + .unit = -1, + .ifname_is_set = true, + }; + struct file *file; + int err; + + file = fget(nla_get_s32(data[IFLA_PPP_DEV_FD])); + if (!file) + return -EBADF; + + /* rtnl_lock is already held here, but ppp_create_interface() locks + * ppp_mutex before holding rtnl_lock. Using mutex_trylock() avoids + * possible deadlock due to lock order inversion, at the cost of + * pushing the problem back to userspace. + */ + if (!mutex_trylock(&ppp_mutex)) { + err = -EBUSY; + goto out; + } + + if (file->f_op != &ppp_device_fops || file->private_data) { + err = -EBADF; + goto out_unlock; + } + + conf.file = file; + err = ppp_dev_configure(src_net, dev, &conf); + +out_unlock: + mutex_unlock(&ppp_mutex); +out: + fput(file); + + return err; +} + +static void ppp_nl_dellink(struct net_device *dev, struct list_head *head) +{ + unregister_netdevice_queue(dev, head); +} + +static size_t ppp_nl_get_size(const struct net_device *dev) +{ + return 0; +} + +static int ppp_nl_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + return 0; +} + +static struct net *ppp_nl_get_link_net(const struct net_device *dev) +{ + struct ppp *ppp = netdev_priv(dev); + + return ppp->ppp_net; +} + +static struct rtnl_link_ops ppp_link_ops __read_mostly = { + .kind = "ppp", + .maxtype = IFLA_PPP_MAX, + .policy = ppp_nl_policy, + .priv_size = sizeof(struct ppp), + .setup = ppp_setup, + .validate = ppp_nl_validate, + .newlink = ppp_nl_newlink, + .dellink = ppp_nl_dellink, + .get_size = ppp_nl_get_size, + .fill_info = ppp_nl_fill_info, + .get_link_net = ppp_nl_get_link_net, +}; + #define PPP_MAJOR 108 /* Called at boot time if ppp is compiled into the kernel, @@ -988,11 +1177,19 @@ static int __init ppp_init(void) goto out_chrdev; } + err = rtnl_link_register(&ppp_link_ops); + if (err) { + pr_err("failed to register rtnetlink PPP handler\n"); + goto out_class; + } + /* not a big deal if we fail here :-) */ device_create(ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp"); return 0; +out_class: + class_destroy(ppp_class); out_chrdev: unregister_chrdev(PPP_MAJOR, "ppp"); out_net: @@ -2732,102 +2929,42 @@ ppp_get_stats(struct ppp *ppp, struct ppp_stats *st) * or if there is already a unit with the requested number. * unit == -1 means allocate a new number. */ -static struct ppp *ppp_create_interface(struct net *net, int unit, - struct file *file, int *retp) +static int ppp_create_interface(struct net *net, struct file *file, int *unit) { + struct ppp_config conf = { + .file = file, + .unit = *unit, + .ifname_is_set = false, + }; + struct net_device *dev; struct ppp *ppp; - struct ppp_net *pn; - struct net_device *dev = NULL; - int ret = -ENOMEM; - int i; + int err; dev = alloc_netdev(sizeof(struct ppp), "", NET_NAME_ENUM, ppp_setup); - if (!dev) - goto out1; - - pn = ppp_pernet(net); - - ppp = netdev_priv(dev); - ppp->dev = dev; - ppp->mru = PPP_MRU; - init_ppp_file(&ppp->file, INTERFACE); - ppp->file.hdrlen = PPP_HDRLEN - 2; /* don't count proto bytes */ - ppp->owner = file; - for (i = 0; i < NUM_NP; ++i) - ppp->npmode[i] = NPMODE_PASS; - INIT_LIST_HEAD(&ppp->channels); - spin_lock_init(&ppp->rlock); - spin_lock_init(&ppp->wlock); -#ifdef CONFIG_PPP_MULTILINK - ppp->minseq = -1; - skb_queue_head_init(&ppp->mrq); -#endif /* CONFIG_PPP_MULTILINK */ -#ifdef CONFIG_PPP_FILTER - ppp->pass_filter = NULL; - ppp->active_filter = NULL; -#endif /* CONFIG_PPP_FILTER */ - - /* - * drum roll: don't forget to set - * the net device is belong to - */ + if (!dev) { + err = -ENOMEM; + goto err; + } dev_net_set(dev, net); + dev->rtnl_link_ops = &ppp_link_ops; rtnl_lock(); - mutex_lock(&pn->all_ppp_mutex); - if (unit < 0) { - unit = unit_get(&pn->units_idr, ppp); - if (unit < 0) { - ret = unit; - goto out2; - } - } else { - ret = -EEXIST; - if (unit_find(&pn->units_idr, unit)) - goto out2; /* unit already exists */ - /* - * if caller need a specified unit number - * lets try to satisfy him, otherwise -- - * he should better ask us for new unit number - * - * NOTE: yes I know that returning EEXIST it's not - * fair but at least pppd will ask us to allocate - * new unit in this case so user is happy :) - */ - unit = unit_set(&pn->units_idr, ppp, unit); - if (unit < 0) - goto out2; - } - - /* Initialize the new ppp unit */ - ppp->file.index = unit; - sprintf(dev->name, "ppp%d", unit); - - ret = register_netdevice(dev); - if (ret != 0) { - unit_put(&pn->units_idr, unit); - netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n", - dev->name, ret); - goto out2; - } - - ppp->ppp_net = net; + err = ppp_dev_configure(net, dev, &conf); + if (err < 0) + goto err_dev; + ppp = netdev_priv(dev); + *unit = ppp->file.index; - atomic_inc(&ppp_unit_count); - mutex_unlock(&pn->all_ppp_mutex); rtnl_unlock(); - *retp = 0; - return ppp; + return 0; -out2: - mutex_unlock(&pn->all_ppp_mutex); +err_dev: rtnl_unlock(); free_netdev(dev); -out1: - *retp = ret; - return NULL; +err: + return err; } /* @@ -3016,6 +3153,7 @@ static void __exit ppp_cleanup(void) /* should never happen */ if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count)) pr_err("PPP: removing module but units remain!\n"); + rtnl_link_unregister(&ppp_link_ops); unregister_chrdev(PPP_MAJOR, "ppp"); device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0)); class_destroy(ppp_class); @@ -3074,4 +3212,5 @@ EXPORT_SYMBOL(ppp_register_compressor); EXPORT_SYMBOL(ppp_unregister_compressor); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV(PPP_MAJOR, 0); +MODULE_ALIAS_RTNL_LINK("ppp"); MODULE_ALIAS("devname:ppp"); diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 9cfe6aeac84e..a31f4610b493 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -179,11 +179,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) unsigned long flags; int add_num = 1; - local_irq_save(flags); - if (!spin_trylock(&rnet->tx_lock)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&rnet->tx_lock, flags); if (is_multicast_ether_addr(eth->h_dest)) add_num = nets[rnet->mport->id].nact; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a74661690a11..425e983bab93 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -131,6 +131,17 @@ struct tap_filter { #define TUN_FLOW_EXPIRE (3 * HZ) +struct tun_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 rx_dropped; + u32 tx_dropped; + u32 rx_frame_errors; +}; + /* A tun_file connects an open character device to a tuntap netdevice. It * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and @@ -205,6 +216,7 @@ struct tun_struct { struct list_head disabled; void *security; u32 flow_count; + struct tun_pcpu_stats __percpu *pcpu_stats; }; #ifdef CONFIG_TUN_VNET_CROSS_LE @@ -821,7 +833,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (txq >= numqueues) goto drop; - if (numqueues == 1) { +#ifdef CONFIG_RPS + if (numqueues == 1 && static_key_false(&rps_needed)) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ @@ -836,6 +849,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) tun_flow_save_rps_rxhash(e, rxhash); } } +#endif tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); @@ -886,7 +900,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; drop: - dev->stats.tx_dropped++; + this_cpu_inc(tun->pcpu_stats->tx_dropped); skb_tx_error(skb); kfree_skb(skb); rcu_read_unlock(); @@ -949,6 +963,43 @@ static void tun_set_headroom(struct net_device *dev, int new_hr) tun->align = new_hr; } +static struct rtnl_link_stats64 * +tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0; + struct tun_struct *tun = netdev_priv(dev); + struct tun_pcpu_stats *p; + int i; + + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, txpackets, txbytes; + unsigned int start; + + p = per_cpu_ptr(tun->pcpu_stats, i); + do { + start = u64_stats_fetch_begin(&p->syncp); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; + txpackets = p->tx_packets; + txbytes = p->tx_bytes; + } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->tx_packets += txpackets; + stats->tx_bytes += txbytes; + + /* u32 counters */ + rx_dropped += p->rx_dropped; + rx_frame_errors += p->rx_frame_errors; + tx_dropped += p->tx_dropped; + } + stats->rx_dropped = rx_dropped; + stats->rx_frame_errors = rx_frame_errors; + stats->tx_dropped = tx_dropped; + return stats; +} + static const struct net_device_ops tun_netdev_ops = { .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, @@ -961,6 +1012,7 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_poll_controller = tun_poll_controller, #endif .ndo_set_rx_headroom = tun_set_headroom, + .ndo_get_stats64 = tun_net_get_stats64, }; static const struct net_device_ops tap_netdev_ops = { @@ -979,6 +1031,7 @@ static const struct net_device_ops tap_netdev_ops = { #endif .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, + .ndo_get_stats64 = tun_net_get_stats64, }; static void tun_flow_init(struct tun_struct *tun) @@ -1103,6 +1156,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, size_t total_len = iov_iter_count(from); size_t len = total_len, align = tun->align, linear; struct virtio_net_hdr gso = { 0 }; + struct tun_pcpu_stats *stats; int good_linear; int copylen; bool zerocopy = false; @@ -1177,7 +1231,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) - tun->dev->stats.rx_dropped++; + this_cpu_inc(tun->pcpu_stats->rx_dropped); return PTR_ERR(skb); } @@ -1192,7 +1246,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (err) { - tun->dev->stats.rx_dropped++; + this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); return -EFAULT; } @@ -1200,7 +1254,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { if (!skb_partial_csum_set(skb, tun16_to_cpu(tun, gso.csum_start), tun16_to_cpu(tun, gso.csum_offset))) { - tun->dev->stats.rx_frame_errors++; + this_cpu_inc(tun->pcpu_stats->rx_frame_errors); kfree_skb(skb); return -EINVAL; } @@ -1217,7 +1271,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, pi.proto = htons(ETH_P_IPV6); break; default: - tun->dev->stats.rx_dropped++; + this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); return -EINVAL; } @@ -1245,7 +1299,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; default: - tun->dev->stats.rx_frame_errors++; + this_cpu_inc(tun->pcpu_stats->rx_frame_errors); kfree_skb(skb); return -EINVAL; } @@ -1255,7 +1309,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->gso_size = tun16_to_cpu(tun, gso.gso_size); if (skb_shinfo(skb)->gso_size == 0) { - tun->dev->stats.rx_frame_errors++; + this_cpu_inc(tun->pcpu_stats->rx_frame_errors); kfree_skb(skb); return -EINVAL; } @@ -1278,8 +1332,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rxhash = skb_get_hash(skb); netif_rx_ni(skb); - tun->dev->stats.rx_packets++; - tun->dev->stats.rx_bytes += len; + stats = get_cpu_ptr(tun->pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); + put_cpu_ptr(stats); tun_flow_update(tun, rxhash, tfile); return total_len; @@ -1308,6 +1366,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, struct iov_iter *iter) { struct tun_pi pi = { 0, skb->protocol }; + struct tun_pcpu_stats *stats; ssize_t total; int vlan_offset = 0; int vlan_hlen = 0; @@ -1408,8 +1467,13 @@ static ssize_t tun_put_user(struct tun_struct *tun, skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); done: - tun->dev->stats.tx_packets++; - tun->dev->stats.tx_bytes += skb->len + vlan_hlen; + /* caller is in process context, */ + stats = get_cpu_ptr(tun->pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += skb->len + vlan_hlen; + u64_stats_update_end(&stats->syncp); + put_cpu_ptr(tun->pcpu_stats); return total; } @@ -1467,6 +1531,7 @@ static void tun_free_netdev(struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); BUG_ON(!(list_empty(&tun->disabled))); + free_percpu(tun->pcpu_stats); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); free_netdev(dev); @@ -1715,11 +1780,17 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->filter_attached = false; tun->sndbuf = tfile->socket.sk->sk_sndbuf; + tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); + if (!tun->pcpu_stats) { + err = -ENOMEM; + goto err_free_dev; + } + spin_lock_init(&tun->lock); err = security_tun_dev_alloc_security(&tun->security); if (err < 0) - goto err_free_dev; + goto err_free_stat; tun_net_init(dev); tun_flow_init(tun); @@ -1727,7 +1798,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features; + dev->features = dev->hw_features | NETIF_F_LLTX; dev->vlan_features = dev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); @@ -1763,6 +1834,8 @@ err_detach: err_free_flow: tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); +err_free_stat: + free_percpu(tun->pcpu_stats); err_free_dev: free_netdev(dev); return err; diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index bdd83d95ec0a..96a5028621c8 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -617,8 +617,13 @@ static const struct usb_device_id mbim_devs[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, - /* Huawei E3372 fails unless NDP comes after the IP packets */ - { USB_DEVICE_AND_INTERFACE_INFO(0x12d1, 0x157d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + + /* Some Huawei devices, ME906s-158 (12d1:15c1) and E3372 + * (12d1:157d), are known to fail unless the NDP is placed + * after the IP packets. Applying the quirk to all Huawei + * devices is broader than necessary, but harmless. + */ + { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end, }, /* default entry */ diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 4f30a6ae50d0..f37a6e61d4ad 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -312,10 +312,9 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_set_rx_headroom = veth_set_rx_headroom, }; -#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ - NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \ - NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | \ - NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT | NETIF_F_UFO | \ +#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ + NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b2348f67b00a..db8022ae415b 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1152,12 +1152,16 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, union Vmxnet3_GenericDesc *gdesc) { if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) { - /* typical case: TCP/UDP over IP and both csums are correct */ - if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) == - VMXNET3_RCD_CSUM_OK) { + if (gdesc->rcd.v4 && + (le32_to_cpu(gdesc->dword[3]) & + VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); + BUG_ON(gdesc->rcd.frg); + } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & + (1 << VMXNET3_RCD_TUC_SHIFT))) { skb->ip_summed = CHECKSUM_UNNECESSARY; BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); - BUG_ON(!(gdesc->rcd.v4 || gdesc->rcd.v6)); BUG_ON(gdesc->rcd.frg); } else { if (gdesc->rcd.csum) { diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 729c344e6774..c4825392d64b 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.6.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.7.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040600 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040700 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9a9fabb900c1..8a8f1e58b415 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -60,41 +60,6 @@ struct pcpu_dstats { struct u64_stats_sync syncp; }; -static struct dst_entry *vrf_ip_check(struct dst_entry *dst, u32 cookie) -{ - return dst; -} - -static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - return ip_local_out(net, sk, skb); -} - -static unsigned int vrf_v4_mtu(const struct dst_entry *dst) -{ - /* TO-DO: return max ethernet size? */ - return dst->dev->mtu; -} - -static void vrf_dst_destroy(struct dst_entry *dst) -{ - /* our dst lives forever - or until the device is closed */ -} - -static unsigned int vrf_default_advmss(const struct dst_entry *dst) -{ - return 65535 - 40; -} - -static struct dst_ops vrf_dst_ops = { - .family = AF_INET, - .local_out = vrf_ip_local_out, - .check = vrf_ip_check, - .mtu = vrf_v4_mtu, - .destroy = vrf_dst_destroy, - .default_advmss = vrf_default_advmss, -}; - /* neighbor handling is done with actual device; do not want * to flip skb->dev for those ndisc packets. This really fails * for multiple next protocols (e.g., NEXTHDR_HOP). But it is @@ -349,46 +314,6 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie) -{ - return dst; -} - -static struct dst_ops vrf_dst_ops6 = { - .family = AF_INET6, - .local_out = ip6_local_out, - .check = vrf_ip6_check, - .mtu = vrf_v4_mtu, - .destroy = vrf_dst_destroy, - .default_advmss = vrf_default_advmss, -}; - -static int init_dst_ops6_kmem_cachep(void) -{ - vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache", - sizeof(struct rt6_info), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - - if (!vrf_dst_ops6.kmem_cachep) - return -ENOMEM; - - return 0; -} - -static void free_dst_ops6_kmem_cachep(void) -{ - kmem_cache_destroy(vrf_dst_ops6.kmem_cachep); -} - -static int vrf_input6(struct sk_buff *skb) -{ - skb->dev->stats.rx_errors++; - kfree_skb(skb); - return 0; -} - /* modelled after ip6_finish_output2 */ static int vrf_finish_output6(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -429,67 +354,34 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static void vrf_rt6_destroy(struct net_vrf *vrf) +static void vrf_rt6_release(struct net_vrf *vrf) { - dst_destroy(&vrf->rt6->dst); - free_percpu(vrf->rt6->rt6i_pcpu); + dst_release(&vrf->rt6->dst); vrf->rt6 = NULL; } static int vrf_rt6_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - struct dst_entry *dst; + struct net *net = dev_net(dev); struct rt6_info *rt6; - int cpu; int rc = -ENOMEM; - rt6 = dst_alloc(&vrf_dst_ops6, dev, 0, - DST_OBSOLETE_NONE, - (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); + rt6 = ip6_dst_alloc(net, dev, + DST_HOST | DST_NOPOLICY | DST_NOXFRM | DST_NOCACHE); if (!rt6) goto out; - dst = &rt6->dst; - - rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL); - if (!rt6->rt6i_pcpu) { - dst_destroy(dst); - goto out; - } - for_each_possible_cpu(cpu) { - struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu); - *p = NULL; - } - - memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst)); - - INIT_LIST_HEAD(&rt6->rt6i_siblings); - INIT_LIST_HEAD(&rt6->rt6i_uncached); - - rt6->dst.input = vrf_input6; rt6->dst.output = vrf_output6; - - rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id); - - atomic_set(&rt6->dst.__refcnt, 2); - + rt6->rt6i_table = fib6_get_table(net, vrf->tb_id); + dst_hold(&rt6->dst); vrf->rt6 = rt6; rc = 0; out: return rc; } #else -static int init_dst_ops6_kmem_cachep(void) -{ - return 0; -} - -static void free_dst_ops6_kmem_cachep(void) -{ -} - -static void vrf_rt6_destroy(struct net_vrf *vrf) +static void vrf_rt6_release(struct net_vrf *vrf) { } @@ -557,11 +449,11 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -static void vrf_rtable_destroy(struct net_vrf *vrf) +static void vrf_rtable_release(struct net_vrf *vrf) { struct dst_entry *dst = (struct dst_entry *)vrf->rth; - dst_destroy(dst); + dst_release(dst); vrf->rth = NULL; } @@ -570,22 +462,10 @@ static struct rtable *vrf_rtable_create(struct net_device *dev) struct net_vrf *vrf = netdev_priv(dev); struct rtable *rth; - rth = dst_alloc(&vrf_dst_ops, dev, 2, - DST_OBSOLETE_NONE, - (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); + rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0); if (rth) { rth->dst.output = vrf_output; - rth->rt_genid = rt_genid_ipv4(dev_net(dev)); - rth->rt_flags = 0; - rth->rt_type = RTN_UNICAST; - rth->rt_is_input = 0; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; rth->rt_table_id = vrf->tb_id; - INIT_LIST_HEAD(&rth->rt_uncached); - rth->rt_uncached_list = NULL; } return rth; @@ -673,8 +553,8 @@ static void vrf_dev_uninit(struct net_device *dev) struct net_device *port_dev; struct list_head *iter; - vrf_rtable_destroy(vrf); - vrf_rt6_destroy(vrf); + vrf_rtable_release(vrf); + vrf_rt6_release(vrf); netdev_for_each_lower_dev(dev, port_dev, iter) vrf_del_slave(dev, port_dev); @@ -704,7 +584,7 @@ static int vrf_dev_init(struct net_device *dev) return 0; out_rth: - vrf_rtable_destroy(vrf); + vrf_rtable_release(vrf); out_stats: free_percpu(dev->dstats); dev->dstats = NULL; @@ -737,7 +617,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, struct net_vrf *vrf = netdev_priv(dev); rth = vrf->rth; - atomic_inc(&rth->dst.__refcnt); + dst_hold(&rth->dst); } return rth; @@ -788,7 +668,7 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, struct net_vrf *vrf = netdev_priv(dev); rt = vrf->rt6; - atomic_inc(&rt->dst.__refcnt); + dst_hold(&rt->dst); } return (struct dst_entry *)rt; @@ -946,19 +826,6 @@ static int __init vrf_init_module(void) { int rc; - vrf_dst_ops.kmem_cachep = - kmem_cache_create("vrf_ip_dst_cache", - sizeof(struct rtable), 0, - SLAB_HWCACHE_ALIGN, - NULL); - - if (!vrf_dst_ops.kmem_cachep) - return -ENOMEM; - - rc = init_dst_ops6_kmem_cachep(); - if (rc != 0) - goto error2; - register_netdevice_notifier(&vrf_notifier_block); rc = rtnl_link_register(&vrf_link_ops); @@ -969,22 +836,10 @@ static int __init vrf_init_module(void) error: unregister_netdevice_notifier(&vrf_notifier_block); - free_dst_ops6_kmem_cachep(); -error2: - kmem_cache_destroy(vrf_dst_ops.kmem_cachep); return rc; } -static void __exit vrf_cleanup_module(void) -{ - rtnl_link_unregister(&vrf_link_ops); - unregister_netdevice_notifier(&vrf_notifier_block); - kmem_cache_destroy(vrf_dst_ops.kmem_cachep); - free_dst_ops6_kmem_cachep(); -} - module_init(vrf_init_module); -module_exit(vrf_cleanup_module); MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern"); MODULE_DESCRIPTION("Device driver to instantiate VRF domains"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7f697a3f00a4..2668e528dee4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -98,7 +98,6 @@ struct vxlan_fdb { /* salt for hash table */ static u32 vxlan_salt __read_mostly; -static struct workqueue_struct *vxlan_wq; static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) { @@ -1038,14 +1037,14 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) return false; } -static void __vxlan_sock_release(struct vxlan_sock *vs) +static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) { struct vxlan_net *vn; if (!vs) - return; + return false; if (!atomic_dec_and_test(&vs->refcnt)) - return; + return false; vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id); spin_lock(&vn->sock_lock); @@ -1053,14 +1052,28 @@ static void __vxlan_sock_release(struct vxlan_sock *vs) vxlan_notify_del_rx_port(vs); spin_unlock(&vn->sock_lock); - queue_work(vxlan_wq, &vs->del_work); + return true; } static void vxlan_sock_release(struct vxlan_dev *vxlan) { - __vxlan_sock_release(vxlan->vn4_sock); + bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock); +#if IS_ENABLED(CONFIG_IPV6) + bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock); +#endif + + synchronize_net(); + + if (ipv4) { + udp_tunnel_sock_release(vxlan->vn4_sock->sock); + kfree(vxlan->vn4_sock); + } + #if IS_ENABLED(CONFIG_IPV6) - __vxlan_sock_release(vxlan->vn6_sock); + if (ipv6) { + udp_tunnel_sock_release(vxlan->vn6_sock->sock); + kfree(vxlan->vn6_sock); + } #endif } @@ -1784,9 +1797,9 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, if (WARN_ON(!skb)) return -ENOMEM; - skb = iptunnel_handle_offloads(skb, type); - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + goto out_free; vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = VXLAN_HF_VNI; @@ -2514,7 +2527,7 @@ static struct device_type vxlan_type = { * supply the listening VXLAN udp ports. Callers are expected * to implement the ndo_add_vxlan_port. */ -void vxlan_get_rx_port(struct net_device *dev) +static void vxlan_push_rx_ports(struct net_device *dev) { struct vxlan_sock *vs; struct net *net = dev_net(dev); @@ -2523,6 +2536,9 @@ void vxlan_get_rx_port(struct net_device *dev) __be16 port; unsigned int i; + if (!dev->netdev_ops->ndo_add_vxlan_port) + return; + spin_lock(&vn->sock_lock); for (i = 0; i < PORT_HASH_SIZE; ++i) { hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { @@ -2534,7 +2550,6 @@ void vxlan_get_rx_port(struct net_device *dev) } spin_unlock(&vn->sock_lock); } -EXPORT_SYMBOL_GPL(vxlan_get_rx_port); /* Initialize the device structure. */ static void vxlan_setup(struct net_device *dev) @@ -2542,6 +2557,9 @@ static void vxlan_setup(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; + eth_hw_addr_random(dev); + ether_setup(dev); + dev->destructor = free_netdev; SET_NETDEV_DEVTYPE(dev, &vxlan_type); @@ -2577,8 +2595,6 @@ static void vxlan_setup(struct net_device *dev) static void vxlan_ether_setup(struct net_device *dev) { - eth_hw_addr_random(dev); - ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; dev->netdev_ops = &vxlan_netdev_ether_ops; @@ -2586,11 +2602,10 @@ static void vxlan_ether_setup(struct net_device *dev) static void vxlan_raw_setup(struct net_device *dev) { + dev->header_ops = NULL; dev->type = ARPHRD_NONE; dev->hard_header_len = 0; dev->addr_len = 0; - dev->mtu = ETH_DATA_LEN; - dev->tx_queue_len = 1000; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->netdev_ops = &vxlan_netdev_raw_ops; } @@ -2674,13 +2689,6 @@ static const struct ethtool_ops vxlan_ethtool_ops = { .get_link = ethtool_op_get_link, }; -static void vxlan_del_work(struct work_struct *work) -{ - struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); - udp_tunnel_sock_release(vs->sock); - kfree_rcu(vs, rcu); -} - static struct socket *vxlan_create_sock(struct net *net, bool ipv6, __be16 port, u32 flags) { @@ -2726,8 +2734,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vs->vni_list[h]); - INIT_WORK(&vs->del_work, vxlan_del_work); - sock = vxlan_create_sock(net, ipv6, port, flags); if (IS_ERR(sock)) { pr_info("Cannot bind port %d, err=%ld\n", ntohs(port), @@ -3279,20 +3285,22 @@ static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, unregister_netdevice_many(&list_kill); } -static int vxlan_lowerdev_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int vxlan_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); if (event == NETDEV_UNREGISTER) vxlan_handle_lowerdev_unregister(vn, dev); + else if (event == NETDEV_OFFLOAD_PUSH_VXLAN) + vxlan_push_rx_ports(dev); return NOTIFY_DONE; } static struct notifier_block vxlan_notifier_block __read_mostly = { - .notifier_call = vxlan_lowerdev_event, + .notifier_call = vxlan_netdevice_event, }; static __net_init int vxlan_init_net(struct net *net) @@ -3346,10 +3354,6 @@ static int __init vxlan_init_module(void) { int rc; - vxlan_wq = alloc_workqueue("vxlan", 0, 0); - if (!vxlan_wq) - return -ENOMEM; - get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); rc = register_pernet_subsys(&vxlan_net_ops); @@ -3370,7 +3374,6 @@ out3: out2: unregister_pernet_subsys(&vxlan_net_ops); out1: - destroy_workqueue(vxlan_wq); return rc; } late_initcall(vxlan_init_module); @@ -3379,7 +3382,6 @@ static void __exit vxlan_cleanup_module(void) { rtnl_link_unregister(&vxlan_link_ops); unregister_netdevice_notifier(&vxlan_notifier_block); - destroy_workqueue(vxlan_wq); unregister_pernet_subsys(&vxlan_net_ops); /* rcu_barrier() is called by netns */ } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index ef91b3770703..5ace468070cb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1149,6 +1149,8 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) /* the fw is stopped, the aux sta is dead: clean up driver state */ iwl_mvm_del_aux_sta(mvm); + iwl_free_fw_paging(mvm); + /* * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete() * won't be called in this case). diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 656541c5360a..8bfb8e06a90c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -782,8 +782,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++) kfree(mvm->nvm_sections[i].data); - iwl_free_fw_paging(mvm); - iwl_mvm_tof_clean(mvm); del_timer_sync(&mvm->scan_timer); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 5e1a13e82d60..ee081c2225ba 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -732,8 +732,8 @@ static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans) */ val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0); if (val & (BIT(1) | BIT(17))) { - IWL_INFO(trans, - "can't access the RSA semaphore it is write protected\n"); + IWL_DEBUG_INFO(trans, + "can't access the RSA semaphore it is write protected\n"); return 0; } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index c757f14c4c00..9ed0ed1bf514 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1030,7 +1030,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, data->pending_cookie++; cookie = data->pending_cookie; info->rate_driver_data[0] = (void *)cookie; - if (nla_put_u64(skb, HWSIM_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(skb, HWSIM_ATTR_COOKIE, cookie, HWSIM_ATTR_PAD)) goto nla_put_failure; genlmsg_end(skb, msg_head); diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h index 66e1c73bd507..39f22467ca2a 100644 --- a/drivers/net/wireless/mac80211_hwsim.h +++ b/drivers/net/wireless/mac80211_hwsim.h @@ -148,6 +148,7 @@ enum { HWSIM_ATTR_RADIO_NAME, HWSIM_ATTR_NO_VIF, HWSIM_ATTR_FREQ, + HWSIM_ATTR_PAD, __HWSIM_ATTR_MAX, }; #define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c index e346cb86cb08..17a681788611 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c @@ -2488,9 +2488,9 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter( for (p = RF90_PATH_A; p < MAX_PATH_NUM_8821A; p++) rtldm->swing_idx_ofdm_base[p] = rtldm->swing_idx_ofdm[p]; - RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, - "pDM_Odm->RFCalibrateInfo.ThermalValue = %d ThermalValue= %d\n", - rtldm->thermalvalue, thermal_value); + RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, + "pDM_Odm->RFCalibrateInfo.ThermalValue = %d ThermalValue= %d\n", + rtldm->thermalvalue, thermal_value); /*Record last Power Tracking Thermal Value*/ rtldm->thermalvalue = thermal_value; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index fc82743aefb6..19f822d7f652 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -407,7 +407,7 @@ static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { [ND_CMD_IMPLEMENTED] = { }, [ND_CMD_SMART] = { .out_num = 2, - .out_sizes = { 4, 8, }, + .out_sizes = { 4, 128, }, }, [ND_CMD_SMART_THRESHOLD] = { .out_num = 2, diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 79646d0c3277..182a93fe3712 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -417,8 +417,8 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) set_badblock(bb, start_sector, num_sectors); } -static void namespace_add_poison(struct list_head *poison_list, - struct badblocks *bb, struct resource *res) +static void badblocks_populate(struct list_head *poison_list, + struct badblocks *bb, const struct resource *res) { struct nd_poison *pl; @@ -460,36 +460,35 @@ static void namespace_add_poison(struct list_head *poison_list, } /** - * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks - * @ndns: the namespace containing poison ranges - * @bb: badblocks instance to populate - * @offset: offset at the start of the namespace before 'sector 0' + * nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks + * @region: parent region of the range to interrogate + * @bb: badblocks instance to populate + * @res: resource range to consider * - * The poison list generated during NFIT initialization may contain multiple, - * possibly overlapping ranges in the SPA (System Physical Address) space. - * Compare each of these ranges to the namespace currently being initialized, - * and add badblocks to the gendisk for all matching sub-ranges + * The poison list generated during bus initialization may contain + * multiple, possibly overlapping physical address ranges. Compare each + * of these ranges to the resource range currently being initialized, + * and add badblocks entries for all matching sub-ranges */ -void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, - struct badblocks *bb, resource_size_t offset) +void nvdimm_badblocks_populate(struct nd_region *nd_region, + struct badblocks *bb, const struct resource *res) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_region *nd_region = to_nd_region(ndns->dev.parent); struct nvdimm_bus *nvdimm_bus; struct list_head *poison_list; - struct resource res = { - .start = nsio->res.start + offset, - .end = nsio->res.end, - }; - nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); + if (!is_nd_pmem(&nd_region->dev)) { + dev_WARN_ONCE(&nd_region->dev, 1, + "%s only valid for pmem regions\n", __func__); + return; + } + nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); poison_list = &nvdimm_bus->poison_list; nvdimm_bus_lock(&nvdimm_bus->dev); - namespace_add_poison(poison_list, bb, &res); + badblocks_populate(poison_list, bb, res); nvdimm_bus_unlock(&nvdimm_bus->dev); } -EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison); +EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 1799bd97a9ce..875c524fafb0 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -266,8 +266,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns); const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); -void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, - struct badblocks *bb, resource_size_t offset); +void nvdimm_badblocks_populate(struct nd_region *nd_region, + struct badblocks *bb, const struct resource *res); int nd_blk_region_init(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long *start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 254d3bc13f70..e071e214feba 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -376,7 +376,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) } else { /* from init we validate */ if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) - return -EINVAL; + return -ENODEV; } if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 12c86fa80c5f..f798899338ed 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -103,6 +103,20 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, flush_dcache_page(page); } } else { + /* + * Note that we write the data both before and after + * clearing poison. The write before clear poison + * handles situations where the latest written data is + * preserved and the clear poison operation simply marks + * the address range as valid without changing the data. + * In this case application software can assume that an + * interrupted write will either return the new good + * data or an error. + * + * However, if pmem_clear_poison() leaves the data in an + * indeterminate state we need to perform the write + * after clear poison. + */ flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); if (unlikely(bad_pmem)) { @@ -244,7 +258,9 @@ static void pmem_detach_disk(struct pmem_device *pmem) static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns, struct pmem_device *pmem) { + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); int nid = dev_to_node(dev); + struct resource bb_res; struct gendisk *disk; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); @@ -271,8 +287,17 @@ static int pmem_attach_disk(struct device *dev, devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); - + bb_res.start = nsio->res.start + pmem->data_offset; + bb_res.end = nsio->res.end; + if (is_nd_pfn(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + + bb_res.start += __le32_to_cpu(pfn_sb->start_pad); + bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc); + } + nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, + &bb_res); disk->bb = &pmem->bb; add_disk(disk); revalidate_disk(disk); @@ -553,7 +578,7 @@ static int nd_pmem_probe(struct device *dev) ndns->rw_bytes = pmem_rw_bytes; if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res); if (is_nd_btt(dev)) { /* btt allocates its own request_queue */ @@ -595,14 +620,25 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) { struct pmem_device *pmem = dev_get_drvdata(dev); struct nd_namespace_common *ndns = pmem->ndns; + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct resource res = { + .start = nsio->res.start + pmem->data_offset, + .end = nsio->res.end, + }; if (event != NVDIMM_REVALIDATE_POISON) return; - if (is_nd_btt(dev)) - nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); - else - nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); + if (is_nd_pfn(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + + res.start += __le32_to_cpu(pfn_sb->start_pad); + res.end -= __le32_to_cpu(pfn_sb->end_trunc); + } + + nvdimm_badblocks_populate(nd_region, &pmem->bb, &res); } MODULE_ALIAS("pmem"); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 24ccda303efb..4fd733ff72b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1478,8 +1478,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (result > 0) { dev_err(dev->ctrl.device, "Could not set queue count (%d)\n", result); - nr_io_queues = 0; - result = 0; + return 0; } if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) { @@ -1513,7 +1512,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * If we enable msix early due to not intx, disable it again before * setting up the full range we need. */ - if (!pdev->irq) + if (pdev->msi_enabled) + pci_disable_msi(pdev); + else if (pdev->msix_enabled) pci_disable_msix(pdev); for (i = 0; i < nr_io_queues; i++) @@ -1696,7 +1697,6 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (pci_enable_device_mem(pdev)) return result; - dev->entry[0].vector = pdev->irq; pci_set_master(pdev); if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && @@ -1709,13 +1709,18 @@ static int nvme_pci_enable(struct nvme_dev *dev) } /* - * Some devices don't advertse INTx interrupts, pre-enable a single - * MSIX vec for setup. We'll adjust this later. + * Some devices and/or platforms don't advertise or work with INTx + * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll + * adjust this later. */ - if (!pdev->irq) { - result = pci_enable_msix(pdev, dev->entry, 1); - if (result < 0) - goto disable; + if (pci_enable_msix(pdev, dev->entry, 1)) { + pci_enable_msi(pdev); + dev->entry[0].vector = pdev->irq; + } + + if (!dev->entry[0].vector) { + result = -ENODEV; + goto disable; } cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -1859,6 +1864,9 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); + if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) + goto out; + set_bit(NVME_CTRL_RESETTING, &dev->flags); result = nvme_pci_enable(dev); @@ -2078,11 +2086,10 @@ static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - del_timer_sync(&dev->watchdog_timer); - set_bit(NVME_CTRL_REMOVING, &dev->flags); pci_set_drvdata(pdev, NULL); flush_work(&dev->async_work); + flush_work(&dev->reset_work); flush_work(&dev->scan_work); nvme_remove_namespaces(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 8453f08d2ef4..e051e1b57609 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -41,8 +41,8 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id) return -EINVAL; } -static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *child, - u32 addr) +static void of_mdiobus_register_phy(struct mii_bus *mdio, + struct device_node *child, u32 addr) { struct phy_device *phy; bool is_c45; @@ -56,8 +56,8 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi phy = phy_device_create(mdio, addr, phy_id, 0, NULL); else phy = get_phy_device(mdio, addr, is_c45); - if (IS_ERR_OR_NULL(phy)) - return 1; + if (IS_ERR(phy)) + return; rc = irq_of_parse_and_map(child, 0); if (rc > 0) { @@ -81,25 +81,22 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi if (rc) { phy_device_free(phy); of_node_put(child); - return 1; + return; } dev_dbg(&mdio->dev, "registered phy %s at address %i\n", child->name, addr); - - return 0; } -static int of_mdiobus_register_device(struct mii_bus *mdio, - struct device_node *child, - u32 addr) +static void of_mdiobus_register_device(struct mii_bus *mdio, + struct device_node *child, u32 addr) { struct mdio_device *mdiodev; int rc; mdiodev = mdio_device_create(mdio, addr); if (IS_ERR(mdiodev)) - return 1; + return; /* Associate the OF node with the device structure so it * can be looked up later. @@ -112,13 +109,11 @@ static int of_mdiobus_register_device(struct mii_bus *mdio, if (rc) { mdio_device_free(mdiodev); of_node_put(child); - return 1; + return; } dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n", child->name, addr); - - return 0; } int of_mdio_parse_addr(struct device *dev, const struct device_node *np) @@ -214,6 +209,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) bool scanphys = false; int addr, rc; + /* Do not continue if the node is disabled */ + if (!of_device_is_available(np)) + return -ENODEV; + /* Mask out all PHYs from auto probing. Instead the PHYs listed in * the device tree are populated after the bus has been registered */ mdio->phy_mask = ~0; diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 01b9d0a00abc..d11cdbb8fba3 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -275,6 +275,19 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void } EXPORT_SYMBOL(pci_write_vpd); +/** + * pci_set_vpd_size - Set size of Vital Product Data space + * @dev: pci device struct + * @len: size of vpd space + */ +int pci_set_vpd_size(struct pci_dev *dev, size_t len) +{ + if (!dev->vpd || !dev->vpd->ops) + return -ENODEV; + return dev->vpd->ops->set_size(dev, len); +} +EXPORT_SYMBOL(pci_set_vpd_size); + #define PCI_VPD_MAX_SIZE (PCI_VPD_ADDR_MASK + 1) /** @@ -498,9 +511,23 @@ out: return ret ? ret : count; } +static int pci_vpd_set_size(struct pci_dev *dev, size_t len) +{ + struct pci_vpd *vpd = dev->vpd; + + if (len == 0 || len > PCI_VPD_MAX_SIZE) + return -EIO; + + vpd->valid = 1; + vpd->len = len; + + return 0; +} + static const struct pci_vpd_ops pci_vpd_ops = { .read = pci_vpd_read, .write = pci_vpd_write, + .set_size = pci_vpd_set_size, }; static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, @@ -533,9 +560,24 @@ static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, return ret; } +static int pci_vpd_f0_set_size(struct pci_dev *dev, size_t len) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + int ret; + + if (!tdev) + return -ENODEV; + + ret = pci_set_vpd_size(tdev, len); + pci_dev_put(tdev); + return ret; +} + static const struct pci_vpd_ops pci_vpd_f0_ops = { .read = pci_vpd_f0_read, .write = pci_vpd_f0_write, + .set_size = pci_vpd_f0_set_size, }; int pci_vpd_init(struct pci_dev *dev) diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index eb5a2755a164..2f817fa4c661 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -32,7 +32,7 @@ #define to_imx6_pcie(x) container_of(x, struct imx6_pcie, pp) struct imx6_pcie { - struct gpio_desc *reset_gpio; + int reset_gpio; struct clk *pcie_bus; struct clk *pcie_phy; struct clk *pcie; @@ -309,10 +309,10 @@ static int imx6_pcie_deassert_core_reset(struct pcie_port *pp) usleep_range(200, 500); /* Some boards don't have PCIe reset GPIO. */ - if (imx6_pcie->reset_gpio) { - gpiod_set_value_cansleep(imx6_pcie->reset_gpio, 0); + if (gpio_is_valid(imx6_pcie->reset_gpio)) { + gpio_set_value_cansleep(imx6_pcie->reset_gpio, 0); msleep(100); - gpiod_set_value_cansleep(imx6_pcie->reset_gpio, 1); + gpio_set_value_cansleep(imx6_pcie->reset_gpio, 1); } return 0; @@ -523,6 +523,7 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) { struct imx6_pcie *imx6_pcie; struct pcie_port *pp; + struct device_node *np = pdev->dev.of_node; struct resource *dbi_base; struct device_node *node = pdev->dev.of_node; int ret; @@ -544,8 +545,15 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) return PTR_ERR(pp->dbi_base); /* Fetch GPIOs */ - imx6_pcie->reset_gpio = devm_gpiod_get_optional(&pdev->dev, "reset", - GPIOD_OUT_LOW); + imx6_pcie->reset_gpio = of_get_named_gpio(np, "reset-gpio", 0); + if (gpio_is_valid(imx6_pcie->reset_gpio)) { + ret = devm_gpio_request_one(&pdev->dev, imx6_pcie->reset_gpio, + GPIOF_OUT_INIT_LOW, "PCIe reset"); + if (ret) { + dev_err(&pdev->dev, "unable to get reset gpio\n"); + return ret; + } + } /* Fetch clocks */ imx6_pcie->pcie_phy = devm_clk_get(&pdev->dev, "pcie_phy"); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index e982010f0ed1..342b6918bbde 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -636,7 +636,7 @@ static ssize_t pci_read_config(struct file *filp, struct kobject *kobj, u8 *data = (u8 *) buf; /* Several chips lock up trying to read undefined config space */ - if (security_capable(filp->f_cred, &init_user_ns, CAP_SYS_ADMIN) == 0) + if (file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN)) size = dev->cfg_size; else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) size = 128; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d0fb93481573..a814bbb80fcb 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -97,6 +97,7 @@ static inline bool pci_has_subordinate(struct pci_dev *pci_dev) struct pci_vpd_ops { ssize_t (*read)(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t (*write)(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); + int (*set_size)(struct pci_dev *dev, size_t len); }; struct pci_vpd { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 32346b5a8a11..f70090897fdf 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -737,8 +737,19 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) break; case CPU_PM_EXIT: case CPU_PM_ENTER_FAILED: - /* Restore and enable the counter */ - armpmu_start(event, PERF_EF_RELOAD); + /* + * Restore and enable the counter. + * armpmu_start() indirectly calls + * + * perf_event_update_userpage() + * + * that requires RCU read locking to be functional, + * wrap the call within RCU_NONIDLE to make the + * RCU subsystem aware this cpu is not idle from + * an RCU perspective for the armpmu_start() call + * duration. + */ + RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD)); break; default: break; diff --git a/drivers/phy/phy-rockchip-dp.c b/drivers/phy/phy-rockchip-dp.c index 77e2d02e6bee..793ecb6d87bc 100644 --- a/drivers/phy/phy-rockchip-dp.c +++ b/drivers/phy/phy-rockchip-dp.c @@ -86,6 +86,9 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev) if (!np) return -ENODEV; + if (!dev->parent || !dev->parent->of_node) + return -ENODEV; + dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); if (IS_ERR(dp)) return -ENOMEM; @@ -104,9 +107,9 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev) return ret; } - dp->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf"); + dp->grf = syscon_node_to_regmap(dev->parent->of_node); if (IS_ERR(dp->grf)) { - dev_err(dev, "rk3288-dp needs rockchip,grf property\n"); + dev_err(dev, "rk3288-dp needs the General Register Files syscon\n"); return PTR_ERR(dp->grf); } diff --git a/drivers/phy/phy-rockchip-emmc.c b/drivers/phy/phy-rockchip-emmc.c index 887b4c27195f..6ebcf3e41c46 100644 --- a/drivers/phy/phy-rockchip-emmc.c +++ b/drivers/phy/phy-rockchip-emmc.c @@ -176,7 +176,10 @@ static int rockchip_emmc_phy_probe(struct platform_device *pdev) struct regmap *grf; unsigned int reg_offset; - grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); + if (!dev->parent || !dev->parent->of_node) + return -ENODEV; + + grf = syscon_node_to_regmap(dev->parent->of_node); if (IS_ERR(grf)) { dev_err(dev, "Missing rockchip,grf property\n"); return PTR_ERR(grf); diff --git a/drivers/pinctrl/freescale/Kconfig b/drivers/pinctrl/freescale/Kconfig index debe1219d76d..fc8cbf611723 100644 --- a/drivers/pinctrl/freescale/Kconfig +++ b/drivers/pinctrl/freescale/Kconfig @@ -2,6 +2,7 @@ config PINCTRL_IMX bool select PINMUX select PINCONF + select REGMAP config PINCTRL_IMX1_CORE bool diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 2bbe6f7964a7..6ab8c3ccdeea 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1004,7 +1004,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, struct mtk_pinctrl *pctl = dev_get_drvdata(chip->parent); int eint_num, virq, eint_offset; unsigned int set_offset, bit, clr_bit, clr_offset, rst, i, unmask, dbnc; - static const unsigned int dbnc_arr[] = {0 , 1, 16, 32, 64, 128, 256}; + static const unsigned int debounce_time[] = {500, 1000, 16000, 32000, 64000, + 128000, 256000}; const struct mtk_desc_pin *pin; struct irq_data *d; @@ -1022,9 +1023,9 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, if (!mtk_eint_can_en_debounce(pctl, eint_num)) return -ENOSYS; - dbnc = ARRAY_SIZE(dbnc_arr); - for (i = 0; i < ARRAY_SIZE(dbnc_arr); i++) { - if (debounce <= dbnc_arr[i]) { + dbnc = ARRAY_SIZE(debounce_time); + for (i = 0; i < ARRAY_SIZE(debounce_time); i++) { + if (debounce <= debounce_time[i]) { dbnc = i; break; } diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index fb126d56ad40..cf9bafa10acf 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1280,9 +1280,9 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs, /* Parse pins in each row from LSB */ while (mask) { - bit_pos = ffs(mask); + bit_pos = __ffs(mask); pin_num_from_lsb = bit_pos / pcs->bits_per_pin; - mask_pos = ((pcs->fmask) << (bit_pos - 1)); + mask_pos = ((pcs->fmask) << bit_pos); val_pos = val & mask_pos; submask = mask & mask_pos; @@ -1852,7 +1852,7 @@ static int pcs_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "pinctrl-single,function-mask", &pcs->fmask); if (!ret) { - pcs->fshift = ffs(pcs->fmask) - 1; + pcs->fshift = __ffs(pcs->fmask); pcs->fmax = pcs->fmask >> pcs->fshift; } else { /* If mask property doesn't exist, function mux is invalid. */ diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 10ce6cba4455..09356684c32f 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -127,8 +127,10 @@ static int lis3lv02d_acpi_read(struct lis3lv02d *lis3, int reg, u8 *ret) arg0.integer.value = reg; status = acpi_evaluate_integer(dev->handle, "ALRD", &args, &lret); + if (ACPI_FAILURE(status)) + return -EINVAL; *ret = lret; - return (status != AE_OK) ? -EINVAL : 0; + return 0; } /** @@ -173,6 +175,7 @@ static int lis3lv02d_dmi_matched(const struct dmi_system_id *dmi) DEFINE_CONV(normal, 1, 2, 3); DEFINE_CONV(y_inverted, 1, -2, 3); DEFINE_CONV(x_inverted, -1, 2, 3); +DEFINE_CONV(x_inverted_usd, -1, 2, -3); DEFINE_CONV(z_inverted, 1, 2, -3); DEFINE_CONV(xy_swap, 2, 1, 3); DEFINE_CONV(xy_rotated_left, -2, 1, 3); @@ -236,6 +239,7 @@ static const struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HP8710", "HP Compaq 8710", y_inverted), AXIS_DMI_MATCH("HDX18", "HP HDX 18", x_inverted), AXIS_DMI_MATCH("HPB432x", "HP ProBook 432", xy_rotated_left), + AXIS_DMI_MATCH("HPB440G3", "HP ProBook 440 G3", x_inverted_usd), AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left), AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted), AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap), diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index f93abc8c1424..a818db6aa08f 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -91,6 +91,8 @@ static int intel_hid_pl_resume_handler(struct device *device) } static const struct dev_pm_ops intel_hid_pl_pm_ops = { + .freeze = intel_hid_pl_suspend_handler, + .restore = intel_hid_pl_resume_handler, .suspend = intel_hid_pl_suspend_handler, .resume = intel_hid_pl_resume_handler, }; diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index 3fb1d85c70a8..6f497e80c9df 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -687,8 +687,8 @@ static int ipc_plat_get_res(struct platform_device *pdev) ipcdev.acpi_io_size = size; dev_info(&pdev->dev, "io res: %pR\n", res); - /* This is index 0 to cover BIOS data register */ punit_res = punit_res_array; + /* This is index 0 to cover BIOS data register */ res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_BIOS_DATA_INDEX); if (!res) { @@ -698,55 +698,51 @@ static int ipc_plat_get_res(struct platform_device *pdev) *punit_res = *res; dev_info(&pdev->dev, "punit BIOS data res: %pR\n", res); + /* This is index 1 to cover BIOS interface register */ res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_BIOS_IFACE_INDEX); if (!res) { dev_err(&pdev->dev, "Failed to get res of punit BIOS iface\n"); return -ENXIO; } - /* This is index 1 to cover BIOS interface register */ *++punit_res = *res; dev_info(&pdev->dev, "punit BIOS interface res: %pR\n", res); + /* This is index 2 to cover ISP data register, optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_ISP_DATA_INDEX); - if (!res) { - dev_err(&pdev->dev, "Failed to get res of punit ISP data\n"); - return -ENXIO; + ++punit_res; + if (res) { + *punit_res = *res; + dev_info(&pdev->dev, "punit ISP data res: %pR\n", res); } - /* This is index 2 to cover ISP data register */ - *++punit_res = *res; - dev_info(&pdev->dev, "punit ISP data res: %pR\n", res); + /* This is index 3 to cover ISP interface register, optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_ISP_IFACE_INDEX); - if (!res) { - dev_err(&pdev->dev, "Failed to get res of punit ISP iface\n"); - return -ENXIO; + ++punit_res; + if (res) { + *punit_res = *res; + dev_info(&pdev->dev, "punit ISP interface res: %pR\n", res); } - /* This is index 3 to cover ISP interface register */ - *++punit_res = *res; - dev_info(&pdev->dev, "punit ISP interface res: %pR\n", res); + /* This is index 4 to cover GTD data register, optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_GTD_DATA_INDEX); - if (!res) { - dev_err(&pdev->dev, "Failed to get res of punit GTD data\n"); - return -ENXIO; + ++punit_res; + if (res) { + *punit_res = *res; + dev_info(&pdev->dev, "punit GTD data res: %pR\n", res); } - /* This is index 4 to cover GTD data register */ - *++punit_res = *res; - dev_info(&pdev->dev, "punit GTD data res: %pR\n", res); + /* This is index 5 to cover GTD interface register, optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_GTD_IFACE_INDEX); - if (!res) { - dev_err(&pdev->dev, "Failed to get res of punit GTD iface\n"); - return -ENXIO; + ++punit_res; + if (res) { + *punit_res = *res; + dev_info(&pdev->dev, "punit GTD interface res: %pR\n", res); } - /* This is index 5 to cover GTD interface register */ - *++punit_res = *res; - dev_info(&pdev->dev, "punit GTD interface res: %pR\n", res); res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_IPC_INDEX); diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index bd875409a02d..a47a41fc10ad 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -227,6 +227,11 @@ static int intel_punit_get_bars(struct platform_device *pdev) struct resource *res; void __iomem *addr; + /* + * The following resources are required + * - BIOS_IPC BASE_DATA + * - BIOS_IPC BASE_IFACE + */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); addr = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(addr)) @@ -239,29 +244,40 @@ static int intel_punit_get_bars(struct platform_device *pdev) return PTR_ERR(addr); punit_ipcdev->base[BIOS_IPC][BASE_IFACE] = addr; + /* + * The following resources are optional + * - ISPDRIVER_IPC BASE_DATA + * - ISPDRIVER_IPC BASE_IFACE + * - GTDRIVER_IPC BASE_DATA + * - GTDRIVER_IPC BASE_IFACE + */ res = platform_get_resource(pdev, IORESOURCE_MEM, 2); - addr = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(addr)) - return PTR_ERR(addr); - punit_ipcdev->base[ISPDRIVER_IPC][BASE_DATA] = addr; + if (res) { + addr = devm_ioremap_resource(&pdev->dev, res); + if (!IS_ERR(addr)) + punit_ipcdev->base[ISPDRIVER_IPC][BASE_DATA] = addr; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 3); - addr = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(addr)) - return PTR_ERR(addr); - punit_ipcdev->base[ISPDRIVER_IPC][BASE_IFACE] = addr; + if (res) { + addr = devm_ioremap_resource(&pdev->dev, res); + if (!IS_ERR(addr)) + punit_ipcdev->base[ISPDRIVER_IPC][BASE_IFACE] = addr; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 4); - addr = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(addr)) - return PTR_ERR(addr); - punit_ipcdev->base[GTDRIVER_IPC][BASE_DATA] = addr; + if (res) { + addr = devm_ioremap_resource(&pdev->dev, res); + if (!IS_ERR(addr)) + punit_ipcdev->base[GTDRIVER_IPC][BASE_DATA] = addr; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 5); - addr = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(addr)) - return PTR_ERR(addr); - punit_ipcdev->base[GTDRIVER_IPC][BASE_IFACE] = addr; + if (res) { + addr = devm_ioremap_resource(&pdev->dev, res); + if (!IS_ERR(addr)) + punit_ipcdev->base[GTDRIVER_IPC][BASE_IFACE] = addr; + } return 0; } diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c index 397119f83e82..781bd10ca7ac 100644 --- a/drivers/platform/x86/intel_telemetry_pltdrv.c +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c @@ -659,7 +659,7 @@ static int telemetry_plt_update_events(struct telemetry_evtconfig pss_evtconfig, static int telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period) { u32 telem_ctrl = 0; - int ret; + int ret = 0; mutex_lock(&(telm_conf->telem_lock)); if (ioss_period) { diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e305ab541a22..9255ff3ee81a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -7972,10 +7972,12 @@ static int fan_get_status_safe(u8 *status) fan_update_desired_level(s); mutex_unlock(&fan_mutex); + if (rc) + return rc; if (status) *status = s; - return rc; + return 0; } static int fan_get_speed(unsigned int *speed) diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 7225ac6b3df5..fad968eb75f6 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -392,7 +392,7 @@ static const struct regmap_config fsl_pwm_regmap_config = { .max_register = FTM_PWMLOAD, .volatile_reg = fsl_pwm_volatile_reg, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_FLAT, }; static int fsl_pwm_probe(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index b2156ee5bae1..ecb7dbae9be9 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -863,7 +863,7 @@ out: * A user-initiated temperature conversion is not started by this function, * so the temperature is updated once every 64 seconds. */ -static int ds3231_hwmon_read_temp(struct device *dev, s16 *mC) +static int ds3231_hwmon_read_temp(struct device *dev, s32 *mC) { struct ds1307 *ds1307 = dev_get_drvdata(dev); u8 temp_buf[2]; @@ -892,7 +892,7 @@ static ssize_t ds3231_hwmon_show_temp(struct device *dev, struct device_attribute *attr, char *buf) { int ret; - s16 temp; + s32 temp; ret = ds3231_hwmon_read_temp(dev, &temp); if (ret) @@ -1531,7 +1531,7 @@ read_rtc: return PTR_ERR(ds1307->rtc); } - if (ds1307_can_wakeup_device) { + if (ds1307_can_wakeup_device && ds1307->client->irq <= 0) { /* Disable request for an IRQ */ want_irq = false; dev_info(&client->dev, "'wakeup-source' is set, request for an IRQ is disabled!\n"); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 1bce9cf51b1e..b83908670a9a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -756,15 +756,16 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); - device_unregister(&dev_info->dev); /* unload all related segments */ list_for_each_entry(entry, &dev_info->seg_list, lh) segment_unload(entry->segment_name); - put_device(&dev_info->dev); up_write(&dcssblk_devices_sem); + device_unregister(&dev_info->dev); + put_device(&dev_info->dev); + rc = count; out_buf: kfree(local_buf); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 75d9896deccb..e6f54d3b8969 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -303,7 +303,7 @@ static void scm_blk_request(struct request_queue *rq) if (req->cmd_type != REQ_TYPE_FS) { blk_start_request(req); blk_dump_rq_flags(req, KMSG_COMPONENT " bad request"); - blk_end_request_all(req, -EIO); + __blk_end_request_all(req, -EIO); continue; } diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index f3bb7af4e984..ead83a24bcd1 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -688,6 +688,7 @@ static struct rt6_info *find_route_ipv6(const struct in6_addr *saddr, { struct flowi6 fl; + memset(&fl, 0, sizeof(fl)); if (saddr) memcpy(&fl.saddr, saddr, sizeof(struct in6_addr)); if (daddr) diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c index 57e781c71e67..837effe19907 100644 --- a/drivers/soc/mediatek/mtk-scpsys.c +++ b/drivers/soc/mediatek/mtk-scpsys.c @@ -491,13 +491,14 @@ static int scpsys_probe(struct platform_device *pdev) genpd->dev_ops.active_wakeup = scpsys_active_wakeup; /* - * With CONFIG_PM disabled turn on all domains to make the - * hardware usable. + * Initially turn on all domains to make the domains usable + * with !CONFIG_PM and to get the hardware in sync with the + * software. The unused domains will be switched off during + * late_init time. */ - if (!IS_ENABLED(CONFIG_PM)) - genpd->power_on(genpd); + genpd->power_on(genpd); - pm_genpd_init(genpd, NULL, true); + pm_genpd_init(genpd, NULL, false); } /* diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c index 63bb87593af0..d976e5e18d50 100644 --- a/drivers/staging/rtl8188eu/os_dep/mon.c +++ b/drivers/staging/rtl8188eu/os_dep/mon.c @@ -155,7 +155,7 @@ static void mon_setup(struct net_device *dev) dev->netdev_ops = &mon_netdev_ops; dev->destructor = free_netdev; ether_setup(dev); - dev->tx_queue_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; dev->type = ARPHRD_IEEE80211; /* * Use a locally administered address (IEEE 802) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c37eedc35a24..3c3dc4a3d52c 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -376,6 +376,8 @@ config MTK_THERMAL tristate "Temperature sensor driver for mediatek SoCs" depends on ARCH_MEDIATEK || COMPILE_TEST depends on HAS_IOMEM + depends on NVMEM || NVMEM=n + depends on RESET_CONTROLLER default y help Enable this option if you want to have support for thermal management diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 3d93b1c07cee..507632b9648e 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -27,7 +27,6 @@ #include <linux/thermal.h> #include <linux/reset.h> #include <linux/types.h> -#include <linux/nvmem-consumer.h> /* AUXADC Registers */ #define AUXADC_CON0_V 0x000 @@ -619,7 +618,7 @@ static struct platform_driver mtk_thermal_driver = { module_platform_driver(mtk_thermal_driver); -MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de"); +MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); MODULE_AUTHOR("Hanyi Wu <hanyi.wu@mediatek.com>"); MODULE_DESCRIPTION("Mediatek thermal driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 49ac23d3e776..d8ec44b194d6 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -803,8 +803,8 @@ static int thermal_of_populate_trip(struct device_node *np, * otherwise, it returns a corresponding ERR_PTR(). Caller must * check the return value with help of IS_ERR() helper. */ -static struct __thermal_zone * -thermal_of_build_thermal_zone(struct device_node *np) +static struct __thermal_zone +__init *thermal_of_build_thermal_zone(struct device_node *np) { struct device_node *child = NULL, *gchild; struct __thermal_zone *tz; diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 1246aa6fcab0..2f1a863a8e15 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -301,7 +301,7 @@ static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, capped_extra_power = 0; extra_power = 0; for (i = 0; i < num_actors; i++) { - u64 req_range = req_power[i] * power_range; + u64 req_range = (u64)req_power[i] * power_range; granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range, total_req_power); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index d4b54653ecf8..f1db49625555 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -688,7 +688,7 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - unsigned long temperature; + int temperature; if (!tz->ops->set_trip_temp) return -EPERM; @@ -696,7 +696,7 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr, if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip)) return -EINVAL; - if (kstrtoul(buf, 10, &temperature)) + if (kstrtoint(buf, 10, &temperature)) return -EINVAL; ret = tz->ops->set_trip_temp(tz, trip, temperature); @@ -899,9 +899,9 @@ emul_temp_store(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int ret = 0; - unsigned long temperature; + int temperature; - if (kstrtoul(buf, 10, &temperature)) + if (kstrtoint(buf, 10, &temperature)) return -EINVAL; if (!tz->ops->set_emul_temp) { diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index e16a49b507ef..cf0dc51a2690 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -626,7 +626,7 @@ static int pty_unix98_ioctl(struct tty_struct *tty, */ static struct tty_struct *ptm_unix98_lookup(struct tty_driver *driver, - struct inode *ptm_inode, int idx) + struct file *file, int idx) { /* Master must be open via /dev/ptmx */ return ERR_PTR(-EIO); @@ -642,12 +642,12 @@ static struct tty_struct *ptm_unix98_lookup(struct tty_driver *driver, */ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver, - struct inode *pts_inode, int idx) + struct file *file, int idx) { struct tty_struct *tty; mutex_lock(&devpts_mutex); - tty = devpts_get_priv(pts_inode); + tty = devpts_get_priv(file->f_path.dentry); mutex_unlock(&devpts_mutex); /* Master must be open before slave */ if (!tty) @@ -663,14 +663,14 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty) /* this is called once with whichever end is closed last */ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) { - struct inode *ptmx_inode; + struct pts_fs_info *fsi; if (tty->driver->subtype == PTY_TYPE_MASTER) - ptmx_inode = tty->driver_data; + fsi = tty->driver_data; else - ptmx_inode = tty->link->driver_data; - devpts_kill_index(ptmx_inode, tty->index); - devpts_del_ref(ptmx_inode); + fsi = tty->link->driver_data; + devpts_kill_index(fsi, tty->index); + devpts_put_ref(fsi); } static const struct tty_operations ptm_unix98_ops = { @@ -720,8 +720,9 @@ static const struct tty_operations pty_unix98_ops = { static int ptmx_open(struct inode *inode, struct file *filp) { + struct pts_fs_info *fsi; struct tty_struct *tty; - struct inode *slave_inode; + struct dentry *dentry; int retval; int index; @@ -734,54 +735,46 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; + fsi = devpts_get_ref(inode, filp); + retval = -ENODEV; + if (!fsi) + goto out_free_file; + /* find a device that is not in use. */ mutex_lock(&devpts_mutex); - index = devpts_new_index(inode); - if (index < 0) { - retval = index; - mutex_unlock(&devpts_mutex); - goto err_file; - } - + index = devpts_new_index(fsi); mutex_unlock(&devpts_mutex); - mutex_lock(&tty_mutex); - tty = tty_init_dev(ptm_driver, index); + retval = index; + if (index < 0) + goto out_put_ref; - if (IS_ERR(tty)) { - retval = PTR_ERR(tty); - goto out; - } + mutex_lock(&tty_mutex); + tty = tty_init_dev(ptm_driver, index); /* The tty returned here is locked so we can safely drop the mutex */ mutex_unlock(&tty_mutex); - set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - tty->driver_data = inode; + retval = PTR_ERR(tty); + if (IS_ERR(tty)) + goto out; /* - * In the case where all references to ptmx inode are dropped and we - * still have /dev/tty opened pointing to the master/slave pair (ptmx - * is closed/released before /dev/tty), we must make sure that the inode - * is still valid when we call the final pty_unix98_shutdown, thus we - * hold an additional reference to the ptmx inode. For the same /dev/tty - * last close case, we also need to make sure the super_block isn't - * destroyed (devpts instance unmounted), before /dev/tty is closed and - * on its release devpts_kill_index is called. + * From here on out, the tty is "live", and the index and + * fsi will be killed/put by the tty_release() */ - devpts_add_ref(inode); + set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ + tty->driver_data = fsi; tty_add_file(tty, filp); - slave_inode = devpts_pty_new(inode, - MKDEV(UNIX98_PTY_SLAVE_MAJOR, index), index, - tty->link); - if (IS_ERR(slave_inode)) { - retval = PTR_ERR(slave_inode); + dentry = devpts_pty_new(fsi, index, tty->link); + if (IS_ERR(dentry)) { + retval = PTR_ERR(dentry); goto err_release; } - tty->link->driver_data = slave_inode; + tty->link->driver_data = dentry; retval = ptm_driver->ops->open(tty, filp); if (retval) @@ -793,12 +786,14 @@ static int ptmx_open(struct inode *inode, struct file *filp) return 0; err_release: tty_unlock(tty); + // This will also put-ref the fsi tty_release(inode, filp); return retval; out: - mutex_unlock(&tty_mutex); - devpts_kill_index(inode, index); -err_file: + devpts_kill_index(fsi, index); +out_put_ref: + devpts_put_ref(fsi); +out_free_file: tty_free_file(filp); return retval; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index e213da01a3d7..00ad2637b08c 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1403,9 +1403,18 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p) /* * Empty the RX FIFO, we are not interested in anything * received during the half-duplex transmission. + * Enable previously disabled RX interrupts. */ - if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) + if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) { serial8250_clear_fifos(p); + + serial8250_rpm_get(p); + + p->ier |= UART_IER_RLSI | UART_IER_RDI; + serial_port_out(&p->port, UART_IER, p->ier); + + serial8250_rpm_put(p); + } } static void serial8250_em485_handle_stop_tx(unsigned long arg) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 64742a086ae3..4d7cb9c04fce 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -324,7 +324,6 @@ config SERIAL_8250_EM config SERIAL_8250_RT288X bool "Ralink RT288x/RT305x/RT3662/RT3883 serial port support" depends on SERIAL_8250 - depends on MIPS || COMPILE_TEST default y if MIPS_ALCHEMY || SOC_RT288X || SOC_RT305X || SOC_RT3883 || SOC_MT7620 help Selecting this option will add support for the alternate register diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index c9fdfc8bf47f..d08baa668d5d 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -72,7 +72,7 @@ static void uartlite_outbe32(u32 val, void __iomem *addr) iowrite32be(val, addr); } -static const struct uartlite_reg_ops uartlite_be = { +static struct uartlite_reg_ops uartlite_be = { .in = uartlite_inbe32, .out = uartlite_outbe32, }; @@ -87,21 +87,21 @@ static void uartlite_outle32(u32 val, void __iomem *addr) iowrite32(val, addr); } -static const struct uartlite_reg_ops uartlite_le = { +static struct uartlite_reg_ops uartlite_le = { .in = uartlite_inle32, .out = uartlite_outle32, }; static inline u32 uart_in32(u32 offset, struct uart_port *port) { - const struct uartlite_reg_ops *reg_ops = port->private_data; + struct uartlite_reg_ops *reg_ops = port->private_data; return reg_ops->in(port->membase + offset); } static inline void uart_out32(u32 val, u32 offset, struct uart_port *port) { - const struct uartlite_reg_ops *reg_ops = port->private_data; + struct uartlite_reg_ops *reg_ops = port->private_data; reg_ops->out(val, port->membase + offset); } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 9b04d72e752e..24d5491ef0da 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1367,12 +1367,12 @@ static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p) * Locking: tty_mutex must be held. If the tty is found, bump the tty kref. */ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver, - struct inode *inode, int idx) + struct file *file, int idx) { struct tty_struct *tty; if (driver->ops->lookup) - tty = driver->ops->lookup(driver, inode, idx); + tty = driver->ops->lookup(driver, file, idx); else tty = driver->ttys[idx]; @@ -2040,7 +2040,7 @@ static struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, } /* check whether we're reopening an existing tty */ - tty = tty_driver_lookup_tty(driver, inode, index); + tty = tty_driver_lookup_tty(driver, filp, index); if (IS_ERR(tty)) { mutex_unlock(&tty_mutex); goto out; diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 83fd30b0577c..a6c4a1b895bd 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -744,11 +744,15 @@ static void acm_tty_flush_chars(struct tty_struct *tty) int err; unsigned long flags; + if (!cur) /* nothing to do */ + return; + acm->putbuffer = NULL; err = usb_autopm_get_interface_async(acm->control); spin_lock_irqsave(&acm->write_lock, flags); if (err < 0) { cur->use = 0; + acm->putbuffer = cur; goto out; } diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index f9d42cf23e55..7859d738df41 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -73,6 +73,15 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd, if (companion->bus != pdev->bus || PCI_SLOT(companion->devfn) != slot) continue; + + /* + * Companion device should be either UHCI,OHCI or EHCI host + * controller, otherwise skip. + */ + if (companion->class != CL_UHCI && companion->class != CL_OHCI && + companion->class != CL_EHCI) + continue; + companion_hcd = pci_get_drvdata(companion); if (!companion_hcd || !companion_hcd->self.root_hub) continue; diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index fa20f5a99d12..34277ced26bd 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1150,6 +1150,11 @@ static int dwc3_suspend(struct device *dev) phy_exit(dwc->usb2_generic_phy); phy_exit(dwc->usb3_generic_phy); + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); + WARN_ON(phy_power_off(dwc->usb2_generic_phy) < 0); + WARN_ON(phy_power_off(dwc->usb3_generic_phy) < 0); + pinctrl_pm_select_sleep_state(dev); return 0; @@ -1163,11 +1168,21 @@ static int dwc3_resume(struct device *dev) pinctrl_pm_select_default_state(dev); + usb_phy_set_suspend(dwc->usb2_phy, 0); + usb_phy_set_suspend(dwc->usb3_phy, 0); + ret = phy_power_on(dwc->usb2_generic_phy); + if (ret < 0) + return ret; + + ret = phy_power_on(dwc->usb3_generic_phy); + if (ret < 0) + goto err_usb2phy_power; + usb_phy_init(dwc->usb3_phy); usb_phy_init(dwc->usb2_phy); ret = phy_init(dwc->usb2_generic_phy); if (ret < 0) - return ret; + goto err_usb3phy_power; ret = phy_init(dwc->usb3_generic_phy); if (ret < 0) @@ -1200,6 +1215,12 @@ static int dwc3_resume(struct device *dev) err_usb2phy_init: phy_exit(dwc->usb2_generic_phy); +err_usb3phy_power: + phy_power_off(dwc->usb3_generic_phy); + +err_usb2phy_power: + phy_power_off(dwc->usb2_generic_phy); + return ret; } diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c index 9ac37fe1b6a7..cebf9e38b60a 100644 --- a/drivers/usb/dwc3/debugfs.c +++ b/drivers/usb/dwc3/debugfs.c @@ -645,7 +645,7 @@ int dwc3_debugfs_init(struct dwc3 *dwc) file = debugfs_create_regset32("regdump", S_IRUGO, root, dwc->regset); if (!file) { ret = -ENOMEM; - goto err1; + goto err2; } if (IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)) { @@ -653,7 +653,7 @@ int dwc3_debugfs_init(struct dwc3 *dwc) dwc, &dwc3_mode_fops); if (!file) { ret = -ENOMEM; - goto err1; + goto err2; } } @@ -663,19 +663,22 @@ int dwc3_debugfs_init(struct dwc3 *dwc) dwc, &dwc3_testmode_fops); if (!file) { ret = -ENOMEM; - goto err1; + goto err2; } file = debugfs_create_file("link_state", S_IRUGO | S_IWUSR, root, dwc, &dwc3_link_state_fops); if (!file) { ret = -ENOMEM; - goto err1; + goto err2; } } return 0; +err2: + kfree(dwc->regset); + err1: debugfs_remove_recursive(root); @@ -686,5 +689,5 @@ err0: void dwc3_debugfs_exit(struct dwc3 *dwc) { debugfs_remove_recursive(dwc->root); - dwc->root = NULL; + kfree(dwc->regset); } diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 22e9606d8e08..55da2c7f727f 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -496,7 +496,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "get_sync failed with err %d\n", ret); - goto err0; + goto err1; } dwc3_omap_map_offset(omap); @@ -516,28 +516,24 @@ static int dwc3_omap_probe(struct platform_device *pdev) ret = dwc3_omap_extcon_register(omap); if (ret < 0) - goto err2; + goto err1; ret = of_platform_populate(node, NULL, NULL, dev); if (ret) { dev_err(&pdev->dev, "failed to create dwc3 core\n"); - goto err3; + goto err2; } dwc3_omap_enable_irqs(omap); return 0; -err3: +err2: extcon_unregister_notifier(omap->edev, EXTCON_USB, &omap->vbus_nb); extcon_unregister_notifier(omap->edev, EXTCON_USB_HOST, &omap->id_nb); -err2: - dwc3_omap_disable_irqs(omap); err1: pm_runtime_put_sync(dev); - -err0: pm_runtime_disable(dev); return ret; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d54a028cdfeb..8e4a1b195e9b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2936,6 +2936,9 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { + if (!dwc->gadget_driver) + return 0; + if (dwc->pullups_connected) { dwc3_gadget_disable_irq(dwc); dwc3_gadget_run_stop(dwc, true, true); @@ -2954,6 +2957,9 @@ int dwc3_gadget_resume(struct dwc3 *dwc) struct dwc3_ep *dep; int ret; + if (!dwc->gadget_driver) + return 0; + /* Start with SuperSpeed Default */ dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index de9ffd60fcfa..524e233d48de 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -651,6 +651,8 @@ static int bos_desc(struct usb_composite_dev *cdev) ssp_cap->bLength = USB_DT_USB_SSP_CAP_SIZE(1); ssp_cap->bDescriptorType = USB_DT_DEVICE_CAPABILITY; ssp_cap->bDevCapabilityType = USB_SSP_CAP_TYPE; + ssp_cap->bReserved = 0; + ssp_cap->wReserved = 0; /* SSAC = 1 (2 attributes) */ ssp_cap->bmAttributes = cpu_to_le32(1); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index e21ca2bd6839..15b648cbc75c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -646,6 +646,7 @@ static void ffs_user_copy_worker(struct work_struct *work) work); int ret = io_data->req->status ? io_data->req->status : io_data->req->actual; + bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { use_mm(io_data->mm); @@ -657,13 +658,11 @@ static void ffs_user_copy_worker(struct work_struct *work) io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); - if (io_data->ffs->ffs_eventfd && - !(io_data->kiocb->ki_flags & IOCB_EVENTFD)) + if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) eventfd_signal(io_data->ffs->ffs_eventfd, 1); usb_ep_free_request(io_data->ep, io_data->req); - io_data->kiocb->private = NULL; if (io_data->read) kfree(io_data->to_free); kfree(io_data->buf); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 80c1de239e9a..bad0d1f9a41d 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1861,6 +1861,12 @@ no_bw: kfree(xhci->rh_bw); kfree(xhci->ext_caps); + xhci->usb2_ports = NULL; + xhci->usb3_ports = NULL; + xhci->port_array = NULL; + xhci->rh_bw = NULL; + xhci->ext_caps = NULL; + xhci->page_size = 0; xhci->page_shift = 0; xhci->bus_state[0].bus_suspended = 0; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index f0640b7a1c42..48672fac7ff3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -48,6 +48,7 @@ #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 +#define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 static const char hcd_name[] = "xhci_hcd"; @@ -155,7 +156,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -302,6 +304,7 @@ static void xhci_pci_remove(struct pci_dev *dev) struct xhci_hcd *xhci; xhci = hcd_to_xhci(pci_get_drvdata(dev)); + xhci->xhc_state |= XHCI_STATE_REMOVING; if (xhci->shared_hcd) { usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 5c15e9bc5f7a..474b5fa14900 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -39,12 +39,25 @@ static const struct xhci_driver_overrides xhci_plat_overrides __initconst = { static void xhci_plat_quirks(struct device *dev, struct xhci_hcd *xhci) { + struct usb_hcd *hcd = xhci_to_hcd(xhci); + /* * As of now platform drivers don't provide MSI support so we ensure * here that the generic code does not try to make a pci_dev from our * dev struct in order to setup MSI */ xhci->quirks |= XHCI_PLAT; + + /* + * On R-Car Gen2 and Gen3, the AC64 bit (bit 0) of HCCPARAMS1 is set + * to 1. However, these SoCs don't support 64-bit address memory + * pointers. So, this driver clears the AC64 bit of xhci->hcc_params + * to call dma_set_coherent_mask(dev, DMA_BIT_MASK(32)) in + * xhci_gen_setup(). + */ + if (xhci_plat_type_is(hcd, XHCI_PLAT_TYPE_RENESAS_RCAR_GEN2) || + xhci_plat_type_is(hcd, XHCI_PLAT_TYPE_RENESAS_RCAR_GEN3)) + xhci->quirks |= XHCI_NO_64BIT_SUPPORT; } /* called during probe() after chip reset completes */ diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 5a2e2e3936c4..529c3c40f901 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -14,7 +14,7 @@ #include "xhci.h" /* for hcd_to_xhci() */ enum xhci_plat_type { - XHCI_PLAT_TYPE_MARVELL_ARMADA, + XHCI_PLAT_TYPE_MARVELL_ARMADA = 1, XHCI_PLAT_TYPE_RENESAS_RCAR_GEN2, XHCI_PLAT_TYPE_RENESAS_RCAR_GEN3, }; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7cf66212ceae..99b4ff42f7a0 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -4004,7 +4004,8 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, int reserved_trbs = xhci->cmd_ring_reserved_trbs; int ret; - if (xhci->xhc_state) { + if ((xhci->xhc_state & XHCI_STATE_DYING) || + (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n"); return -ESHUTDOWN; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index d51ee0c3cf9f..9e71c96ad74a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -147,7 +147,8 @@ static int xhci_start(struct xhci_hcd *xhci) "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); if (!ret) - xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING); + /* clear state flags. Including dying, halted or removing */ + xhci->xhc_state = 0; return ret; } @@ -1108,8 +1109,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* Resume root hubs only when have pending events. */ status = readl(&xhci->op_regs->status); if (status & STS_EINT) { - usb_hcd_resume_root_hub(hcd); usb_hcd_resume_root_hub(xhci->shared_hcd); + usb_hcd_resume_root_hub(hcd); } } @@ -1124,10 +1125,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* Re-enable port polling. */ xhci_dbg(xhci, "%s: starting port polling.\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &hcd->flags); - usb_hcd_poll_rh_status(hcd); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); usb_hcd_poll_rh_status(xhci->shared_hcd); + set_bit(HCD_FLAG_POLL_RH, &hcd->flags); + usb_hcd_poll_rh_status(hcd); return retval; } @@ -2773,7 +2774,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_DYING) + if ((xhci->xhc_state & XHCI_STATE_DYING) || + (xhci->xhc_state & XHCI_STATE_REMOVING)) return -ENODEV; xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); @@ -3820,7 +3822,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); - if (xhci->xhc_state) /* dying or halted */ + if (xhci->xhc_state) /* dying, removing or halted */ goto out; if (!udev->slot_id) { @@ -4948,6 +4950,16 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) return retval; xhci_dbg(xhci, "Reset complete\n"); + /* + * On some xHCI controllers (e.g. R-Car SoCs), the AC64 bit (bit 0) + * of HCCPARAMS1 is set to 1. However, the xHCs don't support 64-bit + * address memory pointers actually. So, this driver clears the AC64 + * bit of xhci->hcc_params to call dma_set_coherent_mask(dev, + * DMA_BIT_MASK(32)) in this xhci_gen_setup(). + */ + if (xhci->quirks & XHCI_NO_64BIT_SUPPORT) + xhci->hcc_params &= ~BIT(0); + /* Set dma_mask and coherent_dma_mask to 64-bits, * if xHC supports 64-bit addressing */ if (HCC_64BIT_ADDR(xhci->hcc_params) && diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index e293e0974f48..6c629c97f8ad 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1605,6 +1605,7 @@ struct xhci_hcd { */ #define XHCI_STATE_DYING (1 << 0) #define XHCI_STATE_HALTED (1 << 1) +#define XHCI_STATE_REMOVING (1 << 2) /* Statistics */ int error_bitmask; unsigned int quirks; @@ -1641,6 +1642,7 @@ struct xhci_hcd { #define XHCI_PME_STUCK_QUIRK (1 << 20) #define XHCI_MTK_HOST (1 << 21) #define XHCI_SSIC_PORT_UNUSED (1 << 22) +#define XHCI_NO_64BIT_SUPPORT (1 << 23) unsigned int num_active_eps; unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 13e4cc31bc79..16bc679dc2fc 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -2,7 +2,7 @@ * USB Attached SCSI * Note that this is not the same as the USB Mass Storage driver * - * Copyright Hans de Goede <hdegoede@redhat.com> for Red Hat, Inc. 2013 - 2014 + * Copyright Hans de Goede <hdegoede@redhat.com> for Red Hat, Inc. 2013 - 2016 * Copyright Matthew Wilcox for Intel Corp, 2010 * Copyright Sarah Sharp for Intel Corp, 2010 * @@ -781,6 +781,17 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd) return SUCCESS; } +static int uas_target_alloc(struct scsi_target *starget) +{ + struct uas_dev_info *devinfo = (struct uas_dev_info *) + dev_to_shost(starget->dev.parent)->hostdata; + + if (devinfo->flags & US_FL_NO_REPORT_LUNS) + starget->no_report_luns = 1; + + return 0; +} + static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = @@ -824,7 +835,6 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_BROKEN_FUA) sdev->broken_fua = 1; - scsi_change_queue_depth(sdev, devinfo->qdepth - 2); return 0; } @@ -832,6 +842,7 @@ static struct scsi_host_template uas_host_template = { .module = THIS_MODULE, .name = "uas", .queuecommand = uas_queuecommand, + .target_alloc = uas_target_alloc, .slave_alloc = uas_slave_alloc, .slave_configure = uas_slave_configure, .eh_abort_handler = uas_eh_abort_handler, @@ -956,6 +967,12 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto set_alt0; + /* + * 1 tag is reserved for untagged commands + + * 1 tag to avoid off by one errors in some bridge firmwares + */ + shost->can_queue = devinfo->qdepth - 2; + usb_set_intfdata(intf, shost); result = scsi_add_host(shost, &intf->dev); if (result) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index ccc113e83d88..53341a77d89f 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -64,6 +64,13 @@ UNUSUAL_DEV(0x0bc2, 0x3312, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: David Webb <djw@noc.ac.uk> */ +UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, + "Seagate", + "Expansion Desk", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_LUNS), + /* Reported-by: Hans de Goede <hdegoede@redhat.com> */ UNUSUAL_DEV(0x0bc2, 0x3320, 0x0000, 0x9999, "Seagate", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 43576ed31ccd..9de988a0f856 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -482,7 +482,7 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | - US_FL_MAX_SECTORS_240); + US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS); p = quirks; while (*p) { @@ -532,6 +532,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'i': f |= US_FL_IGNORE_DEVICE; break; + case 'j': + f |= US_FL_NO_REPORT_LUNS; + break; case 'l': f |= US_FL_NOT_LOCKABLE; break; diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index fe274b5851c7..93e66a9148b9 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -440,13 +440,14 @@ static int clcdfb_register(struct clcd_fb *fb) fb->off_ienb = CLCD_PL111_IENB; fb->off_cntl = CLCD_PL111_CNTL; } else { -#ifdef CONFIG_ARCH_VERSATILE - fb->off_ienb = CLCD_PL111_IENB; - fb->off_cntl = CLCD_PL111_CNTL; -#else - fb->off_ienb = CLCD_PL110_IENB; - fb->off_cntl = CLCD_PL110_CNTL; -#endif + if (of_machine_is_compatible("arm,versatile-ab") || + of_machine_is_compatible("arm,versatile-pb")) { + fb->off_ienb = CLCD_PL111_IENB; + fb->off_cntl = CLCD_PL111_CNTL; + } else { + fb->off_ienb = CLCD_PL110_IENB; + fb->off_cntl = CLCD_PL110_CNTL; + } } fb->clk = clk_get(&fb->dev->dev, NULL); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c index abfd1f6e3327..1954ec913ce5 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c @@ -200,20 +200,16 @@ static struct omap_dss_driver sharp_ls_ops = { static int sharp_ls_get_gpio(struct device *dev, int gpio, unsigned long flags, char *desc, struct gpio_desc **gpiod) { - struct gpio_desc *gd; int r; - *gpiod = NULL; - r = devm_gpio_request_one(dev, gpio, flags, desc); - if (r) + if (r) { + *gpiod = NULL; return r == -ENOENT ? 0 : r; + } - gd = gpio_to_desc(gpio); - if (IS_ERR(gd)) - return PTR_ERR(gd) == -ENOENT ? 0 : PTR_ERR(gd); + *gpiod = gpio_to_desc(gpio); - *gpiod = gd; return 0; } diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 7f5804537d30..2fc8c43ce531 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -26,6 +26,7 @@ #include <linux/ratelimit.h> #include <linux/bio.h> #include <linux/dcache.h> +#include <linux/namei.h> #include <linux/fscrypto.h> #include <linux/ecryptfs.h> @@ -81,13 +82,14 @@ EXPORT_SYMBOL(fscrypt_release_ctx); /** * fscrypt_get_ctx() - Gets an encryption context * @inode: The inode for which we are doing the crypto + * @gfp_flags: The gfp flag for memory allocation * * Allocates and initializes an encryption context. * * Return: An allocated and initialized encryption context on success; error * value or NULL otherwise. */ -struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode) +struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags) { struct fscrypt_ctx *ctx = NULL; struct fscrypt_info *ci = inode->i_crypt_info; @@ -113,7 +115,7 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode) list_del(&ctx->free_list); spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); if (!ctx) { - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS); + ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags); if (!ctx) return ERR_PTR(-ENOMEM); ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL; @@ -147,7 +149,8 @@ typedef enum { static int do_page_crypto(struct inode *inode, fscrypt_direction_t rw, pgoff_t index, - struct page *src_page, struct page *dest_page) + struct page *src_page, struct page *dest_page, + gfp_t gfp_flags) { u8 xts_tweak[FS_XTS_TWEAK_SIZE]; struct skcipher_request *req = NULL; @@ -157,7 +160,7 @@ static int do_page_crypto(struct inode *inode, struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; - req = skcipher_request_alloc(tfm, GFP_NOFS); + req = skcipher_request_alloc(tfm, gfp_flags); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", @@ -199,10 +202,9 @@ static int do_page_crypto(struct inode *inode, return 0; } -static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) +static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags) { - ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, - GFP_NOWAIT); + ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); if (ctx->w.bounce_page == NULL) return ERR_PTR(-ENOMEM); ctx->flags |= FS_WRITE_PATH_FL; @@ -213,6 +215,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) * fscypt_encrypt_page() - Encrypts a page * @inode: The inode for which the encryption should take place * @plaintext_page: The page to encrypt. Must be locked. + * @gfp_flags: The gfp flag for memory allocation * * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx * encryption context. @@ -225,7 +228,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) * error value or NULL. */ struct page *fscrypt_encrypt_page(struct inode *inode, - struct page *plaintext_page) + struct page *plaintext_page, gfp_t gfp_flags) { struct fscrypt_ctx *ctx; struct page *ciphertext_page = NULL; @@ -233,18 +236,19 @@ struct page *fscrypt_encrypt_page(struct inode *inode, BUG_ON(!PageLocked(plaintext_page)); - ctx = fscrypt_get_ctx(inode); + ctx = fscrypt_get_ctx(inode, gfp_flags); if (IS_ERR(ctx)) return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_bounce_page(ctx); + ciphertext_page = alloc_bounce_page(ctx, gfp_flags); if (IS_ERR(ciphertext_page)) goto errout; ctx->w.control_page = plaintext_page; err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index, - plaintext_page, ciphertext_page); + plaintext_page, ciphertext_page, + gfp_flags); if (err) { ciphertext_page = ERR_PTR(err); goto errout; @@ -275,7 +279,7 @@ int fscrypt_decrypt_page(struct page *page) BUG_ON(!PageLocked(page)); return do_page_crypto(page->mapping->host, - FS_DECRYPT, page->index, page, page); + FS_DECRYPT, page->index, page, page, GFP_NOFS); } EXPORT_SYMBOL(fscrypt_decrypt_page); @@ -289,11 +293,11 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - ctx = fscrypt_get_ctx(inode); + ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ciphertext_page = alloc_bounce_page(ctx); + ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT); if (IS_ERR(ciphertext_page)) { err = PTR_ERR(ciphertext_page); goto errout; @@ -301,11 +305,12 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, while (len--) { err = do_page_crypto(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page); + ZERO_PAGE(0), ciphertext_page, + GFP_NOFS); if (err) goto errout; - bio = bio_alloc(GFP_KERNEL, 1); + bio = bio_alloc(GFP_NOWAIT, 1); if (!bio) { err = -ENOMEM; goto errout; @@ -345,13 +350,20 @@ EXPORT_SYMBOL(fscrypt_zeroout_range); */ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct inode *dir = d_inode(dentry->d_parent); - struct fscrypt_info *ci = dir->i_crypt_info; + struct dentry *dir; + struct fscrypt_info *ci; int dir_has_key, cached_with_key; - if (!dir->i_sb->s_cop->is_encrypted(dir)) + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dget_parent(dentry); + if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) { + dput(dir); return 0; + } + ci = d_inode(dir)->i_crypt_info; if (ci && ci->ci_keyring_key && (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED) | @@ -363,6 +375,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; spin_unlock(&dentry->d_lock); dir_has_key = (ci != NULL); + dput(dir); /* * If the dentry was cached without the key, and it is a diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index bece948b363d..8580831ed237 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -457,7 +457,7 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) return failed_creating(dentry); - inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = (void *)f; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 655f21f99160..0b2954d7172d 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -128,6 +128,7 @@ static const match_table_t tokens = { struct pts_fs_info { struct ida allocated_ptys; struct pts_mount_opts mount_opts; + struct super_block *sb; struct dentry *ptmx_dentry; }; @@ -358,7 +359,7 @@ static const struct super_operations devpts_sops = { .show_options = devpts_show_options, }; -static void *new_pts_fs_info(void) +static void *new_pts_fs_info(struct super_block *sb) { struct pts_fs_info *fsi; @@ -369,6 +370,7 @@ static void *new_pts_fs_info(void) ida_init(&fsi->allocated_ptys); fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE; fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + fsi->sb = sb; return fsi; } @@ -384,7 +386,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) s->s_op = &devpts_sops; s->s_time_gran = 1; - s->s_fs_info = new_pts_fs_info(); + s->s_fs_info = new_pts_fs_info(s); if (!s->s_fs_info) goto fail; @@ -524,17 +526,14 @@ static struct file_system_type devpts_fs_type = { * to the System V naming convention */ -int devpts_new_index(struct inode *ptmx_inode) +int devpts_new_index(struct pts_fs_info *fsi) { - struct super_block *sb = pts_sb_from_inode(ptmx_inode); - struct pts_fs_info *fsi; int index; int ida_ret; - if (!sb) + if (!fsi) return -ENODEV; - fsi = DEVPTS_SB(sb); retry: if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; @@ -564,11 +563,8 @@ retry: return index; } -void devpts_kill_index(struct inode *ptmx_inode, int idx) +void devpts_kill_index(struct pts_fs_info *fsi, int idx) { - struct super_block *sb = pts_sb_from_inode(ptmx_inode); - struct pts_fs_info *fsi = DEVPTS_SB(sb); - mutex_lock(&allocated_ptys_lock); ida_remove(&fsi->allocated_ptys, idx); pty_count--; @@ -578,21 +574,25 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) /* * pty code needs to hold extra references in case of last /dev/tty close */ - -void devpts_add_ref(struct inode *ptmx_inode) +struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file) { - struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct super_block *sb; + struct pts_fs_info *fsi; + + sb = pts_sb_from_inode(ptmx_inode); + if (!sb) + return NULL; + fsi = DEVPTS_SB(sb); + if (!fsi) + return NULL; atomic_inc(&sb->s_active); - ihold(ptmx_inode); + return fsi; } -void devpts_del_ref(struct inode *ptmx_inode) +void devpts_put_ref(struct pts_fs_info *fsi) { - struct super_block *sb = pts_sb_from_inode(ptmx_inode); - - iput(ptmx_inode); - deactivate_super(sb); + deactivate_super(fsi->sb); } /** @@ -604,22 +604,20 @@ void devpts_del_ref(struct inode *ptmx_inode) * * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill. */ -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, - void *priv) +struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) { struct dentry *dentry; - struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct super_block *sb; struct inode *inode; struct dentry *root; - struct pts_fs_info *fsi; struct pts_mount_opts *opts; char s[12]; - if (!sb) + if (!fsi) return ERR_PTR(-ENODEV); + sb = fsi->sb; root = sb->s_root; - fsi = DEVPTS_SB(sb); opts = &fsi->mount_opts; inode = new_inode(sb); @@ -630,25 +628,21 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - init_special_inode(inode, S_IFCHR|opts->mode, device); - inode->i_private = priv; + init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index)); sprintf(s, "%d", index); - inode_lock(d_inode(root)); - dentry = d_alloc_name(root, s); if (dentry) { + dentry->d_fsdata = priv; d_add(dentry, inode); fsnotify_create(d_inode(root), dentry); } else { iput(inode); - inode = ERR_PTR(-ENOMEM); + dentry = ERR_PTR(-ENOMEM); } - inode_unlock(d_inode(root)); - - return inode; + return dentry; } /** @@ -657,24 +651,10 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, * * Returns whatever was passed as priv in devpts_pty_new for a given inode. */ -void *devpts_get_priv(struct inode *pts_inode) +void *devpts_get_priv(struct dentry *dentry) { - struct dentry *dentry; - void *priv = NULL; - - BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); - - /* Ensure dentry has not been deleted by devpts_pty_kill() */ - dentry = d_find_alias(pts_inode); - if (!dentry) - return NULL; - - if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - priv = pts_inode->i_private; - - dput(dentry); - - return priv; + WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC); + return dentry->d_fsdata; } /** @@ -683,24 +663,14 @@ void *devpts_get_priv(struct inode *pts_inode) * * This is an inverse operation of devpts_pty_new. */ -void devpts_pty_kill(struct inode *inode) +void devpts_pty_kill(struct dentry *dentry) { - struct super_block *sb = pts_sb_from_inode(inode); - struct dentry *root = sb->s_root; - struct dentry *dentry; + WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC); - BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); - - inode_lock(d_inode(root)); - - dentry = d_find_alias(inode); - - drop_nlink(inode); + dentry->d_fsdata = NULL; + drop_nlink(dentry->d_inode); d_delete(dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ - dput(dentry); /* d_find_alias above */ - - inode_unlock(d_inode(root)); } static int __init init_devpts_fs(void) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index db9ae6e18154..6a6c27373b54 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -32,6 +32,7 @@ #include <linux/random.h> #include <linux/scatterlist.h> #include <linux/spinlock_types.h> +#include <linux/namei.h> #include "ext4_extents.h" #include "xattr.h" @@ -482,6 +483,9 @@ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) struct ext4_crypt_info *ci; int dir_has_key, cached_with_key; + if (flags & LOOKUP_RCU) + return -ECHILD; + dir = dget_parent(dentry); if (!ext4_encrypted_inode(d_inode(dir))) { dput(dir); diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 4173bfe21114..561d7308b393 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -150,11 +150,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; - if (fatal_signal_pending(current)) { - err = -ERESTARTSYS; - goto errout; - } - cond_resched(); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index db98f89f737f..48e4b8907826 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1107,11 +1107,6 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, } while (1) { - if (signal_pending(current)) { - err = -ERESTARTSYS; - goto errout; - } - cond_resched(); block = dx_get_block(frame->at); ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, start_hash, start_minor_hash); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 53fec0872e60..5dafb9cef12e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -992,7 +992,7 @@ submit_and_realloc: if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(inode); + ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) goto set_error_page; @@ -1092,14 +1092,24 @@ int do_write_data_page(struct f2fs_io_info *fio) } if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + gfp_t gfp_flags = GFP_NOFS; /* wait for GCed encrypted page writeback */ f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), fio->old_blkaddr); - - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page); +retry_encrypt: + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, + gfp_flags); if (IS_ERR(fio->encrypted_page)) { err = PTR_ERR(fio->encrypted_page); + if (err == -ENOMEM) { + /* flush pending ios and wait for a while */ + f2fs_flush_merged_bios(F2FS_I_SB(inode)); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + err = 0; + goto retry_encrypt; + } goto out_writepage; } } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 443e07705c2a..90d1157a09f9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -441,7 +441,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { int ret = generic_file_open(inode, filp); - struct inode *dir = filp->f_path.dentry->d_parent->d_inode; + struct dentry *dir; if (!ret && f2fs_encrypted_inode(inode)) { ret = fscrypt_get_encryption_info(inode); @@ -450,9 +450,13 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } - if (f2fs_encrypted_inode(dir) && - !fscrypt_has_permitted_context(dir, inode)) + dir = dget_parent(file_dentry(filp)); + if (f2fs_encrypted_inode(d_inode(dir)) && + !fscrypt_has_permitted_context(d_inode(dir), inode)) { + dput(dir); return -EPERM; + } + dput(dir); return ret; } diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index d07a2f91d858..8b252673d454 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -47,7 +47,7 @@ void quota_send_warning(struct kqid qid, dev_t dev, void *msg_head; int ret; int msg_size = 4 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u64)); + 2 * nla_total_size_64bit(sizeof(u64)); /* We have to allocate using GFP_NOFS as we are called from a * filesystem performing write and thus further recursion into @@ -68,8 +68,9 @@ void quota_send_warning(struct kqid qid, dev_t dev, ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, qid.type); if (ret) goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, - from_kqid_munged(&init_user_ns, qid)); + ret = nla_put_u64_64bit(skb, QUOTA_NL_A_EXCESS_ID, + from_kqid_munged(&init_user_ns, qid), + QUOTA_NL_A_PAD); if (ret) goto attr_err_out; ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); @@ -81,8 +82,9 @@ void quota_send_warning(struct kqid qid, dev_t dev, ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); if (ret) goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, - from_kuid_munged(&init_user_ns, current_uid())); + ret = nla_put_u64_64bit(skb, QUOTA_NL_A_CAUSED_ID, + from_kuid_munged(&init_user_ns, current_uid()), + QUOTA_NL_A_PAD); if (ret) goto attr_err_out; genlmsg_end(skb, msg_head); diff --git a/fs/seq_file.c b/fs/seq_file.c index e85664b7c7d9..19f532e7d35e 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -72,9 +72,10 @@ int seq_open(struct file *file, const struct seq_operations *op) mutex_init(&p->lock); p->op = op; -#ifdef CONFIG_USER_NS - p->user_ns = file->f_cred->user_ns; -#endif + + // No refcounting: the lifetime of 'p' is constrained + // to the lifetime of the file. + p->file = file; /* * Wrappers around seq_open(e.g. swaps_open) need to be diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index e56272c919b5..bf2d34c9d804 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 val; preempt_disable(); - if (unlikely(get_user(val, uaddr) != 0)) + if (unlikely(get_user(val, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } - if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) + if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } *uval = val; preempt_enable(); diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 461a0558bca4..cebecff536a3 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void) { #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) return false; +#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) + return false; #else return true; #endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b2365a6eba3d..f63afdc43bec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -66,6 +66,11 @@ enum bpf_arg_type { * functions that access data on eBPF program stack */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ + ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not + * need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ @@ -164,7 +169,9 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +const struct bpf_func_proto *bpf_get_event_output_proto(void); #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index e0ee0b3000b2..5871f292b596 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -15,37 +15,23 @@ #include <linux/errno.h> +struct pts_fs_info; + #ifdef CONFIG_UNIX98_PTYS -int devpts_new_index(struct inode *ptmx_inode); -void devpts_kill_index(struct inode *ptmx_inode, int idx); -void devpts_add_ref(struct inode *ptmx_inode); -void devpts_del_ref(struct inode *ptmx_inode); +/* Look up a pts fs info and get a ref to it */ +struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); +void devpts_put_ref(struct pts_fs_info *); + +int devpts_new_index(struct pts_fs_info *); +void devpts_kill_index(struct pts_fs_info *, int); + /* mknod in devpts */ -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, - void *priv); +struct dentry *devpts_pty_new(struct pts_fs_info *, int, void *); /* get private structure */ -void *devpts_get_priv(struct inode *pts_inode); +void *devpts_get_priv(struct dentry *); /* unlink */ -void devpts_pty_kill(struct inode *inode); - -#else - -/* Dummy stubs in the no-pty case */ -static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } -static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } -static inline void devpts_add_ref(struct inode *ptmx_inode) { } -static inline void devpts_del_ref(struct inode *ptmx_inode) { } -static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, - dev_t device, int index, void *priv) -{ - return ERR_PTR(-EINVAL); -} -static inline void *devpts_get_priv(struct inode *pts_inode) -{ - return NULL; -} -static inline void devpts_pty_kill(struct inode *inode) { } +void devpts_pty_kill(struct dentry *); #endif diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e2b7bf27c03e..9ded8c6d8176 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -150,6 +150,13 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32); + +/* return false if src had higher bits set. lower bits always updated. */ +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src); + /** * struct ethtool_ops - optional netdev operations * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index cd91f75de49b..6027f6bbb061 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -263,9 +263,9 @@ static inline void fscrypt_set_d_op(struct dentry *dentry) extern struct kmem_cache *fscrypt_info_cachep; int fscrypt_initialize(void); -extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *); +extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(struct inode *, struct page *); +extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t); extern int fscrypt_decrypt_page(struct page *); extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); extern void fscrypt_pullback_bio_page(struct page **, bool); @@ -299,7 +299,8 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, #endif /* crypto.c */ -static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i) +static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i, + gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } @@ -310,7 +311,7 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) } static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i, - struct page *p) + struct page *p, gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index d3e415674dac..acedbb68a5a3 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -47,6 +47,7 @@ #define IEEE802154_ADDR_SHORT_UNSPEC 0xfffe #define IEEE802154_EXTENDED_ADDR_LEN 8 +#define IEEE802154_SHORT_ADDR_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 @@ -218,6 +219,7 @@ enum { /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 +#define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FTYPE_DATA 0x0001 @@ -233,6 +235,15 @@ static inline int ieee802154_is_data(__le16 fc) } /** + * ieee802154_is_secen - check if Security bit is set + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_secen(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_SECEN); +} + +/** * ieee802154_is_ackreq - check if acknowledgment request bit is set * @fc: frame control bytes in little-endian byteorder */ @@ -260,17 +271,17 @@ static inline bool ieee802154_is_intra_pan(__le16 fc) * * @len: psdu len with (MHR + payload + MFR) */ -static inline bool ieee802154_is_valid_psdu_len(const u8 len) +static inline bool ieee802154_is_valid_psdu_len(u8 len) { return (len == IEEE802154_ACK_PSDU_LEN || (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** - * ieee802154_is_valid_psdu_len - check if extended addr is valid + * ieee802154_is_valid_extended_unicast_addr - check if extended addr is valid * @addr: extended addr to check */ -static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) +static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) { /* Bail out if the address is all zero, or if the group * address bit is set. @@ -280,6 +291,34 @@ static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) } /** + * ieee802154_is_broadcast_short_addr - check if short addr is broadcast + * @addr: short addr to check + */ +static inline bool ieee802154_is_broadcast_short_addr(__le16 addr) +{ + return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)); +} + +/** + * ieee802154_is_unspec_short_addr - check if short addr is unspecified + * @addr: short addr to check + */ +static inline bool ieee802154_is_unspec_short_addr(__le16 addr) +{ + return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)); +} + +/** + * ieee802154_is_valid_src_short_addr - check if source short address is valid + * @addr: short addr to check + */ +static inline bool ieee802154_is_valid_src_short_addr(__le16 addr) +{ + return !(ieee802154_is_broadcast_short_addr(addr) || + ieee802154_is_unspec_short_addr(addr)); +} + +/** * ieee802154_random_extended_addr - generates a random extended address * @addr: extended addr pointer to place the random address */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7edc14fb66b6..58d6e158755f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -63,7 +63,8 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; - void *sysctl; + + struct ctl_table_header *sysctl_header; }; struct ipv6_params { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8541a913f6a3..d1f904c8b2cb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -828,6 +828,11 @@ struct mlx4_vf_dev { u8 n_ports; }; +enum mlx4_pci_status { + MLX4_PCI_STATUS_DISABLED, + MLX4_PCI_STATUS_ENABLED, +}; + struct mlx4_dev_persistent { struct pci_dev *pdev; struct mlx4_dev *dev; @@ -841,6 +846,8 @@ struct mlx4_dev_persistent { u8 state; struct mutex interface_state_mutex; /* protect SW state */ u8 interface_state; + struct mutex pci_status_mutex; /* sync pci state */ + enum mlx4_pci_status pci_status; }; struct mlx4_dev { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..6bd429b53b77 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -59,6 +59,7 @@ #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) @@ -644,7 +645,9 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[4]; + u8 outer_l3_tunneled; + u8 rsvd0; + __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; @@ -657,7 +660,7 @@ struct mlx5_cqe64 { __be16 slid; __be32 flags_rqpn; u8 hds_ip_ext; - u8 l4_hdr_type_etc; + u8 l4_l3_hdr_type; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; @@ -678,12 +681,22 @@ static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) { - return (cqe->l4_hdr_type_etc >> 4) & 0x7; + return (cqe->l4_l3_hdr_type >> 4) & 0x7; +} + +static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe) +{ + return (cqe->l4_l3_hdr_type >> 2) & 0x3; +} + +static inline u8 cqe_is_tunneled(struct mlx5_cqe64 *cqe) +{ + return cqe->outer_l3_tunneled & 0x1; } static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) { - return !!(cqe->l4_hdr_type_etc & 0x1); + return !!(cqe->l4_l3_hdr_type & 0x1); } static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) @@ -696,6 +709,42 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +struct mpwrq_cqe_bc { + __be16 filler_consumed_strides; + __be16 byte_cnt; +}; + +static inline u16 mpwrq_get_cqe_byte_cnt(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return be16_to_cpu(bc->byte_cnt); +} + +static inline u16 mpwrq_get_cqe_bc_consumed_strides(struct mpwrq_cqe_bc *bc) +{ + return 0x7fff & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_consumed_strides(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return mpwrq_get_cqe_bc_consumed_strides(bc); +} + +static inline bool mpwrq_is_filler_cqe(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return 0x8000 & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_stride_index(struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->wqe_counter); +} + enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, @@ -1331,6 +1380,7 @@ enum { MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, + MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcd5ac8d3b14..d5529449ef47 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -46,6 +46,10 @@ #include <linux/mlx5/doorbell.h> enum { + MLX5_RQ_BITMASK_VSD = 1 << 1, +}; + +enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, }; @@ -112,9 +116,12 @@ enum { MLX5_REG_PMPE = 0x5010, MLX5_REG_PELC = 0x500e, MLX5_REG_PVLC = 0x500f, - MLX5_REG_PMLP = 0, /* TBD */ + MLX5_REG_PCMR = 0x5041, + MLX5_REG_PMLP = 0x5002, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MCIA = 0x9014, + MLX5_REG_MLCR = 0x902b, }; enum { @@ -519,8 +526,9 @@ enum mlx5_device_state { }; enum mlx5_interface_state { - MLX5_INTERFACE_STATE_DOWN, - MLX5_INTERFACE_STATE_UP, + MLX5_INTERFACE_STATE_DOWN = BIT(0), + MLX5_INTERFACE_STATE_UP = BIT(1), + MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2), }; enum mlx5_pci_status { @@ -544,7 +552,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; - enum mlx5_interface_state interface_state; + unsigned long intf_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); @@ -552,6 +560,9 @@ struct mlx5_core_dev { struct mlx5_profile *profile; atomic_t num_qps; u32 issi; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; struct mlx5_db { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8dec5508d93d..165ff4f9cc6a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -82,12 +82,14 @@ struct mlx5_flow_table * mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - int max_num_groups); + int max_num_groups, + u32 level); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, - int num_flow_table_entries); + int num_flow_table_entries, + u32 level); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: @@ -113,4 +115,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest); void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); +int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest); + #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c15b8a864937..4ce4ea422a10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -513,7 +513,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 max_lso_cap[0x5]; u8 reserved_at_10[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_at_18[0x3]; + u8 reg_umr_sq[0x1]; + u8 scatter_fcs[0x1]; + u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; @@ -648,7 +650,7 @@ struct mlx5_ifc_vector_calc_cap_bits { enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, - MLX5_WQ_TYPE_STRQ = 0x2, + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, }; enum { @@ -750,21 +752,25 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; - u8 early_vf_enable; - u8 reserved_at_1a8[0x2]; + u8 early_vf_enable[0x1]; + u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_at_1af[0x6]; + u8 reserved_at_1af[0x2]; + u8 ports_check[0x1]; + u8 reserved_at_1b2[0x1]; + u8 disable_link_up[0x1]; + u8 beacon_led[0x1]; u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_at_1bf[0x3]; + u8 reserved_at_1c0[0x3]; u8 log_max_msg[0x5]; - u8 reserved_at_1c7[0x4]; + u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1cf[0x6]; + u8 reserved_at_1d0[0x6]; u8 rol_s[0x1]; u8 rol_g[0x1]; - u8 reserved_at_1d7[0x1]; + u8 reserved_at_1d8[0x1]; u8 wol_s[0x1]; u8 wol_g[0x1]; u8 wol_a[0x1]; @@ -774,47 +780,48 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 wol_p[0x1]; u8 stat_rate_support[0x10]; - u8 reserved_at_1ef[0xc]; + u8 reserved_at_1f0[0xc]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0x3]; + u8 striding_rq[0x1]; + u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_204[0xa]; + u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_at_212[0x1]; + u8 reserved_at_213[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_at_215[0x1]; + u8 reserved_at_216[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21a[0x1]; + u8 reserved_at_21b[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 reserved_at_21e[0x1]; + u8 reserved_at_21f[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_at_222[0x3]; + u8 reserved_at_223[0x3]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_at_228[0x1]; + u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; - u8 reserved_at_22a[0x1]; + u8 cq_period_start_from_cqe[0x1]; u8 cd[0x1]; - u8 reserved_at_22c[0x1]; + u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; - u8 reserved_at_22f[0x1]; + u8 umr_ptr_rlky[0x1]; u8 imaicl[0x1]; - u8 reserved_at_231[0x4]; + u8 reserved_at_232[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; @@ -824,98 +831,101 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_at_23f[0xa]; + u8 reserved_at_240[0xa]; u8 uar_sz[0x6]; - u8 reserved_at_24f[0x8]; + u8 reserved_at_250[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_at_260[0x1]; + u8 reserved_at_261[0x1]; u8 pad_tx_eth_packet[0x1]; - u8 reserved_at_262[0x8]; + u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_at_26f[0x10]; + u8 reserved_at_270[0x10]; - u8 reserved_at_27f[0x10]; + u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_29f[0x10]; + u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2bf[0x10]; + u8 reserved_at_2c0[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_at_2df[0x7]; + u8 reserved_at_2e0[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_at_2ff[0x18]; + u8 reserved_at_300[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_at_31f[0x3]; + u8 reserved_at_320[0x3]; u8 log_max_transport_domain[0x5]; - u8 reserved_at_327[0x3]; + u8 reserved_at_328[0x3]; u8 log_max_pd[0x5]; - u8 reserved_at_32f[0xb]; + u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_33f[0x20]; + u8 reserved_at_340[0x20]; - u8 reserved_at_35f[0x3]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; - u8 reserved_at_367[0x3]; + u8 reserved_at_368[0x3]; u8 log_max_sq[0x5]; - u8 reserved_at_36f[0x3]; + u8 reserved_at_370[0x3]; u8 log_max_tir[0x5]; - u8 reserved_at_377[0x3]; + u8 reserved_at_378[0x3]; u8 log_max_tis[0x5]; u8 basic_cyclic_rcv_wqe[0x1]; - u8 reserved_at_380[0x2]; + u8 reserved_at_381[0x2]; u8 log_max_rmp[0x5]; - u8 reserved_at_387[0x3]; + u8 reserved_at_388[0x3]; u8 log_max_rqt[0x5]; - u8 reserved_at_38f[0x3]; + u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_397[0x3]; + u8 reserved_at_398[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_at_39f[0x3]; + u8 reserved_at_3a0[0x3]; u8 log_max_stride_sz_rq[0x5]; - u8 reserved_at_3a7[0x3]; + u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; - u8 reserved_at_3af[0x3]; + u8 reserved_at_3b0[0x3]; u8 log_max_stride_sz_sq[0x5]; - u8 reserved_at_3b7[0x3]; + u8 reserved_at_3b8[0x3]; u8 log_min_stride_sz_sq[0x5]; - u8 reserved_at_3bf[0x1b]; + u8 reserved_at_3c0[0x1b]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_at_3e0[0xa]; + u8 reserved_at_3e1[0xa]; u8 log_max_vlan_list[0x5]; - u8 reserved_at_3ef[0x3]; + u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_at_3f7[0x3]; + u8 reserved_at_3f8[0x3]; u8 log_max_current_uc_list[0x5]; - u8 reserved_at_3ff[0x80]; + u8 reserved_at_400[0x80]; - u8 reserved_at_47f[0x3]; + u8 reserved_at_480[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_at_487[0x8]; + u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_at_49f[0x20]; + u8 reserved_at_4a0[0x20]; u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_at_4ff[0x5f]; - u8 cqe_zip[0x1]; - u8 cqe_zip_timeout[0x10]; - u8 cqe_zip_max_num[0x10]; + u8 reserved_at_500[0x80]; - u8 reserved_at_57f[0x220]; + u8 reserved_at_580[0x3f]; + u8 cqe_compression[0x1]; + + u8 cqe_compression_timeout[0x10]; + u8 cqe_compression_max_num[0x10]; + + u8 reserved_at_5e0[0x220]; }; enum mlx5_flow_destination_type { @@ -997,7 +1007,13 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x4e0]; + u8 reserved_at_120[0x15]; + u8 log_wqe_num_of_strides[0x3]; + u8 two_byte_shift_en[0x1]; + u8 reserved_at_139[0x4]; + u8 log_wqe_stride_size[0x3]; + + u8 reserved_at_140[0x4c0]; struct mlx5_ifc_cmd_pas_bits pas[0]; }; @@ -2196,7 +2212,8 @@ struct mlx5_ifc_sqc_bits { u8 flush_in_error_en[0x1]; u8 reserved_at_4[0x4]; u8 state[0x4]; - u8 reserved_at_c[0x14]; + u8 reg_umr[0x1]; + u8 reserved_at_d[0x13]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -2244,7 +2261,8 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_1[0x1]; + u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; u8 state[0x4]; @@ -2601,6 +2619,11 @@ enum { MLX5_CQC_ST_FIRED = 0xa, }; +enum { + MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, + MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, +}; + struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x4]; @@ -2609,8 +2632,8 @@ struct mlx5_ifc_cqc_bits { u8 reserved_at_c[0x1]; u8 scqe_break_moderation_en[0x1]; u8 oi[0x1]; - u8 reserved_at_f[0x2]; - u8 cqe_zip_en[0x1]; + u8 cq_period_mode[0x2]; + u8 cqe_comp_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; u8 reserved_at_18[0x8]; @@ -2984,7 +3007,11 @@ struct mlx5_ifc_set_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5178,7 +5205,11 @@ struct mlx5_ifc_destroy_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5205,7 +5236,11 @@ struct mlx5_ifc_destroy_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5346,7 +5381,11 @@ struct mlx5_ifc_delete_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5792,7 +5831,11 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5836,7 +5879,11 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -6369,6 +6416,17 @@ struct mlx5_ifc_ptys_reg_bits { u8 reserved_at_1a0[0x60]; }; +struct mlx5_ifc_mlcr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x20]; + + u8 beacon_duration[0x10]; + u8 reserved_at_40[0x10]; + + u8 beacon_remain[0x10]; +}; + struct mlx5_ifc_ptas_reg_bits { u8 reserved_at_0[0x20]; @@ -6778,6 +6836,16 @@ struct mlx5_ifc_pamp_reg_bits { u8 index_data[18][0x10]; }; +struct mlx5_ifc_pcmr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x2e]; + u8 fcs_cap[0x1]; + u8 reserved_at_3f[0x1f]; + u8 fcs_chk[0x1]; + u8 reserved_at_5f[0x1]; +}; + struct mlx5_ifc_lane_2_module_mapping_bits { u8 reserved_at_0[0x6]; u8 rx_lane[0x2]; @@ -7114,6 +7182,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pspa_reg_bits pspa_reg; struct mlx5_ifc_ptas_reg_bits ptas_reg; struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_mlcr_reg_bits mlcr_reg; struct mlx5_ifc_pude_reg_bits pude_reg; struct mlx5_ifc_pvlc_reg_bits pvlc_reg; struct mlx5_ifc_slrg_reg_bits slrg_reg; @@ -7147,7 +7216,11 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -7178,7 +7251,9 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x20]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; u8 reserved_at_60[0x10]; u8 modify_field_select[0x10]; @@ -7244,4 +7319,34 @@ struct mlx5_ifc_qtct_reg_bits { u8 tclass[0x3]; }; +struct mlx5_ifc_mcia_reg_bits { + u8 l[0x1]; + u8 reserved_at_1[0x7]; + u8 module[0x8]; + u8 reserved_at_10[0x8]; + u8 status[0x8]; + + u8 i2c_device_address[0x8]; + u8 page_number[0x8]; + u8 device_address[0x10]; + + u8 reserved_at_40[0x10]; + u8 size[0x10]; + + u8 reserved_at_60[0x20]; + + u8 dword_0[0x20]; + u8 dword_1[0x20]; + u8 dword_2[0x20]; + u8 dword_3[0x20]; + u8 dword_4[0x20]; + u8 dword_5[0x20]; + u8 dword_6[0x20]; + u8 dword_7[0x20]; + u8 dword_8[0x20]; + u8 dword_9[0x20]; + u8 dword_10[0x20]; + u8 dword_11[0x20]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a1d145abd4eb..9851862c0ec5 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -35,6 +35,24 @@ #include <linux/mlx5/driver.h> +enum mlx5_beacon_duration { + MLX5_BEACON_DURATION_OFF = 0x0, + MLX5_BEACON_DURATION_INF = 0xffff, +}; + +enum mlx5_module_id { + MLX5_MODULE_ID_SFP = 0x3, + MLX5_MODULE_ID_QSFP = 0xC, + MLX5_MODULE_ID_QSFP_PLUS = 0xD, + MLX5_MODULE_ID_QSFP28 = 0x11, +}; + +#define MLX5_EEPROM_MAX_BYTES 32 +#define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff +#define MLX5_I2C_ADDR_LOW 0x50 +#define MLX5_I2C_ADDR_HIGH 0x51 +#define MLX5_EEPROM_PAGE_LENGTH 256 + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -53,10 +71,11 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, @@ -84,4 +103,10 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled); +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data); + #endif /* __MLX5_PORT_H__ */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index cf031a3f16c5..64221027bf1f 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -668,6 +668,12 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq); +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id); +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size); +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer); static inline const char *mlx5_qp_type_str(int type) { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index bd93e6323603..301da4a5e6bf 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); diff --git a/include/linux/mm.h b/include/linux/mm.h index ffcff53e3b2b..a55e5be0894f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1250,78 +1250,20 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -/* suppress warnings from use in EXPORT_SYMBOL() */ -#ifndef __DISABLE_GUP_DEPRECATED -#define __gup_deprecated __deprecated -#else -#define __gup_deprecated -#endif -/* - * These macros provide backward-compatibility with the old - * get_user_pages() variants which took tsk/mm. These - * functions/macros provide both compile-time __deprecated so we - * can catch old-style use and not break the build. The actual - * functions also have WARN_ON()s to let us know at runtime if - * the get_user_pages() should have been the "remote" variant. - * - * These are hideous, but temporary. - * - * If you run into one of these __deprecated warnings, look - * at how you are calling get_user_pages(). If you are calling - * it with current/current->mm as the first two arguments, - * simply remove those arguments. The behavior will be the same - * as it is now. If you are calling it on another task, use - * get_user_pages_remote() instead. - * - * Any questions? Ask Dave Hansen <dave@sr71.net> - */ -long -__gup_deprecated -get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ - get_user_pages -#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ - get_user_pages8, x, \ - get_user_pages6, x, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); -#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ - get_user_pages_locked -#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ - get_user_pages_locked8, x, \ - get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); -#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ - get_user_pages_unlocked -#define get_user_pages_unlocked(...) GUPU_MACRO(__VA_ARGS__, \ - get_user_pages_unlocked7, x, \ - get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) - /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a734bf43d190..bc8736266749 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -39,6 +39,7 @@ enum { NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ + NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ @@ -47,6 +48,10 @@ enum { NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ + NETIF_F_GSO_PARTIAL_BIT, /* ... Only segment inner-most L4 + * in hardware and all other + * headers in software. + */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, @@ -120,6 +125,8 @@ enum { #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) +#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) +#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) @@ -145,10 +152,6 @@ enum { #define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) -/* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ - NETIF_F_TSO6 | NETIF_F_UFO) - /* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory @@ -156,11 +159,15 @@ enum { #define NETIF_F_CSUM_MASK (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM) -#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ + NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) #define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ NETIF_F_FSO) +/* List of features with software fallbacks. */ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) + /* * If one device supports one of these features, then enable them * for all in netdev_increment_features. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9884fe9a6552..f2182594160e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -106,7 +106,6 @@ enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ NETDEV_TX_OK = 0x00, /* driver took care of packet */ NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; @@ -570,6 +569,12 @@ struct netdev_queue { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) int numa_node; #endif + unsigned long tx_maxrate; + /* + * Number of TX timeouts for this queue + * (/sys/class/net/DEV/Q/trans_timeout) + */ + unsigned long trans_timeout; /* * write-mostly part */ @@ -580,18 +585,11 @@ struct netdev_queue { */ unsigned long trans_start; - /* - * Number of TX timeouts for this queue - * (/sys/class/net/DEV/Q/trans_timeout) - */ - unsigned long trans_timeout; - unsigned long state; #ifdef CONFIG_BQL struct dql dql; #endif - unsigned long tx_maxrate; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -831,7 +829,6 @@ struct tc_to_netdev { * the queue before that can happen; it's for obsolete devices and weird * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. - * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, @@ -1654,6 +1651,7 @@ struct net_device { netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; + netdev_features_t gso_partial_features; int ifindex; int group; @@ -2121,7 +2119,10 @@ struct napi_gro_cb { /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; - /* 6 bit hole */ + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* 5 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2240,6 +2241,8 @@ struct netdev_lag_lower_state_info { #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B +#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C +#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -2731,7 +2734,6 @@ struct softnet_data { /* stats */ unsigned int processed; unsigned int time_squeeze; - unsigned int cpu_collision; unsigned int received_rps; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; @@ -2744,11 +2746,15 @@ struct softnet_data { struct sk_buff *completion_queue; #ifdef CONFIG_RPS - /* Elements below can be accessed between CPUs for RPS */ + /* input_queue_head should be written by cpu owning this struct, + * and only read by other cpus. Worth using a cache line. + */ + unsigned int input_queue_head ____cacheline_aligned_in_smp; + + /* Elements below can be accessed between CPUs for RPS/RFS */ struct call_single_data csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; - unsigned int input_queue_head; unsigned int input_queue_tail; #endif unsigned int dropped; @@ -3257,7 +3263,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); +bool is_skb_forwardable(const struct net_device *dev, + const struct sk_buff *skb); extern int netdev_budget; @@ -3992,6 +3999,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); @@ -4000,6 +4008,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f48b8a664b0f..83b9a2e0d8d4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -351,7 +351,8 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) return ((skbinfo->skbmark || skbinfo->skbmarkmask) && nla_put_net64(skb, IPSET_ATTR_SKBMARK, cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask))) || + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || (skbinfo->skbprio && nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || @@ -374,9 +375,11 @@ static inline bool ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) { return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter))) || + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter))); + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); } static inline void diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 80a305b85323..4dd9306c9d56 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -242,11 +242,18 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); +int xt_check_entry_offsets(const void *base, const char *elems, + unsigned int target_offset, + unsigned int next_offset); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat); + struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, @@ -480,7 +487,7 @@ void xt_compat_init_offsets(u_int8_t af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); @@ -490,6 +497,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); +int xt_compat_check_entry_offsets(const void *base, const char *elems, + unsigned int target_offset, + unsigned int next_offset); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */ diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 167342c2ce6b..0f6f6607f592 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -92,6 +92,8 @@ enum { IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, + IEEE802154_ATTR_PAD, + __IEEE802154_ATTR_MAX, }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..932ec74909c6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1111,6 +1111,7 @@ void pci_unlock_rescan_remove(void); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_set_vpd_size(struct pci_dev *dev, size_t len); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); diff --git a/include/linux/phy.h b/include/linux/phy.h index 2abd7918f64f..be3f83bbdc0b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -805,6 +805,10 @@ void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_ksettings_get(struct phy_device *phydev, + struct ethtool_link_ksettings *cmd); +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); diff --git a/include/linux/pmem.h b/include/linux/pmem.h index ac6d872ce067..57d146fe44dd 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -72,6 +72,18 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) } #endif +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); +} + +static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) +{ + memcpy(dst, (void __force *) src, size); + return 0; +} + /* * memcpy_from_pmem - read from persistent memory with error handling * @dst: destination buffer @@ -83,12 +95,10 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) static inline int memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { - return arch_memcpy_from_pmem(dst, src, size); -} - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); + if (arch_has_pmem_api()) + return arch_memcpy_from_pmem(dst, src, size); + else + return default_memcpy_from_pmem(dst, src, size); } /** diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 795c9902e02f..3a4c806be156 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -112,6 +112,13 @@ struct qed_queue_start_common_params { u16 sb_idx; }; +struct qed_tunn_params { + u16 vxlan_port; + u8 update_vxlan_port; + u16 geneve_port; + u8 update_geneve_port; +}; + struct qed_eth_cb_ops { struct qed_common_cb_ops common; }; @@ -166,6 +173,9 @@ struct qed_eth_ops { void (*get_vport_stats)(struct qed_dev *cdev, struct qed_eth_stats *stats); + + int (*tunn_config)(struct qed_dev *cdev, + struct qed_tunn_params *params); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 67e8c206b2c1..d72c832a9397 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -110,6 +110,7 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS BIT(1) #define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) #define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) +#define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) u32 override_flags; bool autoneg; u32 adv_speeds; @@ -118,6 +119,12 @@ struct qed_link_params { #define QED_LINK_PAUSE_RX_ENABLE BIT(1) #define QED_LINK_PAUSE_TX_ENABLE BIT(2) u32 pause_config; +#define QED_LINK_LOOPBACK_NONE BIT(0) +#define QED_LINK_LOOPBACK_INT_PHY BIT(1) +#define QED_LINK_LOOPBACK_EXT_PHY BIT(2) +#define QED_LINK_LOOPBACK_EXT BIT(3) +#define QED_LINK_LOOPBACK_MAC BIT(4) + u32 loopback_mode; }; struct qed_link_output { @@ -158,7 +165,47 @@ struct qed_common_cb_ops { struct qed_link_output *link); }; +struct qed_selftest_ops { +/** + * @brief selftest_interrupt - Perform interrupt test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_interrupt)(struct qed_dev *cdev); + +/** + * @brief selftest_memory - Perform memory test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_memory)(struct qed_dev *cdev); + +/** + * @brief selftest_register - Perform register test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_register)(struct qed_dev *cdev); + +/** + * @brief selftest_clock - Perform clock test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_clock)(struct qed_dev *cdev); +}; + struct qed_common_ops { + struct qed_selftest_ops *selftest; + struct qed_dev* (*probe)(struct pci_dev *dev, enum qed_protocol protocol, u32 dp_module, u8 dp_level); @@ -211,6 +258,16 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + +/** + * @brief can_link_change - can the instance change the link or not + * + * @param cdev + * + * @return true if link-change is allowed, false otherwise. + */ + bool (*can_link_change)(struct qed_dev *cdev); + /** * @brief set_link - set links according to params * @@ -384,16 +441,16 @@ struct qed_eth_stats { /* port */ u64 rx_64_byte_packets; - u64 rx_127_byte_packets; - u64 rx_255_byte_packets; - u64 rx_511_byte_packets; - u64 rx_1023_byte_packets; - u64 rx_1518_byte_packets; - u64 rx_1522_byte_packets; - u64 rx_2047_byte_packets; - u64 rx_4095_byte_packets; - u64 rx_9216_byte_packets; - u64 rx_16383_byte_packets; + u64 rx_65_to_127_byte_packets; + u64 rx_128_to_255_byte_packets; + u64 rx_256_to_511_byte_packets; + u64 rx_512_to_1023_byte_packets; + u64 rx_1024_to_1518_byte_packets; + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 17d4f849c65e..8beb98dcf14f 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -488,6 +488,42 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, } /** + * hlist_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_tail_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *i, *last = NULL; + + for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_next_rcu(last), n); + } else { + hlist_add_head_rcu(n, h); + } +} + +/** * hlist_add_before_rcu * @n: the new element to add to the hash list. * @next: the existing element to add the new element before. diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 1c33dd7da4a7..4ae95f7e8597 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, if (!is_a_nulls(first)) first->pprev = &n->next; } + +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the end of the specified hlist_nulls, + * while permitting racing traversals. NOTE: tail insertion requires + * list traversal. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); + i = hlist_nulls_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_nulls_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a9414fd49dc6..dacb5e711994 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -705,4 +705,71 @@ typedef struct sctp_auth_chunk { sctp_authhdr_t auth_hdr; } __packed sctp_auth_chunk_t; +struct sctp_info { + __u32 sctpi_tag; + __u32 sctpi_state; + __u32 sctpi_rwnd; + __u16 sctpi_unackdata; + __u16 sctpi_penddata; + __u16 sctpi_instrms; + __u16 sctpi_outstrms; + __u32 sctpi_fragmentation_point; + __u32 sctpi_inqueue; + __u32 sctpi_outqueue; + __u32 sctpi_overall_error; + __u32 sctpi_max_burst; + __u32 sctpi_maxseg; + __u32 sctpi_peer_rwnd; + __u32 sctpi_peer_tag; + __u8 sctpi_peer_capable; + __u8 sctpi_peer_sack; + __u16 __reserved1; + + /* assoc status info */ + __u64 sctpi_isacks; + __u64 sctpi_osacks; + __u64 sctpi_opackets; + __u64 sctpi_ipackets; + __u64 sctpi_rtxchunks; + __u64 sctpi_outofseqtsns; + __u64 sctpi_idupchunks; + __u64 sctpi_gapcnt; + __u64 sctpi_ouodchunks; + __u64 sctpi_iuodchunks; + __u64 sctpi_oodchunks; + __u64 sctpi_iodchunks; + __u64 sctpi_octrlchunks; + __u64 sctpi_ictrlchunks; + + /* primary transport info */ + struct sockaddr_storage sctpi_p_address; + __s32 sctpi_p_state; + __u32 sctpi_p_cwnd; + __u32 sctpi_p_srtt; + __u32 sctpi_p_rto; + __u32 sctpi_p_hbinterval; + __u32 sctpi_p_pathmaxrxt; + __u32 sctpi_p_sackdelay; + __u32 sctpi_p_sackfreq; + __u32 sctpi_p_ssthresh; + __u32 sctpi_p_partial_bytes_acked; + __u32 sctpi_p_flight_size; + __u16 sctpi_p_error; + __u16 __reserved2; + + /* sctp sock info */ + __u32 sctpi_s_autoclose; + __u32 sctpi_s_adaptation_ind; + __u32 sctpi_s_pd_point; + __u8 sctpi_s_nodelay; + __u8 sctpi_s_disable_fragments; + __u8 sctpi_s_v4mapped; + __u8 sctpi_s_frag_interleave; +}; + +struct sctp_infox { + struct sctp_info *sctpinfo; + struct sctp_association *asoc; +}; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dde00defbaa5..f3d45dd42695 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,13 +7,10 @@ #include <linux/mutex.h> #include <linux/cpumask.h> #include <linux/nodemask.h> +#include <linux/fs.h> +#include <linux/cred.h> struct seq_operations; -struct file; -struct path; -struct inode; -struct dentry; -struct user_namespace; struct seq_file { char *buf; @@ -27,9 +24,7 @@ struct seq_file { struct mutex lock; const struct seq_operations *op; int poll_event; -#ifdef CONFIG_USER_NS - struct user_namespace *user_ns; -#endif + const struct file *file; void *private; }; @@ -147,7 +142,7 @@ int seq_release_private(struct inode *, struct file *); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS - return seq->user_ns; + return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 007381270ff8..c413c588a24f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -382,14 +382,10 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, - - /* generate software timestamp on peer data acknowledgment */ - SKBTX_ACK_TSTAMP = 1 << 7, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP | \ - SKBTX_ACK_TSTAMP) + SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* @@ -465,23 +461,27 @@ enum { /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 3, - SKB_GSO_TCPV6 = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 4, + + SKB_GSO_TCPV6 = 1 << 5, - SKB_GSO_FCOE = 1 << 5, + SKB_GSO_FCOE = 1 << 6, - SKB_GSO_GRE = 1 << 6, + SKB_GSO_GRE = 1 << 7, - SKB_GSO_GRE_CSUM = 1 << 7, + SKB_GSO_GRE_CSUM = 1 << 8, - SKB_GSO_IPIP = 1 << 8, + SKB_GSO_IPIP = 1 << 9, - SKB_GSO_SIT = 1 << 9, + SKB_GSO_SIT = 1 << 10, - SKB_GSO_UDP_TUNNEL = 1 << 10, + SKB_GSO_UDP_TUNNEL = 1 << 11, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 12, + SKB_GSO_PARTIAL = 1 << 13, + + SKB_GSO_TUNNEL_REMCSUM = 1 << 14, }; #if BITS_PER_LONG > 32 @@ -1325,6 +1325,16 @@ static inline int skb_header_cloned(const struct sk_buff *skb) return dataref != 1; } +static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_header_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_release - release reference to header * @skb: buffer to operate on @@ -2982,6 +2992,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); +struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, + gfp_t gfp); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { @@ -3589,7 +3601,10 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) * Keeps track of level of encapsulation of network headers. */ struct skb_gso_cb { - int mac_offset; + union { + int mac_offset; + int data_offset; + }; int encap_level; __wsum csum; __u16 csum_start; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a55d0523f75d..1b8a5a7876ce 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -352,8 +352,8 @@ struct thermal_zone_of_device_ops { struct thermal_trip { struct device_node *np; - unsigned long int temperature; - unsigned long int hysteresis; + int temperature; + int hysteresis; enum thermal_trip_type type; }; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index fe6441203b59..222f6aa0418f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -609,6 +609,11 @@ extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); +void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, + struct trace_event_call *call, u64 count, + struct pt_regs *regs, struct hlist_head *head, + struct task_struct *task); + static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 161052477f77..b742b5e47cc2 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -7,7 +7,7 @@ * defined; unless noted otherwise, they are optional, and can be * filled in with a null pointer. * - * struct tty_struct * (*lookup)(struct tty_driver *self, int idx) + * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx) * * Return the tty device corresponding to idx, NULL if there is not * one currently in use and an ERR_PTR value on error. Called under @@ -250,7 +250,7 @@ struct serial_icounter_struct; struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, - struct inode *inode, int idx); + struct file *filp, int idx); int (*install)(struct tty_driver *driver, struct tty_struct *tty); void (*remove)(struct tty_driver *driver, struct tty_struct *tty); int (*open)(struct tty_struct * tty, struct file * filp); diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 7f5f78bd15ad..245f57dbbb61 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -79,6 +79,8 @@ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ US_FLAG(MAX_SECTORS_240, 0x08000000) \ /* Sets max_sectors to 240 */ \ + US_FLAG(NO_REPORT_LUNS, 0x10000000) \ + /* Cannot handle REPORT_LUNS */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index da3a77d25fcb..da84cf920b78 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -58,6 +58,9 @@ #include <net/ipv6.h> #include <net/net_namespace.h> +/* special link-layer handling */ +#include <net/mac802154.h> + #define EUI64_ADDR_LEN 8 #define LOWPAN_NHC_MAX_ID_LEN 1 @@ -93,7 +96,7 @@ static inline bool lowpan_is_iphc(u8 dispatch) } #define LOWPAN_PRIV_SIZE(llpriv_size) \ - (sizeof(struct lowpan_priv) + llpriv_size) + (sizeof(struct lowpan_dev) + llpriv_size) enum lowpan_lltypes { LOWPAN_LLTYPE_BTLE, @@ -129,7 +132,7 @@ lowpan_iphc_ctx_is_compression(const struct lowpan_iphc_ctx *ctx) return test_bit(LOWPAN_IPHC_CTX_FLAG_COMPRESSION, &ctx->flags); } -struct lowpan_priv { +struct lowpan_dev { enum lowpan_lltypes lltype; struct dentry *iface_debugfs; struct lowpan_iphc_ctx_table ctx; @@ -139,11 +142,23 @@ struct lowpan_priv { }; static inline -struct lowpan_priv *lowpan_priv(const struct net_device *dev) +struct lowpan_dev *lowpan_dev(const struct net_device *dev) { return netdev_priv(dev); } +/* private device info */ +struct lowpan_802154_dev { + struct net_device *wdev; /* wpan device ptr */ + u16 fragment_tag; +}; + +static inline struct +lowpan_802154_dev *lowpan_802154_dev(const struct net_device *dev) +{ + return (struct lowpan_802154_dev *)lowpan_dev(dev)->priv; +} + struct lowpan_802154_cb { u16 d_tag; unsigned int d_size; @@ -157,6 +172,22 @@ struct lowpan_802154_cb *lowpan_802154_cb(const struct sk_buff *skb) return (struct lowpan_802154_cb *)skb->cb; } +static inline void lowpan_iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + ipaddr->s6_addr[8] ^= 0x02; +} + #ifdef DEBUG /* print data in line */ static inline void raw_dump_inline(const char *caller, char *msg, diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5d38d980b89d..eefcf3e96421 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -61,6 +61,8 @@ #define HCI_RS232 4 #define HCI_PCI 5 #define HCI_SDIO 6 +#define HCI_SPI 7 +#define HCI_I2C 8 /* HCI controller types */ #define HCI_BREDR 0x00 diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index c0a92e2c286d..74c9693d4941 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -17,6 +17,7 @@ #include <linux/hardirq.h> #include <linux/rcupdate.h> #include <net/sock.h> +#include <net/inet_sock.h> #ifdef CONFIG_CGROUP_NET_CLASSID struct cgroup_cls_state { @@ -63,11 +64,13 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { + struct sock *sk = skb_to_full_sk(skb); + /* If there is an sock_cgroup_classid we'll use that. */ - if (!skb->sk) + if (!sk || !sk_fullsock(sk)) return 0; - classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); + classid = sock_cgroup_classid(&sk->sk_cgrp_data); } return classid; diff --git a/include/net/codel.h b/include/net/codel.h index d168aca115cc..a6e428f80135 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -87,27 +87,6 @@ static inline codel_time_t codel_get_time(void) ((s32)((a) - (b)) >= 0)) #define codel_time_before_eq(a, b) codel_time_after_eq(b, a) -/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ -struct codel_skb_cb { - codel_time_t enqueue_time; -}; - -static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) -{ - qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb)); - return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data; -} - -static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb) -{ - return get_codel_cb(skb)->enqueue_time; -} - -static void codel_set_enqueue_time(struct sk_buff *skb) -{ - get_codel_cb(skb)->enqueue_time = codel_get_time(); -} - static inline u32 codel_time_to_us(codel_time_t val) { u64 valns = ((u64)val << CODEL_SHIFT); @@ -176,198 +155,10 @@ struct codel_stats { #define CODEL_DISABLED_THRESHOLD INT_MAX -static void codel_params_init(struct codel_params *params, - const struct Qdisc *sch) -{ - params->interval = MS2TIME(100); - params->target = MS2TIME(5); - params->mtu = psched_mtu(qdisc_dev(sch)); - params->ce_threshold = CODEL_DISABLED_THRESHOLD; - params->ecn = false; -} - -static void codel_vars_init(struct codel_vars *vars) -{ - memset(vars, 0, sizeof(*vars)); -} - -static void codel_stats_init(struct codel_stats *stats) -{ - stats->maxpacket = 0; -} - -/* - * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots - * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) - * - * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 - */ -static void codel_Newton_step(struct codel_vars *vars) -{ - u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; - u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; - u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); - - val >>= 2; /* avoid overflow in following multiply */ - val = (val * invsqrt) >> (32 - 2 + 1); - - vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; -} - -/* - * CoDel control_law is t + interval/sqrt(count) - * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid - * both sqrt() and divide operation. - */ -static codel_time_t codel_control_law(codel_time_t t, - codel_time_t interval, - u32 rec_inv_sqrt) -{ - return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); -} - -static bool codel_should_drop(const struct sk_buff *skb, - struct Qdisc *sch, - struct codel_vars *vars, - struct codel_params *params, - struct codel_stats *stats, - codel_time_t now) -{ - bool ok_to_drop; - - if (!skb) { - vars->first_above_time = 0; - return false; - } - - vars->ldelay = now - codel_get_enqueue_time(skb); - sch->qstats.backlog -= qdisc_pkt_len(skb); - - if (unlikely(qdisc_pkt_len(skb) > stats->maxpacket)) - stats->maxpacket = qdisc_pkt_len(skb); - - if (codel_time_before(vars->ldelay, params->target) || - sch->qstats.backlog <= params->mtu) { - /* went below - stay below for at least interval */ - vars->first_above_time = 0; - return false; - } - ok_to_drop = false; - if (vars->first_above_time == 0) { - /* just went above from below. If we stay above - * for at least interval we'll say it's ok to drop - */ - vars->first_above_time = now + params->interval; - } else if (codel_time_after(now, vars->first_above_time)) { - ok_to_drop = true; - } - return ok_to_drop; -} - +typedef u32 (*codel_skb_len_t)(const struct sk_buff *skb); +typedef codel_time_t (*codel_skb_time_t)(const struct sk_buff *skb); +typedef void (*codel_skb_drop_t)(struct sk_buff *skb, void *ctx); typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars, - struct Qdisc *sch); + void *ctx); -static struct sk_buff *codel_dequeue(struct Qdisc *sch, - struct codel_params *params, - struct codel_vars *vars, - struct codel_stats *stats, - codel_skb_dequeue_t dequeue_func) -{ - struct sk_buff *skb = dequeue_func(vars, sch); - codel_time_t now; - bool drop; - - if (!skb) { - vars->dropping = false; - return skb; - } - now = codel_get_time(); - drop = codel_should_drop(skb, sch, vars, params, stats, now); - if (vars->dropping) { - if (!drop) { - /* sojourn time below target - leave dropping state */ - vars->dropping = false; - } else if (codel_time_after_eq(now, vars->drop_next)) { - /* It's time for the next drop. Drop the current - * packet and dequeue the next. The dequeue might - * take us out of dropping state. - * If not, schedule the next drop. - * A large backlog might result in drop rates so high - * that the next drop should happen now, - * hence the while loop. - */ - while (vars->dropping && - codel_time_after_eq(now, vars->drop_next)) { - vars->count++; /* dont care of possible wrap - * since there is no more divide - */ - codel_Newton_step(vars); - if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; - vars->drop_next = - codel_control_law(vars->drop_next, - params->interval, - vars->rec_inv_sqrt); - goto end; - } - stats->drop_len += qdisc_pkt_len(skb); - qdisc_drop(skb, sch); - stats->drop_count++; - skb = dequeue_func(vars, sch); - if (!codel_should_drop(skb, sch, - vars, params, stats, now)) { - /* leave dropping state */ - vars->dropping = false; - } else { - /* and schedule the next drop */ - vars->drop_next = - codel_control_law(vars->drop_next, - params->interval, - vars->rec_inv_sqrt); - } - } - } - } else if (drop) { - u32 delta; - - if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; - } else { - stats->drop_len += qdisc_pkt_len(skb); - qdisc_drop(skb, sch); - stats->drop_count++; - - skb = dequeue_func(vars, sch); - drop = codel_should_drop(skb, sch, vars, params, - stats, now); - } - vars->dropping = true; - /* if min went above target close to when we last went below it - * assume that the drop rate that controlled the queue on the - * last cycle is a good starting point to control it now. - */ - delta = vars->count - vars->lastcount; - if (delta > 1 && - codel_time_before(now - vars->drop_next, - 16 * params->interval)) { - vars->count = delta; - /* we dont care if rec_inv_sqrt approximation - * is not very precise : - * Next Newton steps will correct it quadratically. - */ - codel_Newton_step(vars); - } else { - vars->count = 1; - vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; - } - vars->lastcount = vars->count; - vars->drop_next = codel_control_law(now, params->interval, - vars->rec_inv_sqrt); - } -end: - if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && - INET_ECN_set_ce(skb)) - stats->ce_mark++; - return skb; -} #endif diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h new file mode 100644 index 000000000000..d289b91dcd65 --- /dev/null +++ b/include/net/codel_impl.h @@ -0,0 +1,255 @@ +#ifndef __NET_SCHED_CODEL_IMPL_H +#define __NET_SCHED_CODEL_IMPL_H + +/* + * Codel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +/* Controlling Queue Delay (CoDel) algorithm + * ========================================= + * Source : Kathleen Nichols and Van Jacobson + * http://queue.acm.org/detail.cfm?id=2209336 + * + * Implemented on linux by Dave Taht and Eric Dumazet + */ + +static void codel_params_init(struct codel_params *params) +{ + params->interval = MS2TIME(100); + params->target = MS2TIME(5); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; + params->ecn = false; +} + +static void codel_vars_init(struct codel_vars *vars) +{ + memset(vars, 0, sizeof(*vars)); +} + +static void codel_stats_init(struct codel_stats *stats) +{ + stats->maxpacket = 0; +} + +/* + * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 + */ +static void codel_Newton_step(struct codel_vars *vars) +{ + u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; + u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; + u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); + + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); + + vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; +} + +/* + * CoDel control_law is t + interval/sqrt(count) + * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid + * both sqrt() and divide operation. + */ +static codel_time_t codel_control_law(codel_time_t t, + codel_time_t interval, + u32 rec_inv_sqrt) +{ + return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); +} + +static bool codel_should_drop(const struct sk_buff *skb, + void *ctx, + struct codel_vars *vars, + struct codel_params *params, + struct codel_stats *stats, + codel_skb_len_t skb_len_func, + codel_skb_time_t skb_time_func, + u32 *backlog, + codel_time_t now) +{ + bool ok_to_drop; + u32 skb_len; + + if (!skb) { + vars->first_above_time = 0; + return false; + } + + skb_len = skb_len_func(skb); + vars->ldelay = now - skb_time_func(skb); + + if (unlikely(skb_len > stats->maxpacket)) + stats->maxpacket = skb_len; + + if (codel_time_before(vars->ldelay, params->target) || + *backlog <= params->mtu) { + /* went below - stay below for at least interval */ + vars->first_above_time = 0; + return false; + } + ok_to_drop = false; + if (vars->first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + vars->first_above_time = now + params->interval; + } else if (codel_time_after(now, vars->first_above_time)) { + ok_to_drop = true; + } + return ok_to_drop; +} + +static struct sk_buff *codel_dequeue(void *ctx, + u32 *backlog, + struct codel_params *params, + struct codel_vars *vars, + struct codel_stats *stats, + codel_skb_len_t skb_len_func, + codel_skb_time_t skb_time_func, + codel_skb_drop_t drop_func, + codel_skb_dequeue_t dequeue_func) +{ + struct sk_buff *skb = dequeue_func(vars, ctx); + codel_time_t now; + bool drop; + + if (!skb) { + vars->dropping = false; + return skb; + } + now = codel_get_time(); + drop = codel_should_drop(skb, ctx, vars, params, stats, + skb_len_func, skb_time_func, backlog, now); + if (vars->dropping) { + if (!drop) { + /* sojourn time below target - leave dropping state */ + vars->dropping = false; + } else if (codel_time_after_eq(now, vars->drop_next)) { + /* It's time for the next drop. Drop the current + * packet and dequeue the next. The dequeue might + * take us out of dropping state. + * If not, schedule the next drop. + * A large backlog might result in drop rates so high + * that the next drop should happen now, + * hence the while loop. + */ + while (vars->dropping && + codel_time_after_eq(now, vars->drop_next)) { + vars->count++; /* dont care of possible wrap + * since there is no more divide + */ + codel_Newton_step(vars); + if (params->ecn && INET_ECN_set_ce(skb)) { + stats->ecn_mark++; + vars->drop_next = + codel_control_law(vars->drop_next, + params->interval, + vars->rec_inv_sqrt); + goto end; + } + stats->drop_len += skb_len_func(skb); + drop_func(skb, ctx); + stats->drop_count++; + skb = dequeue_func(vars, ctx); + if (!codel_should_drop(skb, ctx, + vars, params, stats, + skb_len_func, + skb_time_func, + backlog, now)) { + /* leave dropping state */ + vars->dropping = false; + } else { + /* and schedule the next drop */ + vars->drop_next = + codel_control_law(vars->drop_next, + params->interval, + vars->rec_inv_sqrt); + } + } + } + } else if (drop) { + u32 delta; + + if (params->ecn && INET_ECN_set_ce(skb)) { + stats->ecn_mark++; + } else { + stats->drop_len += skb_len_func(skb); + drop_func(skb, ctx); + stats->drop_count++; + + skb = dequeue_func(vars, ctx); + drop = codel_should_drop(skb, ctx, vars, params, + stats, skb_len_func, + skb_time_func, backlog, now); + } + vars->dropping = true; + /* if min went above target close to when we last went below it + * assume that the drop rate that controlled the queue on the + * last cycle is a good starting point to control it now. + */ + delta = vars->count - vars->lastcount; + if (delta > 1 && + codel_time_before(now - vars->drop_next, + 16 * params->interval)) { + vars->count = delta; + /* we dont care if rec_inv_sqrt approximation + * is not very precise : + * Next Newton steps will correct it quadratically. + */ + codel_Newton_step(vars); + } else { + vars->count = 1; + vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; + } + vars->lastcount = vars->count; + vars->drop_next = codel_control_law(now, params->interval, + vars->rec_inv_sqrt); + } +end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; + return skb; +} + +#endif diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h new file mode 100644 index 000000000000..8144d9cd2908 --- /dev/null +++ b/include/net/codel_qdisc.h @@ -0,0 +1,73 @@ +#ifndef __NET_SCHED_CODEL_QDISC_H +#define __NET_SCHED_CODEL_QDISC_H + +/* + * Codel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +/* Controlling Queue Delay (CoDel) algorithm + * ========================================= + * Source : Kathleen Nichols and Van Jacobson + * http://queue.acm.org/detail.cfm?id=2209336 + * + * Implemented on linux by Dave Taht and Eric Dumazet + */ + +/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ +struct codel_skb_cb { + codel_time_t enqueue_time; +}; + +static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb)); + return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data; +} + +static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb) +{ + return get_codel_cb(skb)->enqueue_time; +} + +static void codel_set_enqueue_time(struct sk_buff *skb) +{ + get_codel_cb(skb)->enqueue_time = codel_get_time(); +} + +#endif diff --git a/include/net/devlink.h b/include/net/devlink.h index c37d257891d6..1d45b61cb320 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -24,6 +24,7 @@ struct devlink_ops; struct devlink { struct list_head list; struct list_head port_list; + struct list_head sb_list; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; @@ -42,6 +43,12 @@ struct devlink_port { u32 split_group; }; +struct devlink_sb_pool_info { + enum devlink_sb_pool_type pool_type; + u32 size; + enum devlink_sb_threshold_type threshold_type; +}; + struct devlink_ops { size_t priv_size; int (*port_type_set)(struct devlink_port *devlink_port, @@ -49,6 +56,40 @@ struct devlink_ops { int (*port_split)(struct devlink *devlink, unsigned int port_index, unsigned int count); int (*port_unsplit)(struct devlink *devlink, unsigned int port_index); + int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); + int (*sb_port_pool_get)(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*sb_port_pool_set)(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); + int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port, + unsigned int sb_index, + u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*sb_tc_pool_bind_set)(struct devlink_port *devlink_port, + unsigned int sb_index, + u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); + int (*sb_occ_snapshot)(struct devlink *devlink, + unsigned int sb_index); + int (*sb_occ_max_clear)(struct devlink *devlink, + unsigned int sb_index); + int (*sb_occ_port_pool_get)(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*sb_occ_tc_port_bind_get)(struct devlink_port *devlink_port, + unsigned int sb_index, + u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); }; static inline void *devlink_priv(struct devlink *devlink) @@ -82,6 +123,11 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port, void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_split_set(struct devlink_port *devlink_port, u32 split_group); +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count); +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); #else @@ -135,6 +181,21 @@ static inline void devlink_port_split_set(struct devlink_port *devlink_port, { } +static inline int devlink_sb_register(struct devlink *devlink, + unsigned int sb_index, u32 size, + u16 ingress_pools_count, + u16 egress_pools_count, + u16 ingress_tc_count, + u16 egress_tc_count) +{ + return 0; +} + +static inline void devlink_sb_unregister(struct devlink *devlink, + unsigned int sb_index) +{ +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 689ebd3542ba..8e86af87c84f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -111,6 +111,11 @@ struct dsa_switch_tree { enum dsa_tag_protocol tag_protocol; /* + * Original copy of the master netdev ethtool_ops + */ + struct ethtool_ops master_ethtool_ops; + + /* * The switch and port to which the CPU is attached. */ s8 cpu_switch; @@ -136,11 +141,6 @@ struct dsa_switch { void *priv; /* - * Tagging protocol understood by this switch - */ - enum dsa_tag_protocol tag_protocol; - - /* * Configuration data for this switch. */ struct dsa_chip_data *pd; @@ -217,8 +217,9 @@ struct dsa_switch_driver { /* * Probing and setup. */ - char *(*probe)(struct device *dsa_dev, struct device *host_dev, - int sw_addr, void **priv); + const char *(*probe)(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv); int (*setup)(struct dsa_switch *ds); int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); diff --git a/include/net/dst.h b/include/net/dst.h index 5c98443c1c9e..6835d224d47b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -85,12 +85,11 @@ struct dst_entry { #endif #ifdef CONFIG_64BIT - struct lwtunnel_state *lwtstate; /* * Align __refcnt to a 64 bytes alignment * (L1_CACHE_SIZE would be too much) */ - long __pad_to_align_refcnt[1]; + long __pad_to_align_refcnt[2]; #endif /* * __refcnt wants to be on a different cache line from @@ -99,9 +98,7 @@ struct dst_entry { atomic_t __refcnt; /* client references */ int __use; unsigned long lastuse; -#ifndef CONFIG_64BIT struct lwtunnel_state *lwtstate; -#endif union { struct dst_entry *next; struct rtable __rcu *rt_next; diff --git a/include/net/fq.h b/include/net/fq.h new file mode 100644 index 000000000000..268b49049c37 --- /dev/null +++ b/include/net/fq.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 Qualcomm Atheros, Inc + * + * GPL v2 + * + * Based on net/sched/sch_fq_codel.c + */ +#ifndef __NET_SCHED_FQ_H +#define __NET_SCHED_FQ_H + +struct fq_tin; + +/** + * struct fq_flow - per traffic flow queue + * + * @tin: owner of this flow. Used to manage collisions, i.e. when a packet + * hashes to an index which points to a flow that is already owned by a + * different tin the packet is destined to. In such case the implementer + * must provide a fallback flow + * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ + * (deficit round robin) based round robin queuing similar to the one + * found in net/sched/sch_fq_codel.c + * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of + * fat flows and efficient head-dropping if packet limit is reached + * @queue: sk_buff queue to hold packets + * @backlog: number of bytes pending in the queue. The number of packets can be + * found in @queue.qlen + * @deficit: used for DRR++ + */ +struct fq_flow { + struct fq_tin *tin; + struct list_head flowchain; + struct list_head backlogchain; + struct sk_buff_head queue; + u32 backlog; + int deficit; +}; + +/** + * struct fq_tin - a logical container of fq_flows + * + * Used to group fq_flows into a logical aggregate. DRR++ scheme is used to + * pull interleaved packets out of the associated flows. + * + * @new_flows: linked list of fq_flow + * @old_flows: linked list of fq_flow + */ +struct fq_tin { + struct list_head new_flows; + struct list_head old_flows; + u32 backlog_bytes; + u32 backlog_packets; + u32 overlimit; + u32 collisions; + u32 flows; + u32 tx_bytes; + u32 tx_packets; +}; + +/** + * struct fq - main container for fair queuing purposes + * + * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient + * head-dropping when @backlog reaches @limit + * @limit: max number of packets that can be queued across all flows + * @backlog: number of packets queued across all flows + */ +struct fq { + struct fq_flow *flows; + struct list_head backlogs; + spinlock_t lock; + u32 flows_cnt; + u32 perturbation; + u32 limit; + u32 quantum; + u32 backlog; + u32 overlimit; + u32 collisions; +}; + +typedef struct sk_buff *fq_tin_dequeue_t(struct fq *, + struct fq_tin *, + struct fq_flow *flow); + +typedef void fq_skb_free_t(struct fq *, + struct fq_tin *, + struct fq_flow *, + struct sk_buff *); + +typedef struct fq_flow *fq_flow_get_default_t(struct fq *, + struct fq_tin *, + int idx, + struct sk_buff *); + +#endif diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h new file mode 100644 index 000000000000..163f3ed0f05a --- /dev/null +++ b/include/net/fq_impl.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2016 Qualcomm Atheros, Inc + * + * GPL v2 + * + * Based on net/sched/sch_fq_codel.c + */ +#ifndef __NET_SCHED_FQ_IMPL_H +#define __NET_SCHED_FQ_IMPL_H + +#include <net/fq.h> + +/* functions that are embedded into includer */ + +static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) +{ + struct fq_tin *tin = flow->tin; + struct fq_flow *i; + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + + skb = __skb_dequeue(&flow->queue); + if (!skb) + return NULL; + + tin->backlog_bytes -= skb->len; + tin->backlog_packets--; + flow->backlog -= skb->len; + fq->backlog--; + + if (flow->backlog == 0) { + list_del_init(&flow->backlogchain); + } else { + i = flow; + + list_for_each_entry_continue(i, &fq->backlogs, backlogchain) + if (i->backlog < flow->backlog) + break; + + list_move_tail(&flow->backlogchain, + &i->backlogchain); + } + + return skb; +} + +static struct sk_buff *fq_tin_dequeue(struct fq *fq, + struct fq_tin *tin, + fq_tin_dequeue_t dequeue_func) +{ + struct fq_flow *flow; + struct list_head *head; + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + +begin: + head = &tin->new_flows; + if (list_empty(head)) { + head = &tin->old_flows; + if (list_empty(head)) + return NULL; + } + + flow = list_first_entry(head, struct fq_flow, flowchain); + + if (flow->deficit <= 0) { + flow->deficit += fq->quantum; + list_move_tail(&flow->flowchain, + &tin->old_flows); + goto begin; + } + + skb = dequeue_func(fq, tin, flow); + if (!skb) { + /* force a pass through old_flows to prevent starvation */ + if ((head == &tin->new_flows) && + !list_empty(&tin->old_flows)) { + list_move_tail(&flow->flowchain, &tin->old_flows); + } else { + list_del_init(&flow->flowchain); + flow->tin = NULL; + } + goto begin; + } + + flow->deficit -= skb->len; + tin->tx_bytes += skb->len; + tin->tx_packets++; + + return skb; +} + +static struct fq_flow *fq_flow_classify(struct fq *fq, + struct fq_tin *tin, + struct sk_buff *skb, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + u32 hash; + u32 idx; + + lockdep_assert_held(&fq->lock); + + hash = skb_get_hash_perturb(skb, fq->perturbation); + idx = reciprocal_scale(hash, fq->flows_cnt); + flow = &fq->flows[idx]; + + if (flow->tin && flow->tin != tin) { + flow = get_default_func(fq, tin, idx, skb); + tin->collisions++; + fq->collisions++; + } + + if (!flow->tin) + tin->flows++; + + return flow; +} + +static void fq_recalc_backlog(struct fq *fq, + struct fq_tin *tin, + struct fq_flow *flow) +{ + struct fq_flow *i; + + if (list_empty(&flow->backlogchain)) + list_add_tail(&flow->backlogchain, &fq->backlogs); + + i = flow; + list_for_each_entry_continue_reverse(i, &fq->backlogs, + backlogchain) + if (i->backlog > flow->backlog) + break; + + list_move(&flow->backlogchain, &i->backlogchain); +} + +static void fq_tin_enqueue(struct fq *fq, + struct fq_tin *tin, + struct sk_buff *skb, + fq_skb_free_t free_func, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + + lockdep_assert_held(&fq->lock); + + flow = fq_flow_classify(fq, tin, skb, get_default_func); + + flow->tin = tin; + flow->backlog += skb->len; + tin->backlog_bytes += skb->len; + tin->backlog_packets++; + fq->backlog++; + + fq_recalc_backlog(fq, tin, flow); + + if (list_empty(&flow->flowchain)) { + flow->deficit = fq->quantum; + list_add_tail(&flow->flowchain, + &tin->new_flows); + } + + __skb_queue_tail(&flow->queue, skb); + + if (fq->backlog > fq->limit) { + flow = list_first_entry_or_null(&fq->backlogs, + struct fq_flow, + backlogchain); + if (!flow) + return; + + skb = fq_flow_dequeue(fq, flow); + if (!skb) + return; + + free_func(fq, flow->tin, flow, skb); + + flow->tin->overlimit++; + fq->overlimit++; + } +} + +static void fq_flow_reset(struct fq *fq, + struct fq_flow *flow, + fq_skb_free_t free_func) +{ + struct sk_buff *skb; + + while ((skb = fq_flow_dequeue(fq, flow))) + free_func(fq, flow->tin, flow, skb); + + if (!list_empty(&flow->flowchain)) + list_del_init(&flow->flowchain); + + if (!list_empty(&flow->backlogchain)) + list_del_init(&flow->backlogchain); + + flow->tin = NULL; + + WARN_ON_ONCE(flow->backlog); +} + +static void fq_tin_reset(struct fq *fq, + struct fq_tin *tin, + fq_skb_free_t free_func) +{ + struct list_head *head; + struct fq_flow *flow; + + for (;;) { + head = &tin->new_flows; + if (list_empty(head)) { + head = &tin->old_flows; + if (list_empty(head)) + break; + } + + flow = list_first_entry(head, struct fq_flow, flowchain); + fq_flow_reset(fq, flow, free_func); + } + + WARN_ON_ONCE(tin->backlog_bytes); + WARN_ON_ONCE(tin->backlog_packets); +} + +static void fq_flow_init(struct fq_flow *flow) +{ + INIT_LIST_HEAD(&flow->flowchain); + INIT_LIST_HEAD(&flow->backlogchain); + __skb_queue_head_init(&flow->queue); +} + +static void fq_tin_init(struct fq_tin *tin) +{ + INIT_LIST_HEAD(&tin->new_flows); + INIT_LIST_HEAD(&tin->old_flows); +} + +static int fq_init(struct fq *fq, int flows_cnt) +{ + int i; + + memset(fq, 0, sizeof(fq[0])); + INIT_LIST_HEAD(&fq->backlogs); + spin_lock_init(&fq->lock); + fq->flows_cnt = max_t(u32, flows_cnt, 1); + fq->perturbation = prandom_u32(); + fq->quantum = 300; + fq->limit = 8192; + + fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + if (!fq->flows) + return -ENOMEM; + + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_init(&fq->flows[i]); + + return 0; +} + +static void fq_reset(struct fq *fq, + fq_skb_free_t free_func) +{ + int i; + + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_reset(fq, &fq->flows[i], free_func); + + kfree(fq->flows); + fq->flows = NULL; +} + +#endif diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index cbafa3768d48..610cd397890e 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -19,17 +19,19 @@ struct gnet_dump { /* Backward compatibility */ int compat_tc_stats; int compat_xstats; + int padattr; void * xstats; int xstats_len; struct tc_stats tc_stats; }; int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, - struct gnet_dump *d); + struct gnet_dump *d, int padattr); int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, int xstats_type, - spinlock_t *lock, struct gnet_dump *d); + spinlock_t *lock, struct gnet_dump *d, + int padattr); int gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, diff --git a/include/net/geneve.h b/include/net/geneve.h index e6c23dc765f7..cb544a530146 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,13 +62,11 @@ struct genevehdr { struct geneve_opt options[]; }; -#if IS_ENABLED(CONFIG_GENEVE) -void geneve_get_rx_port(struct net_device *netdev); -#else static inline void geneve_get_rx_port(struct net_device *netdev) { + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_GENEVE, netdev); } -#endif #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, diff --git a/include/net/gre.h b/include/net/gre.h index 97eafdc47eea..29e37322c06e 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -25,4 +25,108 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); +int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len); + +static inline int gre_calc_hlen(__be16 o_flags) +{ + int addend = 4; + + if (o_flags & TUNNEL_CSUM) + addend += 4; + if (o_flags & TUNNEL_KEY) + addend += 4; + if (o_flags & TUNNEL_SEQ) + addend += 4; + return addend; +} + +static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + +static inline __sum16 gre_checksum(struct sk_buff *skb) +{ + __wsum csum; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum = lco_csum(skb); + else + csum = skb_checksum(skb, 0, skb->len, 0); + return csum_fold(csum); +} + +static inline void gre_build_header(struct sk_buff *skb, int hdr_len, + __be16 flags, __be16 proto, + __be32 key, __be32 seq) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + skb_reset_transport_header(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = gre_tnl_flags_to_gre_flags(flags); + greh->protocol = proto; + + if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (flags & TUNNEL_SEQ) { + *ptr = seq; + ptr--; + } + if (flags & TUNNEL_KEY) { + *ptr = key; + ptr--; + } + if (flags & TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { + *ptr = 0; + *(__sum16 *)ptr = gre_checksum(skb); + } + } +} + #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index 970028e13382..3ef2743a8eec 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -30,9 +30,9 @@ struct icmp_err { extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) -#define ICMP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) +#define __ICMP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) -#define ICMPMSGIN_INC_STATS_BH(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) +#define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) struct dst_entry; struct net_proto_family; diff --git a/include/net/ip.h b/include/net/ip.h index 93725e546758..247ac82e9cf2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -187,17 +187,15 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, unsigned int len); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) -#define IP_INC_STATS_BH(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) +#define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) -#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_ADD_STATS(net, field, val) __SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) -#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_UPD_PO_STATS(net, field, val) __SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) -#define NET_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) -#define NET_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->mib.net_statistics, field) +#define __NET_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) -#define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) -#define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) +#define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 295d291269e2..54c779416eec 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -101,6 +101,9 @@ void fib6_force_start_gc(struct net *net); struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast); +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags); + /* * support functions for ND * diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 499a707765ea..fb9e0153f4f2 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -42,6 +42,7 @@ struct ip6_tnl { struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_cache dst_cache; /* cached dst */ + struct gro_cells gro_cells; int err_count; unsigned long err_time; @@ -49,8 +50,10 @@ struct ip6_tnl { /* These fields used only by GRE */ __u32 i_seqno; /* The last seen seqno */ __u32 o_seqno; /* The last output seqno */ - int hlen; /* Precalculated GRE header length */ + int hlen; /* tun_hlen + encap_hlen */ + int tun_hlen; /* Precalculated header length */ int mlink; + }; /* Tunnel encapsulation limit destination sub-option */ @@ -63,13 +66,19 @@ struct ipv6_tlv_tnl_enc_lim { int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +int ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, + bool log_ecn_error); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu, __u8 proto); __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); +int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); #ifdef CONFIG_INET static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9ae9fbbccd67..6d790910ebdf 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -309,7 +309,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); +int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 55ee1eb7d026..415213da5be3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -121,21 +121,21 @@ struct frag_hdr { extern int sysctl_mld_max_msf; extern int sysctl_mld_qrv; -#define _DEVINC(net, statname, modifier, idev, field) \ +#define _DEVINC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ - SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ + mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\ + mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\ }) /* per device counters are atomic_long_t */ -#define _DEVINCATOMIC(net, statname, modifier, idev, field) \ +#define _DEVINCATOMIC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ - SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ + mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\ }) /* per device and per net counters are atomic_long_t */ @@ -147,46 +147,44 @@ extern int sysctl_mld_qrv; SNMP_INC_STATS_ATOMIC_LONG((net)->mib.statname##_statistics, (field));\ }) -#define _DEVADD(net, statname, modifier, idev, field, val) \ +#define _DEVADD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ - SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\ + mod##SNMP_ADD_STATS((_idev)->stats.statname, (field), (val)); \ + mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, (field), (val));\ }) -#define _DEVUPD(net, statname, modifier, idev, field, val) \ +#define _DEVUPD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_UPD_PO_STATS##modifier((_idev)->stats.statname, field, (val)); \ - SNMP_UPD_PO_STATS##modifier((net)->mib.statname##_statistics, field, (val));\ + mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, (val)); \ + mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, (val));\ }) /* MIBs */ #define IP6_INC_STATS(net, idev,field) \ - _DEVINC(net, ipv6, 64, idev, field) -#define IP6_INC_STATS_BH(net, idev,field) \ - _DEVINC(net, ipv6, 64_BH, idev, field) + _DEVINC(net, ipv6, , idev, field) +#define __IP6_INC_STATS(net, idev,field) \ + _DEVINC(net, ipv6, __, idev, field) #define IP6_ADD_STATS(net, idev,field,val) \ - _DEVADD(net, ipv6, 64, idev, field, val) -#define IP6_ADD_STATS_BH(net, idev,field,val) \ - _DEVADD(net, ipv6, 64_BH, idev, field, val) + _DEVADD(net, ipv6, , idev, field, val) +#define __IP6_ADD_STATS(net, idev,field,val) \ + _DEVADD(net, ipv6, __, idev, field, val) #define IP6_UPD_PO_STATS(net, idev,field,val) \ - _DEVUPD(net, ipv6, 64, idev, field, val) -#define IP6_UPD_PO_STATS_BH(net, idev,field,val) \ - _DEVUPD(net, ipv6, 64_BH, idev, field, val) + _DEVUPD(net, ipv6, , idev, field, val) +#define __IP6_UPD_PO_STATS(net, idev,field,val) \ + _DEVUPD(net, ipv6, __, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, , idev, field) -#define ICMP6_INC_STATS_BH(net, idev, field) \ - _DEVINCATOMIC(net, icmpv6, _BH, idev, field) +#define __ICMP6_INC_STATS(net, idev, field) \ + _DEVINCATOMIC(net, icmpv6, __, idev, field) #define ICMP6MSGOUT_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256) -#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \ - _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256) -#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) \ +#define ICMP6MSGIN_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field) struct ip6_ra_chain { @@ -961,6 +959,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); +int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); +void ip6_datagram_release_cb(struct sock *sk); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 6cd7a70706a9..e465c8551ac3 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -288,6 +288,16 @@ static inline void ieee802154_le16_to_be16(void *be16_dst, const void *le16_src) } /** + * ieee802154_be16_to_le16 - copies and convert be16 to le16 + * @le16_dst: le16 destination pointer + * @be16_src: be16 source pointer + */ +static inline void ieee802154_be16_to_le16(void *le16_dst, const void *be16_src) +{ + put_unaligned_le16(get_unaligned_be16(be16_src), le16_dst); +} + +/** * ieee802154_alloc_hw - Allocate a new hardware device * * This must be called once for each hardware device. The returned pointer diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 7e2b1d025f50..c5f8fc736b3d 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -45,7 +45,6 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #endif } -bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); int nf_connlabel_set(struct nf_conn *ct, u16 bit); int nf_connlabels_replace(struct nf_conn *ct, @@ -54,11 +53,11 @@ int nf_connlabels_replace(struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(void); void nf_conntrack_labels_fini(void); -int nf_connlabels_get(struct net *net, unsigned int n_bits); +int nf_connlabels_get(struct net *net, unsigned int bit); void nf_connlabels_put(struct net *net); #else static inline int nf_conntrack_labels_init(void) { return 0; } static inline void nf_conntrack_labels_fini(void) {} -static inline int nf_connlabels_get(struct net *net, unsigned int n_bits) { return 0; } +static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } static inline void nf_connlabels_put(struct net *net) {} #endif diff --git a/include/net/netlink.h b/include/net/netlink.h index 0e3172751755..e589cb3dccee 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -102,10 +102,12 @@ * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb - * nla_put_s64(skb, type, value) add s64 attribute to skb + * nla_put_s64(skb, type, value, + * padattr) add s64 attribute to skb * nla_put_string(skb, type, str) add string attribute to skb * nla_put_flag(skb, type) add flag attribute to skb - * nla_put_msecs(skb, type, jiffies) add msecs attribute to skb + * nla_put_msecs(skb, type, jiffies, + * padattr) add msecs attribute to skb * nla_put_in_addr(skb, type, addr) add IPv4 address attribute to skb * nla_put_in6_addr(skb, type, addr) add IPv6 address attribute to skb * @@ -244,13 +246,21 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, + int attrlen, int padattr); void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen); struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, + int attrlen, int padattr); void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen); void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, + const void *data, int padattr); void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, + const void *data, int padattr); int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_append(struct sk_buff *skb, int attrlen, const void *data); @@ -848,36 +858,56 @@ static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value) } /** - * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer + * nla_put_u64_64bit - Add a u64 netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value) +static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, + u64 value, int padattr) { - return nla_put(skb, attrtype, sizeof(__be64), &value); + return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr); } /** - * nla_put_net64 - Add 64-bit network byte order netlink attribute to a socket buffer + * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value) +static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, + int padattr) { - return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value); + return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); } /** - * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer + * nla_put_net64 - Add 64-bit network byte order nlattr to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value) +static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, + int padattr) { - return nla_put(skb, attrtype, sizeof(__le64), &value); + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + padattr); +} + +/** + * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and align it + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + * @padattr: attribute type for the padding + */ +static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, + int padattr) +{ + return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr); } /** @@ -914,14 +944,16 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) } /** - * nla_put_s64 - Add a s64 netlink attribute to a socket buffer + * nla_put_s64 - Add a s64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value) +static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, + int padattr) { - return nla_put(skb, attrtype, sizeof(s64), &value); + return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr); } /** @@ -947,16 +979,18 @@ static inline int nla_put_flag(struct sk_buff *skb, int attrtype) } /** - * nla_put_msecs - Add a msecs netlink attribute to a socket buffer + * nla_put_msecs - Add a msecs netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @njiffies: number of jiffies to convert to msecs + * @padattr: attribute type for the padding */ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, - unsigned long njiffies) + unsigned long njiffies, int padattr) { u64 tmp = jiffies_to_msecs(njiffies); - return nla_put(skb, attrtype, sizeof(u64), &tmp); + + return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** @@ -1231,6 +1265,61 @@ static inline int nla_validate_nested(const struct nlattr *start, int maxtype, } /** + * nla_need_padding_for_64bit - test 64-bit alignment of the next attribute + * @skb: socket buffer the message is stored in + * + * Return true if padding is needed to align the next attribute (nla_data()) to + * a 64-bit aligned area. + */ +static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* The nlattr header is 4 bytes in size, that's why we test + * if the skb->data _is_ aligned. A NOP attribute, plus + * nlattr header for next attribute, will make nla_data() + * 8-byte aligned. + */ + if (IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8)) + return true; +#endif + return false; +} + +/** + * nla_align_64bit - 64-bit align the nla_data() of next attribute + * @skb: socket buffer the message is stored in + * @padattr: attribute type for the padding + * + * Conditionally emit a padding netlink attribute in order to make + * the next attribute we emit have a 64-bit aligned nla_data() area. + * This will only be done in architectures which do not have + * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined. + * + * Returns zero on success or a negative error code. + */ +static inline int nla_align_64bit(struct sk_buff *skb, int padattr) +{ + if (nla_need_padding_for_64bit(skb) && + !nla_reserve(skb, padattr, 0)) + return -EMSGSIZE; + + return 0; +} + +/** + * nla_total_size_64bit - total length of attribute including padding + * @payload: length of payload + */ +static inline int nla_total_size_64bit(int payload) +{ + return NLA_ALIGN(nla_attr_size(payload)) +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + + NLA_ALIGN(nla_attr_size(0)) +#endif + ; +} + +/** * nla_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @head: head of attribute stream diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 32cb3e591e07..fcab4de49951 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -138,6 +138,8 @@ enum nl802154_attrs { NL802154_ATTR_SEC_KEY, #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + NL802154_ATTR_PAD, + __NL802154_ATTR_AFTER_LAST, NL802154_ATTR_MAX = __NL802154_ATTR_AFTER_LAST - 1 }; @@ -295,6 +297,7 @@ enum nl802154_dev_addr_attrs { NL802154_DEV_ADDR_ATTR_MODE, NL802154_DEV_ADDR_ATTR_SHORT, NL802154_DEV_ADDR_ATTR_EXTENDED, + NL802154_DEV_ADDR_ATTR_PAD, /* keep last */ __NL802154_DEV_ADDR_ATTR_AFTER_LAST, @@ -320,6 +323,7 @@ enum nl802154_key_id_attrs { NL802154_KEY_ID_ATTR_IMPLICIT, NL802154_KEY_ID_ATTR_SOURCE_SHORT, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED, + NL802154_KEY_ID_ATTR_PAD, /* keep last */ __NL802154_KEY_ID_ATTR_AFTER_LAST, @@ -402,6 +406,7 @@ enum nl802154_dev { NL802154_DEV_ATTR_EXTENDED_ADDR, NL802154_DEV_ATTR_SECLEVEL_EXEMPT, NL802154_DEV_ATTR_KEY_MODE, + NL802154_DEV_ATTR_PAD, /* keep last */ __NL802154_DEV_ATTR_AFTER_LAST, @@ -414,6 +419,7 @@ enum nl802154_devkey { NL802154_DEVKEY_ATTR_FRAME_COUNTER, NL802154_DEVKEY_ATTR_EXTENDED_ADDR, NL802154_DEVKEY_ATTR_ID, + NL802154_DEVKEY_ATTR_PAD, /* keep last */ __NL802154_DEVKEY_ATTR_AFTER_LAST, diff --git a/include/net/route.h b/include/net/route.h index f4b11eee1754..ad777d79af94 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -209,6 +209,9 @@ unsigned int inet_addr_type_dev_table(struct net *net, void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); +struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, + bool nopolicy, bool noxfrm, bool will_cache); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 2f87c1ba13de..006a7b81d758 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -47,6 +47,9 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @get_num_rx_queues: Function to determine number of receive queues * to create when creating a new device. * @get_link_net: Function to get the i/o netns of the device + * @get_linkxstats_size: Function to calculate the required room for + * dumping device-specific extended link stats + * @fill_linkxstats: Function to dump device-specific extended link stats */ struct rtnl_link_ops { struct list_head list; @@ -95,6 +98,10 @@ struct rtnl_link_ops { const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); + size_t (*get_linkxstats_size)(const struct net_device *dev); + int (*fill_linkxstats)(struct sk_buff *skb, + const struct net_device *dev, + int *prividx); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 03fb33efcae2..b392ac8382f2 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -116,6 +116,22 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +int sctp_transport_walk_start(struct rhashtable_iter *iter); +void sctp_transport_walk_stop(struct rhashtable_iter *iter); +struct sctp_transport *sctp_transport_get_next(struct net *net, + struct rhashtable_iter *iter); +struct sctp_transport *sctp_transport_get_idx(struct net *net, + struct rhashtable_iter *iter, int pos); +int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p); +int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), + struct net *net, int pos, void *p); +int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info); + /* * sctp/primitive.c */ @@ -189,10 +205,9 @@ extern int sysctl_sctp_wmem[3]; */ /* SCTP SNMP MIB stats handlers */ -#define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) -#define SCTP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) -#define SCTP_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field) -#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) +#define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) +#define __SCTP_INC_STATS(net, field) __SNMP_INC_STATS((net)->sctp.sctp_statistics, field) +#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) /* sctp mib definitions */ enum { @@ -359,21 +374,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp); #define sctp_skb_for_each(pos, head, tmp) \ skb_queue_walk_safe(head, pos, tmp) -/* A helper to append an entire skb list (list) to another (head). */ -static inline void sctp_skb_list_tail(struct sk_buff_head *list, - struct sk_buff_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&head->lock, flags); - spin_lock(&list->lock); - - skb_queue_splice_tail_init(list, head); - - spin_unlock(&list->lock); - spin_unlock_irqrestore(&head->lock, flags); -} - /** * sctp_list_dequeue - remove from the head of the queue * @list: list to dequeue from diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 21cb11107e37..16b013a6191c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -218,7 +218,7 @@ struct sctp_sock { frag_interleave:1, recvrcvinfo:1, recvnxtinfo:1, - pending_data_ready:1; + data_ready_signalled:1; atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ @@ -848,6 +848,11 @@ struct sctp_transport { */ ktime_t last_time_heard; + /* When was the last time that we sent a chunk using this + * transport? We use this to check for idle transports + */ + unsigned long last_time_sent; + /* Last time(in jiffies) when cwnd is reduced due to the congestion * indication based on ECNE chunk. */ @@ -953,7 +958,8 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *, struct sctp_sock *); void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk); void sctp_transport_free(struct sctp_transport *); -void sctp_transport_reset_timers(struct sctp_transport *); +void sctp_transport_reset_t3_rtx(struct sctp_transport *); +void sctp_transport_reset_hb_timer(struct sctp_transport *); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); diff --git a/include/net/snmp.h b/include/net/snmp.h index 35512ac6dcfb..c9228ad7ee91 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -123,12 +123,9 @@ struct linux_xfrm_mib { #define DECLARE_SNMP_STAT(type, name) \ extern __typeof__(type) __percpu *name -#define SNMP_INC_STATS_BH(mib, field) \ +#define __SNMP_INC_STATS(mib, field) \ __this_cpu_inc(mib->mibs[field]) -#define SNMP_INC_STATS_USER(mib, field) \ - this_cpu_inc(mib->mibs[field]) - #define SNMP_INC_STATS_ATOMIC_LONG(mib, field) \ atomic_long_inc(&mib->mibs[field]) @@ -138,12 +135,9 @@ struct linux_xfrm_mib { #define SNMP_DEC_STATS(mib, field) \ this_cpu_dec(mib->mibs[field]) -#define SNMP_ADD_STATS_BH(mib, field, addend) \ +#define __SNMP_ADD_STATS(mib, field, addend) \ __this_cpu_add(mib->mibs[field], addend) -#define SNMP_ADD_STATS_USER(mib, field, addend) \ - this_cpu_add(mib->mibs[field], addend) - #define SNMP_ADD_STATS(mib, field, addend) \ this_cpu_add(mib->mibs[field], addend) #define SNMP_UPD_PO_STATS(mib, basefield, addend) \ @@ -152,7 +146,7 @@ struct linux_xfrm_mib { this_cpu_inc(ptr[basefield##PKTS]); \ this_cpu_add(ptr[basefield##OCTETS], addend); \ } while (0) -#define SNMP_UPD_PO_STATS_BH(mib, basefield, addend) \ +#define __SNMP_UPD_PO_STATS(mib, basefield, addend) \ do { \ __typeof__((mib->mibs) + 0) ptr = mib->mibs; \ __this_cpu_inc(ptr[basefield##PKTS]); \ @@ -162,7 +156,7 @@ struct linux_xfrm_mib { #if BITS_PER_LONG==32 -#define SNMP_ADD_STATS64_BH(mib, field, addend) \ +#define __SNMP_ADD_STATS64(mib, field, addend) \ do { \ __typeof__(*mib) *ptr = raw_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ @@ -170,20 +164,16 @@ struct linux_xfrm_mib { u64_stats_update_end(&ptr->syncp); \ } while (0) -#define SNMP_ADD_STATS64_USER(mib, field, addend) \ +#define SNMP_ADD_STATS64(mib, field, addend) \ do { \ local_bh_disable(); \ - SNMP_ADD_STATS64_BH(mib, field, addend); \ - local_bh_enable(); \ + __SNMP_ADD_STATS64(mib, field, addend); \ + local_bh_enable(); \ } while (0) -#define SNMP_ADD_STATS64(mib, field, addend) \ - SNMP_ADD_STATS64_USER(mib, field, addend) - -#define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1) -#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1) +#define __SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1) #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1) -#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) \ +#define __SNMP_UPD_PO_STATS64(mib, basefield, addend) \ do { \ __typeof__(*mib) *ptr; \ ptr = raw_cpu_ptr((mib)); \ @@ -195,19 +185,17 @@ struct linux_xfrm_mib { #define SNMP_UPD_PO_STATS64(mib, basefield, addend) \ do { \ local_bh_disable(); \ - SNMP_UPD_PO_STATS64_BH(mib, basefield, addend); \ - local_bh_enable(); \ + __SNMP_UPD_PO_STATS64(mib, basefield, addend); \ + local_bh_enable(); \ } while (0) #else -#define SNMP_INC_STATS64_BH(mib, field) SNMP_INC_STATS_BH(mib, field) -#define SNMP_INC_STATS64_USER(mib, field) SNMP_INC_STATS_USER(mib, field) +#define __SNMP_INC_STATS64(mib, field) __SNMP_INC_STATS(mib, field) #define SNMP_INC_STATS64(mib, field) SNMP_INC_STATS(mib, field) #define SNMP_DEC_STATS64(mib, field) SNMP_DEC_STATS(mib, field) -#define SNMP_ADD_STATS64_BH(mib, field, addend) SNMP_ADD_STATS_BH(mib, field, addend) -#define SNMP_ADD_STATS64_USER(mib, field, addend) SNMP_ADD_STATS_USER(mib, field, addend) +#define __SNMP_ADD_STATS64(mib, field, addend) __SNMP_ADD_STATS(mib, field, addend) #define SNMP_ADD_STATS64(mib, field, addend) SNMP_ADD_STATS(mib, field, addend) #define SNMP_UPD_PO_STATS64(mib, basefield, addend) SNMP_UPD_PO_STATS(mib, basefield, addend) -#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) SNMP_UPD_PO_STATS_BH(mib, basefield, addend) +#define __SNMP_UPD_PO_STATS64(mib, basefield, addend) __SNMP_UPD_PO_STATS(mib, basefield, addend) #endif #endif diff --git a/include/net/sock.h b/include/net/sock.h index d997ec13a643..1dbb1f9f7c1b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -630,12 +630,20 @@ static inline void sk_add_node(struct sock *sk, struct hlist_head *list) static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) { sock_hold(sk); - hlist_add_head_rcu(&sk->sk_node, list); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&sk->sk_node, list); + else + hlist_add_head_rcu(&sk->sk_node, list); } static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); + else + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) @@ -918,6 +926,17 @@ void sk_stream_kill_queues(struct sock *sk); void sk_set_memalloc(struct sock *sk); void sk_clear_memalloc(struct sock *sk); +void __sk_flush_backlog(struct sock *sk); + +static inline bool sk_flush_backlog(struct sock *sk) +{ + if (unlikely(READ_ONCE(sk->sk_backlog.tail))) { + __sk_flush_backlog(sk); + return true; + } + return false; +} + int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; @@ -1405,7 +1424,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) static inline bool sock_owned_by_user(const struct sock *sk) { #ifdef CONFIG_LOCKDEP - WARN_ON(!lockdep_sock_is_held(sk)); + WARN_ON_ONCE(!lockdep_sock_is_held(sk) && debug_locks); #endif return sk->sk_lock.owned; } @@ -1932,11 +1951,19 @@ static inline unsigned long sock_wspace(struct sock *sk) */ static inline void sk_set_bit(int nr, struct sock *sk) { + if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) && + !sock_flag(sk, SOCK_FASYNC)) + return; + set_bit(nr, &sk->sk_wq_raw->flags); } static inline void sk_clear_bit(int nr, struct sock *sk) { + if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) && + !sock_flag(sk, SOCK_FASYNC)) + return; + clear_bit(nr, &sk->sk_wq_raw->flags); } diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d451122e8404..51d77b2ce2b2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -54,6 +54,8 @@ struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; u32 flags; + void *complete_priv; + void (*complete)(struct net_device *dev, int err, void *priv); union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ @@ -75,6 +77,8 @@ struct switchdev_obj { struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; + void *complete_priv; + void (*complete)(struct net_device *dev, int err, void *priv); }; /* SWITCHDEV_OBJ_ID_PORT_VLAN */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 74d3ed5eb219..24ec80483805 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -332,9 +332,8 @@ bool tcp_check_oom(struct sock *sk, int shift); extern struct proto tcp_prot; #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) -#define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) +#define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) -#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) void tcp_tasklet_init(void); @@ -452,10 +451,15 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int tcp_connect(struct sock *sk); +enum tcp_synack_type { + TCP_SYNACK_NORMAL, + TCP_SYNACK_FASTOPEN, + TCP_SYNACK_COOKIE, +}; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req); + enum tcp_synack_type synack_type); int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); @@ -533,8 +537,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); -int __tcp_retransmit_skb(struct sock *, struct sk_buff *); -int tcp_retransmit_skb(struct sock *, struct sk_buff *); +int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); @@ -552,6 +556,8 @@ void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); +void tcp_skb_collapse_tstamp(struct sk_buff *skb, + const struct sk_buff *next_skb); /* tcp_input.c */ void tcp_resume_early_retransmit(struct sock *sk); @@ -755,7 +761,8 @@ struct tcp_skb_cb { __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ - unused:7; + eor:1, /* Is skb MSG_EOR marked? */ + unused:6; __u32 ack_seq; /* Sequence number ACK'd */ union { struct inet_skb_parm h4; @@ -802,6 +809,11 @@ static inline int tcp_skb_mss(const struct sk_buff *skb) return TCP_SKB_CB(skb)->tcp_gso_size; } +static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) +{ + return likely(!TCP_SKB_CB(skb)->eor); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ @@ -1291,10 +1303,10 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ - TCP_ADD_STATS_USER(net, TCP_MIB_RTOALGORITHM, 1); - TCP_ADD_STATS_USER(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); - TCP_ADD_STATS_USER(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); - TCP_ADD_STATS_USER(net, TCP_MIB_MAXCONN, -1); + TCP_ADD_STATS(net, TCP_MIB_RTOALGORITHM, 1); + TCP_ADD_STATS(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); + TCP_ADD_STATS(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); + TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1); } /* from STCP */ @@ -1728,7 +1740,7 @@ struct tcp_request_sock_ops { int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req); + enum tcp_synack_type synack_type); }; #ifdef CONFIG_SYN_COOKIES @@ -1737,7 +1749,7 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, __u16 *mss) { tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return ops->cookie_init_seq(skb, mss); } #else @@ -1846,7 +1858,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) static inline void tcp_listendrop(const struct sock *sk) { atomic_inc(&((struct sock *)sk)->sk_drops); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 3c5a65e0946d..ae07f375370d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -289,32 +289,32 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, /* * SNMP statistics for UDP and UDP-Lite */ -#define UDP_INC_STATS_USER(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_USER((net)->mib.udplite_statistics, field); \ - else SNMP_INC_STATS_USER((net)->mib.udp_statistics, field); } while(0) -#define UDP_INC_STATS_BH(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field); \ - else SNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) - -#define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\ - else SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ +#define UDP_INC_STATS(net, field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ + else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) +#define __UDP_INC_STATS(net, field, is_udplite) do { \ + if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ + else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) + +#define __UDP6_INC_STATS(net, field, is_udplite) do { \ + if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ + else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) -#define UDP6_INC_STATS_USER(net, field, __lite) do { \ - if (__lite) SNMP_INC_STATS_USER((net)->mib.udplite_stats_in6, field); \ - else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ +#define UDP6_INC_STATS(net, field, __lite) do { \ + if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ + else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define UDPX_INC_STATS_BH(sk, field) \ +#define __UDPX_INC_STATS(sk, field) \ do { \ if ((sk)->sk_family == AF_INET) \ - UDP_INC_STATS_BH(sock_net(sk), field, 0); \ + __UDP_INC_STATS(sock_net(sk), field, 0); \ else \ - UDP6_INC_STATS_BH(sock_net(sk), field, 0); \ + __UDP6_INC_STATS(sock_net(sk), field, 0); \ } while (0) #else -#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0) +#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) #endif /* /proc */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 2dcf1de948ac..4f543262dd81 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -105,8 +105,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); -static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, - bool udp_csum) +static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 2f168f0ea32c..673e9f9e6da7 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -184,9 +184,7 @@ struct vxlan_metadata { /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; - struct work_struct del_work; struct socket *sock; - struct rcu_head rcu; struct hlist_head vni_list[VNI_HASH_SIZE]; atomic_t refcnt; u32 flags; @@ -392,13 +390,11 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) return vni_field; } -#if IS_ENABLED(CONFIG_VXLAN) -void vxlan_get_rx_port(struct net_device *netdev); -#else static inline void vxlan_get_rx_port(struct net_device *netdev) { + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_VXLAN, netdev); } -#endif static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d6f6e5006ee9..adfebd6f243c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -45,12 +45,8 @@ #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) -#define XFRM_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field) -#define XFRM_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)-mib.xfrm_statistics, field) #else #define XFRM_INC_STATS(net, field) ((void)(net)) -#define XFRM_INC_STATS_BH(net, field) ((void)(net)) -#define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif diff --git a/include/sound/hda_regmap.h b/include/sound/hda_regmap.h index 2767c55a641e..ca64f0f50b45 100644 --- a/include/sound/hda_regmap.h +++ b/include/sound/hda_regmap.h @@ -17,6 +17,8 @@ int snd_hdac_regmap_add_vendor_verb(struct hdac_device *codec, unsigned int verb); int snd_hdac_regmap_read_raw(struct hdac_device *codec, unsigned int reg, unsigned int *val); +int snd_hdac_regmap_read_raw_uncached(struct hdac_device *codec, + unsigned int reg, unsigned int *val); int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg, unsigned int val); int snd_hdac_regmap_update_raw(struct hdac_device *codec, unsigned int reg, diff --git a/include/trace/perf.h b/include/trace/perf.h index a182306eefd7..88de5c205e86 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -64,16 +64,9 @@ perf_trace_##call(void *__data, proto) \ \ { assign; } \ \ - if (prog) { \ - *(struct pt_regs **)entry = __regs; \ - if (!trace_call_bpf(prog, entry) || hlist_empty(head)) { \ - perf_swevent_put_recursion_context(rctx); \ - return; \ - } \ - } \ - perf_trace_buf_submit(entry, __entry_size, rctx, \ - event_call->event.type, __count, __regs, \ - head, __task); \ + perf_trace_run_bpf_submit(entry, __entry_size, rctx, \ + event_call, __count, __regs, \ + head, __task); \ } /* diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 2622b33fb2ec..6e0f5f01734c 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -717,9 +717,13 @@ __SYSCALL(__NR_membarrier, sys_membarrier) __SYSCALL(__NR_mlock2, sys_mlock2) #define __NR_copy_file_range 285 __SYSCALL(__NR_copy_file_range, sys_copy_file_range) +#define __NR_preadv2 286 +__SYSCALL(__NR_preadv2, sys_preadv2) +#define __NR_pwritev2 287 +__SYSCALL(__NR_pwritev2, sys_pwritev2) #undef __NR_syscalls -#define __NR_syscalls 286 +#define __NR_syscalls 288 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index b71fd0b5cbad..813ffb2e22c9 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -96,6 +96,7 @@ header-y += cyclades.h header-y += cycx_cfm.h header-y += dcbnl.h header-y += dccp.h +header-y += devlink.h header-y += dlmconstants.h header-y += dlm_device.h header-y += dlm.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 70eda5aeb304..b7b0fb1292e7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -347,6 +347,10 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) +/* BPF_FUNC_perf_event_output flags. */ +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index c9fee5781eb1..ba0073b26fa6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -33,6 +33,30 @@ enum devlink_command { DEVLINK_CMD_PORT_SPLIT, DEVLINK_CMD_PORT_UNSPLIT, + DEVLINK_CMD_SB_GET, /* can dump */ + DEVLINK_CMD_SB_SET, + DEVLINK_CMD_SB_NEW, + DEVLINK_CMD_SB_DEL, + + DEVLINK_CMD_SB_POOL_GET, /* can dump */ + DEVLINK_CMD_SB_POOL_SET, + DEVLINK_CMD_SB_POOL_NEW, + DEVLINK_CMD_SB_POOL_DEL, + + DEVLINK_CMD_SB_PORT_POOL_GET, /* can dump */ + DEVLINK_CMD_SB_PORT_POOL_SET, + DEVLINK_CMD_SB_PORT_POOL_NEW, + DEVLINK_CMD_SB_PORT_POOL_DEL, + + DEVLINK_CMD_SB_TC_POOL_BIND_GET, /* can dump */ + DEVLINK_CMD_SB_TC_POOL_BIND_SET, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + DEVLINK_CMD_SB_TC_POOL_BIND_DEL, + + /* Shared buffer occupancy monitoring commands */ + DEVLINK_CMD_SB_OCC_SNAPSHOT, + DEVLINK_CMD_SB_OCC_MAX_CLEAR, + /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -46,6 +70,31 @@ enum devlink_port_type { DEVLINK_PORT_TYPE_IB, }; +enum devlink_sb_pool_type { + DEVLINK_SB_POOL_TYPE_INGRESS, + DEVLINK_SB_POOL_TYPE_EGRESS, +}; + +/* static threshold - limiting the maximum number of bytes. + * dynamic threshold - limiting the maximum number of bytes + * based on the currently available free space in the shared buffer pool. + * In this mode, the maximum quota is calculated based + * on the following formula: + * max_quota = alpha / (1 + alpha) * Free_Buffer + * While Free_Buffer is the amount of none-occupied buffer associated to + * the relevant pool. + * The value range which can be passed is 0-20 and serves + * for computation of alpha by following formula: + * alpha = 2 ^ (passed_value - 10) + */ + +enum devlink_sb_threshold_type { + DEVLINK_SB_THRESHOLD_TYPE_STATIC, + DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC, +}; + +#define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -62,6 +111,20 @@ enum devlink_attr { DEVLINK_ATTR_PORT_IBDEV_NAME, /* string */ DEVLINK_ATTR_PORT_SPLIT_COUNT, /* u32 */ DEVLINK_ATTR_PORT_SPLIT_GROUP, /* u32 */ + DEVLINK_ATTR_SB_INDEX, /* u32 */ + DEVLINK_ATTR_SB_SIZE, /* u32 */ + DEVLINK_ATTR_SB_INGRESS_POOL_COUNT, /* u16 */ + DEVLINK_ATTR_SB_EGRESS_POOL_COUNT, /* u16 */ + DEVLINK_ATTR_SB_INGRESS_TC_COUNT, /* u16 */ + DEVLINK_ATTR_SB_EGRESS_TC_COUNT, /* u16 */ + DEVLINK_ATTR_SB_POOL_INDEX, /* u16 */ + DEVLINK_ATTR_SB_POOL_TYPE, /* u8 */ + DEVLINK_ATTR_SB_POOL_SIZE, /* u32 */ + DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, /* u8 */ + DEVLINK_ATTR_SB_THRESHOLD, /* u32 */ + DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ + DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ + DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 96161b8202b5..620c8a5ddc00 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -49,6 +49,7 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_PAD, __FRA_MAX }; diff --git a/include/uapi/linux/gen_stats.h b/include/uapi/linux/gen_stats.h index 6487317ea619..52deccc2128e 100644 --- a/include/uapi/linux/gen_stats.h +++ b/include/uapi/linux/gen_stats.h @@ -10,6 +10,7 @@ enum { TCA_STATS_QUEUE, TCA_STATS_APP, TCA_STATS_RATE_EST64, + TCA_STATS_PAD, __TCA_STATS_MAX, }; #define TCA_STATS_MAX (__TCA_STATS_MAX - 1) diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 0536eefff9bf..397d503fdedb 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -134,6 +134,16 @@ struct bridge_vlan_info { __u16 vid; }; +struct bridge_vlan_xstats { + __u64 rx_bytes; + __u64 rx_packets; + __u64 tx_bytes; + __u64 tx_packets; + __u16 vid; + __u16 pad1; + __u32 pad2; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { @@ -233,4 +243,12 @@ enum { }; #define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) +/* Embedded inside LINK_XSTATS_TYPE_BRIDGE */ +enum { + BRIDGE_XSTATS_UNSPEC, + BRIDGE_XSTATS_VLAN, + __BRIDGE_XSTATS_MAX +}; +#define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1) + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 4a93051c578c..cec849a239f6 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -92,6 +92,7 @@ #define ETH_P_TDLS 0x890D /* TDLS */ #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ #define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */ +#define ETH_P_HSR 0x892F /* IEC 62439-3 HSRv1 */ #define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */ #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9427f17d06d6..d2d7fd4ba5f5 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -155,6 +155,7 @@ enum { IFLA_PROTO_DOWN, IFLA_GSO_MAX_SEGS, IFLA_GSO_MAX_SIZE, + IFLA_PAD, __IFLA_MAX }; @@ -270,6 +271,8 @@ enum { IFLA_BR_NF_CALL_IP6TABLES, IFLA_BR_NF_CALL_ARPTABLES, IFLA_BR_VLAN_DEFAULT_PVID, + IFLA_BR_PAD, + IFLA_BR_VLAN_STATS_ENABLED, __IFLA_BR_MAX, }; @@ -312,6 +315,7 @@ enum { IFLA_BRPORT_HOLD_TIMER, IFLA_BRPORT_FLUSH, IFLA_BRPORT_MULTICAST_ROUTER, + IFLA_BRPORT_PAD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -431,6 +435,7 @@ enum { IFLA_MACSEC_SCB, IFLA_MACSEC_REPLAY_PROTECT, IFLA_MACSEC_VALIDATION, + IFLA_MACSEC_PAD, __IFLA_MACSEC_MAX, }; @@ -516,6 +521,14 @@ enum { }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) +/* PPP section */ +enum { + IFLA_PPP_UNSPEC, + IFLA_PPP_DEV_FD, + __IFLA_PPP_MAX +}; +#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) + /* Bonding section */ enum { @@ -665,6 +678,7 @@ enum { IFLA_VF_STATS_TX_BYTES, IFLA_VF_STATS_BROADCAST, IFLA_VF_STATS_MULTICAST, + IFLA_VF_STATS_PAD, __IFLA_VF_STATS_MAX, }; @@ -775,9 +789,46 @@ enum { IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, + IFLA_HSR_VERSION, /* HSR version */ __IFLA_HSR_MAX, }; #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) +/* STATS section */ + +struct if_stats_msg { + __u8 family; + __u8 pad1; + __u16 pad2; + __u32 ifindex; + __u32 filter_mask; +}; + +/* A stats attribute can be netdev specific or a global stat. + * For netdev stats, lets use the prefix IFLA_STATS_LINK_* + */ +enum { + IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ + IFLA_STATS_LINK_64, + IFLA_STATS_LINK_XSTATS, + __IFLA_STATS_MAX, +}; + +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) + +#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) + +/* These are embedded into IFLA_STATS_LINK_XSTATS: + * [IFLA_STATS_LINK_XSTATS] + * -> [LINK_XSTATS_TYPE_xxx] + * -> [rtnl link type specific attributes] + */ +enum { + LINK_XSTATS_TYPE_UNSPEC, + LINK_XSTATS_TYPE_BRIDGE, + __LINK_XSTATS_TYPE_MAX +}; +#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 26b0d1e3e3e7..897a94942245 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -19,8 +19,8 @@ #define MACSEC_MAX_KEY_LEN 128 -#define DEFAULT_CIPHER_ID 0x0080020001000001ULL -#define DEFAULT_CIPHER_ALT 0x0080C20001000001ULL +#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 @@ -55,6 +55,7 @@ enum macsec_secy_attrs { MACSEC_SECY_ATTR_INC_SCI, MACSEC_SECY_ATTR_ES, MACSEC_SECY_ATTR_SCB, + MACSEC_SECY_ATTR_PAD, __MACSEC_SECY_ATTR_END, NUM_MACSEC_SECY_ATTR = __MACSEC_SECY_ATTR_END, MACSEC_SECY_ATTR_MAX = __MACSEC_SECY_ATTR_END - 1, @@ -66,6 +67,7 @@ enum macsec_rxsc_attrs { MACSEC_RXSC_ATTR_ACTIVE, /* config/dump, u8 0..1 */ MACSEC_RXSC_ATTR_SA_LIST, /* dump, nested */ MACSEC_RXSC_ATTR_STATS, /* dump, nested, macsec_rxsc_stats_attr */ + MACSEC_RXSC_ATTR_PAD, __MACSEC_RXSC_ATTR_END, NUM_MACSEC_RXSC_ATTR = __MACSEC_RXSC_ATTR_END, MACSEC_RXSC_ATTR_MAX = __MACSEC_RXSC_ATTR_END - 1, @@ -79,6 +81,7 @@ enum macsec_sa_attrs { MACSEC_SA_ATTR_KEY, /* config, data */ MACSEC_SA_ATTR_KEYID, /* config/dump, u64 */ MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ + MACSEC_SA_ATTR_PAD, __MACSEC_SA_ATTR_END, NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END, MACSEC_SA_ATTR_MAX = __MACSEC_SA_ATTR_END - 1, @@ -110,6 +113,7 @@ enum macsec_rxsc_stats_attr { MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, + MACSEC_RXSC_STATS_ATTR_PAD, __MACSEC_RXSC_STATS_ATTR_END, NUM_MACSEC_RXSC_STATS_ATTR = __MACSEC_RXSC_STATS_ATTR_END, MACSEC_RXSC_STATS_ATTR_MAX = __MACSEC_RXSC_STATS_ATTR_END - 1, @@ -137,6 +141,7 @@ enum macsec_txsc_stats_attr { MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, + MACSEC_TXSC_STATS_ATTR_PAD, __MACSEC_TXSC_STATS_ATTR_END, NUM_MACSEC_TXSC_STATS_ATTR = __MACSEC_TXSC_STATS_ATTR_END, MACSEC_TXSC_STATS_ATTR_MAX = __MACSEC_TXSC_STATS_ATTR_END - 1, @@ -153,6 +158,7 @@ enum macsec_secy_stats_attr { MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI, MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN, + MACSEC_SECY_STATS_ATTR_PAD, __MACSEC_SECY_STATS_ATTR_END, NUM_MACSEC_SECY_STATS_ATTR = __MACSEC_SECY_STATS_ATTR_END, MACSEC_SECY_STATS_ATTR_MAX = __MACSEC_SECY_STATS_ATTR_END - 1, diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index abde7bbd6f3b..948c0a91e11b 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -14,6 +14,8 @@ enum { ILA_ATTR_LOCATOR_MATCH, /* u64 */ ILA_ATTR_IFINDEX, /* s32 */ ILA_ATTR_DIR, /* u32 */ + ILA_ATTR_PAD, + ILA_ATTR_CSUM_MODE, /* u8 */ __ILA_ATTR_MAX, }; @@ -34,4 +36,10 @@ enum { #define ILA_DIR_IN (1 << 0) #define ILA_DIR_OUT (1 << 1) +enum { + ILA_CSUM_ADJUST_TRANSPORT, + ILA_CSUM_NEUTRAL_MAP, + ILA_CSUM_NO_ACTION, +}; + #endif /* _UAPI_LINUX_ILA_H */ diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 68a1f71fde9f..a16643705669 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -113,9 +113,13 @@ enum { INET_DIAG_DCTCPINFO, INET_DIAG_PROTOCOL, /* response attribute only */ INET_DIAG_SKV6ONLY, + INET_DIAG_LOCALS, + INET_DIAG_PEERS, + INET_DIAG_PAD, + __INET_DIAG_MAX, }; -#define INET_DIAG_MAX INET_DIAG_SKV6ONLY +#define INET_DIAG_MAX (__INET_DIAG_MAX - 1) /* INET_DIAG_MEM */ diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 391395c06c7e..22d69894bc92 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -435,6 +435,7 @@ enum { IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */ IPVS_STATS_ATTR_INBPS, /* current in byte rate */ IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */ + IPVS_STATS_ATTR_PAD, __IPVS_STATS_ATTR_MAX, }; diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 347ef22a964e..4bd27d0270a2 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -126,6 +126,7 @@ enum { L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ + L2TP_ATTR_PAD, __L2TP_ATTR_MAX, }; @@ -142,6 +143,7 @@ enum { L2TP_ATTR_RX_SEQ_DISCARDS, /* u64 */ L2TP_ATTR_RX_OOS_PACKETS, /* u64 */ L2TP_ATTR_RX_ERRORS, /* u64 */ + L2TP_ATTR_STATS_PAD, __L2TP_ATTR_STATS_MAX, }; diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index f8b01887a495..a478fe80e203 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -22,6 +22,7 @@ enum lwtunnel_ip_t { LWTUNNEL_IP_TTL, LWTUNNEL_IP_TOS, LWTUNNEL_IP_FLAGS, + LWTUNNEL_IP_PAD, __LWTUNNEL_IP_MAX, }; @@ -35,6 +36,7 @@ enum lwtunnel_ip6_t { LWTUNNEL_IP6_HOPLIMIT, LWTUNNEL_IP6_TC, LWTUNNEL_IP6_FLAGS, + LWTUNNEL_IP6_PAD, __LWTUNNEL_IP6_MAX, }; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 788655bfa0f3..bd99a8d80f36 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -128,6 +128,7 @@ enum { NDTPA_LOCKTIME, /* u64, msecs */ NDTPA_QUEUE_LENBYTES, /* u32 */ NDTPA_MCAST_REPROBES, /* u32 */ + NDTPA_PAD, __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) @@ -160,6 +161,7 @@ enum { NDTA_PARMS, /* nested TLV NDTPA_* */ NDTA_STATS, /* struct ndt_stats, read-only */ NDTA_GC_INTERVAL, /* u64, msecs */ + NDTA_PAD, __NDTA_MAX }; #define NDTA_MAX (__NDTA_MAX - 1) diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 63b2e34f1b60..ebb5154976de 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -118,6 +118,7 @@ enum { IPSET_ATTR_SKBMARK, IPSET_ATTR_SKBPRIO, IPSET_ATTR_SKBQUEUE, + IPSET_ATTR_PAD, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index eeffde196f80..660231363bb5 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -182,6 +182,7 @@ enum nft_chain_attributes { NFTA_CHAIN_USE, NFTA_CHAIN_TYPE, NFTA_CHAIN_COUNTERS, + NFTA_CHAIN_PAD, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) @@ -206,6 +207,7 @@ enum nft_rule_attributes { NFTA_RULE_COMPAT, NFTA_RULE_POSITION, NFTA_RULE_USERDATA, + NFTA_RULE_PAD, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) @@ -308,6 +310,7 @@ enum nft_set_attributes { NFTA_SET_TIMEOUT, NFTA_SET_GC_INTERVAL, NFTA_SET_USERDATA, + NFTA_SET_PAD, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -341,6 +344,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_EXPIRATION, NFTA_SET_ELEM_USERDATA, NFTA_SET_ELEM_EXPR, + NFTA_SET_ELEM_PAD, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) @@ -584,6 +588,7 @@ enum nft_dynset_attributes { NFTA_DYNSET_SREG_DATA, NFTA_DYNSET_TIMEOUT, NFTA_DYNSET_EXPR, + NFTA_DYNSET_PAD, __NFTA_DYNSET_MAX, }; #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) @@ -806,6 +811,7 @@ enum nft_limit_attributes { NFTA_LIMIT_BURST, NFTA_LIMIT_TYPE, NFTA_LIMIT_FLAGS, + NFTA_LIMIT_PAD, __NFTA_LIMIT_MAX }; #define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) @@ -820,6 +826,7 @@ enum nft_counter_attributes { NFTA_COUNTER_UNSPEC, NFTA_COUNTER_BYTES, NFTA_COUNTER_PACKETS, + NFTA_COUNTER_PAD, __NFTA_COUNTER_MAX }; #define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) @@ -1055,6 +1062,7 @@ enum nft_trace_attibutes { NFTA_TRACE_MARK, NFTA_TRACE_NFPROTO, NFTA_TRACE_POLICY, + NFTA_TRACE_PAD, __NFTA_TRACE_MAX }; #define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h index f3e34dbbf966..36047ec70f37 100644 --- a/include/uapi/linux/netfilter/nfnetlink_acct.h +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h @@ -29,6 +29,7 @@ enum nfnl_acct_type { NFACCT_FLAGS, NFACCT_QUOTA, NFACCT_FILTER, + NFACCT_PAD, __NFACCT_MAX }; #define NFACCT_MAX (__NFACCT_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index c1a4e1441a25..9df789709abe 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -116,6 +116,7 @@ enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_STATE, CTA_PROTOINFO_DCCP_ROLE, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, + CTA_PROTOINFO_DCCP_PAD, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) @@ -135,6 +136,7 @@ enum ctattr_counters { CTA_COUNTERS_BYTES, /* 64bit counters */ CTA_COUNTERS32_PACKETS, /* old 32bit counters, unused */ CTA_COUNTERS32_BYTES, /* old 32bit counters, unused */ + CTA_COUNTERS_PAD, __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) @@ -143,6 +145,7 @@ enum ctattr_tstamp { CTA_TIMESTAMP_UNSPEC, CTA_TIMESTAMP_START, CTA_TIMESTAMP_STOP, + CTA_TIMESTAMP_PAD, __CTA_TIMESTAMP_MAX }; #define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1df655d8aa52..2c55dd1894c3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2197,6 +2197,8 @@ enum nl80211_attrs { NL80211_ATTR_STA_SUPPORT_P2P_PS, + NL80211_ATTR_PAD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3023,6 +3025,7 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_TIME_RX, NL80211_SURVEY_INFO_TIME_TX, NL80211_SURVEY_INFO_TIME_SCAN, + NL80211_SURVEY_INFO_PAD, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, @@ -3468,6 +3471,7 @@ enum nl80211_bss { NL80211_BSS_BEACON_TSF, NL80211_BSS_PRESP_DATA, NL80211_BSS_LAST_SEEN_BOOTTIME, + NL80211_BSS_PAD, /* keep last */ __NL80211_BSS_AFTER_LAST, diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 616d04761730..bb0d515b7654 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -84,6 +84,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ + OVS_DP_ATTR_PAD, __OVS_DP_ATTR_MAX }; @@ -253,6 +254,7 @@ enum ovs_vport_attr { OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */ /* receiving upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ + OVS_VPORT_ATTR_PAD, __OVS_VPORT_ATTR_MAX }; @@ -351,6 +353,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */ OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ + OVS_TUNNEL_KEY_ATTR_PAD, __OVS_TUNNEL_KEY_ATTR_MAX }; @@ -518,6 +521,7 @@ enum ovs_flow_attr { * logging should be suppressed. */ OVS_FLOW_ATTR_UFID, /* Variable length unique flow identifier. */ OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */ + OVS_FLOW_ATTR_PAD, __OVS_FLOW_ATTR_MAX }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index c43c5f78b9c4..84660905fedf 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -66,6 +66,7 @@ enum { TCA_ACT_OPTIONS, TCA_ACT_INDEX, TCA_ACT_STATS, + TCA_ACT_PAD, __TCA_ACT_MAX }; @@ -173,6 +174,7 @@ enum { TCA_U32_PCNT, TCA_U32_MARK, TCA_U32_FLAGS, + TCA_U32_PAD, __TCA_U32_MAX }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 8cb18b44968e..1c78c7454c7c 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -179,6 +179,7 @@ enum { TCA_TBF_PRATE64, TCA_TBF_BURST, TCA_TBF_PBURST, + TCA_TBF_PAD, __TCA_TBF_MAX, }; @@ -368,6 +369,7 @@ enum { TCA_HTB_DIRECT_QLEN, TCA_HTB_RATE64, TCA_HTB_CEIL64, + TCA_HTB_PAD, __TCA_HTB_MAX, }; @@ -531,6 +533,7 @@ enum { TCA_NETEM_RATE, TCA_NETEM_ECN, TCA_NETEM_RATE64, + TCA_NETEM_PAD, __TCA_NETEM_MAX, }; diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h index 38baddb807f5..4d2489ef6f10 100644 --- a/include/uapi/linux/quota.h +++ b/include/uapi/linux/quota.h @@ -191,6 +191,7 @@ enum { QUOTA_NL_A_DEV_MAJOR, QUOTA_NL_A_DEV_MINOR, QUOTA_NL_A_CAUSED_ID, + QUOTA_NL_A_PAD, __QUOTA_NL_A_MAX, }; #define QUOTA_NL_A_MAX (__QUOTA_NL_A_MAX - 1) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index ca764b5da86d..262f0379d83a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -139,6 +139,11 @@ enum { RTM_GETNSID = 90, #define RTM_GETNSID RTM_GETNSID + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -312,6 +317,7 @@ enum rtattr_type_t { RTA_ENCAP_TYPE, RTA_ENCAP, RTA_EXPIRES, + RTA_PAD, __RTA_MAX }; @@ -536,6 +542,7 @@ enum { TCA_FCNT, TCA_STATS2, TCA_STAB, + TCA_PAD, __TCA_MAX }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 07f17cc70bb3..063d9d465119 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -26,6 +26,7 @@ enum { TCA_ACT_BPF_OPS, TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, + TCA_ACT_BPF_PAD, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h index 994b0971bce2..62a5e944c554 100644 --- a/include/uapi/linux/tc_act/tc_connmark.h +++ b/include/uapi/linux/tc_act/tc_connmark.h @@ -15,6 +15,7 @@ enum { TCA_CONNMARK_UNSPEC, TCA_CONNMARK_PARMS, TCA_CONNMARK_TM, + TCA_CONNMARK_PAD, __TCA_CONNMARK_MAX }; #define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h index a047c49a3153..8ac8041ab5f1 100644 --- a/include/uapi/linux/tc_act/tc_csum.h +++ b/include/uapi/linux/tc_act/tc_csum.h @@ -10,6 +10,7 @@ enum { TCA_CSUM_UNSPEC, TCA_CSUM_PARMS, TCA_CSUM_TM, + TCA_CSUM_PAD, __TCA_CSUM_MAX }; #define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h index 17dddb40f740..d2a3abb77aeb 100644 --- a/include/uapi/linux/tc_act/tc_defact.h +++ b/include/uapi/linux/tc_act/tc_defact.h @@ -12,6 +12,7 @@ enum { TCA_DEF_TM, TCA_DEF_PARMS, TCA_DEF_DATA, + TCA_DEF_PAD, __TCA_DEF_MAX }; #define TCA_DEF_MAX (__TCA_DEF_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h index f7bf94eed510..70b536a8f8b2 100644 --- a/include/uapi/linux/tc_act/tc_gact.h +++ b/include/uapi/linux/tc_act/tc_gact.h @@ -25,6 +25,7 @@ enum { TCA_GACT_TM, TCA_GACT_PARMS, TCA_GACT_PROB, + TCA_GACT_PAD, __TCA_GACT_MAX }; #define TCA_GACT_MAX (__TCA_GACT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index d648ff66586f..4ece02a77b9a 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -23,6 +23,7 @@ enum { TCA_IFE_SMAC, TCA_IFE_TYPE, TCA_IFE_METALST, + TCA_IFE_PAD, __TCA_IFE_MAX }; #define TCA_IFE_MAX (__TCA_IFE_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h index 130aaadf6fac..7c6e155dd981 100644 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ b/include/uapi/linux/tc_act/tc_ipt.h @@ -14,6 +14,7 @@ enum { TCA_IPT_CNT, TCA_IPT_TM, TCA_IPT_TARG, + TCA_IPT_PAD, __TCA_IPT_MAX }; #define TCA_IPT_MAX (__TCA_IPT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 7561750e8fd6..3d7a2b352a62 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -20,6 +20,7 @@ enum { TCA_MIRRED_UNSPEC, TCA_MIRRED_TM, TCA_MIRRED_PARMS, + TCA_MIRRED_PAD, __TCA_MIRRED_MAX }; #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h index 6663aeba0b9a..923457c9ebf0 100644 --- a/include/uapi/linux/tc_act/tc_nat.h +++ b/include/uapi/linux/tc_act/tc_nat.h @@ -10,6 +10,7 @@ enum { TCA_NAT_UNSPEC, TCA_NAT_PARMS, TCA_NAT_TM, + TCA_NAT_PAD, __TCA_NAT_MAX }; #define TCA_NAT_MAX (__TCA_NAT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index 716cfabcd5b2..6389959a5157 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -10,6 +10,7 @@ enum { TCA_PEDIT_UNSPEC, TCA_PEDIT_TM, TCA_PEDIT_PARMS, + TCA_PEDIT_PAD, __TCA_PEDIT_MAX }; #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 7a2e910a5f08..fecb5cc48c40 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -39,6 +39,7 @@ enum { TCA_SKBEDIT_PRIORITY, TCA_SKBEDIT_QUEUE_MAPPING, TCA_SKBEDIT_MARK, + TCA_SKBEDIT_PAD, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index f7b8d448b960..31151ff6264f 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -28,6 +28,7 @@ enum { TCA_VLAN_PARMS, TCA_VLAN_PUSH_VLAN_ID, TCA_VLAN_PUSH_VLAN_PROTOCOL, + TCA_VLAN_PAD, __TCA_VLAN_MAX, }; #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 93533926035c..80ad90d0cfc2 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -40,6 +40,7 @@ enum { TCP_METRICS_ATTR_FOPEN_COOKIE, /* binary */ TCP_METRICS_ATTR_SADDR_IPV4, /* u32 */ TCP_METRICS_ATTR_SADDR_IPV6, /* binary */ + TCP_METRICS_ATTR_PAD, __TCP_METRICS_ATTR_MAX, }; diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 2cd9e608d0d1..143338978b48 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -302,6 +302,7 @@ enum xfrm_attr_type_t { XFRMA_SA_EXTRA_FLAGS, /* __u32 */ XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ + XFRMA_PAD, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index be0abf669ced..e4248fe79513 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -764,14 +764,21 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; + const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; + const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { return NULL; } +const struct bpf_func_proto * __weak bpf_get_event_output_proto(void) +{ + return NULL; +} + /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { .func = NULL, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 50da680c479f..ad7a0573f71b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -163,17 +163,26 @@ static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) struct task_struct *task = current; char *buf = (char *) (long) r1; - if (!task) - return -EINVAL; + if (unlikely(!task)) + goto err_clear; - strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); + strncpy(buf, task->comm, size); + + /* Verifier guarantees that size > 0. For task->comm exceeding + * size, guarantee that buf is %NUL-terminated. Unconditionally + * done here to save the size test. + */ + buf[size - 1] = 0; return 0; +err_clear: + memset(buf, 0, size); + return -EINVAL; } const struct bpf_func_proto bpf_get_current_comm_proto = { .func = bpf_get_current_comm, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_STACK, + .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6c5d7cd4cb0e..56f18068b52b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -205,6 +205,13 @@ struct verifier_env { #define BPF_COMPLEXITY_LIMIT_INSNS 65536 #define BPF_COMPLEXITY_LIMIT_STACK 1024 +struct bpf_call_arg_meta { + struct bpf_map *map_ptr; + bool raw_mode; + int regno; + int access_size; +}; + /* verbose verifier prints what it's seeing * bpf_check() is called under lock, so no race to access these global vars */ @@ -785,7 +792,8 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * and all elements of stack are initialized */ static int check_stack_boundary(struct verifier_env *env, int regno, - int access_size, bool zero_size_allowed) + int access_size, bool zero_size_allowed, + struct bpf_call_arg_meta *meta) { struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs; @@ -811,6 +819,12 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return -EACCES; } + if (meta && meta->raw_mode) { + meta->access_size = access_size; + meta->regno = regno; + return 0; + } + for (i = 0; i < access_size; i++) { if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { verbose("invalid indirect read from stack off %d+%d size %d\n", @@ -822,7 +836,8 @@ static int check_stack_boundary(struct verifier_env *env, int regno, } static int check_func_arg(struct verifier_env *env, u32 regno, - enum bpf_arg_type arg_type, struct bpf_map **mapp) + enum bpf_arg_type arg_type, + struct bpf_call_arg_meta *meta) { struct reg_state *reg = env->cur_state.regs + regno; enum bpf_reg_type expected_type; @@ -854,7 +869,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, expected_type = CONST_PTR_TO_MAP; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; - } else if (arg_type == ARG_PTR_TO_STACK) { + } else if (arg_type == ARG_PTR_TO_STACK || + arg_type == ARG_PTR_TO_RAW_STACK) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be * passed in as argument, it's a CONST_IMM type. Final test @@ -862,6 +878,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, */ if (reg->type == CONST_IMM && reg->imm == 0) expected_type = CONST_IMM; + meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; @@ -875,14 +892,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, if (arg_type == ARG_CONST_MAP_PTR) { /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ - *mapp = reg->map_ptr; - + meta->map_ptr = reg->map_ptr; } else if (arg_type == ARG_PTR_TO_MAP_KEY) { /* bpf_map_xxx(..., map_ptr, ..., key) call: * check that [key, key + map->key_size) are within * stack limits and initialized */ - if (!*mapp) { + if (!meta->map_ptr) { /* in function declaration map_ptr must come before * map_key, so that it's verified and known before * we have to check map_key here. Otherwise it means @@ -891,19 +907,20 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->key_size, - false); + err = check_stack_boundary(env, regno, meta->map_ptr->key_size, + false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity */ - if (!*mapp) { + if (!meta->map_ptr) { /* kernel subsystem misconfigured verifier */ verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->value_size, - false); + err = check_stack_boundary(env, regno, + meta->map_ptr->value_size, + false, NULL); } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); @@ -918,7 +935,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } err = check_stack_boundary(env, regno - 1, reg->imm, - zero_size_allowed); + zero_size_allowed, meta); } return err; @@ -949,13 +966,31 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) return 0; } +static int check_raw_mode(const struct bpf_func_proto *fn) +{ + int count = 0; + + if (fn->arg1_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg2_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg3_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg4_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg5_type == ARG_PTR_TO_RAW_STACK) + count++; + + return count > 1 ? -EINVAL : 0; +} + static int check_call(struct verifier_env *env, int func_id) { struct verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct reg_state *regs = state->regs; - struct bpf_map *map = NULL; struct reg_state *reg; + struct bpf_call_arg_meta meta; int i, err; /* find function prototype */ @@ -978,23 +1013,43 @@ static int check_call(struct verifier_env *env, int func_id) return -EINVAL; } + memset(&meta, 0, sizeof(meta)); + + /* We only support one arg being in raw mode at the moment, which + * is sufficient for the helper functions we have right now. + */ + err = check_raw_mode(fn); + if (err) { + verbose("kernel subsystem misconfigured func %d\n", func_id); + return err; + } + /* check args */ - err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map); + err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map); + err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map); + err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map); + err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map); + err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta); if (err) return err; + /* Mark slots with STACK_MISC in case of raw mode, stack offset + * is inferred from register state. + */ + for (i = 0; i < meta.access_size; i++) { + err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1); + if (err) + return err; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { reg = regs + caller_saved[i]; @@ -1013,18 +1068,18 @@ static int check_call(struct verifier_env *env, int func_id) * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() */ - if (map == NULL) { + if (meta.map_ptr == NULL) { verbose("kernel subsystem misconfigured verifier\n"); return -EINVAL; } - regs[BPF_REG_0].map_ptr = map; + regs[BPF_REG_0].map_ptr = meta.map_ptr; } else { verbose("unknown return type %d of func %d\n", fn->ret_type, func_id); return -EINVAL; } - err = check_map_func_compatibility(map, func_id); + err = check_map_func_compatibility(meta.map_ptr, func_id); if (err) return err; @@ -1379,6 +1434,7 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) } if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || + BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { verbose("BPF_LD_ABS uses reserved fields\n"); return -EINVAL; @@ -2036,7 +2092,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) if (IS_ERR(map)) { verbose("fd %d is not pointing to valid bpf_map\n", insn->imm); - fdput(f); return PTR_ERR(map); } diff --git a/kernel/cpu.c b/kernel/cpu.c index 6ea42e8da861..3e3f6e49eabb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -36,6 +36,7 @@ * @target: The target state * @thread: Pointer to the hotplug thread * @should_run: Thread should execute + * @rollback: Perform a rollback * @cb_stat: The state for a single callback (install/uninstall) * @cb: Single callback function (install/uninstall) * @result: Result of the operation @@ -47,6 +48,7 @@ struct cpuhp_cpu_state { #ifdef CONFIG_SMP struct task_struct *thread; bool should_run; + bool rollback; enum cpuhp_state cb_state; int (*cb)(unsigned int cpu); int result; @@ -301,6 +303,11 @@ static int cpu_notify(unsigned long val, unsigned int cpu) return __cpu_notify(val, cpu, -1, NULL); } +static void cpu_notify_nofail(unsigned long val, unsigned int cpu) +{ + BUG_ON(cpu_notify(val, cpu)); +} + /* Notifier wrappers for transitioning to state machine */ static int notify_prepare(unsigned int cpu) { @@ -477,6 +484,16 @@ static void cpuhp_thread_fun(unsigned int cpu) } else { ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb); } + } else if (st->rollback) { + BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); + + undo_cpu_down(cpu, st, cpuhp_ap_states); + /* + * This is a momentary workaround to keep the notifier users + * happy. Will go away once we got rid of the notifiers. + */ + cpu_notify_nofail(CPU_DOWN_FAILED, cpu); + st->rollback = false; } else { /* Cannot happen .... */ BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); @@ -636,11 +653,6 @@ static inline void check_for_tasks(int dead_cpu) read_unlock(&tasklist_lock); } -static void cpu_notify_nofail(unsigned long val, unsigned int cpu) -{ - BUG_ON(cpu_notify(val, cpu)); -} - static int notify_down_prepare(unsigned int cpu) { int err, nr_calls = 0; @@ -721,9 +733,10 @@ static int takedown_cpu(unsigned int cpu) */ err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu)); if (err) { - /* CPU didn't die: tell everyone. Can't complain. */ - cpu_notify_nofail(CPU_DOWN_FAILED, cpu); + /* CPU refused to die */ irq_unlock_sparse(); + /* Unpark the hotplug thread so we can rollback there */ + kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread); return err; } BUG_ON(cpu_online(cpu)); @@ -832,6 +845,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target); + if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { + st->target = prev_state; + st->rollback = true; + cpuhp_kick_ap_work(cpu); + } hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; out: @@ -1249,6 +1267,7 @@ static struct cpuhp_step cpuhp_ap_states[] = { .name = "notify:online", .startup = notify_online, .teardown = notify_down_prepare, + .skip_onerr = true, }, #endif /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 5056abffef27..9eb23dc27462 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6741,7 +6741,6 @@ void perf_swevent_put_recursion_context(int rctx) put_recursion_context(swhash->recursion, rctx); } -EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { @@ -6998,6 +6997,25 @@ static int perf_tp_event_match(struct perf_event *event, return 1; } +void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, + struct trace_event_call *call, u64 count, + struct pt_regs *regs, struct hlist_head *head, + struct task_struct *task) +{ + struct bpf_prog *prog = call->prog; + + if (prog) { + *(struct pt_regs **)raw_data = regs; + if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + perf_swevent_put_recursion_context(rctx); + return; + } + } + perf_tp_event(call->event.type, count, raw_data, size, regs, head, + rctx, task); +} +EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); + void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task) diff --git a/kernel/futex.c b/kernel/futex.c index a5d2e74c89e0..c20f06f38ef3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1295,10 +1295,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; + } else if (curval != uval) { + /* + * If a unconditional UNLOCK_PI operation (user space did not + * try the TID->0 transition) raced with a waiter setting the + * FUTEX_WAITERS flag between get_user() and locking the hash + * bucket lock, retry the operation. + */ + if ((FUTEX_TID_MASK & curval) == uval) + ret = -EAGAIN; + else + ret = -EINVAL; + } if (ret) { raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; @@ -1525,8 +1535,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); hb_waiters_dec(hb1); - plist_add(&q->list, &hb2->chain); hb_waiters_inc(hb2); + plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); @@ -2623,6 +2633,15 @@ retry: if (ret == -EFAULT) goto pi_faulted; /* + * A unconditional UNLOCK_PI op raced against a waiter + * setting the FUTEX_WAITERS bit. Try again. + */ + if (ret == -EAGAIN) { + spin_unlock(&hb->lock); + put_futex_key(&key); + goto retry; + } + /* * wake_futex_pi has detected invalid state. Tell user * space. */ diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index c37f34b00a11..14777af8e097 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -94,6 +94,7 @@ unsigned int irq_reserve_ipi(struct irq_domain *domain, data = irq_get_irq_data(virq + i); cpumask_copy(data->common->affinity, dest); data->common->ipi_offset = offset; + irq_set_status_flags(virq + i, IRQ_NO_BALANCING); } return virq; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2324ba5310db..ed9410936a22 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1999,6 +1999,7 @@ static inline int get_first_held_lock(struct task_struct *curr, return ++i; } +#ifdef CONFIG_DEBUG_LOCKDEP /* * Returns the next chain_key iteration */ @@ -2069,6 +2070,7 @@ static void print_collision(struct task_struct *curr, printk("\nstack backtrace:\n"); dump_stack(); } +#endif /* * Checks whether the chain and the current held locks are consistent diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index eb2a2c9bc3fc..d734b7502001 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -136,10 +136,12 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, } if (counter == qstat_pv_hash_hops) { - u64 frac; + u64 frac = 0; - frac = 100ULL * do_div(stat, kicks); - frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + if (kicks) { + frac = 100ULL * do_div(stat, kicks); + frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + } /* * Return a X.XX decimal number diff --git a/kernel/resource.c b/kernel/resource.c index 2e78ead30934..9b5f04404152 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -105,16 +105,25 @@ static int r_show(struct seq_file *m, void *v) { struct resource *root = m->private; struct resource *r = v, *p; + unsigned long long start, end; int width = root->end < 0x10000 ? 4 : 8; int depth; for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) if (p->parent == root) break; + + if (file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) { + start = r->start; + end = r->end; + } else { + start = end = 0; + } + seq_printf(m, "%*s%0*llx-%0*llx : %s\n", depth * 2, "", - width, (unsigned long long) r->start, - width, (unsigned long long) r->end, + width, start, + width, end, r->name ? r->name : "<BAD>"); return 0; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 21f82c29c914..b3f05ee20d18 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -357,10 +357,6 @@ static int parse(struct nlattr *na, struct cpumask *mask) return ret; } -#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -#define TASKSTATS_NEEDS_PADDING 1 -#endif - static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; @@ -370,29 +366,6 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; - /* - * The taskstats structure is internally aligned on 8 byte - * boundaries but the layout of the aggregrate reply, with - * two NLA headers and the pid (each 4 bytes), actually - * force the entire structure to be unaligned. This causes - * the kernel to issue unaligned access warnings on some - * architectures like ia64. Unfortunately, some software out there - * doesn't properly unroll the NLA packet and assumes that the start - * of the taskstats structure will always be 20 bytes from the start - * of the netlink payload. Aligning the start of the taskstats - * structure breaks this software, which we don't want. So, for now - * the alignment only happens on architectures that require it - * and those users will have to update to fixed versions of those - * packages. Space is reserved in the packet only when needed. - * This ifdef should be removed in several years e.g. 2012 once - * we can be confident that fixed versions are installed on most - * systems. We add the padding before the aggregate since the - * aggregate is already a defined type. - */ -#ifdef TASKSTATS_NEEDS_PADDING - if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) - goto err; -#endif na = nla_nest_start(skb, aggr); if (!na) goto err; @@ -401,7 +374,8 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) nla_nest_cancel(skb, na); goto err; } - ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); + ret = nla_reserve_64bit(skb, TASKSTATS_TYPE_STATS, + sizeof(struct taskstats), TASKSTATS_TYPE_NULL); if (!ret) { nla_nest_cancel(skb, na); goto err; @@ -500,10 +474,9 @@ static size_t taskstats_packet_size(void) size_t size; size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); -#ifdef TASKSTATS_NEEDS_PADDING - size += nla_total_size(0); /* Padding for alignment */ -#endif + nla_total_size_64bit(sizeof(struct taskstats)) + + nla_total_size(0); + return size; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 413ec5614180..780bcbe1d4de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -62,17 +62,21 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { void *dst = (void *) (long) r1; - int size = (int) r2; + int ret, size = (int) r2; void *unsafe_ptr = (void *) (long) r3; - return probe_kernel_read(dst, unsafe_ptr, size); + ret = probe_kernel_read(dst, unsafe_ptr, size); + if (unlikely(ret < 0)) + memset(dst, 0, size); + + return ret; } static const struct bpf_func_proto bpf_probe_read_proto = { .func = bpf_probe_read, .gpl_only = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_STACK, + .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, .arg3_type = ARG_ANYTHING, }; @@ -221,11 +225,12 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) { struct pt_regs *regs = (struct pt_regs *) (long) r1; struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_array *array = container_of(map, struct bpf_array, map); + u64 index = flags & BPF_F_INDEX_MASK; void *data = (void *) (long) r4; struct perf_sample_data sample_data; struct perf_event *event; @@ -235,6 +240,10 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) .data = data, }; + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + if (index == BPF_F_CURRENT_CPU) + index = raw_smp_processor_id(); if (unlikely(index >= array->map.max_entries)) return -E2BIG; @@ -268,6 +277,33 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg5_type = ARG_CONST_STACK_SIZE, }; +static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); + +static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); + + perf_fetch_caller_regs(regs); + + return bpf_perf_event_output((long)regs, r2, flags, r4, size); +} + +static const struct bpf_func_proto bpf_event_output_proto = { + .func = bpf_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +const struct bpf_func_proto *bpf_get_event_output_proto(void) +{ + return &bpf_event_output_proto; +} + static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -347,7 +383,7 @@ static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it * from there and call the same bpf_perf_event_output() helper */ - u64 ctx = *(long *)r1; + u64 ctx = *(long *)(uintptr_t)r1; return bpf_perf_event_output(ctx, r2, index, r4, size); } @@ -365,7 +401,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { - u64 ctx = *(long *)r1; + u64 ctx = *(long *)(uintptr_t)r1; return bpf_get_stackid(ctx, r2, r3, r4, r5); } diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 03dd576e6773..59fd7c0b119c 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -524,7 +524,9 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, free_slot = i; continue; } - if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + if (assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) { pr_devel("replace in slot %d\n", i); edit->leaf_p = &node->slots[i]; edit->dead_leaf = node->slots[i]; diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index abcecdc2d0f2..c79d7ea8a38e 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -11,8 +11,7 @@ /* * Detects 64 bits mode */ -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ - || defined(__ppc64__) || defined(__LP64__)) +#if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 @@ -25,9 +24,7 @@ typedef struct _U16_S { u16 v; } U16_S; typedef struct _U32_S { u32 v; } U32_S; typedef struct _U64_S { u64 v; } U64_S; -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ - || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ - && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) #define A16(x) (((U16_S *)(x))->v) #define A32(x) (((U32_S *)(x))->v) @@ -35,6 +32,10 @@ typedef struct _U64_S { u64 v; } U64_S; #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - A16(p)) + #define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ do { \ A16(p) = v; \ @@ -51,10 +52,13 @@ typedef struct _U64_S { u64 v; } U64_S; #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - put_unaligned(v, (u16 *)(p)); \ - p += 2; \ +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned_le16(v, (u16 *)(p)); \ + p += 2; \ } while (0) #endif @@ -140,9 +144,6 @@ typedef struct _U64_S { u64 v; } U64_S; #endif -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - get_unaligned_le16(p)) - #define LZ4_WILDCOPY(s, d, e) \ do { \ LZ4_COPYPACKET(s, d); \ diff --git a/lib/nlattr.c b/lib/nlattr.c index f5907d23272d..fce1e9afc6d9 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -355,6 +355,30 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) EXPORT_SYMBOL(__nla_reserve); /** + * __nla_reserve_64bit - reserve room for attribute on the skb and align it + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @padattr: attribute type for the padding + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. It also ensure that this + * attribute will have a 64-bit aligned nla_data() area. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, + int attrlen, int padattr) +{ + if (nla_need_padding_for_64bit(skb)) + nla_align_64bit(skb, padattr); + + return __nla_reserve(skb, attrtype, attrlen); +} +EXPORT_SYMBOL(__nla_reserve_64bit); + +/** * __nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload @@ -397,6 +421,36 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) EXPORT_SYMBOL(nla_reserve); /** + * nla_reserve_64bit - reserve room for attribute on the skb and align it + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @padattr: attribute type for the padding + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. It also ensure that this + * attribute will have a 64-bit aligned nla_data() area. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, + int padattr) +{ + size_t len; + + if (nla_need_padding_for_64bit(skb)) + len = nla_total_size_64bit(attrlen); + else + len = nla_total_size(attrlen); + if (unlikely(skb_tailroom(skb) < len)) + return NULL; + + return __nla_reserve_64bit(skb, attrtype, attrlen, padattr); +} +EXPORT_SYMBOL(nla_reserve_64bit); + +/** * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload @@ -436,6 +490,27 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, EXPORT_SYMBOL(__nla_put); /** + * __nla_put_64bit - Add a netlink attribute to a socket buffer and align it + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * @padattr: attribute type for the padding + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, + const void *data, int padattr) +{ + struct nlattr *nla; + + nla = __nla_reserve_64bit(skb, attrtype, attrlen, padattr); + memcpy(nla_data(nla), data, attrlen); +} +EXPORT_SYMBOL(__nla_put_64bit); + +/** * __nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload @@ -474,6 +549,34 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) EXPORT_SYMBOL(nla_put); /** + * nla_put_64bit - Add a netlink attribute to a socket buffer and align it + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * @padattr: attribute type for the padding + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, + const void *data, int padattr) +{ + size_t len; + + if (nla_need_padding_for_64bit(skb)) + len = nla_total_size_64bit(attrlen); + else + len = nla_total_size(attrlen); + if (unlikely(skb_tailroom(skb) < len)) + return -EMSGSIZE; + + __nla_put_64bit(skb, attrtype, attrlen, data, padattr); + return 0; +} +EXPORT_SYMBOL(nla_put_64bit); + +/** * nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload diff --git a/mm/backing-dev.c b/mm/backing-dev.c index bfbd7096b6ed..0c6317b7db38 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -898,7 +898,7 @@ static atomic_t nr_wb_congested[2]; void clear_wb_congested(struct bdi_writeback_congested *congested, int sync) { wait_queue_head_t *wqh = &congestion_wqh[sync]; - enum wb_state bit; + enum wb_congested_state bit; bit = sync ? WB_sync_congested : WB_async_congested; if (test_and_clear_bit(bit, &congested->state)) @@ -911,7 +911,7 @@ EXPORT_SYMBOL(clear_wb_congested); void set_wb_congested(struct bdi_writeback_congested *congested, int sync) { - enum wb_state bit; + enum wb_congested_state bit; bit = sync ? WB_sync_congested : WB_async_congested; if (!test_and_set_bit(bit, &congested->state)) @@ -1,4 +1,3 @@ -#define __DISABLE_GUP_DEPRECATED 1 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/err.h> @@ -839,7 +838,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, * if (locked) * up_read(&mm->mmap_sem); */ -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { @@ -847,7 +846,7 @@ long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, write, force, pages, NULL, locked, true, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages_locked6); +EXPORT_SYMBOL(get_user_pages_locked); /* * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to @@ -892,13 +891,13 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); * or if "force" shall be set to 1 (get_user_pages_fast misses the * "force" parameter). */ -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { return __get_user_pages_unlocked(current, current->mm, start, nr_pages, write, force, pages, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages_unlocked5); +EXPORT_SYMBOL(get_user_pages_unlocked); /* * get_user_pages_remote() - pin user pages in memory @@ -972,7 +971,7 @@ EXPORT_SYMBOL(get_user_pages_remote); * and mm being operated on are the current task's. We also * obviously don't pass FOLL_REMOTE in here. */ -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { @@ -980,7 +979,7 @@ long get_user_pages6(unsigned long start, unsigned long nr_pages, write, force, pages, vmas, NULL, false, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages6); +EXPORT_SYMBOL(get_user_pages); /** * populate_vma_page_range() - populate a range of pages in the vma. @@ -1491,7 +1490,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - struct mm_struct *mm = current->mm; int nr, ret; start &= PAGE_MASK; @@ -1503,8 +1501,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { @@ -1519,38 +1516,3 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, } #endif /* CONFIG_HAVE_GENERIC_RCU_GUP */ - -long get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas) -{ - WARN_ONCE(tsk != current, "get_user_pages() called on remote task"); - WARN_ONCE(mm != current->mm, "get_user_pages() called on remote mm"); - - return get_user_pages6(start, nr_pages, write, force, pages, vmas); -} -EXPORT_SYMBOL(get_user_pages8); - -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, int *locked) -{ - WARN_ONCE(tsk != current, "get_user_pages_locked() called on remote task"); - WARN_ONCE(mm != current->mm, "get_user_pages_locked() called on remote mm"); - - return get_user_pages_locked6(start, nr_pages, write, force, pages, locked); -} -EXPORT_SYMBOL(get_user_pages_locked8); - -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) -{ - WARN_ONCE(tsk != current, "get_user_pages_unlocked() called on remote task"); - WARN_ONCE(mm != current->mm, "get_user_pages_unlocked() called on remote mm"); - - return get_user_pages_unlocked5(start, nr_pages, write, force, pages); -} -EXPORT_SYMBOL(get_user_pages_unlocked7); - diff --git a/mm/nommu.c b/mm/nommu.c index 102e257cc6c3..c8bd59a03c71 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -15,8 +15,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define __DISABLE_GUP_DEPRECATED - #include <linux/export.h> #include <linux/mm.h> #include <linux/vmacache.h> @@ -161,7 +159,7 @@ finish_or_fault: * slab page or a secondary page from a compound page * - don't permit access to VMAs that don't support it, such as I/O mappings */ -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { @@ -175,15 +173,15 @@ long get_user_pages6(unsigned long start, unsigned long nr_pages, return __get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas, NULL); } -EXPORT_SYMBOL(get_user_pages6); +EXPORT_SYMBOL(get_user_pages); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { - return get_user_pages6(start, nr_pages, write, force, pages, NULL); + return get_user_pages(start, nr_pages, write, force, pages, NULL); } -EXPORT_SYMBOL(get_user_pages_locked6); +EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -199,13 +197,13 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(__get_user_pages_unlocked); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { return __get_user_pages_unlocked(current, current->mm, start, nr_pages, write, force, pages, 0); } -EXPORT_SYMBOL(get_user_pages_unlocked5); +EXPORT_SYMBOL(get_user_pages_unlocked); /** * follow_pfn - look up PFN at a user virtual address @@ -1989,31 +1987,3 @@ static int __meminit init_admin_reserve(void) return 0; } subsys_initcall(init_admin_reserve); - -long get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas) -{ - return get_user_pages6(start, nr_pages, write, force, pages, vmas); -} -EXPORT_SYMBOL(get_user_pages8); - -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked) -{ - return get_user_pages_locked6(start, nr_pages, write, - force, pages, locked); -} -EXPORT_SYMBOL(get_user_pages_locked8); - -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) -{ - return get_user_pages_unlocked5(start, nr_pages, write, force, pages); -} -EXPORT_SYMBOL(get_user_pages_unlocked7); - diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h index d16bb4b14aa1..97ecc27aeca6 100644 --- a/net/6lowpan/6lowpan_i.h +++ b/net/6lowpan/6lowpan_i.h @@ -3,6 +3,15 @@ #include <linux/netdevice.h> +#include <net/6lowpan.h> + +/* caller need to be sure it's dev->type is ARPHRD_6LOWPAN */ +static inline bool lowpan_is_ll(const struct net_device *dev, + enum lowpan_lltypes lltype) +{ + return lowpan_dev(dev)->lltype == lltype; +} + #ifdef CONFIG_6LOWPAN_DEBUGFS int lowpan_dev_debugfs_init(struct net_device *dev); void lowpan_dev_debugfs_exit(struct net_device *dev); diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 34e44c0c0836..7a240b3eaed1 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -27,11 +27,11 @@ int lowpan_register_netdevice(struct net_device *dev, dev->mtu = IPV6_MIN_MTU; dev->priv_flags |= IFF_NO_QUEUE; - lowpan_priv(dev)->lltype = lltype; + lowpan_dev(dev)->lltype = lltype; - spin_lock_init(&lowpan_priv(dev)->ctx.lock); + spin_lock_init(&lowpan_dev(dev)->ctx.lock); for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) - lowpan_priv(dev)->ctx.table[i].id = i; + lowpan_dev(dev)->ctx.table[i].id = i; ret = register_netdevice(dev); if (ret < 0) @@ -85,7 +85,7 @@ static int lowpan_event(struct notifier_block *unused, case NETDEV_DOWN: for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, - &lowpan_priv(dev)->ctx.table[i].flags); + &lowpan_dev(dev)->ctx.table[i].flags); break; default: return NOTIFY_DONE; diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 0793a8157472..acbaa3db493b 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@ -172,7 +172,7 @@ static const struct file_operations lowpan_ctx_pfx_fops = { static int lowpan_dev_debugfs_ctx_init(struct net_device *dev, struct dentry *ctx, u8 id) { - struct lowpan_priv *lpriv = lowpan_priv(dev); + struct lowpan_dev *ldev = lowpan_dev(dev); struct dentry *dentry, *root; char buf[32]; @@ -185,25 +185,25 @@ static int lowpan_dev_debugfs_ctx_init(struct net_device *dev, return -EINVAL; dentry = debugfs_create_file("active", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_flag_active_fops); if (!dentry) return -EINVAL; dentry = debugfs_create_file("compression", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_flag_c_fops); if (!dentry) return -EINVAL; dentry = debugfs_create_file("prefix", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_pfx_fops); if (!dentry) return -EINVAL; dentry = debugfs_create_file("prefix_len", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_plen_fops); if (!dentry) return -EINVAL; @@ -247,21 +247,21 @@ static const struct file_operations lowpan_context_fops = { int lowpan_dev_debugfs_init(struct net_device *dev) { - struct lowpan_priv *lpriv = lowpan_priv(dev); + struct lowpan_dev *ldev = lowpan_dev(dev); struct dentry *contexts, *dentry; int ret, i; /* creating the root */ - lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); - if (!lpriv->iface_debugfs) + ldev->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); + if (!ldev->iface_debugfs) goto fail; - contexts = debugfs_create_dir("contexts", lpriv->iface_debugfs); + contexts = debugfs_create_dir("contexts", ldev->iface_debugfs); if (!contexts) goto remove_root; dentry = debugfs_create_file("show", 0644, contexts, - &lowpan_priv(dev)->ctx, + &lowpan_dev(dev)->ctx, &lowpan_context_fops); if (!dentry) goto remove_root; @@ -282,7 +282,7 @@ fail: void lowpan_dev_debugfs_exit(struct net_device *dev) { - debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs); + debugfs_remove_recursive(lowpan_dev(dev)->iface_debugfs); } int __init lowpan_debugfs_init(void) diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 68c80f3c9add..8501dd532fe1 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -53,9 +53,6 @@ #include <net/6lowpan.h> #include <net/ipv6.h> -/* special link-layer handling */ -#include <net/mac802154.h> - #include "6lowpan_i.h" #include "nhc.h" @@ -156,32 +153,17 @@ #define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f) #define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4) -static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, - const void *lladdr) -{ - /* fe:80::XXXX:XXXX:XXXX:XXXX - * \_________________/ - * hwaddr - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN); - /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - ipaddr->s6_addr[8] ^= 0x02; -} - -static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, - const void *lladdr) +static inline void +lowpan_iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, + const void *lladdr) { const struct ieee802154_addr *addr = lladdr; - u8 eui64[EUI64_ADDR_LEN] = { }; + u8 eui64[EUI64_ADDR_LEN]; switch (addr->mode) { case IEEE802154_ADDR_LONG: ieee802154_le64_to_be64(eui64, &addr->extended_addr); - iphc_uncompress_eui64_lladdr(ipaddr, eui64); + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, eui64); break; case IEEE802154_ADDR_SHORT: /* fe:80::ff:fe00:XXXX @@ -207,7 +189,7 @@ static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, static struct lowpan_iphc_ctx * lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id) { - struct lowpan_iphc_ctx *ret = &lowpan_priv(dev)->ctx.table[id]; + struct lowpan_iphc_ctx *ret = &lowpan_dev(dev)->ctx.table[id]; if (!lowpan_iphc_ctx_is_active(ret)) return NULL; @@ -219,7 +201,7 @@ static struct lowpan_iphc_ctx * lowpan_iphc_ctx_get_by_addr(const struct net_device *dev, const struct in6_addr *addr) { - struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table; + struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table; struct lowpan_iphc_ctx *ret = NULL; struct in6_addr addr_pfx; u8 addr_plen; @@ -263,7 +245,7 @@ static struct lowpan_iphc_ctx * lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev, const struct in6_addr *addr) { - struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table; + struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table; struct lowpan_iphc_ctx *ret = NULL; struct in6_addr addr_mcast, network_pfx = {}; int i; @@ -301,9 +283,10 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev, * * address_mode is the masked value for sam or dam value */ -static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, - struct in6_addr *ipaddr, u8 address_mode, - const void *lladdr) +static int lowpan_iphc_uncompress_addr(struct sk_buff *skb, + const struct net_device *dev, + struct in6_addr *ipaddr, + u8 address_mode, const void *lladdr) { bool fail; @@ -332,12 +315,12 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, case LOWPAN_IPHC_SAM_11: case LOWPAN_IPHC_DAM_11: fail = false; - switch (lowpan_priv(dev)->lltype) { + switch (lowpan_dev(dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: - iphc_uncompress_802154_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); break; } break; @@ -360,11 +343,11 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, /* Uncompress address function for source context * based address(non-multicast). */ -static int uncompress_ctx_addr(struct sk_buff *skb, - const struct net_device *dev, - const struct lowpan_iphc_ctx *ctx, - struct in6_addr *ipaddr, u8 address_mode, - const void *lladdr) +static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb, + const struct net_device *dev, + const struct lowpan_iphc_ctx *ctx, + struct in6_addr *ipaddr, + u8 address_mode, const void *lladdr) { bool fail; @@ -393,12 +376,12 @@ static int uncompress_ctx_addr(struct sk_buff *skb, case LOWPAN_IPHC_SAM_11: case LOWPAN_IPHC_DAM_11: fail = false; - switch (lowpan_priv(dev)->lltype) { + switch (lowpan_dev(dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: - iphc_uncompress_802154_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); break; } ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen); @@ -657,22 +640,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, } if (iphc1 & LOWPAN_IPHC_SAC) { - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid)); if (!ci) { - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); return -EINVAL; } pr_debug("SAC bit is set. Handle context based source address.\n"); - err = uncompress_ctx_addr(skb, dev, ci, &hdr.saddr, - iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK, + saddr); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); } else { /* Source address uncompression */ pr_debug("source address stateless compression\n"); - err = uncompress_addr(skb, dev, &hdr.saddr, - iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); + err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK, + saddr); } /* Check on error of previous branch */ @@ -681,10 +666,10 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) { case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC: - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); if (!ci) { - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); return -EINVAL; } @@ -693,7 +678,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, err = lowpan_uncompress_multicast_ctx_daddr(skb, ci, &hdr.daddr, iphc1 & LOWPAN_IPHC_DAM_MASK); - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); break; case LOWPAN_IPHC_M: /* multicast */ @@ -701,22 +686,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, iphc1 & LOWPAN_IPHC_DAM_MASK); break; case LOWPAN_IPHC_DAC: - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); if (!ci) { - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); return -EINVAL; } /* Destination address context based uncompression */ pr_debug("DAC bit is set. Handle context based destination address.\n"); - err = uncompress_ctx_addr(skb, dev, ci, &hdr.daddr, - iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.daddr, + iphc1 & LOWPAN_IPHC_DAM_MASK, + daddr); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); break; default: - err = uncompress_addr(skb, dev, &hdr.daddr, - iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); + err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr, + iphc1 & LOWPAN_IPHC_DAM_MASK, + daddr); pr_debug("dest: stateless compression mode %d dest %pI6c\n", iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); break; @@ -736,7 +723,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, return err; } - switch (lowpan_priv(dev)->lltype) { + switch (lowpan_dev(dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: if (lowpan_802154_cb(skb)->d_size) hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size - @@ -1033,7 +1020,7 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, skb->data, skb->len); ipv6_daddr_type = ipv6_addr_type(&hdr->daddr); - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); if (ipv6_daddr_type & IPV6_ADDR_MULTICAST) dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr); else @@ -1042,15 +1029,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, memcpy(&dci_entry, dci, sizeof(*dci)); cid |= dci->id; } - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr); if (sci) { memcpy(&sci_entry, sci, sizeof(*sci)); cid |= (sci->id << 4); } - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); /* if cid is zero it will be compressed */ if (cid) { diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c index 69537a2eaab1..225d91906dfa 100644 --- a/net/6lowpan/nhc_udp.c +++ b/net/6lowpan/nhc_udp.c @@ -91,7 +91,7 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed) * here, we obtain the hint from the remaining size of the * frame */ - switch (lowpan_priv(skb->dev)->lltype) { + switch (lowpan_dev(skb->dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: if (lowpan_802154_cb(skb)->d_size) uh.len = htons(lowpan_802154_cb(skb)->d_size - diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 8a4cc2f7f0db..780089d75915 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -68,7 +68,7 @@ struct lowpan_peer { struct in6_addr peer_addr; }; -struct lowpan_dev { +struct lowpan_btle_dev { struct list_head list; struct hci_dev *hdev; @@ -80,18 +80,21 @@ struct lowpan_dev { struct delayed_work notify_peers; }; -static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev) +static inline struct lowpan_btle_dev * +lowpan_btle_dev(const struct net_device *netdev) { - return (struct lowpan_dev *)lowpan_priv(netdev)->priv; + return (struct lowpan_btle_dev *)lowpan_dev(netdev)->priv; } -static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer) +static inline void peer_add(struct lowpan_btle_dev *dev, + struct lowpan_peer *peer) { list_add_rcu(&peer->list, &dev->peers); atomic_inc(&dev->peer_count); } -static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) +static inline bool peer_del(struct lowpan_btle_dev *dev, + struct lowpan_peer *peer) { list_del_rcu(&peer->list); kfree_rcu(peer, rcu); @@ -106,7 +109,7 @@ static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) return false; } -static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, +static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_btle_dev *dev, bdaddr_t *ba, __u8 type) { struct lowpan_peer *peer; @@ -134,8 +137,8 @@ static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, return NULL; } -static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev, - struct l2cap_chan *chan) +static inline struct lowpan_peer * +__peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan) { struct lowpan_peer *peer; @@ -147,8 +150,8 @@ static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev, return NULL; } -static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev, - struct l2cap_conn *conn) +static inline struct lowpan_peer * +__peer_lookup_conn(struct lowpan_btle_dev *dev, struct l2cap_conn *conn) { struct lowpan_peer *peer; @@ -160,7 +163,7 @@ static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev, return NULL; } -static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, +static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { @@ -220,7 +223,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) { - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; struct lowpan_peer *peer = NULL; rcu_read_lock(); @@ -236,10 +239,10 @@ static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) return peer; } -static struct lowpan_dev *lookup_dev(struct l2cap_conn *conn) +static struct lowpan_btle_dev *lookup_dev(struct l2cap_conn *conn) { - struct lowpan_dev *entry; - struct lowpan_dev *dev = NULL; + struct lowpan_btle_dev *entry; + struct lowpan_btle_dev *dev = NULL; rcu_read_lock(); @@ -270,10 +273,10 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct l2cap_chan *chan) { const u8 *saddr, *daddr; - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; struct lowpan_peer *peer; - dev = lowpan_dev(netdev); + dev = lowpan_btle_dev(netdev); rcu_read_lock(); peer = __peer_lookup_chan(dev, chan); @@ -375,7 +378,7 @@ drop: /* Packet from BT LE device */ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; struct lowpan_peer *peer; int err; @@ -431,15 +434,18 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev, bdaddr_t *peer_addr, u8 *peer_addr_type) { struct in6_addr ipv6_daddr; - struct lowpan_dev *dev; + struct ipv6hdr *hdr; + struct lowpan_btle_dev *dev; struct lowpan_peer *peer; bdaddr_t addr, *any = BDADDR_ANY; u8 *daddr = any->b; int err, status = 0; - dev = lowpan_dev(netdev); + hdr = ipv6_hdr(skb); + + dev = lowpan_btle_dev(netdev); - memcpy(&ipv6_daddr, &lowpan_cb(skb)->addr, sizeof(ipv6_daddr)); + memcpy(&ipv6_daddr, &hdr->daddr, sizeof(ipv6_daddr)); if (ipv6_addr_is_multicast(&ipv6_daddr)) { lowpan_cb(skb)->chan = NULL; @@ -489,15 +495,9 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { - struct ipv6hdr *hdr; - if (type != ETH_P_IPV6) return -EINVAL; - hdr = ipv6_hdr(skb); - - memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr)); - return 0; } @@ -543,19 +543,19 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) { struct sk_buff *local_skb; - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; int err = 0; rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { struct lowpan_peer *pentry; - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; if (entry->netdev != netdev) continue; - dev = lowpan_dev(entry->netdev); + dev = lowpan_btle_dev(entry->netdev); list_for_each_entry_rcu(pentry, &dev->peers, list) { int ret; @@ -723,8 +723,8 @@ static void ifdown(struct net_device *netdev) static void do_notify_peers(struct work_struct *work) { - struct lowpan_dev *dev = container_of(work, struct lowpan_dev, - notify_peers.work); + struct lowpan_btle_dev *dev = container_of(work, struct lowpan_btle_dev, + notify_peers.work); netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */ } @@ -766,7 +766,7 @@ static void set_ip_addr_bits(u8 addr_type, u8 *addr) } static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, - struct lowpan_dev *dev) + struct lowpan_btle_dev *dev) { struct lowpan_peer *peer; @@ -803,12 +803,12 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, return peer->chan; } -static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) +static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) { struct net_device *netdev; int err = 0; - netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev)), + netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)), IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN, netdev_setup); if (!netdev) @@ -820,7 +820,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev); SET_NETDEV_DEVTYPE(netdev, &bt_type); - *dev = lowpan_dev(netdev); + *dev = lowpan_btle_dev(netdev); (*dev)->netdev = netdev; (*dev)->hdev = chan->conn->hcon->hdev; INIT_LIST_HEAD(&(*dev)->peers); @@ -853,7 +853,7 @@ out: static inline void chan_ready_cb(struct l2cap_chan *chan) { - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; dev = lookup_dev(chan->conn); @@ -890,8 +890,9 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan) static void delete_netdev(struct work_struct *work) { - struct lowpan_dev *entry = container_of(work, struct lowpan_dev, - delete_netdev); + struct lowpan_btle_dev *entry = container_of(work, + struct lowpan_btle_dev, + delete_netdev); lowpan_unregister_netdev(entry->netdev); @@ -900,8 +901,8 @@ static void delete_netdev(struct work_struct *work) static void chan_close_cb(struct l2cap_chan *chan) { - struct lowpan_dev *entry; - struct lowpan_dev *dev = NULL; + struct lowpan_btle_dev *entry; + struct lowpan_btle_dev *dev = NULL; struct lowpan_peer *peer; int err = -ENOENT; bool last = false, remove = true; @@ -921,7 +922,7 @@ static void chan_close_cb(struct l2cap_chan *chan) spin_lock(&devices_lock); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { - dev = lowpan_dev(entry->netdev); + dev = lowpan_btle_dev(entry->netdev); peer = __peer_lookup_chan(dev, chan); if (peer) { last = peer_del(dev, peer); @@ -1131,7 +1132,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, static void disconnect_all_peers(void) { - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; struct lowpan_peer *peer, *tmp_peer, *new_peer; struct list_head peers; @@ -1291,7 +1292,7 @@ static ssize_t lowpan_control_write(struct file *fp, static int lowpan_control_show(struct seq_file *f, void *ptr) { - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; struct lowpan_peer *peer; spin_lock(&devices_lock); @@ -1322,7 +1323,7 @@ static const struct file_operations lowpan_control_fops = { static void disconnect_devices(void) { - struct lowpan_dev *entry, *tmp, *new_dev; + struct lowpan_btle_dev *entry, *tmp, *new_dev; struct list_head devices; INIT_LIST_HEAD(&devices); @@ -1360,7 +1361,7 @@ static int device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; if (netdev->type != ARPHRD_6LOWPAN) return NOTIFY_DONE; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 253bc77eda3b..7dbc80d01eb0 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -61,6 +61,19 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) e->flags |= MDB_FLAGS_OFFLOAD; } +static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) +{ + memset(ip, 0, sizeof(struct br_ip)); + ip->vid = entry->vid; + ip->proto = entry->addr.proto; + if (ip->proto == htons(ETH_P_IP)) + ip->u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip->u.ip6 = entry->addr.u.ip6; +#endif +} + static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { @@ -243,9 +256,45 @@ static inline size_t rtnl_mdb_nlmsg_size(void) + nla_total_size(sizeof(struct br_mdb_entry)); } -static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, - int type, struct net_bridge_port_group *pg) +struct br_mdb_complete_info { + struct net_bridge_port *port; + struct br_ip ip; +}; + +static void br_mdb_complete(struct net_device *dev, int err, void *priv) { + struct br_mdb_complete_info *data = priv; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port *port = data->port; + struct net_bridge *br = port->br; + + if (err) + goto err; + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, &data->ip); + if (!mp) + goto out; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + p->flags |= MDB_PG_FLAGS_OFFLOAD; + } +out: + spin_unlock_bh(&br->multicast_lock); +err: + kfree(priv); +} + +static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, + struct br_mdb_entry *entry, int type) +{ + struct br_mdb_complete_info *complete_info; struct switchdev_obj_port_mdb mdb = { .obj = { .id = SWITCHDEV_OBJ_ID_PORT_MDB, @@ -268,9 +317,14 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, mdb.obj.orig_dev = port_dev; if (port_dev && type == RTM_NEWMDB) { - err = switchdev_port_obj_add(port_dev, &mdb.obj); - if (!err && pg) - pg->flags |= MDB_PG_FLAGS_OFFLOAD; + complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); + if (complete_info) { + complete_info->port = p; + __mdb_entry_to_br_ip(entry, &complete_info->ip); + mdb.obj.complete_priv = complete_info; + mdb.obj.complete = br_mdb_complete; + switchdev_port_obj_add(port_dev, &mdb.obj); + } } else if (port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); } @@ -291,21 +345,21 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } -void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, - int type) +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type, u8 flags) { struct br_mdb_entry entry; memset(&entry, 0, sizeof(entry)); - entry.ifindex = pg->port->dev->ifindex; - entry.addr.proto = pg->addr.proto; - entry.addr.u.ip4 = pg->addr.u.ip4; + entry.ifindex = port->dev->ifindex; + entry.addr.proto = group->proto; + entry.addr.u.ip4 = group->u.ip4; #if IS_ENABLED(CONFIG_IPV6) - entry.addr.u.ip6 = pg->addr.u.ip6; + entry.addr.u.ip6 = group->u.ip6; #endif - entry.vid = pg->addr.vid; - __mdb_entry_fill_flags(&entry, pg->flags); - __br_mdb_notify(dev, &entry, type, pg); + entry.vid = group->vid; + __mdb_entry_fill_flags(&entry, flags); + __br_mdb_notify(dev, port, &entry, type); } static int nlmsg_populate_rtr_fill(struct sk_buff *skb, @@ -450,8 +504,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group, unsigned char state, - struct net_bridge_port_group **pg) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -482,7 +535,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); - *pg = p; if (state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -490,8 +542,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, } static int __br_mdb_add(struct net *net, struct net_bridge *br, - struct br_mdb_entry *entry, - struct net_bridge_port_group **pg) + struct br_mdb_entry *entry) { struct br_ip ip; struct net_device *dev; @@ -509,18 +560,10 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; - memset(&ip, 0, sizeof(ip)); - ip.vid = entry->vid; - ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) - ip.u.ip4 = entry->addr.u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - else - ip.u.ip6 = entry->addr.u.ip6; -#endif + __mdb_entry_to_br_ip(entry, &ip); spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip, entry->state, pg); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } @@ -528,7 +571,6 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - struct net_bridge_port_group *pg; struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; @@ -558,15 +600,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_vlan_enabled(br) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; - err = __br_mdb_add(net, br, entry, &pg); + err = __br_mdb_add(net, br, entry); if (err) break; - __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); + __br_mdb_notify(dev, p, entry, RTM_NEWMDB); } } else { - err = __br_mdb_add(net, br, entry, &pg); + err = __br_mdb_add(net, br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); + __br_mdb_notify(dev, p, entry, RTM_NEWMDB); } return err; @@ -584,15 +626,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; - memset(&ip, 0, sizeof(ip)); - ip.vid = entry->vid; - ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) - ip.u.ip4 = entry->addr.u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - else - ip.u.ip6 = entry->addr.u.ip6; -#endif + __mdb_entry_to_br_ip(entry, &ip); spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); @@ -662,12 +696,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) entry->vid = v->vid; err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); + __br_mdb_notify(dev, p, entry, RTM_DELMDB); } } else { err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); + __br_mdb_notify(dev, p, entry, RTM_DELMDB); } return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a4c15df2b792..191ea66e4d92 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -283,7 +283,8 @@ static void br_multicast_del_pg(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - br_mdb_notify(br->dev, p, RTM_DELMDB); + br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, + p->flags); call_rcu_bh(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->mglist && @@ -705,7 +706,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, p, RTM_NEWMDB); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -1461,7 +1462,8 @@ br_multicast_leave_group(struct net_bridge *br, hlist_del_init(&p->mglist); del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, p, RTM_DELMDB); + br_mdb_notify(br->dev, port, group, RTM_DELMDB, + p->flags); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 44114a94c576..2d25979273a6 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) return 0; inhdr_error: - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index d61f56efc8dc..5e59a8457e7b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(net, idev, - IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(net, idev, - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; } } @@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) return 0; inhdr_error: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); drop: return -1; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e9c635eae24d..a5343c7232bf 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -135,9 +135,9 @@ static inline size_t br_port_info_size(void) + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_NO */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_TOPOLOGY_CHANGE_ACK */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_CONFIG_PENDING */ - + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */ - + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */ - + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ #endif @@ -190,13 +190,16 @@ static int br_port_fill_attrs(struct sk_buff *skb, return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); - if (nla_put_u64(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval)) + if (nla_put_u64_64bit(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval, + IFLA_BRPORT_PAD)) return -EMSGSIZE; timerval = br_timer_value(&p->forward_delay_timer); - if (nla_put_u64(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval)) + if (nla_put_u64_64bit(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval, + IFLA_BRPORT_PAD)) return -EMSGSIZE; timerval = br_timer_value(&p->hold_timer); - if (nla_put_u64(skb, IFLA_BRPORT_HOLD_TIMER, timerval)) + if (nla_put_u64_64bit(skb, IFLA_BRPORT_HOLD_TIMER, timerval, + IFLA_BRPORT_PAD)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING @@ -847,6 +850,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 }, [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, + [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -918,6 +922,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BR_VLAN_STATS_ENABLED]) { + __u8 vlan_stats = nla_get_u8(data[IFLA_BR_VLAN_STATS_ENABLED]); + + err = br_vlan_set_stats(br, vlan_stats); + if (err) + return err; + } #endif if (data[IFLA_BR_GROUP_FWD_MASK]) { @@ -1079,6 +1091,7 @@ static size_t br_get_size(const struct net_device *brdev) #ifdef CONFIG_BRIDGE_VLAN_FILTERING nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_ENABLED */ #endif nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */ nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */ @@ -1087,10 +1100,10 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) + /* IFLA_BR_ROOT_PATH_COST */ nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE */ nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE_DETECTED */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */ @@ -1101,12 +1114,12 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_STARTUP_QUERY_CNT */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ @@ -1129,16 +1142,17 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u64 clockval; clockval = br_timer_value(&br->hello_timer); - if (nla_put_u64(skb, IFLA_BR_HELLO_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_HELLO_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; clockval = br_timer_value(&br->tcn_timer); - if (nla_put_u64(skb, IFLA_BR_TCN_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_TCN_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; clockval = br_timer_value(&br->topology_change_timer); - if (nla_put_u64(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = br_timer_value(&br->gc_timer); - if (nla_put_u64(skb, IFLA_BR_GC_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || @@ -1163,7 +1177,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) #ifdef CONFIG_BRIDGE_VLAN_FILTERING if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) || - nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid)) + nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) || + nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br->vlan_stats_enabled)) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING @@ -1182,22 +1197,28 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_last_member_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_membership_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_querier_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_query_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_query_response_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_startup_query_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) @@ -1213,6 +1234,69 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } +static size_t br_get_linkxstats_size(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + int numvls = 0; + + vg = br_vlan_group(br); + if (!vg) + return 0; + + /* we need to count all, even placeholder entries */ + list_for_each_entry(v, &vg->vlan_list, vlist) + numvls++; + + /* account for the vlans and the link xstats type nest attribute */ + return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + + nla_total_size(0); +} + +static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, + int *prividx) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct nlattr *nest; + int vl_idx = 0; + + vg = br_vlan_group(br); + if (!vg) + goto out; + nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); + if (!nest) + return -EMSGSIZE; + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct bridge_vlan_xstats vxi; + struct br_vlan_stats stats; + + if (vl_idx++ < *prividx) + continue; + memset(&vxi, 0, sizeof(vxi)); + vxi.vid = v->vid; + br_vlan_get_stats(v, &stats); + vxi.rx_bytes = stats.rx_bytes; + vxi.rx_packets = stats.rx_packets; + vxi.tx_bytes = stats.tx_bytes; + vxi.tx_packets = stats.tx_packets; + + if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + *prividx = 0; +out: + return 0; + +nla_put_failure: + nla_nest_end(skb, nest); + *prividx = vl_idx; + + return -EMSGSIZE; +} static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, @@ -1231,6 +1315,8 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .dellink = br_dev_delete, .get_size = br_get_size, .fill_info = br_fill_info, + .fill_linkxstats = br_fill_linkxstats, + .get_linkxstats_size = br_get_linkxstats_size, .slave_maxtype = IFLA_BRPORT_MAX, .slave_policy = br_port_policy, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b5d145dfcbf..c7fb5d7a7218 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -77,12 +77,21 @@ struct bridge_mcast_querier { }; #endif +struct br_vlan_stats { + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; + struct u64_stats_sync syncp; +}; + /** * struct net_bridge_vlan - per-vlan entry * * @vnode: rhashtable member * @vid: VLAN id * @flags: bridge vlan flags + * @stats: per-cpu VLAN statistics * @br: if MASTER flag set, this points to a bridge struct * @port: if MASTER flag unset, this points to a port struct * @refcnt: if MASTER flag set, this is bumped for each port referencing it @@ -100,6 +109,7 @@ struct net_bridge_vlan { struct rhash_head vnode; u16 vid; u16 flags; + struct br_vlan_stats __percpu *stats; union { struct net_bridge *br; struct net_bridge_port *port; @@ -342,6 +352,7 @@ struct net_bridge #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; + u8 vlan_stats_enabled; __be16 vlan_proto; u16 default_pvid; #endif @@ -560,8 +571,8 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, unsigned char flags); void br_mdb_init(void); void br_mdb_uninit(void); -void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, - int type); +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type, u8 flags); void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, int type); @@ -691,6 +702,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); +int br_vlan_set_stats(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); @@ -699,6 +711,8 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port); int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); +void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats); static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) @@ -881,6 +895,10 @@ static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( return NULL; } +static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats) +{ +} #endif struct nf_br_ops { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 70bddfd0f3e9..beb47071e38d 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -731,6 +731,22 @@ static ssize_t default_pvid_store(struct device *d, return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); } static DEVICE_ATTR_RW(default_pvid); + +static ssize_t vlan_stats_enabled_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->vlan_stats_enabled); +} + +static ssize_t vlan_stats_enabled_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_stats); +} +static DEVICE_ATTR_RW(vlan_stats_enabled); #endif static struct attribute *bridge_attrs[] = { @@ -778,6 +794,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_vlan_filtering.attr, &dev_attr_vlan_protocol.attr, &dev_attr_default_pvid.attr, + &dev_attr_vlan_stats_enabled.attr, #endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index e001152d6ad1..b6de4f457161 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -162,6 +162,17 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid return masterv; } +static void br_master_vlan_rcu_free(struct rcu_head *rcu) +{ + struct net_bridge_vlan *v; + + v = container_of(rcu, struct net_bridge_vlan, rcu); + WARN_ON(!br_vlan_is_master(v)); + free_percpu(v->stats); + v->stats = NULL; + kfree(v); +} + static void br_vlan_put_master(struct net_bridge_vlan *masterv) { struct net_bridge_vlan_group *vg; @@ -174,7 +185,7 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv) rhashtable_remove_fast(&vg->vlan_hash, &masterv->vnode, br_vlan_rht_params); __vlan_del_list(masterv); - kfree_rcu(masterv, rcu); + call_rcu(&masterv->rcu, br_master_vlan_rcu_free); } } @@ -230,6 +241,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) if (!masterv) goto out_filt; v->brvlan = masterv; + v->stats = masterv->stats; } /* Add the dev mac and count the vlan only if it's usable */ @@ -329,6 +341,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { + struct br_vlan_stats *stats; struct net_bridge_vlan *v; u16 vid; @@ -355,18 +368,27 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, return NULL; } } + if (br->vlan_stats_enabled) { + stats = this_cpu_ptr(v->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_bytes += skb->len; + stats->tx_packets++; + u64_stats_update_end(&stats->syncp); + } + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) skb->vlan_tci = 0; - out: return skb; } /* Called under RCU */ -static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, +static bool __allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid) { - const struct net_bridge_vlan *v; + struct br_vlan_stats *stats; + struct net_bridge_vlan *v; bool tagged; BR_INPUT_SKB_CB(skb)->vlan_filtered = true; @@ -375,7 +397,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, * HW accelerated vlan tag. */ if (unlikely(!skb_vlan_tag_present(skb) && - skb->protocol == proto)) { + skb->protocol == br->vlan_proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) return false; @@ -383,7 +405,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, if (!br_vlan_get_tag(skb, vid)) { /* Tagged frame */ - if (skb->vlan_proto != proto) { + if (skb->vlan_proto != br->vlan_proto) { /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, @@ -419,7 +441,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, *vid = pvid; if (likely(!tagged)) /* Untagged Frame. */ - __vlan_hwaccel_put_tag(skb, proto, pvid); + __vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid); else /* Priority-tagged Frame. * At this point, We know that skb->vlan_tci had @@ -428,13 +450,24 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, */ skb->vlan_tci |= pvid; - return true; + /* if stats are disabled we can avoid the lookup */ + if (!br->vlan_stats_enabled) + return true; } - - /* Frame had a valid vlan tag. See if vlan is allowed */ v = br_vlan_find(vg, *vid); - if (v && br_vlan_should_use(v)) - return true; + if (!v || !br_vlan_should_use(v)) + goto drop; + + if (br->vlan_stats_enabled) { + stats = this_cpu_ptr(v->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += skb->len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); + } + + return true; + drop: kfree_skb(skb); return false; @@ -452,7 +485,7 @@ bool br_allowed_ingress(const struct net_bridge *br, return true; } - return __allowed_ingress(vg, br->vlan_proto, skb, vid); + return __allowed_ingress(br, vg, skb, vid); } /* Called under RCU. */ @@ -542,6 +575,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (!vlan) return -ENOMEM; + vlan->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); + if (!vlan->stats) { + kfree(vlan); + return -ENOMEM; + } vlan->vid = vid; vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER; vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; @@ -549,8 +587,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (flags & BRIDGE_VLAN_INFO_BRENTRY) atomic_set(&vlan->refcnt, 1); ret = __vlan_add(vlan, flags); - if (ret) + if (ret) { + free_percpu(vlan->stats); kfree(vlan); + } return ret; } @@ -711,6 +751,20 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val) return __br_vlan_set_proto(br, htons(val)); } +int br_vlan_set_stats(struct net_bridge *br, unsigned long val) +{ + switch (val) { + case 0: + case 1: + br->vlan_stats_enabled = val; + break; + default: + return -EINVAL; + } + + return 0; +} + static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) { struct net_bridge_vlan *v; @@ -1000,3 +1054,30 @@ void nbp_vlan_flush(struct net_bridge_port *port) synchronize_rcu(); __vlan_group_free(vg); } + +void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, txpackets, txbytes; + struct br_vlan_stats *cpu_stats; + unsigned int start; + + cpu_stats = per_cpu_ptr(v->stats, i); + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + rxpackets = cpu_stats->rx_packets; + rxbytes = cpu_stats->rx_bytes; + txbytes = cpu_stats->tx_bytes; + txpackets = cpu_stats->tx_packets; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->tx_bytes += txbytes; + stats->tx_packets += txpackets; + } +} diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8570bc7744c2..5a61f35412a0 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -370,7 +370,11 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); + if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) { + request_module("ebt_%s", m->u.name); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); + } if (IS_ERR(match)) return PTR_ERR(match); m->u.match = match; diff --git a/net/core/dev.c b/net/core/dev.c index 09fb1ace9dc8..d91dfbec0fc6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1741,7 +1741,7 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) +bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) { unsigned int len; @@ -2711,6 +2711,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); @@ -2825,14 +2838,45 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb, return vlan_features_check(skb, features); } +static netdev_features_t gso_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + u16 gso_segs = skb_shinfo(skb)->gso_segs; + + if (gso_segs > dev->gso_max_segs) + return features & ~NETIF_F_GSO_MASK; + + /* Support for GSO partial features requires software + * intervention before we can actually process the packets + * so we need to strip support for any partial features now + * and we can pull them back in after we have partially + * segmented the frame. + */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)) + features &= ~dev->gso_partial_features; + + /* Make sure to clear the IPv4 ID mangling feature if the + * IPv4 header has the potential to be fragmented. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + struct iphdr *iph = skb->encapsulation ? + inner_ip_hdr(skb) : ip_hdr(skb); + + if (!(iph->frag_off & htons(IP_DF))) + features &= ~NETIF_F_TSO_MANGLEID; + } + + return features; +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; - u16 gso_segs = skb_shinfo(skb)->gso_segs; - if (gso_segs > dev->gso_max_segs) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) + features = gso_features_check(skb, dev, features); /* If encapsulation offload request, verify we are testing * hardware encapsulation features instead of standard @@ -2915,9 +2959,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; - if (skb->next) - return skb; - features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) @@ -2960,6 +3001,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device out_kfree_skb: kfree_skb(skb); out_null: + atomic_long_inc(&dev->tx_dropped); return NULL; } @@ -3349,7 +3391,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb = validate_xmit_skb(skb, dev); if (!skb) - goto drop; + goto out; HARD_TX_LOCK(dev, txq, cpu); @@ -3376,7 +3418,6 @@ recursion_alert: } rc = -ENETDOWN; -drop: rcu_read_unlock_bh(); atomic_long_inc(&dev->tx_dropped); @@ -3428,6 +3469,7 @@ u32 rps_cpu_mask __read_mostly; EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; +EXPORT_SYMBOL(rps_needed); static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, @@ -4440,6 +4482,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->is_fou = 0; + NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ @@ -4940,8 +4983,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock) netpoll_poll_unlock(have); } if (rc > 0) - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); + __NET_ADD_STATS(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); local_bh_enable(); if (rc == LL_FLUSH_FAILED) @@ -6706,6 +6749,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + /* GSO partial features require GSO partial be set */ + if ((features & dev->gso_partial_features) && + !(features & NETIF_F_GSO_PARTIAL)) { + netdev_dbg(dev, + "Dropping partially supported GSO features since no GSO partial.\n"); + features &= ~dev->gso_partial_features; + } + #ifdef CONFIG_NET_RX_BUSY_POLL if (dev->netdev_ops->ndo_busy_poll) features |= NETIF_F_BUSY_POLL; @@ -6976,9 +7027,22 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - if (!(dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) dev->hw_features |= NETIF_F_NOCACHE_COPY; - } + + /* If IPv4 TCP segmentation offload is supported we should also + * allow the device to enable segmenting the frame with the option + * of ignoring a static IP ID value. This doesn't enable the + * feature itself but allows the user to enable it later. + */ + if (dev->hw_features & NETIF_F_TSO) + dev->hw_features |= NETIF_F_TSO_MANGLEID; + if (dev->vlan_features & NETIF_F_TSO) + dev->vlan_features |= NETIF_F_TSO_MANGLEID; + if (dev->mpls_features & NETIF_F_TSO) + dev->mpls_features |= NETIF_F_TSO_MANGLEID; + if (dev->hw_enc_features & NETIF_F_TSO) + dev->hw_enc_features |= NETIF_F_TSO_MANGLEID; /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. */ @@ -6986,7 +7050,7 @@ int register_netdevice(struct net_device *dev) /* Make NETIF_F_SG inheritable to tunnel devices. */ - dev->hw_enc_features |= NETIF_F_SG; + dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL; /* Make NETIF_F_SG inheritable to MPLS. */ diff --git a/net/core/devlink.c b/net/core/devlink.c index b84cf0df4a0e..933e8d4d3968 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -119,8 +119,171 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, return devlink_port_get_from_attrs(devlink, info->attrs); } +struct devlink_sb { + struct list_head list; + unsigned int index; + u32 size; + u16 ingress_pools_count; + u16 egress_pools_count; + u16 ingress_tc_count; + u16 egress_tc_count; +}; + +static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb) +{ + return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count; +} + +static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink, + unsigned int sb_index) +{ + struct devlink_sb *devlink_sb; + + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (devlink_sb->index == sb_index) + return devlink_sb; + } + return NULL; +} + +static bool devlink_sb_index_exists(struct devlink *devlink, + unsigned int sb_index) +{ + return devlink_sb_get_by_index(devlink, sb_index); +} + +static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink, + struct nlattr **attrs) +{ + if (attrs[DEVLINK_ATTR_SB_INDEX]) { + u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]); + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); + if (!devlink_sb) + return ERR_PTR(-ENODEV); + return devlink_sb; + } + return ERR_PTR(-EINVAL); +} + +static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + return devlink_sb_get_from_attrs(devlink, info->attrs); +} + +static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb, + struct nlattr **attrs, + u16 *p_pool_index) +{ + u16 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX]) + return -EINVAL; + + val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]); + if (val >= devlink_sb_pool_count(devlink_sb)) + return -EINVAL; + *p_pool_index = val; + return 0; +} + +static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb, + struct genl_info *info, + u16 *p_pool_index) +{ + return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs, + p_pool_index); +} + +static int +devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs, + enum devlink_sb_pool_type *p_pool_type) +{ + u8 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE]) + return -EINVAL; + + val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]); + if (val != DEVLINK_SB_POOL_TYPE_INGRESS && + val != DEVLINK_SB_POOL_TYPE_EGRESS) + return -EINVAL; + *p_pool_type = val; + return 0; +} + +static int +devlink_sb_pool_type_get_from_info(struct genl_info *info, + enum devlink_sb_pool_type *p_pool_type) +{ + return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type); +} + +static int +devlink_sb_th_type_get_from_attrs(struct nlattr **attrs, + enum devlink_sb_threshold_type *p_th_type) +{ + u8 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]) + return -EINVAL; + + val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]); + if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC && + val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC) + return -EINVAL; + *p_th_type = val; + return 0; +} + +static int +devlink_sb_th_type_get_from_info(struct genl_info *info, + enum devlink_sb_threshold_type *p_th_type) +{ + return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type); +} + +static int +devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb, + struct nlattr **attrs, + enum devlink_sb_pool_type pool_type, + u16 *p_tc_index) +{ + u16 val; + + if (!attrs[DEVLINK_ATTR_SB_TC_INDEX]) + return -EINVAL; + + val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]); + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS && + val >= devlink_sb->ingress_tc_count) + return -EINVAL; + if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS && + val >= devlink_sb->egress_tc_count) + return -EINVAL; + *p_tc_index = val; + return 0; +} + +static int +devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, + struct genl_info *info, + enum devlink_sb_pool_type pool_type, + u16 *p_tc_index) +{ + return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs, + pool_type, p_tc_index); +} + #define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) #define DEVLINK_NL_FLAG_NEED_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_SB BIT(2) +#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3) + /* port is not needed but we need to ensure they don't + * change in the middle of command + */ static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -147,13 +310,29 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, } info->user_ptr[0] = devlink_port; } + if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) { + mutex_lock(&devlink_port_mutex); + } + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) { + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + return PTR_ERR(devlink_sb); + } + info->user_ptr[1] = devlink_sb; + } return 0; } static void devlink_nl_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT || + ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) mutex_unlock(&devlink_port_mutex); mutex_unlock(&devlink_mutex); } @@ -499,12 +678,735 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink_port_unsplit(devlink, port_index); } +static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_sb *devlink_sb, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT, + devlink_sb->ingress_pools_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT, + devlink_sb->egress_pools_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT, + devlink_sb->ingress_tc_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT, + devlink_sb->egress_tc_count)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) + goto out; + idx++; + } + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_sb *devlink_sb, + u16 pool_index, enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + struct devlink_sb_pool_info pool_info; + void *hdr; + int err; + + err = devlink->ops->sb_pool_get(devlink, devlink_sb->index, + pool_index, &pool_info); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + pool_info.threshold_type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + u16 pool_index; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_pool_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index, + DEVLINK_CMD_SB_POOL_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + u16 pool_count = devlink_sb_pool_count(devlink_sb); + u16 pool_index; + int err; + + for (pool_index = 0; pool_index < pool_count; pool_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_pool_fill(msg, devlink, + devlink_sb, + pool_index, + DEVLINK_CMD_SB_POOL_NEW, + portid, seq, NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + return 0; +} + +static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_pool_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_pool_get_dumpit(msg, start, &idx, devlink, + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) + +{ + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_pool_set) + return ops->sb_pool_set(devlink, sb_index, pool_index, + size, threshold_type); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + enum devlink_sb_threshold_type threshold_type; + u16 pool_index; + u32 size; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + err = devlink_sb_th_type_get_from_info(info, &threshold_type); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]) + return -EINVAL; + + size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]); + return devlink_sb_pool_set(devlink, devlink_sb->index, + pool_index, size, threshold_type); +} + +static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_port *devlink_port, + struct devlink_sb *devlink_sb, + u16 pool_index, + enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + const struct devlink_ops *ops = devlink->ops; + u32 threshold; + void *hdr; + int err; + + err = ops->sb_port_pool_get(devlink_port, devlink_sb->index, + pool_index, &threshold); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) + goto nla_put_failure; + + if (ops->sb_occ_port_pool_get) { + u32 cur; + u32 max; + + err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, + pool_index, &cur, &max); + if (err && err != -EOPNOTSUPP) + return err; + if (!err) { + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) + goto nla_put_failure; + } + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + u16 pool_index; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_port_pool_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port, + devlink_sb, pool_index, + DEVLINK_CMD_SB_PORT_POOL_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + struct devlink_port *devlink_port; + u16 pool_count = devlink_sb_pool_count(devlink_sb); + u16 pool_index; + int err; + + list_for_each_entry(devlink_port, &devlink->port_list, list) { + for (pool_index = 0; pool_index < pool_count; pool_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_port_pool_fill(msg, devlink, + devlink_port, + devlink_sb, + pool_index, + DEVLINK_CMD_SB_PORT_POOL_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + } + return 0; +} + +static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + mutex_lock(&devlink_port_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_port_pool_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_port_pool_get_dumpit(msg, start, &idx, + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) + +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + if (ops && ops->sb_port_pool_set) + return ops->sb_port_pool_set(devlink_port, sb_index, + pool_index, threshold); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + u16 pool_index; + u32 threshold; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + return -EINVAL; + + threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); + return devlink_sb_port_pool_set(devlink_port, devlink_sb->index, + pool_index, threshold); +} + +static int +devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_port *devlink_port, + struct devlink_sb *devlink_sb, u16 tc_index, + enum devlink_sb_pool_type pool_type, + enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + const struct devlink_ops *ops = devlink->ops; + u16 pool_index; + u32 threshold; + void *hdr; + int err; + + err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index, + tc_index, pool_type, + &pool_index, &threshold); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) + goto nla_put_failure; + + if (ops->sb_occ_tc_port_bind_get) { + u32 cur; + u32 max; + + err = ops->sb_occ_tc_port_bind_get(devlink_port, + devlink_sb->index, + tc_index, pool_type, + &cur, &max); + if (err && err != -EOPNOTSUPP) + return err; + if (!err) { + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) + goto nla_put_failure; + } + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + enum devlink_sb_pool_type pool_type; + u16 tc_index; + int err; + + err = devlink_sb_pool_type_get_from_info(info, &pool_type); + if (err) + return err; + + err = devlink_sb_tc_index_get_from_info(devlink_sb, info, + pool_type, &tc_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port, + devlink_sb, tc_index, pool_type, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + info->snd_portid, + info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, + int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + struct devlink_port *devlink_port; + u16 tc_index; + int err; + + list_for_each_entry(devlink_port, &devlink->port_list, list) { + for (tc_index = 0; + tc_index < devlink_sb->ingress_tc_count; tc_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, + devlink_port, + devlink_sb, + tc_index, + DEVLINK_SB_POOL_TYPE_INGRESS, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + for (tc_index = 0; + tc_index < devlink_sb->egress_tc_count; tc_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, + devlink_port, + devlink_sb, + tc_index, + DEVLINK_SB_POOL_TYPE_EGRESS, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + } + return 0; +} + +static int +devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + mutex_lock(&devlink_port_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, + devlink, + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) + +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + if (ops && ops->sb_tc_pool_bind_set) + return ops->sb_tc_pool_bind_set(devlink_port, sb_index, + tc_index, pool_type, + pool_index, threshold); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + enum devlink_sb_pool_type pool_type; + u16 tc_index; + u16 pool_index; + u32 threshold; + int err; + + err = devlink_sb_pool_type_get_from_info(info, &pool_type); + if (err) + return err; + + err = devlink_sb_tc_index_get_from_info(devlink_sb, info, + pool_type, &tc_index); + if (err) + return err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + return -EINVAL; + + threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); + return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index, + tc_index, pool_type, + pool_index, threshold); +} + +static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_occ_snapshot) + return ops->sb_occ_snapshot(devlink, devlink_sb->index); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_occ_max_clear) + return ops->sb_occ_max_clear(devlink, devlink_sb->index); + return -EOPNOTSUPP; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 }, [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -545,6 +1447,84 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_SB_GET, + .doit = devlink_nl_cmd_sb_get_doit, + .dumpit = devlink_nl_cmd_sb_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_POOL_GET, + .doit = devlink_nl_cmd_sb_pool_get_doit, + .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_POOL_SET, + .doit = devlink_nl_cmd_sb_pool_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, + .doit = devlink_nl_cmd_sb_port_pool_get_doit, + .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .doit = devlink_nl_cmd_sb_port_pool_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, + .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, + .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .doit = devlink_nl_cmd_sb_occ_snapshot_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB | + DEVLINK_NL_FLAG_LOCK_PORTS, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .doit = devlink_nl_cmd_sb_occ_max_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB | + DEVLINK_NL_FLAG_LOCK_PORTS, + }, }; /** @@ -566,6 +1546,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink->ops = ops; devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); + INIT_LIST_HEAD(&devlink->sb_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -721,6 +1702,51 @@ void devlink_port_split_set(struct devlink_port *devlink_port, } EXPORT_SYMBOL_GPL(devlink_port_split_set); +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) +{ + struct devlink_sb *devlink_sb; + int err = 0; + + mutex_lock(&devlink_mutex); + if (devlink_sb_index_exists(devlink, sb_index)) { + err = -EEXIST; + goto unlock; + } + + devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL); + if (!devlink_sb) { + err = -ENOMEM; + goto unlock; + } + devlink_sb->index = sb_index; + devlink_sb->size = size; + devlink_sb->ingress_pools_count = ingress_pools_count; + devlink_sb->egress_pools_count = egress_pools_count; + devlink_sb->ingress_tc_count = ingress_tc_count; + devlink_sb->egress_tc_count = egress_tc_count; + list_add_tail(&devlink_sb->list, &devlink->sb_list); +unlock: + mutex_unlock(&devlink_mutex); + return err; +} +EXPORT_SYMBOL_GPL(devlink_sb_register); + +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +{ + struct devlink_sb *devlink_sb; + + mutex_lock(&devlink_mutex); + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); + WARN_ON(!devlink_sb); + list_del(&devlink_sb->list); + mutex_unlock(&devlink_mutex); + kfree(devlink_sb); +} +EXPORT_SYMBOL_GPL(devlink_sb_unregister); + static int __init devlink_module_init(void) { return genl_register_family_with_ops_groups(&devlink_nl_family, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f426c5ad6149..bdb4013581b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -79,12 +79,16 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", @@ -387,15 +391,17 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32) { bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); dst[0] = legacy_u32; } +EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); /* return false if src had higher bits set. lower bits always updated. */ -static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, - const unsigned long *src) +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src) { bool retval = true; @@ -415,6 +421,7 @@ static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, *legacy_u32 = src[0]; return retval; } +EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated @@ -437,13 +444,13 @@ convert_legacy_settings_to_link_ksettings( legacy_settings->maxrxpkt) retval = false; - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.supported, legacy_settings->supported); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.advertising, legacy_settings->advertising); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.lp_advertising, legacy_settings->lp_advertising); link_ksettings->base.speed @@ -482,13 +489,13 @@ convert_link_ksettings_to_legacy_settings( * __u32 maxrxpkt; */ - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->supported, link_ksettings->link_modes.supported); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->advertising, link_ksettings->link_modes.advertising); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->lp_advertising, link_ksettings->link_modes.lp_advertising); ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 365de66436ac..840acebbb80c 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -549,7 +549,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8); /* FRA_TUN_ID */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -607,7 +607,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && - nla_put_be64(skb, FRA_TUN_ID, rule->tun_id))) + nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/filter.c b/net/core/filter.c index e8486ba601ea..218e5de8c402 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1409,16 +1409,19 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) unsigned int len = (unsigned int) r4; void *ptr; - if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK)) - return -EFAULT; + if (unlikely((u32) offset > 0xffff)) + goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); if (unlikely(!ptr)) - return -EFAULT; + goto err_clear; if (ptr != to) memcpy(to, ptr, len); return 0; +err_clear: + memset(to, 0, len); + return -EFAULT; } static const struct bpf_func_proto bpf_skb_load_bytes_proto = { @@ -1427,7 +1430,7 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_STACK, + .arg3_type = ARG_PTR_TO_RAW_STACK, .arg4_type = ARG_CONST_STACK_SIZE, }; @@ -1756,12 +1759,19 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; + void *to_orig = to; + int err; - if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) - return -EINVAL; - if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) - return -EPROTO; + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { + err = -EINVAL; + goto err_clear; + } + if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { + err = -EPROTO; + goto err_clear; + } if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + err = -EINVAL; switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): @@ -1771,12 +1781,12 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) * a common path later on. */ if (ip_tunnel_info_af(info) != AF_INET) - return -EINVAL; + goto err_clear; set_compat: to = (struct bpf_tunnel_key *)compat; break; default: - return -EINVAL; + goto err_clear; } } @@ -1793,9 +1803,12 @@ set_compat: } if (unlikely(size != sizeof(struct bpf_tunnel_key))) - memcpy((void *)(long) r2, to, size); + memcpy(to_orig, to, size); return 0; +err_clear: + memset(to_orig, 0, size); + return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { @@ -1803,7 +1816,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_PTR_TO_RAW_STACK, .arg3_type = ARG_CONST_STACK_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -1813,16 +1826,26 @@ static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; u8 *to = (u8 *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); + int err; if (unlikely(!info || - !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) - return -ENOENT; - if (unlikely(size < info->options_len)) - return -ENOMEM; + !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { + err = -ENOENT; + goto err_clear; + } + if (unlikely(size < info->options_len)) { + err = -ENOMEM; + goto err_clear; + } ip_tunnel_info_opts_get(to, info); + if (size > info->options_len) + memset(to + info->options_len, 0, size - info->options_len); return info->options_len; +err_clear: + memset(to, 0, size); + return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { @@ -1830,7 +1853,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_PTR_TO_RAW_STACK, .arg3_type = ARG_CONST_STACK_SIZE, }; @@ -2016,6 +2039,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_redirect_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; + case BPF_FUNC_perf_event_output: + return bpf_get_event_output_proto(); default: return sk_filter_func_proto(func_id); } diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index e640462ea8bf..f96ee8b9478d 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -25,9 +25,9 @@ static inline int -gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) +gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) { - if (nla_put(d->skb, type, size, buf)) + if (nla_put_64bit(d->skb, type, size, buf, padattr)) goto nla_put_failure; return 0; @@ -59,7 +59,8 @@ nla_put_failure: */ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, - int xstats_type, spinlock_t *lock, struct gnet_dump *d) + int xstats_type, spinlock_t *lock, + struct gnet_dump *d, int padattr) __acquires(lock) { memset(d, 0, sizeof(*d)); @@ -71,16 +72,17 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, d->skb = skb; d->compat_tc_stats = tc_stats_type; d->compat_xstats = xstats_type; + d->padattr = padattr; if (d->tail) - return gnet_stats_copy(d, type, NULL, 0); + return gnet_stats_copy(d, type, NULL, 0, padattr); return 0; } EXPORT_SYMBOL(gnet_stats_start_copy_compat); /** - * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode + * gnet_stats_start_copy - start dumping procedure in compatibility mode * @skb: socket buffer to put statistics TLVs into * @type: TLV type for top level statistic TLV * @lock: statistics lock @@ -94,9 +96,9 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat); */ int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, - struct gnet_dump *d) + struct gnet_dump *d, int padattr) { - return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); + return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d, padattr); } EXPORT_SYMBOL(gnet_stats_start_copy); @@ -169,7 +171,8 @@ gnet_stats_copy_basic(struct gnet_dump *d, memset(&sb, 0, sizeof(sb)); sb.bytes = bstats.bytes; sb.packets = bstats.packets; - return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); + return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb), + TCA_STATS_PAD); } return 0; } @@ -208,11 +211,13 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, } if (d->tail) { - res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est), + TCA_STATS_PAD); if (res < 0 || est.bps == r->bps) return res; /* emit 64bit stats only if needed */ - return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r), + TCA_STATS_PAD); } return 0; @@ -286,7 +291,8 @@ gnet_stats_copy_queue(struct gnet_dump *d, if (d->tail) return gnet_stats_copy(d, TCA_STATS_QUEUE, - &qstats, sizeof(qstats)); + &qstats, sizeof(qstats), + TCA_STATS_PAD); return 0; } @@ -316,7 +322,8 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) } if (d->tail) - return gnet_stats_copy(d, TCA_STATS_APP, st, len); + return gnet_stats_copy(d, TCA_STATS_APP, st, len, + TCA_STATS_PAD); return 0; @@ -347,12 +354,12 @@ gnet_stats_finish_copy(struct gnet_dump *d) if (d->compat_tc_stats) if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, - sizeof(d->tc_stats)) < 0) + sizeof(d->tc_stats), d->padattr) < 0) return -1; if (d->compat_xstats && d->xstats) { if (gnet_stats_copy(d, d->compat_xstats, d->xstats, - d->xstats_len) < 0) + d->xstats_len, d->padattr) < 0) return -1; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f18ae91b652e..29dd8cc22bbf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1763,21 +1763,22 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_REPROBES, NEIGH_VAR(parms, MCAST_REPROBES)) || - nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || + nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time, + NDTPA_PAD) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, - NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || + NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_GC_STALETIME, - NEIGH_VAR(parms, GC_STALETIME)) || + NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, - NEIGH_VAR(parms, DELAY_PROBE_TIME)) || + NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_RETRANS_TIME, - NEIGH_VAR(parms, RETRANS_TIME)) || + NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, - NEIGH_VAR(parms, ANYCAST_DELAY)) || + NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_PROXY_DELAY, - NEIGH_VAR(parms, PROXY_DELAY)) || + NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_LOCKTIME, - NEIGH_VAR(parms, LOCKTIME))) + NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -1804,7 +1805,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) || - nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) || + nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) || nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) @@ -1856,7 +1857,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndst.ndts_table_fulls += st->table_fulls; } - if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst)) + if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, + NDTA_PAD)) goto nla_put_failure; } diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 2bf83299600a..14d09345f00d 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -162,7 +162,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v) "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps, flow_limit_count); + 0, /* was cpu_collision */ + sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 20999aa596dd..8604ae245960 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3472,7 +3472,6 @@ xmit_more: pkt_dev->odevname, ret); pkt_dev->errors++; /* fallthru */ - case NETDEV_TX_LOCKED: case NETDEV_TX_BUSY: /* Retry it next time */ atomic_dec(&(pkt_dev->skb->users)); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a75f7e94b445..d471f097c739 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -808,11 +808,6 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_nohandler = b->rx_nohandler; } -static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) -{ - memcpy(v, b, sizeof(*b)); -} - /* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev, u32 ext_filter_mask) @@ -830,17 +825,17 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + /* IFLA_VF_STATS_RX_PACKETS */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_RX_BYTES */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_BYTES */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_BROADCAST */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else @@ -881,9 +876,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ - + nla_total_size(sizeof(struct rtnl_link_ifmap)) + + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) - + nla_total_size(sizeof(struct rtnl_link_stats64)) + + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ @@ -1054,25 +1049,23 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, struct net_device *dev) { - const struct rtnl_link_stats64 *stats; - struct rtnl_link_stats64 temp; + struct rtnl_link_stats64 *sp; struct nlattr *attr; - stats = dev_get_stats(dev, &temp); - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); + attr = nla_reserve_64bit(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64), IFLA_PAD); if (!attr) return -EMSGSIZE; - copy_rtnl_link_stats(nla_data(attr), stats); + sp = nla_data(attr); + dev_get_stats(dev, sp); - attr = nla_reserve(skb, IFLA_STATS64, - sizeof(struct rtnl_link_stats64)); + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); if (!attr) return -EMSGSIZE; - copy_rtnl_link_stats64(nla_data(attr), stats); + copy_rtnl_link_stats(nla_data(attr), sp); return 0; } @@ -1160,18 +1153,18 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_nest_cancel(skb, vfinfo); return -EMSGSIZE; } - if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, - vf_stats.rx_packets) || - nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, - vf_stats.tx_packets) || - nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, - vf_stats.rx_bytes) || - nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, - vf_stats.tx_bytes) || - nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, - vf_stats.broadcast) || - nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast)) + if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast, IFLA_VF_STATS_PAD)) return -EMSGSIZE; nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); @@ -1188,7 +1181,7 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) .dma = dev->dma, .port = dev->if_port, }; - if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) + if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; return 0; @@ -3451,6 +3444,202 @@ out: return err; } +static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) +{ + return (mask & IFLA_STATS_FILTER_BIT(attrid)) && + (!idxattr || idxattr == attrid); +} + +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, + int type, u32 pid, u32 seq, u32 change, + unsigned int flags, unsigned int filter_mask, + int *idxattr, int *prividx) +{ + struct if_stats_msg *ifsm; + struct nlmsghdr *nlh; + struct nlattr *attr; + int s_prividx = *prividx; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags); + if (!nlh) + return -EMSGSIZE; + + ifsm = nlmsg_data(nlh); + ifsm->ifindex = dev->ifindex; + ifsm->filter_mask = filter_mask; + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) { + struct rtnl_link_stats64 *sp; + + attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, + sizeof(struct rtnl_link_stats64), + IFLA_STATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + sp = nla_data(attr); + dev_get_stats(dev, sp); + } + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) { + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + + if (ops && ops->fill_linkxstats) { + int err; + + *idxattr = IFLA_STATS_LINK_XSTATS; + attr = nla_nest_start(skb, + IFLA_STATS_LINK_XSTATS); + if (!attr) + goto nla_put_failure; + + err = ops->fill_linkxstats(skb, dev, prividx); + nla_nest_end(skb, attr); + if (err) + goto nla_put_failure; + *idxattr = 0; + } + } + + nlmsg_end(skb, nlh); + + return 0; + +nla_put_failure: + /* not a multi message or no progress mean a real error */ + if (!(flags & NLM_F_MULTI) || s_prividx == *prividx) + nlmsg_cancel(skb, nlh); + else + nlmsg_end(skb, nlh); + + return -EMSGSIZE; +} + +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = { + [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) }, +}; + +static size_t if_nlmsg_stats_size(const struct net_device *dev, + u32 filter_mask) +{ + size_t size = 0; + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) + size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) { + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + + if (ops && ops->get_linkxstats_size) { + size += nla_total_size(ops->get_linkxstats_size(dev)); + /* for IFLA_STATS_LINK_XSTATS */ + size += nla_total_size(0); + } + } + + return size; +} + +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev = NULL; + int idxattr = 0, prividx = 0; + struct if_stats_msg *ifsm; + struct sk_buff *nskb; + u32 filter_mask; + int err; + + ifsm = nlmsg_data(nlh); + if (ifsm->ifindex > 0) + dev = __dev_get_by_index(net, ifsm->ifindex); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + filter_mask = ifsm->filter_mask; + if (!filter_mask) + return -EINVAL; + + nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL); + if (!nskb) + return -ENOBUFS; + + err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, + NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + 0, filter_mask, &idxattr, &prividx); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(nskb); + } else { + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); + } + + return err; +} + +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int h, s_h, err, s_idx, s_idxattr, s_prividx; + struct net *net = sock_net(skb->sk); + unsigned int flags = NLM_F_MULTI; + struct if_stats_msg *ifsm; + struct hlist_head *head; + struct net_device *dev; + u32 filter_mask = 0; + int idx = 0; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + s_idxattr = cb->args[2]; + s_prividx = cb->args[3]; + + cb->seq = net->dev_base_seq; + + ifsm = nlmsg_data(cb->nlh); + filter_mask = ifsm->filter_mask; + if (!filter_mask) + return -EINVAL; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + flags, filter_mask, + &s_idxattr, &s_prividx); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err < 0) + goto out; + s_prividx = 0; + s_idxattr = 0; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } +out: + cb->args[3] = s_prividx; + cb->args[2] = s_idxattr; + cb->args[1] = idx; + cb->args[0] = h; + + return skb->len; +} + /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -3600,4 +3789,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); + + rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, + NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d04c2d1c8c87..7a1d48983f81 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,8 +3076,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); + unsigned int partial_segs = 0; unsigned int headroom; - unsigned int len; + unsigned int len = head_skb->len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); @@ -3094,6 +3095,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, csum = !!can_checksum_protocol(features, proto); + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ + if (features & NETIF_F_GSO_PARTIAL) { + partial_segs = len / mss; + mss *= partial_segs; + } + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3281,6 +3291,23 @@ perform_csum_check: */ segs->prev = tail; + /* Update GSO info on first skb in partial sequence. */ + if (partial_segs) { + int type = skb_shinfo(head_skb)->gso_type; + + /* Update type to add partial and then remove dodgy if set */ + type |= SKB_GSO_PARTIAL; + type &= ~SKB_GSO_DODGY; + + /* Update GSO info and prepare to start updating headers on + * our way back down the stack of protocols. + */ + skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; + skb_shinfo(segs)->gso_segs = partial_segs; + skb_shinfo(segs)->gso_type = type; + SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + } + /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. @@ -4502,13 +4529,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) + if (err) { + __skb_pull(skb, offset); return err; + } + skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; - __skb_pull(skb, offset); skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); + __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; @@ -4592,3 +4622,245 @@ failure: return NULL; } EXPORT_SYMBOL(alloc_skb_with_frags); + +/* carve out the first off bytes from skb when off < headlen */ +static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, + const int headlen, gfp_t gfp_mask) +{ + int i; + int size = skb_end_offset(skb); + int new_hlen = headlen - off; + u8 *data; + int doff = 0; + + size = SKB_DATA_ALIGN(size); + + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); + if (!data) + return -ENOMEM; + + size = SKB_WITH_OVERHEAD(ksize(data)); + + /* Copy real data, and all frags */ + skb_copy_from_linear_data_offset(skb, off, data, new_hlen); + skb->len -= off; + + memcpy((struct skb_shared_info *)(data + size), + skb_shinfo(skb), + offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + if (skb_cloned(skb)) { + /* drop the old head gracefully */ + if (skb_orphan_frags(skb, gfp_mask)) { + kfree(data); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + skb_frag_ref(skb, i); + if (skb_has_frag_list(skb)) + skb_clone_fraglist(skb); + skb_release_data(skb); + } else { + /* we can reuse existing recount- all we did was + * relocate values + */ + skb_free_head(skb); + } + + doff = (data - skb->head); + skb->head = data; + skb->data = data; + skb->head_frag = 0; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->end = size; + doff = 0; +#else + skb->end = skb->head + size; +#endif + skb_set_tail_pointer(skb, skb_headlen(skb)); + skb_headers_offset_update(skb, 0); + skb->cloned = 0; + skb->hdr_len = 0; + skb->nohdr = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); + + return 0; +} + +static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); + +/* carve out the first eat bytes from skb's frag_list. May recurse into + * pskb_carve() + */ +static int pskb_carve_frag_list(struct sk_buff *skb, + struct skb_shared_info *shinfo, int eat, + gfp_t gfp_mask) +{ + struct sk_buff *list = shinfo->frag_list; + struct sk_buff *clone = NULL; + struct sk_buff *insp = NULL; + + do { + if (!list) { + pr_err("Not enough bytes to eat. Want %d\n", eat); + return -EFAULT; + } + if (list->len <= eat) { + /* Eaten as whole. */ + eat -= list->len; + list = list->next; + insp = list; + } else { + /* Eaten partially. */ + if (skb_shared(list)) { + clone = skb_clone(list, gfp_mask); + if (!clone) + return -ENOMEM; + insp = list->next; + list = clone; + } else { + /* This may be pulled without problems. */ + insp = list; + } + if (pskb_carve(list, eat, gfp_mask) < 0) { + kfree_skb(clone); + return -ENOMEM; + } + break; + } + } while (eat); + + /* Free pulled out fragments. */ + while ((list = shinfo->frag_list) != insp) { + shinfo->frag_list = list->next; + kfree_skb(list); + } + /* And insert new clone at head. */ + if (clone) { + clone->next = list; + shinfo->frag_list = clone; + } + return 0; +} + +/* carve off first len bytes from skb. Split line (off) is in the + * non-linear part of skb + */ +static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, + int pos, gfp_t gfp_mask) +{ + int i, k = 0; + int size = skb_end_offset(skb); + u8 *data; + const int nfrags = skb_shinfo(skb)->nr_frags; + struct skb_shared_info *shinfo; + int doff = 0; + + size = SKB_DATA_ALIGN(size); + + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); + if (!data) + return -ENOMEM; + + size = SKB_WITH_OVERHEAD(ksize(data)); + + memcpy((struct skb_shared_info *)(data + size), + skb_shinfo(skb), offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + if (skb_orphan_frags(skb, gfp_mask)) { + kfree(data); + return -ENOMEM; + } + shinfo = (struct skb_shared_info *)(data + size); + for (i = 0; i < nfrags; i++) { + int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (pos + fsize > off) { + shinfo->frags[k] = skb_shinfo(skb)->frags[i]; + + if (pos < off) { + /* Split frag. + * We have two variants in this case: + * 1. Move all the frag to the second + * part, if it is possible. F.e. + * this approach is mandatory for TUX, + * where splitting is expensive. + * 2. Split is accurately. We make this. + */ + shinfo->frags[0].page_offset += off - pos; + skb_frag_size_sub(&shinfo->frags[0], off - pos); + } + skb_frag_ref(skb, i); + k++; + } + pos += fsize; + } + shinfo->nr_frags = k; + if (skb_has_frag_list(skb)) + skb_clone_fraglist(skb); + + if (k == 0) { + /* split line is in frag list */ + pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask); + } + skb_release_data(skb); + + doff = (data - skb->head); + skb->head = data; + skb->head_frag = 0; + skb->data = data; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->end = size; + doff = 0; +#else + skb->end = skb->head + size; +#endif + skb_reset_tail_pointer(skb); + skb_headers_offset_update(skb, 0); + skb->cloned = 0; + skb->hdr_len = 0; + skb->nohdr = 0; + skb->len -= off; + skb->data_len = skb->len; + atomic_set(&skb_shinfo(skb)->dataref, 1); + return 0; +} + +/* remove len bytes from the beginning of the skb */ +static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp) +{ + int headlen = skb_headlen(skb); + + if (len < headlen) + return pskb_carve_inside_header(skb, len, headlen, gfp); + else + return pskb_carve_inside_nonlinear(skb, len, headlen, gfp); +} + +/* Extract to_copy bytes starting at off from skb, and return this in + * a new skb + */ +struct sk_buff *pskb_extract(struct sk_buff *skb, int off, + int to_copy, gfp_t gfp) +{ + struct sk_buff *clone = skb_clone(skb, gfp); + + if (!clone) + return NULL; + + if (pskb_carve(clone, off, gfp) < 0 || + pskb_trim(clone, to_copy)) { + kfree_skb(clone); + return NULL; + } + return clone; +} +EXPORT_SYMBOL(pskb_extract); diff --git a/net/core/sock.c b/net/core/sock.c index e16a5db853c6..f615e9391170 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2019,33 +2019,27 @@ static void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { - struct sk_buff *skb = sk->sk_backlog.head; + struct sk_buff *skb, *next; - do { + while ((skb = sk->sk_backlog.head) != NULL) { sk->sk_backlog.head = sk->sk_backlog.tail = NULL; - bh_unlock_sock(sk); - do { - struct sk_buff *next = skb->next; + spin_unlock_bh(&sk->sk_lock.slock); + do { + next = skb->next; prefetch(next); WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); - /* - * We are in process context here with softirqs - * disabled, use cond_resched_softirq() to preempt. - * This is safe to do because we've taken the backlog - * queue private: - */ - cond_resched_softirq(); + cond_resched(); skb = next; } while (skb != NULL); - bh_lock_sock(sk); - } while ((skb = sk->sk_backlog.head) != NULL); + spin_lock_bh(&sk->sk_lock.slock); + } /* * Doing the zeroing here guarantee we can not loop forever @@ -2054,6 +2048,13 @@ static void __release_sock(struct sock *sk) sk->sk_backlog.len = 0; } +void __sk_flush_backlog(struct sock *sk) +{ + spin_lock_bh(&sk->sk_lock.slock); + __release_sock(sk); + spin_unlock_bh(&sk->sk_lock.slock); +} + /** * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index ca9e35bbe13c..6b10573cc9fa 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -120,7 +120,7 @@ static size_t sock_diag_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct inet_diag_msg) + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ - + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ + + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ } static void sock_diag_broadcast_destroy_work(struct work_struct *work) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b0e28d24e1a7..0c55ffb859bf 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -198,9 +198,9 @@ struct dccp_mib { }; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); -#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) -#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) -#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define __DCCP_INC_STATS(field) __SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) /* * Checksumming routines diff --git a/net/dccp/input.c b/net/dccp/input.c index 3bd14e885396..ba347184bda9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -359,7 +359,7 @@ send_sync: goto discard; } - DCCP_INC_STATS_BH(DCCP_MIB_INERRS); + DCCP_INC_STATS(DCCP_MIB_INERRS); discard: __kfree_skb(skb); return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f6d183f8f332..5c7e413a3ae4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq) * socket here. */ if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { /* * Still in RESPOND, just remove it silently. @@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } @@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) iph->saddr, ntohs(dh->dccph_sport), inet_iif(skb)); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } @@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -431,11 +431,11 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, return newsk; exit_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit_nonewsk: dst_release(dst); exit: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: inet_csk_prepare_forced_close(newsk); @@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) bh_unlock_sock(ctl_sk); if (net_xmit_eval(err) == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); } out: dst_release(dst); @@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } EXPORT_SYMBOL_GPL(dccp_v4_conn_request); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8ceb3cebcad4..d176f4e66369 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet6_iif(skb)); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -106,7 +106,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -114,7 +114,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error @@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; } @@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } @@ -527,11 +527,11 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, return newsk; out_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1994f8af646b..53eddf99e4f6 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, } dccp_init_xmit_timers(newsk); - DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); + __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS); } return newsk; } diff --git a/net/dccp/options.c b/net/dccp/options.c index 9bce31886bda..74d29c56c367 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -253,7 +253,7 @@ out_nonsensical_length: return 0; out_invalid_option: - DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); + DCCP_INC_STATS(DCCP_MIB_INVALIDOPT); rc = DCCP_RESET_CODE_OPTION_ERROR; out_featneg_failed: DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 3ef7acef3ce8..3a2c34027758 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -28,7 +28,7 @@ static void dccp_write_err(struct sock *sk) dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); dccp_done(sk); - DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); + __DCCP_INC_STATS(DCCP_MIB_ABORTONTIMEOUT); } /* A write timeout has occurred. Process the after effects. */ @@ -100,7 +100,7 @@ static void dccp_retransmit_timer(struct sock *sk) * total number of retransmissions of clones of original packets. */ if (icsk->icsk_retransmits == 0) - DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); + __DCCP_INC_STATS(DCCP_MIB_TIMEOUTS); if (dccp_retransmit_skb(sk) != 0) { /* @@ -179,7 +179,7 @@ static void dccp_delack_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out; @@ -209,7 +209,7 @@ static void dccp_delack_timer(unsigned long data) icsk->icsk_ack.ato = TCP_ATO_MIN; } dccp_send_ack(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } out: bh_unlock_sock(sk); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 607a14f20d88..b1dc096d22f8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1034,10 +1034,13 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); + err = -EINVAL; dev_out = init_net.loopback_dev; + if (!dev_out->dn_ptr) + goto out; + err = -EADDRNOTAVAIL; dev_hold(dev_out); if (!fld.daddr) { fld.daddr = @@ -1110,6 +1113,8 @@ source_ok: if (dev_out == NULL) goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); + if (!dn_db) + goto e_inval; /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); @@ -1151,6 +1156,8 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); + if (!dev_out->dn_ptr) + goto e_inval; fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 60ea98481806..d61ceed912be 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -52,11 +52,11 @@ EXPORT_SYMBOL_GPL(unregister_switch_driver); static struct dsa_switch_driver * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, - char **_name, void **priv) + const char **_name, void **priv) { struct dsa_switch_driver *ret; struct list_head *list; - char *name; + const char *name; ret = NULL; name = NULL; @@ -267,7 +267,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - switch (ds->tag_protocol) { + switch (drv->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case DSA_TAG_PROTO_DSA: dst->rcv = dsa_netdev_ops.rcv; @@ -295,7 +295,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) goto out; } - dst->tag_protocol = ds->tag_protocol; + dst->tag_protocol = drv->tag_protocol; } /* @@ -383,7 +383,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct dsa_switch_driver *drv; struct dsa_switch *ds; int ret; - char *name; + const char *name; void *priv; /* @@ -411,7 +411,6 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->pd = pd; ds->drv = drv; ds->priv = priv; - ds->tag_protocol = drv->tag_protocol; ds->master_dev = host_dev; ret = dsa_switch_setup_one(ds, parent); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1d1a54687e4a..dfa33779d49c 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -22,11 +22,6 @@ struct dsa_device_ops { }; struct dsa_slave_priv { - /* - * The linux network interface corresponding to this - * switch port. - */ - struct net_device *dev; struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2dae0d064359..5ea8a40c8d33 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -666,6 +666,78 @@ static void dsa_slave_get_strings(struct net_device *dev, } } +static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds = dst->ds[0]; + s8 cpu_port = dst->cpu_port; + int count = 0; + + if (dst->master_ethtool_ops.get_sset_count) { + count = dst->master_ethtool_ops.get_sset_count(dev, + ETH_SS_STATS); + dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); + } + + if (ds->drv->get_ethtool_stats) + ds->drv->get_ethtool_stats(ds, cpu_port, data + count); +} + +static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds = dst->ds[0]; + int count = 0; + + if (dst->master_ethtool_ops.get_sset_count) + count += dst->master_ethtool_ops.get_sset_count(dev, sset); + + if (sset == ETH_SS_STATS && ds->drv->get_sset_count) + count += ds->drv->get_sset_count(ds); + + return count; +} + +static void dsa_cpu_port_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds = dst->ds[0]; + s8 cpu_port = dst->cpu_port; + int len = ETH_GSTRING_LEN; + int mcount = 0, count; + unsigned int i; + uint8_t pfx[4]; + uint8_t *ndata; + + snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port); + /* We do not want to be NULL-terminated, since this is a prefix */ + pfx[sizeof(pfx) - 1] = '_'; + + if (dst->master_ethtool_ops.get_sset_count) { + mcount = dst->master_ethtool_ops.get_sset_count(dev, + ETH_SS_STATS); + dst->master_ethtool_ops.get_strings(dev, stringset, data); + } + + if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + ndata = data + mcount * len; + /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle + * the output after to prepend our CPU port prefix we + * constructed earlier + */ + ds->drv->get_strings(ds, cpu_port, ndata); + count = ds->drv->get_sset_count(ds); + for (i = 0; i < count; i++) { + memmove(ndata + (i * len + sizeof(pfx)), + ndata + i * len, len - sizeof(pfx)); + memcpy(ndata + i * len, pfx, sizeof(pfx)); + } + } +} + static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) @@ -673,10 +745,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - data[0] = p->dev->stats.tx_packets; - data[1] = p->dev->stats.tx_bytes; - data[2] = p->dev->stats.rx_packets; - data[3] = p->dev->stats.rx_bytes; + data[0] = dev->stats.tx_packets; + data[1] = dev->stats.tx_bytes; + data[2] = dev->stats.rx_packets; + data[3] = dev->stats.rx_bytes; if (ds->drv->get_ethtool_stats != NULL) ds->drv->get_ethtool_stats(ds, p->port, data + 4); } @@ -821,6 +893,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_eee = dsa_slave_get_eee, }; +static struct ethtool_ops dsa_cpu_port_ethtool_ops; + static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, @@ -1038,6 +1112,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name) { struct net_device *master = ds->dst->master_netdev; + struct dsa_switch_tree *dst = ds->dst; struct net_device *slave_dev; struct dsa_slave_priv *p; int ret; @@ -1049,6 +1124,19 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->features = master->vlan_features; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; + if (master->ethtool_ops != &dsa_cpu_port_ethtool_ops) { + memcpy(&dst->master_ethtool_ops, master->ethtool_ops, + sizeof(struct ethtool_ops)); + memcpy(&dsa_cpu_port_ethtool_ops, &dst->master_ethtool_ops, + sizeof(struct ethtool_ops)); + dsa_cpu_port_ethtool_ops.get_sset_count = + dsa_cpu_port_get_sset_count; + dsa_cpu_port_ethtool_ops.get_ethtool_stats = + dsa_cpu_port_get_ethtool_stats; + dsa_cpu_port_ethtool_ops.get_strings = + dsa_cpu_port_get_strings; + master->ethtool_ops = &dsa_cpu_port_ethtool_ops; + } eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; @@ -1063,7 +1151,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->dev = slave_dev; p->parent = ds; p->port = port; diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 0d3d709052ca..4b683fd0abf1 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -18,8 +18,9 @@ config HSR earlier. This code is a "best effort" to comply with the HSR standard as - described in IEC 62439-3:2010 (HSRv0), but no compliancy tests have - been made. + described in IEC 62439-3:2010 (HSRv0) and IEC 62439-3:2012 (HSRv1), + but no compliancy tests have been made. Use iproute2 to select + the version you desire. You need to perform any and all necessary tests yourself before relying on this code in a safety critical system! diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c7d1adca30d8..386cbce7bc51 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -90,7 +90,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { + if ((hsr_dev->operstate == IF_OPER_UP) + && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; hsr->announce_timer.expires = jiffies + @@ -250,31 +251,22 @@ static const struct header_ops hsr_header_ops = { .parse = eth_header_parse, }; - -/* HSR:2010 supervision frames should be padded so that the whole frame, - * including headers and FCS, is 64 bytes (without VLAN). - */ -static int hsr_pad(int size) -{ - const int min_size = ETH_ZLEN - HSR_HLEN - ETH_HLEN; - - if (size >= min_size) - return size; - return min_size; -} - -static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) +static void send_hsr_supervision_frame(struct hsr_port *master, + u8 type, u8 hsrVer) { struct sk_buff *skb; int hlen, tlen; + struct hsr_tag *hsr_tag; struct hsr_sup_tag *hsr_stag; struct hsr_sup_payload *hsr_sp; unsigned long irqflags; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; - skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, - GFP_ATOMIC); + skb = dev_alloc_skb( + sizeof(struct hsr_tag) + + sizeof(struct hsr_sup_tag) + + sizeof(struct hsr_sup_payload) + hlen + tlen); if (skb == NULL) return; @@ -282,32 +274,48 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) skb_reserve(skb, hlen); skb->dev = master->dev; - skb->protocol = htons(ETH_P_PRP); + skb->protocol = htons(hsrVer ? ETH_P_HSR : ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; - if (dev_hard_header(skb, skb->dev, ETH_P_PRP, + if (dev_hard_header(skb, skb->dev, (hsrVer ? ETH_P_HSR : ETH_P_PRP), master->hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); - hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag)); + if (hsrVer > 0) { + hsr_tag = (typeof(hsr_tag)) skb_put(skb, sizeof(struct hsr_tag)); + hsr_tag->encap_proto = htons(ETH_P_PRP); + set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); + } - set_hsr_stag_path(hsr_stag, 0xf); - set_hsr_stag_HSR_Ver(hsr_stag, 0); + hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(struct hsr_sup_tag)); + set_hsr_stag_path(hsr_stag, (hsrVer ? 0x0 : 0xf)); + set_hsr_stag_HSR_Ver(hsr_stag, hsrVer); + /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); - hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); - master->hsr->sequence_nr++; + if (hsrVer > 0) { + hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr); + hsr_tag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sup_sequence_nr++; + master->hsr->sequence_nr++; + } else { + hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sequence_nr++; + } spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->HSR_TLV_Type = type; - hsr_stag->HSR_TLV_Length = 12; + /* TODO: Why 12 in HSRv0? */ + hsr_stag->HSR_TLV_Length = hsrVer ? sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ - hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); + hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); + skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + hsr_forward_skb(skb, master); return; @@ -329,19 +337,20 @@ static void hsr_announce(unsigned long data) rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (hsr->announce_count < 3) { - send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE); + if (hsr->announce_count < 3 && hsr->protVersion == 0) { + send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE, + hsr->protVersion); hsr->announce_count++; - } else { - send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK); - } - if (hsr->announce_count < 3) hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - else + } else { + send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, + hsr->protVersion); + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + } if (is_admin_up(master->dev)) add_timer(&hsr->announce_timer); @@ -428,7 +437,7 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec) + unsigned char multicast_spec, u8 protocol_version) { struct hsr_priv *hsr; struct hsr_port *port; @@ -450,6 +459,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], spin_lock_init(&hsr->seqnr_lock); /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; + hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; init_timer(&hsr->announce_timer); hsr->announce_timer.function = hsr_announce; @@ -462,6 +472,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + hsr->protVersion = protocol_version; + /* FIXME: should I modify the value of these? * * - hsr_dev->flags - i.e. diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 108a5d59d2a6..9975e31bbb82 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -17,7 +17,7 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec); + unsigned char multicast_spec, u8 protocol_version); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 7871ed6d3825..5ee1d43f1310 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -50,21 +50,40 @@ struct hsr_frame_info { */ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) { - struct hsr_ethhdr_sp *hdr; + struct ethhdr *ethHdr; + struct hsr_sup_tag *hsrSupTag; + struct hsrv1_ethhdr_sp *hsrV1Hdr; WARN_ON_ONCE(!skb_mac_header_was_set(skb)); - hdr = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + ethHdr = (struct ethhdr *) skb_mac_header(skb); - if (!ether_addr_equal(hdr->ethhdr.h_dest, + /* Correct addr? */ + if (!ether_addr_equal(ethHdr->h_dest, hsr->sup_multicast_addr)) return false; - if (get_hsr_stag_path(&hdr->hsr_sup) != 0x0f) + /* Correct ether type?. */ + if (!(ethHdr->h_proto == htons(ETH_P_PRP) + || ethHdr->h_proto == htons(ETH_P_HSR))) return false; - if ((hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_ANNOUNCE) && - (hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + + /* Get the supervision header from correct location. */ + if (ethHdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ + hsrV1Hdr = (struct hsrv1_ethhdr_sp *) skb_mac_header(skb); + if (hsrV1Hdr->hsr.encap_proto != htons(ETH_P_PRP)) + return false; + + hsrSupTag = &hsrV1Hdr->hsr_sup; + } else { + hsrSupTag = &((struct hsrv0_ethhdr_sp *) skb_mac_header(skb))->hsr_sup; + } + + if ((hsrSupTag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hsrSupTag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) return false; - if (hdr->hsr_sup.HSR_TLV_Length != 12) + if ((hsrSupTag->HSR_TLV_Length != 12) && + (hsrSupTag->HSR_TLV_Length != + sizeof(struct hsr_sup_payload))) return false; return true; @@ -110,7 +129,7 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port) + struct hsr_port *port, u8 protoVersion) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; @@ -131,7 +150,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; - hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); + hsr_ethhdr->ethhdr.h_proto = htons(protoVersion ? + ETH_P_HSR : ETH_P_PRP); } static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, @@ -160,7 +180,7 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port); + hsr_fill_tag(skb, frame, port, port->hsr->protVersion); return skb; } @@ -320,7 +340,8 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, /* FIXME: */ WARN_ONCE(1, "HSR: VLAN not yet supported"); } - if (ethhdr->h_proto == htons(ETH_P_PRP)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) + || ethhdr->h_proto == htons(ETH_P_HSR)) { frame->skb_std = NULL; frame->skb_hsr = skb; frame->sequence_nr = hsr_get_skb_sequence_nr(skb); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index bace124d14ef..7ea925816f79 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -177,17 +177,17 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, return node; } - if (!is_sup) - return NULL; /* Only supervision frame may create node entry */ + /* Everyone may create a node entry, connected node to a HSR device. */ - if (ethhdr->h_proto == htons(ETH_P_PRP)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) + || ethhdr->h_proto == htons(ETH_P_HSR)) { /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); - seq_out = 0; + seq_out = HSR_SEQNR_START; } return hsr_add_node(node_db, ethhdr->h_source, seq_out); @@ -200,17 +200,25 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct hsr_port *port_rcv) { + struct ethhdr *ethhdr; struct hsr_node *node_real; struct hsr_sup_payload *hsr_sp; struct list_head *node_db; int i; - skb_pull(skb, sizeof(struct hsr_ethhdr_sp)); - hsr_sp = (struct hsr_sup_payload *) skb->data; + ethhdr = (struct ethhdr *) skb_mac_header(skb); - if (ether_addr_equal(eth_hdr(skb)->h_source, hsr_sp->MacAddressA)) - /* Not sent from MacAddressB of a PICS_SUBS capable node */ - goto done; + /* Leave the ethernet header. */ + skb_pull(skb, sizeof(struct ethhdr)); + + /* And leave the HSR tag. */ + if (ethhdr->h_proto == htons(ETH_P_HSR)) + skb_pull(skb, sizeof(struct hsr_tag)); + + /* And leave the HSR sup tag. */ + skb_pull(skb, sizeof(struct hsr_sup_tag)); + + hsr_sp = (struct hsr_sup_payload *) skb->data; /* Merge node_curr (registered on MacAddressB) into node_real */ node_db = &port_rcv->hsr->node_db; @@ -225,7 +233,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, /* Node has already been merged */ goto done; - ether_addr_copy(node_real->MacAddressB, eth_hdr(skb)->h_source); + ether_addr_copy(node_real->MacAddressB, ethhdr->h_source); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -241,7 +249,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, kfree_rcu(node_curr, rcu_head); done: - skb_push(skb, sizeof(struct hsr_ethhdr_sp)); + skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); } diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 5a9c69962ded..9b9909e89e9e 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -30,6 +30,7 @@ */ #define MAX_SLAVE_DIFF 3000 /* ms */ #define HSR_SEQNR_START (USHRT_MAX - 1024) +#define HSR_SUP_SEQNR_START (HSR_SEQNR_START / 2) /* How often shall we check for broken ring and remove node entries older than @@ -58,6 +59,8 @@ struct hsr_tag { #define HSR_HLEN 6 +#define HSR_V1_SUP_LSDUSIZE 52 + /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). @@ -131,8 +134,14 @@ static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver) set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver); } -struct hsr_ethhdr_sp { +struct hsrv0_ethhdr_sp { + struct ethhdr ethhdr; + struct hsr_sup_tag hsr_sup; +} __packed; + +struct hsrv1_ethhdr_sp { struct ethhdr ethhdr; + struct hsr_tag hsr; struct hsr_sup_tag hsr_sup; } __packed; @@ -162,6 +171,8 @@ struct hsr_priv { struct timer_list prune_timer; int announce_count; u16 sequence_nr; + u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ + u8 protVersion; /* Indicate if HSRv0 or HSRv1. */ spinlock_t seqnr_lock; /* locking for sequence_nr */ unsigned char sup_multicast_addr[ETH_ALEN]; }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index a2c7e4c0ac1e..d4d1617f43a8 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,7 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, - [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_VERSION] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN }, [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -35,7 +36,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_device *link[2]; - unsigned char multicast_spec; + unsigned char multicast_spec, hsr_version; if (!data) { netdev_info(dev, "HSR: No slave devices specified\n"); @@ -62,7 +63,12 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); - return hsr_dev_finalize(dev, link, multicast_spec); + if (!data[IFLA_HSR_VERSION]) + hsr_version = 0; + else + hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); + + return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); } static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) @@ -115,10 +121,9 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { /* attribute policy */ -/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { - [HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, - [HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_NODE_ADDR] = { .len = ETH_ALEN }, + [HSR_A_NODE_ADDR_B] = { .len = ETH_ALEN }, [HSR_A_IFINDEX] = { .type = NLA_U32 }, [HSR_A_IF1_AGE] = { .type = NLA_U32 }, [HSR_A_IF2_AGE] = { .type = NLA_U32 }, diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 7d37366cc695..f5b60388d02f 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -22,6 +22,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct hsr_port *port; + u16 protocol; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: skb invalid", __func__); @@ -37,7 +38,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) goto finish_consume; } - if (eth_hdr(skb)->h_proto != htons(ETH_P_PRP)) + protocol = eth_hdr(skb)->h_proto; + if (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR)) goto finish_pass; skb_push(skb, ETH_HLEN); diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index b4e17a7c0df0..5ac778962e4e 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -41,24 +41,12 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) return (((__force u64)a->extended_addr) >> 32) ^ (((__force u64)a->extended_addr) & 0xffffffff); case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); + return (__force u32)(a->short_addr + (a->pan_id << 16)); default: return 0; } } -/* private device info */ -struct lowpan_dev_info { - struct net_device *wdev; /* wpan device ptr */ - u16 fragment_tag; -}; - -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) -{ - return (struct lowpan_dev_info *)lowpan_priv(dev)->priv; -} - int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); void lowpan_net_frag_exit(void); int lowpan_net_frag_init(void); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 0023c9048812..dd085db8580e 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -148,7 +148,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, return -EBUSY; } - lowpan_dev_info(ldev)->wdev = wdev; + lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom @@ -173,7 +173,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, static void lowpan_dellink(struct net_device *ldev, struct list_head *head) { - struct net_device *wdev = lowpan_dev_info(ldev)->wdev; + struct net_device *wdev = lowpan_802154_dev(ldev)->wdev; ASSERT_RTNL(); @@ -184,7 +184,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head) static struct rtnl_link_ops lowpan_link_ops __read_mostly = { .kind = "lowpan", - .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev_info)), + .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)), .setup = lowpan_setup, .newlink = lowpan_newlink, .dellink = lowpan_dellink, diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index d4353faced35..e459afd16bb3 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -84,7 +84,7 @@ static struct sk_buff* lowpan_alloc_frag(struct sk_buff *skb, int size, const struct ieee802154_hdr *master_hdr, bool frag1) { - struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev; + struct net_device *wdev = lowpan_802154_dev(skb->dev)->wdev; struct sk_buff *frag; int rc; @@ -148,8 +148,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev, int frag_cap, frag_len, payload_cap, rc; int skb_unprocessed, skb_offset; - frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag); - lowpan_dev_info(ldev)->fragment_tag++; + frag_tag = htons(lowpan_802154_dev(ldev)->fragment_tag); + lowpan_802154_dev(ldev)->fragment_tag++; frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); frag_hdr[1] = dgram_size & 0xff; @@ -208,7 +208,7 @@ err: static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, u16 *dgram_size, u16 *dgram_offset) { - struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr; + struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr; struct ieee802154_addr sa, da; struct ieee802154_mac_cb *cb = mac_cb_init(skb); struct lowpan_addr_info info; @@ -248,8 +248,8 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, cb->ackreq = wpan_dev->ackreq; } - return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa, - 0); + return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da, + &sa, 0); } netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) @@ -283,7 +283,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) max_single = ieee802154_max_payload(&wpan_hdr); if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(ldev)->wdev; + skb->dev = lowpan_802154_dev(ldev)->wdev; ldev->stats.tx_packets++; ldev->stats.tx_bytes += dgram_size; return dev_queue_xmit(skb); diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 3503c38954f9..d3cbb3258718 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -34,9 +34,11 @@ #include "ieee802154.h" -static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr) +static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr, + int padattr) { - return nla_put_u64(msg, type, swab64((__force u64)hwaddr)); + return nla_put_u64_64bit(msg, type, swab64((__force u64)hwaddr), + padattr); } static __le64 nla_get_hwaddr(const struct nlattr *nla) @@ -623,7 +625,8 @@ ieee802154_llsec_fill_key_id(struct sk_buff *msg, if (desc->device_addr.mode == IEEE802154_ADDR_LONG && nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, - desc->device_addr.extended_addr)) + desc->device_addr.extended_addr, + IEEE802154_ATTR_PAD)) return -EMSGSIZE; } @@ -638,7 +641,7 @@ ieee802154_llsec_fill_key_id(struct sk_buff *msg, if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, - desc->extended_source)) + desc->extended_source, IEEE802154_ATTR_PAD)) return -EMSGSIZE; return 0; @@ -1063,7 +1066,8 @@ ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, desc->short_addr) || - nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr, + IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, desc->frame_counter) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, @@ -1167,7 +1171,8 @@ ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || - nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr, + IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, devkey->frame_counter) || ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 16ef0d9f566e..ca207dbf673b 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -722,7 +722,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg, break; case NL802154_DEV_ADDR_EXTENDED: if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED, - desc->device_addr.extended_addr)) + desc->device_addr.extended_addr, + NL802154_DEV_ADDR_ATTR_PAD)) return -ENOBUFS; break; default: @@ -742,7 +743,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg, break; case NL802154_KEY_ID_MODE_INDEX_EXTENDED: if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED, - desc->extended_source)) + desc->extended_source, + NL802154_KEY_ID_ATTR_PAD)) return -ENOBUFS; break; default: @@ -811,7 +813,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx) || nla_put_u32(msg, NL802154_ATTR_IFTYPE, wpan_dev->iftype) || - nla_put_u64(msg, NL802154_ATTR_WPAN_DEV, wpan_dev_id(wpan_dev)) || + nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV, + wpan_dev_id(wpan_dev), NL802154_ATTR_PAD) || nla_put_u32(msg, NL802154_ATTR_GENERATION, rdev->devlist_generation ^ (cfg802154_rdev_list_generation << 2))) @@ -819,7 +822,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, /* address settings */ if (nla_put_le64(msg, NL802154_ATTR_EXTENDED_ADDR, - wpan_dev->extended_addr) || + wpan_dev->extended_addr, + NL802154_ATTR_PAD) || nla_put_le16(msg, NL802154_ATTR_SHORT_ADDR, wpan_dev->short_addr) || nla_put_le16(msg, NL802154_ATTR_PAN_ID, wpan_dev->pan_id)) @@ -1074,6 +1078,11 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; + if (wpan_dev->lowpan_dev) { + if (netif_running(wpan_dev->lowpan_dev)) + return -EBUSY; + } + /* don't change address fields on monitor */ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || !info->attrs[NL802154_ATTR_PAN_ID]) @@ -1105,6 +1114,11 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; + if (wpan_dev->lowpan_dev) { + if (netif_running(wpan_dev->lowpan_dev)) + return -EBUSY; + } + /* don't change address fields on monitor */ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || !info->attrs[NL802154_ATTR_SHORT_ADDR]) @@ -1614,7 +1628,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR, dev_desc->short_addr) || nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR, - dev_desc->hwaddr) || + dev_desc->hwaddr, NL802154_DEV_ATTR_PAD) || nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT, dev_desc->seclevel_exempt) || nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode)) @@ -1778,7 +1792,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, goto nla_put_failure; if (nla_put_le64(msg, NL802154_DEVKEY_ATTR_EXTENDED_ADDR, - extended_addr) || + extended_addr, NL802154_DEVKEY_ATTR_PAD) || nla_put_u32(msg, NL802154_DEVKEY_ATTR_FRAME_COUNTER, devkey->frame_counter)) goto nla_put_failure; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8217cd22f921..2e6e65fc4d20 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1195,12 +1195,12 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { + bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; - bool udpfrag, encap; struct iphdr *iph; - int proto; + int proto, tot_len; int nhoff; int ihl; int id; @@ -1217,7 +1217,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TUNNEL_REMCSUM | + SKB_GSO_PARTIAL | 0))) goto out; @@ -1248,11 +1250,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) - udpfrag = proto == IPPROTO_UDP && encap; - else - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + if (!skb->encapsulation || encap) { + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); + + /* fixed ID is invalid if DF bit is not set */ + if (fixedid && !(iph->frag_off & htons(IP_DF))) + goto out; + } ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) @@ -1265,15 +1270,25 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (udpfrag) { - iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; + tot_len = skb->len - nhoff; + } else if (skb_is_gso(skb)) { + if (!fixedid) { + iph->id = htons(id); + id += skb_shinfo(skb)->gso_segs; + } + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; } else { - iph->id = htons(id++); + if (!fixedid) + iph->id = htons(id++); + tot_len = skb->len - nhoff; } - iph->tot_len = htons(skb->len - nhoff); + iph->tot_len = htons(tot_len); ip_send_check(iph); if (encap) skb_reset_inner_headers(skb); @@ -1325,6 +1340,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, for (p = *head; p; p = p->next) { struct iphdr *iph2; + u16 flush_id; if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -1348,16 +1364,36 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, (iph->tos ^ iph2->tos) | ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); - /* Save the IP ID check to be included later when we get to - * the transport layer so only the inner most IP ID is checked. - * This is because some GSO/TSO implementations do not - * correctly increment the IP ID for the outer hdrs. - */ - NAPI_GRO_CB(p)->flush_id = - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; + + /* We need to store of the IP ID check to be included later + * when we can verify that this packet does in fact belong + * to a given flow. + */ + flush_id = (u16)(id - ntohs(iph2->id)); + + /* This bit of code makes it much easier for us to identify + * the cases where we are doing atomic vs non-atomic IP ID + * checks. Specifically an atomic check can return IP ID + * values 0 - 0xFFFF, while a non-atomic check can only + * return 0 or 0xFFFF. + */ + if (!NAPI_GRO_CB(p)->is_atomic || + !(iph->frag_off & htons(IP_DF))) { + flush_id ^= NAPI_GRO_CB(p)->count; + flush_id = flush_id ? 0xFFFF : 0; + } + + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = flush_id; + else + NAPI_GRO_CB(p)->flush_id |= flush_id; } + NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF)); NAPI_GRO_CB(skb)->flush |= flush; skb_set_network_header(skb, off); /* The above will be needed by the transport layer if there is one diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c34c7544d1db..89a8cac4726a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -436,7 +436,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { - NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); + __NET_INC_STATS(net, LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8a9246deccfe..63566ec54794 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -904,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { - pr_warn("%s: bug: prim == NULL\n", __func__); + /* if the device has been deleted, we don't perform + * address promotion + */ + if (!in_dev->dead) + pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index d039f8fff57f..7ac5ec87b004 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -802,11 +802,11 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; __be16 sport; + int err; - skb = iptunnel_handle_offloads(skb, type); - - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), skb, 0, 0, false); @@ -826,6 +826,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, __be16 sport; void *data; bool need_priv = false; + int err; if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -836,10 +837,9 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, optlen += need_priv ? GUE_LEN_PRIV : 0; - skb = iptunnel_handle_offloads(skb, type); - - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; /* Get source port (based on flow hash) before skb_push */ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index d9c552a721fc..371674801e84 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -60,6 +60,70 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); +int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *ret_hdr_len) +{ + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (skb_checksum_simple_validate(skb)) { + *csum_err = true; + return -EINVAL; + } + + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else { + tpi->key = 0; + } + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else { + tpi->seq = 0; + } + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + *ret_hdr_len = hdr_len; + return 0; +} +EXPORT_SYMBOL(gre_parse_header); + static int gre_rcv(struct sk_buff *skb) { const struct gre_protocol *proto; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6a5bd4317866..e88190a8699a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -32,10 +32,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT))) + SKB_GSO_SIT | + SKB_GSO_PARTIAL))) goto out; if (!skb->encapsulation) @@ -86,7 +88,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb = segs; do { struct gre_base_hdr *greh; - __be32 *pcsum; + __sum16 *pcsum; /* Set up inner headers if we are offloading inner checksum */ if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -106,10 +108,25 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, continue; greh = (struct gre_base_hdr *)skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); + pcsum = (__sum16 *)(greh + 1); + + if (skb_is_gso(skb)) { + unsigned int partial_adj; + + /* Adjust checksum to account for the fact that + * the partial checksum is based on actual size + * whereas headers should be based on MSS size. + */ + partial_adj = skb->len + skb_headroom(skb) - + SKB_GSO_CB(skb)->data_offset - + skb_shinfo(skb)->gso_size; + *pcsum = ~csum_fold((__force __wsum)htonl(partial_adj)); + } else { + *pcsum = 0; + } - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); + *(pcsum + 1) = 0; + *pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs; @@ -275,6 +292,18 @@ static const struct net_offload gre_offload = { static int __init gre_offload_init(void) { - return inet_add_offload(&gre_offload, IPPROTO_GRE); + int err; + + err = inet_add_offload(&gre_offload, IPPROTO_GRE); +#if IS_ENABLED(CONFIG_IPV6) + if (err) + return err; + + err = inet6_add_offload(&gre_offload, IPPROTO_GRE); + if (err) + inet_del_offload(&gre_offload, IPPROTO_GRE); +#endif + + return err; } device_initcall(gre_offload_init); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6333489771ed..38abe70e595f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) { - ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS); + __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS); ip_flush_pending_frames(sk); } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); @@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) * avoid additional coding at protocol handlers. */ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); return; } @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) out: return true; out_err: - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return false; } @@ -877,7 +877,7 @@ out_err: static bool icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); return false; } @@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb) return true; out_err: - ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); return false; } @@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); + __ICMP_INC_STATS(net, ICMP_MIB_INMSGS); if (skb_checksum_simple_validate(skb)) goto csum_error; @@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); - ICMPMSGIN_INC_STATS_BH(net, icmph->type); + ICMPMSGIN_INC_STATS(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1052,9 +1052,9 @@ drop: kfree_skb(skb); return 0; csum_error: - ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); goto drop; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ab69da2d2a77..7ce112aa3a7b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, route_err: ip_rt_put(rt); no_route: - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_req); @@ -466,7 +466,7 @@ route_err: ip_rt_put(rt); no_route: rcu_read_unlock(); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bd591eb81ec9..25af1243649b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -66,7 +66,7 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) mutex_unlock(&inet_diag_table_mutex); } -static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) { r->idiag_family = sk->sk_family; @@ -89,6 +89,7 @@ static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) r->id.idiag_dst[0] = sk->sk_daddr; } } +EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); static size_t inet_sk_attr_size(void) { @@ -104,13 +105,50 @@ static size_t inet_sk_attr_size(void) + 64; } +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns) +{ + const struct inet_sock *inet = inet_sk(sk); + + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. + */ + if (ext & (1 << (INET_DIAG_TOS - 1))) + if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) + goto errout; + +#if IS_ENABLED(CONFIG_IPV6) + if (r->idiag_family == AF_INET6) { + if (ext & (1 << (INET_DIAG_TCLASS - 1))) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) + goto errout; + + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) + goto errout; + } +#endif + + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->idiag_inode = sock_i_ino(sk); + + return 0; +errout: + return 1; +} +EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - const struct inet_sock *inet = inet_sk(sk); const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; int ext = req->idiag_ext; @@ -135,32 +173,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) goto errout; - /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, - * hence this needs to be included regardless of socket family. - */ - if (ext & (1 << (INET_DIAG_TOS - 1))) - if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) - goto errout; - -#if IS_ENABLED(CONFIG_IPV6) - if (r->idiag_family == AF_INET6) { - if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, - inet6_sk(sk)->tclass) < 0) - goto errout; - - if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) - goto errout; - } -#endif - - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = sock_i_ino(sk); - if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), @@ -182,31 +197,32 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto out; } -#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_expires = + jiffies_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_expires = + jiffies_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); + r->idiag_expires = + jiffies_to_msecs(sk->sk_timer.expires - jiffies); } else { r->idiag_timer = 0; r->idiag_expires = 0; } -#undef EXPIRES_IN_MS if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { - attr = nla_reserve(skb, INET_DIAG_INFO, - handler->idiag_info_size); + attr = nla_reserve_64bit(skb, INET_DIAG_INFO, + handler->idiag_info_size, + INET_DIAG_PAD); if (!attr) goto errout; @@ -1063,7 +1079,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) } attr = handler->idiag_info_size - ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size) + ? nla_reserve_64bit(skb, INET_DIAG_INFO, + handler->idiag_info_size, + INET_DIAG_PAD) : NULL; if (attr) info = nla_data(attr); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fcadb670f50b..3177211ab651 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -360,7 +360,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -479,7 +479,11 @@ int __inet_hash(struct sock *sk, struct sock *osk, if (err) goto unlock; } - hlist_add_head_rcu(&sk->sk_node, &ilb->head); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + else + hlist_add_head_rcu(&sk->sk_node, &ilb->head); sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c67f9bd7699c..99ee5c4a9b68 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -147,9 +147,9 @@ static void tw_timer_handler(unsigned long data) struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; if (tw->tw_kill) - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); else - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED); inet_twsk_kill(tw); } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index af18f1e4889e..cbfb1808fcc4 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -65,8 +65,8 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); + __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -157,7 +157,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index efbd47d1a531..bbe7f72db9c1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg) goto out; ipq_kill(qp); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; @@ -291,7 +291,7 @@ static int ip_frag_too_far(struct ipq *qp) struct net *net; net = container_of(qp->q.net, struct net, ipv4.frags); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); } return rc; @@ -635,7 +635,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ip_send_check(iph); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; return 0; @@ -647,7 +647,7 @@ out_nomem: out_oversize: net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); out_fail: - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); return err; } @@ -658,7 +658,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) int vif = l3mdev_master_ifindex_rcu(dev); struct ipq *qp; - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); skb_orphan(skb); /* Lookup (or create) queue header */ @@ -675,7 +675,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) return ret; } - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index af5d1f38217f..2480d79b0e37 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -122,125 +122,6 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_net_id __read_mostly; static int gre_tap_net_id __read_mostly; -static int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags & TUNNEL_CSUM) - addend += 4; - if (o_flags & TUNNEL_KEY) - addend += 4; - if (o_flags & TUNNEL_SEQ) - addend += 4; - return addend; -} - -static __be16 gre_flags_to_tnl_flags(__be16 flags) -{ - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; -} - -static __be16 tnl_flags_to_gre_flags(__be16 tflags) -{ - __be16 flags = 0; - - if (tflags & TUNNEL_CSUM) - flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) - flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) - flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) - flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) - flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) - flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) - flags |= GRE_VERSION; - - return flags; -} - -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err) -{ - const struct gre_base_hdr *greh; - __be32 *options; - int hdr_len; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { - *csum_err = true; - return -EINVAL; - } - - skb_checksum_try_convert(skb, IPPROTO_GRE, 0, - null_compute_pseudo); - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else { - tpi->key = 0; - } - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else { - tpi->seq = 0; - } - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - hdr_len += 4; - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - } - } - return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); -} - static void ipgre_err(struct sk_buff *skb, u32 info, const struct tnl_ptk_info *tpi) { @@ -340,12 +221,16 @@ static void gre_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (gre_parse_header(skb, &tpi, &csum_err, &hdr_len)) { if (!csum_err) /* ignore csum errors. */ return; } + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) + return; + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, skb->dev->ifindex, 0, IPPROTO_GRE, 0); @@ -419,6 +304,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -428,7 +314,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + if (gre_parse_header(skb, &tpi, &csum_err, &hdr_len) < 0) + goto drop; + + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) @@ -440,49 +329,6 @@ drop: return 0; } -static __sum16 gre_checksum(struct sk_buff *skb) -{ - __wsum csum; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - csum = lco_csum(skb); - else - csum = skb_checksum(skb, 0, skb->len, 0); - return csum_fold(csum); -} - -static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, - __be16 proto, __be32 key, __be32 seq) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - skb_reset_transport_header(skb); - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(flags); - greh->protocol = proto; - - if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (flags & TUNNEL_SEQ) { - *ptr = seq; - ptr--; - } - if (flags & TUNNEL_KEY) { - *ptr = key; - ptr--; - } - if (flags & TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & - (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { - *ptr = 0; - *(__sum16 *)ptr = gre_checksum(skb); - } - } -} - static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) @@ -493,15 +339,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->o_seqno++; /* Push GRE header. */ - build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, - proto, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + gre_build_header(skb, tunnel->tun_hlen, + tunnel->parms.o_flags, proto, tunnel->parms.o_key, + htonl(tunnel->o_seqno)); skb_set_inner_protocol(skb, proto); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } -static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool csum) +static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } @@ -553,7 +399,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) fl.saddr); } - tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); + tunnel_hlen = gre_calc_hlen(key->tun_flags); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr); @@ -568,15 +414,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) } /* Push Tunnel header. */ - skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)); - if (IS_ERR(skb)) { - skb = NULL; + if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) goto err_free_rt; - } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); - build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), - tunnel_id_to_key(tun_info->key.tun_id), 0); + gre_build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), + tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -640,16 +483,14 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; + if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + goto free_skb; __gre_xmit(skb, dev, tnl_params, skb->protocol); return NETDEV_TX_OK; free_skb: kfree_skb(skb); -out: dev->stats.tx_dropped++; return NETDEV_TX_OK; } @@ -664,9 +505,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; + if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; @@ -676,7 +516,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, free_skb: kfree_skb(skb); -out: dev->stats.tx_dropped++; return NETDEV_TX_OK; } @@ -702,8 +541,8 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (err) return err; - p.i_flags = tnl_flags_to_gre_flags(p.i_flags); - p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) return -EFAULT; @@ -747,7 +586,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); greh = (struct gre_base_hdr *)(iph+1); - greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); greh->protocol = htons(type); memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); @@ -848,7 +687,7 @@ static void __gre_tunnel_init(struct net_device *dev) int t_hlen; tunnel = netdev_priv(dev); - tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); tunnel->parms.iph.protocol = IPPROTO_GRE; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; @@ -1163,8 +1002,10 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, + gre_tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e3d782746d9d..751c0658e194 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -218,17 +218,17 @@ static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_b protocol = -ret; goto resubmit; } - IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); + __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } kfree_skb(skb); } else { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } @@ -273,7 +273,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) --ANK (980813) */ if (skb_cow(skb, skb_headroom(skb))) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -282,7 +282,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS); goto drop; } @@ -337,7 +337,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) - NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER); + __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); goto drop; } } @@ -358,9 +358,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) { - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { struct in_device *in_dev = __in_dev_get_rcu(skb->dev); @@ -409,11 +409,11 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, net = dev_net(dev); - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto out; } @@ -439,9 +439,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); - IP_ADD_STATS_BH(net, - IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), - max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + __IP_ADD_STATS(net, + IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; @@ -453,7 +453,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -463,7 +463,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -480,9 +480,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, ip_rcv_finish); csum_error: - IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 279471c4e58f..bdb222c0c6a2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -510,9 +510,10 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto out_free_skb; - + if (unlikely(err)) { + kfree_skb(skb); + return err; + } sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); @@ -544,8 +545,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; -out_free_skb: - kfree_skb(skb); + consume_skb(skb); out: return err; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 43445df61efd..9118b0e640ba 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -146,8 +146,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, } EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, - int gso_type_mask) +int iptunnel_handle_offloads(struct sk_buff *skb, + int gso_type_mask) { int err; @@ -157,11 +157,11 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, } if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); + err = skb_header_unclone(skb, GFP_ATOMIC); if (unlikely(err)) - goto error; + return err; skb_shinfo(skb)->gso_type |= gso_type_mask; - return skb; + return 0; } if (skb->ip_summed != CHECKSUM_PARTIAL) { @@ -174,10 +174,7 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, skb->encapsulation = 0; } - return skb; -error: - kfree_skb(skb); - return ERR_PTR(err); + return 0; } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); @@ -274,7 +271,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id, + LWTUNNEL_IP_PAD) || nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || @@ -287,7 +285,7 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { - return nla_total_size(8) /* LWTUNNEL_IP_ID */ + return nla_total_size_64bit(8) /* LWTUNNEL_IP_ID */ + nla_total_size(4) /* LWTUNNEL_IP_DST */ + nla_total_size(4) /* LWTUNNEL_IP_SRC */ + nla_total_size(1) /* LWTUNNEL_IP_TOS */ @@ -369,7 +367,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id, + LWTUNNEL_IP6_PAD) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || @@ -382,7 +381,7 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { - return nla_total_size(8) /* LWTUNNEL_IP6_ID */ + return nla_total_size_64bit(8) /* LWTUNNEL_IP6_ID */ + nla_total_size(16) /* LWTUNNEL_IP6_DST */ + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ec51d02166de..92827483ee3d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -219,9 +219,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); - if (IS_ERR(skb)) - goto out; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); @@ -230,7 +229,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tx_error: kfree_skb(skb); -out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 395e2814a46d..21a38e296fe2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2104,7 +2104,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; @@ -2237,7 +2237,7 @@ static size_t mroute_msgsize(bool unresolved, int maxvif) + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ - + nla_total_size(sizeof(struct rta_mfc_stats)) + + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4133b0f513af..60f5161abcb4 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -367,6 +367,18 @@ static inline bool unconditional(const struct arpt_entry *e) memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } +static bool find_jump_target(const struct xt_table_info *t, + const struct arpt_entry *target) +{ + struct arpt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ @@ -439,6 +451,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct arpt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -447,20 +461,18 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { - if (newpos > newinfo->size - - sizeof(struct arpt_entry)) { - duprintf("mark_source_chains: " - "bad verdict (%i)\n", - newpos); - return 0; - } - /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct arpt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct arpt_entry *) (entry0 + newpos); @@ -474,23 +486,6 @@ next: return 1; } -static inline int check_entry(const struct arpt_entry *e) -{ - const struct xt_entry_target *t; - - if (!arp_checkentry(&e->arp)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); @@ -586,7 +581,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -691,10 +690,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, } } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { - duprintf("Looping hook\n"); + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; - } /* Finally, each sanity check must pass */ i = 0; @@ -1126,55 +1123,17 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1182,7 +1141,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1209,6 +1168,18 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; @@ -1217,20 +1188,17 @@ static inline void compat_release_entry(struct compat_arpt_entry *e) module_put(t->u.kernel.target->me); } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || @@ -1247,8 +1215,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (ret) return ret; @@ -1272,17 +1243,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (ret) goto release_target; - /* Check hooks & underflows */ - for (h = 0; h < NF_ARP_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; release_target: @@ -1291,18 +1251,17 @@ out: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1323,148 +1282,82 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } -static int translate_compat_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info **pinfo, +static int translate_compat_table(struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; - struct arpt_entry *iter1; + struct arpt_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, number); + xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone */ + xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - iter1->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(iter1->counters.pcnt)) { - ret = -ENOMEM; - break; - } - - ret = check_target(iter1, name); - if (ret != 0) { - xt_percpu_counter_free(iter1->counters.pcnt); - break; - } - ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1472,31 +1365,18 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(NFPROTO_ARP); - xt_compat_unlock(NFPROTO_ARP); - goto out; } -struct compat_arpt_replace { - char name[XT_TABLE_MAXNAMELEN]; - u32 valid_hooks; - u32 num_entries; - u32 size; - u32 hook_entry[NF_ARP_NUMHOOKS]; - u32 underflow[NF_ARP_NUMHOOKS]; - u32 num_counters; - compat_uptr_t counters; - struct compat_arpt_entry entries[0]; -}; - static int compat_do_replace(struct net *net, void __user *user, unsigned int len) { @@ -1510,8 +1390,6 @@ static int compat_do_replace(struct net *net, void __user *user, return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) @@ -1529,10 +1407,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index dd8c80dc32a2..8f8713b4388f 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -81,6 +81,12 @@ static int __init arptable_filter_init(void) return ret; } + ret = arptable_filter_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&arptable_filter_net_ops); + kfree(arpfilter_ops); + } + return ret; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 631c100a1338..735d1ee8c1ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -443,6 +443,18 @@ ipt_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ipt_entry *target) +{ + struct ipt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -520,6 +532,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ipt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -528,19 +542,18 @@ mark_source_chains(const struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { - if (newpos > newinfo->size - - sizeof(struct ipt_entry)) { - duprintf("mark_source_chains: " - "bad verdict (%i)\n", - newpos); - return 0; - } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ipt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ipt_entry *) (entry0 + newpos); @@ -568,25 +581,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e) -{ - const struct xt_entry_target *t; - - if (!ip_checkentry(&e->ip)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - -static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; @@ -750,7 +744,11 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1309,55 +1307,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1365,7 +1325,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1444,7 +1404,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ipt_ip *ip, int *size) { @@ -1479,17 +1438,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || @@ -1506,8 +1462,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1515,7 +1474,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ip, &off); + ret = compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; @@ -1538,17 +1497,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1562,19 +1510,18 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1583,201 +1530,105 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) -{ - struct xt_entry_match *ematch; - struct xt_mtchk_param mtpar; - unsigned int j; - int ret = 0; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ip; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV4; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; - struct ipt_entry *iter1; + struct ipt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, number); + xt_compat_init_offsets(AF_INET, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. + */ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1785,17 +1636,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET); - xt_compat_unlock(AF_INET); - goto out; } static int @@ -1811,8 +1661,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) @@ -1831,10 +1679,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 02c62299d717..8c8c655bb2c4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -915,11 +915,11 @@ static int ip_error(struct sk_buff *skb) if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: - IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: - IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } goto out; @@ -934,7 +934,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: code = ICMP_PKT_FILTERED; @@ -1438,9 +1438,9 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #endif } -static struct rtable *rt_dst_alloc(struct net_device *dev, - unsigned int flags, u16 type, - bool nopolicy, bool noxfrm, bool will_cache) +struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, + bool nopolicy, bool noxfrm, bool will_cache) { struct rtable *rt; @@ -1468,6 +1468,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, return rt; } +EXPORT_SYMBOL(rt_dst_alloc); /* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2045,6 +2046,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ if (fi && res->prefixlen < 4) fi = NULL; + } else if ((type == RTN_LOCAL) && (orig_oif != 0) && + (orig_oif != dev_out->ifindex)) { + /* For local routes that require a particular output interface + * we do not want to cache the result. Caching the result + * causes incorrect behaviour when there are multiple source + * addresses on the interface, the end result being that if the + * intended recipient is waiting on that interface for the + * packet he won't receive it because it will be delivered on + * the loopback interface and the IP_PKTINFO ipi_ifindex will + * be set to the loopback interface as well. + */ + fi = NULL; } fnhe = NULL; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4c04f09338e3..e3c4043c27de 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -312,11 +312,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v4_check(ip_hdr(skb), th, cookie); if (mss == 0) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4d73858991af..b945c2b046c5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -430,14 +430,15 @@ EXPORT_SYMBOL(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (sk->sk_tsflags || tsflags) { + if (tsflags) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) + tcb->txstamp_ack = 1; + if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; - tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); } } @@ -908,7 +909,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, int copy, i; bool can_coalesce; - if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { + if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 || + !tcp_skb_can_collapse_to(skb)) { new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -1134,11 +1136,12 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) /* This should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - mss_now = tcp_send_mss(sk, &size_goal, flags); - /* Ok commence sending. */ copied = 0; +restart: + mss_now = tcp_send_mss(sk, &size_goal, flags); + err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; @@ -1156,7 +1159,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) copy = max - skb->len; } - if (copy <= 0) { + if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -1164,6 +1167,9 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; + if (sk_flush_backlog(sk)) + goto restart; + skb = sk_stream_alloc_skb(sk, select_size(sk, sg), sk->sk_allocation, @@ -1250,6 +1256,8 @@ new_segment: copied += copy; if (!msg_data_left(msg)) { tcp_tx_timestamp(sk, sockc.tsflags, skb); + if (unlikely(flags & MSG_EOR)) + TCP_SKB_CB(skb)->eor = 1; goto out; } @@ -1443,14 +1451,10 @@ static void tcp_prequeue_process(struct sock *sk) struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED); - /* RX process wants to run with disabled BHs, though it is not - * necessary */ - local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb); - local_bh_enable(); /* Clear memory counter. */ tp->ucopy.memory = 0; @@ -1777,7 +1781,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, chunk = len - tp->ucopy.len; if (chunk != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } @@ -1789,7 +1793,7 @@ do_prequeue: chunk = len - tp->ucopy.len; if (chunk != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1875,7 +1879,7 @@ skip_copy: tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -2065,13 +2069,13 @@ void tcp_close(struct sock *sk, long timeout) sk->sk_prot->disconnect(sk, 0); } else if (data_was_unread) { /* Unread data was tossed, zap the connection. */ - NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, sk->sk_allocation); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); - NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); } else if (tcp_close_state(sk)) { /* We FIN if the application ate all the data before * zapping the connection. @@ -2148,7 +2152,7 @@ adjudge_to_death: if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(sock_net(sk), + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { const int tmo = tcp_fin_time(sk); @@ -2167,7 +2171,7 @@ adjudge_to_death: if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(sock_net(sk), + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } } @@ -3091,7 +3095,7 @@ void tcp_done(struct sock *sk) struct request_sock *req = tcp_sk(sk)->fastopen_rsk; if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 167b6a3e1b98..ccce8a55f1e1 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk) ca->last_ack = now_us; if (after(now_us, ca->round_start + base_owd)) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; return; } @@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk) 125U); if (ca->rtt.min > thresh) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 448c2615fece..0ce946e395e1 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay) ca->last_ack = now; if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { ca->found |= HYSTART_ACK_TRAIN; - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } @@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay) if (ca->curr_rtt > ca->delay_min + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { ca->found |= HYSTART_DELAY; - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index cffd8f9ed1a9..54d9f9b0120f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -255,9 +255,9 @@ static bool tcp_fastopen_queue_check(struct sock *sk) spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { + __NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); spin_unlock(&fastopenq->lock); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); return false; } fastopenq->rskq_rst_head = req1->dl_next; @@ -282,7 +282,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct sock *child; if (foc->len == 0) /* Client requests a cookie */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && @@ -311,13 +311,13 @@ fastopen: child = tcp_fastopen_create_child(sk, skb, dst, req); if (child) { foc->len = -1; - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVE); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVE); return child; } - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else if (foc->len > 0) /* Client presents an invalid cookie */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); valid_foc.exp = foc->exp; *foc = valid_foc; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 983f04c11177..6171f92be090 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -869,7 +869,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, else mib_idx = LINUX_MIB_TCPSACKREORDER; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -1062,7 +1062,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { dup_sack = true; tcp_dsack_seen(tp); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); @@ -1071,7 +1071,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, !before(start_seq_0, start_seq_1)) { dup_sack = true; tcp_dsack_seen(tp); - NET_INC_STATS_BH(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV); } } @@ -1289,7 +1289,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED); return false; } @@ -1303,16 +1303,18 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, } TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) TCP_SKB_CB(prev)->end_seq++; if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); + tcp_skb_collapse_tstamp(prev, skb); tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); return true; } @@ -1367,6 +1369,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; + if (!tcp_skb_can_collapse_to(prev)) + goto fallback; + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); @@ -1468,7 +1473,7 @@ noop: return skb; fallback: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); return NULL; } @@ -1656,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, mib_idx = LINUX_MIB_TCPSACKDISCARD; } - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -1908,7 +1913,7 @@ void tcp_enter_loss(struct sock *sk) skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; } @@ -2394,7 +2399,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) else mib_idx = LINUX_MIB_TCPFULLUNDO; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2416,7 +2421,7 @@ static bool tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); return true; } return false; @@ -2431,10 +2436,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUSRTOS); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); @@ -2558,7 +2563,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; icsk->icsk_mtup.probe_size = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); } static void tcp_mtup_probe_success(struct sock *sk) @@ -2578,7 +2583,7 @@ static void tcp_mtup_probe_success(struct sock *sk) icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; icsk->icsk_mtup.probe_size = 0; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); } /* Do a simple retransmit without using the backoff mechanisms in @@ -2642,7 +2647,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) else mib_idx = LINUX_MIB_TCPSACKRECOVERY; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; tcp_init_undo(tp); @@ -2735,7 +2740,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked) DBGUNDO(sk, "partial recovery"); tcp_undo_cwnd_reduction(sk, true); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); tcp_try_keep_open(sk); return true; } @@ -3086,8 +3091,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, return; shinfo = skb_shinfo(skb); - if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)) + if (!before(shinfo->tskey, prior_snd_una) && + before(shinfo->tskey, tcp_sk(sk)->snd_una)) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); } @@ -3429,7 +3434,7 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { - NET_INC_STATS_BH(net, mib_idx); + NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } } @@ -3462,7 +3467,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) challenge_count = 0; } if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } } @@ -3511,8 +3516,8 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); } else if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ @@ -3616,14 +3621,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { u32 ack_ev_flags = CA_ACK_SLOWPATH; if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); @@ -4126,7 +4131,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) else mib_idx = LINUX_MIB_TCPDSACKOFOSENT; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; @@ -4150,7 +4155,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { @@ -4300,7 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk, atomic_add(delta, &sk->sk_rmem_alloc); sk_mem_charge(sk, delta); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; @@ -4388,7 +4393,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_ecn_check_ce(tp, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); tcp_drop(sk, skb); return; } @@ -4397,7 +4402,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tp->pred_flags = 0; inet_csk_schedule_ack(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -4452,7 +4457,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); @@ -4491,7 +4496,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } @@ -4606,14 +4611,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __set_current_state(TASK_RUNNING); - local_bh_enable(); if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { tp->ucopy.len -= chunk; tp->copied_seq += chunk; eaten = (chunk == skb->len); tcp_rcv_space_adjust(sk); } - local_bh_disable(); } if (eaten <= 0) { @@ -4656,7 +4659,7 @@ queue_and_out: if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { /* A retransmit, 2nd most common case. Force an immediate ack. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: @@ -4702,7 +4705,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, __skb_unlink(skb, list); __kfree_skb(skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } @@ -4861,7 +4864,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) bool res = false; if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); __skb_queue_purge(&tp->out_of_order_queue); /* Reset SACK state. A conforming SACK implementation will @@ -4890,7 +4893,7 @@ static int tcp_prune_queue(struct sock *sk) SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); @@ -4920,7 +4923,7 @@ static int tcp_prune_queue(struct sock *sk) * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ tp->pred_flags = 0; @@ -5129,7 +5132,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) int chunk = skb->len - hlen; int err; - local_bh_enable(); if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); else @@ -5141,32 +5143,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) tcp_rcv_space_adjust(sk); } - local_bh_disable(); return err; } -static __sum16 __tcp_checksum_complete_user(struct sock *sk, - struct sk_buff *skb) -{ - __sum16 result; - - if (sock_owned_by_user(sk)) { - local_bh_enable(); - result = __tcp_checksum_complete(skb); - local_bh_disable(); - } else { - result = __tcp_checksum_complete(skb); - } - return result; -} - -static inline bool tcp_checksum_complete_user(struct sock *sk, - struct sk_buff *skb) -{ - return !skb_csum_unnecessary(skb) && - __tcp_checksum_complete_user(sk, skb); -} - /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5179,7 +5158,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDPAWS, &tp->last_oow_ack_time)) @@ -5231,8 +5210,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (th->syn) { syn_challenge: if (syn_inerr) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5347,7 +5326,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_data_snd_check(sk); return; } else { /* Header too small */ - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } } else { @@ -5375,12 +5354,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __skb_pull(skb, tcp_header_len); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHPHITSTOUSER); eaten = 1; } } if (!eaten) { - if (tcp_checksum_complete_user(sk, skb)) + if (tcp_checksum_complete(skb)) goto csum_error; if ((int)skb->truesize > sk->sk_forward_alloc) @@ -5397,7 +5377,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ eaten = tcp_queue_rcv(sk, skb, tcp_header_len, @@ -5424,7 +5404,7 @@ no_ack: } slow_path: - if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) + if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; if (!th->ack && !th->rst && !th->syn) @@ -5454,8 +5434,8 @@ step5: return; csum_error: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); discard: tcp_drop(sk, skb); @@ -5543,16 +5523,18 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (data) { /* Retransmit unacked data in SYN */ tcp_for_write_queue_from(data, sk) { if (data == tcp_send_head(sk) || - __tcp_retransmit_skb(sk, data)) + __tcp_retransmit_skb(sk, data, 1)) break; } tcp_rearm_rto(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; } tp->syn_data_acked = tp->syn_data; if (tp->syn_data_acked) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVE); tcp_fastopen_add_skb(sk, synack); @@ -5587,7 +5569,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; } @@ -5796,8 +5779,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) int queued = 0; bool acceptable; - tp->rx_opt.saw_tstamp = 0; - switch (sk->sk_state) { case TCP_CLOSE: goto discard; @@ -5815,29 +5796,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - /* Now we have several options: In theory there is - * nothing else in the frame. KA9Q has an option to - * send data with the syn, BSD accepts data with the - * syn up to the [to be] advertised window and - * Solaris 2.1 gives you a protocol error. For now - * we just ignore it, that fits the spec precisely - * and avoids incompatibilities. It would be nice in - * future to drop through and process the data. - * - * Now that TTCP is starting to be used we ought to - * queue this data. - * But, this leaves one open to an easy denial of - * service attack, and SYN cookies can't defend - * against this problem. So, we drop the data - * in the interest of security over speed unless - * it's still in use. - */ - kfree_skb(skb); + consume_skb(skb); return 0; } goto discard; case TCP_SYN_SENT: + tp->rx_opt.saw_tstamp = 0; queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5849,6 +5814,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; } + tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && @@ -5973,7 +5939,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; } @@ -6030,7 +5996,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; } @@ -6168,10 +6134,10 @@ static bool tcp_syn_flood_action(const struct sock *sk, if (net->ipv4.sysctl_tcp_syncookies) { msg = "Sending cookies"; want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); } else #endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); if (!queue->synflood_warned && net->ipv4.sysctl_tcp_syncookies != 2 && @@ -6232,7 +6198,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, * timeout. */ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; } @@ -6279,7 +6245,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (dst && strict && !tcp_peer_is_proven(req, dst, true, tmp_opt.saw_tstamp)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } } @@ -6327,7 +6293,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, - &foc, false); + &foc, TCP_SYNACK_FASTOPEN); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, fastopen_sk); sk->sk_data_ready(sk); @@ -6337,8 +6303,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; if (!want_cookie) inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - af_ops->send_synack(sk, dst, &fl, req, - &foc, !want_cookie); + af_ops->send_synack(sk, dst, &fl, req, &foc, + !want_cookie ? TCP_SYNACK_NORMAL : + TCP_SYNACK_COOKIE); if (want_cookie) { reqsk_free(req); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f4f2a0a3849d..761bc492c5e3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -320,7 +320,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) * an established socket here. */ if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); } else if (abort) { /* * Still in SYN_RECV, just remove it silently. @@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) th->dest, iph->saddr, ntohs(th->source), inet_iif(icmp_skb)); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { @@ -396,13 +396,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) */ if (sock_owned_by_user(sk)) { if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); } if (sk->sk_state == TCP_CLOSE) goto out; if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } @@ -413,7 +413,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -692,13 +692,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + preempt_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); + preempt_enable(); #ifdef CONFIG_TCP_MD5SIG out: @@ -774,12 +776,14 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + preempt_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + preempt_enable(); } static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -830,7 +834,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -841,7 +845,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, foc, attach_req); + skb = tcp_make_synack(sk, dst, req, foc, synack_type); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -1151,12 +1155,12 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk, return false; if (hash_expected && !hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return true; } if (!hash_expected && hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return true; } @@ -1342,7 +1346,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, return newsk; exit_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit_nonewsk: dst_release(dst); exit: @@ -1432,8 +1436,8 @@ discard: return 0; csum_err: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } EXPORT_SYMBOL(tcp_v4_do_rcv); @@ -1506,16 +1510,16 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; - if (tp->ucopy.memory > sk->sk_rcvbuf) { + if (skb_queue_len(&tp->ucopy.prequeue) >= 32 || + tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { struct sk_buff *skb1; BUG_ON(sock_owned_by_user(sk)); + __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED, + skb_queue_len(&tp->ucopy.prequeue)); - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPPREQUEUEDROPPED); - } tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { @@ -1547,7 +1551,7 @@ int tcp_v4_rcv(struct sk_buff *skb) goto discard_it; /* Count it even if it's bad */ - TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); + __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1629,7 +1633,7 @@ process: } } if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1662,7 +1666,7 @@ process: } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); - NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); @@ -1679,9 +1683,9 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: - TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); + __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { tcp_v4_send_reset(NULL, skb); } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7b7eec439906..b617826e2477 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -800,7 +800,8 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, } if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, - jiffies - tm->tcpm_stamp) < 0) + jiffies - tm->tcpm_stamp, + TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; if (tm->tcpm_ts_stamp) { if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, @@ -864,7 +865,8 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS, tfom->syn_loss) < 0 || nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, - jiffies - tfom->last_syn_loss) < 0)) + jiffies - tfom->last_syn_loss, + TCP_METRICS_ATTR_PAD) < 0)) goto nla_put_failure; if (tfom->cookie.len > 0 && nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c53e7c86586..4b95ec4ed2c8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -235,7 +235,7 @@ kill: } if (paws_reject) - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. @@ -337,7 +337,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); @@ -545,7 +545,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rack.mstamp.v64 = 0; newtp->rack.advanced = 0; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; } @@ -710,7 +710,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } @@ -729,7 +729,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } @@ -752,7 +752,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } @@ -791,7 +791,7 @@ embryonic_reset: } if (!fastopen) { inet_csk_reqsk_queue_drop(sk, req); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); } return NULL; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 773083b7f1e9..02737b607aa7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -89,6 +89,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, ~(SKB_GSO_TCPV4 | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | @@ -98,7 +99,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + !(type & (SKB_GSO_TCPV4 | + SKB_GSO_TCPV6)))) goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); @@ -107,6 +109,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } + /* GSO partial only requires splitting the frame into an MSS + * multiple and possibly a remainder. So update the mss now. + */ + if (features & NETIF_F_GSO_PARTIAL) + mss = skb->len - (skb->len % mss); + copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -131,7 +139,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - do { + while (skb->next) { th->fin = th->psh = 0; th->check = newcheck; @@ -151,7 +159,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->seq = htonl(seq); th->cwr = 0; - } while (skb->next); + } /* Following permits TCP Small Queues to work well with GSO : * The callback to TCP stack will be called at the time last frag @@ -237,7 +245,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: /* Include the IP ID check below from the inner most IP hdr */ - flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; + flush = NAPI_GRO_CB(p)->flush; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -246,6 +254,17 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); + /* When we receive our second frame we can made a decision on if we + * continue this flow as an atomic flow with a fixed ID or if we use + * an incrementing ID. + */ + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + mss = skb_shinfo(p)->gso_size; flush |= (len - 1) >= mss; @@ -314,6 +333,9 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (NAPI_GRO_CB(skb)->is_atomic) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + return tcp_gro_complete(skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d2dc015cd19..25d527922b18 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1111,11 +1111,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de tcp_verify_left_out(tp); } +static bool tcp_has_tx_tstamp(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->txstamp_ack || + (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); +} + static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) { struct skb_shared_info *shinfo = skb_shinfo(skb); - if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && + if (unlikely(tcp_has_tx_tstamp(skb)) && !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { struct skb_shared_info *shinfo2 = skb_shinfo(skb2); u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; @@ -1123,9 +1129,17 @@ static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) shinfo->tx_flags &= ~tsflags; shinfo2->tx_flags |= tsflags; swap(shinfo->tskey, shinfo2->tskey); + TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack; + TCP_SKB_CB(skb)->txstamp_ack = 0; } } +static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2) +{ + TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor; + TCP_SKB_CB(skb)->eor = 0; +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -1171,6 +1185,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; + tcp_skb_fragment_eor(skb, buff); if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ @@ -1731,6 +1746,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; + tcp_skb_fragment_eor(skb, buff); + buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); tcp_fragment_tstamp(skb, buff); @@ -2204,14 +2221,13 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Thanks to skb fast clones, we can detect if a prior transmit of * a packet is still in a qdisc or driver queue. * In this case, there is very little point doing a retransmit ! - * Note: This is called from BH context only. */ static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { if (unlikely(skb_fclone_busy(sk, skb))) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; } return false; @@ -2266,14 +2282,14 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; - if (__tcp_retransmit_skb(sk, skb)) + if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; /* Record snd_nxt for loss detection. */ tp->tlp_high_seq = tp->snd_nxt; probe_sent: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; rearm_timer: @@ -2441,6 +2457,21 @@ u32 __tcp_select_window(struct sock *sk) return window; } +void tcp_skb_collapse_tstamp(struct sk_buff *skb, + const struct sk_buff *next_skb) +{ + if (unlikely(tcp_has_tx_tstamp(next_skb))) { + const struct skb_shared_info *next_shinfo = + skb_shinfo(next_skb); + struct skb_shared_info *shinfo = skb_shinfo(skb); + + shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; + shinfo->tskey = next_shinfo->tskey; + TCP_SKB_CB(skb)->txstamp_ack |= + TCP_SKB_CB(next_skb)->txstamp_ack; + } +} + /* Collapses two adjacent SKB's during retransmission. */ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { @@ -2476,6 +2507,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; + TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); @@ -2484,6 +2516,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); + tcp_skb_collapse_tstamp(skb, next_skb); + sk_wmem_free_skb(sk, next_skb); } @@ -2525,6 +2559,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!tcp_can_collapse(sk, skb)) break; + if (!tcp_skb_can_collapse_to(to)) + break; + space -= skb->len; if (first) { @@ -2551,17 +2588,17 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. */ -int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); unsigned int cur_mss; - int err; + int diff, len, err; - /* Inconslusive MTU probe */ - if (icsk->icsk_mtup.probe_size) { + + /* Inconclusive MTU probe */ + if (icsk->icsk_mtup.probe_size) icsk->icsk_mtup.probe_size = 0; - } /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. @@ -2594,30 +2631,27 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; - if (skb->len > cur_mss) { - if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC)) + len = cur_mss * segs; + if (skb->len > len) { + if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { - int oldpcount = tcp_skb_pcount(skb); + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; - if (unlikely(oldpcount > 1)) { - if (skb_unclone(skb, GFP_ATOMIC)) - return -ENOMEM; - tcp_init_tso_segs(skb, cur_mss); - tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); - } + diff = tcp_skb_pcount(skb); + tcp_set_skb_tso_segs(skb, cur_mss); + diff -= tcp_skb_pcount(skb); + if (diff) + tcp_adjust_pcount(sk, skb, diff); + if (skb->len < cur_mss) + tcp_retrans_try_collapse(sk, skb, cur_mss); } /* RFC3168, section 6.1.1.1. ECN fallback */ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) tcp_ecn_clear_syn(sk, skb); - tcp_retrans_try_collapse(sk, skb, cur_mss); - - /* Make a copy, if the first transmission SKB clone we made - * is still in somebody's hands, else make a clone. - */ - /* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom * beyond what csum_start can cover. @@ -2633,20 +2667,22 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } if (likely(!err)) { + segs = tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; /* Update global TCP statistics. */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans++; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + tp->total_retrans += segs; } return err; } -int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) { struct tcp_sock *tp = tcp_sk(sk); - int err = __tcp_retransmit_skb(sk, skb); + int err = __tcp_retransmit_skb(sk, skb, segs); if (err == 0) { #if FASTRETRANS_DEBUG > 0 @@ -2662,7 +2698,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tp->retrans_stamp = tcp_skb_timestamp(skb); } else if (err != -EBUSY) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } if (tp->undo_retrans < 0) @@ -2737,6 +2773,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + int segs; if (skb == tcp_send_head(sk)) break; @@ -2744,14 +2781,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!hole) tp->retransmit_skb_hint = skb; - /* Assume this retransmit will generate - * only one packet for congestion window - * calculation purposes. This works because - * tcp_retransmit_skb() will chop up the - * packet to be MSS sized and all the - * packet counting works out. - */ - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + segs = tp->snd_cwnd - tcp_packets_in_flight(tp); + if (segs <= 0) return; if (fwd_rexmitting) { @@ -2788,10 +2819,10 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - if (tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb, segs)) return; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); @@ -2944,7 +2975,7 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); @@ -2964,14 +2995,22 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); - if (attach_req) { + switch (synack_type) { + case TCP_SYNACK_NORMAL: skb_set_owner_w(skb, req_to_sk(req)); - } else { + break; + case TCP_SYNACK_COOKIE: + /* Under synflood, we do not attach skb to a socket, + * to avoid false sharing. + */ + break; + case TCP_SYNACK_FASTOPEN: /* sk is a const pointer, because we want to express multiple * cpu might call us concurrently. * sk->sk_wmem_alloc in an atomic, we can promote to rw. */ skb_set_owner_w(skb, (struct sock *)sk); + break; } skb_dst_set(skb, dst); @@ -3020,7 +3059,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rsk_rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), NULL, &opts); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ @@ -3516,10 +3555,10 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) int res; tcp_rsk(req)->txhash = net_tx_rndhash(); - res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true); + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL); if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); } return res; } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 5353085fd0b2..e36df4fcfeba 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -65,8 +65,8 @@ int tcp_rack_mark_lost(struct sock *sk) if (scb->sacked & TCPCB_SACKED_RETRANS) { scb->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSTRETRANSMIT); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPLOSTRETRANSMIT); } } else if (!(scb->sacked & TCPCB_RETRANS)) { /* Original data are sent sequentially so stop early diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 49bc474f8e35..debdd8b33e69 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -30,7 +30,7 @@ static void tcp_write_err(struct sock *sk) sk->sk_error_report(sk); tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } /* Do not allow orphaned sockets to eat all our resources. @@ -68,7 +68,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } return 0; @@ -162,8 +162,8 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_fastopen || tp->syn_data) tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (tp->syn_data && icsk->icsk_retransmits == 1) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVEFAIL); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; @@ -178,8 +178,8 @@ static int tcp_write_timeout(struct sock *sk) tp->bytes_acked <= tp->rx_opt.mss_clamp) { tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVEFAIL); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); } /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -209,6 +209,7 @@ static int tcp_write_timeout(struct sock *sk) return 0; } +/* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -228,7 +229,7 @@ void tcp_delack_timer_handler(struct sock *sk) if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb); @@ -248,7 +249,7 @@ void tcp_delack_timer_handler(struct sock *sk) icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } out: @@ -265,7 +266,7 @@ static void tcp_delack_timer(unsigned long data) tcp_delack_timer_handler(sk); } else { inet_csk(sk)->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) sock_hold(sk); @@ -404,7 +405,7 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); + tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); __sk_dst_reset(sk); goto out_reset_timer; } @@ -431,12 +432,12 @@ void tcp_retransmit_timer(struct sock *sk) } else { mib_idx = LINUX_MIB_TCPTIMEOUTS; } - NET_INC_STATS_BH(sock_net(sk), mib_idx); + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); - if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { + if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * do not backoff. */ @@ -493,6 +494,7 @@ out_reset_timer: out:; } +/* Called with BH disabled */ void tcp_write_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -549,7 +551,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req) { struct net *net = read_pnet(&inet_rsk(req)->ireq_net); - NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS); + __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_SYMBOL(tcp_syn_ack_timeout); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f1863136d3e4..f67f52ba4809 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -336,8 +336,13 @@ found: hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock(&hslot2->lock); - hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &hslot2->head); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); + else + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); hslot2->count++; spin_unlock(&hslot2->lock); } @@ -683,7 +688,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, udptable, NULL); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -877,13 +882,13 @@ send: err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } } else - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -1152,8 +1157,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -1237,10 +1242,10 @@ static unsigned int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); __skb_queue_tail(&list_kill, skb); @@ -1276,12 +1281,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) { unsigned int amount = first_packet_length(sk); - if (amount) - /* - * We will only return the amount - * of this packet since that is all - * that will be read. - */ return put_user(amount, (int __user *)arg); } @@ -1353,16 +1352,16 @@ try_again: trace_kfree_skb(skb, udp_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); } skb_free_datagram_locked(sk, skb); return err; } if (!peeked) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_ts_and_drops(msg, sk, skb); @@ -1387,8 +1386,8 @@ try_again: csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { - UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } unlock_sock_fast(sk, slow); @@ -1515,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; @@ -1581,9 +1580,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -1634,8 +1633,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); goto drop; } @@ -1654,9 +1653,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return rc; csum_error: - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; @@ -1716,10 +1715,10 @@ start_lookup: if (unlikely(!nskb)) { atomic_inc(&sk->sk_drops); - UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); continue; } if (udp_queue_rcv_skb(sk, nskb) > 0) @@ -1737,8 +1736,8 @@ start_lookup: consume_skb(skb); } else { kfree_skb(skb); - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -1852,7 +1851,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1879,9 +1878,9 @@ csum_error: proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); - UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: - UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6230cf4b0d2d..097060def7f0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -39,8 +39,11 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * 16 bit length field due to the header being added outside of an * IP or IPv6 frame that was already limited to 64K - 1. */ - partial = csum_sub(csum_unfold(uh->check), - (__force __wsum)htonl(skb->len)); + if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) + partial = (__force __wsum)uh->len; + else + partial = (__force __wsum)htonl(skb->len); + partial = csum_sub(csum_unfold(uh->check), partial); /* setup inner skb. */ skb->encapsulation = 0; @@ -89,7 +92,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, udp_offset = outer_hlen - tnl_hlen; skb = segs; do { - __be16 len; + unsigned int len; if (remcsum) skb->ip_summed = CHECKSUM_NONE; @@ -107,14 +110,26 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); - len = htons(skb->len - udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = len; + + /* If we are only performing partial GSO the inner header + * will be using a length value equal to only one MSS sized + * segment instead of the entire frame. + */ + if (skb_is_gso(skb)) { + uh->len = htons(skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)uh); + } else { + uh->len = htons(len); + } if (!need_csum) continue; - uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len)); + uh->check = ~csum_fold(csum_add(partial, + (__force __wsum)htonl(len))); if (skb->encapsulation || !offload_csum) { uh->check = gso_make_checksum(skb, ~uh->check); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a6c99275bd8c..47f837a58e0a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -359,7 +359,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; ndev->cnf.mtu6 = dev->mtu; - ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -3176,35 +3175,9 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -/* If the host route is cached on the addr struct make sure it is associated - * with the proper table. e.g., enslavement can change and if so the cached - * host route needs to move to the new table. - */ -static void l3mdev_check_host_rt(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ - if (ifp->rt) { - u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - - if (tb_id != ifp->rt->rt6i_table->tb6_id) { - ip6_del_rt(ifp->rt); - ifp->rt = NULL; - } - } -} -#else -static void l3mdev_check_host_rt(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ -} -#endif - static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - l3mdev_check_host_rt(idev, ifp); - if (!ifp->rt) { struct rt6_info *rt; @@ -3255,6 +3228,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; @@ -3303,6 +3277,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; if (event == NETDEV_UP) { + /* restore routes for permanent addresses */ + addrconf_permanent_addr(dev); + if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", @@ -3336,9 +3313,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - /* restore routes for permanent addresses */ - addrconf_permanent_addr(dev); - switch (dev->type) { #if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: @@ -3413,6 +3387,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) addrconf_type_change(dev, event); break; + + case NETDEV_CHANGEUPPER: + info = ptr; + + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) + */ + if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + addrconf_ifdown(dev, 0); } return NOTIFY_OK; @@ -3438,6 +3421,12 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event) ipv6_mc_unmap(idev); } +static bool addr_is_local(const struct in6_addr *addr) +{ + return ipv6_addr_type(addr) & + (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); +} + static int addrconf_ifdown(struct net_device *dev, int how) { struct net *net = dev_net(dev); @@ -3495,7 +3484,8 @@ restart: * address is retained on a down event */ if (!keep_addr || - !(ifa->flags & IFA_F_PERMANENT)) { + !(ifa->flags & IFA_F_PERMANENT) || + addr_is_local(&ifa->addr)) { hlist_del_init_rcu(&ifa->addr_lst); goto restart; } @@ -3539,17 +3529,23 @@ restart: INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + struct rt6_info *rt = NULL; + addrconf_del_dad_work(ifa); write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); - if (keep_addr && (ifa->flags & IFA_F_PERMANENT)) { + if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr)) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; ifa->state = 0; if (!(ifa->flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; + + rt = ifa->rt; + ifa->rt = NULL; } else { state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; @@ -3560,6 +3556,9 @@ restart: spin_unlock_bh(&ifa->lock); + if (rt) + ip6_del_rt(rt); + if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); @@ -5327,10 +5326,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (rt) ip6_del_rt(rt); } - dst_hold(&ifp->rt->dst); - - ip6_del_rt(ifp->rt); - + if (ifp->rt) { + dst_hold(&ifp->rt->dst); + ip6_del_rt(ifp->rt); + } rt_genid_bump_ipv6(net); break; } @@ -5620,376 +5619,366 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } -static struct addrconf_sysctl_table -{ - struct ctl_table_header *sysctl_header; - struct ctl_table addrconf_vars[DEVCONF_MAX+1]; -} addrconf_sysctl __read_mostly = { - .sysctl_header = NULL, - .addrconf_vars = { - { - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_forward, - }, - { - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, - }, - { - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_mtu, - }, - { - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "mldv1_unsolicited_report_interval", - .data = - &ipv6_devconf.mldv1_unsolicited_report_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { - .procname = "mldv2_unsolicited_report_interval", - .data = - &ipv6_devconf.mldv2_unsolicited_report_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_min_hop_limit", - .data = &ipv6_devconf.accept_ra_min_hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, +static const struct ctl_table addrconf_sysctl[] = { + { + .procname = "forwarding", + .data = &ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_forward, + }, + { + .procname = "hop_limit", + .data = &ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_hop_limit, + }, + { + .procname = "mtu", + .data = &ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_mtu, + }, + { + .procname = "accept_ra", + .data = &ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_redirects", + .data = &ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "autoconf", + .data = &ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "dad_transmits", + .data = &ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_solicitations", + .data = &ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_solicitation_interval", + .data = &ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "router_solicitation_delay", + .data = &ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "force_mld_version", + .data = &ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "mldv1_unsolicited_report_interval", + .data = + &ipv6_devconf.mldv1_unsolicited_report_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "mldv2_unsolicited_report_interval", + .data = + &ipv6_devconf.mldv2_unsolicited_report_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "use_tempaddr", + .data = &ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "temp_valid_lft", + .data = &ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "temp_prefered_lft", + .data = &ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "regen_max_retry", + .data = &ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_desync_factor", + .data = &ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_addresses", + .data = &ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_min_hop_limit", + .data = &ipv6_devconf.accept_ra_min_hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IPV6_ROUTER_PREF - { - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, + { + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, #ifdef CONFIG_IPV6_ROUTE_INFO - { - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + { + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #endif - { - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_proxy_ndp, - }, - { - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + { + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_proxy_ndp, + }, + { + .procname = "accept_source_route", + .data = &ipv6_devconf.accept_source_route, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - { - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, - { - .procname = "use_optimistic", - .data = &ipv6_devconf.use_optimistic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, + { + .procname = "optimistic_dad", + .data = &ipv6_devconf.optimistic_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_IPV6_MROUTE - { - .procname = "mc_forwarding", - .data = &ipv6_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, + { + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, #endif - { - .procname = "disable_ipv6", - .data = &ipv6_devconf.disable_ipv6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_disable, - }, - { - .procname = "accept_dad", - .data = &ipv6_devconf.accept_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "force_tllao", - .data = &ipv6_devconf.force_tllao, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "ndisc_notify", - .data = &ipv6_devconf.ndisc_notify, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "suppress_frag_ndisc", - .data = &ipv6_devconf.suppress_frag_ndisc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "accept_ra_from_local", - .data = &ipv6_devconf.accept_ra_from_local, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_mtu", - .data = &ipv6_devconf.accept_ra_mtu, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "stable_secret", - .data = &ipv6_devconf.stable_secret, - .maxlen = IPV6_MAX_STRLEN, - .mode = 0600, - .proc_handler = addrconf_sysctl_stable_secret, - }, - { - .procname = "use_oif_addrs_only", - .data = &ipv6_devconf.use_oif_addrs_only, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ignore_routes_with_linkdown", - .data = &ipv6_devconf.ignore_routes_with_linkdown, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, - }, - { - .procname = "drop_unicast_in_l2_multicast", - .data = &ipv6_devconf.drop_unicast_in_l2_multicast, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "drop_unsolicited_na", - .data = &ipv6_devconf.drop_unsolicited_na, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "keep_addr_on_down", - .data = &ipv6_devconf.keep_addr_on_down, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, - { - /* sentinel */ - } + { + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable, + }, + { + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "force_tllao", + .data = &ipv6_devconf.force_tllao, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ndisc_notify", + .data = &ipv6_devconf.ndisc_notify, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "suppress_frag_ndisc", + .data = &ipv6_devconf.suppress_frag_ndisc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, + { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, + { + .procname = "use_oif_addrs_only", + .data = &ipv6_devconf.use_oif_addrs_only, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ignore_routes_with_linkdown", + .data = &ipv6_devconf.ignore_routes_with_linkdown, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, + }, + { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "keep_addr_on_down", + .data = &ipv6_devconf.keep_addr_on_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, + { + /* sentinel */ + } }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct addrconf_sysctl_table *t; + struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; - t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (!t) + table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL); + if (!table) goto out; - for (i = 0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; - t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ - t->addrconf_vars[i].extra2 = net; + for (i = 0; table[i].data; i++) { + table[i].data += (char *)p - (char *)&ipv6_devconf; + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); - t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (!t->sysctl_header) + p->sysctl_header = register_net_sysctl(net, path, table); + if (!p->sysctl_header) goto free; - p->sysctl = t; return 0; free: - kfree(t); + kfree(table); out: return -ENOBUFS; } static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { - struct addrconf_sysctl_table *t; + struct ctl_table *table; - if (!p->sysctl) + if (!p->sysctl_header) return; - t = p->sysctl; - p->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + table = p->sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(p->sysctl_header); + p->sysctl_header = NULL; + kfree(table); } static int addrconf_sysctl_register(struct inet6_dev *idev) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a73d70119fcd..ea9ee5cce5cf 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,18 +40,114 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } +static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = sk->sk_protocol; + fl6->daddr = sk->sk_v6_daddr; + fl6->saddr = np->saddr; + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet->inet_dport; + fl6->fl6_sport = inet->inet_sport; + fl6->flowlabel = np->flow_label; + + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) + fl6->flowi6_oif = np->mcast_oif; + + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); +} + +int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) +{ + struct ip6_flowlabel *flowlabel = NULL; + struct in6_addr *final_p, final; + struct ipv6_txoptions *opt; + struct dst_entry *dst; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + flowlabel = fl6_sock_lookup(sk, np->flow_label); + if (!flowlabel) + return -EINVAL; + } + ip6_datagram_flow_key_init(&fl6, sk); + + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); + final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto out; + } + + if (fix_sk_saddr) { + if (ipv6_addr_any(&np->saddr)) + np->saddr = fl6.saddr; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } + } + + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? + &np->saddr : +#endif + NULL); + +out: + fl6_sock_release(flowlabel); + return err; +} + +void ip6_datagram_release_cb(struct sock *sk) +{ + struct dst_entry *dst; + + if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return; + + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || + dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + ip6_datagram_dst_update(sk, false); +} +EXPORT_SYMBOL_GPL(ip6_datagram_release_cb); + static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p, final; - struct dst_entry *dst; - struct flowi6 fl6; - struct ip6_flowlabel *flowlabel = NULL; - struct ipv6_txoptions *opt; + struct in6_addr *daddr; int addr_type; int err; + __be32 fl6_flowlabel = 0; if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) @@ -66,15 +162,8 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { - fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (!flowlabel) - return -EINVAL; - } - } + if (np->sndflow) + fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -145,7 +234,7 @@ ipv4_connected: } sk->sk_v6_daddr = *daddr; - np->flow_label = fl6.flowlabel; + np->flow_label = fl6_flowlabel; inet->inet_dport = usin->sin6_port; @@ -154,59 +243,13 @@ ipv4_connected: * destination cache for it. */ - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = sk->sk_v6_daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - - if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl6.flowi6_oif = np->mcast_oif; - - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - rcu_read_lock(); - opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); - final_p = fl6_update_dst(&fl6, opt, &final); - rcu_read_unlock(); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - err = 0; - if (IS_ERR(dst)) { - err = PTR_ERR(dst); + err = ip6_datagram_dst_update(sk, true); + if (err) goto out; - } - - /* source address lookup done in ip6_dst_lookup */ - - if (ipv6_addr_any(&np->saddr)) - np->saddr = fl6.saddr; - - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - sk->sk_v6_rcv_saddr = fl6.saddr; - inet->inet_rcv_saddr = LOOPBACK4_IPV6; - if (sk->sk_prot->rehash) - sk->sk_prot->rehash(sk); - } - - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? - &sk->sk_v6_daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl6.saddr, &np->saddr) ? - &np->saddr : -#endif - NULL); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: - fl6_sock_release(flowlabel); return err; } @@ -407,9 +450,10 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto out_free_skb; - + if (unlikely(err)) { + kfree_skb(skb); + return err; + } sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); @@ -466,8 +510,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; -out_free_skb: - kfree_skb(skb); + consume_skb(skb); out: return err; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ea7c4d64a00a..8de5dd7aaa05 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return 1; } - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); return -1; } @@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -334,8 +334,8 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -360,8 +360,8 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -379,8 +379,8 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); @@ -393,8 +393,8 @@ looped_back: if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } @@ -416,14 +416,14 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -434,8 +434,8 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -454,8 +454,8 @@ looped_back: if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); @@ -470,7 +470,7 @@ looped_back: return -1; unknown_rh: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -568,28 +568,28 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return false; } if (ipv6_hdr(skb)->payload_len) { - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6b573ebe49de..23b9a4cc418e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -622,7 +622,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) np->dontfrag, &sockc_unused); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, @@ -674,7 +674,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) return; out: - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); } /* @@ -710,7 +710,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -728,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); + ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -812,9 +812,9 @@ static int icmpv6_rcv(struct sk_buff *skb) return 0; csum_error: - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index 28542cb2b387..d08fd2d48a78 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -23,10 +23,76 @@ #include <net/protocol.h> #include <uapi/linux/ila.h> +struct ila_locator { + union { + __u8 v8[8]; + __be16 v16[4]; + __be32 v32[2]; + __be64 v64; + }; +}; + +struct ila_identifier { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 __space:4; + u8 csum_neutral:1; + u8 type:3; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 type:3; + u8 csum_neutral:1; + u8 __space:4; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + u8 __space2[7]; + }; + __u8 v8[8]; + __be16 v16[4]; + __be32 v32[2]; + __be64 v64; + }; +}; + +enum { + ILA_ATYPE_IID = 0, + ILA_ATYPE_LUID, + ILA_ATYPE_VIRT_V4, + ILA_ATYPE_VIRT_UNI_V6, + ILA_ATYPE_VIRT_MULTI_V6, + ILA_ATYPE_RSVD_1, + ILA_ATYPE_RSVD_2, + ILA_ATYPE_RSVD_3, +}; + +#define CSUM_NEUTRAL_FLAG htonl(0x10000000) + +struct ila_addr { + union { + struct in6_addr addr; + struct { + struct ila_locator loc; + struct ila_identifier ident; + }; + }; +}; + +static inline struct ila_addr *ila_a2i(struct in6_addr *addr) +{ + return (struct ila_addr *)addr; +} + +static inline bool ila_addr_is_ila(struct ila_addr *iaddr) +{ + return (iaddr->ident.type != ILA_ATYPE_IID); +} + struct ila_params { - __be64 locator; - __be64 locator_match; + struct ila_locator locator; + struct ila_locator locator_match; __wsum csum_diff; + u8 csum_mode; }; static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) @@ -38,7 +104,14 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) return csum_partial(diff, sizeof(diff), 0); } -void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); +static inline bool ila_csum_neutral_set(struct ila_identifier ident) +{ + return !!(ident.csum_neutral); +} + +void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); + +void ila_init_saved_csum(struct ila_params *p); int ila_lwt_init(void); void ila_lwt_fini(void); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 30613050e4ca..0e94042d1289 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -15,20 +15,52 @@ static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) { - if (*(__be64 *)&ip6h->daddr == p->locator_match) + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + + if (p->locator_match.v64) return p->csum_diff; else - return compute_csum_diff8((__be32 *)&ip6h->daddr, + return compute_csum_diff8((__be32 *)&iaddr->loc, + (__be32 *)&p->locator); +} + +static void ila_csum_do_neutral(struct ila_addr *iaddr, + struct ila_params *p) +{ + __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; + __wsum diff, fval; + + /* Check if checksum adjust value has been cached */ + if (p->locator_match.v64) { + diff = p->csum_diff; + } else { + diff = compute_csum_diff8((__be32 *)iaddr, (__be32 *)&p->locator); + } + + fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? + ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG); + + diff = csum_add(diff, fval); + + *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); + + /* Flip the csum-neutral bit. Either we are doing a SIR->ILA + * translation with ILA_CSUM_NEUTRAL_MAP as the csum_method + * and the C-bit is not set, or we are doing an ILA-SIR + * tranlsation and the C-bit is set. + */ + iaddr->ident.csum_neutral ^= 1; } -void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +static void ila_csum_adjust_transport(struct sk_buff *skb, + struct ila_params *p) { __wsum diff; struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); size_t nhoff = sizeof(struct ipv6hdr); - /* First update checksum */ switch (ip6h->nexthdr) { case NEXTHDR_TCP: if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { @@ -68,7 +100,46 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) } /* Now change destination address */ - *(__be64 *)&ip6h->daddr = p->locator; + iaddr->loc = p->locator; +} + +void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + + /* First deal with the transport checksum */ + if (ila_csum_neutral_set(iaddr->ident)) { + /* C-bit is set in the locator indicating that this + * is a locator being translated to a SIR address. + * Perform (receiver) checksum-neutral translation. + */ + ila_csum_do_neutral(iaddr, p); + } else { + switch (p->csum_mode) { + case ILA_CSUM_ADJUST_TRANSPORT: + ila_csum_adjust_transport(skb, p); + break; + case ILA_CSUM_NEUTRAL_MAP: + ila_csum_do_neutral(iaddr, p); + break; + case ILA_CSUM_NO_ACTION: + break; + } + } + + /* Now change destination address */ + iaddr->loc = p->locator; +} + +void ila_init_saved_csum(struct ila_params *p) +{ + if (!p->locator_match.v64) + return; + + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, + (__be32 *)&p->locator); } static int __init ila_init(void) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 2ae3c4fd8aab..4985e1a735a6 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); return dst->lwtstate->orig_output(net, sk, skb); @@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); return dst->lwtstate->orig_input(skb); @@ -53,6 +53,7 @@ drop: static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; static int ila_build_state(struct net_device *dev, struct nlattr *nla, @@ -64,11 +65,28 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, size_t encap_len = sizeof(*p); struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; + struct ila_addr *iaddr; int ret; if (family != AF_INET6) return -EINVAL; + if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) { + /* Need to have full locator and at least type field + * included in destination + */ + return -EINVAL; + } + + iaddr = (struct ila_addr *)&cfg6->fc_dst; + + if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { + /* Don't allow translation for a non-ILA address or checksum + * neutral flag to be set. + */ + return -EINVAL; + } + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy); if (ret < 0) @@ -84,16 +102,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, newts->len = encap_len; p = ila_params_lwtunnel(newts); - p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); + p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); - if (cfg6->fc_dst_len > sizeof(__be64)) { - /* Precompute checksum difference for translation since we - * know both the old locator and the new one. - */ - p->locator_match = *(__be64 *)&cfg6->fc_dst; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - } + /* Precompute checksum difference for translation since we + * know both the old locator and the new one. + */ + p->locator_match = iaddr->loc; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, (__be32 *)&p->locator); + + if (tb[ILA_ATTR_CSUM_MODE]) + p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); + + ila_init_saved_csum(p); newts->type = LWTUNNEL_ENCAP_ILA; newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | @@ -109,7 +130,10 @@ static int ila_fill_encap_info(struct sk_buff *skb, { struct ila_params *p = ila_params_lwtunnel(lwtstate); - if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) + if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, + ILA_ATTR_PAD)) + goto nla_put_failure; + if (nla_put_u64(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) goto nla_put_failure; return 0; @@ -129,7 +153,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct ila_params *a_p = ila_params_lwtunnel(a); struct ila_params *b_p = ila_params_lwtunnel(b); - return (a_p->locator != b_p->locator); + return (a_p->locator.v64 != b_p->locator.v64); } static const struct lwtunnel_encap_ops ila_encap_ops = { diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 0b03533453e4..a90e57229c6c 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -11,13 +11,11 @@ struct ila_xlat_params { struct ila_params ip; - __be64 identifier; int ifindex; - unsigned int dir; }; struct ila_map { - struct ila_xlat_params p; + struct ila_xlat_params xp; struct rhash_head node; struct ila_map __rcu *next; struct rcu_head rcu; @@ -66,31 +64,29 @@ static __always_inline void __ila_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static inline u32 ila_identifier_hash(__be64 identifier) +static inline u32 ila_locator_hash(struct ila_locator loc) { - u32 *v = (u32 *)&identifier; + u32 *v = (u32 *)loc.v32; return jhash_2words(v[0], v[1], hashrnd); } -static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) +static inline spinlock_t *ila_get_lock(struct ila_net *ilan, + struct ila_locator loc) { - return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; + return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask]; } -static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, - int ifindex, unsigned int dir) +static inline int ila_cmp_wildcards(struct ila_map *ila, + struct ila_addr *iaddr, int ifindex) { - return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || - (ila->p.ifindex && ila->p.ifindex != ifindex) || - !(ila->p.dir & dir); + return (ila->xp.ifindex && ila->xp.ifindex != ifindex); } -static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) +static inline int ila_cmp_params(struct ila_map *ila, + struct ila_xlat_params *xp) { - return (ila->p.ip.locator_match != p->ip.locator_match) || - (ila->p.ifindex != p->ifindex) || - (ila->p.dir != p->dir); + return (ila->xp.ifindex != xp->ifindex); } static int ila_cmpfn(struct rhashtable_compare_arg *arg, @@ -98,17 +94,14 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg, { const struct ila_map *ila = obj; - return (ila->p.identifier != *(__be64 *)arg->key); + return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key); } static inline int ila_order(struct ila_map *ila) { int score = 0; - if (ila->p.ip.locator_match) - score += 1 << 0; - - if (ila->p.ifindex) + if (ila->xp.ifindex) score += 1 << 1; return score; @@ -117,7 +110,7 @@ static inline int ila_order(struct ila_map *ila) static const struct rhashtable_params rht_params = { .nelem_hint = 1024, .head_offset = offsetof(struct ila_map, node), - .key_offset = offsetof(struct ila_map, p.identifier), + .key_offset = offsetof(struct ila_map, xp.ip.locator_match), .key_len = sizeof(u64), /* identifier */ .max_size = 1048576, .min_size = 256, @@ -136,50 +129,45 @@ static struct genl_family ila_nl_family = { }; static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { - [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, - [ILA_ATTR_DIR] = { .type = NLA_U32, }, + [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, - struct ila_xlat_params *p) + struct ila_xlat_params *xp) { - memset(p, 0, sizeof(*p)); - - if (info->attrs[ILA_ATTR_IDENTIFIER]) - p->identifier = (__force __be64)nla_get_u64( - info->attrs[ILA_ATTR_IDENTIFIER]); + memset(xp, 0, sizeof(*xp)); if (info->attrs[ILA_ATTR_LOCATOR]) - p->ip.locator = (__force __be64)nla_get_u64( + xp->ip.locator.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR]); if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) - p->ip.locator_match = (__force __be64)nla_get_u64( + xp->ip.locator_match.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR_MATCH]); - if (info->attrs[ILA_ATTR_IFINDEX]) - p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); + if (info->attrs[ILA_ATTR_CSUM_MODE]) + xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); - if (info->attrs[ILA_ATTR_DIR]) - p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); + if (info->attrs[ILA_ATTR_IFINDEX]) + xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); return 0; } /* Must be called with rcu readlock */ -static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, +static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr, int ifindex, - unsigned int dir, struct ila_net *ilan) { struct ila_map *ila; - ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); + ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc, + rht_params); while (ila) { - if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) + if (!ila_cmp_wildcards(ila, iaddr, ifindex)) return ila; ila = rcu_access_pointer(ila->next); } @@ -188,15 +176,16 @@ static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, } /* Must be called with rcu readlock */ -static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, +static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp, struct ila_net *ilan) { struct ila_map *ila; - ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + ila = rhashtable_lookup_fast(&ilan->rhash_table, + &xp->ip.locator_match, rht_params); while (ila) { - if (!ila_cmp_params(ila, p)) + if (!ila_cmp_params(ila, xp)) return ila; ila = rcu_access_pointer(ila->next); } @@ -221,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg) } } -static int ila_xlat_addr(struct sk_buff *skb, int dir); +static int ila_xlat_addr(struct sk_buff *skb); static unsigned int ila_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - ila_xlat_addr(skb, ILA_DIR_IN); + ila_xlat_addr(skb); return NF_ACCEPT; } @@ -241,11 +230,11 @@ static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { }, }; -static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) +static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head; - spinlock_t *lock = ila_get_lock(ilan, p->identifier); + spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; if (!ilan->hooks_registered) { @@ -264,22 +253,16 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) if (!ila) return -ENOMEM; - ila->p = *p; + ila_init_saved_csum(&xp->ip); - if (p->ip.locator_match) { - /* Precompute checksum difference for translation since we - * know both the old identifier and the new one. - */ - ila->p.ip.csum_diff = compute_csum_diff8( - (__be32 *)&p->ip.locator_match, - (__be32 *)&p->ip.locator); - } + ila->xp = *xp; order = ila_order(ila); spin_lock(lock); - head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + head = rhashtable_lookup_fast(&ilan->rhash_table, + &xp->ip.locator_match, rht_params); if (!head) { /* New entry for the rhash_table */ @@ -289,7 +272,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) struct ila_map *tila = head, *prev = NULL; do { - if (!ila_cmp_params(tila, p)) { + if (!ila_cmp_params(tila, xp)) { err = -EEXIST; goto out; } @@ -326,23 +309,23 @@ out: return err; } -static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) +static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head, *prev; - spinlock_t *lock = ila_get_lock(ilan, p->identifier); + spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = -ENOENT; spin_lock(lock); head = rhashtable_lookup_fast(&ilan->rhash_table, - &p->identifier, rht_params); + &xp->ip.locator_match, rht_params); ila = head; prev = NULL; while (ila) { - if (ila_cmp_params(ila, p)) { + if (ila_cmp_params(ila, xp)) { prev = ila; ila = rcu_dereference_protected(ila->next, lockdep_is_held(lock)); @@ -404,28 +387,28 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - struct ila_xlat_params p; + struct ila_xlat_params xp; int err; - err = parse_nl_config(info, &p); + err = parse_nl_config(info, &xp); if (err) return err; - ila_del_mapping(net, &p); + ila_del_mapping(net, &xp); return 0; } static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) { - if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER, - (__force u64)ila->p.identifier) || - nla_put_u64(msg, ILA_ATTR_LOCATOR, - (__force u64)ila->p.ip.locator) || - nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH, - (__force u64)ila->p.ip.locator_match) || - nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || - nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) + if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR, + (__force u64)ila->xp.ip.locator.v64, + ILA_ATTR_PAD) || + nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH, + (__force u64)ila->xp.ip.locator_match.v64, + ILA_ATTR_PAD) || + nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || + nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) return -1; return 0; @@ -457,11 +440,11 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); struct sk_buff *msg; - struct ila_xlat_params p; + struct ila_xlat_params xp; struct ila_map *ila; int ret; - ret = parse_nl_config(info, &p); + ret = parse_nl_config(info, &xp); if (ret) return ret; @@ -471,7 +454,7 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); - ila = ila_lookup_by_params(&p, ilan); + ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, info->snd_portid, @@ -614,45 +597,32 @@ static struct pernet_operations ila_net_ops = { .size = sizeof(struct ila_net), }; -static int ila_xlat_addr(struct sk_buff *skb, int dir) +static int ila_xlat_addr(struct sk_buff *skb) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); struct ila_net *ilan = net_generic(net, ila_net_id); - __be64 identifier, locator_match; - size_t nhoff; + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); /* Assumes skb contains a valid IPv6 header that is pulled */ - identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; - locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0]; - nhoff = sizeof(struct ipv6hdr); + if (!ila_addr_is_ila(iaddr)) { + /* Type indicates this is not an ILA address */ + return 0; + } rcu_read_lock(); - ila = ila_lookup_wildcards(identifier, locator_match, - skb->dev->ifindex, dir, ilan); + ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) - update_ipv6_locator(skb, &ila->p.ip); + ila_update_ipv6_locator(skb, &ila->xp.ip); rcu_read_unlock(); return 0; } -int ila_xlat_incoming(struct sk_buff *skb) -{ - return ila_xlat_addr(skb, ILA_DIR_IN); -} -EXPORT_SYMBOL(ila_xlat_incoming); - -int ila_xlat_outgoing(struct sk_buff *skb) -{ - return ila_xlat_addr(skb, ILA_DIR_OUT); -} -EXPORT_SYMBOL(ila_xlat_outgoing); - int ila_xlat_init(void) { int ret; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index f1678388fb0d..00cf28ad4565 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -222,7 +222,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4e636e60a360..10127741a60d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -54,6 +54,7 @@ #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/ip6_tunnel.h> +#include <net/gre.h> static bool log_ecn_error = true; @@ -443,137 +444,40 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->err_time = jiffies; } -static int ip6gre_rcv(struct sk_buff *skb) +static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { const struct ipv6hdr *ipv6h; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip6_tnl *tunnel; - int offset = 4; - __be16 gre_proto; - int err; - - if (!pskb_may_pull(skb, sizeof(struct in6_addr))) - goto drop; ipv6h = ipv6_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. - */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - csum = skb_checksum_simple_validate(skb); - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } - - gre_proto = *(__be16 *)(h + 2); - tunnel = ip6gre_tunnel_lookup(skb->dev, - &ipv6h->saddr, &ipv6h->daddr, key, - gre_proto); + &ipv6h->saddr, &ipv6h->daddr, tpi->key, + tpi->proto); if (tunnel) { - struct pcpu_sw_netstats *tstats; + ip6_tnl_rcv(tunnel, skb, tpi, NULL, false); - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - - if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) { - tunnel->dev->stats.rx_dropped++; - goto drop; - } - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IPv6 - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IPV6); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && - (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - ipv6h = ipv6_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + return PACKET_RCVD; + } - skb_reset_network_header(skb); + return PACKET_REJECT; +} - err = IP6_ECN_decapsulate(ipv6h, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", - &ipv6h->saddr, - ipv6_get_dsfield(ipv6h)); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } +static int gre_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + bool csum_err = false; + int hdr_len; - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); + if (gre_parse_header(skb, &tpi, &csum_err, &hdr_len) < 0) + goto drop; - netif_rx(skb); + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) + goto drop; + if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; - } - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; @@ -584,187 +488,40 @@ struct ipv6_tel_txoption { __u8 dst_opt[8]; }; -static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +static int gre_handle_offloads(struct sk_buff *skb, bool csum) { - memset(opt, 0, sizeof(struct ipv6_tel_txoption)); - - opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; - opt->dst_opt[3] = 1; - opt->dst_opt[4] = encap_limit; - opt->dst_opt[5] = IPV6_TLV_PADN; - opt->dst_opt[6] = 1; - - opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; - opt->ops.opt_nflen = 8; + return iptunnel_handle_offloads(skb, + csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } -static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, - struct net_device *dev, - __u8 dsfield, - struct flowi6 *fl6, - int encap_limit, - __u32 *pmtu) +static netdev_tx_t __gre6_xmit(struct sk_buff *skb, + struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, + __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); - struct net *net = tunnel->net; - struct net_device *tdev; /* Device to other host */ - struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom = 0; /* The extra header space needed */ - int gre_hlen; - struct ipv6_tel_txoption opt; - int mtu; - struct dst_entry *dst = NULL, *ndst = NULL; - struct net_device_stats *stats = &tunnel->dev->stats; - int err = -1; - u8 proto; - struct sk_buff *new_skb; - __be16 protocol; + __be16 protocol = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : proto; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; - if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { - gre_hlen = 0; - ipv6h = (struct ipv6hdr *)skb->data; - fl6->daddr = ipv6h->daddr; - } else { - gre_hlen = tunnel->hlen; + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) + fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr; + else fl6->daddr = tunnel->parms.raddr; - } - - if (!fl6->flowi6_mark) - dst = dst_cache_get(&tunnel->dst_cache); - - if (!dst) { - dst = ip6_route_output(net, NULL, fl6); - - if (dst->error) - goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - dst = NULL; - goto tx_err_link_failure; - } - ndst = dst; - } - - tdev = dst->dev; - - if (tdev == dev) { - stats->collisions++; - net_warn_ratelimited("%s: Local routing loop detected!\n", - tunnel->parms.name); - goto tx_err_dst_release; - } - - mtu = dst_mtu(dst) - sizeof(*ipv6h); - if (encap_limit >= 0) { - max_headroom += 8; - mtu -= 8; - } - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { - *pmtu = mtu; - err = -EMSGSIZE; - goto tx_err_dst_release; - } - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) { - tunnel->err_count--; + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - - skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - - max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb) || - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) - goto tx_err_dst_release; - - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - consume_skb(skb); - skb = new_skb; - } - - if (!fl6->flowi6_mark && ndst) - dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr); - skb_dst_set(skb, dst); - - proto = NEXTHDR_GRE; - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); - } - - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - - skb_push(skb, gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*ipv6h)); - - /* - * Push down and install the IP header. - */ - ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); - ipv6h->hop_limit = tunnel->parms.hop_limit; - ipv6h->nexthdr = proto; - ipv6h->saddr = fl6->saddr; - ipv6h->daddr = fl6->daddr; - - ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; - protocol = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; - ((__be16 *)(ipv6h + 1))[1] = protocol; - - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); - - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - if (tunnel->parms.o_flags&GRE_CSUM) { - *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), - skb->len - sizeof(struct ipv6hdr)); - } - } + /* Push GRE header. */ + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - skb_set_inner_protocol(skb, protocol); + skb_set_inner_protocol(skb, proto); - ip6tunnel_xmit(NULL, skb, dev); - return 0; -tx_err_link_failure: - stats->tx_carrier_errors++; - dst_link_failure(skb); -tx_err_dst_release: - dst_release(dst); - return err; + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); } static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) @@ -783,7 +540,6 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv4_get_dsfield(iph); @@ -793,7 +549,12 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + if (err) + return -1; + + err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + skb->protocol); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -833,7 +594,6 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -843,7 +603,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) + return -1; + + err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, + &mtu, skb->protocol); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -887,7 +651,11 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = skb->protocol; - err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + if (err) + return err; + + err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol); return err; } @@ -987,6 +755,8 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->mtu = rt->dst.dev->mtu - addend; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1061,6 +831,8 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ign->fb_tunnel_dev) { @@ -1160,15 +932,6 @@ done: return err; } -static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) @@ -1212,7 +975,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, - .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1228,17 +991,11 @@ static void ip6gre_dev_free(struct net_device *dev) static void ip6gre_tunnel_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &ip6gre_netdev_ops; dev->destructor = ip6gre_dev_free; dev->type = ARPHRD_IP6GRE; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; - dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4; - t = netdev_priv(dev); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); @@ -1248,6 +1005,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; int ret; + int t_hlen; tunnel = netdev_priv(dev); @@ -1266,6 +1024,16 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) return ret; } + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + + dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; + dev->mtu = ETH_DATA_LEN - t_hlen - 4; + + if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + return 0; } @@ -1304,7 +1072,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { - .handler = ip6gre_rcv, + .handler = gre_rcv, .err_handler = ip6gre_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -1500,11 +1268,16 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ip6gre_tap_setup(struct net_device *dev) { @@ -1538,9 +1311,21 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & GRE_SEQ)) { + /* TCP segmentation offload is not supported when we + * generate output sequences. + */ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + + /* Can use a lockless transmit, unless we generate + * output sequences + */ dev->features |= NETIF_F_LLTX; + } err = register_netdevice(dev); if (err) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c05c425c2389..6ed56012005d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -78,11 +78,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len); + __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -109,10 +109,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; - IP6_ADD_STATS_BH(net, idev, - IPSTATS_MIB_NOECTPKTS + + __IP6_ADD_STATS(net, idev, + IPSTATS_MIB_NOECTPKTS + (ipv6_get_dsfield(hdr) & INET_ECN_MASK), - max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); /* * RFC4291 2.5.3 * A packet received on an interface with a destination address @@ -169,12 +169,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* pkt_len may be zero if Jumbo payload option is present */ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { - IP6_INC_STATS_BH(net, - idev, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, + idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = ipv6_hdr(skb); @@ -182,7 +182,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->nexthdr == NEXTHDR_HOP) { if (ipv6_parse_hopopts(skb) < 0) { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return NET_RX_DROP; } @@ -197,7 +197,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt net, NULL, skb, dev, NULL, ip6_rcv_finish); err: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); drop: rcu_read_unlock(); kfree_skb(skb); @@ -259,18 +259,18 @@ resubmit: if (ret > 0) goto resubmit; else if (ret == 0) - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP6_INC_STATS_BH(net, idev, - IPSTATS_MIB_INUNKNOWNPROTOS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } kfree_skb(skb); } else { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } @@ -278,7 +278,7 @@ resubmit: return 0; discard: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; @@ -297,7 +297,7 @@ int ip6_mc_input(struct sk_buff *skb) const struct ipv6hdr *hdr; bool deliver; - IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), + __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, skb->len); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 204af2219471..f5eb184e1093 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,6 +63,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int proto; struct frag_hdr *fptr; unsigned int unfrag_ip6hlen; + unsigned int payload_len; u8 *prevhdr; int offset = 0; bool encap, udpfrag; @@ -73,6 +74,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | + SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | @@ -80,7 +83,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_TCPV6 | + SKB_GSO_PARTIAL | 0))) goto out; @@ -117,7 +120,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); + if (skb_is_gso(skb)) + payload_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)(ipv6h + 1); + else + payload_len = skb->len - nhoff - sizeof(*ipv6h); + ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { @@ -239,10 +248,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; - /* Clear flush_id, there's really no concept of ID in IPv6. */ - NAPI_GRO_CB(p)->flush_id = 0; + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = 0; } + NAPI_GRO_CB(skb)->is_atomic = true; NAPI_GRO_CB(skb)->flush |= flush; skb_gro_postpull_rcsum(skb, iph, nlen); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 171518e3ca21..2b3ffc582d16 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -395,8 +395,8 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } @@ -427,8 +427,8 @@ int ip6_forward(struct sk_buff *skb) /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -441,15 +441,15 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -505,17 +505,17 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_FRAGFAILS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INTOOBIGERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_OUTDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -525,14 +525,14 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1f20345cbc97..ade55af6ace6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -238,6 +238,7 @@ static void ip6_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); @@ -753,97 +754,157 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, } EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); -/** - * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally - * @skb: received socket buffer - * @protocol: ethernet protocol ID - * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN - * - * Return: 0 - **/ - -static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, - __u8 ipproto, - int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, - struct sk_buff *skb)) +static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb), + bool log_ecn_err) { - struct ip6_tnl *t; + struct pcpu_sw_netstats *tstats; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u8 tproto; int err; - rcu_read_lock(); - t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t) { - struct pcpu_sw_netstats *tstats; + if ((!(tpi->flags & TUNNEL_CSUM) && + (tunnel->parms.i_flags & TUNNEL_CSUM)) || + ((tpi->flags & TUNNEL_CSUM) && + !(tunnel->parms.i_flags & TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } - tproto = ACCESS_ONCE(t->parms.proto); - if (tproto != ipproto && tproto != 0) { - rcu_read_unlock(); - goto discard; + if (tunnel->parms.i_flags & TUNNEL_SEQ) { + if (!(tpi->flags & TUNNEL_SEQ) || + (tunnel->i_seqno && + (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - rcu_read_unlock(); - goto discard; - } + skb->protocol = tpi->proto; - if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { - t->dev->stats.rx_dropped++; - rcu_read_unlock(); - goto discard; + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; } - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->protocol = htons(protocol); - memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); - - __skb_tunnel_rx(skb, t->dev, t->net); - - err = dscp_ecn_decapsulate(t, ipv6h, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", - &ipv6h->saddr, - ipv6_get_dsfield(ipv6h)); - if (err > 1) { - ++t->dev->stats.rx_frame_errors; - ++t->dev->stats.rx_errors; - rcu_read_unlock(); - goto discard; - } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } + + skb_reset_network_header(skb); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + + __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + + err = dscp_ecn_decapsulate(tunnel, ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_err) + net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; } + } - tstats = this_cpu_ptr(t->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); - netif_rx(skb); + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); - rcu_read_unlock(); - return 0; + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} + +int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + bool log_ecn_err) +{ + return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate, + log_ecn_err); +} +EXPORT_SYMBOL(ip6_tnl_rcv); + +static const struct tnl_ptk_info tpi_v6 = { + /* no tunnel info required for ipxip6. */ + .proto = htons(ETH_P_IPV6), +}; + +static const struct tnl_ptk_info tpi_v4 = { + /* no tunnel info required for ipxip6. */ + .proto = htons(ETH_P_IP), +}; + +static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, + const struct tnl_ptk_info *tpi, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb)) +{ + struct ip6_tnl *t; + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int ret = -1; + + rcu_read_lock(); + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + + if (t) { + u8 tproto = ACCESS_ONCE(t->parms.proto); + + if (tproto != ipproto && tproto != 0) + goto drop; + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) + goto drop; + if (iptunnel_pull_header(skb, 0, tpi->proto, false)) + goto drop; + ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate, + log_ecn_error); } + rcu_read_unlock(); - return 1; -discard: + return ret; + +drop: + rcu_read_unlock(); kfree_skb(skb); return 0; } static int ip4ip6_rcv(struct sk_buff *skb) { - return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, - ip4ip6_dscp_ecn_decapsulate); + return ipxip6_rcv(skb, IPPROTO_IP, &tpi_v4, + ip4ip6_dscp_ecn_decapsulate); } static int ip6ip6_rcv(struct sk_buff *skb) { - return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, - ip6ip6_dscp_ecn_decapsulate); + return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6, + ip6ip6_dscp_ecn_decapsulate); } struct ipv6_tel_txoption { @@ -918,13 +979,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); /** - * ip6_tnl_xmit2 - encapsulate packet and send + * ip6_tnl_xmit - encapsulate packet and send * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device * @dsfield: dscp code for outer header - * @fl: flow of tunneled packet + * @fl6: flow of tunneled packet * @encap_limit: encapsulation limit * @pmtu: Path MTU is stored if packet is too big + * @proto: next header value * * Description: * Build new header and do some sanity checks on the packet before sending @@ -936,12 +998,9 @@ EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); * %-EMSGSIZE message too big. return mtu in this case. **/ -static int ip6_tnl_xmit2(struct sk_buff *skb, - struct net_device *dev, - __u8 dsfield, - struct flowi6 *fl6, - int encap_limit, - __u32 *pmtu) +int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu, + __u8 proto) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; @@ -952,7 +1011,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device *tdev; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); - u8 proto; int err = -1; /* NBMA tunnel */ @@ -1014,12 +1072,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, mtu = IPV6_MIN_MTU; if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; } + if (t->err_count > 0) { + if (time_before(jiffies, + t->err_time + IP6TUNNEL_ERR_TIMEO)) { + t->err_count--; + + dst_link_failure(skb); + } else { + t->err_count = 0; + } + } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); /* @@ -1047,7 +1116,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->transport_header = skb->network_header; - proto = fl6->flowi6_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -1058,6 +1126,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->encapsulation = 1; } + max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + + dst->header_len; + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1076,6 +1149,7 @@ tx_err_dst_release: dst_release(dst); return err; } +EXPORT_SYMBOL(ip6_tnl_xmit); static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) @@ -1099,7 +1173,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1109,7 +1182,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + IPPROTO_IPIP); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -1153,7 +1227,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -1163,7 +1236,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + IPPROTO_IPV6); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1174,7 +1248,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } static netdev_tx_t -ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; @@ -1370,6 +1444,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { @@ -1464,8 +1540,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * %-EINVAL if mtu too small **/ -static int -ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) +int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); @@ -1481,6 +1556,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL(ip6_tnl_change_mtu); int ip6_tnl_get_iflink(const struct net_device *dev) { @@ -1493,7 +1569,7 @@ EXPORT_SYMBOL(ip6_tnl_get_iflink); static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, - .ndo_start_xmit = ip6_tnl_xmit, + .ndo_start_xmit = ip6_tnl_start_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats = ip6_get_stats, @@ -1549,13 +1625,25 @@ ip6_tnl_dev_init_gen(struct net_device *dev) return -ENOMEM; ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); - if (ret) { - free_percpu(dev->tstats); - dev->tstats = NULL; - return ret; - } + if (ret) + goto free_stats; + + ret = gro_cells_init(&t->gro_cells, dev); + if (ret) + goto destroy_dst; + + t->hlen = 0; + t->tun_hlen = 0; return 0; + +destroy_dst: + dst_cache_destroy(&t->dst_cache); +free_stats: + free_percpu(dev->tstats); + dev->tstats = NULL; + + return ret; } /** diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a10e77103c88..f2e2013f8346 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1984,10 +1984,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } @@ -2268,7 +2268,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; @@ -2411,7 +2411,7 @@ static int mr6_msgsize(bool unresolved, int maxvif) + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ - + nla_total_size(sizeof(struct rta_mfc_stats)) + + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 86b67b70b626..73e606c719ef 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -455,6 +455,18 @@ ip6t_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ip6t_entry *target) +{ + struct ip6t_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -532,6 +544,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ip6t_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -540,19 +554,18 @@ mark_source_chains(const struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { - if (newpos > newinfo->size - - sizeof(struct ip6t_entry)) { - duprintf("mark_source_chains: " - "bad verdict (%i)\n", - newpos); - return 0; - } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ip6t_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ip6t_entry *) (entry0 + newpos); @@ -579,25 +592,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ip6t_entry *e) -{ - const struct xt_entry_target *t; - - if (!ip6_checkentry(&e->ipv6)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; @@ -762,7 +756,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1321,55 +1319,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1377,7 +1336,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1456,7 +1415,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ip6t_ip6 *ipv6, int *size) { @@ -1491,17 +1449,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || @@ -1518,8 +1473,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1527,7 +1485,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); + ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1550,17 +1508,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1574,18 +1521,17 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1594,11 +1540,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1610,183 +1554,83 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char *name) -{ - unsigned int j; - int ret = 0; - struct xt_mtchk_param mtpar; - struct xt_entry_match *ematch; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ipv6; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV6; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; - struct ip6t_entry *iter1; + struct ip6t_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, number); + xt_compat_init_offsets(AF_INET6, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ip6t_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1794,17 +1638,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET6); - xt_compat_unlock(AF_INET6); - goto out; } static int @@ -1820,8 +1663,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) @@ -1840,10 +1681,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e2ea31175ef9..2160d5d009cb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -145,12 +145,12 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) @@ -223,8 +223,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); @@ -258,8 +258,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); return -1; @@ -361,8 +361,8 @@ found: discard_fq: inet_frag_kill(&fq->q, &ip6_frags); err: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -500,7 +500,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_network_header_len(head)); rcu_read_lock(); - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; fq->q.fragments_tail = NULL; @@ -513,7 +513,7 @@ out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); return -1; } @@ -528,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len == 0) @@ -544,8 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(net, - ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); + __IP6_INC_STATS(net, + ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; @@ -566,13 +566,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return ret; } - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; fail_hdr: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ed446639219c..af46e19205f5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -338,9 +338,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } -static struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags) { struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); @@ -364,6 +364,7 @@ static struct rt6_info *ip6_dst_alloc(struct net *net, return rt; } +EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { @@ -1417,8 +1418,20 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + struct dst_entry *dst; + ip6_update_pmtu(skb, sock_net(sk), mtu, sk->sk_bound_dev_if, sk->sk_mark); + + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || + dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) + return; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + ip6_datagram_dst_update(sk, false); + bh_unlock_sock(sk); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -1756,6 +1769,37 @@ static int ip6_convert_metrics(struct mx6_config *mxc, return -EINVAL; } +static struct rt6_info *ip6_nh_lookup_table(struct net *net, + struct fib6_config *cfg, + const struct in6_addr *gw_addr) +{ + struct flowi6 fl6 = { + .flowi6_oif = cfg->fc_ifindex, + .daddr = *gw_addr, + .saddr = cfg->fc_prefsrc, + }; + struct fib6_table *table; + struct rt6_info *rt; + int flags = 0; + + table = fib6_get_table(net, cfg->fc_table); + if (!table) + return NULL; + + if (!ipv6_addr_any(&cfg->fc_prefsrc)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); + + /* if table lookup failed, fall back to full lookup */ + if (rt == net->ipv6.ip6_null_entry) { + ip6_rt_put(rt); + rt = NULL; + } + + return rt; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) { struct net *net = cfg->fc_nlinfo.nl_net; @@ -1927,7 +1971,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) rt->rt6i_gateway = *gw_addr; if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - struct rt6_info *grt; + struct rt6_info *grt = NULL; /* IPv6 strictly inhibits using not link-local addresses as nexthop address. @@ -1939,7 +1983,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + if (cfg->fc_table) + grt = ip6_nh_lookup_table(net, cfg, gw_addr); + + if (!grt) + grt = rt6_lookup(net, gw_addr, NULL, + cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (!grt) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 83384308d032..a13d8c114ccb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -913,10 +913,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT); - if (IS_ERR(skb)) { + if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) { ip_rt_put(rt); - goto out; + goto tx_error; } if (df) { @@ -992,7 +991,6 @@ tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); -out: dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -1002,15 +1000,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); - if (IS_ERR(skb)) - goto out; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; -out: +tx_error: + kfree_skb(skb); dev->stats.tx_errors++; return NETDEV_TX_OK; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index aab91fa86c5e..59c483937aec 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -155,11 +155,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); if (mss == 0) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0e621bc1ae11..7bdc9c9c231b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->dev->ifindex); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -352,13 +352,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } @@ -368,7 +368,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -439,7 +439,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -452,7 +452,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, IPPROTO_TCP)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, foc, attach_req); + skb = tcp_make_synack(sk, dst, req, foc, synack_type); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -649,12 +649,12 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, return false; if (hash_expected && !hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return true; } if (!hash_expected && hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return true; } @@ -825,9 +825,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + TCP_INC_STATS(net, TCP_MIB_OUTRSTS); return; } @@ -1165,7 +1165,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * return newsk; out_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: @@ -1276,8 +1276,8 @@ discard: kfree_skb(skb); return 0; csum_err: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1359,7 +1359,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) /* * Count it even if it's bad. */ - TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); + __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1421,7 +1421,7 @@ process: } } if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1454,7 +1454,7 @@ process: } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); - NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); @@ -1472,9 +1472,9 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: - TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); + __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { tcp_v6_send_reset(NULL, skb); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a050b70b9101..f911c63f79e6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -423,24 +423,22 @@ try_again: if (!peeked) { atomic_inc(&sk->sk_drops); if (is_udp4) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, - is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + is_udplite); else - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, - is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + is_udplite); } skb_free_datagram_locked(sk, skb); return err; } if (!peeked) { if (is_udp4) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, + is_udplite); else - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, + is_udplite); } sock_recv_ts_and_drops(msg, sk, skb); @@ -487,15 +485,15 @@ csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) { - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); } else { - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); } } unlock_sock_fast(sk, slow); @@ -523,8 +521,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable, skb); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -572,9 +570,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -630,9 +628,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -666,8 +664,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); + __UDP6_INC_STATS(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; } @@ -686,9 +684,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return rc; csum_error: - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; @@ -771,10 +769,10 @@ start_lookup: nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); continue; } @@ -793,8 +791,8 @@ start_lookup: consume_skb(skb); } else { kfree_skb(skb); - UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -887,7 +885,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); @@ -901,9 +899,9 @@ short_packet: daddr, ntohs(uh->dest)); goto discard; csum_error: - UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: - UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1015,13 +1013,14 @@ send: err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } - } else - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + } else { + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); + } return err; } @@ -1342,8 +1341,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -1490,6 +1489,7 @@ struct proto udpv6_prot = { .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .backlog_rcv = __udpv6_queue_rcv_skb, + .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 2caaa84ce92d..1d02e8d20e56 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -346,22 +346,30 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, - atomic_long_read(&tunnel->stats.tx_packets)) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, - atomic_long_read(&tunnel->stats.tx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, - atomic_long_read(&tunnel->stats.tx_errors)) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, - atomic_long_read(&tunnel->stats.rx_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, - atomic_long_read(&tunnel->stats.rx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - atomic_long_read(&tunnel->stats.rx_seq_discards)) || - nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - atomic_long_read(&tunnel->stats.rx_oos_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, - atomic_long_read(&tunnel->stats.rx_errors))) + if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS, + atomic_long_read(&tunnel->stats.rx_seq_discards), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS, + atomic_long_read(&tunnel->stats.rx_oos_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors), + L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -746,29 +754,38 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl nla_put_u8(skb, L2TP_ATTR_USING_IPSEC, 1)) || #endif (session->reorder_timeout && - nla_put_msecs(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout))) + nla_put_msecs(skb, L2TP_ATTR_RECV_TIMEOUT, + session->reorder_timeout, L2TP_ATTR_PAD))) goto nla_put_failure; nest = nla_nest_start(skb, L2TP_ATTR_STATS); if (nest == NULL) goto nla_put_failure; - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, - atomic_long_read(&session->stats.tx_packets)) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, - atomic_long_read(&session->stats.tx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, - atomic_long_read(&session->stats.tx_errors)) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, - atomic_long_read(&session->stats.rx_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, - atomic_long_read(&session->stats.rx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - atomic_long_read(&session->stats.rx_seq_discards)) || - nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - atomic_long_read(&session->stats.rx_oos_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, - atomic_long_read(&session->stats.rx_errors))) + if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS, + atomic_long_read(&session->stats.rx_seq_discards), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS, + atomic_long_read(&session->stats.rx_oos_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors), + L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 0183b32da942..bbcf60465e5c 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -31,6 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_ECN))) goto out; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 404b2a4f4b5b..f35ebc02fa5c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2875,8 +2875,10 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, + IPVS_STATS_ATTR_PAD) || nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || @@ -2900,16 +2902,26 @@ static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, if (!nl_stats) return -EMSGSIZE; - if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) || - nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) || - nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps)) + if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, + IPVS_STATS_ATTR_PAD)) goto nla_put_failure; nla_nest_end(skb, nl_stats); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index dc196a0f501d..6d19d2eeaa60 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1013,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)); - if (IS_ERR(skb)) + if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) goto tx_error; skb->transport_header = skb->network_header; @@ -1105,8 +1104,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)); - if (IS_ERR(skb)) + if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) goto tx_error; skb->transport_header = skb->network_header; diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 3ce5c314ea4b..252e6a7cd2f1 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -16,28 +16,11 @@ static spinlock_t nf_connlabels_lock; -static unsigned int label_bits(const struct nf_conn_labels *l) -{ - unsigned int longs = l->words; - return longs * BITS_PER_LONG; -} - -bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) -{ - struct nf_conn_labels *labels = nf_ct_labels_find(ct); - - if (!labels) - return false; - - return bit < label_bits(labels) && test_bit(bit, labels->bits); -} -EXPORT_SYMBOL_GPL(nf_connlabel_match); - int nf_connlabel_set(struct nf_conn *ct, u16 bit) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - if (!labels || bit >= label_bits(labels)) + if (!labels || BIT_WORD(bit) >= labels->words) return -ENOSPC; if (test_bit(bit, labels->bits)) @@ -50,14 +33,18 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); -static void replace_u32(u32 *address, u32 mask, u32 new) +static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; do { old = *address; tmp = (old & mask) ^ new; + if (old == tmp) + return 0; } while (cmpxchg(address, old, tmp) != old); + + return 1; } int nf_connlabels_replace(struct nf_conn *ct, @@ -66,6 +53,7 @@ int nf_connlabels_replace(struct nf_conn *ct, { struct nf_conn_labels *labels; unsigned int size, i; + int changed = 0; u32 *dst; labels = nf_ct_labels_find(ct); @@ -77,29 +65,27 @@ int nf_connlabels_replace(struct nf_conn *ct, words32 = size / sizeof(u32); dst = (u32 *) labels->bits; - if (words32) { - for (i = 0; i < words32; i++) - replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); - } + for (i = 0; i < words32; i++) + changed |= replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); size /= sizeof(u32); for (i = words32; i < size; i++) /* pad */ replace_u32(&dst[i], 0, 0); - nf_conntrack_event_cache(IPCT_LABEL, ct); + if (changed) + nf_conntrack_event_cache(IPCT_LABEL, ct); return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_replace); -int nf_connlabels_get(struct net *net, unsigned int n_bits) +int nf_connlabels_get(struct net *net, unsigned int bits) { size_t words; - if (n_bits > (NF_CT_LABELS_MAX_SIZE * BITS_PER_BYTE)) + words = BIT_WORD(bits) + 1; + if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; - words = BITS_TO_LONGS(n_bits); - spin_lock(&nf_connlabels_lock); net->ct.labels_used++; if (words > net->ct.label_words) @@ -128,6 +114,8 @@ static struct nf_ct_ext_type labels_extend __read_mostly = { int nf_conntrack_labels_init(void) { + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); + spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 355e8552fd5b..294a8e28cec4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -58,10 +58,9 @@ MODULE_LICENSE("GPL"); static char __initdata version[] = "0.93"; -static inline int -ctnetlink_dump_tuples_proto(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l4proto *l4proto) +static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nlattr *nest_parms; @@ -83,10 +82,9 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_tuples_ip(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto) +static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l3proto *l3proto) { int ret = 0; struct nlattr *nest_parms; @@ -106,9 +104,8 @@ nla_put_failure: return -1; } -static int -ctnetlink_dump_tuples(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple) +static int ctnetlink_dump_tuples(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) { int ret; struct nf_conntrack_l3proto *l3proto; @@ -127,9 +124,8 @@ ctnetlink_dump_tuples(struct sk_buff *skb, return ret; } -static inline int -ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, - const struct nf_conntrack_zone *zone, int dir) +static int ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, + const struct nf_conntrack_zone *zone, int dir) { if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir) return 0; @@ -141,8 +137,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) goto nla_put_failure; @@ -152,8 +147,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ; @@ -168,8 +162,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) +static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; @@ -193,8 +186,8 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_helpinfo(struct sk_buff *skb, + const struct nf_conn *ct) { struct nlattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); @@ -245,8 +238,10 @@ dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, if (!nest_count) goto nla_put_failure; - if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)) || - nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes))) + if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts), + CTA_COUNTERS_PAD) || + nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes), + CTA_COUNTERS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest_count); @@ -287,9 +282,11 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) if (!nest_count) goto nla_put_failure; - if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)) || + if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start), + CTA_TIMESTAMP_PAD) || (tstamp->stop != 0 && nla_put_be64(skb, CTA_TIMESTAMP_STOP, - cpu_to_be64(tstamp->stop)))) + cpu_to_be64(tstamp->stop), + CTA_TIMESTAMP_PAD))) goto nla_put_failure; nla_nest_end(skb, nest_count); @@ -300,8 +297,7 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static inline int -ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) goto nla_put_failure; @@ -315,8 +311,7 @@ nla_put_failure: #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK -static inline int -ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_secctx; int len, ret; @@ -345,7 +340,7 @@ nla_put_failure: #endif #ifdef CONFIG_NF_CONNTRACK_LABELS -static int ctnetlink_label_size(const struct nf_conn *ct) +static inline int ctnetlink_label_size(const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); @@ -380,8 +375,7 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) -static inline int -ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_parms; @@ -426,8 +420,8 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, + const struct nf_conn *ct) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *seq; @@ -446,8 +440,7 @@ ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) return 0; } -static inline int -ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) goto nla_put_failure; @@ -457,8 +450,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) goto nla_put_failure; @@ -538,8 +530,7 @@ nla_put_failure: return -1; } -static inline size_t -ctnetlink_proto_size(const struct nf_conn *ct) +static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -556,19 +547,17 @@ ctnetlink_proto_size(const struct nf_conn *ct) return len; } -static inline size_t -ctnetlink_acct_size(const struct nf_conn *ct) +static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ - + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ - + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ + + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ + + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ ; } -static inline int -ctnetlink_secctx_size(const struct nf_conn *ct) +static inline int ctnetlink_secctx_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_SECMARK int len, ret; @@ -584,20 +573,19 @@ ctnetlink_secctx_size(const struct nf_conn *ct) #endif } -static inline size_t -ctnetlink_timestamp_size(const struct nf_conn *ct) +static inline size_t ctnetlink_timestamp_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) return 0; - return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); + return nla_total_size(0) + 2 * nla_total_size_64bit(sizeof(uint64_t)); #else return 0; #endif } -static inline size_t -ctnetlink_nlmsg_size(const struct nf_conn *ct) +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) { return NLMSG_ALIGN(sizeof(struct nfgenmsg)) + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ @@ -628,7 +616,6 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) ; } -#ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { @@ -891,8 +878,8 @@ out: return skb->len; } -static inline int -ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) +static int ctnetlink_parse_tuple_ip(struct nlattr *attr, + struct nf_conntrack_tuple *tuple) { struct nlattr *tb[CTA_IP_MAX+1]; struct nf_conntrack_l3proto *l3proto; @@ -921,9 +908,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { [CTA_PROTO_NUM] = { .type = NLA_U8 }, }; -static inline int -ctnetlink_parse_tuple_proto(struct nlattr *attr, - struct nf_conntrack_tuple *tuple) +static int ctnetlink_parse_tuple_proto(struct nlattr *attr, + struct nf_conntrack_tuple *tuple) { struct nlattr *tb[CTA_PROTO_MAX+1]; struct nf_conntrack_l4proto *l4proto; @@ -1050,9 +1036,8 @@ static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { .len = NF_CT_HELPER_NAME_LEN - 1 }, }; -static inline int -ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, - struct nlattr **helpinfo) +static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, + struct nlattr **helpinfo) { int err; struct nlattr *tb[CTA_HELP_MAX+1]; @@ -1463,8 +1448,8 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) #endif } -static inline int -ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) +static int ctnetlink_change_helper(struct nf_conn *ct, + const struct nlattr * const cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -1524,8 +1509,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return -EOPNOTSUPP; } -static inline int -ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) +static int ctnetlink_change_timeout(struct nf_conn *ct, + const struct nlattr * const cda[]) { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); @@ -1544,8 +1529,8 @@ static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, }; -static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) +static int ctnetlink_change_protoinfo(struct nf_conn *ct, + const struct nlattr * const cda[]) { const struct nlattr *attr = cda[CTA_PROTOINFO]; struct nlattr *tb[CTA_PROTOINFO_MAX+1]; @@ -1571,8 +1556,8 @@ static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = { [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 }, }; -static inline int -change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr) +static int change_seq_adj(struct nf_ct_seqadj *seq, + const struct nlattr * const attr) { int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; @@ -2405,10 +2390,9 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { * EXPECT ***********************************************************************/ -static inline int -ctnetlink_exp_dump_tuple(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) +static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + enum ctattr_expect type) { struct nlattr *nest_parms; @@ -2425,10 +2409,9 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_exp_dump_mask(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple_mask *mask) +static int ctnetlink_exp_dump_mask(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple_mask *mask) { int ret; struct nf_conntrack_l3proto *l3proto; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index fce1b1cca32d..399a38fd685a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -645,7 +645,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, - cpu_to_be64(ct->proto.dccp.handshake_seq))) + cpu_to_be64(ct->proto.dccp.handshake_seq), + CTA_PROTOINFO_DCCP_PAD)) goto nla_put_failure; nla_nest_end(skb, nest_parms); spin_unlock_bh(&ct->lock); @@ -660,6 +661,7 @@ static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, + [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 9578a7c371ef..1d7ab960a9e6 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -191,13 +191,7 @@ static void sctp_print_tuple(struct seq_file *s, /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { - enum sctp_conntrack state; - - spin_lock_bh(&ct->lock); - state = ct->proto.sctp.state; - spin_unlock_bh(&ct->lock); - - seq_printf(s, "%s ", sctp_conntrack_names[state]); + seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); } #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 278f3b9356ef..70c8381641a7 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -313,13 +313,7 @@ static void tcp_print_tuple(struct seq_file *s, /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { - enum tcp_conntrack state; - - spin_lock_bh(&ct->lock); - state = ct->proto.tcp.state; - spin_unlock_bh(&ct->lock); - - seq_printf(s, "%s ", tcp_conntrack_names[state]); + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -410,6 +404,8 @@ static void tcp_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; @@ -470,6 +466,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2011977cd79d..7a85a9dd37ad 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -944,8 +944,10 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) if (nest == NULL) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) || - nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes))) + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts), + NFTA_COUNTER_PAD) || + nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), + NFTA_COUNTER_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -975,7 +977,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle))) + if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), + NFTA_CHAIN_PAD)) goto nla_put_failure; if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) goto nla_put_failure; @@ -1803,13 +1806,15 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle), + NFTA_RULE_PAD)) goto nla_put_failure; if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { prule = list_entry(rule->list.prev, struct nft_rule, list); if (nla_put_be64(skb, NFTA_RULE_POSITION, - cpu_to_be64(prule->handle))) + cpu_to_be64(prule->handle), + NFTA_RULE_PAD)) goto nla_put_failure; } @@ -2473,7 +2478,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, } if (set->timeout && - nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout))) + nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout), + NFTA_SET_PAD)) goto nla_put_failure; if (set->gc_int && nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) @@ -3076,7 +3082,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - cpu_to_be64(*nft_set_ext_timeout(ext)))) + cpu_to_be64(*nft_set_ext_timeout(ext)), + NFTA_SET_ELEM_PAD)) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { @@ -3089,7 +3096,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, expires = 0; if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, - cpu_to_be64(jiffies_to_msecs(expires)))) + cpu_to_be64(jiffies_to_msecs(expires)), + NFTA_SET_ELEM_PAD)) goto nla_put_failure; } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index e9e959f65d91..39eb1cc62e91 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -156,7 +156,8 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb, return 0; return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, - cpu_to_be64(info->rule->handle)); + cpu_to_be64(info->rule->handle), + NFTA_TRACE_PAD); } void nft_trace_notify(struct nft_traceinfo *info) @@ -174,7 +175,7 @@ void nft_trace_notify(struct nft_traceinfo *info) size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(NFT_TABLE_MAXNAMELEN) + nla_total_size(NFT_CHAIN_MAXNAMELEN) + - nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ nla_total_size(0) + /* VERDICT, nested */ nla_total_size(sizeof(u32)) + /* verdict code */ diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 4c2b4c0c4d5f..d016066a25e3 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -160,15 +160,18 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); } - if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) || - nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || + if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts), + NFACCT_PAD) || + nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes), + NFACCT_PAD) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; if (acct->flags & NFACCT_F_QUOTA) { u64 *quota = (u64 *)acct->data; if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) || - nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota), + NFACCT_PAD)) goto nla_put_failure; } nlmsg_end(skb, nlh); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index c9743f78f219..77db8358ab14 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -76,8 +76,10 @@ static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) nft_counter_fetch(priv->counter, &total); - if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || - nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), + NFTA_COUNTER_PAD) || + nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), + NFTA_COUNTER_PAD)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d4a4619fcebc..25998facefd0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -484,6 +484,8 @@ static struct nft_expr_type nft_ct_type __read_mostly = { static int __init nft_ct_module_init(void) { + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); + return nft_register_expr(&nft_ct_type); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9dec3bd1b63c..78d4914fb39c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -227,7 +227,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout), + NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 99d18578afc6..070b98938e02 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -97,8 +97,10 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || - nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate), + NFTA_LIMIT_PAD) || + nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), + NFTA_LIMIT_PAD) || nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) || nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags))) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 582c9cfd6567..c69c892231d7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -416,6 +416,47 @@ int xt_check_match(struct xt_mtchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_match); +/** xt_check_entry_match - check that matches end before start of target + * + * @match: beginning of xt_entry_match + * @target: beginning of this rules target (alleged end of matches) + * @alignment: alignment requirement of match structures + * + * Validates that all matches add up to the beginning of the target, + * and that each match covers at least the base structure size. + * + * Return: 0 on success, negative errno on failure. + */ +static int xt_check_entry_match(const char *match, const char *target, + const size_t alignment) +{ + const struct xt_entry_match *pos; + int length = target - match; + + if (length == 0) /* no matches */ + return 0; + + pos = (struct xt_entry_match *)match; + do { + if ((unsigned long)pos % alignment) + return -EINVAL; + + if (length < (int)sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size < sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size > length) + return -EINVAL; + + length -= pos->u.match_size; + pos = ((void *)((char *)(pos) + (pos)->u.match_size)); + } while (length > 0); + + return 0; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -485,13 +526,14 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; + char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); @@ -505,10 +547,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; + strlcpy(name, match->name, sizeof(name)); + module_put(match->me); + strncpy(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); @@ -539,8 +583,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +/* non-compat version may have padding after verdict */ +struct compat_xt_standard_target { + struct compat_xt_entry_target t; + compat_uint_t verdict; +}; + +int xt_compat_check_entry_offsets(const void *base, const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + return -EINVAL; + + /* compat_xt_entry match has less strict aligment requirements, + * otherwise they are identical. In case of padding differences + * we need to add compat version of xt_check_entry_match. + */ + BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct compat_xt_entry_match)); +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ +/** + * xt_check_entry_offsets - validate arp/ip/ip6t_entry + * + * @base: pointer to arp/ip/ip6t_entry + * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems + * @target_offset: the arp/ip/ip6_t->target_offset + * @next_offset: the arp/ip/ip6_t->next_offset + * + * validates that target_offset and next_offset are sane and that all + * match sizes (if any) align with the target offset. + * + * This function does not validate the targets or matches themselves, it + * only tests that all the offsets and sizes are correct, that all + * match structures are aligned, and that the last structure ends where + * the target structure begins. + * + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. + * + * The arp/ip/ip6t_entry structure @base must have passed following tests: + * - it must point to a valid memory location + * - base to base + next_offset must be accessible, i.e. not exceed allocated + * length. + * + * A well-formed entry looks like this: + * + * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry + * e->elems[]-----' | | + * matchsize | | + * matchsize | | + * | | + * target_offset---------------------------------' | + * next_offset---------------------------------------------------' + * + * elems[]: flexible array member at end of ip(6)/arpt_entry struct. + * This is where matches (if any) and the target reside. + * target_offset: beginning of target. + * next_offset: start of the next rule; also: size of this rule. + * Since targets have a minimum size, target_offset + minlen <= next_offset. + * + * Every match stores its size, sum of sizes must not exceed target_offset. + * + * Return: 0 on success, negative errno on failure. + */ +int xt_check_entry_offsets(const void *base, + const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct xt_entry_target *t; + const char *e = base; + + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct xt_standard_target) != next_offset) + return -EINVAL; + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct xt_entry_match)); +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -591,6 +752,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel + * + * Copies counter meta data from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer. + */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { @@ -606,6 +841,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; + char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); @@ -619,6 +855,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; + strlcpy(name, target->name, sizeof(name)); + module_put(target->me); + strncpy(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index bb9cbeb18868..a79af255561a 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -18,6 +18,16 @@ MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); +static bool connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return BIT_WORD(bit) < labels->words && test_bit(bit, labels->bits); +} + static bool connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -33,7 +43,7 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->options & XT_CONNLABEL_OP_SET) return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; - return nf_connlabel_match(ct, info->bit) ^ invert; + return connlabel_match(ct, info->bit) ^ invert; } static int connlabel_mt_check(const struct xt_mtchk_param *par) @@ -55,7 +65,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return ret; } - ret = nf_connlabels_get(par->net, info->bit + 1); + ret = nf_connlabels_get(par->net, info->bit); if (ret < 0) nf_ct_l3proto_module_put(par->family); return ret; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0f16bf635480..aeefe127691a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -688,7 +688,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->portid) { + if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e9dd47b2a85b..879185fe183f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -461,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { - set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked, true); memcpy(&flow_key->ipv6.addr.src, masked, sizeof(flow_key->ipv6.addr.src)); @@ -483,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, NULL, &flags) != NEXTHDR_ROUTING); - set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked, recalc_csum); memcpy(&flow_key->ipv6.addr.dst, masked, sizeof(flow_key->ipv6.addr.dst)); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 1b9d286756be..9741a76c7405 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -367,6 +367,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; + skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err) @@ -1344,7 +1345,7 @@ void ovs_ct_init(struct net *net) unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); - if (nf_connlabels_get(net, n_bits)) { + if (nf_connlabels_get(net, n_bits - 1)) { ovs_net->xt_label = false; OVS_NLERR(true, "Failed to set connlabel length"); } else { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0cc66a4e492d..856bd8dba676 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -738,9 +738,9 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, len += nla_total_size(acts->orig_len); return len - + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ + + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -754,11 +754,14 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && - nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) + nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used), + OVS_FLOW_ATTR_PAD)) return -EMSGSIZE; if (stats.n_packets && - nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) + nla_put_64bit(skb, OVS_FLOW_ATTR_STATS, + sizeof(struct ovs_flow_stats), &stats, + OVS_FLOW_ATTR_PAD)) return -EMSGSIZE; if ((u8)ntohs(tcp_flags) && @@ -1434,8 +1437,8 @@ static size_t ovs_dp_cmd_msg_size(void) size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); msgsize += nla_total_size(IFNAMSIZ); - msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); - msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); + msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); + msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ return msgsize; @@ -1462,13 +1465,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, goto nla_put_failure; get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); - if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), - &dp_stats)) + if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), + &dp_stats, OVS_DP_ATTR_PAD)) goto nla_put_failure; - if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, - sizeof(struct ovs_dp_megaflow_stats), - &dp_megaflow_stats)) + if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS, + sizeof(struct ovs_dp_megaflow_stats), + &dp_megaflow_stats, OVS_DP_ATTR_PAD)) goto nla_put_failure; if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) @@ -1837,8 +1840,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); - if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), - &vport_stats)) + if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, + sizeof(struct ovs_vport_stats), &vport_stats, + OVS_VPORT_ATTR_PAD)) goto nla_put_failure; if (ovs_vport_get_upcall_portids(vport, skb)) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 689c17264221..0bb650f4f219 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -261,7 +261,7 @@ size_t ovs_tun_key_attr_size(void) /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider * updating this function. */ - return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ @@ -720,7 +720,8 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, unsigned short tun_proto) { if (output->tun_flags & TUNNEL_KEY && - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, + OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; switch (tun_proto) { case AF_INET: diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 7c8b90bf0e54..2ee48e447b72 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -165,11 +165,10 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | - IFF_PHONY_HEADROOM; + IFF_PHONY_HEADROOM | IFF_NO_QUEUE; netdev->destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; - netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 81a4c0574d73..4040eb92d9c9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2052,6 +2052,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; + bool is_drop_n_account = false; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -2140,6 +2141,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: + is_drop_n_account = true; spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_drops++; atomic_inc(&sk->sk_drops); @@ -2151,7 +2153,10 @@ drop_n_restore: skb->len = skb_len; } drop: - consume_skb(skb); + if (!is_drop_n_account) + consume_skb(skb); + else + kfree_skb(skb); return 0; } @@ -2170,6 +2175,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timespec ts; __u32 ts_status; + bool is_drop_n_account = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing @@ -2377,10 +2383,14 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb(skb); + if (!is_drop_n_account) + consume_skb(skb); + else + kfree_skb(skb); return 0; drop_n_account: + is_drop_n_account = true; po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); @@ -3541,6 +3551,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); + memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; diff --git a/net/rds/cong.c b/net/rds/cong.c index e6144b8246fd..6641bcf7c185 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -299,7 +299,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - __set_bit_le(off, (void *)map->m_page_addrs[i]); + set_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) @@ -313,7 +313,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - __clear_bit_le(off, (void *)map->m_page_addrs[i]); + clear_bit_le(off, (void *)map->m_page_addrs[i]); } static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 8764970f0c24..310cabce2311 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -194,7 +194,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); - dp->dp_ack_seq = rds_ib_piggyb_ack(ic); + dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); /* Advertise flow control */ if (ic->i_flowctl) { diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 27a992154804..d75d8b56a9e3 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -207,22 +207,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } if (left && tc->t_tinc_data_rem) { - clone = skb_clone(skb, arg->gfp); + to_copy = min(tc->t_tinc_data_rem, left); + + clone = pskb_extract(skb, offset, to_copy, arg->gfp); if (!clone) { desc->error = -ENOMEM; goto out; } - to_copy = min(tc->t_tinc_data_rem, left); - if (!pskb_pull(clone, offset) || - pskb_trim(clone, to_copy)) { - pr_warn("rds_tcp_data_recv: pull/trim failed " - "left %zu data_rem %zu skb_len %d\n", - left, tc->t_tinc_data_rem, skb->len); - kfree_skb(clone); - desc->error = -ENOMEM; - goto out; - } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 01e038146b7c..6ff97412a0bb 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -698,12 +698,12 @@ void rxrpc_data_ready(struct sock *sk) if (skb_checksum_complete(skb)) { rxrpc_free_skb(skb); rxrpc_put_local(local); - UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0); + __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; } - UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); + __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); /* The socket buffer we have is owned by UDP, with UDP's data all over * it, but we really want our own data there. diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 96066665e376..336774a535c3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -657,12 +657,15 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d); + TCA_STATS, + TCA_XSTATS, + &p->tcfc_lock, &d, + TCA_PAD); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - &p->tcfc_lock, &d); + &p->tcfc_lock, &d, TCA_ACT_PAD); if (err < 0) goto errout; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 8c9f1f0459ab..4fd703362563 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -156,7 +156,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); - if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) + if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm, + TCA_ACT_BPF_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index c0ed93ce2391..2ba700c765e0 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -163,7 +163,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); - if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t, + TCA_CONNMARK_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index d22426cdebc0..28e934ed038a 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -549,7 +549,7 @@ static int tcf_csum_dump(struct sk_buff *skb, t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); - if (nla_put(skb, TCA_CSUM_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 887fc1f209ff..1a6e09fbb2a5 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -177,7 +177,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); - if (nla_put(skb, TCA_GACT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c589a9ba506a..556f44c9c454 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -550,7 +550,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(ife->tcf_tm.expires); - if (nla_put(skb, TCA_IFE_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; if (!is_zero_ether_addr(ife->eth_dst)) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 350e134cffb3..1464f6a09446 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -275,7 +275,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); - if (nla_put(skb, TCA_IPT_TM, sizeof (tm), &tm)) + if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) goto nla_put_failure; kfree(t); return skb->len; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e8a760cf7775..dea57c1ec90c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -214,7 +214,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(m->tcf_tm.expires); - if (nla_put(skb, TCA_MIRRED_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 0f65cdfbfb1d..c0a879f940de 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -267,7 +267,7 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); - if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 429c3ab65142..c6e18f230af6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -203,7 +203,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); - if (nla_put(skb, TCA_PEDIT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) goto nla_put_failure; kfree(opt); return skb->len; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 75b2be13fbcc..2057fd56d74c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -155,7 +155,7 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); - if (nla_put(skb, TCA_DEF_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cfcdbdc00c9b..51b24998904f 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -167,7 +167,7 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); - if (nla_put(skb, TCA_SKBEDIT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index bab8ae0cefc0..c1682ab9bc7e 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -175,7 +175,7 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(v->tcf_tm.expires); - if (nla_put(skb, TCA_VLAN_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 563cdad76448..e64877a3c084 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1140,9 +1140,10 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, gpf->kcnts[i] += pf->kcnts[i]; } - if (nla_put(skb, TCA_U32_PCNT, - sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64), - gpf)) { + if (nla_put_64bit(skb, TCA_U32_PCNT, + sizeof(struct tc_u32_pcnt) + + n->sel.nkeys * sizeof(u64), + gpf, TCA_U32_PAD)) { kfree(gpf); goto nla_put_failure; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 3b180ff72f79..64f71a2155f3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1365,7 +1365,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d) < 0) + qdisc_root_sleeping_lock(q), &d, + TCA_PAD) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1679,7 +1680,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d) < 0) + qdisc_root_sleeping_lock(q), &d, + TCA_PAD) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 9b7e2980ee5c..dddf3bb65a32 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -49,6 +49,8 @@ #include <linux/prefetch.h> #include <net/pkt_sched.h> #include <net/codel.h> +#include <net/codel_impl.h> +#include <net/codel_qdisc.h> #define DEFAULT_CODEL_LIMIT 1000 @@ -64,20 +66,33 @@ struct codel_sched_data { * to dequeue a packet from queue. Note: backlog is handled in * codel, we dont need to reduce it here. */ -static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) +static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { + struct Qdisc *sch = ctx; struct sk_buff *skb = __skb_dequeue(&sch->q); + if (skb) + sch->qstats.backlog -= qdisc_pkt_len(skb); + prefetch(&skb->end); /* we'll need skb_shinfo() */ return skb; } +static void drop_func(struct sk_buff *skb, void *ctx) +{ + struct Qdisc *sch = ctx; + + qdisc_drop(skb, sch); +} + static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) { struct codel_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); + skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars, + &q->stats, qdisc_pkt_len, codel_get_enqueue_time, + drop_func, dequeue_func); /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. @@ -173,9 +188,10 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = DEFAULT_CODEL_LIMIT; - codel_params_init(&q->params, sch); + codel_params_init(&q->params); codel_vars_init(&q->vars); codel_stats_init(&q->stats); + q->params.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { int err = codel_change(sch, opt); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d3fc8f9dd3d4..a5e420b3d4ab 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -24,6 +24,8 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/codel.h> +#include <net/codel_impl.h> +#include <net/codel_qdisc.h> /* Fair Queue CoDel. * @@ -220,8 +222,9 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) * to dequeue a packet from queue. Note: backlog is handled in * codel, we dont need to reduce it here. */ -static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) +static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { + struct Qdisc *sch = ctx; struct fq_codel_sched_data *q = qdisc_priv(sch); struct fq_codel_flow *flow; struct sk_buff *skb = NULL; @@ -231,10 +234,18 @@ static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) skb = dequeue_head(flow); q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); } return skb; } +static void drop_func(struct sk_buff *skb, void *ctx) +{ + struct Qdisc *sch = ctx; + + qdisc_drop(skb, sch); +} + static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); @@ -263,8 +274,9 @@ begin: prev_ecn_mark = q->cstats.ecn_mark; prev_backlog = sch->qstats.backlog; - skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, - dequeue); + skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, + &flow->cvars, &q->cstats, qdisc_pkt_len, + codel_get_enqueue_time, drop_func, dequeue_func); flow->dropped += q->cstats.drop_count - prev_drop_count; flow->dropped += q->cstats.ecn_mark - prev_ecn_mark; @@ -423,9 +435,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) q->perturbation = prandom_u32(); INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); - codel_params_init(&q->cparams, sch); + codel_params_init(&q->cparams); codel_stats_init(&q->cstats); q->cparams.ecn = true; + q->cparams.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { int err = fq_codel_change(sch, opt); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f18c35024207..9c7756237904 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -108,35 +108,6 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, return skb; } -static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct netdev_queue *dev_queue, - struct Qdisc *q) -{ - int ret; - - if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { - /* - * Same CPU holding the lock. It may be a transient - * configuration error, when hard_start_xmit() recurses. We - * detect it by checking xmit owner and drop the packet when - * deadloop is detected. Return OK to try the next skb. - */ - kfree_skb_list(skb); - net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n", - dev_queue->dev->name); - ret = qdisc_qlen(q); - } else { - /* - * Another cpu is holding lock, requeue & delay xmits for - * some time. - */ - __this_cpu_inc(softnet_data.cpu_collision); - ret = dev_requeue_skb(skb, q); - } - - return ret; -} - /* * Transmit possibly several skbs, and handle the return status as * required. Holding the __QDISC___STATE_RUNNING bit guarantees that @@ -159,21 +130,21 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (validate) skb = validate_xmit_skb_list(skb, dev); - if (skb) { + if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); + } else { + spin_lock(root_lock); + return qdisc_qlen(q); } spin_lock(root_lock); if (dev_xmit_complete(ret)) { /* Driver sent out skb successfully or skb was consumed */ ret = qdisc_qlen(q); - } else if (ret == NETDEV_TX_LOCKED) { - /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, txq, q); } else { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely(ret != NETDEV_TX_BUSY)) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 87b02ed3d5f2..f6bf5818ed4d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1122,10 +1122,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && - nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, + TCA_HTB_PAD)) goto nla_put_failure; if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && - nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps, + TCA_HTB_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9640bb39a5d2..491d6fd6430c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -994,7 +994,8 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; if (q->rate >= (1ULL << 32)) { - if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate)) + if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate, + TCA_NETEM_PAD)) goto nla_put_failure; rate.rate = ~0U; } else { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c2fbde742f37..83b90b584fae 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -472,11 +472,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (q->rate.rate_bytes_ps >= (1ULL << 32) && - nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, + TCA_TBF_PAD)) goto nla_put_failure; if (tbf_peak_present(q) && q->peak.rate_bytes_ps >= (1ULL << 32) && - nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, + TCA_TBF_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 71c1a598d9bc..d9c04dc1b3f3 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -99,5 +99,9 @@ config SCTP_COOKIE_HMAC_SHA1 select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 +config INET_SCTP_DIAG + depends on INET_DIAG + def_tristate INET_DIAG + endif # IP_SCTP diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 3b4ffb021cf1..0fca5824ad0e 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_IP_SCTP) += sctp.o obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o +obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 958ef5f33f4b..1eb94bf18ef4 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -239,7 +239,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, offset = 0; if ((whole > 1) || (whole && over)) - SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ for (i = 0, len = first_len; i < whole; i++) { diff --git a/net/sctp/input.c b/net/sctp/input.c index 00b8445364e3..a701527a9480 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. */ - SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); + __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); + __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); + __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } @@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -532,7 +532,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 7e8a16c77039..9d87bba0ff1d 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ + local_bh_disable(); list_add_tail(&chunk->list, &q->in_chunk_list); if (chunk->asoc) chunk->asoc->stats.ipackets++; q->immediate.func(&q->immediate); + local_bh_enable(); } /* Peek at the next chunk on the inqeue. */ @@ -163,6 +165,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->singleton = 1; ch = (sctp_chunkhdr_t *) chunk->skb->data; chunk->data_accepted = 0; + + if (chunk->asoc) + sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); } chunk->chunk_hdr = ch; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ce46f1c7f133..0657d18a85bf 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -162,7 +162,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS); goto out; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8d3d3625130e..084718f9b3da 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -866,8 +866,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) * sender MUST assure that at least one T3-rtx * timer is running. */ - if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) - sctp_transport_reset_timers(transport); + if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; + } } break; @@ -924,8 +926,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) error = sctp_outq_flush_rtx(q, packet, rtx_timeout, &start_timer); - if (start_timer) - sctp_transport_reset_timers(transport); + if (start_timer) { + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; + } /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -1062,7 +1066,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport); + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 6d45d53321e6..4cb5aedfe3ee 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -280,83 +280,38 @@ void sctp_eps_proc_exit(struct net *net) struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; + int start_fail; }; -static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq) +static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - struct sctp_transport *t; - - t = rhashtable_walk_next(&iter->hti); - for (; t; t = rhashtable_walk_next(&iter->hti)) { - if (IS_ERR(t)) { - if (PTR_ERR(t) == -EAGAIN) - continue; - break; - } + int err = sctp_transport_walk_start(&iter->hti); - if (net_eq(sock_net(t->asoc->base.sk), seq_file_net(seq)) && - t->asoc->peer.primary_path == t) - break; + if (err) { + iter->start_fail = 1; + return ERR_PTR(err); } - return t; + return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } -static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq, - loff_t pos) -{ - void *obj = SEQ_START_TOKEN; - - while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj)) - pos--; - - return obj; -} - -static int sctp_transport_walk_start(struct seq_file *seq) +static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; - int err; - - err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti, - GFP_KERNEL); - if (err) - return err; - - err = rhashtable_walk_start(&iter->hti); - return err == -EAGAIN ? 0 : err; + if (iter->start_fail) + return; + sctp_transport_walk_stop(&iter->hti); } -static void sctp_transport_walk_stop(struct seq_file *seq) +static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - rhashtable_walk_stop(&iter->hti); - rhashtable_walk_exit(&iter->hti); -} - -static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) -{ - int err = sctp_transport_walk_start(seq); - - if (err) - return ERR_PTR(err); - - return sctp_transport_get_idx(seq, *pos); -} - -static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) -{ - sctp_transport_walk_stop(seq); -} - -static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ ++*pos; - return sctp_transport_get_next(seq); + return sctp_transport_get_next(seq_file_net(seq), &iter->hti); } /* Display sctp associations (/proc/net/sctp/assocs). */ @@ -417,9 +372,9 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations sctp_assoc_ops = { - .start = sctp_assocs_seq_start, - .next = sctp_assocs_seq_next, - .stop = sctp_assocs_seq_stop, + .start = sctp_transport_seq_start, + .next = sctp_transport_seq_next, + .stop = sctp_transport_seq_stop, .show = sctp_assocs_seq_show, }; @@ -456,28 +411,6 @@ void sctp_assocs_proc_exit(struct net *net) remove_proc_entry("assocs", net->sctp.proc_net_sctp); } -static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) -{ - int err = sctp_transport_walk_start(seq); - - if (err) - return ERR_PTR(err); - - return sctp_transport_get_idx(seq, *pos); -} - -static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return sctp_transport_get_next(seq); -} - -static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) -{ - sctp_transport_walk_stop(seq); -} - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -551,9 +484,9 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations sctp_remaddr_ops = { - .start = sctp_remaddr_seq_start, - .next = sctp_remaddr_seq_next, - .stop = sctp_remaddr_seq_stop, + .start = sctp_transport_seq_start, + .next = sctp_transport_seq_next, + .stop = sctp_transport_seq_stop, .show = sctp_remaddr_seq_show, }; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c new file mode 100644 index 000000000000..8e3e769dc9ea --- /dev/null +++ b/net/sctp/sctp_diag.c @@ -0,0 +1,500 @@ +#include <linux/module.h> +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> +#include <net/sctp/sctp.h> + +extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, + struct sock *sk); +extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + +static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info); + +/* define some functions to make asoc/ep fill look clean */ +static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, + struct sock *sk, + struct sctp_association *asoc) +{ + union sctp_addr laddr, paddr; + struct dst_entry *dst; + + laddr = list_entry(asoc->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list)->a; + paddr = asoc->peer.primary_path->ipaddr; + dst = asoc->peer.primary_path->dst; + + r->idiag_family = sk->sk_family; + r->id.idiag_sport = htons(asoc->base.bind_addr.port); + r->id.idiag_dport = htons(asoc->peer.port); + r->id.idiag_if = dst ? dst->dev->ifindex : 0; + sock_diag_save_cookie(sk, r->id.idiag_cookie); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr; + *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr; + } else +#endif + { + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + + r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr; + r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr; + } + + r->idiag_state = asoc->state; + r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; + r->idiag_retrans = asoc->rtx_data_chunks; + r->idiag_expires = jiffies_to_msecs( + asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies); +} + +static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, + struct list_head *address_list) +{ + struct sctp_sockaddr_entry *laddr; + int addrlen = sizeof(struct sockaddr_storage); + int addrcnt = 0; + struct nlattr *attr; + void *info = NULL; + + list_for_each_entry_rcu(laddr, address_list, list) + addrcnt++; + + attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + list_for_each_entry_rcu(laddr, address_list, list) { + memcpy(info, &laddr->a, addrlen); + info += addrlen; + } + + return 0; +} + +static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, + struct sctp_association *asoc) +{ + int addrlen = sizeof(struct sockaddr_storage); + struct sctp_transport *from; + struct nlattr *attr; + void *info = NULL; + + attr = nla_reserve(skb, INET_DIAG_PEERS, + addrlen * asoc->peer.transport_count); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + list_for_each_entry(from, &asoc->peer.transport_addr_list, + transports) { + memcpy(info, &from->ipaddr, addrlen); + info += addrlen; + } + + return 0; +} + +/* sctp asoc/ep fill*/ +static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, + struct sk_buff *skb, + const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + int portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct list_head *addr_list; + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + int ext = req->idiag_ext; + struct sctp_infox infox; + void *info = NULL; + + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), + nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + BUG_ON(!sk_fullsock(sk)); + + if (asoc) { + inet_diag_msg_sctpasoc_fill(r, sk, asoc); + } else { + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + } + + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + goto errout; + + if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { + u32 mem[SK_MEMINFO_VARS]; + int amt; + + if (asoc && asoc->ep->sndbuf_policy) + amt = asoc->sndbuf_used; + else + amt = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_WMEM_ALLOC] = amt; + if (asoc && asoc->ep->rcvbuf_policy) + amt = atomic_read(&asoc->rmem_alloc); + else + amt = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RMEM_ALLOC] = amt; + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + + if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0) + goto errout; + } + + if (ext & (1 << (INET_DIAG_INFO - 1))) { + struct nlattr *attr; + + attr = nla_reserve_64bit(skb, INET_DIAG_INFO, + sizeof(struct sctp_info), + INET_DIAG_PAD); + if (!attr) + goto errout; + + info = nla_data(attr); + } + infox.sctpinfo = (struct sctp_info *)info; + infox.asoc = asoc; + sctp_diag_get_info(sk, r, &infox); + + addr_list = asoc ? &asoc->base.bind_addr.address_list + : &ep->base.bind_addr.address_list; + if (inet_diag_msg_sctpladdrs_fill(skb, addr_list)) + goto errout; + + if (asoc && (ext & (1 << (INET_DIAG_CONG - 1)))) + if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0) + goto errout; + + if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc)) + goto errout; + + nlmsg_end(skb, nlh); + return 0; + +errout: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* callback and param */ +struct sctp_comm_param { + struct sk_buff *skb; + struct netlink_callback *cb; + const struct inet_diag_req_v2 *r; + const struct nlmsghdr *nlh; +}; + +static size_t inet_assoc_attr_size(struct sctp_association *asoc) +{ + int addrlen = sizeof(struct sockaddr_storage); + int addrcnt = 0; + struct sctp_sockaddr_entry *laddr; + + list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, + list) + addrcnt++; + + return nla_total_size(sizeof(struct sctp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(addrlen * asoc->peer.transport_count) + + nla_total_size(addrlen * addrcnt) + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + 64; +} + +static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) +{ + struct sctp_association *assoc = tsp->asoc; + struct sock *sk = tsp->asoc->base.sk; + struct sctp_comm_param *commp = p; + struct sk_buff *in_skb = commp->skb; + const struct inet_diag_req_v2 *req = commp->r; + const struct nlmsghdr *nlh = commp->nlh; + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + int err; + + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) + goto out; + + err = -ENOMEM; + rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); + if (!rep) + goto out; + + lock_sock(sk); + if (sk != assoc->base.sk) { + release_sock(sk); + sk = assoc->base.sk; + lock_sock(sk); + } + err = inet_sctp_diag_fill(sk, assoc, rep, req, + sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, nlh); + release_sock(sk); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } + + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + return err; +} + +static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + int err = 0; + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + lock_sock(sk); + if (sk != assoc->base.sk) + goto release; + list_for_each_entry(assoc, &ep->asocs, asocs) { + if (cb->args[4] < cb->args[1]) + goto next; + + if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != htons(assoc->peer.port) && + r->id.idiag_dport) + goto next; + + if (!cb->args[3] && + inet_sctp_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->nlh) < 0) { + cb->args[3] = 1; + err = 2; + goto release; + } + cb->args[3] = 1; + + if (inet_sctp_diag_fill(sk, assoc, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { + err = 2; + goto release; + } +next: + cb->args[4]++; + } + cb->args[1] = 0; + cb->args[2]++; + cb->args[3] = 0; + cb->args[4] = 0; +release: + release_sock(sk); + return err; +out: + cb->args[2]++; + return err; +} + +static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) +{ + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct net *net = sock_net(skb->sk); + struct inet_sock *inet = inet_sk(sk); + int err = 0; + + if (!net_eq(sock_net(sk), net)) + goto out; + + if (cb->args[4] < cb->args[1]) + goto next; + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + + if (inet_sctp_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh) < 0) { + err = 2; + goto out; + } +next: + cb->args[4]++; +out: + return err; +} + +/* define the functions for sctp_diag_handler*/ +static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + struct sctp_infox *infox = (struct sctp_infox *)info; + + if (infox->asoc) { + r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc); + r->idiag_wqueue = infox->asoc->sndbuf_used; + } else { + r->idiag_rqueue = sk->sk_ack_backlog; + r->idiag_wqueue = sk->sk_max_ack_backlog; + } + if (infox->sctpinfo) + sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo); +} + +static int sctp_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + union sctp_addr laddr, paddr; + struct sctp_comm_param commp = { + .skb = in_skb, + .r = req, + .nlh = nlh, + }; + + if (req->sdiag_family == AF_INET) { + laddr.v4.sin_port = req->id.idiag_sport; + laddr.v4.sin_addr.s_addr = req->id.idiag_src[0]; + laddr.v4.sin_family = AF_INET; + + paddr.v4.sin_port = req->id.idiag_dport; + paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0]; + paddr.v4.sin_family = AF_INET; + } else { + laddr.v6.sin6_port = req->id.idiag_sport; + memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64); + laddr.v6.sin6_family = AF_INET6; + + paddr.v6.sin6_port = req->id.idiag_dport; + memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64); + paddr.v6.sin6_family = AF_INET6; + } + + return sctp_transport_lookup_process(sctp_tsp_dump_one, + net, &laddr, &paddr, &commp); +} + +static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + u32 idiag_states = r->idiag_states; + struct net *net = sock_net(skb->sk); + struct sctp_comm_param commp = { + .skb = skb, + .cb = cb, + .r = r, + }; + + /* eps hashtable dumps + * args: + * 0 : if it will traversal listen sock + * 1 : to record the sock pos of this time's traversal + * 4 : to work as a temporary variable to traversal list + */ + if (cb->args[0] == 0) { + if (!(idiag_states & TCPF_LISTEN)) + goto skip; + if (sctp_for_each_endpoint(sctp_ep_dump, &commp)) + goto done; +skip: + cb->args[0] = 1; + cb->args[1] = 0; + cb->args[4] = 0; + } + + /* asocs by transport hashtable dump + * args: + * 1 : to record the assoc pos of this time's traversal + * 2 : to record the transport pos of this time's traversal + * 3 : to mark if we have dumped the ep info of the current asoc + * 4 : to work as a temporary variable to traversal list + */ + if (!(idiag_states & ~TCPF_LISTEN)) + goto done; + sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); +done: + cb->args[1] = cb->args[4]; + cb->args[4] = 0; +} + +static const struct inet_diag_handler sctp_diag_handler = { + .dump = sctp_diag_dump, + .dump_one = sctp_diag_dump_one, + .idiag_get_info = sctp_diag_get_info, + .idiag_type = IPPROTO_SCTP, + .idiag_info_size = sizeof(struct sctp_info), +}; + +static int __init sctp_diag_init(void) +{ + return inet_diag_register(&sctp_diag_handler); +} + +static void __exit sctp_diag_exit(void) +{ + inet_diag_unregister(&sctp_diag_handler); +} + +module_init(sctp_diag_init); +module_exit(sctp_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7f0bf798205b..56f364d8f932 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3080,8 +3080,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_RSRC_LOW; /* Start the heartbeat timer. */ - if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) - sctp_transport_hold(peer); + sctp_transport_reset_hb_timer(peer); asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d06317de8730..aa3712259368 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -69,8 +69,6 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_cmd_seq_t *commands, gfp_t gfp); -static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, - struct sctp_transport *t); /******************************************************************** * Helper functions ********************************************************************/ @@ -367,6 +365,7 @@ void sctp_generate_heartbeat_event(unsigned long data) struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); + u32 elapsed, timeout; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -378,6 +377,16 @@ void sctp_generate_heartbeat_event(unsigned long data) goto out_unlock; } + /* Check if we should still send the heartbeat or reschedule */ + elapsed = jiffies - transport->last_time_sent; + timeout = sctp_transport_timeout(transport); + if (elapsed < timeout) { + elapsed = timeout - elapsed; + if (!mod_timer(&transport->hb_timer, jiffies + elapsed)) + sctp_transport_hold(transport); + goto out_unlock; + } + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), asoc->state, asoc->ep, asoc, @@ -507,7 +516,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, 0); /* Update the hb timer to resend a heartbeat every rto */ - sctp_cmd_hb_timer_update(commands, transport); + sctp_transport_reset_hb_timer(transport); } if (transport->state != SCTP_INACTIVE && @@ -634,11 +643,8 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds, * hold a reference on the transport to make sure none of * the needed data structures go away. */ - list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - - if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) - sctp_transport_hold(t); - } + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) + sctp_transport_reset_hb_timer(t); } static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds, @@ -669,15 +675,6 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, } -/* Helper function to update the heartbeat timer. */ -static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, - struct sctp_transport *t) -{ - /* Update the heartbeat timer. */ - if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) - sctp_transport_hold(t); -} - /* Helper function to handle the reception of an HEARTBEAT ACK. */ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, @@ -742,8 +739,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); /* Update the heartbeat timer. */ - if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) - sctp_transport_hold(t); + sctp_transport_reset_hb_timer(t); if (was_unconfirmed && asoc->peer.transport_count == 1) sctp_transport_immediate_rtx(t); @@ -1616,7 +1612,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_HB_TIMER_UPDATE: t = cmd->obj.transport; - sctp_cmd_hb_timer_update(commands, t); + sctp_transport_reset_hb_timer(t); break; case SCTP_CMD_HB_TIMERS_STOP: @@ -1745,10 +1741,9 @@ out: } else if (local_cork) error = sctp_outq_uncork(&asoc->outqueue, gfp); - if (sp->pending_data_ready) { - sk->sk_data_ready(sk); - sp->pending_data_ready = 0; - } + if (sp->data_ready_signalled) + sp->data_ready_signalled = 0; + return error; nomem: error = -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 878d28eda1a6..777d0324594a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4202,6 +4202,222 @@ static void sctp_shutdown(struct sock *sk, int how) } } +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info) +{ + struct sctp_transport *prim; + struct list_head *pos; + int mask; + + memset(info, 0, sizeof(*info)); + if (!asoc) { + struct sctp_sock *sp = sctp_sk(sk); + + info->sctpi_s_autoclose = sp->autoclose; + info->sctpi_s_adaptation_ind = sp->adaptation_ind; + info->sctpi_s_pd_point = sp->pd_point; + info->sctpi_s_nodelay = sp->nodelay; + info->sctpi_s_disable_fragments = sp->disable_fragments; + info->sctpi_s_v4mapped = sp->v4mapped; + info->sctpi_s_frag_interleave = sp->frag_interleave; + + return 0; + } + + info->sctpi_tag = asoc->c.my_vtag; + info->sctpi_state = asoc->state; + info->sctpi_rwnd = asoc->a_rwnd; + info->sctpi_unackdata = asoc->unack_data; + info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); + info->sctpi_instrms = asoc->c.sinit_max_instreams; + info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + list_for_each(pos, &asoc->base.inqueue.in_chunk_list) + info->sctpi_inqueue++; + list_for_each(pos, &asoc->outqueue.out_chunk_list) + info->sctpi_outqueue++; + info->sctpi_overall_error = asoc->overall_error_count; + info->sctpi_max_burst = asoc->max_burst; + info->sctpi_maxseg = asoc->frag_point; + info->sctpi_peer_rwnd = asoc->peer.rwnd; + info->sctpi_peer_tag = asoc->c.peer_vtag; + + mask = asoc->peer.ecn_capable << 1; + mask = (mask | asoc->peer.ipv4_address) << 1; + mask = (mask | asoc->peer.ipv6_address) << 1; + mask = (mask | asoc->peer.hostname_address) << 1; + mask = (mask | asoc->peer.asconf_capable) << 1; + mask = (mask | asoc->peer.prsctp_capable) << 1; + mask = (mask | asoc->peer.auth_capable); + info->sctpi_peer_capable = mask; + mask = asoc->peer.sack_needed << 1; + mask = (mask | asoc->peer.sack_generation) << 1; + mask = (mask | asoc->peer.zero_window_announced); + info->sctpi_peer_sack = mask; + + info->sctpi_isacks = asoc->stats.isacks; + info->sctpi_osacks = asoc->stats.osacks; + info->sctpi_opackets = asoc->stats.opackets; + info->sctpi_ipackets = asoc->stats.ipackets; + info->sctpi_rtxchunks = asoc->stats.rtxchunks; + info->sctpi_outofseqtsns = asoc->stats.outofseqtsns; + info->sctpi_idupchunks = asoc->stats.idupchunks; + info->sctpi_gapcnt = asoc->stats.gapcnt; + info->sctpi_ouodchunks = asoc->stats.ouodchunks; + info->sctpi_iuodchunks = asoc->stats.iuodchunks; + info->sctpi_oodchunks = asoc->stats.oodchunks; + info->sctpi_iodchunks = asoc->stats.iodchunks; + info->sctpi_octrlchunks = asoc->stats.octrlchunks; + info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; + + prim = asoc->peer.primary_path; + memcpy(&info->sctpi_p_address, &prim->ipaddr, + sizeof(struct sockaddr_storage)); + info->sctpi_p_state = prim->state; + info->sctpi_p_cwnd = prim->cwnd; + info->sctpi_p_srtt = prim->srtt; + info->sctpi_p_rto = jiffies_to_msecs(prim->rto); + info->sctpi_p_hbinterval = prim->hbinterval; + info->sctpi_p_pathmaxrxt = prim->pathmaxrxt; + info->sctpi_p_sackdelay = jiffies_to_msecs(prim->sackdelay); + info->sctpi_p_ssthresh = prim->ssthresh; + info->sctpi_p_partial_bytes_acked = prim->partial_bytes_acked; + info->sctpi_p_flight_size = prim->flight_size; + info->sctpi_p_error = prim->error_count; + + return 0; +} +EXPORT_SYMBOL_GPL(sctp_get_sctp_info); + +/* use callback to avoid exporting the core structure */ +int sctp_transport_walk_start(struct rhashtable_iter *iter) +{ + int err; + + err = rhashtable_walk_init(&sctp_transport_hashtable, iter, + GFP_KERNEL); + if (err) + return err; + + err = rhashtable_walk_start(iter); + if (err && err != -EAGAIN) { + rhashtable_walk_exit(iter); + return err; + } + + return 0; +} + +void sctp_transport_walk_stop(struct rhashtable_iter *iter) +{ + rhashtable_walk_stop(iter); + rhashtable_walk_exit(iter); +} + +struct sctp_transport *sctp_transport_get_next(struct net *net, + struct rhashtable_iter *iter) +{ + struct sctp_transport *t; + + t = rhashtable_walk_next(iter); + for (; t; t = rhashtable_walk_next(iter)) { + if (IS_ERR(t)) { + if (PTR_ERR(t) == -EAGAIN) + continue; + break; + } + + if (net_eq(sock_net(t->asoc->base.sk), net) && + t->asoc->peer.primary_path == t) + break; + } + + return t; +} + +struct sctp_transport *sctp_transport_get_idx(struct net *net, + struct rhashtable_iter *iter, + int pos) +{ + void *obj = SEQ_START_TOKEN; + + while (pos && (obj = sctp_transport_get_next(net, iter)) && + !IS_ERR(obj)) + pos--; + + return obj; +} + +int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), + void *p) { + int err = 0; + int hash = 0; + struct sctp_ep_common *epb; + struct sctp_hashbucket *head; + + for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize; + hash++, head++) { + read_lock(&head->lock); + sctp_for_each_hentry(epb, &head->chain) { + err = cb(sctp_ep(epb), p); + if (err) + break; + } + read_unlock(&head->lock); + } + + return err; +} +EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); + +int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p) +{ + struct sctp_transport *transport; + int err = 0; + + rcu_read_lock(); + transport = sctp_addrs_lookup_transport(net, laddr, paddr); + if (!transport || !sctp_transport_hold(transport)) + goto out; + err = cb(transport, p); + sctp_transport_put(transport); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); + +int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), + struct net *net, int pos, void *p) { + struct rhashtable_iter hti; + void *obj; + int err; + + err = sctp_transport_walk_start(&hti); + if (err) + return err; + + sctp_transport_get_idx(net, &hti, pos); + obj = sctp_transport_get_next(net, &hti); + for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) { + struct sctp_transport *transport = obj; + + if (!sctp_transport_hold(transport)) + continue; + err = cb(transport, p); + sctp_transport_put(transport); + if (err) + break; + } + sctp_transport_walk_stop(&hti); + + return err; +} +EXPORT_SYMBOL_GPL(sctp_for_each_transport); + /* 7.2.1 Association Status (SCTP_STATUS) * Applications can retrieve current status information about an @@ -6430,6 +6646,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) poll_wait(file, sk_sleep(sk), wait); + sock_rps_record_flow(sk); + /* A TCP-style listening socket becomes readable when the accept queue * is not empty. */ @@ -6764,13 +6982,11 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { - spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) atomic_inc(&skb->users); - spin_unlock_bh(&sk->sk_receive_queue.lock); } else { - skb = skb_dequeue(&sk->sk_receive_queue); + skb = __skb_dequeue(&sk->sk_receive_queue); } if (skb) @@ -7186,6 +7402,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_lingertime = sk->sk_lingertime; newsk->sk_rcvtimeo = sk->sk_rcvtimeo; newsk->sk_sndtimeo = sk->sk_sndtimeo; + newsk->sk_rxhash = sk->sk_rxhash; newinet = inet_sk(newsk); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 9b6b48c7524e..81b86678be4d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -183,7 +183,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport) +void sctp_transport_reset_t3_rtx(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -197,11 +197,18 @@ void sctp_transport_reset_timers(struct sctp_transport *transport) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); +} + +void sctp_transport_reset_hb_timer(struct sctp_transport *transport) +{ + unsigned long expires; /* When a data chunk is sent, reset the heartbeat interval. */ - if (!mod_timer(&transport->hb_timer, - sctp_transport_timeout(transport))) - sctp_transport_hold(transport); + expires = jiffies + sctp_transport_timeout(transport); + if (time_before(transport->hb_timer.expires, expires) && + !mod_timer(&transport->hb_timer, + expires + prandom_u32_max(transport->rto))) + sctp_transport_hold(transport); } /* This transport has been assigned to an association. @@ -595,13 +602,13 @@ void sctp_transport_burst_reset(struct sctp_transport *t) unsigned long sctp_transport_timeout(struct sctp_transport *trans) { /* RTO + timer slack +/- 50% of RTO */ - unsigned long timeout = (trans->rto >> 1) + prandom_u32_max(trans->rto); + unsigned long timeout = trans->rto >> 1; if (trans->state != SCTP_UNCONFIRMED && trans->state != SCTP_PF) timeout += trans->hbinterval; - return timeout + jiffies; + return timeout; } /* Reset transport variables to their initial values */ diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 72e5b3e41cdd..ec166d2bd2d9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -141,7 +141,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) */ if (!skb_queue_empty(&sp->pd_lobby)) { struct list_head *list; - sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue); + skb_queue_splice_tail_init(&sp->pd_lobby, + &sk->sk_receive_queue); list = (struct list_head *)&sctp_sk(sk)->pd_lobby; INIT_LIST_HEAD(list); return 1; @@ -193,6 +194,7 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sock *sk = ulpq->asoc->base.sk; + struct sctp_sock *sp = sctp_sk(sk); struct sk_buff_head *queue, *skb_list; struct sk_buff *skb = sctp_event2skb(event); int clear_pd = 0; @@ -210,7 +212,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sk_incoming_cpu_update(sk); } /* Check if the user wishes to receive this event. */ - if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) + if (!sctp_ulpevent_is_enabled(event, &sp->subscribe)) goto out_free; /* If we are in partial delivery mode, post to the lobby until @@ -218,7 +220,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * the association the cause of the partial delivery. */ - if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) { + if (atomic_read(&sp->pd_mode) == 0) { queue = &sk->sk_receive_queue; } else { if (ulpq->pd_mode) { @@ -230,7 +232,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if ((event->msg_flags & MSG_NOTIFICATION) || (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK))) - queue = &sctp_sk(sk)->pd_lobby; + queue = &sp->pd_lobby; else { clear_pd = event->msg_flags & MSG_EOR; queue = &sk->sk_receive_queue; @@ -241,10 +243,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * can queue this to the receive queue instead * of the lobby. */ - if (sctp_sk(sk)->frag_interleave) + if (sp->frag_interleave) queue = &sk->sk_receive_queue; else - queue = &sctp_sk(sk)->pd_lobby; + queue = &sp->pd_lobby; } } @@ -252,7 +254,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * collected on a list. */ if (skb_list) - sctp_skb_list_tail(skb_list, queue); + skb_queue_splice_tail_init(skb_list, queue); else __skb_queue_tail(queue, skb); @@ -263,8 +265,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (clear_pd) sctp_ulpq_clear_pd(ulpq); - if (queue == &sk->sk_receive_queue) - sctp_sk(sk)->pending_data_ready = 1; + if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { + sp->data_ready_signalled = 1; + sk->sk_data_ready(sk); + } return 1; out_free: @@ -1125,11 +1129,13 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sock *sk; + struct sctp_sock *sp; if (!ulpq->pd_mode) return; sk = ulpq->asoc->base.sk; + sp = sctp_sk(sk); if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT, &sctp_sk(sk)->subscribe)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, @@ -1139,6 +1145,8 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ - if (sctp_ulpq_clear_pd(ulpq) || ev) - sctp_sk(sk)->pending_data_ready = 1; + if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) { + sp->data_ready_signalled = 1; + sk->sk_data_ready(sk); + } } diff --git a/net/socket.c b/net/socket.c index 5dbb0bbe12a7..7789d79609dd 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (tsflags & SOF_TIMESTAMPING_TX_ACK) - flags |= SKBTX_ACK_TSTAMP; - *tx_flags = flags; } EXPORT_SYMBOL(__sock_tx_timestamp); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 045e11ecd332..244245bcbbd2 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -78,6 +78,7 @@ krb5_encrypt( memcpy(out, in, length); sg_init_one(sg, out, length); + skcipher_request_set_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, length, local_iv); @@ -115,6 +116,7 @@ krb5_decrypt( memcpy(out, in, length); sg_init_one(sg, out, length); + skcipher_request_set_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, length, local_iv); @@ -946,7 +948,8 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, return PTR_ERR(hmac); } - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), + GFP_KERNEL); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); @@ -1012,7 +1015,8 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, return PTR_ERR(hmac); } - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), + GFP_KERNEL); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 71341ccb9890..65427492b1c9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -451,7 +451,8 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) goto out_err_free_hmac; - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), + GFP_KERNEL); if (!desc) { dprintk("%s: failed to allocate hash descriptor for '%s'\n", __func__, ctx->gk5e->cksum_name); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d0756ac5c0f2..a6c68dc086af 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1018,11 +1018,11 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. */ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { - UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS); + __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); goto out_unlock; } - UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); + __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); xprt_adjust_cwnd(xprt, task, copied); xprt_complete_rqst(task, copied); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 2b9b98f1c2ff..b7e01d88bdc5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -305,6 +305,8 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev, if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", err, attr->id); + if (attr->complete) + attr->complete(dev, err, attr->complete_priv); } static int switchdev_port_attr_set_defer(struct net_device *dev, @@ -434,6 +436,8 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", err, obj->id); + if (obj->complete) + obj->complete(dev, err, obj->complete_priv); } static int switchdev_port_obj_add_defer(struct net_device *dev, @@ -502,6 +506,8 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", err, obj->id); + if (obj->complete) + obj->complete(dev, err, obj->complete_priv); } static int switchdev_port_obj_del_defer(struct net_device *dev, diff --git a/net/tipc/core.c b/net/tipc/core.c index 03a842870c52..e2bdb07a49a2 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -69,6 +69,7 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; + INIT_LIST_HEAD(&tn->dist_queue); err = tipc_topsrv_start(net); if (err) goto out_subscr; diff --git a/net/tipc/core.h b/net/tipc/core.h index 5504d63503df..eff58dc53aa1 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -103,6 +103,9 @@ struct tipc_net { spinlock_t nametbl_lock; struct name_table *nametbl; + /* Name dist queue */ + struct list_head dist_queue; + /* Topology subscription server */ struct tipc_server *topsrv; atomic_t subscription_count; diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d2bb3e70baa..7059c94f33c5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -140,6 +140,7 @@ struct tipc_link { char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; + u16 rst_cnt; /* Failover/synch */ u16 drop_point; @@ -701,40 +702,34 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ -/* tipc_link_timeout - perform periodic task as instructed from node timeout - */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int rc = 0; - int mtyp = STATE_MSG; - bool xmit = false; - bool prb = false; + int mtyp, rc = 0; + bool state = false; + bool probe = false; + bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; - bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: - if (!l->silent_intv_cnt) { - if (bc_up && (bc_acked != bc_snt)) - xmit = true; - } else if (l->silent_intv_cnt <= l->abort_limit) { - xmit = true; - prb = true; - } else { - rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - } + if (l->silent_intv_cnt > l->abort_limit) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + mtyp = STATE_MSG; + state = bc_acked != bc_snt; + probe = l->silent_intv_cnt; l->silent_intv_cnt++; break; case LINK_RESET: - xmit = true; + setup = l->rst_cnt++ <= 4; + setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: - xmit = true; + setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: @@ -745,8 +740,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) break; } - if (xmit) - tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); return rc; } @@ -833,6 +828,7 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_nxt = 1; l->acked = 0; l->silent_intv_cnt = 0; + l->rst_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; @@ -1110,12 +1106,12 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } -/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission +/* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; @@ -1140,11 +1136,17 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; + struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + + /* Inform peer that this endpoint is going down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission @@ -1219,7 +1221,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - rc |= tipc_link_build_ack_msg(l, xmitq); + rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) break; } while ((skb = __skb_dequeue(defq))); @@ -1411,7 +1413,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->priority = peers_prio; /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if ((mtyp == RESET_MSG) || !link_is_up(l)) + if (msg_peer_stopping(hdr)) + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + else if ((mtyp == RESET_MSG) || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 6a94175ee20a..d7e9d42fcb2d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,7 +123,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index f34f639df643..58bf51541813 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -715,6 +715,16 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ebe9d0ff6e9e..6b626a64b517 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -40,11 +40,6 @@ int sysctl_tipc_named_timeout __read_mostly = 2000; -/** - * struct tipc_dist_queue - queue holding deferred name table updates - */ -static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue); - struct distr_queue_item { struct distr_item i; u32 dtype; @@ -229,12 +224,31 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) kfree_rcu(p, rcu); } +/** + * tipc_dist_queue_purge - remove deferred updates from a node that went down + */ +static void tipc_dist_queue_purge(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct distr_queue_item *e, *tmp; + + spin_lock_bh(&tn->nametbl_lock); + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { + if (e->node != addr) + continue; + list_del(&e->next); + kfree(e); + } + spin_unlock_bh(&tn->nametbl_lock); +} + void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) tipc_publ_purge(net, publ, addr); + tipc_dist_queue_purge(net, addr); } /** @@ -279,9 +293,11 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, * tipc_named_add_backlog - add a failed name table update to the backlog * */ -static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) +static void tipc_named_add_backlog(struct net *net, struct distr_item *i, + u32 type, u32 node) { struct distr_queue_item *e; + struct tipc_net *tn = net_generic(net, tipc_net_id); unsigned long now = get_jiffies_64(); e = kzalloc(sizeof(*e), GFP_ATOMIC); @@ -291,7 +307,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) e->node = node; e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout); memcpy(e, i, sizeof(*i)); - list_add_tail(&e->next, &tipc_dist_queue); + list_add_tail(&e->next, &tn->dist_queue); } /** @@ -301,10 +317,11 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr[16]; unsigned long now = get_jiffies_64(); - list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { if (time_after(e->expires, now)) { if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; @@ -344,7 +361,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) node = msg_orignode(msg); while (count--) { if (!tipc_update_nametbl(net, item, node, mtype)) - tipc_named_add_backlog(item, mtype, node); + tipc_named_add_backlog(net, item, mtype, node); item++; } kfree_skb(skb); diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178fd3850..c29915688230 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -545,12 +545,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Established link <%s> on network plane %c\n", tipc_link_name(nl), tipc_link_plane(nl)); + /* Ensure that a STATE message goes first */ + tipc_link_build_state_msg(nl, xmitq); + /* First link? => give it both slots */ if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -581,8 +585,12 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { + struct tipc_media_addr *maddr; + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); tipc_node_write_unlock(n); } @@ -1279,7 +1287,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { tipc_node_read_lock(n); - tipc_link_build_ack_msg(le->link, &xmitq); + tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); } diff --git a/net/tipc/server.c b/net/tipc/server.c index 2446bfbaa309..7a0af2dc0406 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,6 +86,7 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); +static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -102,6 +103,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + tipc_sock_release(con); sock_release(sock); con->sock = NULL; } @@ -184,26 +186,31 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } +static void tipc_sock_release(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); + + tipc_unregister_callbacks(con); +} + static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; spin_unlock_bh(&s->idr_lock); - tipc_unregister_callbacks(con); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this - * connection from server connection list and set - * sk->sk_user_data to 0 before releasing connection object. + * connection from server connection list. */ kernel_sock_shutdown(con->sock, SHUT_RDWR); diff --git a/net/tipc/server.h b/net/tipc/server.h index 9015faedb1b0..34f8055afa3b 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -53,7 +53,7 @@ * @send_wq: send workqueue * @max_rcvbuf_size: maximum permitted receive message length * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_release: callback will be called before releasing the connection * @tipc_conn_recvmsg: callback will be called when message arrives * @saddr: TIPC server address * @name: server name @@ -70,7 +70,7 @@ struct tipc_server { struct workqueue_struct *send_wq; int max_rcvbuf_size; void *(*tipc_conn_new)(int conid); - void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_release)(int conid, void *usr_data); void (*tipc_conn_recvmsg)(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e6cb386fbf34..0dd02244e21d 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -302,7 +302,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, } /* Handle one termination request for the subscriber */ -static void tipc_subscrb_shutdown_cb(int conid, void *usr_data) +static void tipc_subscrb_release_cb(int conid, void *usr_data) { tipc_subscrb_delete((struct tipc_subscriber *)usr_data); } @@ -326,8 +326,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, return tipc_subscrp_cancel(s, subscriber); } - if (s) - tipc_subscrp_subscribe(net, s, subscriber, swap); + tipc_subscrp_subscribe(net, s, subscriber, swap); } /* Handle one request to establish a new subscriber */ @@ -365,7 +364,7 @@ int tipc_topsrv_start(struct net *net) topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; topsrv->tipc_conn_new = tipc_subscrb_connect_cb; - topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; + topsrv->tipc_conn_release = tipc_subscrb_release_cb; strncpy(topsrv->name, name, strlen(name) + 1); tn->topsrv = topsrv; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 662bdd20a748..56214736fe88 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1735,11 +1735,8 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; + return err; dg = (struct vmci_datagram *)skb->data; if (!dg) @@ -2154,7 +2151,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.3.0-k"); +MODULE_VERSION("1.0.4.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 13ef553b99d4..afeb1ef1b199 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2429,7 +2429,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ @@ -6874,7 +6875,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) goto nla_put_failure; - if (nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + if (nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; bss = nla_nest_start(msg, NL80211_ATTR_BSS); @@ -6895,7 +6897,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, */ ies = rcu_dereference(res->ies); if (ies) { - if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + if (nla_put_u64_64bit(msg, NL80211_BSS_TSF, ies->tsf, + NL80211_BSS_PAD)) goto fail_unlock_rcu; if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, ies->len, ies->data)) @@ -6905,7 +6908,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, /* and this pointer is always (unless driver didn't know) beacon data */ ies = rcu_dereference(res->beacon_ies); if (ies && ies->from_beacon) { - if (nla_put_u64(msg, NL80211_BSS_BEACON_TSF, ies->tsf)) + if (nla_put_u64_64bit(msg, NL80211_BSS_BEACON_TSF, ies->tsf, + NL80211_BSS_PAD)) goto fail_unlock_rcu; if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, ies->len, ies->data)) @@ -6924,8 +6928,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, goto nla_put_failure; if (intbss->ts_boottime && - nla_put_u64(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, - intbss->ts_boottime)) + nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, + intbss->ts_boottime, NL80211_BSS_PAD)) goto nla_put_failure; switch (rdev->wiphy.signal_type) { @@ -7045,28 +7049,28 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME, - survey->time)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME, + survey->time, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY, - survey->time_busy)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BUSY, + survey->time_busy, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, - survey->time_ext_busy)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, + survey->time_ext_busy, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_RX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX, - survey->time_rx)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_RX, + survey->time_rx, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_TX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, - survey->time_tx)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_TX, + survey->time_tx, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_SCAN) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN, - survey->time_scan)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN, + survey->time_scan, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; nla_nest_end(msg, infoattr); @@ -7786,8 +7790,8 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, } if (wdev) { - if (nla_put_u64(skb, NL80211_ATTR_WDEV, - wdev_id(wdev))) + if (nla_put_u64_64bit(skb, NL80211_ATTR_WDEV, + wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; if (wdev->netdev && nla_put_u32(skb, NL80211_ATTR_IFINDEX, @@ -8380,7 +8384,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (err) goto free_msg; - if (nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -8792,7 +8797,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) goto free_msg; if (msg) { - if (nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -10078,7 +10084,8 @@ static int nl80211_probe_client(struct sk_buff *skb, if (err) goto free_msg; - if (nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -10503,8 +10510,9 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, break; if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - (wdev && nla_put_u64(skb, NL80211_ATTR_WDEV, - wdev_id(wdev)))) { + (wdev && nla_put_u64_64bit(skb, NL80211_ATTR_WDEV, + wdev_id(wdev), + NL80211_ATTR_PAD))) { genlmsg_cancel(skb, hdr); break; } @@ -11711,7 +11719,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; /* ignore errors and send incomplete event anyway */ @@ -12378,11 +12387,13 @@ static void nl80211_send_remain_on_chan_event( if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, NL80211_CHAN_NO_HT) || - nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL && @@ -12616,7 +12627,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || @@ -12659,9 +12671,11 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || - nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD) || (ack && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; @@ -13041,7 +13055,8 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = netdev->ieee80211_ptr; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; } @@ -13086,7 +13101,8 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || - nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD) || (acked && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; @@ -13231,7 +13247,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, goto free_msg; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto free_msg; if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, @@ -13372,7 +13389,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; - if (state != NETLINK_URELEASE) + if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); @@ -13506,7 +13523,8 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13539,7 +13557,8 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev) if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto out; genlmsg_end(msg, hdr); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2cc7af858c6f..d516845e16e3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -809,7 +809,8 @@ static int copy_to_user_state_extra(struct xfrm_state *x, goto out; } if (x->lastused) { - ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); + ret = nla_put_u64_64bit(skb, XFRMA_LASTUSED, x->lastused, + XFRMA_PAD); if (ret) goto out; } @@ -1813,7 +1814,7 @@ static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + nla_total_size(replay_size) - + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4); /* XFRM_AE_ETHR */ @@ -1848,7 +1849,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct } if (err) goto out_cancel; - err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); + err = nla_put_64bit(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft, + XFRMA_PAD); if (err) goto out_cancel; @@ -2617,7 +2619,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. */ - l += nla_total_size(sizeof(u64)); + l += nla_total_size_64bit(sizeof(u64)); return l; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 744dd7a16144..66897e61232c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -81,10 +81,43 @@ HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt +# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: +# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang +LLC ?= llc +CLANG ?= clang + +# Trick to allow make to be run from this directory +all: + $(MAKE) -C ../../ $$PWD/ + +clean: + $(MAKE) -C ../../ M=$$PWD clean + @rm -f *~ + +# Verify LLVM compiler tools are available and bpf target is supported by llc +.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) + +verify_cmds: $(CLANG) $(LLC) + @for TOOL in $^ ; do \ + if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \ + echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ + exit 1; \ + else true; fi; \ + done + +verify_target_bpf: verify_cmds + @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ + echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ + echo " NOTICE: LLVM version >= 3.7.1 required" ;\ + exit 2; \ + else true; fi + +$(src)/*.c: verify_target_bpf + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. $(obj)/%.o: $(src)/%.c - clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -O2 -emit-llvm -c $< -o -| llc -march=bpf -filetype=obj -o $@ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst new file mode 100644 index 000000000000..a43eae3f0551 --- /dev/null +++ b/samples/bpf/README.rst @@ -0,0 +1,66 @@ +eBPF sample programs +==================== + +This directory contains a mini eBPF library, test stubs, verifier +test-suite and examples for using eBPF. + +Build dependencies +================== + +Compiling requires having installed: + * clang >= version 3.4.0 + * llvm >= version 3.7.1 + +Note that LLVM's tool 'llc' must support target 'bpf', list version +and supported targets with command: ``llc --version`` + +Kernel headers +-------------- + +There are usually dependencies to header files of the current kernel. +To avoid installing devel kernel headers system wide, as a normal +user, simply call:: + + make headers_install + +This will creates a local "usr/include" directory in the git/build top +level directory, that the make system automatically pickup first. + +Compiling +========= + +For building the BPF samples, issue the below command from the kernel +top level directory:: + + make samples/bpf/ + +Do notice the "/" slash after the directory name. + +It is also possible to call make from this directory. This will just +hide the the invocation of make as above with the appended "/". + +Manually compiling LLVM with 'bpf' support +------------------------------------------ + +Since version 3.7.0, LLVM adds a proper LLVM backend target for the +BPF bytecode architecture. + +By default llvm will build all non-experimental backends including bpf. +To generate a smaller llc binary one can use:: + + -DLLVM_TARGETS_TO_BUILD="BPF" + +Quick sniplet for manually compiling LLVM and clang +(build dependencies are cmake and gcc-c++):: + + $ git clone http://llvm.org/git/llvm.git + $ cd llvm/tools + $ git clone --depth 1 http://llvm.org/git/clang.git + $ cd ..; mkdir build; cd build + $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" + $ make -j $(getconf _NPROCESSORS_ONLN) + +It is also possible to point make to the newly compiled 'llc' or +'clang' command via redefining LLC or CLANG on the make command line:: + + make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index 983629a31c79..e7d9a0a3d45b 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -11,7 +11,7 @@ #include <linux/version.h> #include <linux/sched.h> -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) #define MINBLOCK_US 1 @@ -61,7 +61,7 @@ SEC("kprobe/try_to_wake_up") int waker(struct pt_regs *ctx) { struct task_struct *p = (void *) PT_REGS_PARM1(ctx); - struct wokeby_t woke = {}; + struct wokeby_t woke; u32 pid; pid = _(p->pid); @@ -75,17 +75,19 @@ int waker(struct pt_regs *ctx) static inline int update_counts(void *ctx, u32 pid, u64 delta) { - struct key_t key = {}; struct wokeby_t *woke; u64 zero = 0, *val; + struct key_t key; + __builtin_memset(&key.waker, 0, sizeof(key.waker)); bpf_get_current_comm(&key.target, sizeof(key.target)); key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); + key.wret = 0; woke = bpf_map_lookup_elem(&wokeby, &pid); if (woke) { key.wret = woke->ret; - __builtin_memcpy(&key.waker, woke->name, TASK_COMM_LEN); + __builtin_memcpy(&key.waker, woke->name, sizeof(key.waker)); bpf_map_delete_elem(&wokeby, &pid); } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 4b51a9039c0d..9eba8d1d9dcc 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -309,6 +309,19 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, }, { + "check valid spill/fill, skb mark", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = ACCEPT, + }, + { "check corrupted spill/fill", .insns = { /* spill R1(ctx) into stack */ @@ -1180,6 +1193,261 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "raw_stack: no skb_load_bytes", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + /* Call to skb_load_bytes() omitted. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid read from stack off -8+0 size 8", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, no init", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, init", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, spilled regs around bounds", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, + offsetof(struct __sk_buff, priority)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, spilled regs corruption", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), /* fill ctx into R0 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, spilled regs corruption 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), /* fill ctx into R3 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, + offsetof(struct __sk_buff, priority)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3, + offsetof(struct __sk_buff, pkt_type)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R3 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, spilled regs + data", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), /* fill data into R3 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, + offsetof(struct __sk_buff, priority)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, invalid access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-513 access_size=8", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, invalid access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-1 access_size=8", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, invalid access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-1 access_size=-1", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, invalid access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-1 access_size=2147483647", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, invalid access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-512 access_size=2147483647", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, invalid access 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-512 access_size=0", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, large access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 512), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 3f450a8fa1f3..107da148820f 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -23,16 +23,14 @@ int bpf_prog1(struct pt_regs *ctx) /* attaches to kprobe netif_receive_skb, * looks for packets on loobpack device and prints them */ - char devname[IFNAMSIZ] = {}; + char devname[IFNAMSIZ]; struct net_device *dev; struct sk_buff *skb; int len; /* non-portable! works for the given kernel only */ skb = (struct sk_buff *) PT_REGS_PARM1(ctx); - dev = _(skb->dev); - len = _(skb->len); bpf_probe_read(devname, sizeof(devname), dev->name); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 6d6eefd0d465..5e11c20ce5ec 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -66,7 +66,7 @@ struct hist_key { char comm[16]; u64 pid_tgid; u64 uid_gid; - u32 index; + u64 index; }; struct bpf_map_def SEC("maps") my_hist_map = { @@ -82,7 +82,7 @@ int bpf_prog3(struct pt_regs *ctx) long write_size = PT_REGS_PARM3(ctx); long init_val = 1; long *value; - struct hist_key key = {}; + struct hist_key key; key.index = log2l(write_size); key.pid_tgid = bpf_get_current_pid_tgid(); diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index b3f4295bf288..f95f232cbab9 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -22,7 +22,7 @@ struct bpf_map_def SEC("maps") progs = { SEC("kprobe/seccomp_phase1") int bpf_prog1(struct pt_regs *ctx) { - struct seccomp_data sd = {}; + struct seccomp_data sd; bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); @@ -40,7 +40,7 @@ int bpf_prog1(struct pt_regs *ctx) /* we jump here when syscall number == __NR_write */ PROG(__NR_write)(struct pt_regs *ctx) { - struct seccomp_data sd = {}; + struct seccomp_data sd; bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); if (sd.args[2] == 512) { @@ -53,7 +53,7 @@ PROG(__NR_write)(struct pt_regs *ctx) PROG(__NR_read)(struct pt_regs *ctx) { - struct seccomp_data sd = {}; + struct seccomp_data sd; bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index e000f44e37b8..c1b7ef3e24c1 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -650,7 +650,7 @@ int main(int argc, char **argv) } hdr = fopen(headername, "w"); - if (!out) { + if (!hdr) { perror(headername); exit(1); } diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 8495b9368190..2ca9cde939d4 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -76,6 +76,8 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_NEWNSID, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_tcpdiag_perms[] = @@ -155,7 +157,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) switch (sclass) { case SECCLASS_NETLINK_ROUTE_SOCKET: /* RTM_MAX always point to RTM_SETxxxx, ie RTM_NEWxxx + 3 */ - BUILD_BUG_ON(RTM_MAX != (RTM_NEWNSID + 3)); + BUILD_BUG_ON(RTM_MAX != (RTM_NEWSTATS + 3)); err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, sizeof(nlmsg_route_perms)); break; diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c index d1a4d6973330..03c9872c31cf 100644 --- a/sound/hda/hdac_device.c +++ b/sound/hda/hdac_device.c @@ -299,13 +299,11 @@ EXPORT_SYMBOL_GPL(_snd_hdac_read_parm); int snd_hdac_read_parm_uncached(struct hdac_device *codec, hda_nid_t nid, int parm) { - int val; + unsigned int cmd, val; - if (codec->regmap) - regcache_cache_bypass(codec->regmap, true); - val = snd_hdac_read_parm(codec, nid, parm); - if (codec->regmap) - regcache_cache_bypass(codec->regmap, false); + cmd = snd_hdac_regmap_encode_verb(nid, AC_VERB_PARAMETERS) | parm; + if (snd_hdac_regmap_read_raw_uncached(codec, cmd, &val) < 0) + return -1; return val; } EXPORT_SYMBOL_GPL(snd_hdac_read_parm_uncached); diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index fb96aead8257..54babe1c0b16 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -267,6 +267,18 @@ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops } EXPORT_SYMBOL_GPL(snd_hdac_i915_register_notifier); +/* check whether intel graphics is present */ +static bool i915_gfx_present(void) +{ + static struct pci_device_id ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_ANY_ID), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16 }, + {} + }; + return pci_dev_present(ids); +} + /** * snd_hdac_i915_init - Initialize i915 audio component * @bus: HDA core bus @@ -286,6 +298,9 @@ int snd_hdac_i915_init(struct hdac_bus *bus) struct i915_audio_component *acomp; int ret; + if (!i915_gfx_present()) + return -ENODEV; + acomp = kzalloc(sizeof(*acomp), GFP_KERNEL); if (!acomp) return -ENOMEM; diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c index bdbcd6b75ff6..87041ddd29cb 100644 --- a/sound/hda/hdac_regmap.c +++ b/sound/hda/hdac_regmap.c @@ -453,14 +453,30 @@ int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg, EXPORT_SYMBOL_GPL(snd_hdac_regmap_write_raw); static int reg_raw_read(struct hdac_device *codec, unsigned int reg, - unsigned int *val) + unsigned int *val, bool uncached) { - if (!codec->regmap) + if (uncached || !codec->regmap) return hda_reg_read(codec, reg, val); else return regmap_read(codec->regmap, reg, val); } +static int __snd_hdac_regmap_read_raw(struct hdac_device *codec, + unsigned int reg, unsigned int *val, + bool uncached) +{ + int err; + + err = reg_raw_read(codec, reg, val, uncached); + if (err == -EAGAIN) { + err = snd_hdac_power_up_pm(codec); + if (!err) + err = reg_raw_read(codec, reg, val, uncached); + snd_hdac_power_down_pm(codec); + } + return err; +} + /** * snd_hdac_regmap_read_raw - read a pseudo register with power mgmt * @codec: the codec object @@ -472,19 +488,19 @@ static int reg_raw_read(struct hdac_device *codec, unsigned int reg, int snd_hdac_regmap_read_raw(struct hdac_device *codec, unsigned int reg, unsigned int *val) { - int err; - - err = reg_raw_read(codec, reg, val); - if (err == -EAGAIN) { - err = snd_hdac_power_up_pm(codec); - if (!err) - err = reg_raw_read(codec, reg, val); - snd_hdac_power_down_pm(codec); - } - return err; + return __snd_hdac_regmap_read_raw(codec, reg, val, false); } EXPORT_SYMBOL_GPL(snd_hdac_regmap_read_raw); +/* Works like snd_hdac_regmap_read_raw(), but this doesn't read from the + * cache but always via hda verbs. + */ +int snd_hdac_regmap_read_raw_uncached(struct hdac_device *codec, + unsigned int reg, unsigned int *val) +{ + return __snd_hdac_regmap_read_raw(codec, reg, val, true); +} + /** * snd_hdac_regmap_update_raw - update a pseudo register with power mgmt * @codec: the codec object diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index 7b248cdf06e2..fdcfa29e2205 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -591,7 +591,7 @@ static int sscape_upload_microcode(struct snd_card *card, int version) } err = upload_dma_data(sscape, init_fw->data, init_fw->size); if (err == 0) - snd_printk(KERN_INFO "sscape: MIDI firmware loaded %d KBs\n", + snd_printk(KERN_INFO "sscape: MIDI firmware loaded %zu KBs\n", init_fw->size >> 10); release_firmware(init_fw); diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 7ca5b89f088a..dfaf1a93fb8a 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -826,7 +826,7 @@ static hda_nid_t path_power_update(struct hda_codec *codec, bool allow_powerdown) { hda_nid_t nid, changed = 0; - int i, state; + int i, state, power; for (i = 0; i < path->depth; i++) { nid = path->path[i]; @@ -838,7 +838,9 @@ static hda_nid_t path_power_update(struct hda_codec *codec, state = AC_PWRST_D0; else state = AC_PWRST_D3; - if (!snd_hda_check_power_state(codec, nid, state)) { + power = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_POWER_STATE, 0); + if (power != (state | (state << 4))) { snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, state); changed = nid; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b680be0e937d..637b8a0e2a91 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2232,6 +2232,9 @@ static const struct pci_device_id azx_ids[] = { /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, + /* Broxton-T */ + { PCI_DEVICE(0x8086, 0x1a98), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, /* Haswell */ { PCI_DEVICE(0x8086, 0x0a0c), .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index a47e8ae0eb30..80bbadc83721 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -361,6 +361,7 @@ static int cs_parse_auto_config(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; int err; + int i; err = snd_hda_parse_pin_defcfg(codec, &spec->gen.autocfg, NULL, 0); if (err < 0) @@ -370,6 +371,19 @@ static int cs_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; + /* keep the ADCs powered up when it's dynamically switchable */ + if (spec->gen.dyn_adc_switch) { + unsigned int done = 0; + for (i = 0; i < spec->gen.input_mux.num_items; i++) { + int idx = spec->gen.dyn_adc_idx[i]; + if (done & (1 << idx)) + continue; + snd_hda_gen_fix_pin_power(codec, + spec->gen.adc_nids[idx]); + done |= 1 << idx; + } + } + return 0; } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 5af372d01834..40933aa33afe 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1396,7 +1396,6 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, struct hda_codec *codec = per_pin->codec; struct hdmi_spec *spec = codec->spec; struct hdmi_eld *eld = &spec->temp_eld; - struct hdmi_eld *pin_eld = &per_pin->sink_eld; hda_nid_t pin_nid = per_pin->pin_nid; /* * Always execute a GetPinSense verb here, even when called from @@ -1413,15 +1412,15 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, present = snd_hda_pin_sense(codec, pin_nid); mutex_lock(&per_pin->lock); - pin_eld->monitor_present = !!(present & AC_PINSENSE_PRESENCE); - if (pin_eld->monitor_present) + eld->monitor_present = !!(present & AC_PINSENSE_PRESENCE); + if (eld->monitor_present) eld->eld_valid = !!(present & AC_PINSENSE_ELDV); else eld->eld_valid = false; codec_dbg(codec, "HDMI status: Codec=%d Pin=%d Presence_Detect=%d ELD_Valid=%d\n", - codec->addr, pin_nid, pin_eld->monitor_present, eld->eld_valid); + codec->addr, pin_nid, eld->monitor_present, eld->eld_valid); if (eld->eld_valid) { if (spec->ops.pin_get_eld(codec, pin_nid, eld->eld_buffer, @@ -1441,7 +1440,7 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, else update_eld(codec, per_pin, eld); - ret = !repoll || !pin_eld->monitor_present || pin_eld->eld_valid; + ret = !repoll || !eld->monitor_present || eld->eld_valid; jack = snd_hda_jack_tbl_get(codec, pin_nid); if (jack) @@ -1859,6 +1858,8 @@ static void hdmi_set_chmap(struct hdac_device *hdac, int pcm_idx, struct hdmi_spec *spec = codec->spec; struct hdmi_spec_per_pin *per_pin = pcm_idx_to_pin(spec, pcm_idx); + if (!per_pin) + return; mutex_lock(&per_pin->lock); per_pin->chmap_set = true; memcpy(per_pin->chmap, chmap, ARRAY_SIZE(per_pin->chmap)); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fefe83f2beab..810bceee4fd2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4760,6 +4760,7 @@ enum { ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC280_FIXUP_HP_HEADSET_MIC, ALC221_FIXUP_HP_FRONT_MIC, + ALC292_FIXUP_TPT460, }; static const struct hda_fixup alc269_fixups[] = { @@ -5409,6 +5410,12 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC292_FIXUP_TPT460] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_tpt440_dock, + .chained = true, + .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5442,6 +5449,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), + SND_PCI_QUIRK(0x1028, 0x0669, "Dell Optiplex 9020m", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -5563,7 +5571,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), - SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), + SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -5658,6 +5666,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"}, {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, + {.id = ALC292_FIXUP_TPT460, .name = "tpt460"}, {} }; #define ALC225_STANDARD_PINS \ diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c index c5194f5b150a..d7e71f309299 100644 --- a/sound/pci/pcxhr/pcxhr_core.c +++ b/sound/pci/pcxhr/pcxhr_core.c @@ -1341,5 +1341,6 @@ irqreturn_t pcxhr_threaded_irq(int irq, void *dev_id) } pcxhr_msg_thread(mgr); + mutex_unlock(&mgr->lock); return IRQ_HANDLED; } diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index ddca6547399b..1f8fb0d904e0 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -349,6 +349,16 @@ static struct usbmix_name_map bose_companion5_map[] = { }; /* + * Dell usb dock with ALC4020 codec had a firmware problem where it got + * screwed up when zero volume is passed; just skip it as a workaround + */ +static const struct usbmix_name_map dell_alc4020_map[] = { + { 16, NULL }, + { 19, NULL }, + { 0 } +}; + +/* * Control map entries */ @@ -431,6 +441,10 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = aureon_51_2_map, }, { + .id = USB_ID(0x0bda, 0x4014), + .map = dell_alc4020_map, + }, + { .id = USB_ID(0x0dba, 0x1000), .map = mbox1_map, }, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6178bb5d0731..0adfd9537cf7 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1134,9 +1134,11 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; } diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 5a95896105bc..55a60d331f47 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -299,18 +299,38 @@ they mean, and suggestions for how to fix them. Errors in .c files ------------------ -If you're getting an objtool error in a compiled .c file, chances are -the file uses an asm() statement which has a "call" instruction. An -asm() statement with a call instruction must declare the use of the -stack pointer in its output operand. For example, on x86_64: +1. c_file.o: warning: objtool: funcA() falls through to next function funcB() - register void *__sp asm("rsp"); - asm volatile("call func" : "+r" (__sp)); + This means that funcA() doesn't end with a return instruction or an + unconditional jump, and that objtool has determined that the function + can fall through into the next function. There could be different + reasons for this: -Otherwise the stack frame may not get created before the call. + 1) funcA()'s last instruction is a call to a "noreturn" function like + panic(). In this case the noreturn function needs to be added to + objtool's hard-coded global_noreturns array. Feel free to bug the + objtool maintainer, or you can submit a patch. -Another possible cause for errors in C code is if the Makefile removes --fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. + 2) funcA() uses the unreachable() annotation in a section of code + that is actually reachable. + + 3) If funcA() calls an inline function, the object code for funcA() + might be corrupt due to a gcc bug. For more details, see: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 + +2. If you're getting any other objtool error in a compiled .c file, it + may be because the file uses an asm() statement which has a "call" + instruction. An asm() statement with a call instruction must declare + the use of the stack pointer in its output operand. For example, on + x86_64: + + register void *__sp asm("rsp"); + asm volatile("call func" : "+r" (__sp)); + + Otherwise the stack frame may not get created before the call. + +3. Another possible cause for errors in C code is if the Makefile removes + -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. Also see the above section for .S file errors for more information what the individual error messages mean. diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 7515cb2e879a..e8a1e69eb92c 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -54,6 +54,7 @@ struct instruction { struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; + struct symbol *func; }; struct alternative { @@ -66,6 +67,7 @@ struct objtool_file { struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 16); struct section *rodata, *whitelist; + bool ignore_unreachables, c_file; }; const char *objname; @@ -228,7 +230,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, } } - if (insn->type == INSN_JUMP_DYNAMIC) + if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) /* sibling call */ return 0; } @@ -248,6 +250,7 @@ static int dead_end_function(struct objtool_file *file, struct symbol *func) static int decode_instructions(struct objtool_file *file) { struct section *sec; + struct symbol *func; unsigned long offset; struct instruction *insn; int ret; @@ -281,6 +284,21 @@ static int decode_instructions(struct objtool_file *file) hash_add(file->insn_hash, &insn->hash, insn->offset); list_add_tail(&insn->list, &file->insn_list); } + + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + if (!find_insn(file, sec, func->offset)) { + WARN("%s(): can't find starting instruction", + func->name); + return -1; + } + + func_for_each_insn(file, func, insn) + if (!insn->func) + insn->func = func; + } } return 0; @@ -664,13 +682,40 @@ static int add_func_switch_tables(struct objtool_file *file, text_rela->addend); /* - * TODO: Document where this is needed, or get rid of it. - * * rare case: jmpq *[addr](%rip) + * + * This check is for a rare gcc quirk, currently only seen in + * three driver functions in the kernel, only with certain + * obscure non-distro configs. + * + * As part of an optimization, gcc makes a copy of an existing + * switch jump table, modifies it, and then hard-codes the jump + * (albeit with an indirect jump) to use a single entry in the + * table. The rest of the jump table and some of its jump + * targets remain as dead code. + * + * In such a case we can just crudely ignore all unreachable + * instruction warnings for the entire object file. Ideally we + * would just ignore them for the function, but that would + * require redesigning the code quite a bit. And honestly + * that's just not worth doing: unreachable instruction + * warnings are of questionable value anyway, and this is such + * a rare issue. + * + * kbuild reports: + * - https://lkml.kernel.org/r/201603231906.LWcVUpxm%25fengguang.wu@intel.com + * - https://lkml.kernel.org/r/201603271114.K9i45biy%25fengguang.wu@intel.com + * - https://lkml.kernel.org/r/201603291058.zuJ6ben1%25fengguang.wu@intel.com + * + * gcc bug: + * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70604 */ - if (!rodata_rela) + if (!rodata_rela) { rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend + 4); + if (rodata_rela) + file->ignore_unreachables = true; + } if (!rodata_rela) continue; @@ -732,9 +777,6 @@ static int decode_sections(struct objtool_file *file) { int ret; - file->whitelist = find_section_by_name(file->elf, "__func_stack_frame_non_standard"); - file->rodata = find_section_by_name(file->elf, ".rodata"); - ret = decode_instructions(file); if (ret) return ret; @@ -799,6 +841,7 @@ static int validate_branch(struct objtool_file *file, struct alternative *alt; struct instruction *insn; struct section *sec; + struct symbol *func = NULL; unsigned char state; int ret; @@ -813,6 +856,16 @@ static int validate_branch(struct objtool_file *file, } while (1) { + if (file->c_file && insn->func) { + if (func && func != insn->func) { + WARN("%s() falls through to next function %s()", + func->name, insn->func->name); + return 1; + } + + func = insn->func; + } + if (insn->visited) { if (frame_state(insn->state) != frame_state(state)) { WARN_FUNC("frame pointer state mismatch", @@ -823,13 +876,6 @@ static int validate_branch(struct objtool_file *file, return 0; } - /* - * Catch a rare case where a noreturn function falls through to - * the next function. - */ - if (is_fentry_call(insn) && (state & STATE_FENTRY)) - return 0; - insn->visited = true; insn->state = state; @@ -1035,12 +1081,8 @@ static int validate_functions(struct objtool_file *file) continue; insn = find_insn(file, sec, func->offset); - if (!insn) { - WARN("%s(): can't find starting instruction", - func->name); - warnings++; + if (!insn) continue; - } ret = validate_branch(file, insn, 0); warnings += ret; @@ -1056,13 +1098,14 @@ static int validate_functions(struct objtool_file *file) if (insn->visited) continue; - if (!ignore_unreachable_insn(func, insn) && - !warnings) { - WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); - warnings++; - } - insn->visited = true; + + if (file->ignore_unreachables || warnings || + ignore_unreachable_insn(func, insn)) + continue; + + WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); + warnings++; } } } @@ -1133,6 +1176,10 @@ int cmd_check(int argc, const char **argv) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.rodata = find_section_by_name(file.elf, ".rodata"); + file.ignore_unreachables = false; + file.c_file = find_section_by_name(file.elf, ".comment"); ret = decode_sections(&file); if (ret < 0) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 407f11b97c8d..617578440989 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1130,7 +1130,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); - if (pt->synth_opts.callchain) + if (pt->synth_opts.last_branch) intel_pt_reset_last_branch_rb(ptq); return ret; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 69bb3fc38fb2..0840684deb7d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,3 +3,4 @@ psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu +reuseport_dualstack diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c658792d47b4..0e5340742620 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu +NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_dualstack all: $(NET_PROGS) %: %.c diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c new file mode 100644 index 000000000000..90958aaaafb9 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -0,0 +1,208 @@ +/* + * It is possible to use SO_REUSEPORT to open multiple sockets bound to + * equivalent local addresses using AF_INET and AF_INET6 at the same time. If + * the AF_INET6 socket has IPV6_V6ONLY set, it's clear which socket should + * receive a given incoming packet. However, when it is not set, incoming v4 + * packets should prefer the AF_INET socket(s). This behavior was defined with + * the original SO_REUSEPORT implementation, but broke with + * e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") + * This test creates these mixed AF_INET/AF_INET6 sockets and asserts the + * AF_INET preference for v4 packets. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const int PORT = 8888; + +static void build_rcv_fd(int family, int proto, int *rcv_fds, int count) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + int opt, i; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < count; ++i) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fds[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void send_from_v4(int proto) +{ + struct sockaddr_in saddr, daddr; + int fd; + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = htonl(INADDR_ANY); + saddr.sin_port = 0; + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr.sin_port = htons(PORT); + + fd = socket(AF_INET, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static int receive_once(int epfd, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + return ev.data.fd; +} + +static void test(int *rcv_fds, int count, int proto) +{ + struct epoll_event ev; + int epfd, i, test_fd; + uint16_t test_family; + socklen_t len; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + + ev.events = EPOLLIN; + for (i = 0; i < count; ++i) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + send_from_v4(proto); + + test_fd = receive_once(epfd, proto); + if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) + error(1, errno, "failed to read socket domain"); + if (test_family != AF_INET) + error(1, 0, "expected to receive on v4 socket but got v6 (%d)", + test_family); + + close(epfd); +} + +int main(void) +{ + int rcv_fds[32], i; + + fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5); + build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_DGRAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv6 created before IPv4 ----\n"); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 5); + build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_DGRAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + /* NOTE: UDP socket lookups traverse a different code path when there + * are > 10 sockets in a group. + */ + fprintf(stderr, "---- UDP IPv4 created before IPv6 (large) ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 16); + build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[16]), 16); + test(rcv_fds, 32, SOCK_DGRAM); + for (i = 0; i < 32; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv6 created before IPv4 (large) ----\n"); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 16); + build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[16]), 16); + test(rcv_fds, 32, SOCK_DGRAM); + for (i = 0; i < 32; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv4 created before IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 5); + build_rcv_fd(AF_INET6, SOCK_STREAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_STREAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv6 created before IPv4 ----\n"); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds, 5); + build_rcv_fd(AF_INET, SOCK_STREAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_STREAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index a9ad4fe3f68f..9aaa35dd9144 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -91,6 +91,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; + WARN_ON(!kvm_timer_should_fire(vcpu)); + /* * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. @@ -98,10 +100,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } +static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +{ + cycle_t cval, now; + + cval = vcpu->arch.timer_cpu.cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + timer = container_of(hrt, struct arch_timer_cpu, timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_compute_delta(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + queue_work(wqueue, &timer->expired); return HRTIMER_NORESTART; } @@ -176,8 +214,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 ns; - cycle_t cval, now; BUG_ON(timer_is_armed(timer)); @@ -197,14 +233,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) return; /* The timer has not yet expired, schedule a background timer */ - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, - timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); + timer_arm(timer, kvm_timer_compute_delta(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index b5754c6c5508..575c7aa30d7e 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -193,11 +193,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= kvm_pmu_valid_counter_mask(vcpu); + } return reg; } |