diff options
1098 files changed, 32663 insertions, 6731 deletions
@@ -333,6 +333,9 @@ Rémi Denis-Courmont <rdenis@simphalempin.com> Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com> Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org> Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com> +Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> +Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> +Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com> Rudolf Marek <R.Marek@sh.cvut.cz> Rui Saraiva <rmps@joel.ist.utl.pt> @@ -895,6 +895,12 @@ S: 3000 FORE Drive S: Warrendale, Pennsylvania 15086 S: USA +N: Ludovic Desroches +E: ludovic.desroches@microchip.com +D: Maintainer for ARM/Microchip (AT91) SoC support +D: Author of ADC, pinctrl, XDMA and SDHCI drivers for this platform +S: France + N: Martin Devera E: devik@cdi.cz W: http://luxik.cdi.cz/~devik/qos/ diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index fde21d900420..859501366777 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -468,6 +468,7 @@ Description: auto: Charge normally, respect thresholds inhibit-charge: Do not charge while AC is attached force-discharge: Force discharge while AC is attached + ================ ==================================== What: /sys/class/power_supply/<supply_name>/technology Date: May 2007 diff --git a/Documentation/ABI/testing/sysfs-timecard b/Documentation/ABI/testing/sysfs-timecard index 97f6773794a5..5bf78486a469 100644 --- a/Documentation/ABI/testing/sysfs-timecard +++ b/Documentation/ABI/testing/sysfs-timecard @@ -63,6 +63,18 @@ Description: (RW) Contains the current synchronization source used by the PHC. May be changed by writing one of the listed values from the available_clock_sources attribute set. 
+What: /sys/class/timecard/ocpN/clock_status_drift +Date: March 2022 +Contact: Jonathan Lemon <jonathan.lemon@gmail.com> +Description: (RO) Contains the current drift value used by the firmware + for internal disciplining of the atomic clock. + +What: /sys/class/timecard/ocpN/clock_status_offset +Date: March 2022 +Contact: Jonathan Lemon <jonathan.lemon@gmail.com> +Description: (RO) Contains the current offset value used by the firmware + for internal disciplining of the atomic clock. + What: /sys/class/timecard/ocpN/gnss_sync Date: September 2021 Contact: Jonathan Lemon <jonathan.lemon@gmail.com> @@ -126,6 +138,16 @@ Description: (RW) These attributes specify the direction of the signal The 10Mhz reference clock input is currently only valid on SMA1 and may not be combined with other destination sinks. +What: /sys/class/timecard/ocpN/tod_correction +Date: March 2022 +Contact: Jonathan Lemon <jonathan.lemon@gmail.com> +Description: (RW) The incoming GNSS signal is in UTC time, and the NMEA + format messages do not provide a TAI offset. This sets the + correction value for the incoming time. + + If UBX_LS is enabled, this should be 0, and the offset is + taken from the UBX-NAV-TIMELS message. + What: /sys/class/timecard/ocpN/ts_window_adjust Date: September 2021 Contact: Jonathan Lemon <jonathan.lemon@gmail.com> diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 1887d92e8e92..17706dc91ec9 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -130,3 +130,11 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). 
+ +DMA_ATTR_OVERWRITE +------------------ + +This is a hint to the DMA-mapping subsystem that the device is expected to +overwrite the entire mapped size, thus the caller does not require any of the +previous buffer contents to be preserved. This allows bounce-buffering +implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst index 3b32336a7803..d84ededb66f9 100644 --- a/Documentation/cpu-freq/cpu-drivers.rst +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -75,6 +75,9 @@ And optionally .resume - A pointer to a per-policy resume function which is called with interrupts disabled and _before_ the governor is started again. + .ready - A pointer to a per-policy ready function which is called after + the policy is fully initialized. + .attr - A pointer to a NULL-terminated list of "struct freq_attr" which allow to export values to sysfs. diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml index c612e1f48dba..ff91df04f9f4 100644 --- a/Documentation/devicetree/bindings/arm/atmel-at91.yaml +++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml @@ -8,7 +8,8 @@ title: Atmel AT91 device tree bindings. 
maintainers: - Alexandre Belloni <alexandre.belloni@bootlin.com> - - Ludovic Desroches <ludovic.desroches@microchip.com> + - Claudiu Beznea <claudiu.beznea@microchip.com> + - Nicolas Ferre <nicolas.ferre@microchip.com> description: | Boards with a SoC of the Atmel AT91 or SMART family shall have the following diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt index b5cb374dc47d..10a91cc8b997 100644 --- a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt +++ b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt @@ -8,7 +8,7 @@ Required properties: - compatible: Should contain a chip-specific compatible string, Chip-specific strings are of the form "fsl,<chip>-dcfg", The following <chip>s are known to be supported: - ls1012a, ls1021a, ls1043a, ls1046a, ls2080a. + ls1012a, ls1021a, ls1043a, ls1046a, ls2080a, lx2160a - reg : should contain base address and length of DCFG memory-mapped registers diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt index f7d48f23da44..10119d9ef4b1 100644 --- a/Documentation/devicetree/bindings/clock/qoriq-clock.txt +++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt @@ -44,6 +44,7 @@ Required properties: * "fsl,ls1046a-clockgen" * "fsl,ls1088a-clockgen" * "fsl,ls2080a-clockgen" + * "fsl,lx2160a-clockgen" Chassis-version clock strings include: * "fsl,qoriq-clockgen-1.0": for chassis 1.0 clocks * "fsl,qoriq-clockgen-2.0": for chassis 2.0 clocks diff --git a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml index e04349567eeb..427c5873f96a 100644 --- a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: 
SiFive GPIO controller maintainers: - - Yash Shah <yash.shah@sifive.com> - Paul Walmsley <paul.walmsley@sifive.com> properties: diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index b864916e087f..fc3dd7ec0445 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -95,6 +95,10 @@ wants to support one of the below features, it should adapt these bindings. - smbus-alert states that the optional SMBus-Alert feature apply to this bus. +- mctp-controller + indicates that the system is accessible via this bus as an endpoint for + MCTP over I2C transport. + Required properties (per child device) -------------------------------------- diff --git a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml index 272832e9f8f2..fa86691ebf16 100644 --- a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml +++ b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml @@ -20,7 +20,7 @@ description: | maintainers: - Kishon Vijay Abraham I <kishon@ti.com> - - Roger Quadros <rogerq@ti.com + - Roger Quadros <rogerq@kernel.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml index c93fe9d3ea82..3c51b2d02957 100644 --- a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml +++ b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml @@ -10,6 +10,9 @@ maintainers: - Chen-Yu Tsai <wens@csie.org> - Maxime Ripard <mripard@kernel.org> +allOf: + - $ref: can-controller.yaml# + properties: compatible: oneOf: diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index 401ab7cdb379..b7f9803c1c6d 100644 --- 
a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -9,7 +9,10 @@ title: Bosch MCAN controller Bindings description: Bosch MCAN controller for CAN bus maintainers: - - Sriram Dash <sriram.dash@samsung.com> + - Chandrasekar Ramakrishnan <rcsekar@samsung.com> + +allOf: + - $ref: can-controller.yaml# properties: compatible: @@ -66,8 +69,8 @@ properties: M_CAN includes the following elements according to user manual: 11-bit Filter 0-128 elements / 0-128 words 29-bit Filter 0-64 elements / 0-128 words - Rx FIFO 0 0-64 elements / 0-1152 words - Rx FIFO 1 0-64 elements / 0-1152 words + Rx FIFO 0 0-64 elements / 0-1152 words + Rx FIFO 1 0-64 elements / 0-1152 words Rx Buffers 0-64 elements / 0-1152 words Tx Event FIFO 0-32 elements / 0-64 words Tx Buffers 0-32 elements / 0-576 words diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml index 2a884c1fe0e0..b3826af6bd6e 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml @@ -11,6 +11,9 @@ title: maintainers: - Marc Kleine-Budde <mkl@pengutronix.de> +allOf: + - $ref: can-controller.yaml# + properties: compatible: oneOf: diff --git a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml new file mode 100644 index 000000000000..afd11c9422fa --- /dev/null +++ b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/mctp-i2c-controller.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MCTP I2C transport binding + +maintainers: + - Matt Johnston <matt@codeconstruct.com.au> + +description: | + An mctp-i2c-controller defines a local 
MCTP endpoint on an I2C controller. + MCTP I2C is specified by DMTF DSP0237. + + An mctp-i2c-controller must be attached to an I2C adapter which supports + slave functionality. I2C busses (either directly or as subordinate mux + busses) are attached to the mctp-i2c-controller with a 'mctp-controller' + property on each used bus. Each mctp-controller I2C bus will be presented + to the host system as a separate MCTP I2C instance. + +properties: + compatible: + const: mctp-i2c-controller + + reg: + minimum: 0x40000000 + maximum: 0x4000007f + description: | + 7 bit I2C address of the local endpoint. + I2C_OWN_SLAVE_ADDRESS (1<<30) flag must be set. + +additionalProperties: false + +required: + - compatible + - reg + +examples: + - | + // Basic case of a single I2C bus + #include <dt-bindings/i2c/i2c.h> + + i2c { + #address-cells = <1>; + #size-cells = <0>; + mctp-controller; + + mctp@30 { + compatible = "mctp-i2c-controller"; + reg = <(0x30 | I2C_OWN_SLAVE_ADDRESS)>; + }; + }; + + - | + // Mux topology with multiple MCTP-handling busses under + // a single mctp-i2c-controller. + // i2c1 and i2c6 can have MCTP devices, i2c5 does not. 
+ #include <dt-bindings/i2c/i2c.h> + + i2c1: i2c { + #address-cells = <1>; + #size-cells = <0>; + mctp-controller; + + mctp@50 { + compatible = "mctp-i2c-controller"; + reg = <(0x50 | I2C_OWN_SLAVE_ADDRESS)>; + }; + }; + + i2c-mux { + #address-cells = <1>; + #size-cells = <0>; + i2c-parent = <&i2c1>; + + i2c5: i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + eeprom@33 { + reg = <0x33>; + }; + }; + + i2c6: i2c@1 { + #address-cells = <1>; + #size-cells = <0>; + reg = <1>; + mctp-controller; + }; + }; diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt index 8d157f0295a5..c5ab62c39133 100644 --- a/Documentation/devicetree/bindings/net/micrel.txt +++ b/Documentation/devicetree/bindings/net/micrel.txt @@ -45,3 +45,20 @@ Optional properties: In fiber mode, auto-negotiation is disabled and the PHY can only work in 100base-fx (full and half duplex) modes. + + - lan8814,ignore-ts: If present the PHY will not support timestamping. + + This option acts as a check of whether timestamping is supported by + hardware or not. The LAN8814 PHY supports hardware timestamping. + + - lan8814,latency_rx_10: Configures Latency value of phy in ingress at 10 Mbps. + + - lan8814,latency_tx_10: Configures Latency value of phy in egress at 10 Mbps. + + - lan8814,latency_rx_100: Configures Latency value of phy in ingress at 100 Mbps. + + - lan8814,latency_tx_100: Configures Latency value of phy in egress at 100 Mbps. + + - lan8814,latency_rx_1000: Configures Latency value of phy in ingress at 1000 Mbps. + + - lan8814,latency_tx_1000: Configures Latency value of phy in egress at 1000 Mbps. 
diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index 347b912a46bb..6c86d3d85e99 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -53,12 +53,14 @@ properties: items: - description: register based extraction - description: frame dma based extraction + - description: ptp interrupt interrupt-names: minItems: 1 items: - const: xtr - const: fdma + - const: ptp resets: items: diff --git a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml index cbbf5e8b1197..f78d3246fbdc 100644 --- a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml +++ b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml @@ -8,7 +8,7 @@ title: OMAP USB2 PHY maintainers: - Kishon Vijay Abraham I <kishon@ti.com> - - Roger Quadros <rogerq@ti.com> + - Roger Quadros <rogerq@kernel.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml index 84e66913d042..db41cd7bf150 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml +++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml @@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: SiFive PWM controller maintainers: - - Yash Shah <yash.shah@sifive.com> - Sagar Kadam <sagar.kadam@sifive.com> - Paul Walmsley <paul.walmsley@sifive.com> diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml index 2b1f91603897..e2d330bd4608 100644 --- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml +++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml @@ -9,7 +9,6 @@ title: SiFive L2 Cache Controller maintainers: - Sagar Kadam <sagar.kadam@sifive.com> - - Yash 
Shah <yash.shah@sifive.com> - Paul Walmsley <paul.walmsley@sifive.com> description: diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml index 77adbebed824..c3e9f3485449 100644 --- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml +++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml @@ -8,6 +8,7 @@ title: Audio codec controlled by ChromeOS EC maintainers: - Cheng-Yi Chiang <cychiang@chromium.org> + - Tzung-Bi Shih <tzungbi@google.com> description: | Google's ChromeOS EC codec is a digital mic codec provided by the diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml index f00867ebc147..481aaa09f3f2 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.yaml +++ b/Documentation/devicetree/bindings/usb/dwc2.yaml @@ -53,6 +53,7 @@ properties: - const: st,stm32mp15-hsotg - const: snps,dwc2 - const: samsung,s3c6400-hsotg + - const: intel,socfpga-agilex-hsotg reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index a634774c537c..eedde385d299 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -7,7 +7,7 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Bindings for the TI wrapper module for the Cadence USBSS-DRD controller maintainers: - - Roger Quadros <rogerq@ti.com> + - Roger Quadros <rogerq@kernel.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml index f6e91a5fd8fe..4f7a212fddd3 100644 --- a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml @@ -7,7 +7,7 @@ $schema: 
http://devicetree.org/meta-schemas/core.yaml# title: TI Keystone Soc USB Controller maintainers: - - Roger Quadros <rogerq@ti.com> + - Roger Quadros <rogerq@kernel.org> properties: compatible: diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index ab98373535ea..525e6842dd33 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -313,6 +313,17 @@ arp_ip_target maximum number of targets that can be specified is 16. The default value is no IP addresses. +ns_ip6_target + + Specifies the IPv6 addresses to use as IPv6 monitoring peers when + arp_interval is > 0. These are the targets of the NS request + sent to determine the health of the link to the targets. + Specify these values in ffff:ffff::ffff:ffff format. Multiple IPv6 + addresses must be separated by a comma. At least one IPv6 + address must be given for NS/NA monitoring to function. The + maximum number of targets that can be specified is 16. The + default value is no IPv6 addresses. + arp_validate Specifies whether or not ARP probes and replies should be diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index 29b1bae0cf00..e0219c1452ab 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -293,6 +293,33 @@ of dropped frames, which is a sum of frames dropped due to timing violations, lack of destination ports and MTU enforcement checks). Byte-level counters are not available. +Limitations +=========== + +The SJA1105 switch family always performs VLAN processing. When configured as +VLAN-unaware, frames carry a different VLAN tag internally, depending on +whether the port is standalone or under a VLAN-unaware bridge. + +The virtual link keys are always fixed at {MAC DA, VLAN ID, VLAN PCP}, but the +driver asks for the VLAN ID and VLAN PCP when the port is under a VLAN-aware +bridge. 
Otherwise, it fills in the VLAN ID and PCP automatically, based on +whether the port is standalone or in a VLAN-unaware bridge, and accepts only +"VLAN-unaware" tc-flower keys (MAC DA). + +The existing tc-flower keys that are offloaded using virtual links are no +longer operational after one of the following happens: + +- port was standalone and joins a bridge (VLAN-aware or VLAN-unaware) +- port is part of a bridge whose VLAN awareness state changes +- port was part of a bridge and becomes standalone +- port was standalone, but another port joins a VLAN-aware bridge and this + changes the global VLAN awareness state of the bridge + +The driver cannot veto all these operations, and it cannot update/remove the +existing tc-flower filters either. So for proper operation, the tc-flower +filters should be installed only after the forwarding configuration of the port +has been made, and removed by user space before making any changes to it. + Device Tree bindings and board design ===================================== diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index cae28af7a476..24d9be69065d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -861,6 +861,7 @@ Kernel response contents: ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split + ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE ==================================== ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with @@ -885,6 +886,7 @@ Request contents: ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE ==================================== ====== 
=========================== Kernel checks that requested ring sizes do not exceed limits reported by @@ -892,6 +894,15 @@ driver. Driver may impose additional constraints and may not suspport all attributes. +``ETHTOOL_A_RINGS_CQE_SIZE`` specifies the completion queue event size. +Completion queue events (CQE) are the events posted by the NIC to indicate the +completion status of a packet when the packet is sent (like send success or +error) or received (like pointers to packet fragments). The CQE size parameter +allows the CQE size to be changed from the default if the NIC supports it. +A bigger CQE can hold more receive buffer pointers; in turn, the NIC can transfer +a bigger frame from the wire. Based on the NIC hardware, the overall completion +queue size can be adjusted in the driver if CQE size is modified. + CHANNELS_GET ============ diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 58bc8cd367c6..ce017136ab05 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -96,6 +96,7 @@ Contents: sctp secid seg6-sysctl + smc-sysctl statistics strparser switchdev diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index a147591ce203..5db8c263b0c6 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -105,6 +105,47 @@ a page will cause no race conditions is enough. Please note the caller must not use data area after running page_pool_put_page_bulk(), as this function overwrites it. +* page_pool_get_stats(): Retrieve statistics about the page_pool. This API + is only available if the kernel has been configured with + ``CONFIG_PAGE_POOL_STATS=y``. A pointer to a caller allocated ``struct + page_pool_stats`` structure is passed to this API which is filled in. The + caller can then report those stats to the user (perhaps via ethtool, + debugfs, etc.). See below for an example usage of this API. 
+ +Stats API and structures +------------------------ +If the kernel is configured with ``CONFIG_PAGE_POOL_STATS=y``, the API +``page_pool_get_stats()`` and structures described below are available. It +takes a pointer to a ``struct page_pool`` and a pointer to a ``struct +page_pool_stats`` allocated by the caller. + +The API will fill in the provided ``struct page_pool_stats`` with +statistics about the page_pool. + +The stats structure has the following fields:: + + struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; + }; + + +The ``struct page_pool_alloc_stats`` has the following fields: + * ``fast``: successful fast path allocations + * ``slow``: slow path order-0 allocations + * ``slow_high_order``: slow path high order allocations + * ``empty``: ptr ring is empty, so a slow path allocation was forced. + * ``refill``: an allocation which triggered a refill of the cache + * ``waive``: pages obtained from the ptr ring that cannot be added to + the cache due to a NUMA mismatch. + +The ``struct page_pool_recycle_stats`` has the following fields: + * ``cached``: recycling placed page in the page pool cache + * ``cache_full``: page pool cache was full + * ``ring``: page placed into the ptr ring + * ``ring_full``: page released from page pool because the ptr ring was full + * ``released_refcnt``: page released (and not recycled) because refcnt > 1 + Coding examples =============== @@ -157,6 +198,21 @@ NAPI poller } } +Stats +----- + +.. 
code-block:: c + + #ifdef CONFIG_PAGE_POOL_STATS + /* retrieve stats */ + struct page_pool_stats stats = { 0 }; + if (page_pool_get_stats(page_pool, &stats)) { + /* perhaps the driver reports statistics with ethtool */ + ethtool_print_allocation_stats(&stats.alloc_stats); + ethtool_print_recycle_stats(&stats.recycle_stats); + } + #endif + Driver unload ------------- diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst new file mode 100644 index 000000000000..0987fd1bc220 --- /dev/null +++ b/Documentation/networking/smc-sysctl.rst @@ -0,0 +1,23 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========== +SMC Sysctl +========== + +/proc/sys/net/smc/* Variables +============================= + +autocorking_size - INTEGER + Setting SMC auto corking size: + SMC auto corking is like TCP auto corking from the application's + point of view. When applications do consecutive small + write()/sendmsg() system calls, we try to coalesce these small writes + as much as possible, to lower the total number of CDC and RDMA Writes being + sent. + autocorking_size limits the maximum corked bytes that can be sent to + the underlying device in a single send. If set to 0, the SMC auto corking + is disabled. + Applications can still use TCP_CORK for optimal behavior when they + know how/when to uncork their sockets. + + Default: 64K diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index f5809206eb93..be4eb1242057 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -668,7 +668,7 @@ timestamping: (through another RX timestamping FIFO). Deferral on RX is typically necessary when retrieving the timestamp needs a sleepable context. In that case, it is the responsibility of the DSA driver to call - ``netif_rx_ni()`` on the freshly timestamped skb. + ``netif_rx()`` on the freshly timestamped skb. 
3.2.2 Ethernet PHYs ^^^^^^^^^^^^^^^^^^^ diff --git a/Documentation/tools/rtla/common_hist_options.rst b/Documentation/tools/rtla/common_hist_options.rst index 0266cd08a6c9..df53ff835bfb 100644 --- a/Documentation/tools/rtla/common_hist_options.rst +++ b/Documentation/tools/rtla/common_hist_options.rst @@ -2,7 +2,7 @@ Set the histogram bucket size (default *1*). -**-e**, **--entries** *N* +**-E**, **--entries** *N* Set the number of entries of the histogram (default 256). diff --git a/Documentation/tools/rtla/common_osnoise_description.rst b/Documentation/tools/rtla/common_osnoise_description.rst index 8973c5df888f..d5d61615b967 100644 --- a/Documentation/tools/rtla/common_osnoise_description.rst +++ b/Documentation/tools/rtla/common_osnoise_description.rst @@ -1,7 +1,7 @@ The **rtla osnoise** tool is an interface for the *osnoise* tracer. The *osnoise* tracer dispatches a kernel thread per-cpu. These threads read the time in a loop while with preemption, softirq and IRQs enabled, thus -allowing all the sources of operating systme noise during its execution. +allowing all the sources of operating system noise during its execution. The *osnoise*'s tracer threads take note of the delta between each time read, along with an interference counter of all sources of interference. At the end of each period, the *osnoise* tracer displays a summary of diff --git a/Documentation/tools/rtla/rtla-osnoise-hist.rst b/Documentation/tools/rtla/rtla-osnoise-hist.rst index 52298ddd8701..f2e79d22c4c4 100644 --- a/Documentation/tools/rtla/rtla-osnoise-hist.rst +++ b/Documentation/tools/rtla/rtla-osnoise-hist.rst @@ -36,7 +36,7 @@ default). The reason for reducing the runtime is to avoid starving the **rtla** tool. The tool is also set to run for *one minute*. 
The output histogram is set to group outputs in buckets of *10us* and *25* entries:: - [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25 + [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -E 25 # RTLA osnoise histogram # Time unit is microseconds (us) # Duration: 0 00:01:00 diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst index 87a36044f828..2ca92042767b 100644 --- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst @@ -84,6 +84,8 @@ CPUfreq核心层注册一个cpufreq_driver结构体。 .resume - 一个指向per-policy恢复函数的指针,该函数在关中断且在调节器再一次启动前被 调用。 + .ready - 一个指向per-policy准备函数的指针,该函数在策略完全初始化之后被调用。 + .attr - 一个指向NULL结尾的"struct freq_attr"列表的指针,该列表允许导出值到 sysfs。 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a4267104db50..9f3172376ec3 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1394,7 +1394,7 @@ documentation when it pops into existence). ------------------- :Capability: KVM_CAP_ENABLE_CAP -:Architectures: mips, ppc, s390 +:Architectures: mips, ppc, s390, x86 :Type: vcpu ioctl :Parameters: struct kvm_enable_cap (in) :Returns: 0 on success; -1 on error @@ -6997,6 +6997,20 @@ indicated by the fd to the VM this is called on. This is intended to support intra-host migration of VMs between userspace VMMs, upgrading the VMM process without interrupting the guest. +7.30 KVM_CAP_PPC_AIL_MODE_3 +------------------------------- + +:Capability: KVM_CAP_PPC_AIL_MODE_3 +:Architectures: ppc +:Type: vm + +This capability indicates that the kernel supports the mode 3 setting for the +"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location" +resource that is controlled with the H_SET_MODE hypercall. + +This capability allows a guest kernel to use a better-performance mode for +handling interrupts and system calls. + 8. 
Other capabilities. ====================== diff --git a/MAINTAINERS b/MAINTAINERS index 65f5043ae48d..cb75c5d6d78b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2254,7 +2254,7 @@ F: drivers/phy/mediatek/ ARM/Microchip (AT91) SoC support M: Nicolas Ferre <nicolas.ferre@microchip.com> M: Alexandre Belloni <alexandre.belloni@bootlin.com> -M: Ludovic Desroches <ludovic.desroches@microchip.com> +M: Claudiu Beznea <claudiu.beznea@microchip.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported W: http://www.linux4sam.org @@ -4549,6 +4549,7 @@ F: drivers/platform/chrome/ CHROMEOS EC CODEC DRIVER M: Cheng-Yi Chiang <cychiang@chromium.org> +M: Tzung-Bi Shih <tzungbi@google.com> R: Guenter Roeck <groeck@chromium.org> S: Maintained F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml @@ -4914,7 +4915,8 @@ F: kernel/cgroup/cpuset.c CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner <hannes@cmpxchg.org> M: Michal Hocko <mhocko@kernel.org> -M: Vladimir Davydov <vdavydov.dev@gmail.com> +M: Roman Gushchin <roman.gushchin@linux.dev> +M: Shakeel Butt <shakeelb@google.com> L: cgroups@vger.kernel.org L: linux-mm@kvack.org S: Maintained @@ -7012,12 +7014,6 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/sb_edac.c -EDAC-SIFIVE -M: Yash Shah <yash.shah@sifive.com> -L: linux-edac@vger.kernel.org -S: Supported -F: drivers/edac/sifive_edac.c - EDAC-SKYLAKE M: Tony Luck <tony.luck@intel.com> L: linux-edac@vger.kernel.org @@ -7750,8 +7746,7 @@ M: Qiang Zhao <qiang.zhao@nxp.com> L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/soc/fsl/qe/ -F: include/soc/fsl/*qe*.h -F: include/soc/fsl/*ucc*.h +F: include/soc/fsl/qe/ FREESCALE QUICC ENGINE UCC ETHERNET DRIVER M: Li Yang <leoyang.li@nxp.com> @@ -7782,6 +7777,7 @@ F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml F: Documentation/devicetree/bindings/soc/fsl/ F: drivers/soc/fsl/ F: include/linux/fsl/ +F: include/soc/fsl/ FREESCALE SOC 
FS_ENET DRIVER M: Pantelis Antoniou <pantelis.antoniou@gmail.com> @@ -7936,6 +7932,12 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/fujitsu-tablet.c +FUNGIBLE ETHERNET DRIVERS +M: Dimitris Michailidis <dmichail@fungible.com> +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/fungible/ + FUSE: FILESYSTEM IN USERSPACE M: Miklos Szeredi <miklos@szeredi.hu> L: linux-fsdevel@vger.kernel.org @@ -9264,6 +9266,15 @@ S: Maintained W: https://github.com/o2genum/ideapad-slidebar F: drivers/input/misc/ideapad_slidebar.c +IDMAPPED MOUNTS +M: Christian Brauner <brauner@kernel.org> +L: linux-fsdevel@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git +F: Documentation/filesystems/idmappings.rst +F: tools/testing/selftests/mount_setattr/ +F: include/linux/mnt_idmapping.h + IDT VersaClock 5 CLOCK DRIVER M: Luca Ceresoli <luca@lucaceresoli.net> S: Maintained @@ -13696,7 +13707,7 @@ F: scripts/nsdeps NTB AMD DRIVER M: Sanjay R Mehta <sanju.mehta@amd.com> M: Shyam Sundar S K <Shyam-sundar.S-k@amd.com> -L: linux-ntb@googlegroups.com +L: ntb@lists.linux.dev S: Supported F: drivers/ntb/hw/amd/ @@ -13704,7 +13715,7 @@ NTB DRIVER CORE M: Jon Mason <jdmason@kudzu.us> M: Dave Jiang <dave.jiang@intel.com> M: Allen Hubbe <allenbh@gmail.com> -L: linux-ntb@googlegroups.com +L: ntb@lists.linux.dev S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git @@ -13716,13 +13727,13 @@ F: tools/testing/selftests/ntb/ NTB IDT DRIVER M: Serge Semin <fancer.lancer@gmail.com> -L: linux-ntb@googlegroups.com +L: ntb@lists.linux.dev S: Supported F: drivers/ntb/hw/idt/ NTB INTEL DRIVER M: Dave Jiang <dave.jiang@intel.com> -L: linux-ntb@googlegroups.com +L: ntb@lists.linux.dev S: Supported W: https://github.com/davejiang/linux/wiki T: git https://github.com/davejiang/linux.git @@ -15565,6 +15576,7 @@ M: Iurii Zaikin <yzaikin@google.com> L: linux-kernel@vger.kernel.org L: 
linux-fsdevel@vger.kernel.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git sysctl-next F: fs/proc/proc_sysctl.c F: include/linux/sysctl.h F: kernel/sysctl-test.c @@ -16000,14 +16012,6 @@ F: Documentation/devicetree/bindings/misc/qcom,fastrpc.txt F: drivers/misc/fastrpc.c F: include/uapi/misc/fastrpc.h -QUALCOMM GENERIC INTERFACE I2C DRIVER -M: Akash Asthana <akashast@codeaurora.org> -M: Mukesh Savaliya <msavaliy@codeaurora.org> -L: linux-i2c@vger.kernel.org -L: linux-arm-msm@vger.kernel.org -S: Supported -F: drivers/i2c/busses/i2c-qcom-geni.c - QUALCOMM HEXAGON ARCHITECTURE M: Brian Cain <bcain@codeaurora.org> L: linux-hexagon@vger.kernel.org @@ -16079,8 +16083,8 @@ F: Documentation/devicetree/bindings/mtd/qcom,nandc.yaml F: drivers/mtd/nand/raw/qcom_nandc.c QUALCOMM RMNET DRIVER -M: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> -M: Sean Tranchetti <stranche@codeaurora.org> +M: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com> +M: Sean Tranchetti <quic_stranche@quicinc.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -16372,6 +16376,7 @@ F: drivers/watchdog/realtek_otto_wdt.c REALTEK RTL83xx SMI DSA ROUTER CHIPS M: Linus Walleij <linus.walleij@linaro.org> +M: Alvin Šipraga <alsi@bang-olufsen.dk> S: Maintained F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt F: drivers/net/dsa/realtek/* @@ -17764,8 +17769,10 @@ M: David Rientjes <rientjes@google.com> M: Joonsoo Kim <iamjoonsoo.kim@lge.com> M: Andrew Morton <akpm@linux-foundation.org> M: Vlastimil Babka <vbabka@suse.cz> +R: Roman Gushchin <roman.gushchin@linux.dev> L: linux-mm@kvack.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git F: include/linux/sl?b*.h F: mm/sl?b* @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc6 NAME = Superb Owl # *DOCUMENTATION* diff --git 
a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi index 5e55198e4576..54cd37336be7 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi +++ b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi @@ -158,6 +158,24 @@ status = "disabled"; }; +/* Unusable as clockevent because if unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred timer for clockevent */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + &twl_gpio { ti,use-leds; /* diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index c2995a280729..162d0726b008 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -14,36 +14,3 @@ display2 = &tv0; }; }; - -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 8eed9e3a92e9..5868eb512f69 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -718,8 +718,8 @@ interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>; assigned-clocks = <&cru SCLK_HDMI_PHY>; assigned-clock-parents = <&hdmi_phy>; - clocks = 
<&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>; - clock-names = "isfr", "iahb", "cec"; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; + clock-names = "iahb", "isfr", "cec"; pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index aaaa61875701..45a9d9b908d2 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -971,7 +971,7 @@ status = "disabled"; }; - crypto: cypto-controller@ff8a0000 { + crypto: crypto@ff8a0000 { compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts index 1d2aac2cb6d0..fdc1d64dfff9 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big.dts @@ -13,12 +13,15 @@ "google,nyan-big-rev1", "google,nyan-big-rev0", "google,nyan-big", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "auo,b133xtn01"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "auo,b133xtn01"; + backlight = <&backlight>; + }; + }; + }; }; mmc@700b0400 { /* SD Card on this bus */ diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze.dts b/arch/arm/boot/dts/tegra124-nyan-blaze.dts index 677babde6460..abdf4456826f 100644 --- a/arch/arm/boot/dts/tegra124-nyan-blaze.dts +++ b/arch/arm/boot/dts/tegra124-nyan-blaze.dts @@ -15,12 +15,15 @@ "google,nyan-blaze-rev0", "google,nyan-blaze", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "samsung,ltn140at29-301"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = 
"samsung,ltn140at29-301"; + backlight = <&backlight>; + }; + }; + }; }; sound { diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index 232c90604df9..6a9592ceb5f2 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -48,6 +48,13 @@ dpaux@545c0000 { vdd-supply = <&vdd_3v3_panel>; status = "okay"; + + aux-bus { + panel: panel { + compatible = "lg,lp129qe"; + backlight = <&backlight>; + }; + }; }; }; @@ -1080,13 +1087,6 @@ }; }; - panel: panel { - compatible = "lg,lp129qe"; - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; - }; - vdd_mux: regulator-mux { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 7bd30c0a4280..22f937e6f3ff 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { +static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = 
kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 274e4f73fd33..5e2be37a198e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -212,12 +212,14 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; } __setup("noalign", noalign_setup); diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 6288e104a089..a2635b14da30 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -543,8 +543,7 @@ <0x02000000 0x00 0x50000000 0x00 0x50000000 0x0 0x08000000>, <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>; /* Standard AXI Translation entries as programmed by EDK2 */ - dma-ranges = <0x02000000 0x0 0x2c1c0000 0x0 0x2c1c0000 0x0 0x00040000>, - <0x02000000 0x0 0x80000000 
0x0 0x80000000 0x0 0x80000000>, + dma-ranges = <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>, <0x43000000 0x8 0x00000000 0x8 0x00000000 0x2 0x00000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index f77f90ed416f..0c7a72c51a31 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -707,7 +707,6 @@ clocks = <&clk IMX8MM_CLK_VPU_DEC_ROOT>; assigned-clocks = <&clk IMX8MM_CLK_VPU_BUS>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_800M>; - resets = <&src IMX8MQ_RESET_VPU_RESET>; }; pgc_vpu_g1: power-domain@7 { diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index a987ff7156bd..09f7364dd1d0 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -132,7 +132,7 @@ scmi_sensor: protocol@15 { reg = <0x15>; - #thermal-sensor-cells = <0>; + #thermal-sensor-cells = <1>; }; }; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 0dd2d2ee765a..f4270cf18996 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -502,7 +502,7 @@ }; usb0: usb@ffb00000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb00000 0x40000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0>; @@ -515,7 +515,7 @@ }; usb1: usb@ffb40000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb40000 0x40000>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0>; diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi index 787ebcec121d..2dd5e38820b1 100644 --- a/arch/arm64/boot/dts/microchip/sparx5.dtsi +++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi @@ 
-471,9 +471,10 @@ <0x6 0x10004000 0x7fc000>, <0x6 0x11010000 0xaf0000>; reg-names = "cpu", "dev", "gcb"; - interrupt-names = "xtr", "fdma"; + interrupt-names = "xtr", "fdma", "ptp"; interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>; + <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>; resets = <&reset 0>; reset-names = "switch"; }; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index f972704dfe7a..56dfbb2e2fa6 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -711,7 +711,7 @@ clock-names = "pclk", "timer"; }; - dmac: dmac@ff240000 { + dmac: dma-controller@ff240000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xff240000 0x0 0x4000>; interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 39db0b85b4da..b822533dc7f1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -489,7 +489,7 @@ status = "disabled"; }; - dmac: dmac@ff1f0000 { + dmac: dma-controller@ff1f0000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xff1f0000 0x0 0x4000>; interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 45a5ae5d2027..162f08bca0d4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -286,7 +286,7 @@ sound: sound { compatible = "rockchip,rk3399-gru-sound"; - rockchip,cpu = <&i2s0 &i2s2>; + rockchip,cpu = <&i2s0 &spdif>; }; }; @@ -437,10 +437,6 @@ ap_i2c_audio: &i2c8 { status = "okay"; }; -&i2s2 { - status = "okay"; -}; - &io_domains { status = "okay"; @@ -537,6 +533,17 @@ ap_i2c_audio: &i2c8 { vqmmc-supply = <&ppvar_sd_card_io>; }; +&spdif { + status = "okay"; + + /* + * SPDIF is routed internally to DP; we either don't use 
these pins, or + * mux them to something else. + */ + /delete-property/ pinctrl-0; + /delete-property/ pinctrl-names; +}; + &spi1 { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 292bb7e80cf3..3ae5d727e367 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -232,6 +232,7 @@ &usbdrd_dwc3_0 { dr_mode = "otg"; + extcon = <&extcon_usb3>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index fb67db4619ea..08fa00364b42 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -25,6 +25,13 @@ }; }; + extcon_usb3: extcon-usb3 { + compatible = "linux,extcon-usb-gpio"; + id-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_id>; + }; + clkin_gmac: external-gmac-clock { compatible = "fixed-clock"; clock-frequency = <125000000>; @@ -422,9 +429,22 @@ <4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; }; }; + + usb3 { + usb3_id: usb3-id { + rockchip,pins = + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; }; &sdhci { + /* + * Signal integrity isn't great at 200MHz but 100MHz has proven stable + * enough. 
+ */ + max-frequency = <100000000>; + bus-width = <8>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index d3cdf6f42a30..080457a68e3c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1881,10 +1881,10 @@ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, - <&cru PLL_VPLL>, + <&cru SCLK_HDMI_CEC>, <&cru PCLK_VIO_GRF>, - <&cru SCLK_HDMI_CEC>; - clock-names = "iahb", "isfr", "vpll", "grf", "cec"; + <&cru PLL_VPLL>; + clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; reg-io-width = <4>; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 166399b7f13f..d9eb92d59099 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -285,8 +285,6 @@ vcc_ddr: DCDC_REG3 { regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; regulator-initial-mode = <0x2>; regulator-name = "vcc_ddr"; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index 2fd313a295f8..d91df1cde736 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -32,13 +32,11 @@ clocks = <&cru SCLK_GMAC0>, <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_MAC0_REFOUT>, <&cru ACLK_GMAC0>, <&cru PCLK_GMAC0>, - <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>, - <&cru PCLK_XPCS>; + <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>; clock-names = "stmmaceth", "mac_clk_rx", "mac_clk_tx", "clk_mac_refout", "aclk_mac", "pclk_mac", - "clk_mac_speed", "ptp_ref", - "pclk_xpcs"; + "clk_mac_speed", "ptp_ref"; resets = <&cru SRST_A_GMAC0>; reset-names = 
"stmmaceth"; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index a68033a23975..8ccce54ee8e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -651,7 +651,7 @@ status = "disabled"; }; - dmac0: dmac@fe530000 { + dmac0: dma-controller@fe530000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xfe530000 0x0 0x4000>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, @@ -662,7 +662,7 @@ #dma-cells = <1>; }; - dmac1: dmac@fe550000 { + dmac1: dma-controller@fe550000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xfe550000 0x0 0x4000>; interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 3198acb2aad8..7f3c87f7a0ce 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -106,7 +106,7 @@ msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 1f // and check that it sticks + tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 3eae32876897..2ce60fecd861 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -46,8 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) * specification (ARM DEN 0022A). This means all suspend states * for KVM will preserve the register state. 
*/ - kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); + kvm_vcpu_wfi(vcpu); return PSCI_RET_SUCCESS; } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index f979adfd4fc2..ef73ba1e0ec1 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -803,7 +803,7 @@ early_param("coherentio", setcoherentio); static int __init setnocoherentio(char *str) { - dma_default_coherent = true; + dma_default_coherent = false; pr_info("Software DMA cache coherency (command line)\n"); return 0; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index d542fb7af3ba..1986d1309410 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,6 +351,9 @@ asmlinkage void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); @@ -362,9 +365,6 @@ asmlinkage void start_secondary(void) /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); - set_cpu_sibling_map(cpu); - set_cpu_core_map(cpu); - calculate_cpu_foreign_map(); /* diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index d6efffd4dd20..fb0565bc34fd 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -22,7 +22,9 @@ #include "common.h" -static void *detect_magic __initdata = detect_memory_region; +#define MT7621_MEM_TEST_PATTERN 0xaa5555aa + +static u32 detect_magic __initdata; int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { @@ -58,24 +60,32 @@ phys_addr_t mips_cpc_default_phys_base(void) panic("Cannot detect cpc address"); } +static bool __init mt7621_addr_wraparound_test(phys_addr_t size) +{ + void *dm = (void *)KSEG1ADDR(&detect_magic); + + if (CPHYSADDR(dm + size) >= MT7621_LOWMEM_MAX_SIZE) + return true; + __raw_writel(MT7621_MEM_TEST_PATTERN, dm); + if (__raw_readl(dm) != __raw_readl(dm + size)) + return false; 
+ __raw_writel(~MT7621_MEM_TEST_PATTERN, dm); + return __raw_readl(dm) == __raw_readl(dm + size); +} + static void __init mt7621_memory_detect(void) { - void *dm = &detect_magic; phys_addr_t size; - for (size = 32 * SZ_1M; size < 256 * SZ_1M; size <<= 1) { - if (!__builtin_memcmp(dm, dm + size, sizeof(detect_magic))) - break; + for (size = 32 * SZ_1M; size <= 256 * SZ_1M; size <<= 1) { + if (mt7621_addr_wraparound_test(size)) { + memblock_add(MT7621_LOWMEM_BASE, size); + return; + } } - if ((size == 256 * SZ_1M) && - (CPHYSADDR(dm + size) < MT7621_LOWMEM_MAX_SIZE) && - __builtin_memcmp(dm, dm + size, sizeof(detect_magic))) { - memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE); - memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE); - } else { - memblock_add(MT7621_LOWMEM_BASE, size); - } + memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE); + memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE); } void __init ralink_of_remap(void) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622..286cec4d86d7 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy 
%%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index fa84744d6b24..b876ef8c70a7 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -421,14 +421,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a94b0cd0bdc5..bd3734d5be89 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3264,12 +3264,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 
case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 2a82a3b2992b..af64b95e88cc 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -23,7 +23,7 @@ CONFIG_SLOB=y CONFIG_SOC_CANAAN=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro" +CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 612556faa527..ffc87e76b1dd 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -51,6 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ed29e9c8f660..d6a46ed0bf05 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -108,7 +108,7 @@ _save_context: .option pop #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_CONTEXT_TRACKING @@ -143,7 +143,7 @@ skip_context_tracking: li t0, EXC_BREAKPOINT beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on + call __trace_hardirqs_on #endif csrs CSR_STATUS, SR_IE @@ -234,7 +234,7 @@ ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_RISCV_M_MODE /* the MPP 
value is too large to be used as an immediate arg for addi */ @@ -270,10 +270,10 @@ restore_all: REG_L s1, PT_STATUS(sp) andi t0, s1, SR_PIE beqz t0, 1f - call trace_hardirqs_on + call __trace_hardirqs_on j 2f 1: - call trace_hardirqs_off + call __trace_hardirqs_off 2: #endif REG_L a0, PT_STATUS(sp) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f72527fcb347..775d3322b422 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Western Digital Corporation or its affiliates. */ +#include <linux/bits.h> #include <linux/init.h> #include <linux/pm.h> #include <linux/reboot.h> @@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); break; } - hmask |= 1 << hartid; + hmask |= BIT(hartid); } return hmask; @@ -160,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -176,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, int result = 0; unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -249,26 +250,37 @@ static void __sbi_set_timer_v02(uint64_t stime_value) static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; struct sbiret ret = {0}; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask, hbase, 0, 
0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { @@ -344,25 +356,35 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask, hbase, - start, size, arg4, arg5); - if (result) - return result; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 000000000000..095ac976d7da --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ 
-0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ + +#include <linux/irqflags.h> +#include <linux/kprobes.h> +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to setup frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger a paging exception in kernel. + * Here we add one extra level so they can be safely called by low + * level entry code which $fp is used for other purpose. + */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 000000000000..99fe67377e5e --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6dcccb304775..ec9830d2aabf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -703,7 +703,6 @@ struct kvm_vcpu_arch { struct fpu_guest guest_fpu; u64 xcr0; - u64 guest_supported_xcr0; struct kvm_pio_request pio; void *pio_data; diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 4b41efc9e367..8e4bc6453d26 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void) { struct sgx_epc_page *chunk[SGX_NR_TO_SCAN]; struct sgx_backing backing[SGX_NR_TO_SCAN]; - struct sgx_epc_section *section; struct sgx_encl_page *encl_page; struct sgx_epc_page *epc_page; - struct sgx_numa_node *node; pgoff_t page_index; int cnt = 0; int 
ret; @@ -418,13 +416,7 @@ skip: kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; - section = &sgx_epc_sections[epc_page->section]; - node = section->node; - - spin_lock(&node->lock); - list_add_tail(&epc_page->list, &node->free_page_list); - spin_unlock(&node->lock); - atomic_long_inc(&sgx_nr_free_pages); + sgx_free_epc_page(epc_page); } } diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 437d7c930c0b..75ffaef8c299 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct user32_fxsr_struct newstate; + struct fxregs_state newstate; int ret; - BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state)); - if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; @@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, /* Copy the state */ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); - /* Clear xmm8..15 */ + /* Clear xmm8..15 for 32-bit callers */ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); - memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); + if (in_ia32_syscall()) + memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 02b3ddaf4f75..7c7824ae7862 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + + if (!guest_fpu) + newfps->user_xfeatures = curfps->user_xfeatures | 
xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a438217cbfac..f734e3b0cfec 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -462,19 +462,22 @@ static bool pv_tlb_flush_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && + (num_possible_cpus() != 1)); } static bool pv_ipi_supported(void) { - return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI); + return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) && + (num_possible_cpus() != 1)); } static bool pv_sched_yield_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && + (num_possible_cpus() != 1)); } #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6d2244c94799..8d2f2f995539 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(long), + .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, @@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), + .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, diff --git 
a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 494d4d351859..b8f8d268d058 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -282,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (best && apic) { @@ -293,9 +294,11 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - vcpu->arch.guest_supported_xcr0 = + guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0; + kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 593093b52395..8e24f73bf60b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } +static u32 alloc_apf_token(struct kvm_vcpu *vcpu) +{ + /* make sure the token value is not 0 */ + u32 id = vcpu->arch.apf.id; + + if (id << 12 == 0) + vcpu->arch.apf.id = 1; + + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; +} + static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, gfn_t gfn) { struct kvm_arch_async_pf arch; - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; + arch.token = alloc_apf_token(vcpu); arch.gfn = gfn; arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 821edf664e7a..fd3a00c892c7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2693,8 +2693,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; switch (ecx) { case MSR_AMD64_TSC_RATIO: - if (!msr->host_initiated && 
!svm->tsc_scaling_enabled) - return 1; + + if (!svm->tsc_scaling_enabled) { + + if (!msr->host_initiated) + return 1; + /* + * In case TSC scaling is not enabled, always + * leave this MSR at the default value. + * + * Due to bug in qemu 6.2.0, it would try to set + * this msr to 0 if tsc scaling is not enabled. + * Ignore this value as well. + */ + if (data != 0 && data != svm->tsc_ratio_msr) + return 1; + break; + } if (data & TSC_RATIO_RSVD) return 1; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ba34e94049c7..dc822a1d403d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, src = &prev->host_state; dest = &vmx->loaded_vmcs->host_state; - vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel, - src->fs_base, src->gs_base); + vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); dest->ldt_sel = src->ldt_sel; #ifdef CONFIG_X86_64 dest->ds_sel = src->ds_sel; @@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; bool vm_fail; if (!nested_early_check) @@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) */ vmcs_writel(GUEST_RFLAGS, 0); + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index efda5e4d6247..b730d799c26e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1080,14 +1080,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx) wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); } -void 
vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base) { - if (unlikely(cr3 != host->cr3)) { - vmcs_writel(HOST_CR3, cr3); - host->cr3 = cr3; - } if (unlikely(fs_sel != host->fs_sel)) { if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); @@ -1182,9 +1177,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) gs_base = segment_base(gs_sel); #endif - vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(), - fs_sel, gs_sel, fs_base, gs_base); - + vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); vmx->guest_state_loaded = true; } @@ -6791,7 +6784,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -6834,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + /* + * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately + * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time + * it switches back to the current->mm, which can occur in KVM context + * when switching to a temporary mm to patch kernel code, e.g. if KVM + * toggles a static key while handling a VM-Exit. 
+ */ + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7f2c82e7f38f..9c6bfcd84008 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -374,9 +374,8 @@ int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base); +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); bool vmx_emulation_required(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 641044db415d..82a9dcd8c67f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -984,6 +984,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); +static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.guest_fpu.fpstate->user_xfeatures; +} + +#ifdef CONFIG_X86_64 +static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) +{ + return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC; +} +#endif + static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0 = xcr; @@ -1003,7 +1015,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) * saving. However, xcr0 bit 0 is always set, even if the * emulated CPU does not support XSAVE (see kvm_vcpu_reset()). 
*/ - valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; + valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP; if (xcr0 & ~valid_bits) return 1; @@ -2351,10 +2363,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) return tsc; } +#ifdef CONFIG_X86_64 static inline int gtod_is_based_on_tsc(int mode) { return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK; } +#endif static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { @@ -3706,8 +3720,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) return 1; - if (data & ~(XFEATURE_MASK_USER_DYNAMIC & - vcpu->arch.guest_supported_xcr0)) + if (data & ~kvm_guest_supported_xfd(vcpu)) return 1; fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data); @@ -3717,8 +3730,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) return 1; - if (data & ~(XFEATURE_MASK_USER_DYNAMIC & - vcpu->arch.guest_supported_xcr0)) + if (data & ~kvm_guest_supported_xfd(vcpu)) return 1; vcpu->arch.guest_fpu.xfd_err = data; @@ -4233,6 +4245,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_SYS_ATTRIBUTES: + case KVM_CAP_ENABLE_CAP: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -8942,6 +8955,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) return -KVM_EOPNOTSUPP; + /* + * When tsc is in permanent catchup mode guests won't be able to use + * pvclock_read_retry loop to get consistent view of pvclock + */ + if (vcpu->arch.tsc_always_catchup) + return -KVM_EOPNOTSUPP; + if (!kvm_get_walltime_and_clockread(&ts, &cycle)) return -KVM_EOPNOTSUPP; diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 962e5e145209..9fb99d18e3c2 100644 --- a/arch/xtensa/platforms/iss/network.c 
+++ b/arch/xtensa/platforms/iss/network.c @@ -304,7 +304,7 @@ static int iss_net_rx(struct net_device *dev) lp->stats.rx_bytes += skb->len; lp->stats.rx_packets++; - netif_rx_ni(skb); + netif_rx(skb); return pkt_len; } kfree_skb(skb); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0c612a911696..36a66e97e3c2 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7018,6 +7018,8 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif + wbt_enable_default(bfqd->queue); + kfree(bfqd); } diff --git a/block/blk-core.c b/block/blk-core.c index d93e3bb9a769..1039515c99d6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -284,13 +284,6 @@ void blk_queue_start_drain(struct request_queue *q) wake_up_all(&q->mq_freeze_wq); } -void blk_set_queue_dying(struct request_queue *q) -{ - blk_queue_flag_set(QUEUE_FLAG_DYING, q); - blk_queue_start_drain(q); -} -EXPORT_SYMBOL_GPL(blk_set_queue_dying); - /** * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown @@ -308,7 +301,8 @@ void blk_cleanup_queue(struct request_queue *q) WARN_ON_ONCE(blk_queue_registered(q)); /* mark @q DYING, no new request or merges will be allowed afterwards */ - blk_set_queue_dying(q); + blk_queue_flag_set(QUEUE_FLAG_DYING, q); + blk_queue_start_drain(q); blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); diff --git a/block/blk-map.c b/block/blk-map.c index 4526adde0156..c7f71d83eff1 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -446,7 +446,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, if (bytes > len) bytes = len; - page = alloc_page(GFP_NOIO | gfp_mask); + page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask); if (!page) goto cleanup; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1adfe4824ef5..d69ca91fbc8b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -736,6 +736,10 @@ static void blk_complete_request(struct request *req) /* Completion has 
already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); + + if (req_op(req) == REQ_OP_ZONE_APPEND) + bio->bi_iter.bi_sector = req->__sector; + if (!is_flush) bio_endio(bio); bio = next; diff --git a/block/elevator.c b/block/elevator.c index ec98aed39c4f..482df2a350fc 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -525,8 +525,6 @@ void elv_unregister_queue(struct request_queue *q) kobject_del(&e->kobj); e->registered = 0; - /* Re-enable throttling in case elevator disabled it */ - wbt_enable_default(q); } } diff --git a/block/fops.c b/block/fops.c index 4f59e0f5bf30..a18e7fbd97b8 100644 --- a/block/fops.c +++ b/block/fops.c @@ -289,6 +289,8 @@ static void blkdev_bio_end_io_async(struct bio *bio) struct kiocb *iocb = dio->iocb; ssize_t ret; + WRITE_ONCE(iocb->private, NULL); + if (likely(!bio->bi_status)) { ret = dio->size; iocb->ki_pos += ret; diff --git a/block/genhd.c b/block/genhd.c index 626c8406f21a..9eca1f7d35c9 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -549,6 +549,20 @@ out_free_ext_minor: EXPORT_SYMBOL(device_add_disk); /** + * blk_mark_disk_dead - mark a disk as dead + * @disk: disk to mark as dead + * + * Mark a disk as dead (e.g. surprise removed) and don't accept any new I/O + * to this disk. 
+ */ +void blk_mark_disk_dead(struct gendisk *disk) +{ + set_bit(GD_DEAD, &disk->state); + blk_queue_start_drain(disk->queue); +} +EXPORT_SYMBOL_GPL(blk_mark_disk_dead); + +/** * del_gendisk - remove the gendisk * @disk: the struct gendisk to remove * diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 86560a28751b..f8e9fa82cb9b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -96,6 +96,11 @@ static const struct dmi_system_id processor_power_dmi_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")}, (void *)1}, + /* T40 can not handle C3 idle state */ + { set_max_cstate, "IBM ThinkPad T40", { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")}, + (void *)2}, {}, }; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 0741a4933f62..34600b5b9d8e 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -400,7 +400,7 @@ int __init_or_acpilib acpi_table_parse_entries_array( acpi_get_table(id, instance, &table_header); if (!table_header) { - pr_warn("%4.4s not present\n", id); + pr_debug("%4.4s not present\n", id); return -ENODEV; } diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index 7abc7e04f656..6fa4a2faf49c 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -920,6 +920,20 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) pci_write_config_byte(dev, 0x5a, irqmask); /* + * HPT371 chips physically have only one channel, the secondary one, + * but the primary channel registers do exist! Go figure... + * So, we manually disable the non-existing channel here + * (if the BIOS hasn't done this already). + */ + if (dev->device == PCI_DEVICE_ID_TTI_HPT371) { + u8 mcr1; + + pci_read_config_byte(dev, 0x50, &mcr1); + mcr1 &= ~0x04; + pci_write_config_byte(dev, 0x50, mcr1); + } + + /* * default to pci clock. 
make sure MA15/16 are set to output * to prevent drives having problems with 40-pin cables. Needed * for some drives such as IBM-DTLA which will not enter ready @@ -950,14 +964,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) if ((freq >> 12) != 0xABCDE) { int i; - u8 sr; + u16 sr; u32 total = 0; dev_warn(&dev->dev, "BIOS has not set timing clocks\n"); /* This is the process the HPT371 BIOS is reported to use */ for (i = 0; i < 128; i++) { - pci_read_config_byte(dev, 0x78, &sr); + pci_read_config_word(dev, 0x78, &sr); total += sr & 0x1FF; udelay(15); } diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 3bc3c314a467..4f67404fe64c 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1676,6 +1676,8 @@ static int fs_init(struct fs_dev *dev) dev->hw_base = pci_resource_start(pci_dev, 0); dev->base = ioremap(dev->hw_base, 0x1000); + if (!dev->base) + return 1; reset_chip (dev); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index bc5a6ab6fa4b..1a50de39f5b5 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -861,7 +861,6 @@ static void ns_init_card_error(ns_dev *card, int error) static scq_info *get_scq(ns_dev *card, int size, u32 scd) { scq_info *scq; - int i; if (size != VBR_SCQSIZE && size != CBR_SCQSIZE) return NULL; @@ -875,9 +874,8 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) kfree(scq); return NULL; } - scq->skb = kmalloc_array(size / NS_SCQE_SIZE, - sizeof(*scq->skb), - GFP_KERNEL); + scq->skb = kcalloc(size / NS_SCQE_SIZE, sizeof(*scq->skb), + GFP_KERNEL); if (!scq->skb) { dma_free_coherent(&card->pcidev->dev, 2 * size, scq->org, scq->dma); @@ -890,15 +888,11 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) scq->last = scq->base + (scq->num_entries - 1); scq->tail = scq->last; scq->scd = scd; - scq->num_entries = size / NS_SCQE_SIZE; scq->tbd_count = 0; init_waitqueue_head(&scq->scqfull_waitq); scq->full = 0; 
spin_lock_init(&scq->lock); - for (i = 0; i < scq->num_entries; i++) - scq->skb[i] = NULL; - return scq; } diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c index 38ba08628ccb..2578b2d45439 100644 --- a/drivers/auxdisplay/lcd2s.c +++ b/drivers/auxdisplay/lcd2s.c @@ -238,7 +238,7 @@ static int lcd2s_redefine_char(struct charlcd *lcd, char *esc) if (buf[1] > 7) return 1; - i = 0; + i = 2; shift = 0; value = 0; while (*esc && i < LCD2S_CHARACTER_SIZE + 2) { @@ -298,6 +298,10 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c, I2C_FUNC_SMBUS_WRITE_BLOCK_DATA)) return -EIO; + lcd2s = devm_kzalloc(&i2c->dev, sizeof(*lcd2s), GFP_KERNEL); + if (!lcd2s) + return -ENOMEM; + /* Test, if the display is responding */ err = lcd2s_i2c_smbus_write_byte(i2c, LCD2S_CMD_DISPLAY_OFF); if (err < 0) @@ -307,12 +311,6 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c, if (!lcd) return -ENOMEM; - lcd2s = kzalloc(sizeof(struct lcd2s_data), GFP_KERNEL); - if (!lcd2s) { - err = -ENOMEM; - goto fail1; - } - lcd->drvdata = lcd2s; lcd2s->i2c = i2c; lcd2s->charlcd = lcd; @@ -321,26 +319,24 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c, err = device_property_read_u32(&i2c->dev, "display-height-chars", &lcd->height); if (err) - goto fail2; + goto fail1; err = device_property_read_u32(&i2c->dev, "display-width-chars", &lcd->width); if (err) - goto fail2; + goto fail1; lcd->ops = &lcd2s_ops; err = charlcd_register(lcd2s->charlcd); if (err) - goto fail2; + goto fail1; i2c_set_clientdata(i2c, lcd2s); return 0; -fail2: - kfree(lcd2s); fail1: - kfree(lcd); + charlcd_free(lcd2s->charlcd); return err; } @@ -349,7 +345,7 @@ static int lcd2s_i2c_remove(struct i2c_client *i2c) struct lcd2s_data *lcd2s = i2c_get_clientdata(i2c); charlcd_unregister(lcd2s->charlcd); - kfree(lcd2s->charlcd); + charlcd_free(lcd2s->charlcd); return 0; } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 9eaaff2f556c..f47cab21430f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -629,6 
+629,9 @@ re_probe: drv->remove(dev); devres_release_all(dev); + arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); @@ -1209,6 +1212,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) devres_release_all(dev); arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; dev->driver = NULL; dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index d2656581a608..4a446259a184 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -189,11 +189,9 @@ static void regmap_irq_sync_unlock(struct irq_data *data) ret = regmap_write(map, reg, d->mask_buf[i]); if (d->chip->clear_ack) { if (d->chip->ack_invert && !ret) - ret = regmap_write(map, reg, - d->mask_buf[i]); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~d->mask_buf[i]); + ret = regmap_write(map, reg, 0); } if (ret != 0) dev_err(d->map->dev, "Failed to ack 0x%x: %d\n", @@ -556,11 +554,9 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) data->status_buf[i]); if (chip->clear_ack) { if (chip->ack_invert && !ret) - ret = regmap_write(map, reg, - data->status_buf[i]); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~data->status_buf[i]); + ret = regmap_write(map, reg, 0); } if (ret != 0) dev_err(map->dev, "Failed to ack 0x%x: %d\n", @@ -817,13 +813,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, d->status_buf[i] & d->mask_buf[i]); if (chip->clear_ack) { if (chip->ack_invert && !ret) - ret = regmap_write(map, reg, - (d->status_buf[i] & - d->mask_buf[i])); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~(d->status_buf[i] & - d->mask_buf[i])); + ret = regmap_write(map, reg, 0); 
} if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 150012ffb387..19fe19eaa50e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -79,6 +79,7 @@ #include <linux/ioprio.h> #include <linux/blk-cgroup.h> #include <linux/sched/mm.h> +#include <linux/statfs.h> #include "loop.h" @@ -774,8 +775,13 @@ static void loop_config_discard(struct loop_device *lo) granularity = 0; } else { + struct kstatfs sbuf; + max_discard_sectors = UINT_MAX >> 9; - granularity = inode->i_sb->s_blocksize; + if (!vfs_statfs(&file->f_path, &sbuf)) + granularity = sbuf.f_bsize; + else + max_discard_sectors = 0; } if (max_discard_sectors) { diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index e6005c232328..2b588b62cbbb 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) "Completion workers still active!\n"); } - blk_set_queue_dying(dd->queue); + blk_mark_disk_dead(dd->disk); set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); /* Clean up the block layer. */ diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4203cdab8abf..b844432bad20 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus, * IO to complete/fail. */ blk_mq_freeze_queue(rbd_dev->disk->queue); - blk_set_queue_dying(rbd_dev->disk->queue); + blk_mark_disk_dead(rbd_dev->disk); } del_gendisk(rbd_dev->disk); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ccd0dd0c6b83..ca71a0585333 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2126,7 +2126,7 @@ static void blkfront_closing(struct blkfront_info *info) /* No more blkif_request(). 
*/ blk_mq_stop_hw_queues(info->rq); - blk_set_queue_dying(info->rq); + blk_mark_disk_dead(info->gd); set_capacity(info->gd, 0); for_each_rinfo(info, rinfo, i) { diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index fd87a59837fa..5eb0fe73ddc4 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -815,7 +815,7 @@ static int moxtet_probe(struct spi_device *spi) return 0; } -static int moxtet_remove(struct spi_device *spi) +static void moxtet_remove(struct spi_device *spi) { struct moxtet *moxtet = spi_get_drvdata(spi); @@ -828,8 +828,6 @@ static int moxtet_remove(struct spi_device *spi) device_for_each_child(moxtet->dev, NULL, __unregister); mutex_destroy(&moxtet->lock); - - return 0; } static const struct of_device_id moxtet_dt_ids[] = { diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c index 7c617edff4ca..3170d59d660c 100644 --- a/drivers/char/tpm/st33zp24/i2c.c +++ b/drivers/char/tpm/st33zp24/i2c.c @@ -267,11 +267,8 @@ static int st33zp24_i2c_probe(struct i2c_client *client, static int st33zp24_i2c_remove(struct i2c_client *client) { struct tpm_chip *chip = i2c_get_clientdata(client); - int ret; - ret = st33zp24_remove(chip); - if (ret) - return ret; + st33zp24_remove(chip); return 0; } diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c index a75dafd39445..22d184884694 100644 --- a/drivers/char/tpm/st33zp24/spi.c +++ b/drivers/char/tpm/st33zp24/spi.c @@ -381,16 +381,11 @@ static int st33zp24_spi_probe(struct spi_device *dev) * @param: client, the spi_device description (TPM SPI description). * @return: 0 in case of success. 
*/ -static int st33zp24_spi_remove(struct spi_device *dev) +static void st33zp24_spi_remove(struct spi_device *dev) { struct tpm_chip *chip = spi_get_drvdata(dev); - int ret; - ret = st33zp24_remove(chip); - if (ret) - return ret; - - return 0; + st33zp24_remove(chip); } static const struct spi_device_id st33zp24_spi_id[] = { diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c index ce9efb73c144..15b393e92c8e 100644 --- a/drivers/char/tpm/st33zp24/st33zp24.c +++ b/drivers/char/tpm/st33zp24/st33zp24.c @@ -511,10 +511,9 @@ _tpm_clean_answer: } EXPORT_SYMBOL(st33zp24_probe); -int st33zp24_remove(struct tpm_chip *chip) +void st33zp24_remove(struct tpm_chip *chip) { tpm_chip_unregister(chip); - return 0; } EXPORT_SYMBOL(st33zp24_remove); diff --git a/drivers/char/tpm/st33zp24/st33zp24.h b/drivers/char/tpm/st33zp24/st33zp24.h index 6747be1e2502..b387a476c555 100644 --- a/drivers/char/tpm/st33zp24/st33zp24.h +++ b/drivers/char/tpm/st33zp24/st33zp24.h @@ -34,5 +34,5 @@ int st33zp24_pm_resume(struct device *dev); int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops, struct device *dev, int irq, int io_lpcpd); -int st33zp24_remove(struct tpm_chip *chip); +void st33zp24_remove(struct tpm_chip *chip); #endif /* __LOCAL_ST33ZP24_H__ */ diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c index aaa59a00eeae..184396b3af50 100644 --- a/drivers/char/tpm/tpm_tis_spi_main.c +++ b/drivers/char/tpm/tpm_tis_spi_main.c @@ -254,13 +254,12 @@ static int tpm_tis_spi_driver_probe(struct spi_device *spi) static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume); -static int tpm_tis_spi_remove(struct spi_device *dev) +static void tpm_tis_spi_remove(struct spi_device *dev) { struct tpm_chip *chip = spi_get_drvdata(dev); tpm_chip_unregister(chip); tpm_tis_remove(chip); - return 0; } static const struct spi_device_id tpm_tis_spi_id[] = { diff --git a/drivers/clk/clk-lmk04832.c 
b/drivers/clk/clk-lmk04832.c index 8f02c0b88000..f416f8bc2898 100644 --- a/drivers/clk/clk-lmk04832.c +++ b/drivers/clk/clk-lmk04832.c @@ -1544,14 +1544,12 @@ err_disable_oscin: return ret; } -static int lmk04832_remove(struct spi_device *spi) +static void lmk04832_remove(struct spi_device *spi) { struct lmk04832 *lmk = spi_get_drvdata(spi); clk_disable_unprepare(lmk->oscin); of_clk_del_provider(spi->dev.of_node); - - return 0; } static const struct spi_device_id lmk04832_id[] = { { "lmk04832", LMK04832 }, diff --git a/drivers/clk/ingenic/jz4725b-cgu.c b/drivers/clk/ingenic/jz4725b-cgu.c index 744d136b721b..15d61793f53b 100644 --- a/drivers/clk/ingenic/jz4725b-cgu.c +++ b/drivers/clk/ingenic/jz4725b-cgu.c @@ -139,11 +139,10 @@ static const struct ingenic_cgu_clk_info jz4725b_cgu_clocks[] = { }, [JZ4725B_CLK_I2S] = { - "i2s", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + "i2s", CGU_CLK_MUX | CGU_CLK_DIV, .parents = { JZ4725B_CLK_EXT, JZ4725B_CLK_PLL_HALF, -1, -1 }, .mux = { CGU_REG_CPCCR, 31, 1 }, .div = { CGU_REG_I2SCDR, 0, 1, 9, -1, -1, -1 }, - .gate = { CGU_REG_CLKGR, 6 }, }, [JZ4725B_CLK_SPI] = { diff --git a/drivers/clk/qcom/gcc-msm8994.c b/drivers/clk/qcom/gcc-msm8994.c index 71aa630fa4bd..f09499999eb3 100644 --- a/drivers/clk/qcom/gcc-msm8994.c +++ b/drivers/clk/qcom/gcc-msm8994.c @@ -108,42 +108,6 @@ static const struct clk_parent_data gcc_xo_gpll0_gpll4[] = { { .hw = &gpll4.clkr.hw }, }; -static struct clk_rcg2 system_noc_clk_src = { - .cmd_rcgr = 0x0120, - .hid_width = 5, - .parent_map = gcc_xo_gpll0_map, - .clkr.hw.init = &(struct clk_init_data){ - .name = "system_noc_clk_src", - .parent_data = gcc_xo_gpll0, - .num_parents = ARRAY_SIZE(gcc_xo_gpll0), - .ops = &clk_rcg2_ops, - }, -}; - -static struct clk_rcg2 config_noc_clk_src = { - .cmd_rcgr = 0x0150, - .hid_width = 5, - .parent_map = gcc_xo_gpll0_map, - .clkr.hw.init = &(struct clk_init_data){ - .name = "config_noc_clk_src", - .parent_data = gcc_xo_gpll0, - .num_parents = ARRAY_SIZE(gcc_xo_gpll0), - .ops 
= &clk_rcg2_ops, - }, -}; - -static struct clk_rcg2 periph_noc_clk_src = { - .cmd_rcgr = 0x0190, - .hid_width = 5, - .parent_map = gcc_xo_gpll0_map, - .clkr.hw.init = &(struct clk_init_data){ - .name = "periph_noc_clk_src", - .parent_data = gcc_xo_gpll0, - .num_parents = ARRAY_SIZE(gcc_xo_gpll0), - .ops = &clk_rcg2_ops, - }, -}; - static struct freq_tbl ftbl_ufs_axi_clk_src[] = { F(50000000, P_GPLL0, 12, 0, 0), F(100000000, P_GPLL0, 6, 0, 0), @@ -1150,8 +1114,6 @@ static struct clk_branch gcc_blsp1_ahb_clk = { .enable_mask = BIT(17), .hw.init = &(struct clk_init_data){ .name = "gcc_blsp1_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -1435,8 +1397,6 @@ static struct clk_branch gcc_blsp2_ahb_clk = { .enable_mask = BIT(15), .hw.init = &(struct clk_init_data){ .name = "gcc_blsp2_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -1764,8 +1724,6 @@ static struct clk_branch gcc_lpass_q6_axi_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_lpass_q6_axi_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -1778,8 +1736,6 @@ static struct clk_branch gcc_mss_q6_bimc_axi_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_mss_q6_bimc_axi_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -1807,9 +1763,6 @@ static struct clk_branch gcc_pcie_0_cfg_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pcie_0_cfg_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -1822,9 +1775,6 @@ static struct clk_branch gcc_pcie_0_mstr_axi_clk = { 
.enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pcie_0_mstr_axi_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -1854,9 +1804,6 @@ static struct clk_branch gcc_pcie_0_slv_axi_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pcie_0_slv_axi_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -1884,9 +1831,6 @@ static struct clk_branch gcc_pcie_1_cfg_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pcie_1_cfg_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -1899,9 +1843,6 @@ static struct clk_branch gcc_pcie_1_mstr_axi_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pcie_1_mstr_axi_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -1930,9 +1871,6 @@ static struct clk_branch gcc_pcie_1_slv_axi_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pcie_1_slv_axi_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -1960,8 +1898,6 @@ static struct clk_branch gcc_pdm_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_pdm_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -1989,9 +1925,6 @@ static struct clk_branch gcc_sdcc1_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_sdcc1_ahb_clk", - .parent_hws = 
(const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -2004,9 +1937,6 @@ static struct clk_branch gcc_sdcc2_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_sdcc2_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -2034,9 +1964,6 @@ static struct clk_branch gcc_sdcc3_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_sdcc3_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -2064,9 +1991,6 @@ static struct clk_branch gcc_sdcc4_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_sdcc4_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, @@ -2124,8 +2048,6 @@ static struct clk_branch gcc_tsif_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_tsif_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2153,8 +2075,6 @@ static struct clk_branch gcc_ufs_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_ufs_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2198,8 +2118,6 @@ static struct clk_branch gcc_ufs_rx_symbol_0_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_ufs_rx_symbol_0_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2213,8 +2131,6 @@ static struct clk_branch 
gcc_ufs_rx_symbol_1_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_ufs_rx_symbol_1_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2243,8 +2159,6 @@ static struct clk_branch gcc_ufs_tx_symbol_0_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_ufs_tx_symbol_0_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2258,8 +2172,6 @@ static struct clk_branch gcc_ufs_tx_symbol_1_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_ufs_tx_symbol_1_clk", - .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2364,8 +2276,6 @@ static struct clk_branch gcc_usb_hs_ahb_clk = { .enable_mask = BIT(0), .hw.init = &(struct clk_init_data){ .name = "gcc_usb_hs_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2488,8 +2398,6 @@ static struct clk_branch gcc_boot_rom_ahb_clk = { .enable_mask = BIT(10), .hw.init = &(struct clk_init_data){ .name = "gcc_boot_rom_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2503,8 +2411,6 @@ static struct clk_branch gcc_prng_ahb_clk = { .enable_mask = BIT(13), .hw.init = &(struct clk_init_data){ .name = "gcc_prng_ahb_clk", - .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw }, - .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -2547,9 +2453,6 @@ static struct clk_regmap *gcc_msm8994_clocks[] = { [GPLL0] = &gpll0.clkr, [GPLL4_EARLY] = &gpll4_early.clkr, [GPLL4] = &gpll4.clkr, - [CONFIG_NOC_CLK_SRC] = &config_noc_clk_src.clkr, - [PERIPH_NOC_CLK_SRC] = &periph_noc_clk_src.clkr, - [SYSTEM_NOC_CLK_SRC] = &system_noc_clk_src.clkr, 
[UFS_AXI_CLK_SRC] = &ufs_axi_clk_src.clkr, [USB30_MASTER_CLK_SRC] = &usb30_master_clk_src.clkr, [BLSP1_QUP1_I2C_APPS_CLK_SRC] = &blsp1_qup1_i2c_apps_clk_src.clkr, @@ -2696,6 +2599,15 @@ static struct clk_regmap *gcc_msm8994_clocks[] = { [USB_SS_PHY_LDO] = &usb_ss_phy_ldo.clkr, [GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr, [GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr, + + /* + * The following clocks should NOT be managed by this driver, but they once were + * mistakenly added. Now they are only here to indicate that they are not defined + * on purpose, even though the names will stay in the header file (for ABI sanity). + */ + [CONFIG_NOC_CLK_SRC] = NULL, + [PERIPH_NOC_CLK_SRC] = NULL, + [SYSTEM_NOC_CLK_SRC] = NULL, }; static struct gdsc *gcc_msm8994_gdscs[] = { diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index 5c40ca1d4740..1fccb457fcc5 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -241,8 +241,7 @@ static void __init dmtimer_systimer_assign_alwon(void) bool quirk_unreliable_oscillator = false; /* Quirk unreliable 32 KiHz oscillator with incomplete dts */ - if (of_machine_is_compatible("ti,omap3-beagle-ab4") || - of_machine_is_compatible("timll,omap3-devkit8000")) { + if (of_machine_is_compatible("ti,omap3-beagle-ab4")) { quirk_unreliable_oscillator = true; counter_32k = -ENODEV; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b8d95536ee22..80f535cc8a75 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1518,6 +1518,10 @@ static int cpufreq_online(unsigned int cpu) kobject_uevent(&policy->kobj, KOBJ_ADD); + /* Callback for handling stuff after policy is ready */ + if (cpufreq_driver->ready) + cpufreq_driver->ready(policy); + if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c
b/drivers/cpufreq/qcom-cpufreq-hw.c index 05f3d7876e44..effbb680b453 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -388,7 +388,7 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) snprintf(data->irq_name, sizeof(data->irq_name), "dcvsh-irq-%u", policy->cpu); ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq, - IRQF_ONESHOT, data->irq_name, data); + IRQF_ONESHOT | IRQF_NO_AUTOEN, data->irq_name, data); if (ret) { dev_err(&pdev->dev, "Error registering %s: %d\n", data->irq_name, ret); return 0; @@ -542,6 +542,14 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) return 0; } +static void qcom_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct qcom_cpufreq_data *data = policy->driver_data; + + if (data->throttle_irq >= 0) + enable_irq(data->throttle_irq); +} + static struct freq_attr *qcom_cpufreq_hw_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, &cpufreq_freq_attr_scaling_boost_freqs, @@ -561,6 +569,7 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .fast_switch = qcom_cpufreq_hw_fast_switch, .name = "qcom-cpufreq-hw", .attr = qcom_cpufreq_hw_attr, + .ready = qcom_cpufreq_ready, }; static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index a1da2b4b6d73..1476156af74b 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1681,8 +1681,10 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) __func__, atchan->irq_status); if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) && - !(atchan->irq_status & error_mask)) + !(atchan->irq_status & error_mask)) { + spin_unlock_irq(&atchan->lock); return; + } if (atchan->irq_status & error_mask) at_xdmac_handle_error(atchan); diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c index 8a6bf291a73f..daafea5bc35d 100644 --- a/drivers/dma/ptdma/ptdma-dev.c +++ 
b/drivers/dma/ptdma/ptdma-dev.c @@ -207,7 +207,7 @@ int pt_core_init(struct pt_device *pt) if (!cmd_q->qbase) { dev_err(dev, "unable to allocate command queue\n"); ret = -ENOMEM; - goto e_dma_alloc; + goto e_destroy_pool; } cmd_q->qidx = 0; @@ -229,8 +229,10 @@ int pt_core_init(struct pt_device *pt) /* Request an irq */ ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt); - if (ret) - goto e_pool; + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_free_dma; + } /* Update the device registers with queue information. */ cmd_q->qcontrol &= ~CMD_Q_SIZE; @@ -250,21 +252,20 @@ int pt_core_init(struct pt_device *pt) /* Register the DMA engine support */ ret = pt_dmaengine_register(pt); if (ret) - goto e_dmaengine; + goto e_free_irq; /* Set up debugfs entries */ ptdma_debugfs_setup(pt); return 0; -e_dmaengine: +e_free_irq: free_irq(pt->pt_irq, pt); -e_dma_alloc: +e_free_dma: dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma); -e_pool: - dev_err(dev, "unable to allocate an IRQ\n"); +e_destroy_pool: dma_pool_destroy(pt->cmd_q.dma_pool); return ret; diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 481f45c77ce1..13d12d660cc2 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1868,8 +1868,13 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); - dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); - dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); + if (ret) + return ret; + + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; ret = rcar_dmac_parse_of(&pdev->dev, dmac); if (ret < 0) diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 158e5e7defae..b26ed690f03c 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -115,8 +115,10 @@ static dma_cookie_t 
shdma_tx_submit(struct dma_async_tx_descriptor *tx) ret = pm_runtime_get(schan->dev); spin_unlock_irq(&schan->chan_lock); - if (ret < 0) + if (ret < 0) { dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret); + pm_runtime_put(schan->dev); + } pm_runtime_barrier(schan->dev); diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index a42164389ebc..d5d55732adba 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -292,10 +292,12 @@ static int stm32_dmamux_probe(struct platform_device *pdev) ret = of_dma_router_register(node, stm32_dmamux_route_allocate, &stm32_dmamux->dmarouter); if (ret) - goto err_clk; + goto pm_disable; return 0; +pm_disable: + pm_runtime_disable(&pdev->dev); err_clk: clk_disable_unprepare(stm32_dmamux->clk); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 9d9aabdec96b..f5677d81bd2d 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -215,7 +215,7 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems) else return (char *)ptr; - r = (unsigned long)p % align; + r = (unsigned long)ptr % align; if (r == 0) return (char *)ptr; diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index b406b3f78f46..d76bab3aaac4 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2112,7 +2112,7 @@ static void __exit scmi_driver_exit(void) } module_exit(scmi_driver_exit); -MODULE_ALIAS("platform: arm-scmi"); +MODULE_ALIAS("platform:arm-scmi"); MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>"); MODULE_DESCRIPTION("ARM SCMI protocol driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c index 380e4e251399..9c460843442f 100644 --- a/drivers/firmware/efi/libstub/riscv-stub.c +++ b/drivers/firmware/efi/libstub/riscv-stub.c @@ -25,7 +25,7 @@ typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long); static u32 hartid; -static 
u32 get_boot_hartid_from_fdt(void) +static int get_boot_hartid_from_fdt(void) { const void *fdt; int chosen_node, len; @@ -33,23 +33,26 @@ static u32 get_boot_hartid_from_fdt(void) fdt = get_efi_config_table(DEVICE_TREE_GUID); if (!fdt) - return U32_MAX; + return -EINVAL; chosen_node = fdt_path_offset(fdt, "/chosen"); if (chosen_node < 0) - return U32_MAX; + return -EINVAL; prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len); if (!prop || len != sizeof(u32)) - return U32_MAX; + return -EINVAL; - return fdt32_to_cpu(*prop); + hartid = fdt32_to_cpu(*prop); + return 0; } efi_status_t check_platform_features(void) { - hartid = get_boot_hartid_from_fdt(); - if (hartid == U32_MAX) { + int ret; + + ret = get_boot_hartid_from_fdt(); + if (ret) { efi_err("/chosen/boot-hartid missing or invalid!\n"); return EFI_UNSUPPORTED; } diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index abdc8a6a3963..cae590bd08f2 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -742,6 +742,7 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, { const struct efivar_operations *ops; efi_status_t status; + unsigned long varsize; if (!__efivars) return -EINVAL; @@ -764,15 +765,17 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, return efivar_entry_set_nonblocking(name, vendor, attributes, size, data); + varsize = size + ucs2_strsize(name, 1024); if (!block) { if (down_trylock(&efivars_lock)) return -EBUSY; + status = check_var_size_nonblocking(attributes, varsize); } else { if (down_interruptible(&efivars_lock)) return -EINTR; + status = check_var_size(attributes, varsize); } - status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { up(&efivars_lock); return -ENOSPC; diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 4a55cdf089d6..e00c33310517 100644 --- a/drivers/gpio/gpio-74x164.c +++ 
b/drivers/gpio/gpio-74x164.c @@ -163,15 +163,13 @@ exit_destroy: return ret; } -static int gen_74x164_remove(struct spi_device *spi) +static void gen_74x164_remove(struct spi_device *spi) { struct gen_74x164_chip *chip = spi_get_drvdata(spi); gpiod_set_value_cansleep(chip->gpiod_oe, 0); gpiochip_remove(&chip->gpio_chip); mutex_destroy(&chip->lock); - - return 0; } static const struct spi_device_id gen_74x164_spi_ids[] = { diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c index 51cd6f98d1c7..161c4751c5f7 100644 --- a/drivers/gpio/gpio-max3191x.c +++ b/drivers/gpio/gpio-max3191x.c @@ -443,14 +443,12 @@ static int max3191x_probe(struct spi_device *spi) return 0; } -static int max3191x_remove(struct spi_device *spi) +static void max3191x_remove(struct spi_device *spi) { struct max3191x_chip *max3191x = spi_get_drvdata(spi); gpiochip_remove(&max3191x->gpio); mutex_destroy(&max3191x->lock); - - return 0; } static int __init max3191x_register_driver(struct spi_driver *sdrv) diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index 5862d73bf325..11813f41d460 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -64,11 +64,9 @@ static int max7301_probe(struct spi_device *spi) return ret; } -static int max7301_remove(struct spi_device *spi) +static void max7301_remove(struct spi_device *spi) { __max730x_remove(&spi->dev); - - return 0; } static const struct spi_device_id max7301_id[] = { diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index 31d2be1bebc8..cd9b16dbe1a9 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -134,7 +134,7 @@ exit_destroy: return ret; } -static int mc33880_remove(struct spi_device *spi) +static void mc33880_remove(struct spi_device *spi) { struct mc33880 *mc; @@ -142,8 +142,6 @@ static int mc33880_remove(struct spi_device *spi) gpiochip_remove(&mc->chip); mutex_destroy(&mc->lock); - - return 0; } static struct spi_driver mc33880_driver 
= { diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c index 8e04054cf07e..81a47ae09ff8 100644 --- a/drivers/gpio/gpio-pisosr.c +++ b/drivers/gpio/gpio-pisosr.c @@ -163,15 +163,13 @@ static int pisosr_gpio_probe(struct spi_device *spi) return 0; } -static int pisosr_gpio_remove(struct spi_device *spi) +static void pisosr_gpio_remove(struct spi_device *spi) { struct pisosr_gpio *gpio = spi_get_drvdata(spi); gpiochip_remove(&gpio->chip); mutex_destroy(&gpio->lock); - - return 0; } static const struct spi_device_id pisosr_gpio_id_table[] = { diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index a4c4e4584f5b..099e358d2491 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -410,10 +410,8 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) level = rockchip_gpio_readl(bank, bank->gpio_regs->int_type); polarity = rockchip_gpio_readl(bank, bank->gpio_regs->int_polarity); - switch (type) { - case IRQ_TYPE_EDGE_BOTH: + if (type == IRQ_TYPE_EDGE_BOTH) { if (bank->gpio_type == GPIO_TYPE_V2) { - bank->toggle_edge_mode &= ~mask; rockchip_gpio_writel_bit(bank, d->hwirq, 1, bank->gpio_regs->int_bothedge); goto out; @@ -431,30 +429,34 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) else polarity |= mask; } - break; - case IRQ_TYPE_EDGE_RISING: - bank->toggle_edge_mode &= ~mask; - level |= mask; - polarity |= mask; - break; - case IRQ_TYPE_EDGE_FALLING: - bank->toggle_edge_mode &= ~mask; - level |= mask; - polarity &= ~mask; - break; - case IRQ_TYPE_LEVEL_HIGH: - bank->toggle_edge_mode &= ~mask; - level &= ~mask; - polarity |= mask; - break; - case IRQ_TYPE_LEVEL_LOW: - bank->toggle_edge_mode &= ~mask; - level &= ~mask; - polarity &= ~mask; - break; - default: - ret = -EINVAL; - goto out; + } else { + if (bank->gpio_type == GPIO_TYPE_V2) { + rockchip_gpio_writel_bit(bank, d->hwirq, 0, + bank->gpio_regs->int_bothedge); + } else { + bank->toggle_edge_mode &= ~mask; 
+ } + switch (type) { + case IRQ_TYPE_EDGE_RISING: + level |= mask; + polarity |= mask; + break; + case IRQ_TYPE_EDGE_FALLING: + level |= mask; + polarity &= ~mask; + break; + case IRQ_TYPE_LEVEL_HIGH: + level &= ~mask; + polarity |= mask; + break; + case IRQ_TYPE_LEVEL_LOW: + level &= ~mask; + polarity &= ~mask; + break; + default: + ret = -EINVAL; + goto out; + } } rockchip_gpio_writel(bank, level, bank->gpio_regs->int_type); diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 34b36a8c035f..8d298beffd86 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -343,9 +343,12 @@ static int tegra186_gpio_of_xlate(struct gpio_chip *chip, return offset + pin; } +#define to_tegra_gpio(x) container_of((x), struct tegra_gpio, gpio) + static void tegra186_irq_ack(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; base = tegra186_gpio_get_base(gpio, data->hwirq); @@ -357,7 +360,8 @@ static void tegra186_irq_ack(struct irq_data *data) static void tegra186_irq_mask(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; @@ -372,7 +376,8 @@ static void tegra186_irq_mask(struct irq_data *data) static void tegra186_irq_unmask(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; @@ -387,7 +392,8 @@ static void tegra186_irq_unmask(struct irq_data *data) static int tegra186_irq_set_type(struct irq_data *data, unsigned int type) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = 
irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 3859911b61e9..a3d14277f17c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3147,6 +3147,16 @@ int gpiod_to_irq(const struct gpio_desc *desc) return retirq; } +#ifdef CONFIG_GPIOLIB_IRQCHIP + if (gc->irq.chip) { + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + return -EPROBE_DEFER; + } +#endif return -ENXIO; } EXPORT_SYMBOL_GPL(gpiod_to_irq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 82011e75ed85..c4387b38229c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1141,7 +1141,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, if (ret) return ret; - if (!dev->mode_config.allow_fb_modifiers) { + if (!dev->mode_config.allow_fb_modifiers && !adev->enable_virtual_display) { drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI, "GFX9+ requires FB check based on format modifier\n"); ret = check_tiling_flags_gfx6(rfb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 63a089992645..0ead08ba58c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2011,6 +2011,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } + if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev)) + amdgpu_aspm = 0; + if (amdgpu_virtual_display || amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) supports_atomic = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index d99c8779b51e..5224d9a39737 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -391,7 +391,6 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, int index) { struct drm_plane *plane; - uint64_t modifiers[] = {DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID}; int ret; plane = kzalloc(sizeof(*plane), GFP_KERNEL); @@ -402,7 +401,7 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, &amdgpu_vkms_plane_funcs, amdgpu_vkms_formats, ARRAY_SIZE(amdgpu_vkms_formats), - modifiers, type, NULL); + NULL, type, NULL); if (ret) { kfree(plane); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b37fc7d7d2c7..d62190b3dd9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -768,11 +768,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Check if all VM PDs/PTs are ready for updates * * Returns: - * True if eviction list is empty. + * True if VM is not evicting. 
*/ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - return list_empty(&vm->evicted); + bool ret; + + amdgpu_vm_eviction_lock(vm); + ret = !vm->evicting; + amdgpu_vm_eviction_unlock(vm); + return ret; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e8e4749e9c79..f0638db57111 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2057,6 +2057,10 @@ static int sdma_v4_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* SMU saves SDMA state for us */ + if (adev->in_s0ix) + return 0; + return sdma_v4_0_hw_fini(adev); } @@ -2064,6 +2068,10 @@ static int sdma_v4_0_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* SMU restores SDMA state for us */ + if (adev->in_s0ix) + return 0; + return sdma_v4_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 0fc1747e4a70..12f80fdc1fbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -619,8 +619,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) && - !(adev->apu_flags & AMD_APU_IS_RAVEN2)) + if ((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1114,8 +1114,11 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; + /* + * MMHUB PG needs to be disabled for Picasso for + * stability reasons. 
+ */ adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_MMHUB | AMD_PG_SUPPORT_VCN; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7c1c623ba799..075429bea427 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4256,6 +4256,9 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } #endif + /* Disable vblank IRQs aggressively for power-saving. */ + adev_to_drm(adev)->vblank_disable_immediate = true; + /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; @@ -4301,19 +4304,17 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) update_connector_ext_caps(aconnector); if (psr_feature_enabled) amdgpu_dm_set_psr_caps(link); + + /* TODO: Fix vblank control helpers to delay PSR entry to allow this when + * PSR is also supported. + */ + if (link->psr_settings.psr_feature_enabled) + adev_to_drm(adev)->vblank_disable_immediate = false; } } - /* - * Disable vblank IRQs aggressively for power-saving. - * - * TODO: Fix vblank control helpers to delay PSR entry to allow this when PSR - * is also supported. - */ - adev_to_drm(adev)->vblank_disable_immediate = !psr_feature_enabled; - /* Software is initialized. Now we can register interrupt handlers. 
*/ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index f977f29907df..10c7be40dfb0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -473,8 +473,10 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->dc_mode_softmax_memclk = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); /* Refresh bounding box */ + DC_FP_START(); clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); + DC_FP_END(); } static bool dcn3_is_smu_present(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index d18e9f3ea998..ba1aa994db4b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -985,10 +985,13 @@ static bool dc_construct(struct dc *dc, goto fail; #ifdef CONFIG_DRM_AMD_DC_DCN dc->clk_mgr->force_smu_not_present = init_params->force_smu_not_present; -#endif - if (dc->res_pool->funcs->update_bw_bounding_box) + if (dc->res_pool->funcs->update_bw_bounding_box) { + DC_FP_START(); dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + DC_FP_END(); + } +#endif /* Creation of current_state must occur after dc->dml * is initialized in dc_create_resource_pool because diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b3912ff9dc91..18757c158523 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1964,10 +1964,6 @@ enum dc_status dc_remove_stream_from_ctx( dc->res_pool, del_pipe->stream_res.stream_enc, false); - /* Release link encoder from stream in new dc_state. 
*/ - if (dc->res_pool->funcs->link_enc_unassign) - dc->res_pool->funcs->link_enc_unassign(new_ctx, del_pipe->stream); - #if defined(CONFIG_DRM_AMD_DC_DCN) if (is_dp_128b_132b_signal(del_pipe)) { update_hpo_dp_stream_engine_usage( diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a4207293158c..5488a0edb942 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -421,6 +421,36 @@ static int sienna_cichlid_store_powerplay_table(struct smu_context *smu) return 0; } +static int sienna_cichlid_patch_pptable_quirk(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t *board_reserved; + uint16_t *freq_table_gfx; + uint32_t i; + + /* Fix some OEM SKU specific stability issues */ + GET_PPTABLE_MEMBER(BoardReserved, &board_reserved); + if ((adev->pdev->device == 0x73DF) && + (adev->pdev->revision == 0XC3) && + (adev->pdev->subsystem_device == 0x16C2) && + (adev->pdev->subsystem_vendor == 0x1043)) + board_reserved[0] = 1387; + + GET_PPTABLE_MEMBER(FreqTableGfx, &freq_table_gfx); + if ((adev->pdev->device == 0x73DF) && + (adev->pdev->revision == 0XC3) && + ((adev->pdev->subsystem_device == 0x16C2) || + (adev->pdev->subsystem_device == 0x133C)) && + (adev->pdev->subsystem_vendor == 0x1043)) { + for (i = 0; i < NUM_GFXCLK_DPM_LEVELS; i++) { + if (freq_table_gfx[i] > 2500) + freq_table_gfx[i] = 2500; + } + } + + return 0; +} + static int sienna_cichlid_setup_pptable(struct smu_context *smu) { int ret = 0; @@ -441,7 +471,7 @@ static int sienna_cichlid_setup_pptable(struct smu_context *smu) if (ret) return ret; - return ret; + return sienna_cichlid_patch_pptable_quirk(smu); } static int sienna_cichlid_tables_init(struct smu_context *smu) @@ -1238,21 +1268,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.soc_table; struct smu_umd_pstate_table 
*pstate_table = &smu->pstate_table; + struct amdgpu_device *adev = smu->adev; pstate_table->gfxclk_pstate.min = gfx_table->min; pstate_table->gfxclk_pstate.peak = gfx_table->max; - if (gfx_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK) - pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; - if (mem_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK) - pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK; pstate_table->socclk_pstate.min = soc_table->min; pstate_table->socclk_pstate.peak = soc_table->max; - if (soc_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK) + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK; pstate_table->socclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK; + break; + case CHIP_DIMGREY_CAVEFISH: + pstate_table->gfxclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK; + pstate_table->socclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK; + break; + case CHIP_BEIGE_GOBY: + pstate_table->gfxclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK; + pstate_table->socclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK; + break; + default: + break; + } return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h index 38cd0ece24f6..42f705c7a36f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h @@ 
-33,6 +33,14 @@ typedef enum { #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK 960 #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK 1000 +#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK 1950 +#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK 960 +#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK 676 + +#define BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK 2200 +#define BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK 960 +#define BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK 1000 + extern void sienna_cichlid_set_ppt_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index caf1775d48ef..0bc84b709a93 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -282,14 +282,9 @@ static int yellow_carp_post_smu_init(struct smu_context *smu) static int yellow_carp_mode_reset(struct smu_context *smu, int type) { - int ret = 0, index = 0; - - index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, - SMU_MSG_GfxDeviceDriverReset); - if (index < 0) - return index == -EACCES ? 
0 : index; + int ret = 0; - ret = smu_cmn_send_smc_msg_with_param(smu, (uint16_t)index, type, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, type, NULL); if (ret) dev_err(smu->adev->dev, "Failed to mode reset!\n"); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 9781722519c3..54d62fdb4ef9 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -76,15 +76,17 @@ int drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state, state->mode_blob = NULL; if (mode) { + struct drm_property_blob *blob; + drm_mode_convert_to_umode(&umode, mode); - state->mode_blob = - drm_property_create_blob(state->crtc->dev, - sizeof(umode), - &umode); - if (IS_ERR(state->mode_blob)) - return PTR_ERR(state->mode_blob); + blob = drm_property_create_blob(crtc->dev, + sizeof(umode), &umode); + if (IS_ERR(blob)) + return PTR_ERR(blob); drm_mode_copy(&state->mode, mode); + + state->mode_blob = blob; state->enable = true; drm_dbg_atomic(crtc->dev, "Set [MODE:%s] for [CRTC:%d:%s] state %p\n", diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 12893e7be89b..f5f5de362ff2 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5345,6 +5345,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) return quirks; + info->color_formats |= DRM_COLOR_FORMAT_RGB444; drm_parse_cea_ext(connector, edid); /* @@ -5393,7 +5394,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n", connector->name, info->bpc); - info->color_formats |= DRM_COLOR_FORMAT_RGB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444) info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c 
b/drivers/gpu/drm/drm_gem_cma_helper.c index cefd0cbf9deb..dc275c466c9c 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -512,6 +512,7 @@ int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct * */ vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_DONTEXPAND; if (cma_obj->map_noncoherent) { vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index a4c94dc2e216..cfd932514da2 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -101,6 +101,7 @@ config DRM_I915_USERPTR config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 + depends on X86 depends on 64BIT default n help diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 2da4aacc956b..8ac196e814d5 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -825,6 +825,7 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) unsigned int max_bw_point = 0, max_bw = 0; unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points; unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points; + bool changed = false; u32 mask = 0; /* FIXME earlier gens need some checks too */ @@ -868,6 +869,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) new_bw_state->data_rate[crtc->pipe] = new_data_rate; new_bw_state->num_active_planes[crtc->pipe] = new_active_planes; + changed = true; + drm_dbg_kms(&dev_priv->drm, "pipe %c data rate %u num active planes %u\n", pipe_name(crtc->pipe), @@ -875,7 +878,19 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) new_bw_state->num_active_planes[crtc->pipe]); } - if (!new_bw_state) + old_bw_state = intel_atomic_get_old_bw_state(state); + new_bw_state = intel_atomic_get_new_bw_state(state); + 
+ if (new_bw_state && + intel_can_enable_sagv(dev_priv, old_bw_state) != + intel_can_enable_sagv(dev_priv, new_bw_state)) + changed = true; + + /* + * If none of our inputs (data rates, number of active + * planes, SAGV yes/no) changed then nothing to do here. + */ + if (!changed) return 0; ret = intel_atomic_lock_global_state(&new_bw_state->base); @@ -961,7 +976,6 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) */ new_bw_state->qgv_points_mask = ~allowed_points & mask; - old_bw_state = intel_atomic_get_old_bw_state(state); /* * If the actual mask had changed we need to make sure that * the commits are serialized(in case this is a nomodeset, nonblocking) diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index 46c6eecbd917..0ceaed1c9656 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -30,19 +30,19 @@ struct intel_bw_state { */ u8 pipe_sagv_reject; + /* bitmask of active pipes */ + u8 active_pipes; + /* * Current QGV points mask, which restricts * some particular SAGV states, not to confuse * with pipe_sagv_mask. 
*/ - u8 qgv_points_mask; + u16 qgv_points_mask; unsigned int data_rate[I915_MAX_PIPES]; u8 num_active_planes[I915_MAX_PIPES]; - /* bitmask of active pipes */ - u8 active_pipes; - int min_cdclk; }; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 160fd2bdafe5..957feeccff3f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1115,7 +1115,8 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ if (DISPLAY_VER(i915) >= 11 && - (plane_state->view.color_plane[0].y + drm_rect_height(&plane_state->uapi.src)) & 3) { + (plane_state->view.color_plane[0].y + + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; return false; } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 0065111593a6..4a2662838cd8 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -360,6 +360,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, port++; } + /* + * The port numbering and mapping here is bizarre. The now-obsolete + * swsci spec supports ports numbered [0..4]. Port E is handled as a + * special case, but port F and beyond are not. The functionality is + * supposed to be obsolete for new platforms. Just bail out if the port + * number is out of bounds after mapping. 
+ */ + if (port > 4) { + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", + intel_encoder->base.base.id, intel_encoder->base.name, + port_name(intel_encoder->port), port); + return -EINVAL; + } + if (!enable) parm |= 4 << 8; diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index 09f405e4d363..92ff654f54f5 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -34,7 +34,7 @@ void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv) if (intel_de_wait_for_clear(dev_priv, ICL_PHY_MISC(phy), DG2_PHY_DP_TX_ACK_MASK, 25)) DRM_ERROR("SNPS PHY %c failed to calibrate after 25ms.\n", - phy); + phy_name(phy)); } } diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index dbd7d0d83a14..7784c30fe893 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -691,6 +691,8 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_encoder *encoder = &dig_port->base; + intel_wakeref_t tc_cold_wref; + enum intel_display_power_domain domain; int active_links = 0; mutex_lock(&dig_port->tc_lock); @@ -702,12 +704,11 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port) drm_WARN_ON(&i915->drm, dig_port->tc_mode != TC_PORT_DISCONNECTED); drm_WARN_ON(&i915->drm, dig_port->tc_lock_wakeref); - if (active_links) { - enum intel_display_power_domain domain; - intel_wakeref_t tc_cold_wref = tc_cold_block(dig_port, &domain); - dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port); + tc_cold_wref = tc_cold_block(dig_port, &domain); + dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port); + if (active_links) { if (!icl_tc_phy_is_connected(dig_port)) drm_dbg_kms(&i915->drm, "Port %s: PHY 
disconnected with %d active link(s)\n", @@ -716,10 +717,23 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port) dig_port->tc_lock_wakeref = tc_cold_block(dig_port, &dig_port->tc_lock_power_domain); - - tc_cold_unblock(dig_port, domain, tc_cold_wref); + } else { + /* + * TBT-alt is the default mode in any case the PHY ownership is not + * held (regardless of the sink's connected live state), so + * we'll just switch to disconnected mode from it here without + * a note. + */ + if (dig_port->tc_mode != TC_PORT_TBT_ALT) + drm_dbg_kms(&i915->drm, + "Port %s: PHY left in %s mode on disabled port, disconnecting it\n", + dig_port->tc_port_name, + tc_port_mode_name(dig_port->tc_mode)); + icl_tc_phy_disconnect(dig_port); } + tc_cold_unblock(dig_port, domain, tc_cold_wref); + drm_dbg_kms(&i915->drm, "Port %s: sanitize mode (%s)\n", dig_port->tc_port_name, tc_port_mode_name(dig_port->tc_mode)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index de3fe79b665a..1f880c8c66e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -842,11 +842,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) } else if (obj->mm.madv != I915_MADV_WILLNEED) { bo->priority = I915_TTM_PRIO_PURGE; } else if (!i915_gem_object_has_pages(obj)) { - if (bo->priority < I915_TTM_PRIO_HAS_PAGES) - bo->priority = I915_TTM_PRIO_HAS_PAGES; + bo->priority = I915_TTM_PRIO_NO_PAGES; } else { - if (bo->priority > I915_TTM_PRIO_NO_PAGES) - bo->priority = I915_TTM_PRIO_NO_PAGES; + bo->priority = I915_TTM_PRIO_HAS_PAGES; } ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 99d1781fa5f0..af79b39048f7 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1148,7 +1148,7 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, ops->set_pfn(se, s->shadow_page.mfn); } 
-/** +/* * Check if can do 2M page * @vgpu: target vgpu * @entry: target pfn's gtt entry @@ -2193,7 +2193,7 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, } /** - * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read + * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read * @vgpu: a vGPU * @off: register offset * @p_data: data will be returned to guest diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3edba7fd0c49..fae4f7818d28 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4029,6 +4029,17 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) return ret; } + if (intel_can_enable_sagv(dev_priv, new_bw_state) != + intel_can_enable_sagv(dev_priv, old_bw_state)) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal; @@ -4044,17 +4055,6 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) intel_can_enable_sagv(dev_priv, new_bw_state); } - if (intel_can_enable_sagv(dev_priv, new_bw_state) != - intel_can_enable_sagv(dev_priv, old_bw_state)) { - ret = intel_atomic_serialize_global_state(&new_bw_state->base); - if (ret) - return ret; - } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { - ret = intel_atomic_lock_global_state(&new_bw_state->base); - if (ret) - return ret; - } - return 0; } @@ -4853,7 +4853,7 @@ static bool check_mbus_joined(u8 active_pipes, { int i; - for (i = 0; i < dbuf_slices[i].active_pipes; i++) { + for (i = 0; dbuf_slices[i].active_pipes != 0; i++) { if (dbuf_slices[i].active_pipes == active_pipes) return dbuf_slices[i].join_mbus; } @@ 
-4870,7 +4870,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus, { int i; - for (i = 0; i < dbuf_slices[i].active_pipes; i++) { + for (i = 0; dbuf_slices[i].active_pipes != 0; i++) { if (dbuf_slices[i].active_pipes == active_pipes && dbuf_slices[i].join_mbus == join_mbus) return dbuf_slices[i].dbuf_mask[pipe]; diff --git a/drivers/gpu/drm/imx/dcss/Kconfig b/drivers/gpu/drm/imx/dcss/Kconfig index 7374f1952762..5c2b2277afbf 100644 --- a/drivers/gpu/drm/imx/dcss/Kconfig +++ b/drivers/gpu/drm/imx/dcss/Kconfig @@ -2,6 +2,7 @@ config DRM_IMX_DCSS tristate "i.MX8MQ DCSS" select IMX_IRQSTEER select DRM_KMS_HELPER + select DRM_GEM_CMA_HELPER select VIDEOMODE_HELPERS depends on DRM && ARCH_MXC && ARM64 help diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 5d90d2eb0019..bced4c7d668e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -786,18 +786,101 @@ void mtk_dsi_ddp_stop(struct device *dev) mtk_dsi_poweroff(dsi); } +static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi) +{ + int ret; + + ret = drm_simple_encoder_init(drm, &dsi->encoder, + DRM_MODE_ENCODER_DSI); + if (ret) { + DRM_ERROR("Failed to encoder init to drm\n"); + return ret; + } + + dsi->encoder.possible_crtcs = mtk_drm_find_possible_crtc_by_comp(drm, dsi->host.dev); + + ret = drm_bridge_attach(&dsi->encoder, &dsi->bridge, NULL, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) + goto err_cleanup_encoder; + + dsi->connector = drm_bridge_connector_init(drm, &dsi->encoder); + if (IS_ERR(dsi->connector)) { + DRM_ERROR("Unable to create bridge connector\n"); + ret = PTR_ERR(dsi->connector); + goto err_cleanup_encoder; + } + drm_connector_attach_encoder(dsi->connector, &dsi->encoder); + + return 0; + +err_cleanup_encoder: + drm_encoder_cleanup(&dsi->encoder); + return ret; +} + +static int mtk_dsi_bind(struct device *dev, struct device *master, void *data) +{ + int ret; + struct 
drm_device *drm = data; + struct mtk_dsi *dsi = dev_get_drvdata(dev); + + ret = mtk_dsi_encoder_init(drm, dsi); + if (ret) + return ret; + + return device_reset_optional(dev); +} + +static void mtk_dsi_unbind(struct device *dev, struct device *master, + void *data) +{ + struct mtk_dsi *dsi = dev_get_drvdata(dev); + + drm_encoder_cleanup(&dsi->encoder); +} + +static const struct component_ops mtk_dsi_component_ops = { + .bind = mtk_dsi_bind, + .unbind = mtk_dsi_unbind, +}; + static int mtk_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct mtk_dsi *dsi = host_to_dsi(host); + struct device *dev = host->dev; + int ret; dsi->lanes = device->lanes; dsi->format = device->format; dsi->mode_flags = device->mode_flags; + dsi->next_bridge = devm_drm_of_get_bridge(dev, dev->of_node, 0, 0); + if (IS_ERR(dsi->next_bridge)) + return PTR_ERR(dsi->next_bridge); + + drm_bridge_add(&dsi->bridge); + + ret = component_add(host->dev, &mtk_dsi_component_ops); + if (ret) { + DRM_ERROR("failed to add dsi_host component: %d\n", ret); + drm_bridge_remove(&dsi->bridge); + return ret; + } return 0; } +static int mtk_dsi_host_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct mtk_dsi *dsi = host_to_dsi(host); + + component_del(host->dev, &mtk_dsi_component_ops); + drm_bridge_remove(&dsi->bridge); + return 0; +} + static void mtk_dsi_wait_for_idle(struct mtk_dsi *dsi) { int ret; @@ -938,73 +1021,14 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, static const struct mipi_dsi_host_ops mtk_dsi_ops = { .attach = mtk_dsi_host_attach, + .detach = mtk_dsi_host_detach, .transfer = mtk_dsi_host_transfer, }; -static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi) -{ - int ret; - - ret = drm_simple_encoder_init(drm, &dsi->encoder, - DRM_MODE_ENCODER_DSI); - if (ret) { - DRM_ERROR("Failed to encoder init to drm\n"); - return ret; - } - - dsi->encoder.possible_crtcs = 
mtk_drm_find_possible_crtc_by_comp(drm, dsi->host.dev); - - ret = drm_bridge_attach(&dsi->encoder, &dsi->bridge, NULL, - DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret) - goto err_cleanup_encoder; - - dsi->connector = drm_bridge_connector_init(drm, &dsi->encoder); - if (IS_ERR(dsi->connector)) { - DRM_ERROR("Unable to create bridge connector\n"); - ret = PTR_ERR(dsi->connector); - goto err_cleanup_encoder; - } - drm_connector_attach_encoder(dsi->connector, &dsi->encoder); - - return 0; - -err_cleanup_encoder: - drm_encoder_cleanup(&dsi->encoder); - return ret; -} - -static int mtk_dsi_bind(struct device *dev, struct device *master, void *data) -{ - int ret; - struct drm_device *drm = data; - struct mtk_dsi *dsi = dev_get_drvdata(dev); - - ret = mtk_dsi_encoder_init(drm, dsi); - if (ret) - return ret; - - return device_reset_optional(dev); -} - -static void mtk_dsi_unbind(struct device *dev, struct device *master, - void *data) -{ - struct mtk_dsi *dsi = dev_get_drvdata(dev); - - drm_encoder_cleanup(&dsi->encoder); -} - -static const struct component_ops mtk_dsi_component_ops = { - .bind = mtk_dsi_bind, - .unbind = mtk_dsi_unbind, -}; - static int mtk_dsi_probe(struct platform_device *pdev) { struct mtk_dsi *dsi; struct device *dev = &pdev->dev; - struct drm_panel *panel; struct resource *regs; int irq_num; int ret; @@ -1021,19 +1045,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) return ret; } - ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, - &panel, &dsi->next_bridge); - if (ret) - goto err_unregister_host; - - if (panel) { - dsi->next_bridge = devm_drm_panel_bridge_add(dev, panel); - if (IS_ERR(dsi->next_bridge)) { - ret = PTR_ERR(dsi->next_bridge); - goto err_unregister_host; - } - } - dsi->driver_data = of_device_get_match_data(dev); dsi->engine_clk = devm_clk_get(dev, "engine"); @@ -1098,14 +1109,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) dsi->bridge.of_node = dev->of_node; dsi->bridge.type = DRM_MODE_CONNECTOR_DSI; - 
drm_bridge_add(&dsi->bridge); - - ret = component_add(&pdev->dev, &mtk_dsi_component_ops); - if (ret) { - dev_err(&pdev->dev, "failed to add component: %d\n", ret); - goto err_unregister_host; - } - return 0; err_unregister_host: @@ -1118,8 +1121,6 @@ static int mtk_dsi_remove(struct platform_device *pdev) struct mtk_dsi *dsi = platform_get_drvdata(pdev); mtk_output_dsi_disable(dsi); - drm_bridge_remove(&dsi->bridge); - component_del(&pdev->dev, &mtk_dsi_component_ops); mipi_dsi_host_unregister(&dsi->host); return 0; diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c index f043b484055b..ed626fdc08e8 100644 --- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c +++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c @@ -293,15 +293,13 @@ static int y030xx067a_probe(struct spi_device *spi) return 0; } -static int y030xx067a_remove(struct spi_device *spi) +static void y030xx067a_remove(struct spi_device *spi) { struct y030xx067a *priv = spi_get_drvdata(spi); drm_panel_remove(&priv->panel); drm_panel_disable(&priv->panel); drm_panel_unprepare(&priv->panel); - - return 0; } static const struct drm_display_mode y030xx067a_modes[] = { diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c index 8e84df9a0033..3dfafa585127 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c @@ -896,14 +896,12 @@ static int ili9322_probe(struct spi_device *spi) return 0; } -static int ili9322_remove(struct spi_device *spi) +static void ili9322_remove(struct spi_device *spi) { struct ili9322 *ili = spi_get_drvdata(spi); ili9322_power_off(ili); drm_panel_remove(&ili->panel); - - return 0; } /* diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 2c3378a259b1..a07ef26234e5 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ 
-728,7 +728,7 @@ static int ili9341_probe(struct spi_device *spi) return -1; } -static int ili9341_remove(struct spi_device *spi) +static void ili9341_remove(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); struct ili9341 *ili = spi_get_drvdata(spi); @@ -741,7 +741,6 @@ static int ili9341_remove(struct spi_device *spi) drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); } - return 0; } static void ili9341_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/panel/panel-innolux-ej030na.c b/drivers/gpu/drm/panel/panel-innolux-ej030na.c index c558de3f99be..e3b1daa0cb72 100644 --- a/drivers/gpu/drm/panel/panel-innolux-ej030na.c +++ b/drivers/gpu/drm/panel/panel-innolux-ej030na.c @@ -219,15 +219,13 @@ static int ej030na_probe(struct spi_device *spi) return 0; } -static int ej030na_remove(struct spi_device *spi) +static void ej030na_remove(struct spi_device *spi) { struct ej030na *priv = spi_get_drvdata(spi); drm_panel_remove(&priv->panel); drm_panel_disable(&priv->panel); drm_panel_unprepare(&priv->panel); - - return 0; } static const struct drm_display_mode ej030na_modes[] = { diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c index f3183b68704f..9d0d4faa3f58 100644 --- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c +++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c @@ -203,14 +203,12 @@ static int lb035q02_probe(struct spi_device *spi) return 0; } -static int lb035q02_remove(struct spi_device *spi) +static void lb035q02_remove(struct spi_device *spi) { struct lb035q02_device *lcd = spi_get_drvdata(spi); drm_panel_remove(&lcd->panel); drm_panel_disable(&lcd->panel); - - return 0; } static const struct of_device_id lb035q02_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-lg-lg4573.c b/drivers/gpu/drm/panel/panel-lg-lg4573.c index 8e5160af1de5..cf246d15b7b6 100644 --- a/drivers/gpu/drm/panel/panel-lg-lg4573.c +++ b/drivers/gpu/drm/panel/panel-lg-lg4573.c @@ -266,14 +266,12 @@ 
static int lg4573_probe(struct spi_device *spi) return 0; } -static int lg4573_remove(struct spi_device *spi) +static void lg4573_remove(struct spi_device *spi) { struct lg4573 *ctx = spi_get_drvdata(spi); lg4573_display_off(ctx); drm_panel_remove(&ctx->panel); - - return 0; } static const struct of_device_id lg4573_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c index 6e5ab1debc8b..81c5c541a351 100644 --- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c +++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c @@ -212,15 +212,13 @@ static int nl8048_probe(struct spi_device *spi) return 0; } -static int nl8048_remove(struct spi_device *spi) +static void nl8048_remove(struct spi_device *spi) { struct nl8048_panel *lcd = spi_get_drvdata(spi); drm_panel_remove(&lcd->panel); drm_panel_disable(&lcd->panel); drm_panel_unprepare(&lcd->panel); - - return 0; } static const struct of_device_id nl8048_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-novatek-nt39016.c b/drivers/gpu/drm/panel/panel-novatek-nt39016.c index d036853db865..f58cfb10b58a 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt39016.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt39016.c @@ -292,7 +292,7 @@ static int nt39016_probe(struct spi_device *spi) return 0; } -static int nt39016_remove(struct spi_device *spi) +static void nt39016_remove(struct spi_device *spi) { struct nt39016 *panel = spi_get_drvdata(spi); @@ -300,8 +300,6 @@ static int nt39016_remove(struct spi_device *spi) nt39016_disable(&panel->drm_panel); nt39016_unprepare(&panel->drm_panel); - - return 0; } static const struct drm_display_mode kd035g6_display_modes[] = { diff --git a/drivers/gpu/drm/panel/panel-samsung-db7430.c b/drivers/gpu/drm/panel/panel-samsung-db7430.c index ead479719f00..04640c5256a8 100644 --- a/drivers/gpu/drm/panel/panel-samsung-db7430.c +++ b/drivers/gpu/drm/panel/panel-samsung-db7430.c @@ -314,12 +314,11 @@ static int db7430_probe(struct 
spi_device *spi) return 0; } -static int db7430_remove(struct spi_device *spi) +static void db7430_remove(struct spi_device *spi) { struct db7430 *db = spi_get_drvdata(spi); drm_panel_remove(&db->panel); - return 0; } /* diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c index c4b388850a13..01eb211f32f7 100644 --- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c +++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c @@ -358,14 +358,12 @@ static int ld9040_probe(struct spi_device *spi) return 0; } -static int ld9040_remove(struct spi_device *spi) +static void ld9040_remove(struct spi_device *spi) { struct ld9040 *ctx = spi_get_drvdata(spi); ld9040_power_off(ctx); drm_panel_remove(&ctx->panel); - - return 0; } static const struct of_device_id ld9040_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c index 1696ceb36aa0..2adb223a895c 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c @@ -291,12 +291,11 @@ static int s6d27a1_probe(struct spi_device *spi) return 0; } -static int s6d27a1_remove(struct spi_device *spi) +static void s6d27a1_remove(struct spi_device *spi) { struct s6d27a1 *ctx = spi_get_drvdata(spi); drm_panel_remove(&ctx->panel); - return 0; } static const struct of_device_id s6d27a1_match[] = { diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c index c178d962b0d5..d99afcc672ca 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c @@ -62,10 +62,9 @@ static int s6e63m0_spi_probe(struct spi_device *spi) s6e63m0_spi_dcs_write, false); } -static int s6e63m0_spi_remove(struct spi_device *spi) +static void s6e63m0_spi_remove(struct spi_device *spi) { s6e63m0_remove(&spi->dev); - return 0; } static const struct of_device_id s6e63m0_spi_of_match[] = { diff --git 
a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c index 61e565524542..bbc4569cbcdc 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c @@ -387,13 +387,11 @@ static int st7789v_probe(struct spi_device *spi) return 0; } -static int st7789v_remove(struct spi_device *spi) +static void st7789v_remove(struct spi_device *spi) { struct st7789v *ctx = spi_get_drvdata(spi); drm_panel_remove(&ctx->panel); - - return 0; } static const struct of_device_id st7789v_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c index ba0b3ead150f..0d7541a33f87 100644 --- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c +++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c @@ -655,7 +655,7 @@ static int acx565akm_probe(struct spi_device *spi) return 0; } -static int acx565akm_remove(struct spi_device *spi) +static void acx565akm_remove(struct spi_device *spi) { struct acx565akm_panel *lcd = spi_get_drvdata(spi); @@ -666,8 +666,6 @@ static int acx565akm_remove(struct spi_device *spi) drm_panel_disable(&lcd->panel); drm_panel_unprepare(&lcd->panel); - - return 0; } static const struct of_device_id acx565akm_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c index ba0c00d1a001..4dbf8b88f264 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c @@ -350,15 +350,13 @@ static int td028ttec1_probe(struct spi_device *spi) return 0; } -static int td028ttec1_remove(struct spi_device *spi) +static void td028ttec1_remove(struct spi_device *spi) { struct td028ttec1_panel *lcd = spi_get_drvdata(spi); drm_panel_remove(&lcd->panel); drm_panel_disable(&lcd->panel); drm_panel_unprepare(&lcd->panel); - - return 0; } static const struct of_device_id td028ttec1_of_match[] = { diff --git 
a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c index 1866cdb8f9c1..cf4609bb9b1d 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c @@ -463,7 +463,7 @@ static int td043mtea1_probe(struct spi_device *spi) return 0; } -static int td043mtea1_remove(struct spi_device *spi) +static void td043mtea1_remove(struct spi_device *spi) { struct td043mtea1_panel *lcd = spi_get_drvdata(spi); @@ -472,8 +472,6 @@ static int td043mtea1_remove(struct spi_device *spi) drm_panel_unprepare(&lcd->panel); sysfs_remove_group(&spi->dev.kobj, &td043mtea1_attr_group); - - return 0; } static const struct of_device_id td043mtea1_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-tpo-tpg110.c b/drivers/gpu/drm/panel/panel-tpo-tpg110.c index e3791dad6830..0b1f5a11a055 100644 --- a/drivers/gpu/drm/panel/panel-tpo-tpg110.c +++ b/drivers/gpu/drm/panel/panel-tpo-tpg110.c @@ -450,12 +450,11 @@ static int tpg110_probe(struct spi_device *spi) return 0; } -static int tpg110_remove(struct spi_device *spi) +static void tpg110_remove(struct spi_device *spi) { struct tpg110 *tpg = spi_get_drvdata(spi); drm_panel_remove(&tpg->panel); - return 0; } static const struct of_device_id tpg110_match[] = { diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c index 8bc976f54b80..236f3cb2b594 100644 --- a/drivers/gpu/drm/panel/panel-widechips-ws2401.c +++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c @@ -407,12 +407,11 @@ static int ws2401_probe(struct spi_device *spi) return 0; } -static int ws2401_remove(struct spi_device *spi) +static void ws2401_remove(struct spi_device *spi) { struct ws2401 *ws = spi_get_drvdata(spi); drm_panel_remove(&ws->panel); - return 0; } /* diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 0fce73b9a646..70bd84b7ef2b 100644 --- 
a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -198,7 +198,8 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, * so don't register a backlight device */ if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && - (rdev->pdev->device == 0x6741)) + (rdev->pdev->device == 0x6741) && + !dmi_match(DMI_PRODUCT_NAME, "iMac12,1")) return; if (!radeon_encoder->enc_priv) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 377f9cdb5b53..84013faa4756 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -470,8 +470,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, int32_t *msg, msg_type, handle; unsigned img_size = 0; void *ptr; - - int i, r; + long r; + int i; if (offset & 0x3F) { DRM_ERROR("UVD messages must be 64 byte aligned!\n"); @@ -481,13 +481,13 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) { - DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); + DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); return r ? 
r : -ETIME; } r = radeon_bo_kmap(bo, &ptr); if (r) { - DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); + DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); return r; } diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 8cf5aeb9db6c..201f5175ecfe 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -5,6 +5,7 @@ config DRM_TEGRA depends on COMMON_CLK depends on DRM depends on OF + select DRM_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 1f96e416fa08..d7a731d287d2 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <drm/drm_dp_helper.h> +#include <drm/drm_dp_aux_bus.h> #include <drm/drm_panel.h> #include "dp.h" @@ -570,6 +571,12 @@ static int tegra_dpaux_probe(struct platform_device *pdev) list_add_tail(&dpaux->list, &dpaux_list); mutex_unlock(&dpaux_lock); + err = devm_of_dp_aux_populate_ep_devices(&dpaux->aux); + if (err < 0) { + dev_err(dpaux->dev, "failed to populate AUX bus: %d\n", err); + return err; + } + return 0; } diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c index 223ab2ceb7e6..3762d87759d9 100644 --- a/drivers/gpu/drm/tegra/falcon.c +++ b/drivers/gpu/drm/tegra/falcon.c @@ -63,7 +63,7 @@ static void falcon_copy_firmware_image(struct falcon *falcon, /* copy the whole thing taking into account endianness */ for (i = 0; i < firmware->size / sizeof(u32); i++) - virt[i] = le32_to_cpu(((u32 *)firmware->data)[i]); + virt[i] = le32_to_cpu(((__le32 *)firmware->data)[i]); } static int falcon_parse_firmware_image(struct falcon *falcon) diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c index 9b33c05732aa..ebb025543f8d 100644 --- a/drivers/gpu/drm/tiny/hx8357d.c +++ b/drivers/gpu/drm/tiny/hx8357d.c @@ -263,14 +263,12 @@ static int hx8357d_probe(struct 
spi_device *spi) return 0; } -static int hx8357d_remove(struct spi_device *spi) +static void hx8357d_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void hx8357d_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9163.c b/drivers/gpu/drm/tiny/ili9163.c index bcc181351236..fc8ed245b0bc 100644 --- a/drivers/gpu/drm/tiny/ili9163.c +++ b/drivers/gpu/drm/tiny/ili9163.c @@ -193,14 +193,12 @@ static int ili9163_probe(struct spi_device *spi) return 0; } -static int ili9163_remove(struct spi_device *spi) +static void ili9163_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9163_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c index 976d3209f164..cc92eb9f2a07 100644 --- a/drivers/gpu/drm/tiny/ili9225.c +++ b/drivers/gpu/drm/tiny/ili9225.c @@ -411,14 +411,12 @@ static int ili9225_probe(struct spi_device *spi) return 0; } -static int ili9225_remove(struct spi_device *spi) +static void ili9225_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9225_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c index 37e0c33399c8..5b8cc770ee7b 100644 --- a/drivers/gpu/drm/tiny/ili9341.c +++ b/drivers/gpu/drm/tiny/ili9341.c @@ -225,14 +225,12 @@ static int ili9341_probe(struct spi_device *spi) return 0; } -static int ili9341_remove(struct spi_device *spi) +static void ili9341_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9341_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9486.c 
b/drivers/gpu/drm/tiny/ili9486.c index e9a63f4b2993..6d655e18e0aa 100644 --- a/drivers/gpu/drm/tiny/ili9486.c +++ b/drivers/gpu/drm/tiny/ili9486.c @@ -243,14 +243,12 @@ static int ili9486_probe(struct spi_device *spi) return 0; } -static int ili9486_remove(struct spi_device *spi) +static void ili9486_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9486_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c index 023de49e7a8e..5e060f6910bb 100644 --- a/drivers/gpu/drm/tiny/mi0283qt.c +++ b/drivers/gpu/drm/tiny/mi0283qt.c @@ -233,14 +233,12 @@ static int mi0283qt_probe(struct spi_device *spi) return 0; } -static int mi0283qt_remove(struct spi_device *spi) +static void mi0283qt_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void mi0283qt_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 97a775c48cea..beeeb170d0b1 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -1140,14 +1140,12 @@ static int repaper_probe(struct spi_device *spi) return 0; } -static int repaper_remove(struct spi_device *spi) +static void repaper_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void repaper_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c index 51b9b9fb3ead..3f38faa1cd8c 100644 --- a/drivers/gpu/drm/tiny/st7586.c +++ b/drivers/gpu/drm/tiny/st7586.c @@ -360,14 +360,12 @@ static int st7586_probe(struct spi_device *spi) return 0; } -static int st7586_remove(struct spi_device *spi) +static void st7586_remove(struct spi_device *spi) { struct 
drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void st7586_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c index fc40dd10efa8..29d618093e94 100644 --- a/drivers/gpu/drm/tiny/st7735r.c +++ b/drivers/gpu/drm/tiny/st7735r.c @@ -247,14 +247,12 @@ static int st7735r_probe(struct spi_device *spi) return 0; } -static int st7735r_remove(struct spi_device *spi) +static void st7735r_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void st7735r_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index e6cc47470e03..783890e8d43a 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -525,9 +525,11 @@ int vc4_crtc_disable_at_boot(struct drm_crtc *crtc) if (ret) return ret; - ret = pm_runtime_put(&vc4_hdmi->pdev->dev); - if (ret) - return ret; + /* + * post_crtc_powerdown will have called pm_runtime_put, so we + * don't need it here otherwise we'll get the reference counting + * wrong. 
+ */ return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index b30500405fa7..3a1626f261e5 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1749,6 +1749,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) dev_err(dev, "Couldn't register the HDMI codec: %ld\n", PTR_ERR(codec_pdev)); return PTR_ERR(codec_pdev); } + vc4_hdmi->audio.codec_pdev = codec_pdev; dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; @@ -1788,6 +1789,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) } +static void vc4_hdmi_audio_exit(struct vc4_hdmi *vc4_hdmi) +{ + platform_device_unregister(vc4_hdmi->audio.codec_pdev); + vc4_hdmi->audio.codec_pdev = NULL; +} + static irqreturn_t vc4_hdmi_hpd_irq_thread(int irq, void *priv) { struct vc4_hdmi *vc4_hdmi = priv; @@ -2660,6 +2667,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, kfree(vc4_hdmi->hdmi_regset.regs); kfree(vc4_hdmi->hd_regset.regs); + vc4_hdmi_audio_exit(vc4_hdmi); vc4_hdmi_cec_exit(vc4_hdmi); vc4_hdmi_hotplug_exit(vc4_hdmi); vc4_hdmi_connector_destroy(&vc4_hdmi->connector); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index 31b77a94c526..6ffdd4ec5fb6 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -116,6 +116,7 @@ struct vc4_hdmi_audio { struct snd_soc_dai_link_component platform; struct snd_dmaengine_dai_dma_data dma_data; struct hdmi_audio_infoframe infoframe; + struct platform_device *codec_pdev; bool streaming; }; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index e08e331e46ae..f87a8705f518 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -137,8 +137,15 @@ void host1x_syncpt_restore(struct host1x *host) struct host1x_syncpt *sp_base = host->syncpt; unsigned int i; - for (i = 0; i < host1x_syncpt_nb_pts(host); i++) + for (i = 0; i < host1x_syncpt_nb_pts(host); 
i++) { + /* + * Unassign syncpt from channels for purposes of Tegra186 + * syncpoint protection. This prevents any channel from + * accessing it until it is reassigned. + */ + host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL); host1x_hw_syncpt_restore(host, sp_base + i); + } for (i = 0; i < host1x_syncpt_nb_bases(host); i++) host1x_hw_syncpt_restore_wait_base(host, sp_base + i); @@ -227,27 +234,12 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, void *ref; struct host1x_waitlist *waiter; int err = 0, check_count = 0; - u32 val; if (value) - *value = 0; - - /* first check cache */ - if (host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = host1x_syncpt_load(sp); + *value = host1x_syncpt_load(sp); + if (host1x_syncpt_is_expired(sp, thresh)) return 0; - } - - /* try to read from register */ - val = host1x_hw_syncpt_load(sp->host, sp); - if (host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = val; - - goto done; - } if (!timeout) { err = -EAGAIN; @@ -352,13 +344,6 @@ int host1x_syncpt_init(struct host1x *host) for (i = 0; i < host->info->nb_pts; i++) { syncpt[i].id = i; syncpt[i].host = host; - - /* - * Unassign syncpt from channels for purposes of Tegra186 - * syncpoint protection. This prevents any channel from - * accessing it until it is reassigned. 
- */ - host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL); } for (i = 0; i < host->info->nb_bases; i++) diff --git a/drivers/hwmon/adcxx.c b/drivers/hwmon/adcxx.c index e5bc5ce09f4e..de37bce24fa6 100644 --- a/drivers/hwmon/adcxx.c +++ b/drivers/hwmon/adcxx.c @@ -194,7 +194,7 @@ out_err: return status; } -static int adcxx_remove(struct spi_device *spi) +static void adcxx_remove(struct spi_device *spi) { struct adcxx *adc = spi_get_drvdata(spi); int i; @@ -205,8 +205,6 @@ static int adcxx_remove(struct spi_device *spi) device_remove_file(&spi->dev, &ad_input[i].dev_attr); mutex_unlock(&adc->lock); - - return 0; } static const struct spi_device_id adcxx_ids[] = { diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c index c40cac16af68..832d9ec64934 100644 --- a/drivers/hwmon/adt7310.c +++ b/drivers/hwmon/adt7310.c @@ -88,10 +88,9 @@ static int adt7310_spi_probe(struct spi_device *spi) &adt7310_spi_ops); } -static int adt7310_spi_remove(struct spi_device *spi) +static void adt7310_spi_remove(struct spi_device *spi) { adt7x10_remove(&spi->dev, spi->irq); - return 0; } static const struct spi_device_id adt7310_id[] = { diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 3501a3ead4ba..3ae961986fc3 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -214,12 +214,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index) tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, &hwmon_thermal_ops); - /* - * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, - * so ignore that error but forward any other error. 
- */ - if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) - return PTR_ERR(tzd); + if (IS_ERR(tzd)) { + if (PTR_ERR(tzd) != -ENODEV) + return PTR_ERR(tzd); + dev_info(dev, "temp%d_input not attached to any thermal zone\n", + index + 1); + devm_kfree(dev, tdata); + return 0; + } err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node); if (err) diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c index 5fcfd57df61e..4c5487aeb3cf 100644 --- a/drivers/hwmon/max1111.c +++ b/drivers/hwmon/max1111.c @@ -254,7 +254,7 @@ err_remove: return err; } -static int max1111_remove(struct spi_device *spi) +static void max1111_remove(struct spi_device *spi) { struct max1111_data *data = spi_get_drvdata(spi); @@ -265,7 +265,6 @@ static int max1111_remove(struct spi_device *spi) sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group); sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group); mutex_destroy(&data->drvdata_lock); - return 0; } static const struct spi_device_id max1111_ids[] = { diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c index 4cf4fe6809a3..93e048ee4955 100644 --- a/drivers/hwmon/max31722.c +++ b/drivers/hwmon/max31722.c @@ -100,7 +100,7 @@ static int max31722_probe(struct spi_device *spi) return 0; } -static int max31722_remove(struct spi_device *spi) +static void max31722_remove(struct spi_device *spi) { struct max31722_data *data = spi_get_drvdata(spi); int ret; @@ -111,8 +111,6 @@ static int max31722_remove(struct spi_device *spi) if (ret) /* There is nothing we can do about this ... 
*/ dev_warn(&spi->dev, "Failed to put device in stand-by mode\n"); - - return 0; } static int __maybe_unused max31722_suspend(struct device *dev) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 414204f5704c..9c9e9f4ccb9e 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -59,7 +59,7 @@ static const struct platform_device_id ntc_thermistor_id[] = { [NTC_NCP15XH103] = { "ncp15xh103", TYPE_NCPXXXH103 }, [NTC_NCP18WB473] = { "ncp18wb473", TYPE_NCPXXWB473 }, [NTC_NCP21WB473] = { "ncp21wb473", TYPE_NCPXXWB473 }, - [NTC_SSG1404001221] = { "ssg1404-001221", TYPE_NCPXXWB473 }, + [NTC_SSG1404001221] = { "ssg1404_001221", TYPE_NCPXXWB473 }, [NTC_LAST] = { }, }; diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 776ee2237be2..ac2fbee1ba9c 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -911,6 +911,11 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b, pmbus_update_sensor_data(client, s2); regval = status & mask; + if (regval) { + ret = pmbus_write_byte_data(client, page, reg, regval); + if (ret) + goto unlock; + } if (s1 && s2) { s64 v1, v2; diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 42da31c1ab70..8a6c6ee28556 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -488,7 +488,7 @@ config I2C_BRCMSTB config I2C_CADENCE tristate "Cadence I2C Controller" - depends on ARCH_ZYNQ || ARM64 || XTENSA + depends on ARCH_ZYNQ || ARM64 || XTENSA || COMPILE_TEST help Say yes here to select Cadence I2C Host Controller. This controller is e.g. used by Xilinx Zynq. 
@@ -680,7 +680,7 @@ config I2C_IMG config I2C_IMX tristate "IMX I2C interface" - depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE + depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE || COMPILE_TEST select I2C_SLAVE help Say Y here if you want to use the IIC bus controller on @@ -935,7 +935,7 @@ config I2C_QCOM_GENI config I2C_QUP tristate "Qualcomm QUP based I2C controller" - depends on ARCH_QCOM + depends on ARCH_QCOM || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Qualcomm SoCs. diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index dfc534065595..5149454eef4a 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -23,6 +23,11 @@ #define BCM2835_I2C_FIFO 0x10 #define BCM2835_I2C_DIV 0x14 #define BCM2835_I2C_DEL 0x18 +/* + * 16-bit field for the number of SCL cycles to wait after rising SCL + * before deciding the slave is not responding. 0 disables the + * timeout detection. + */ #define BCM2835_I2C_CLKT 0x1c #define BCM2835_I2C_C_READ BIT(0) @@ -474,6 +479,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) adap->dev.of_node = pdev->dev.of_node; adap->quirks = of_device_get_match_data(&pdev->dev); + /* + * Disable the hardware clock stretching timeout. SMBUS + * specifies a limit for how long the device can stretch the + * clock, but core I2C doesn't. 
+ */ + bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_CLKT, 0); bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0); ret = i2c_add_adapter(adap); diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 490ee3962645..b00f35c0b066 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -673,7 +673,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev) /* set the data in/out register size for compatible SoCs */ if (of_device_is_compatible(dev->device->of_node, - "brcmstb,brcmper-i2c")) + "brcm,brcmper-i2c")) dev->data_regsz = sizeof(u8); else dev->data_regsz = sizeof(u32); diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index c1de8eb66169..cf54f1cb4c57 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -558,7 +558,7 @@ static int cci_probe(struct platform_device *pdev) cci->master[idx].adap.quirks = &cci->data->quirks; cci->master[idx].adap.algo = &cci_algo; cci->master[idx].adap.dev.parent = dev; - cci->master[idx].adap.dev.of_node = child; + cci->master[idx].adap.dev.of_node = of_node_get(child); cci->master[idx].master = idx; cci->master[idx].cci = cci; @@ -643,8 +643,10 @@ static int cci_probe(struct platform_device *pdev) continue; ret = i2c_add_adapter(&cci->master[i].adap); - if (ret < 0) + if (ret < 0) { + of_node_put(cci->master[i].adap.dev.of_node); goto error_i2c; + } } pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC); @@ -655,9 +657,11 @@ static int cci_probe(struct platform_device *pdev) return 0; error_i2c: - for (; i >= 0; i--) { - if (cci->master[i].cci) + for (--i ; i >= 0; i--) { + if (cci->master[i].cci) { i2c_del_adapter(&cci->master[i].adap); + of_node_put(cci->master[i].adap.dev.of_node); + } } error: disable_irq(cci->irq); @@ -673,8 +677,10 @@ static int cci_remove(struct platform_device *pdev) int i; for (i = 0; i < cci->data->num_masters; i++) { - if (cci->master[i].cci) + if (cci->master[i].cci) { 
i2c_del_adapter(&cci->master[i].adap); + of_node_put(cci->master[i].adap.dev.of_node); + } cci_halt(cci, i); } diff --git a/drivers/iio/accel/bma400_spi.c b/drivers/iio/accel/bma400_spi.c index 9f622e37477b..9040a717b247 100644 --- a/drivers/iio/accel/bma400_spi.c +++ b/drivers/iio/accel/bma400_spi.c @@ -87,11 +87,9 @@ static int bma400_spi_probe(struct spi_device *spi) return bma400_probe(&spi->dev, regmap, id->name); } -static int bma400_spi_remove(struct spi_device *spi) +static void bma400_spi_remove(struct spi_device *spi) { bma400_remove(&spi->dev); - - return 0; } static const struct spi_device_id bma400_spi_ids[] = { diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index e6081dd0a880..d11f668016a6 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -1783,11 +1783,14 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "Unable to register iio device\n"); - goto err_trigger_unregister; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); err_trigger_unregister: bmc150_accel_unregister_triggers(data, BMC150_ACCEL_TRIGGERS - 1); err_buffer_cleanup: diff --git a/drivers/iio/accel/bmc150-accel-spi.c b/drivers/iio/accel/bmc150-accel-spi.c index 11559567cb39..80007cc2d044 100644 --- a/drivers/iio/accel/bmc150-accel-spi.c +++ b/drivers/iio/accel/bmc150-accel-spi.c @@ -35,11 +35,9 @@ static int bmc150_accel_probe(struct spi_device *spi) true); } -static int bmc150_accel_remove(struct spi_device *spi) +static void bmc150_accel_remove(struct spi_device *spi) { bmc150_accel_core_remove(&spi->dev); - - return 0; } static const struct acpi_device_id bmc150_accel_acpi_match[] = { diff --git a/drivers/iio/accel/bmi088-accel-spi.c b/drivers/iio/accel/bmi088-accel-spi.c index 758ad2f12896..06d99d9949f3 100644 --- 
a/drivers/iio/accel/bmi088-accel-spi.c +++ b/drivers/iio/accel/bmi088-accel-spi.c @@ -56,11 +56,9 @@ static int bmi088_accel_probe(struct spi_device *spi) true); } -static int bmi088_accel_remove(struct spi_device *spi) +static void bmi088_accel_remove(struct spi_device *spi) { bmi088_accel_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id bmi088_accel_id[] = { diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 32989d91b982..f7fd9e046588 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -173,12 +173,20 @@ struct fxls8962af_data { u16 upper_thres; }; -const struct regmap_config fxls8962af_regmap_conf = { +const struct regmap_config fxls8962af_i2c_regmap_conf = { .reg_bits = 8, .val_bits = 8, .max_register = FXLS8962AF_MAX_REG, }; -EXPORT_SYMBOL_GPL(fxls8962af_regmap_conf); +EXPORT_SYMBOL_GPL(fxls8962af_i2c_regmap_conf); + +const struct regmap_config fxls8962af_spi_regmap_conf = { + .reg_bits = 8, + .pad_bits = 8, + .val_bits = 8, + .max_register = FXLS8962AF_MAX_REG, +}; +EXPORT_SYMBOL_GPL(fxls8962af_spi_regmap_conf); enum { fxls8962af_idx_x, diff --git a/drivers/iio/accel/fxls8962af-i2c.c b/drivers/iio/accel/fxls8962af-i2c.c index cfb004b20455..6bde9891effb 100644 --- a/drivers/iio/accel/fxls8962af-i2c.c +++ b/drivers/iio/accel/fxls8962af-i2c.c @@ -18,7 +18,7 @@ static int fxls8962af_probe(struct i2c_client *client) { struct regmap *regmap; - regmap = devm_regmap_init_i2c(client, &fxls8962af_regmap_conf); + regmap = devm_regmap_init_i2c(client, &fxls8962af_i2c_regmap_conf); if (IS_ERR(regmap)) { dev_err(&client->dev, "Failed to initialize i2c regmap\n"); return PTR_ERR(regmap); diff --git a/drivers/iio/accel/fxls8962af-spi.c b/drivers/iio/accel/fxls8962af-spi.c index 57108d3d480b..6f4dff3238d3 100644 --- a/drivers/iio/accel/fxls8962af-spi.c +++ b/drivers/iio/accel/fxls8962af-spi.c @@ -18,7 +18,7 @@ static int fxls8962af_probe(struct spi_device *spi) { 
struct regmap *regmap; - regmap = devm_regmap_init_spi(spi, &fxls8962af_regmap_conf); + regmap = devm_regmap_init_spi(spi, &fxls8962af_spi_regmap_conf); if (IS_ERR(regmap)) { dev_err(&spi->dev, "Failed to initialize spi regmap\n"); return PTR_ERR(regmap); diff --git a/drivers/iio/accel/fxls8962af.h b/drivers/iio/accel/fxls8962af.h index b67572c3ef06..9cbe98c3ba9a 100644 --- a/drivers/iio/accel/fxls8962af.h +++ b/drivers/iio/accel/fxls8962af.h @@ -17,6 +17,7 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq); int fxls8962af_core_remove(struct device *dev); extern const struct dev_pm_ops fxls8962af_pm_ops; -extern const struct regmap_config fxls8962af_regmap_conf; +extern const struct regmap_config fxls8962af_i2c_regmap_conf; +extern const struct regmap_config fxls8962af_spi_regmap_conf; #endif /* _FXLS8962AF_H_ */ diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 0fe570316848..ac74cdcd2bc8 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1590,11 +1590,14 @@ static int kxcjk1013_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto err_buffer_cleanup; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: diff --git a/drivers/iio/accel/kxsd9-spi.c b/drivers/iio/accel/kxsd9-spi.c index 441e6b764281..57c451cfb9e5 100644 --- a/drivers/iio/accel/kxsd9-spi.c +++ b/drivers/iio/accel/kxsd9-spi.c @@ -32,11 +32,9 @@ static int kxsd9_spi_probe(struct spi_device *spi) spi_get_device_id(spi)->name); } -static int kxsd9_spi_remove(struct spi_device *spi) +static void kxsd9_spi_remove(struct spi_device *spi) { kxsd9_common_remove(&spi->dev); - - return 0; } static const struct spi_device_id kxsd9_spi_id[] = { diff --git 
a/drivers/iio/accel/mma7455_spi.c b/drivers/iio/accel/mma7455_spi.c index ecf690692dcc..b746031551a3 100644 --- a/drivers/iio/accel/mma7455_spi.c +++ b/drivers/iio/accel/mma7455_spi.c @@ -22,11 +22,9 @@ static int mma7455_spi_probe(struct spi_device *spi) return mma7455_core_probe(&spi->dev, regmap, id->name); } -static int mma7455_spi_remove(struct spi_device *spi) +static void mma7455_spi_remove(struct spi_device *spi) { mma7455_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id mma7455_spi_ids[] = { diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c index 4c359fb05480..c53a3398b14c 100644 --- a/drivers/iio/accel/mma9551.c +++ b/drivers/iio/accel/mma9551.c @@ -495,11 +495,14 @@ static int mma9551_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto out_poweroff; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); out_poweroff: mma9551_set_device_state(client, false); diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c index 0570ab1cc064..5ff6bc70708b 100644 --- a/drivers/iio/accel/mma9553.c +++ b/drivers/iio/accel/mma9553.c @@ -1134,12 +1134,15 @@ static int mma9553_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto out_poweroff; + goto err_pm_cleanup; } dev_dbg(&indio_dev->dev, "Registered device %s\n", name); return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); out_poweroff: mma9551_set_device_state(client, false); return ret; diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c index 43ecacbdc95a..83c81072511e 100644 --- a/drivers/iio/accel/sca3000.c +++ b/drivers/iio/accel/sca3000.c @@ -1524,7 +1524,7 @@ error_ret: return ret; } -static int 
sca3000_remove(struct spi_device *spi) +static void sca3000_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct sca3000_state *st = iio_priv(indio_dev); @@ -1535,8 +1535,6 @@ static int sca3000_remove(struct spi_device *spi) sca3000_stop_all_interrupts(st); if (spi->irq) free_irq(spi->irq, indio_dev); - - return 0; } static const struct spi_device_id sca3000_id[] = { diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index bc2cfa5f9592..b400bbe291aa 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -76,7 +76,7 @@ #define AD7124_CONFIG_REF_SEL(x) FIELD_PREP(AD7124_CONFIG_REF_SEL_MSK, x) #define AD7124_CONFIG_PGA_MSK GENMASK(2, 0) #define AD7124_CONFIG_PGA(x) FIELD_PREP(AD7124_CONFIG_PGA_MSK, x) -#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(7, 6) +#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(6, 5) #define AD7124_CONFIG_IN_BUFF(x) FIELD_PREP(AD7124_CONFIG_IN_BUFF_MSK, x) /* AD7124_FILTER_X */ diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 1d345d66742d..c17d9b5fbaf6 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -479,7 +479,7 @@ error_disable_reg: return ret; } -static int ad7266_remove(struct spi_device *spi) +static void ad7266_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad7266_state *st = iio_priv(indio_dev); @@ -488,8 +488,6 @@ static int ad7266_remove(struct spi_device *spi) iio_triggered_buffer_cleanup(indio_dev); if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ad7266_id[] = { diff --git a/drivers/iio/adc/ltc2496.c b/drivers/iio/adc/ltc2496.c index dd956a7c216e..5a55f79f2574 100644 --- a/drivers/iio/adc/ltc2496.c +++ b/drivers/iio/adc/ltc2496.c @@ -78,13 +78,11 @@ static int ltc2496_probe(struct spi_device *spi) return ltc2497core_probe(dev, indio_dev); } -static int ltc2496_remove(struct spi_device *spi) +static void ltc2496_remove(struct spi_device 
*spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); ltc2497core_remove(indio_dev); - - return 0; } static const struct of_device_id ltc2496_of_match[] = { diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index 8d1cff28cae0..b4c69acb33e3 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -459,15 +459,13 @@ reg_disable: return ret; } -static int mcp320x_remove(struct spi_device *spi) +static void mcp320x_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct mcp320x *adc = iio_priv(indio_dev); iio_device_unregister(indio_dev); regulator_disable(adc->reg); - - return 0; } static const struct of_device_id mcp320x_dt_ids[] = { diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index 13535f148c4c..1cb4590fe412 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -321,7 +321,7 @@ reg_disable: return ret; } -static int mcp3911_remove(struct spi_device *spi) +static void mcp3911_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct mcp3911 *adc = iio_priv(indio_dev); @@ -331,8 +331,6 @@ static int mcp3911_remove(struct spi_device *spi) clk_disable_unprepare(adc->clki); if (adc->vref) regulator_disable(adc->vref); - - return 0; } static const struct of_device_id mcp3911_dt_ids[] = { diff --git a/drivers/iio/adc/men_z188_adc.c b/drivers/iio/adc/men_z188_adc.c index 42ea8bc7e780..adc5ceaef8c9 100644 --- a/drivers/iio/adc/men_z188_adc.c +++ b/drivers/iio/adc/men_z188_adc.c @@ -103,6 +103,7 @@ static int men_z188_probe(struct mcb_device *dev, struct z188_adc *adc; struct iio_dev *indio_dev; struct resource *mem; + int ret; indio_dev = devm_iio_device_alloc(&dev->dev, sizeof(struct z188_adc)); if (!indio_dev) @@ -128,8 +129,14 @@ static int men_z188_probe(struct mcb_device *dev, adc->mem = mem; mcb_set_drvdata(dev, indio_dev); - return iio_device_register(indio_dev); + ret = iio_device_register(indio_dev); + if (ret) + goto 
err_unmap; + + return 0; +err_unmap: + iounmap(adc->base); err: mcb_release_mem(mem); return -ENXIO; diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c index 6eb62b564dae..59d75d09604f 100644 --- a/drivers/iio/adc/ti-adc12138.c +++ b/drivers/iio/adc/ti-adc12138.c @@ -503,7 +503,7 @@ err_clk_disable: return ret; } -static int adc12138_remove(struct spi_device *spi) +static void adc12138_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct adc12138 *adc = iio_priv(indio_dev); @@ -514,8 +514,6 @@ static int adc12138_remove(struct spi_device *spi) regulator_disable(adc->vref_n); regulator_disable(adc->vref_p); clk_disable_unprepare(adc->cclk); - - return 0; } static const struct of_device_id adc12138_dt_ids[] = { diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index a7efa3eada2c..e3658b969c5b 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -662,7 +662,7 @@ error_destroy_mutex: return ret; } -static int ti_ads7950_remove(struct spi_device *spi) +static void ti_ads7950_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ti_ads7950_state *st = iio_priv(indio_dev); @@ -672,8 +672,6 @@ static int ti_ads7950_remove(struct spi_device *spi) iio_triggered_buffer_cleanup(indio_dev); regulator_disable(st->reg); mutex_destroy(&st->slock); - - return 0; } static const struct spi_device_id ti_ads7950_id[] = { diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 2e24717d7f55..22c2583eedd0 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -479,7 +479,7 @@ err_regulator_disable: return ret; } -static int ads8688_remove(struct spi_device *spi) +static void ads8688_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ads8688_state *st = iio_priv(indio_dev); @@ -489,8 +489,6 @@ static int ads8688_remove(struct spi_device *spi) if 
(!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ads8688_id[] = { diff --git a/drivers/iio/adc/ti-tlc4541.c b/drivers/iio/adc/ti-tlc4541.c index 403b787f9f7e..2406eda9dfc6 100644 --- a/drivers/iio/adc/ti-tlc4541.c +++ b/drivers/iio/adc/ti-tlc4541.c @@ -224,7 +224,7 @@ error_disable_reg: return ret; } -static int tlc4541_remove(struct spi_device *spi) +static void tlc4541_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct tlc4541_state *st = iio_priv(indio_dev); @@ -232,8 +232,6 @@ static int tlc4541_remove(struct spi_device *spi) iio_device_unregister(indio_dev); iio_triggered_buffer_cleanup(indio_dev); regulator_disable(st->reg); - - return 0; } static const struct of_device_id tlc4541_dt_ids[] = { diff --git a/drivers/iio/adc/ti-tsc2046.c b/drivers/iio/adc/ti-tsc2046.c index d84ae6b008c1..e8fc4d01f30b 100644 --- a/drivers/iio/adc/ti-tsc2046.c +++ b/drivers/iio/adc/ti-tsc2046.c @@ -388,7 +388,7 @@ static int tsc2046_adc_update_scan_mode(struct iio_dev *indio_dev, mutex_lock(&priv->slock); size = 0; - for_each_set_bit(ch_idx, active_scan_mask, indio_dev->num_channels) { + for_each_set_bit(ch_idx, active_scan_mask, ARRAY_SIZE(priv->l)) { size += tsc2046_adc_group_set_layout(priv, group, ch_idx); tsc2046_adc_group_set_cmd(priv, group, ch_idx); group++; @@ -548,7 +548,7 @@ static int tsc2046_adc_setup_spi_msg(struct tsc2046_adc_priv *priv) * enabled. 
*/ size = 0; - for (ch_idx = 0; ch_idx < priv->dcfg->num_channels; ch_idx++) + for (ch_idx = 0; ch_idx < ARRAY_SIZE(priv->l); ch_idx++) size += tsc2046_adc_group_set_layout(priv, ch_idx, ch_idx); priv->tx = devm_kzalloc(&priv->spi->dev, size, GFP_KERNEL); diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index 5271073bb74e..acd230a6af35 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -134,7 +134,6 @@ struct ad74413r_state { #define AD74413R_CH_EN_MASK(x) BIT(x) #define AD74413R_REG_DIN_COMP_OUT 0x25 -#define AD74413R_DIN_COMP_OUT_SHIFT_X(x) x #define AD74413R_REG_ADC_RESULT_X(x) (0x26 + (x)) #define AD74413R_ADC_RESULT_MAX GENMASK(15, 0) @@ -288,7 +287,7 @@ static void ad74413r_gpio_set_multiple(struct gpio_chip *chip, unsigned int offset = 0; int ret; - for_each_set_bit_from(offset, mask, AD74413R_CHANNEL_MAX) { + for_each_set_bit_from(offset, mask, chip->ngpio) { unsigned int real_offset = st->gpo_gpio_offsets[offset]; ret = ad74413r_set_gpo_config(st, real_offset, @@ -316,7 +315,7 @@ static int ad74413r_gpio_get(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - status &= AD74413R_DIN_COMP_OUT_SHIFT_X(real_offset); + status &= BIT(real_offset); return status ? 
1 : 0; } @@ -334,11 +333,10 @@ static int ad74413r_gpio_get_multiple(struct gpio_chip *chip, if (ret) return ret; - for_each_set_bit_from(offset, mask, AD74413R_CHANNEL_MAX) { + for_each_set_bit_from(offset, mask, chip->ngpio) { unsigned int real_offset = st->comp_gpio_offsets[offset]; - if (val & BIT(real_offset)) - *bits |= offset; + __assign_bit(offset, bits, val & BIT(real_offset)); } return ret; @@ -840,7 +838,7 @@ static int ad74413r_update_scan_mode(struct iio_dev *indio_dev, { struct ad74413r_state *st = iio_priv(indio_dev); struct spi_transfer *xfer = st->adc_samples_xfer; - u8 *rx_buf = &st->adc_samples_buf.rx_buf[-1 * AD74413R_FRAME_SIZE]; + u8 *rx_buf = st->adc_samples_buf.rx_buf; u8 *tx_buf = st->adc_samples_tx_buf; unsigned int channel; int ret = -EINVAL; @@ -894,9 +892,10 @@ static int ad74413r_update_scan_mode(struct iio_dev *indio_dev, spi_message_add_tail(xfer, &st->adc_samples_msg); - xfer++; tx_buf += AD74413R_FRAME_SIZE; - rx_buf += AD74413R_FRAME_SIZE; + if (xfer != st->adc_samples_xfer) + rx_buf += AD74413R_FRAME_SIZE; + xfer++; } xfer->rx_buf = rx_buf; diff --git a/drivers/iio/amplifiers/ad8366.c b/drivers/iio/amplifiers/ad8366.c index cfcf18a0bce8..1134ae12e531 100644 --- a/drivers/iio/amplifiers/ad8366.c +++ b/drivers/iio/amplifiers/ad8366.c @@ -298,7 +298,7 @@ error_disable_reg: return ret; } -static int ad8366_remove(struct spi_device *spi) +static void ad8366_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad8366_state *st = iio_priv(indio_dev); @@ -308,8 +308,6 @@ static int ad8366_remove(struct spi_device *spi) if (!IS_ERR(reg)) regulator_disable(reg); - - return 0; } static const struct spi_device_id ad8366_id[] = { diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c index 1aee87100038..eafaf4529df5 100644 --- a/drivers/iio/common/ssp_sensors/ssp_dev.c +++ b/drivers/iio/common/ssp_sensors/ssp_dev.c @@ -586,7 +586,7 @@ err_setup_irq: return ret; } 
-static int ssp_remove(struct spi_device *spi) +static void ssp_remove(struct spi_device *spi) { struct ssp_data *data = spi_get_drvdata(spi); @@ -608,8 +608,6 @@ static int ssp_remove(struct spi_device *spi) mutex_destroy(&data->pending_lock); mfd_remove_devices(&spi->dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/iio/dac/ad5360.c b/drivers/iio/dac/ad5360.c index 2d3b14c407d8..ecbc6a51d60f 100644 --- a/drivers/iio/dac/ad5360.c +++ b/drivers/iio/dac/ad5360.c @@ -521,7 +521,7 @@ error_free_channels: return ret; } -static int ad5360_remove(struct spi_device *spi) +static void ad5360_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5360_state *st = iio_priv(indio_dev); @@ -531,8 +531,6 @@ static int ad5360_remove(struct spi_device *spi) kfree(indio_dev->channels); regulator_bulk_disable(st->chip_info->num_vrefs, st->vref_reg); - - return 0; } static const struct spi_device_id ad5360_ids[] = { diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c index e38860a6a9f3..82e1d9bd773e 100644 --- a/drivers/iio/dac/ad5380.c +++ b/drivers/iio/dac/ad5380.c @@ -488,11 +488,9 @@ static int ad5380_spi_probe(struct spi_device *spi) return ad5380_probe(&spi->dev, regmap, id->driver_data, id->name); } -static int ad5380_spi_remove(struct spi_device *spi) +static void ad5380_spi_remove(struct spi_device *spi) { ad5380_remove(&spi->dev); - - return 0; } static const struct spi_device_id ad5380_spi_ids[] = { diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 1c9b54c012a7..14cfabacbea5 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -491,11 +491,9 @@ static int ad5446_spi_probe(struct spi_device *spi) &ad5446_spi_chip_info[id->driver_data]); } -static int ad5446_spi_remove(struct spi_device *spi) +static void ad5446_spi_remove(struct spi_device *spi) { ad5446_remove(&spi->dev); - - return 0; } static struct spi_driver ad5446_spi_driver = { diff --git 
a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c index f5e93c6acc9d..bad9bdaafa94 100644 --- a/drivers/iio/dac/ad5449.c +++ b/drivers/iio/dac/ad5449.c @@ -330,7 +330,7 @@ error_disable_reg: return ret; } -static int ad5449_spi_remove(struct spi_device *spi) +static void ad5449_spi_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5449 *st = iio_priv(indio_dev); @@ -338,8 +338,6 @@ static int ad5449_spi_remove(struct spi_device *spi) iio_device_unregister(indio_dev); regulator_bulk_disable(st->chip_info->num_channels, st->vref_reg); - - return 0; } static const struct spi_device_id ad5449_spi_ids[] = { diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c index b631261efa97..8507573aa13e 100644 --- a/drivers/iio/dac/ad5504.c +++ b/drivers/iio/dac/ad5504.c @@ -336,7 +336,7 @@ error_disable_reg: return ret; } -static int ad5504_remove(struct spi_device *spi) +static void ad5504_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5504_state *st = iio_priv(indio_dev); @@ -345,8 +345,6 @@ static int ad5504_remove(struct spi_device *spi) if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ad5504_id[] = { diff --git a/drivers/iio/dac/ad5592r.c b/drivers/iio/dac/ad5592r.c index 6bfd7951e18c..0f7abfa75bec 100644 --- a/drivers/iio/dac/ad5592r.c +++ b/drivers/iio/dac/ad5592r.c @@ -130,11 +130,9 @@ static int ad5592r_spi_probe(struct spi_device *spi) return ad5592r_probe(&spi->dev, id->name, &ad5592r_rw_ops); } -static int ad5592r_spi_remove(struct spi_device *spi) +static void ad5592r_spi_remove(struct spi_device *spi) { ad5592r_remove(&spi->dev); - - return 0; } static const struct spi_device_id ad5592r_spi_ids[] = { diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c index 3c98941b9f99..371e812850eb 100644 --- a/drivers/iio/dac/ad5624r_spi.c +++ b/drivers/iio/dac/ad5624r_spi.c @@ -293,7 +293,7 @@ 
error_disable_reg: return ret; } -static int ad5624r_remove(struct spi_device *spi) +static void ad5624r_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5624r_state *st = iio_priv(indio_dev); @@ -301,8 +301,6 @@ static int ad5624r_remove(struct spi_device *spi) iio_device_unregister(indio_dev); if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ad5624r_id[] = { diff --git a/drivers/iio/dac/ad5686-spi.c b/drivers/iio/dac/ad5686-spi.c index 2628810fdbb1..d26fb29b6b04 100644 --- a/drivers/iio/dac/ad5686-spi.c +++ b/drivers/iio/dac/ad5686-spi.c @@ -95,11 +95,9 @@ static int ad5686_spi_probe(struct spi_device *spi) ad5686_spi_write, ad5686_spi_read); } -static int ad5686_spi_remove(struct spi_device *spi) +static void ad5686_spi_remove(struct spi_device *spi) { ad5686_remove(&spi->dev); - - return 0; } static const struct spi_device_id ad5686_spi_id[] = { diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c index e37e095e94fc..4cb8471db81e 100644 --- a/drivers/iio/dac/ad5761.c +++ b/drivers/iio/dac/ad5761.c @@ -394,7 +394,7 @@ disable_regulator_err: return ret; } -static int ad5761_remove(struct spi_device *spi) +static void ad5761_remove(struct spi_device *spi) { struct iio_dev *iio_dev = spi_get_drvdata(spi); struct ad5761_state *st = iio_priv(iio_dev); @@ -403,8 +403,6 @@ static int ad5761_remove(struct spi_device *spi) if (!IS_ERR_OR_NULL(st->vref_reg)) regulator_disable(st->vref_reg); - - return 0; } static const struct spi_device_id ad5761_id[] = { diff --git a/drivers/iio/dac/ad5764.c b/drivers/iio/dac/ad5764.c index ae089b9145cb..d235a8047ba0 100644 --- a/drivers/iio/dac/ad5764.c +++ b/drivers/iio/dac/ad5764.c @@ -332,7 +332,7 @@ error_disable_reg: return ret; } -static int ad5764_remove(struct spi_device *spi) +static void ad5764_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5764_state *st = iio_priv(indio_dev); 
@@ -341,8 +341,6 @@ static int ad5764_remove(struct spi_device *spi) if (st->chip_info->int_vref == 0) regulator_bulk_disable(ARRAY_SIZE(st->vref_reg), st->vref_reg); - - return 0; } static const struct spi_device_id ad5764_ids[] = { diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c index 7b4579d73d18..2b14914b4050 100644 --- a/drivers/iio/dac/ad5791.c +++ b/drivers/iio/dac/ad5791.c @@ -428,7 +428,7 @@ error_disable_reg_pos: return ret; } -static int ad5791_remove(struct spi_device *spi) +static void ad5791_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5791_state *st = iio_priv(indio_dev); @@ -439,8 +439,6 @@ static int ad5791_remove(struct spi_device *spi) if (!IS_ERR(st->reg_vss)) regulator_disable(st->reg_vss); - - return 0; } static const struct spi_device_id ad5791_id[] = { diff --git a/drivers/iio/dac/ad8801.c b/drivers/iio/dac/ad8801.c index 5ecfdad54dec..6be35c92d435 100644 --- a/drivers/iio/dac/ad8801.c +++ b/drivers/iio/dac/ad8801.c @@ -193,7 +193,7 @@ error_disable_vrefh_reg: return ret; } -static int ad8801_remove(struct spi_device *spi) +static void ad8801_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad8801_state *state = iio_priv(indio_dev); @@ -202,8 +202,6 @@ static int ad8801_remove(struct spi_device *spi) if (state->vrefl_reg) regulator_disable(state->vrefl_reg); regulator_disable(state->vrefh_reg); - - return 0; } static const struct spi_device_id ad8801_ids[] = { diff --git a/drivers/iio/dac/ltc1660.c b/drivers/iio/dac/ltc1660.c index f6ec9bf5815e..c76233c9bb72 100644 --- a/drivers/iio/dac/ltc1660.c +++ b/drivers/iio/dac/ltc1660.c @@ -206,15 +206,13 @@ error_disable_reg: return ret; } -static int ltc1660_remove(struct spi_device *spi) +static void ltc1660_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ltc1660_priv *priv = iio_priv(indio_dev); iio_device_unregister(indio_dev); 
regulator_disable(priv->vref_reg); - - return 0; } static const struct of_device_id ltc1660_dt_ids[] = { diff --git a/drivers/iio/dac/ltc2632.c b/drivers/iio/dac/ltc2632.c index 53e4b887d372..aed46c80757e 100644 --- a/drivers/iio/dac/ltc2632.c +++ b/drivers/iio/dac/ltc2632.c @@ -372,7 +372,7 @@ static int ltc2632_probe(struct spi_device *spi) return iio_device_register(indio_dev); } -static int ltc2632_remove(struct spi_device *spi) +static void ltc2632_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ltc2632_state *st = iio_priv(indio_dev); @@ -381,8 +381,6 @@ static int ltc2632_remove(struct spi_device *spi) if (st->vref_reg) regulator_disable(st->vref_reg); - - return 0; } static const struct spi_device_id ltc2632_id[] = { diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c index 0ae414ee1716..cb9e60e71b91 100644 --- a/drivers/iio/dac/mcp4922.c +++ b/drivers/iio/dac/mcp4922.c @@ -172,7 +172,7 @@ error_disable_reg: return ret; } -static int mcp4922_remove(struct spi_device *spi) +static void mcp4922_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct mcp4922_state *state; @@ -180,8 +180,6 @@ static int mcp4922_remove(struct spi_device *spi) iio_device_unregister(indio_dev); state = iio_priv(indio_dev); regulator_disable(state->vref_reg); - - return 0; } static const struct spi_device_id mcp4922_id[] = { diff --git a/drivers/iio/dac/ti-dac082s085.c b/drivers/iio/dac/ti-dac082s085.c index 6beda2193683..4e1156e6deb2 100644 --- a/drivers/iio/dac/ti-dac082s085.c +++ b/drivers/iio/dac/ti-dac082s085.c @@ -313,7 +313,7 @@ err: return ret; } -static int ti_dac_remove(struct spi_device *spi) +static void ti_dac_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ti_dac_chip *ti_dac = iio_priv(indio_dev); @@ -321,8 +321,6 @@ static int ti_dac_remove(struct spi_device *spi) iio_device_unregister(indio_dev); mutex_destroy(&ti_dac->lock); 
regulator_disable(ti_dac->vref); - - return 0; } static const struct of_device_id ti_dac_of_id[] = { diff --git a/drivers/iio/dac/ti-dac7311.c b/drivers/iio/dac/ti-dac7311.c index 99f275829ec2..e10d17e60ed3 100644 --- a/drivers/iio/dac/ti-dac7311.c +++ b/drivers/iio/dac/ti-dac7311.c @@ -292,7 +292,7 @@ err: return ret; } -static int ti_dac_remove(struct spi_device *spi) +static void ti_dac_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ti_dac_chip *ti_dac = iio_priv(indio_dev); @@ -300,7 +300,6 @@ static int ti_dac_remove(struct spi_device *spi) iio_device_unregister(indio_dev); mutex_destroy(&ti_dac->lock); regulator_disable(ti_dac->vref); - return 0; } static const struct of_device_id ti_dac_of_id[] = { diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c index 3d9eba716b69..f3521330f6fb 100644 --- a/drivers/iio/frequency/adf4350.c +++ b/drivers/iio/frequency/adf4350.c @@ -589,7 +589,7 @@ error_disable_clk: return ret; } -static int adf4350_remove(struct spi_device *spi) +static void adf4350_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct adf4350_state *st = iio_priv(indio_dev); @@ -604,8 +604,6 @@ static int adf4350_remove(struct spi_device *spi) if (!IS_ERR(reg)) regulator_disable(reg); - - return 0; } static const struct of_device_id adf4350_of_match[] = { diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index 6cdeb50143af..3f3c478e9baa 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -348,7 +348,7 @@ static int admv1013_update_mixer_vgate(struct admv1013_state *st) vcm = regulator_get_voltage(st->reg); - if (vcm >= 0 && vcm < 1800000) + if (vcm < 1800000) mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100; else if (vcm > 1800000 && vcm < 2600000) mixer_vgate = (2375 * vcm / 1000000 + 125) / 100; diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 
17b939a367ad..81a6d09788bd 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -1188,11 +1188,14 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "unable to register iio device\n"); - goto err_buffer_cleanup; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: diff --git a/drivers/iio/gyro/bmg160_spi.c b/drivers/iio/gyro/bmg160_spi.c index 745962e1e423..fc2e453527b9 100644 --- a/drivers/iio/gyro/bmg160_spi.c +++ b/drivers/iio/gyro/bmg160_spi.c @@ -27,11 +27,9 @@ static int bmg160_spi_probe(struct spi_device *spi) return bmg160_core_probe(&spi->dev, regmap, spi->irq, id->name); } -static int bmg160_spi_remove(struct spi_device *spi) +static void bmg160_spi_remove(struct spi_device *spi) { bmg160_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id bmg160_spi_id[] = { diff --git a/drivers/iio/gyro/fxas21002c_spi.c b/drivers/iio/gyro/fxas21002c_spi.c index 77ceebef4e34..c3ac169facf9 100644 --- a/drivers/iio/gyro/fxas21002c_spi.c +++ b/drivers/iio/gyro/fxas21002c_spi.c @@ -34,11 +34,9 @@ static int fxas21002c_spi_probe(struct spi_device *spi) return fxas21002c_core_probe(&spi->dev, regmap, spi->irq, id->name); } -static int fxas21002c_spi_remove(struct spi_device *spi) +static void fxas21002c_spi_remove(struct spi_device *spi) { fxas21002c_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id fxas21002c_spi_id[] = { diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 273f16dcaff8..856ec901b091 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -570,7 +570,7 @@ err_disable_reg: return ret; } -static int afe4403_remove(struct spi_device *spi) +static void afe4403_remove(struct spi_device *spi) { struct 
iio_dev *indio_dev = spi_get_drvdata(spi); struct afe4403_data *afe = iio_priv(indio_dev); @@ -586,8 +586,6 @@ static int afe4403_remove(struct spi_device *spi) ret = regulator_disable(afe->regulator); if (ret) dev_warn(afe->dev, "Unable to disable regulator\n"); - - return 0; } static const struct spi_device_id afe4403_ids[] = { diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index ed129321a14d..f9b4540db1f4 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -1403,6 +1403,7 @@ static int adis16480_probe(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); const struct adis_data *adis16480_data; + irq_handler_t trigger_handler = NULL; struct iio_dev *indio_dev; struct adis16480 *st; int ret; @@ -1474,8 +1475,12 @@ static int adis16480_probe(struct spi_device *spi) st->clk_freq = st->chip_info->int_clk; } + /* Only use our trigger handler if burst mode is supported */ + if (adis16480_data->burst_len) + trigger_handler = adis16480_trigger_handler; + ret = devm_adis_setup_buffer_and_trigger(&st->adis, indio_dev, - adis16480_trigger_handler); + trigger_handler); if (ret) return ret; diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c index 1dabfd615dab..f89724481df9 100644 --- a/drivers/iio/imu/kmx61.c +++ b/drivers/iio/imu/kmx61.c @@ -1385,7 +1385,7 @@ static int kmx61_probe(struct i2c_client *client, ret = iio_device_register(data->acc_indio_dev); if (ret < 0) { dev_err(&client->dev, "Failed to register acc iio device\n"); - goto err_buffer_cleanup_mag; + goto err_pm_cleanup; } ret = iio_device_register(data->mag_indio_dev); @@ -1398,6 +1398,9 @@ static int kmx61_probe(struct i2c_client *client, err_iio_unregister_acc: iio_device_unregister(data->acc_indio_dev); +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); err_buffer_cleanup_mag: if (client->irq > 0) iio_triggered_buffer_cleanup(data->mag_indio_dev); diff --git 
a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 727b4b6ac696..93f0c6bce502 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1374,8 +1374,12 @@ static int st_lsm6dsx_read_oneshot(struct st_lsm6dsx_sensor *sensor, if (err < 0) return err; + /* + * we need to wait for sensor settling time before + * reading data in order to avoid corrupted samples + */ delay = 1000000000 / sensor->odr; - usleep_range(delay, 2 * delay); + usleep_range(3 * delay, 4 * delay); err = st_lsm6dsx_read_locked(hw, addr, &data, sizeof(data)); if (err < 0) diff --git a/drivers/iio/magnetometer/bmc150_magn.c b/drivers/iio/magnetometer/bmc150_magn.c index f96f53175349..3d4d21f979fa 100644 --- a/drivers/iio/magnetometer/bmc150_magn.c +++ b/drivers/iio/magnetometer/bmc150_magn.c @@ -962,13 +962,14 @@ int bmc150_magn_probe(struct device *dev, struct regmap *regmap, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "unable to register iio device\n"); - goto err_disable_runtime_pm; + goto err_pm_cleanup; } dev_dbg(dev, "Registered device %s\n", name); return 0; -err_disable_runtime_pm: +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); diff --git a/drivers/iio/magnetometer/bmc150_magn_spi.c b/drivers/iio/magnetometer/bmc150_magn_spi.c index c6ed3ea8460a..4c570412d65c 100644 --- a/drivers/iio/magnetometer/bmc150_magn_spi.c +++ b/drivers/iio/magnetometer/bmc150_magn_spi.c @@ -29,11 +29,9 @@ static int bmc150_magn_spi_probe(struct spi_device *spi) return bmc150_magn_probe(&spi->dev, regmap, spi->irq, id->name); } -static int bmc150_magn_spi_remove(struct spi_device *spi) +static void bmc150_magn_spi_remove(struct spi_device *spi) { bmc150_magn_remove(&spi->dev); - - return 0; } static const struct spi_device_id bmc150_magn_spi_id[] = { diff --git 
a/drivers/iio/magnetometer/hmc5843_spi.c b/drivers/iio/magnetometer/hmc5843_spi.c index 89cf59a62c28..a99dd9b33e95 100644 --- a/drivers/iio/magnetometer/hmc5843_spi.c +++ b/drivers/iio/magnetometer/hmc5843_spi.c @@ -74,11 +74,9 @@ static int hmc5843_spi_probe(struct spi_device *spi) id->driver_data, id->name); } -static int hmc5843_spi_remove(struct spi_device *spi) +static void hmc5843_spi_remove(struct spi_device *spi) { hmc5843_common_remove(&spi->dev); - - return 0; } static const struct spi_device_id hmc5843_id[] = { diff --git a/drivers/iio/potentiometer/max5487.c b/drivers/iio/potentiometer/max5487.c index 007c2bd324cb..42723c996c9f 100644 --- a/drivers/iio/potentiometer/max5487.c +++ b/drivers/iio/potentiometer/max5487.c @@ -112,7 +112,7 @@ static int max5487_spi_probe(struct spi_device *spi) return iio_device_register(indio_dev); } -static int max5487_spi_remove(struct spi_device *spi) +static void max5487_spi_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); int ret; @@ -123,8 +123,6 @@ static int max5487_spi_remove(struct spi_device *spi) ret = max5487_write_cmd(spi, MAX5487_COPY_AB_TO_NV); if (ret) dev_warn(&spi->dev, "Failed to save wiper regs to NV regs\n"); - - return 0; } static const struct spi_device_id max5487_id[] = { diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index 9fa2dcd71760..7ccd960ced5d 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -107,11 +107,9 @@ static int ms5611_spi_probe(struct spi_device *spi) spi_get_device_id(spi)->driver_data); } -static int ms5611_spi_remove(struct spi_device *spi) +static void ms5611_spi_remove(struct spi_device *spi) { ms5611_remove(spi_get_drvdata(spi)); - - return 0; } static const struct of_device_id ms5611_spi_matches[] = { diff --git a/drivers/iio/pressure/zpa2326_spi.c b/drivers/iio/pressure/zpa2326_spi.c index 85201a4bae44..ee8ed77536ca 100644 --- a/drivers/iio/pressure/zpa2326_spi.c +++ 
b/drivers/iio/pressure/zpa2326_spi.c @@ -57,11 +57,9 @@ static int zpa2326_probe_spi(struct spi_device *spi) spi->irq, ZPA2326_DEVICE_ID, regmap); } -static int zpa2326_remove_spi(struct spi_device *spi) +static void zpa2326_remove_spi(struct spi_device *spi) { zpa2326_remove(&spi->dev); - - return 0; } static const struct spi_device_id zpa2326_spi_ids[] = { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c447526288f4..50c53409ceb6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3370,22 +3370,30 @@ err: static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, const struct sockaddr *dst_addr) { - if (!src_addr || !src_addr->sa_family) { - src_addr = (struct sockaddr *) &id->route.addr.src_addr; - src_addr->sa_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && - dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; - struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *) src_addr)->sib_pkey = - ((struct sockaddr_ib *) dst_addr)->sib_pkey; - } - } - return rdma_bind_addr(id, src_addr); + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & 
IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } + return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); } /* diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 08b7f6bc56c3..fc036b4794fd 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1055,7 +1055,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); void *cmd_out; - int err; + int err, err2; int uid; c = devx_ufile2uctx(attrs); @@ -1076,14 +1076,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(dev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), - cmd_out, cmd_out_len); - if (err) + err = mlx5_cmd_do(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err && err != -EREMOTEIO) return err; - return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, + err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len); + + return err2 ?: err; } static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, @@ -1457,7 +1459,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj; u16 obj_type = 0; - int err; + int err, err2 = 0; int uid; u32 obj_id; u16 opcode; @@ -1497,15 +1499,18 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( !is_apu_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; - err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, - cmd_in, cmd_in_len, cmd_out, - cmd_out_len); + err = 
mlx5_create_cq(dev->mdev, &obj->core_cq, + cmd_in, cmd_in_len, cmd_out, + cmd_out_len); } else { - err = mlx5_cmd_exec(dev->mdev, cmd_in, - cmd_in_len, - cmd_out, cmd_out_len); + err = mlx5_cmd_do(dev->mdev, cmd_in, cmd_in_len, + cmd_out, cmd_out_len); } + if (err == -EREMOTEIO) + err2 = uverbs_copy_to(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + cmd_out, cmd_out_len); if (err) goto obj_free; @@ -1548,7 +1553,7 @@ obj_destroy: sizeof(out)); obj_free: kfree(obj); - return err; + return err2 ?: err; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( @@ -1563,7 +1568,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); void *cmd_out; - int err; + int err, err2; int uid; if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) @@ -1586,14 +1591,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); devx_set_umem_valid(cmd_in); - err = mlx5_cmd_exec(mdev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), - cmd_out, cmd_out_len); - if (err) + err = mlx5_cmd_do(mdev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err && err != -EREMOTEIO) return err; - return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, cmd_out, cmd_out_len); + + return err2 ?: err; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( @@ -1607,7 +1614,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); void *cmd_out; - int err; + int err, err2; int uid; struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); @@ -1629,14 +1636,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( 
return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(mdev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), - cmd_out, cmd_out_len); - if (err) + err = mlx5_cmd_do(mdev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err && err != -EREMOTEIO) return err; - return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len); + + return err2 ?: err; } struct devx_async_event_queue { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 157d862fb864..06e4b8cea6bd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -140,6 +140,19 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } +static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) +{ + if (status == -ENXIO) /* core driver is not available */ + return; + + mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); + if (status != -EREMOTEIO) /* driver specific failure */ + return; + + /* Failed in FW, print cmd out failure details */ + mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); +} + static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -149,7 +162,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) unsigned long flags; if (status) { - mlx5_ib_warn(dev, "async reg mr failed. 
status %d\n", status); + create_mkey_warn(dev, status, mr->out); kfree(mr); spin_lock_irqsave(&ent->lock, flags); ent->pending--; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 29475cf8c7c3..b7fe47107d76 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4465,6 +4465,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, sizeof(out)); + err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out); if (err) return err; resp.dctn = qp->dct.mdct.mqp.qpn; diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 8844eacf2380..542e4c63a8de 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -220,7 +220,7 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, init_completion(&dct->drained); MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); - err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev->mdev, in, inlen, out, outlen); if (err) return err; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 0a3b28142c05..41c272980f91 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -541,7 +541,7 @@ static struct attribute *port_diagc_attributes[] = { }; static const struct attribute_group port_diagc_group = { - .name = "linkcontrol", + .name = "diag_counters", .attrs = port_diagc_attributes, }; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 7c3f98e57889..759b85f03331 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2682,6 +2682,8 @@ static void rtrs_clt_dev_release(struct device *dev) struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess, dev); + 
mutex_destroy(&clt->paths_ev_mutex); + mutex_destroy(&clt->paths_mutex); kfree(clt); } @@ -2711,6 +2713,8 @@ static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num, return ERR_PTR(-ENOMEM); } + clt->dev.class = rtrs_clt_dev_class; + clt->dev.release = rtrs_clt_dev_release; uuid_gen(&clt->paths_uuid); INIT_LIST_HEAD_RCU(&clt->paths_list); clt->paths_num = paths_num; @@ -2727,53 +2731,51 @@ static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num, init_waitqueue_head(&clt->permits_wait); mutex_init(&clt->paths_ev_mutex); mutex_init(&clt->paths_mutex); + device_initialize(&clt->dev); - clt->dev.class = rtrs_clt_dev_class; - clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); if (err) - goto err; + goto err_put; + /* * Suppress user space notification until * sysfs files are created */ dev_set_uevent_suppress(&clt->dev, true); - err = device_register(&clt->dev); - if (err) { - put_device(&clt->dev); - goto err; - } + err = device_add(&clt->dev); + if (err) + goto err_put; clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { err = -ENOMEM; - goto err_dev; + goto err_del; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - goto err_dev; + goto err_del; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; -err_dev: - device_unregister(&clt->dev); -err: +err_del: + device_del(&clt->dev); +err_put: free_percpu(clt->pcpu_path); - kfree(clt); + put_device(&clt->dev); return ERR_PTR(err); } static void free_clt(struct rtrs_clt_sess *clt) { - free_permits(clt); free_percpu(clt->pcpu_path); - mutex_destroy(&clt->paths_ev_mutex); - mutex_destroy(&clt->paths_mutex); - /* release callback will free clt in last put */ + + /* + * release callback will free clt and destroy mutexes in last put + */ device_unregister(&clt->dev); } @@ -2890,6 +2892,7 @@ void 
rtrs_clt_close(struct rtrs_clt_sess *clt) rtrs_clt_destroy_path_files(clt_path, NULL); kobject_put(&clt_path->kobj); } + free_permits(clt); free_clt(clt); } EXPORT_SYMBOL(rtrs_clt_close); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e174e853f8a4..285b766e4e70 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -4047,9 +4047,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) spin_unlock(&host->target_lock); /* - * Wait for tl_err and target port removal tasks. + * srp_queue_remove_work() queues a call to + * srp_remove_target(). The latter function cancels + * target->tl_err_work so waiting for the remove works to + * finish is sufficient. */ - flush_workqueue(system_long_wq); flush_workqueue(srp_remove_wq); kfree(host); diff --git a/drivers/input/input.c b/drivers/input/input.c index ccaeb2426385..c3139bc2aa0d 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -2285,6 +2285,12 @@ int input_register_device(struct input_dev *dev) /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); + /* Buttonpads should not map BTN_RIGHT and/or BTN_MIDDLE. */ + if (test_bit(INPUT_PROP_BUTTONPAD, dev->propbit)) { + __clear_bit(BTN_RIGHT, dev->keybit); + __clear_bit(BTN_MIDDLE, dev->keybit); + } + /* Make sure that bitmasks not mentioned in dev->evbit are clean. 
*/ input_cleanse_bitmasks(dev); diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index eda1b23002b5..d1f5354d5ea2 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -1858,7 +1858,7 @@ static void applespi_drain_reads(struct applespi_data *applespi) spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); } -static int applespi_remove(struct spi_device *spi) +static void applespi_remove(struct spi_device *spi) { struct applespi_data *applespi = spi_get_drvdata(spi); @@ -1871,8 +1871,6 @@ static int applespi_remove(struct spi_device *spi) applespi_drain_reads(applespi); debugfs_remove_recursive(applespi->debugfs_root); - - return 0; } static void applespi_shutdown(struct spi_device *spi) diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c index 6e51c9bc619f..91e44d4c66f7 100644 --- a/drivers/input/misc/adxl34x-spi.c +++ b/drivers/input/misc/adxl34x-spi.c @@ -87,13 +87,11 @@ static int adxl34x_spi_probe(struct spi_device *spi) return 0; } -static int adxl34x_spi_remove(struct spi_device *spi) +static void adxl34x_spi_remove(struct spi_device *spi) { struct adxl34x *ac = spi_get_drvdata(spi); adxl34x_remove(ac); - - return 0; } static int __maybe_unused adxl34x_spi_suspend(struct device *dev) diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c index a472489ccbad..164f6c757f6b 100644 --- a/drivers/input/mouse/psmouse-smbus.c +++ b/drivers/input/mouse/psmouse-smbus.c @@ -75,6 +75,8 @@ static void psmouse_smbus_detach_i2c_client(struct i2c_client *client) "Marking SMBus companion %s as gone\n", dev_name(&smbdev->client->dev)); smbdev->dead = true; + device_link_remove(&smbdev->client->dev, + &smbdev->psmouse->ps2dev.serio->dev); serio_rescan(smbdev->psmouse->ps2dev.serio); } else { list_del(&smbdev->node); @@ -174,6 +176,8 @@ static void psmouse_smbus_disconnect(struct psmouse *psmouse) kfree(smbdev); } else { smbdev->dead = true; + 
device_link_remove(&smbdev->client->dev, + &psmouse->ps2dev.serio->dev); psmouse_dbg(smbdev->psmouse, "posting removal request for SMBus companion %s\n", dev_name(&smbdev->client->dev)); @@ -270,6 +274,12 @@ int psmouse_smbus_init(struct psmouse *psmouse, if (smbdev->client) { /* We have our companion device */ + if (!device_link_add(&smbdev->client->dev, + &psmouse->ps2dev.serio->dev, + DL_FLAG_STATELESS)) + psmouse_warn(psmouse, + "failed to set up link with iSMBus companion %s\n", + dev_name(&smbdev->client->dev)); return 0; } diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index a25a77dd9a32..bed68a68f330 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1411,13 +1411,11 @@ static int ads7846_probe(struct spi_device *spi) return 0; } -static int ads7846_remove(struct spi_device *spi) +static void ads7846_remove(struct spi_device *spi) { struct ads7846 *ts = spi_get_drvdata(spi); ads7846_stop(ts); - - return 0; } static struct spi_driver ads7846_driver = { diff --git a/drivers/input/touchscreen/cyttsp4_spi.c b/drivers/input/touchscreen/cyttsp4_spi.c index 2aec41eb76b7..5d7db84f2749 100644 --- a/drivers/input/touchscreen/cyttsp4_spi.c +++ b/drivers/input/touchscreen/cyttsp4_spi.c @@ -164,12 +164,10 @@ static int cyttsp4_spi_probe(struct spi_device *spi) return PTR_ERR_OR_ZERO(ts); } -static int cyttsp4_spi_remove(struct spi_device *spi) +static void cyttsp4_spi_remove(struct spi_device *spi) { struct cyttsp4 *ts = spi_get_drvdata(spi); cyttsp4_remove(ts); - - return 0; } static struct spi_driver cyttsp4_spi_driver = { diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index a2f55920b9b2..555dfe98b3c4 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -64,11 +64,9 @@ static int tsc2005_probe(struct spi_device *spi) tsc2005_cmd); } -static int tsc2005_remove(struct spi_device *spi) +static void 
tsc2005_remove(struct spi_device *spi) { tsc200x_remove(&spi->dev); - - return 0; } #ifdef CONFIG_OF diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index 7c82c4f5fa6b..129ebc810de8 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -571,8 +571,20 @@ static SIMPLE_DEV_PM_OPS(zinitix_pm_ops, zinitix_suspend, zinitix_resume); #ifdef CONFIG_OF static const struct of_device_id zinitix_of_match[] = { + { .compatible = "zinitix,bt402" }, + { .compatible = "zinitix,bt403" }, + { .compatible = "zinitix,bt404" }, + { .compatible = "zinitix,bt412" }, + { .compatible = "zinitix,bt413" }, + { .compatible = "zinitix,bt431" }, + { .compatible = "zinitix,bt432" }, + { .compatible = "zinitix,bt531" }, { .compatible = "zinitix,bt532" }, + { .compatible = "zinitix,bt538" }, { .compatible = "zinitix,bt541" }, + { .compatible = "zinitix,bt548" }, + { .compatible = "zinitix,bt554" }, + { .compatible = "zinitix,at100" }, { } }; MODULE_DEVICE_TABLE(of, zinitix_of_match); diff --git a/drivers/leds/leds-cr0014114.c b/drivers/leds/leds-cr0014114.c index d03cfd3c0bfb..c87686bd7c18 100644 --- a/drivers/leds/leds-cr0014114.c +++ b/drivers/leds/leds-cr0014114.c @@ -266,14 +266,12 @@ static int cr0014114_probe(struct spi_device *spi) return 0; } -static int cr0014114_remove(struct spi_device *spi) +static void cr0014114_remove(struct spi_device *spi) { struct cr0014114 *priv = spi_get_drvdata(spi); cancel_delayed_work_sync(&priv->work); mutex_destroy(&priv->lock); - - return 0; } static const struct of_device_id cr0014114_dt_ids[] = { diff --git a/drivers/leds/leds-dac124s085.c b/drivers/leds/leds-dac124s085.c index 20dc9b9d7dea..cf5fb1195f87 100644 --- a/drivers/leds/leds-dac124s085.c +++ b/drivers/leds/leds-dac124s085.c @@ -85,15 +85,13 @@ eledcr: return ret; } -static int dac124s085_remove(struct spi_device *spi) +static void dac124s085_remove(struct spi_device *spi) { struct dac124s085 *dac = 
spi_get_drvdata(spi); int i; for (i = 0; i < ARRAY_SIZE(dac->leds); i++) led_classdev_unregister(&dac->leds[i].ldev); - - return 0; } static struct spi_driver dac124s085_driver = { diff --git a/drivers/leds/leds-el15203000.c b/drivers/leds/leds-el15203000.c index f9eb59a25570..7e7b617bcd56 100644 --- a/drivers/leds/leds-el15203000.c +++ b/drivers/leds/leds-el15203000.c @@ -315,13 +315,11 @@ static int el15203000_probe(struct spi_device *spi) return el15203000_probe_dt(priv); } -static int el15203000_remove(struct spi_device *spi) +static void el15203000_remove(struct spi_device *spi) { struct el15203000 *priv = spi_get_drvdata(spi); mutex_destroy(&priv->lock); - - return 0; } static const struct of_device_id el15203000_dt_ids[] = { diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c index f1964c96fb15..2bc5c99daf51 100644 --- a/drivers/leds/leds-spi-byte.c +++ b/drivers/leds/leds-spi-byte.c @@ -130,13 +130,11 @@ static int spi_byte_probe(struct spi_device *spi) return 0; } -static int spi_byte_remove(struct spi_device *spi) +static void spi_byte_remove(struct spi_device *spi) { struct spi_byte_led *led = spi_get_drvdata(spi); mutex_destroy(&led->mutex); - - return 0; } static struct spi_driver spi_byte_driver = { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dcbd6d201619..997ace47bbd5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2077,7 +2077,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); - blk_set_queue_dying(md->queue); + blk_mark_disk_dead(md->disk); /* * Take suspend_lock so that presuspend and postsuspend methods diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c index 6f2a66bc87fb..6be4e5528879 100644 --- a/drivers/media/spi/cxd2880-spi.c +++ b/drivers/media/spi/cxd2880-spi.c @@ -625,7 +625,7 @@ fail_regulator: return ret; } -static int +static void cxd2880_spi_remove(struct spi_device *spi) { struct cxd2880_dvb_spi 
*dvb_spi = spi_get_drvdata(spi); @@ -643,8 +643,6 @@ cxd2880_spi_remove(struct spi_device *spi) kfree(dvb_spi); pr_info("cxd2880_spi remove ok.\n"); - - return 0; } static const struct spi_device_id cxd2880_spi_id[] = { diff --git a/drivers/media/spi/gs1662.c b/drivers/media/spi/gs1662.c index f86ef1ca1288..75c21a93e6d0 100644 --- a/drivers/media/spi/gs1662.c +++ b/drivers/media/spi/gs1662.c @@ -458,13 +458,11 @@ static int gs_probe(struct spi_device *spi) return ret; } -static int gs_remove(struct spi_device *spi) +static void gs_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); v4l2_device_unregister_subdev(sd); - - return 0; } static struct spi_driver gs_driver = { diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c index 44247049a319..ad6c72c1ed04 100644 --- a/drivers/media/tuners/msi001.c +++ b/drivers/media/tuners/msi001.c @@ -472,7 +472,7 @@ err: return ret; } -static int msi001_remove(struct spi_device *spi) +static void msi001_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); struct msi001_dev *dev = sd_to_msi001_dev(sd); @@ -486,7 +486,6 @@ static int msi001_remove(struct spi_device *spi) v4l2_device_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); - return 0; } static const struct spi_device_id msi001_id_table[] = { diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c index 9fe06dda3782..03620c8efe34 100644 --- a/drivers/mfd/arizona-spi.c +++ b/drivers/mfd/arizona-spi.c @@ -206,13 +206,11 @@ static int arizona_spi_probe(struct spi_device *spi) return arizona_dev_init(arizona); } -static int arizona_spi_remove(struct spi_device *spi) +static void arizona_spi_remove(struct spi_device *spi) { struct arizona *arizona = spi_get_drvdata(spi); arizona_dev_exit(arizona); - - return 0; } static const struct spi_device_id arizona_spi_ids[] = { diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c index 5faf3766a5e2..b79a57b45c1e 
100644 --- a/drivers/mfd/da9052-spi.c +++ b/drivers/mfd/da9052-spi.c @@ -55,12 +55,11 @@ static int da9052_spi_probe(struct spi_device *spi) return da9052_device_init(da9052, id->driver_data); } -static int da9052_spi_remove(struct spi_device *spi) +static void da9052_spi_remove(struct spi_device *spi) { struct da9052 *da9052 = spi_get_drvdata(spi); da9052_device_exit(da9052); - return 0; } static const struct spi_device_id da9052_spi_id[] = { diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index 70fa18b04ad2..2280f756f422 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -392,7 +392,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap, return ret; } -static int ezx_pcap_remove(struct spi_device *spi) +static void ezx_pcap_remove(struct spi_device *spi) { struct pcap_chip *pcap = spi_get_drvdata(spi); unsigned long flags; @@ -412,8 +412,6 @@ static int ezx_pcap_remove(struct spi_device *spi) irq_set_chip_and_handler(i, NULL, NULL); destroy_workqueue(pcap->workqueue); - - return 0; } static int ezx_pcap_probe(struct spi_device *spi) diff --git a/drivers/mfd/madera-spi.c b/drivers/mfd/madera-spi.c index e860f5ff0933..da84eb50e53a 100644 --- a/drivers/mfd/madera-spi.c +++ b/drivers/mfd/madera-spi.c @@ -112,13 +112,11 @@ static int madera_spi_probe(struct spi_device *spi) return madera_dev_init(madera); } -static int madera_spi_remove(struct spi_device *spi) +static void madera_spi_remove(struct spi_device *spi) { struct madera *madera = spi_get_drvdata(spi); madera_dev_exit(madera); - - return 0; } static const struct spi_device_id madera_spi_ids[] = { diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c index 4d8913d647e6..f803527e5819 100644 --- a/drivers/mfd/mc13xxx-spi.c +++ b/drivers/mfd/mc13xxx-spi.c @@ -166,10 +166,9 @@ static int mc13xxx_spi_probe(struct spi_device *spi) return mc13xxx_common_init(&spi->dev); } -static int mc13xxx_spi_remove(struct spi_device *spi) +static void mc13xxx_spi_remove(struct spi_device 
*spi) { mc13xxx_common_exit(&spi->dev); - return 0; } static struct spi_driver mc13xxx_spi_driver = { diff --git a/drivers/mfd/rsmu_spi.c b/drivers/mfd/rsmu_spi.c index fec2b4ec477c..d2f3d8f1e05a 100644 --- a/drivers/mfd/rsmu_spi.c +++ b/drivers/mfd/rsmu_spi.c @@ -220,13 +220,11 @@ static int rsmu_spi_probe(struct spi_device *client) return rsmu_core_init(rsmu); } -static int rsmu_spi_remove(struct spi_device *client) +static void rsmu_spi_remove(struct spi_device *client) { struct rsmu_ddata *rsmu = spi_get_drvdata(client); rsmu_core_exit(rsmu); - - return 0; } static const struct spi_device_id rsmu_spi_id[] = { diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index 6c5915016be5..ad8055a0e286 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -102,13 +102,11 @@ stmpe_spi_probe(struct spi_device *spi) return stmpe_probe(&spi_ci, id->driver_data); } -static int stmpe_spi_remove(struct spi_device *spi) +static void stmpe_spi_remove(struct spi_device *spi) { struct stmpe *stmpe = spi_get_drvdata(spi); stmpe_remove(stmpe); - - return 0; } static const struct of_device_id stmpe_spi_of_match[] = { diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c index d701926aa46e..bba38fbc781d 100644 --- a/drivers/mfd/tps65912-spi.c +++ b/drivers/mfd/tps65912-spi.c @@ -50,13 +50,11 @@ static int tps65912_spi_probe(struct spi_device *spi) return tps65912_device_init(tps); } -static int tps65912_spi_remove(struct spi_device *spi) +static void tps65912_spi_remove(struct spi_device *spi) { struct tps65912 *tps = spi_get_drvdata(spi); tps65912_device_exit(tps); - - return 0; } static const struct spi_device_id tps65912_spi_id_table[] = { diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c index a9e75d80ad36..263055bda48b 100644 --- a/drivers/misc/ad525x_dpot-spi.c +++ b/drivers/misc/ad525x_dpot-spi.c @@ -90,10 +90,9 @@ static int ad_dpot_spi_probe(struct spi_device *spi) spi_get_device_id(spi)->name); } -static int 
ad_dpot_spi_remove(struct spi_device *spi) +static void ad_dpot_spi_remove(struct spi_device *spi) { ad_dpot_remove(&spi->dev); - return 0; } static const struct spi_device_id ad_dpot_spi_id[] = { diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c index 1f15399e5cb4..b630625b3024 100644 --- a/drivers/misc/eeprom/eeprom_93xx46.c +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -555,14 +555,12 @@ static int eeprom_93xx46_probe(struct spi_device *spi) return 0; } -static int eeprom_93xx46_remove(struct spi_device *spi) +static void eeprom_93xx46_remove(struct spi_device *spi) { struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi); if (!(edev->pdata->flags & EE_READONLY)) device_remove_file(&spi->dev, &dev_attr_erase); - - return 0; } static struct spi_driver eeprom_93xx46_driver = { diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c index 98828030b5a4..bac4df2e5231 100644 --- a/drivers/misc/lattice-ecp3-config.c +++ b/drivers/misc/lattice-ecp3-config.c @@ -211,13 +211,11 @@ static int lattice_ecp3_probe(struct spi_device *spi) return 0; } -static int lattice_ecp3_remove(struct spi_device *spi) +static void lattice_ecp3_remove(struct spi_device *spi) { struct fpga_data *data = spi_get_drvdata(spi); wait_for_completion(&data->fw_loaded); - - return 0; } static const struct spi_device_id lattice_ecp3_id[] = { diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c index 9e40dfb60742..203a108b8883 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_spi.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -96,15 +96,13 @@ static int lis302dl_spi_probe(struct spi_device *spi) return lis3lv02d_init_device(&lis3_dev); } -static int lis302dl_spi_remove(struct spi_device *spi) +static void lis302dl_spi_remove(struct spi_device *spi) { struct lis3lv02d *lis3 = spi_get_drvdata(spi); lis3lv02d_joystick_disable(lis3); lis3lv02d_poweroff(lis3); lis3lv02d_remove_fs(&lis3_dev); - - return 0; 
} #ifdef CONFIG_PM_SLEEP diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index dab7b92db790..50644f83e78c 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -247,7 +247,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg) xpnet_device->stats.rx_packets++; xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN; - netif_rx_ni(skb); + netif_rx(skb); xpc_received(partid, channel, (void *)msg); } diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index a576181e9db0..106dd204b1a7 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1489,7 +1489,7 @@ nomem: } -static int mmc_spi_remove(struct spi_device *spi) +static void mmc_spi_remove(struct spi_device *spi) { struct mmc_host *mmc = dev_get_drvdata(&spi->dev); struct mmc_spi_host *host = mmc_priv(mmc); @@ -1507,7 +1507,6 @@ static int mmc_spi_remove(struct spi_device *spi) spi->max_speed_hz = mmc->f_max; mmc_spi_put_pdata(spi); mmc_free_host(mmc); - return 0; } static const struct spi_device_id mmc_spi_dev_ids[] = { diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c index a8b31bddf14b..008df9d8898d 100644 --- a/drivers/mtd/devices/mchp23k256.c +++ b/drivers/mtd/devices/mchp23k256.c @@ -209,13 +209,11 @@ static int mchp23k256_probe(struct spi_device *spi) return 0; } -static int mchp23k256_remove(struct spi_device *spi) +static void mchp23k256_remove(struct spi_device *spi) { struct mchp23k256_flash *flash = spi_get_drvdata(spi); WARN_ON(mtd_device_unregister(&flash->mtd)); - - return 0; } static const struct of_device_id mchp23k256_of_table[] = { diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index 231a10790196..a3fd426df74b 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -341,13 +341,11 @@ static int mchp48l640_probe(struct spi_device *spi) return 0; } -static int mchp48l640_remove(struct spi_device *spi) 
+static void mchp48l640_remove(struct spi_device *spi) { struct mchp48l640_flash *flash = spi_get_drvdata(spi); WARN_ON(mtd_device_unregister(&flash->mtd)); - - return 0; } static const struct of_device_id mchp48l640_of_table[] = { diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 734878abaa23..134e27328597 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -916,7 +916,7 @@ static int dataflash_probe(struct spi_device *spi) return status; } -static int dataflash_remove(struct spi_device *spi) +static void dataflash_remove(struct spi_device *spi) { struct dataflash *flash = spi_get_drvdata(spi); @@ -925,8 +925,6 @@ static int dataflash_remove(struct spi_device *spi) WARN_ON(mtd_device_unregister(&flash->mtd)); kfree(flash); - - return 0; } static struct spi_driver dataflash_driver = { diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index 6ed6c51fac69..d503821a3e60 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -264,16 +264,20 @@ static int phram_setup(const char *val) } } - if (erasesize) - div_u64_rem(len, (uint32_t)erasesize, &rem); - if (len == 0 || erasesize == 0 || erasesize > len - || erasesize > UINT_MAX || rem) { + || erasesize > UINT_MAX) { parse_err("illegal erasesize or len\n"); ret = -EINVAL; goto error; } + div_u64_rem(len, (uint32_t)erasesize, &rem); + if (rem) { + parse_err("len is not multiple of erasesize\n"); + ret = -EINVAL; + goto error; + } + ret = register_device(name, start, len, (uint32_t)erasesize); if (ret) goto error; diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c index 7f124c1bfa40..8813994ce9f4 100644 --- a/drivers/mtd/devices/sst25l.c +++ b/drivers/mtd/devices/sst25l.c @@ -398,13 +398,11 @@ static int sst25l_probe(struct spi_device *spi) return 0; } -static int sst25l_remove(struct spi_device *spi) +static void sst25l_remove(struct spi_device *spi) { struct sst25l_flash 
*flash = spi_get_drvdata(spi); WARN_ON(mtd_device_unregister(&flash->mtd)); - - return 0; } static struct spi_driver sst25l_driver = { diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 70f492dce158..eef87b28d6c8 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -546,6 +546,7 @@ static int mtd_nvmem_add(struct mtd_info *mtd) config.stride = 1; config.read_only = true; config.root_only = true; + config.ignore_wp = true; config.no_of_node = !of_device_is_compatible(node, "nvmem-cells"); config.priv = mtd; @@ -833,6 +834,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, config.owner = THIS_MODULE; config.type = NVMEM_TYPE_OTP; config.root_only = true; + config.ignore_wp = true; config.reg_read = reg_read; config.size = size; config.of_node = np; diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 20408b7db540..d986ab4e4c35 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -42,7 +42,8 @@ config MTD_NAND_OMAP2 tristate "OMAP2, OMAP3, OMAP4 and Keystone NAND controller" depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST depends on HAS_IOMEM - select OMAP_GPMC if ARCH_K3 + select MEMORY + select OMAP_GPMC help Support for NAND flash on Texas Instruments OMAP2, OMAP3, OMAP4 and Keystone platforms. 
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index f75929783b94..aee78f5f4f15 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -2106,7 +2106,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize / trans, host->hwcfg.sector_size_1k); - if (!ret) { + if (ret != -EBADMSG) { *err_addr = brcmnand_get_uncorrecc_addr(ctrl); if (*err_addr) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 1b64c5a5140d..ded4df473928 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -2285,7 +2285,7 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, this->hw.must_apply_timings = false; ret = gpmi_nfc_apply_timings(this); if (ret) - return ret; + goto out_pm; } dev_dbg(this->dev, "%s: %d instructions\n", __func__, op->ninstrs); @@ -2414,6 +2414,7 @@ unmap: this->bch = false; +out_pm: pm_runtime_mark_last_busy(this->dev); pm_runtime_put_autosuspend(this->dev); diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c index efe0ffe4f1ab..9054559e52dd 100644 --- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c +++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c @@ -68,9 +68,14 @@ static struct ingenic_ecc *ingenic_ecc_get(struct device_node *np) struct ingenic_ecc *ecc; pdev = of_find_device_by_node(np); - if (!pdev || !platform_get_drvdata(pdev)) + if (!pdev) return ERR_PTR(-EPROBE_DEFER); + if (!platform_get_drvdata(pdev)) { + put_device(&pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } + ecc = platform_get_drvdata(pdev); clk_prepare_enable(ecc->clk); diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 7c6efa3b6255..1a77542c6d67 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2016, The 
Linux Foundation. All rights reserved. */ - #include <linux/clk.h> #include <linux/slab.h> #include <linux/bitops.h> @@ -3073,10 +3072,6 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (dma_mapping_error(dev, nandc->base_dma)) return -ENXIO; - ret = qcom_nandc_alloc(nandc); - if (ret) - goto err_nandc_alloc; - ret = clk_prepare_enable(nandc->core_clk); if (ret) goto err_core_clk; @@ -3085,6 +3080,10 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (ret) goto err_aon_clk; + ret = qcom_nandc_alloc(nandc); + if (ret) + goto err_nandc_alloc; + ret = qcom_nandc_setup(nandc); if (ret) goto err_setup; @@ -3096,15 +3095,14 @@ static int qcom_nandc_probe(struct platform_device *pdev) return 0; err_setup: + qcom_nandc_unalloc(nandc); +err_nandc_alloc: clk_disable_unprepare(nandc->aon_clk); err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - qcom_nandc_unalloc(nandc); -err_nandc_alloc: dma_unmap_resource(dev, res->start, resource_size(res), DMA_BIDIRECTIONAL, 0); - return ret; } diff --git a/drivers/mtd/parsers/qcomsmempart.c b/drivers/mtd/parsers/qcomsmempart.c index 06a818cd2433..4311b89d8df0 100644 --- a/drivers/mtd/parsers/qcomsmempart.c +++ b/drivers/mtd/parsers/qcomsmempart.c @@ -58,11 +58,11 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, const struct mtd_partition **pparts, struct mtd_part_parser_data *data) { + size_t len = SMEM_FLASH_PTABLE_HDR_LEN; + int ret, i, j, tmpparts, numparts = 0; struct smem_flash_pentry *pentry; struct smem_flash_ptable *ptable; - size_t len = SMEM_FLASH_PTABLE_HDR_LEN; struct mtd_partition *parts; - int ret, i, numparts; char *name, *c; if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_4K_SECTORS) @@ -75,7 +75,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, pr_debug("Parsing partition table info from SMEM\n"); ptable = qcom_smem_get(SMEM_APPS, SMEM_AARM_PARTITION_TABLE, &len); if (IS_ERR(ptable)) { - pr_err("Error reading partition table header\n"); + if (PTR_ERR(ptable) != 
-EPROBE_DEFER) + pr_err("Error reading partition table header\n"); return PTR_ERR(ptable); } @@ -87,8 +88,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, } /* Ensure that # of partitions is less than the max we have allocated */ - numparts = le32_to_cpu(ptable->numparts); - if (numparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) { + tmpparts = le32_to_cpu(ptable->numparts); + if (tmpparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) { pr_err("Partition numbers exceed the max limit\n"); return -EINVAL; } @@ -116,11 +117,17 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, return PTR_ERR(ptable); } + for (i = 0; i < tmpparts; i++) { + pentry = &ptable->pentry[i]; + if (pentry->name[0] != '\0') + numparts++; + } + parts = kcalloc(numparts, sizeof(*parts), GFP_KERNEL); if (!parts) return -ENOMEM; - for (i = 0; i < numparts; i++) { + for (i = 0, j = 0; i < tmpparts; i++) { pentry = &ptable->pentry[i]; if (pentry->name[0] == '\0') continue; @@ -135,24 +142,25 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, for (c = name; *c != '\0'; c++) *c = tolower(*c); - parts[i].name = name; - parts[i].offset = le32_to_cpu(pentry->offset) * mtd->erasesize; - parts[i].mask_flags = pentry->attr; - parts[i].size = le32_to_cpu(pentry->length) * mtd->erasesize; + parts[j].name = name; + parts[j].offset = le32_to_cpu(pentry->offset) * mtd->erasesize; + parts[j].mask_flags = pentry->attr; + parts[j].size = le32_to_cpu(pentry->length) * mtd->erasesize; pr_debug("%d: %s offs=0x%08x size=0x%08x attr:0x%08x\n", i, pentry->name, le32_to_cpu(pentry->offset), le32_to_cpu(pentry->length), pentry->attr); + j++; } pr_debug("SMEM partition table found: ver: %d len: %d\n", - le32_to_cpu(ptable->version), numparts); + le32_to_cpu(ptable->version), tmpparts); *pparts = parts; return numparts; out_free_parts: - while (--i >= 0) - kfree(parts[i].name); + while (--j >= 0) + kfree(parts[j].name); kfree(parts); *pparts = NULL; @@ -166,6 +174,8 @@ static void parse_qcomsmem_cleanup(const struct 
mtd_partition *pparts, for (i = 0; i < nr_parts; i++) kfree(pparts[i].name); + + kfree(pparts); } static const struct of_device_id qcomsmem_of_match_table[] = { diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 50b23e71065f..3f1192d3c52d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o -obj-$(CONFIG_VXLAN) += vxlan.o +obj-$(CONFIG_VXLAN) += vxlan/ obj-$(CONFIG_GENEVE) += geneve.o obj-$(CONFIG_BAREUDP) += bareudp.o obj-$(CONFIG_GTP) += gtp.o diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 6382e1937cca..c580acb8b1d3 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -138,6 +138,9 @@ static int com20020pci_probe(struct pci_dev *pdev, return -ENOMEM; ci = (struct com20020_pci_card_info *)id->driver_data; + if (!ci) + return -EINVAL; + priv->ci = ci; mm = &ci->misc_map; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 57d182c9f7d5..55e0ba2a163d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -88,6 +88,7 @@ #if IS_ENABLED(CONFIG_TLS_DEVICE) #include <net/tls.h> #endif +#include <net/ip6_route.h> #include "bonding_priv.h" @@ -2793,31 +2794,15 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) return ret; } -/* We go to the (large) trouble of VLAN tagging ARP frames because - * switches in VLAN mode (especially if ports are configured as - * "native" to a VLAN) might not pass non-tagged frames. 
- */ -static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, - __be32 src_ip, struct bond_vlan_tag *tags) +static bool bond_handle_vlan(struct slave *slave, struct bond_vlan_tag *tags, + struct sk_buff *skb) { - struct sk_buff *skb; - struct bond_vlan_tag *outer_tag = tags; - struct net_device *slave_dev = slave->dev; struct net_device *bond_dev = slave->bond->dev; - - slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n", - arp_op, &dest_ip, &src_ip); - - skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, - NULL, slave_dev->dev_addr, NULL); - - if (!skb) { - net_err_ratelimited("ARP packet allocation failed\n"); - return; - } + struct net_device *slave_dev = slave->dev; + struct bond_vlan_tag *outer_tag = tags; if (!tags || tags->vlan_proto == VLAN_N_VID) - goto xmit; + return true; tags++; @@ -2834,7 +2819,7 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, tags->vlan_id); if (!skb) { net_err_ratelimited("failed to insert inner VLAN tag\n"); - return; + return false; } tags++; @@ -2847,8 +2832,34 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, outer_tag->vlan_id); } -xmit: - arp_xmit(skb); + return true; +} + +/* We go to the (large) trouble of VLAN tagging ARP frames because + * switches in VLAN mode (especially if ports are configured as + * "native" to a VLAN) might not pass non-tagged frames. 
+ */ +static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, + __be32 src_ip, struct bond_vlan_tag *tags) +{ + struct net_device *bond_dev = slave->bond->dev; + struct net_device *slave_dev = slave->dev; + struct sk_buff *skb; + + slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n", + arp_op, &dest_ip, &src_ip); + + skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, + NULL, slave_dev->dev_addr, NULL); + + if (!skb) { + net_err_ratelimited("ARP packet allocation failed\n"); + return; + } + + if (bond_handle_vlan(slave, tags, skb)) + arp_xmit(skb); + return; } /* Validate the device path between the @start_dev and the @end_dev. @@ -2965,30 +2976,17 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 slave->target_last_arp_rx[i] = jiffies; } -int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, - struct slave *slave) +static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, + struct slave *slave) { struct arphdr *arp = (struct arphdr *)skb->data; struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; - int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); unsigned int alen; - if (!slave_do_arp_validate(bond, slave)) { - if ((slave_do_arp_validate_only(bond) && is_arp) || - !slave_do_arp_validate_only(bond)) - slave->last_rx = jiffies; - return RX_HANDLER_ANOTHER; - } else if (!is_arp) { - return RX_HANDLER_ANOTHER; - } - alen = arp_hdr_len(bond->dev); - slave_dbg(bond->dev, slave->dev, "%s: skb->dev %s\n", - __func__, skb->dev->name); - if (alen > skb_headlen(skb)) { arp = kmalloc(alen, GFP_ATOMIC); if (!arp) @@ -3059,6 +3057,216 @@ out_unlock: return RX_HANDLER_ANOTHER; } +#if IS_ENABLED(CONFIG_IPV6) +static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr, + const struct in6_addr *saddr, struct bond_vlan_tag *tags) +{ + struct net_device *bond_dev = slave->bond->dev; + struct net_device *slave_dev = 
slave->dev; + struct in6_addr mcaddr; + struct sk_buff *skb; + + slave_dbg(bond_dev, slave_dev, "NS on slave: dst %pI6c src %pI6c\n", + daddr, saddr); + + skb = ndisc_ns_create(slave_dev, daddr, saddr, 0); + if (!skb) { + net_err_ratelimited("NS packet allocation failed\n"); + return; + } + + addrconf_addr_solict_mult(daddr, &mcaddr); + if (bond_handle_vlan(slave, tags, skb)) + ndisc_send_skb(skb, &mcaddr, saddr); +} + +static void bond_ns_send_all(struct bonding *bond, struct slave *slave) +{ + struct in6_addr *targets = bond->params.ns_targets; + struct bond_vlan_tag *tags; + struct dst_entry *dst; + struct in6_addr saddr; + struct flowi6 fl6; + int i; + + for (i = 0; i < BOND_MAX_NS_TARGETS && !ipv6_addr_any(&targets[i]); i++) { + slave_dbg(bond->dev, slave->dev, "%s: target %pI6c\n", + __func__, &targets[i]); + tags = NULL; + + /* Find out through which dev should the packet go */ + memset(&fl6, 0, sizeof(struct flowi6)); + fl6.daddr = targets[i]; + fl6.flowi6_oif = bond->dev->ifindex; + + dst = ip6_route_output(dev_net(bond->dev), NULL, &fl6); + if (dst->error) { + dst_release(dst); + /* there's no route to target - try to send arp + * probe to generate any traffic (arp_validate=0) + */ + if (bond->params.arp_validate) + pr_warn_once("%s: no route to ns_ip6_target %pI6c and arp_validate is set\n", + bond->dev->name, + &targets[i]); + bond_ns_send(slave, &targets[i], &in6addr_any, tags); + continue; + } + + /* bond device itself */ + if (dst->dev == bond->dev) + goto found; + + rcu_read_lock(); + tags = bond_verify_device_path(bond->dev, dst->dev, 0); + rcu_read_unlock(); + + if (!IS_ERR_OR_NULL(tags)) + goto found; + + /* Not our device - skip */ + slave_dbg(bond->dev, slave->dev, "no path to ns_ip6_target %pI6c via dst->dev %s\n", + &targets[i], dst->dev ? 
dst->dev->name : "NULL"); + + dst_release(dst); + continue; + +found: + if (!ipv6_dev_get_saddr(dev_net(dst->dev), dst->dev, &targets[i], 0, &saddr)) + bond_ns_send(slave, &targets[i], &saddr, tags); + dst_release(dst); + kfree(tags); + } +} + +static int bond_confirm_addr6(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct in6_addr *addr = (struct in6_addr *)priv->data; + + return ipv6_chk_addr(dev_net(dev), addr, dev, 0); +} + +static bool bond_has_this_ip6(struct bonding *bond, struct in6_addr *addr) +{ + struct netdev_nested_priv priv = { + .data = addr, + }; + int ret = false; + + if (bond_confirm_addr6(bond->dev, &priv)) + return true; + + rcu_read_lock(); + if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_confirm_addr6, &priv)) + ret = true; + rcu_read_unlock(); + + return ret; +} + +static void bond_validate_ns(struct bonding *bond, struct slave *slave, + struct in6_addr *saddr, struct in6_addr *daddr) +{ + int i; + + if (ipv6_addr_any(saddr) || !bond_has_this_ip6(bond, daddr)) { + slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c tip %pI6c not found\n", + __func__, saddr, daddr); + return; + } + + i = bond_get_targets_ip6(bond->params.ns_targets, saddr); + if (i == -1) { + slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c not found in targets\n", + __func__, saddr); + return; + } + slave->last_rx = jiffies; + slave->target_last_arp_rx[i] = jiffies; +} + +static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, + struct slave *slave) +{ + struct slave *curr_active_slave, *curr_arp_slave; + struct icmp6hdr *hdr = icmp6_hdr(skb); + struct in6_addr *saddr, *daddr; + + if (skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK || + hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + goto out; + + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + + slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", + __func__, slave->dev->name, bond_slave_state(slave), + 
bond->params.arp_validate, slave_do_arp_validate(bond, slave), + saddr, daddr); + + curr_active_slave = rcu_dereference(bond->curr_active_slave); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); + + /* We 'trust' the received ARP enough to validate it if: + * see bond_arp_rcv(). + */ + if (bond_is_active_slave(slave)) + bond_validate_ns(bond, slave, saddr, daddr); + else if (curr_active_slave && + time_after(slave_last_rx(bond, curr_active_slave), + curr_active_slave->last_link_up)) + bond_validate_ns(bond, slave, saddr, daddr); + else if (curr_arp_slave && + bond_time_in_interval(bond, + dev_trans_start(curr_arp_slave->dev), 1)) + bond_validate_ns(bond, slave, saddr, daddr); + +out: + return RX_HANDLER_ANOTHER; +} +#endif + +int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, + struct slave *slave) +{ +#if IS_ENABLED(CONFIG_IPV6) + bool is_ipv6 = skb->protocol == __cpu_to_be16(ETH_P_IPV6); +#endif + bool is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); + + slave_dbg(bond->dev, slave->dev, "%s: skb->dev %s\n", + __func__, skb->dev->name); + + /* Use arp validate logic for both ARP and NS */ + if (!slave_do_arp_validate(bond, slave)) { + if ((slave_do_arp_validate_only(bond) && is_arp) || +#if IS_ENABLED(CONFIG_IPV6) + (slave_do_arp_validate_only(bond) && is_ipv6) || +#endif + !slave_do_arp_validate_only(bond)) + slave->last_rx = jiffies; + return RX_HANDLER_ANOTHER; + } else if (is_arp) { + return bond_arp_rcv(skb, bond, slave); +#if IS_ENABLED(CONFIG_IPV6) + } else if (is_ipv6) { + return bond_na_rcv(skb, bond, slave); +#endif + } else { + return RX_HANDLER_ANOTHER; + } +} + +static void bond_send_validate(struct bonding *bond, struct slave *slave) +{ + bond_arp_send_all(bond, slave); +#if IS_ENABLED(CONFIG_IPV6) + bond_ns_send_all(bond, slave); +#endif +} + /* function to verify if we're in the arp_interval timeslice, returns true if * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + * arp_interval/2) . 
the arp_interval/2 is needed for really fast networks. @@ -3154,7 +3362,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) * to be unstable during low/no traffic periods */ if (bond_slave_is_up(slave)) - bond_arp_send_all(bond, slave); + bond_send_validate(bond, slave); } rcu_read_unlock(); @@ -3368,7 +3576,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) curr_active_slave->dev->name); if (curr_active_slave) { - bond_arp_send_all(bond, curr_active_slave); + bond_send_validate(bond, curr_active_slave); return should_notify_rtnl; } @@ -3420,7 +3628,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) bond_set_slave_link_state(new_slave, BOND_LINK_BACK, BOND_SLAVE_NOTIFY_LATER); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); - bond_arp_send_all(bond, new_slave); + bond_send_validate(bond, new_slave); new_slave->last_link_up = jiffies; rcu_assign_pointer(bond->current_arp_slave, new_slave); @@ -3956,7 +4164,7 @@ static int bond_open(struct net_device *bond_dev) if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
*/ queue_delayed_work(bond->wq, &bond->arp_work, 0); - bond->recv_probe = bond_arp_rcv; + bond->recv_probe = bond_rcv_validate; } if (BOND_MODE(bond) == BOND_MODE_8023AD) { @@ -5937,6 +6145,7 @@ static int bond_check_params(struct bond_params *params) strscpy_pad(params->primary, primary, sizeof(params->primary)); memcpy(params->arp_targets, arp_target, sizeof(arp_target)); + memset(params->ns_targets, 0, sizeof(struct in6_addr) * BOND_MAX_NS_TARGETS); return 0; } diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 1007bf6d385d..f427fa1737c7 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -14,6 +14,7 @@ #include <net/netlink.h> #include <net/rtnetlink.h> #include <net/bonding.h> +#include <net/ipv6.h> static size_t bond_get_slave_size(const struct net_device *bond_dev, const struct net_device *slave_dev) @@ -111,6 +112,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 }, [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, + [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -272,6 +274,40 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], if (err) return err; } +#if IS_ENABLED(CONFIG_IPV6) + if (data[IFLA_BOND_NS_IP6_TARGET]) { + struct nlattr *attr; + int i = 0, rem; + + bond_option_ns_ip6_targets_clear(bond); + nla_for_each_nested(attr, data[IFLA_BOND_NS_IP6_TARGET], rem) { + struct in6_addr addr6; + + if (nla_len(attr) < sizeof(addr6)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address"); + return -EINVAL; + } + + addr6 = nla_get_in6_addr(attr); + + if (ipv6_addr_type(&addr6) & IPV6_ADDR_LINKLOCAL) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 addr6"); + return -EINVAL; + } + + bond_opt_initextra(&newval, &addr6, sizeof(addr6)); + err = __bond_opt_set(bond, 
BOND_OPT_NS_TARGETS, + &newval); + if (err) + break; + i++; + } + if (i == 0 && bond->params.arp_interval) + netdev_warn(bond->dev, "Removing last ns target with arp_interval on\n"); + if (err) + return err; + } +#endif if (data[IFLA_BOND_ARP_VALIDATE]) { int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); @@ -526,6 +562,9 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */ + /* IFLA_BOND_NS_IP6_TARGET */ + nla_total_size(sizeof(struct nlattr)) + + nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + 0; } @@ -603,6 +642,26 @@ static int bond_fill_info(struct sk_buff *skb, bond->params.arp_all_targets)) goto nla_put_failure; +#if IS_ENABLED(CONFIG_IPV6) + targets = nla_nest_start(skb, IFLA_BOND_NS_IP6_TARGET); + if (!targets) + goto nla_put_failure; + + targets_added = 0; + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { + if (!ipv6_addr_any(&bond->params.ns_targets[i])) { + if (nla_put_in6_addr(skb, i, &bond->params.ns_targets[i])) + goto nla_put_failure; + targets_added = 1; + } + } + + if (targets_added) + nla_nest_end(skb, targets); + else + nla_nest_cancel(skb, targets); +#endif + primary = rtnl_dereference(bond->primary_slave); if (primary && nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex)) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 2e8484a91a0e..64f7db2627ce 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -34,6 +34,10 @@ static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target); static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target); static int bond_option_arp_ip_targets_set(struct bonding *bond, const struct bond_opt_value *newval); +#if IS_ENABLED(CONFIG_IPV6) +static int 
bond_option_ns_ip6_targets_set(struct bonding *bond, + const struct bond_opt_value *newval); +#endif static int bond_option_arp_validate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_arp_all_targets_set(struct bonding *bond, @@ -295,6 +299,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_arp_ip_targets_set }, +#if IS_ENABLED(CONFIG_IPV6) + [BOND_OPT_NS_TARGETS] = { + .id = BOND_OPT_NS_TARGETS, + .name = "ns_ip6_target", + .desc = "NS targets in ffff:ffff::ffff:ffff form", + .flags = BOND_OPTFLAG_RAWVAL, + .set = bond_option_ns_ip6_targets_set + }, +#endif [BOND_OPT_DOWNDELAY] = { .id = BOND_OPT_DOWNDELAY, .name = "downdelay", @@ -1052,7 +1065,7 @@ static int bond_option_arp_interval_set(struct bonding *bond, cancel_delayed_work_sync(&bond->arp_work); } else { /* arp_validate can be set only in active-backup mode */ - bond->recv_probe = bond_arp_rcv; + bond->recv_probe = bond_rcv_validate; cancel_delayed_work_sync(&bond->mii_work); queue_delayed_work(bond->wq, &bond->arp_work, 0); } @@ -1184,6 +1197,65 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond, return ret; } +#if IS_ENABLED(CONFIG_IPV6) +static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, + struct in6_addr *target, + unsigned long last_rx) +{ + struct in6_addr *targets = bond->params.ns_targets; + struct list_head *iter; + struct slave *slave; + + if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) { + bond_for_each_slave(bond, slave, iter) + slave->target_last_arp_rx[slot] = last_rx; + targets[slot] = *target; + } +} + +void bond_option_ns_ip6_targets_clear(struct bonding *bond) +{ + struct in6_addr addr_any = in6addr_any; + int i; + + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) + _bond_options_ns_ip6_target_set(bond, i, &addr_any, 0); +} + +static int bond_option_ns_ip6_targets_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + struct in6_addr *target 
= (struct in6_addr *)newval->extra; + struct in6_addr *targets = bond->params.ns_targets; + struct in6_addr addr_any = in6addr_any; + int index; + + if (!bond_is_ip6_target_ok(target)) { + netdev_err(bond->dev, "invalid NS target %pI6c specified for addition\n", + target); + return -EINVAL; + } + + if (bond_get_targets_ip6(targets, target) != -1) { /* dup */ + netdev_err(bond->dev, "NS target %pI6c is already present\n", + target); + return -EINVAL; + } + + index = bond_get_targets_ip6(targets, &addr_any); /* first free slot */ + if (index == -1) { + netdev_err(bond->dev, "NS target table is full!\n"); + return -EINVAL; + } + + netdev_dbg(bond->dev, "Adding NS target %pI6c\n", target); + + _bond_options_ns_ip6_target_set(bond, index, target, jiffies); + + return 0; +} +#endif + static int bond_option_arp_validate_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 2a7af611d43a..688075859ae4 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -196,7 +196,7 @@ static void ldisc_receive(struct tty_struct *tty, const u8 *data, skb_reset_mac_header(skb); debugfs_rx(ser, data, count); /* Push received packet up the stack. 
*/ - ret = netif_rx_ni(skb); + ret = netif_rx(skb); if (!ret) { ser->dev->stats.rx_packets++; ser->dev->stats.rx_bytes += count; diff --git a/drivers/net/can/c_can/c_can_ethtool.c b/drivers/net/can/c_can/c_can_ethtool.c index 6655146294fc..8a826a6813bd 100644 --- a/drivers/net/can/c_can/c_can_ethtool.c +++ b/drivers/net/can/c_can/c_can_ethtool.c @@ -11,14 +11,6 @@ #include "c_can.h" -static void c_can_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *info) -{ - struct c_can_priv *priv = netdev_priv(netdev); - strscpy(info->driver, "c_can", sizeof(info->driver)); - strscpy(info->bus_info, dev_name(priv->device), sizeof(info->bus_info)); -} - static void c_can_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -33,7 +25,6 @@ static void c_can_get_ringparam(struct net_device *netdev, } static const struct ethtool_ops c_can_ethtool_ops = { - .get_drvinfo = c_can_get_drvinfo, .get_ringparam = c_can_get_ringparam, }; diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c index d5fca3bfaf9a..2103bcca9012 100644 --- a/drivers/net/can/dev/bittiming.c +++ b/drivers/net/can/dev/bittiming.c @@ -24,7 +24,7 @@ */ static int can_update_sample_point(const struct can_bittiming_const *btc, - unsigned int sample_point_nominal, unsigned int tseg, + const unsigned int sample_point_nominal, const unsigned int tseg, unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, unsigned int *sample_point_error_ptr) { @@ -63,7 +63,7 @@ can_update_sample_point(const struct can_bittiming_const *btc, return best_sample_point; } -int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc) { struct can_priv *priv = netdev_priv(dev); @@ -208,10 +208,10 @@ void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, * prescaler value brp. 
You can find more information in the header * file linux/can/netlink.h. */ -static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt, +static int can_fixup_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc) { - struct can_priv *priv = netdev_priv(dev); + const struct can_priv *priv = netdev_priv(dev); unsigned int tseg1, alltseg; u64 brp64; @@ -244,25 +244,21 @@ static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt, /* Checks the validity of predefined bitrate settings */ static int -can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt, +can_validate_bitrate(const struct net_device *dev, const struct can_bittiming *bt, const u32 *bitrate_const, const unsigned int bitrate_const_cnt) { - struct can_priv *priv = netdev_priv(dev); unsigned int i; for (i = 0; i < bitrate_const_cnt; i++) { if (bt->bitrate == bitrate_const[i]) - break; + return 0; } - if (i >= priv->bitrate_const_cnt) - return -EINVAL; - - return 0; + return -EINVAL; } -int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, const u32 *bitrate_const, const unsigned int bitrate_const_cnt) diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 04687b15b250..41645a24384c 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -388,7 +388,7 @@ out_power: return ret; } -static int tcan4x5x_can_remove(struct spi_device *spi) +static void tcan4x5x_can_remove(struct spi_device *spi) { struct tcan4x5x_priv *priv = spi_get_drvdata(spi); @@ -397,8 +397,6 @@ static int tcan4x5x_can_remove(struct spi_device *spi) tcan4x5x_power_enable(priv->power, 0); m_can_class_free_dev(priv->cdev.net); - - return 0; } static const struct of_device_id tcan4x5x_of_match[] = { diff --git 
a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index b7dc1c32875f..acd74725831f 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1715,15 +1715,15 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, netif_napi_add(ndev, &priv->napi, rcar_canfd_rx_poll, RCANFD_NAPI_WEIGHT); + spin_lock_init(&priv->tx_lock); + devm_can_led_init(ndev); + gpriv->ch[priv->channel] = priv; err = register_candev(ndev); if (err) { dev_err(&pdev->dev, "register_candev() failed, error %d\n", err); goto fail_candev; } - spin_lock_init(&priv->tx_lock); - devm_can_led_init(ndev); - gpriv->ch[priv->channel] = priv; dev_info(&pdev->dev, "device registered (channel %u)\n", priv->channel); return 0; diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index d74e895bddf7..8d27ac66ca7f 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -392,13 +392,10 @@ static int softing_netdev_open(struct net_device *ndev) static int softing_netdev_stop(struct net_device *ndev) { - int ret; - netif_stop_queue(ndev); /* softing cycle does close_candev() */ - ret = softing_startstop(ndev, 0); - return ret; + return softing_startstop(ndev, 0); } static int softing_candev_set_mode(struct net_device *ndev, enum can_mode mode) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index cfcc14fe3e42..664b8f14d7b0 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -948,7 +948,7 @@ static int hi3110_can_probe(struct spi_device *spi) return dev_err_probe(dev, ret, "Probe failed\n"); } -static int hi3110_can_remove(struct spi_device *spi) +static void hi3110_can_remove(struct spi_device *spi) { struct hi3110_priv *priv = spi_get_drvdata(spi); struct net_device *net = priv->net; @@ -960,8 +960,6 @@ static int hi3110_can_remove(struct spi_device *spi) clk_disable_unprepare(priv->clk); 
free_candev(net); - - return 0; } static int __maybe_unused hi3110_can_suspend(struct device *dev) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 025e07cb7439..d23edaf22420 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1427,7 +1427,7 @@ out_free: return ret; } -static int mcp251x_can_remove(struct spi_device *spi) +static void mcp251x_can_remove(struct spi_device *spi) { struct mcp251x_priv *priv = spi_get_drvdata(spi); struct net_device *net = priv->net; @@ -1442,8 +1442,6 @@ static int mcp251x_can_remove(struct spi_device *spi) clk_disable_unprepare(priv->clk); free_candev(net); - - return 0; } static int __maybe_unused mcp251x_can_suspend(struct device *dev) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c index 2f9a623d381d..0d96097a2547 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c @@ -78,7 +78,7 @@ int mcp251xfd_chip_fifo_init(const struct mcp251xfd_priv *priv) if (err) return err; - /* FIFO 1 - TX */ + /* TX FIFO */ val = FIELD_PREP(MCP251XFD_REG_FIFOCON_FSIZE_MASK, tx_ring->obj_num - 1) | MCP251XFD_REG_FIFOCON_TXEN | @@ -99,7 +99,7 @@ int mcp251xfd_chip_fifo_init(const struct mcp251xfd_priv *priv) MCP251XFD_REG_FIFOCON_TXAT_UNLIMITED); err = regmap_write(priv->map_reg, - MCP251XFD_REG_FIFOCON(MCP251XFD_TX_FIFO), + MCP251XFD_REG_FIFOCON(priv->tx->fifo_nr), val); if (err) return err; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index b5986df6eca0..3da17cadbd63 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -112,6 +112,22 @@ static const char *mcp251xfd_get_mode_str(const u8 mode) return "<unknown>"; } +static const char * +mcp251xfd_get_osc_str(const u32 osc, const u32 osc_reference) +{ + switch (~osc & 
osc_reference & + (MCP251XFD_REG_OSC_OSCRDY | MCP251XFD_REG_OSC_PLLRDY)) { + case MCP251XFD_REG_OSC_PLLRDY: + return "PLL"; + case MCP251XFD_REG_OSC_OSCRDY: + return "Oscillator"; + case MCP251XFD_REG_OSC_PLLRDY | MCP251XFD_REG_OSC_OSCRDY: + return "Oscillator/PLL"; + } + + return "<unknown>"; +} + static inline int mcp251xfd_vdd_enable(const struct mcp251xfd_priv *priv) { if (!priv->reg_vdd) @@ -178,6 +194,11 @@ static int mcp251xfd_clks_and_vdd_disable(const struct mcp251xfd_priv *priv) return 0; } +static inline bool mcp251xfd_reg_invalid(u32 reg) +{ + return reg == 0x0 || reg == 0xffffffff; +} + static inline int mcp251xfd_chip_get_mode(const struct mcp251xfd_priv *priv, u8 *mode) { @@ -197,34 +218,55 @@ static int __mcp251xfd_chip_set_mode(const struct mcp251xfd_priv *priv, const u8 mode_req, bool nowait) { - u32 con, con_reqop; + u32 con = 0, con_reqop, osc = 0; + u8 mode; int err; con_reqop = FIELD_PREP(MCP251XFD_REG_CON_REQOP_MASK, mode_req); err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_CON, MCP251XFD_REG_CON_REQOP_MASK, con_reqop); - if (err) + if (err == -EBADMSG) { + netdev_err(priv->ndev, + "Failed to set Requested Operation Mode.\n"); + + return -ENODEV; + } else if (err) { return err; + } if (mode_req == MCP251XFD_REG_CON_MODE_SLEEP || nowait) return 0; err = regmap_read_poll_timeout(priv->map_reg, MCP251XFD_REG_CON, con, + !mcp251xfd_reg_invalid(con) && FIELD_GET(MCP251XFD_REG_CON_OPMOD_MASK, con) == mode_req, MCP251XFD_POLL_SLEEP_US, MCP251XFD_POLL_TIMEOUT_US); - if (err) { - u8 mode = FIELD_GET(MCP251XFD_REG_CON_OPMOD_MASK, con); + if (err != -ETIMEDOUT && err != -EBADMSG) + return err; + + /* Ignore return value. + * Print below error messages, even if this fails. 
+ */ + regmap_read(priv->map_reg, MCP251XFD_REG_OSC, &osc); + if (mcp251xfd_reg_invalid(con)) { netdev_err(priv->ndev, - "Controller failed to enter mode %s Mode (%u) and stays in %s Mode (%u).\n", - mcp251xfd_get_mode_str(mode_req), mode_req, - mcp251xfd_get_mode_str(mode), mode); - return err; + "Failed to read CAN Control Register (con=0x%08x, osc=0x%08x).\n", + con, osc); + + return -ENODEV; } - return 0; + mode = FIELD_GET(MCP251XFD_REG_CON_OPMOD_MASK, con); + netdev_err(priv->ndev, + "Controller failed to enter mode %s Mode (%u) and stays in %s Mode (%u) (con=0x%08x, osc=0x%08x).\n", + mcp251xfd_get_mode_str(mode_req), mode_req, + mcp251xfd_get_mode_str(mode), mode, + con, osc); + + return -ETIMEDOUT; } static inline int @@ -241,27 +283,58 @@ mcp251xfd_chip_set_mode_nowait(const struct mcp251xfd_priv *priv, return __mcp251xfd_chip_set_mode(priv, mode_req, true); } -static inline bool mcp251xfd_osc_invalid(u32 reg) +static int +mcp251xfd_chip_wait_for_osc_ready(const struct mcp251xfd_priv *priv, + u32 osc_reference, u32 osc_mask) { - return reg == 0x0 || reg == 0xffffffff; + u32 osc; + int err; + + err = regmap_read_poll_timeout(priv->map_reg, MCP251XFD_REG_OSC, osc, + !mcp251xfd_reg_invalid(osc) && + (osc & osc_mask) == osc_reference, + MCP251XFD_OSC_STAB_SLEEP_US, + MCP251XFD_OSC_STAB_TIMEOUT_US); + if (err != -ETIMEDOUT) + return err; + + if (mcp251xfd_reg_invalid(osc)) { + netdev_err(priv->ndev, + "Failed to read Oscillator Configuration Register (osc=0x%08x).\n", + osc); + return -ENODEV; + } + + netdev_err(priv->ndev, + "Timeout waiting for %s ready (osc=0x%08x, osc_reference=0x%08x, osc_mask=0x%08x).\n", + mcp251xfd_get_osc_str(osc, osc_reference), + osc, osc_reference, osc_mask); + + return -ETIMEDOUT; } -static int mcp251xfd_chip_clock_enable(const struct mcp251xfd_priv *priv) +static int mcp251xfd_chip_wake(const struct mcp251xfd_priv *priv) { u32 osc, osc_reference, osc_mask; int err; - /* Set Power On Defaults for "Clock Output Divisor" and remove 
- * "Oscillator Disable" bit. + /* For normal sleep on MCP2517FD and MCP2518FD, clearing + * "Oscillator Disable" will wake the chip. For low power mode + * on MCP2518FD, asserting the chip select will wake the + * chip. Writing to the Oscillator register will wake it in + * both cases. */ osc = FIELD_PREP(MCP251XFD_REG_OSC_CLKODIV_MASK, MCP251XFD_REG_OSC_CLKODIV_10); + + /* We cannot check for the PLL ready bit (either set or + * unset), as the PLL might be enabled. This can happen if the + * system reboots, while the mcp251xfd stays powered. + */ osc_reference = MCP251XFD_REG_OSC_OSCRDY; - osc_mask = MCP251XFD_REG_OSC_OSCRDY | MCP251XFD_REG_OSC_PLLRDY; + osc_mask = MCP251XFD_REG_OSC_OSCRDY; - /* Note: - * - * If the controller is in Sleep Mode the following write only + /* If the controller is in Sleep Mode the following write only * removes the "Oscillator Disable" bit and powers it up. All * other bits are unaffected. */ @@ -269,24 +342,31 @@ static int mcp251xfd_chip_clock_enable(const struct mcp251xfd_priv *priv) if (err) return err; - /* Wait for "Oscillator Ready" bit */ - err = regmap_read_poll_timeout(priv->map_reg, MCP251XFD_REG_OSC, osc, - (osc & osc_mask) == osc_reference, - MCP251XFD_OSC_STAB_SLEEP_US, - MCP251XFD_OSC_STAB_TIMEOUT_US); - if (mcp251xfd_osc_invalid(osc)) { - netdev_err(priv->ndev, - "Failed to detect %s (osc=0x%08x).\n", - mcp251xfd_get_model_str(priv), osc); - return -ENODEV; - } else if (err == -ETIMEDOUT) { - netdev_err(priv->ndev, - "Timeout waiting for Oscillator Ready (osc=0x%08x, osc_reference=0x%08x)\n", - osc, osc_reference); - return -ETIMEDOUT; + /* Sometimes the PLL is stuck enabled, the controller never + * sets the OSC Ready bit, and we get an -ETIMEDOUT. Our + * caller takes care of retry. 
+ */ + return mcp251xfd_chip_wait_for_osc_ready(priv, osc_reference, osc_mask); +} + +static inline int mcp251xfd_chip_sleep(const struct mcp251xfd_priv *priv) +{ + if (priv->pll_enable) { + u32 osc; + int err; + + /* Turn off PLL */ + osc = FIELD_PREP(MCP251XFD_REG_OSC_CLKODIV_MASK, + MCP251XFD_REG_OSC_CLKODIV_10); + err = regmap_write(priv->map_reg, MCP251XFD_REG_OSC, osc); + if (err) + netdev_err(priv->ndev, + "Failed to disable PLL.\n"); + + priv->spi->max_speed_hz = priv->spi_max_speed_hz_slow; } - return err; + return mcp251xfd_chip_set_mode(priv, MCP251XFD_REG_CON_MODE_SLEEP); } static int mcp251xfd_chip_softreset_do(const struct mcp251xfd_priv *priv) @@ -294,10 +374,10 @@ static int mcp251xfd_chip_softreset_do(const struct mcp251xfd_priv *priv) const __be16 cmd = mcp251xfd_cmd_reset(); int err; - /* The Set Mode and SPI Reset command only seems to works if - * the controller is not in Sleep Mode. + /* The Set Mode and SPI Reset command only works if the + * controller is not in Sleep Mode. */ - err = mcp251xfd_chip_clock_enable(priv); + err = mcp251xfd_chip_wake(priv); if (err) return err; @@ -311,34 +391,29 @@ static int mcp251xfd_chip_softreset_do(const struct mcp251xfd_priv *priv) static int mcp251xfd_chip_softreset_check(const struct mcp251xfd_priv *priv) { - u32 osc, osc_reference; + u32 osc_reference, osc_mask; u8 mode; int err; - err = mcp251xfd_chip_get_mode(priv, &mode); - if (err) - return err; - - if (mode != MCP251XFD_REG_CON_MODE_CONFIG) { - netdev_info(priv->ndev, - "Controller not in Config Mode after reset, but in %s Mode (%u).\n", - mcp251xfd_get_mode_str(mode), mode); - return -ETIMEDOUT; - } - + /* Check for reset defaults of OSC reg. + * This will take care of stabilization period. 
+ */ osc_reference = MCP251XFD_REG_OSC_OSCRDY | FIELD_PREP(MCP251XFD_REG_OSC_CLKODIV_MASK, MCP251XFD_REG_OSC_CLKODIV_10); + osc_mask = osc_reference | MCP251XFD_REG_OSC_PLLRDY; + err = mcp251xfd_chip_wait_for_osc_ready(priv, osc_reference, osc_mask); + if (err) + return err; - /* check reset defaults of OSC reg */ - err = regmap_read(priv->map_reg, MCP251XFD_REG_OSC, &osc); + err = mcp251xfd_chip_get_mode(priv, &mode); if (err) return err; - if (osc != osc_reference) { + if (mode != MCP251XFD_REG_CON_MODE_CONFIG) { netdev_info(priv->ndev, - "Controller failed to reset. osc=0x%08x, reference value=0x%08x.\n", - osc, osc_reference); + "Controller not in Config Mode after reset, but in %s Mode (%u).\n", + mcp251xfd_get_mode_str(mode), mode); return -ETIMEDOUT; } @@ -374,7 +449,7 @@ static int mcp251xfd_chip_softreset(const struct mcp251xfd_priv *priv) static int mcp251xfd_chip_clock_init(const struct mcp251xfd_priv *priv) { - u32 osc; + u32 osc, osc_reference, osc_mask; int err; /* Activate Low Power Mode on Oscillator Disable. This only @@ -384,10 +459,29 @@ static int mcp251xfd_chip_clock_init(const struct mcp251xfd_priv *priv) osc = MCP251XFD_REG_OSC_LPMEN | FIELD_PREP(MCP251XFD_REG_OSC_CLKODIV_MASK, MCP251XFD_REG_OSC_CLKODIV_10); + osc_reference = MCP251XFD_REG_OSC_OSCRDY; + osc_mask = MCP251XFD_REG_OSC_OSCRDY | MCP251XFD_REG_OSC_PLLRDY; + + if (priv->pll_enable) { + osc |= MCP251XFD_REG_OSC_PLLEN; + osc_reference |= MCP251XFD_REG_OSC_PLLRDY; + } + err = regmap_write(priv->map_reg, MCP251XFD_REG_OSC, osc); if (err) return err; + err = mcp251xfd_chip_wait_for_osc_ready(priv, osc_reference, osc_mask); + if (err) + return err; + + priv->spi->max_speed_hz = priv->spi_max_speed_hz_fast; + + return 0; +} + +static int mcp251xfd_chip_timestamp_init(const struct mcp251xfd_priv *priv) +{ /* Set Time Base Counter Prescaler to 1. 
* * This means an overflow of the 32 bit Time Base Counter @@ -628,14 +722,14 @@ static int mcp251xfd_chip_interrupts_disable(const struct mcp251xfd_priv *priv) return regmap_write(priv->map_reg, MCP251XFD_REG_CRC, 0); } -static int mcp251xfd_chip_stop(struct mcp251xfd_priv *priv, - const enum can_state state) +static void mcp251xfd_chip_stop(struct mcp251xfd_priv *priv, + const enum can_state state) { priv->can.state = state; mcp251xfd_chip_interrupts_disable(priv); mcp251xfd_chip_rx_int_disable(priv); - return mcp251xfd_chip_set_mode(priv, MCP251XFD_REG_CON_MODE_SLEEP); + mcp251xfd_chip_sleep(priv); } static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) @@ -650,6 +744,10 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) if (err) goto out_chip_stop; + err = mcp251xfd_chip_timestamp_init(priv); + if (err) + goto out_chip_stop; + err = mcp251xfd_set_bittiming(priv); if (err) goto out_chip_stop; @@ -662,7 +760,9 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) if (err) goto out_chip_stop; - mcp251xfd_ring_init(priv); + err = mcp251xfd_ring_init(priv); + if (err) + goto out_chip_stop; err = mcp251xfd_chip_fifo_init(priv); if (err) @@ -1284,6 +1384,20 @@ static int mcp251xfd_handle_spicrcif(struct mcp251xfd_priv *priv) return 0; } +static int mcp251xfd_read_regs_status(struct mcp251xfd_priv *priv) +{ + const int val_bytes = regmap_get_val_bytes(priv->map_reg); + size_t len; + + if (priv->rx_ring_num == 1) + len = sizeof(priv->regs_status.intf); + else + len = sizeof(priv->regs_status); + + return regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT, + &priv->regs_status, len / val_bytes); +} + #define mcp251xfd_handle(priv, irq, ...) 
\ ({ \ struct mcp251xfd_priv *_priv = (priv); \ @@ -1300,7 +1414,6 @@ static int mcp251xfd_handle_spicrcif(struct mcp251xfd_priv *priv) static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) { struct mcp251xfd_priv *priv = dev_id; - const int val_bytes = regmap_get_val_bytes(priv->map_reg); irqreturn_t handled = IRQ_NONE; int err; @@ -1312,21 +1425,28 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) if (!rx_pending) break; + /* Assume 1st RX-FIFO pending, if other FIFOs + * are pending the main IRQ handler will take + * care. + */ + priv->regs_status.rxif = BIT(priv->rx[0]->fifo_nr); err = mcp251xfd_handle(priv, rxif); if (err) goto out_fail; handled = IRQ_HANDLED; - } while (1); + + /* We don't know which RX-FIFO is pending, but only + * handle the 1st RX-FIFO. Leave loop here if we have + * more than 1 RX-FIFO to avoid starvation. + */ + } while (priv->rx_ring_num == 1); do { u32 intf_pending, intf_pending_clearable; bool set_normal_mode = false; - err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT, - &priv->regs_status, - sizeof(priv->regs_status) / - val_bytes); + err = mcp251xfd_read_regs_status(priv); if (err) goto out_fail; @@ -1621,8 +1741,9 @@ static int mcp251xfd_register_check_rx_int(struct mcp251xfd_priv *priv) } static int -mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, - u32 *dev_id, u32 *effective_speed_hz) +mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, u32 *dev_id, + u32 *effective_speed_hz_slow, + u32 *effective_speed_hz_fast) { struct mcp251xfd_map_buf_nocrc *buf_rx; struct mcp251xfd_map_buf_nocrc *buf_tx; @@ -1641,16 +1762,20 @@ mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, xfer[0].tx_buf = buf_tx; xfer[0].len = sizeof(buf_tx->cmd); + xfer[0].speed_hz = priv->spi_max_speed_hz_slow; xfer[1].rx_buf = buf_rx->data; xfer[1].len = sizeof(dev_id); + xfer[1].speed_hz = priv->spi_max_speed_hz_fast; mcp251xfd_spi_cmd_read_nocrc(&buf_tx->cmd, MCP251XFD_REG_DEVID); + err = 
spi_sync_transfer(priv->spi, xfer, ARRAY_SIZE(xfer)); if (err) goto out_kfree_buf_tx; *dev_id = be32_to_cpup((__be32 *)buf_rx->data); - *effective_speed_hz = xfer->effective_speed_hz; + *effective_speed_hz_slow = xfer[0].effective_speed_hz; + *effective_speed_hz_fast = xfer[1].effective_speed_hz; out_kfree_buf_tx: kfree(buf_tx); @@ -1666,34 +1791,45 @@ mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, static int mcp251xfd_register_done(const struct mcp251xfd_priv *priv) { - u32 dev_id, effective_speed_hz; + u32 dev_id, effective_speed_hz_slow, effective_speed_hz_fast; + unsigned long clk_rate; int err; err = mcp251xfd_register_get_dev_id(priv, &dev_id, - &effective_speed_hz); + &effective_speed_hz_slow, + &effective_speed_hz_fast); if (err) return err; + clk_rate = clk_get_rate(priv->clk); + netdev_info(priv->ndev, - "%s rev%lu.%lu (%cRX_INT %cMAB_NO_WARN %cCRC_REG %cCRC_RX %cCRC_TX %cECC %cHD c:%u.%02uMHz m:%u.%02uMHz r:%u.%02uMHz e:%u.%02uMHz) successfully initialized.\n", + "%s rev%lu.%lu (%cRX_INT %cPLL %cMAB_NO_WARN %cCRC_REG %cCRC_RX %cCRC_TX %cECC %cHD o:%lu.%02luMHz c:%u.%02uMHz m:%u.%02uMHz rs:%u.%02uMHz es:%u.%02uMHz rf:%u.%02uMHz ef:%u.%02uMHz) successfully initialized.\n", mcp251xfd_get_model_str(priv), FIELD_GET(MCP251XFD_REG_DEVID_ID_MASK, dev_id), FIELD_GET(MCP251XFD_REG_DEVID_REV_MASK, dev_id), priv->rx_int ? '+' : '-', + priv->pll_enable ? 
'+' : '-', MCP251XFD_QUIRK_ACTIVE(MAB_NO_WARN), MCP251XFD_QUIRK_ACTIVE(CRC_REG), MCP251XFD_QUIRK_ACTIVE(CRC_RX), MCP251XFD_QUIRK_ACTIVE(CRC_TX), MCP251XFD_QUIRK_ACTIVE(ECC), MCP251XFD_QUIRK_ACTIVE(HALF_DUPLEX), + clk_rate / 1000000, + clk_rate % 1000000 / 1000 / 10, priv->can.clock.freq / 1000000, priv->can.clock.freq % 1000000 / 1000 / 10, priv->spi_max_speed_hz_orig / 1000000, priv->spi_max_speed_hz_orig % 1000000 / 1000 / 10, - priv->spi->max_speed_hz / 1000000, - priv->spi->max_speed_hz % 1000000 / 1000 / 10, - effective_speed_hz / 1000000, - effective_speed_hz % 1000000 / 1000 / 10); + priv->spi_max_speed_hz_slow / 1000000, + priv->spi_max_speed_hz_slow % 1000000 / 1000 / 10, + effective_speed_hz_slow / 1000000, + effective_speed_hz_slow % 1000000 / 1000 / 10, + priv->spi_max_speed_hz_fast / 1000000, + priv->spi_max_speed_hz_fast % 1000000 / 1000 / 10, + effective_speed_hz_fast / 1000000, + effective_speed_hz_fast % 1000000 / 1000 / 10); return 0; } @@ -1719,19 +1855,25 @@ static int mcp251xfd_register(struct mcp251xfd_priv *priv) if (err == -ENODEV) goto out_runtime_disable; if (err) - goto out_chip_set_mode_sleep; + goto out_chip_sleep; + + err = mcp251xfd_chip_clock_init(priv); + if (err == -ENODEV) + goto out_runtime_disable; + if (err) + goto out_chip_sleep; err = mcp251xfd_register_chip_detect(priv); if (err) - goto out_chip_set_mode_sleep; + goto out_chip_sleep; err = mcp251xfd_register_check_rx_int(priv); if (err) - goto out_chip_set_mode_sleep; + goto out_chip_sleep; err = register_candev(ndev); if (err) - goto out_chip_set_mode_sleep; + goto out_chip_sleep; err = mcp251xfd_register_done(priv); if (err) @@ -1741,7 +1883,7 @@ static int mcp251xfd_register(struct mcp251xfd_priv *priv) * disable the clocks and vdd. If CONFIG_PM is not enabled, * the clocks and vdd will stay powered. 
*/ - err = mcp251xfd_chip_set_mode(priv, MCP251XFD_REG_CON_MODE_SLEEP); + err = mcp251xfd_chip_sleep(priv); if (err) goto out_unregister_candev; @@ -1751,8 +1893,8 @@ static int mcp251xfd_register(struct mcp251xfd_priv *priv) out_unregister_candev: unregister_candev(ndev); - out_chip_set_mode_sleep: - mcp251xfd_chip_set_mode(priv, MCP251XFD_REG_CON_MODE_SLEEP); + out_chip_sleep: + mcp251xfd_chip_sleep(priv); out_runtime_disable: pm_runtime_disable(ndev->dev.parent); out_runtime_put_noidle: @@ -1768,10 +1910,10 @@ static inline void mcp251xfd_unregister(struct mcp251xfd_priv *priv) unregister_candev(ndev); - pm_runtime_get_sync(ndev->dev.parent); - pm_runtime_put_noidle(ndev->dev.parent); - mcp251xfd_clks_and_vdd_disable(priv); - pm_runtime_disable(ndev->dev.parent); + if (pm_runtime_enabled(ndev->dev.parent)) + pm_runtime_disable(ndev->dev.parent); + else + mcp251xfd_clks_and_vdd_disable(priv); } static const struct of_device_id mcp251xfd_of_match[] = { @@ -1814,6 +1956,7 @@ static int mcp251xfd_probe(struct spi_device *spi) struct gpio_desc *rx_int; struct regulator *reg_vdd, *reg_xceiver; struct clk *clk; + bool pll_enable = false; u32 freq = 0; int err; @@ -1864,12 +2007,8 @@ static int mcp251xfd_probe(struct spi_device *spi) return -ERANGE; } - if (freq <= MCP251XFD_SYSCLOCK_HZ_MAX / MCP251XFD_OSC_PLL_MULTIPLIER) { - dev_err(&spi->dev, - "Oscillator frequency (%u Hz) is too low and PLL is not supported.\n", - freq); - return -ERANGE; - } + if (freq <= MCP251XFD_SYSCLOCK_HZ_MAX / MCP251XFD_OSC_PLL_MULTIPLIER) + pll_enable = true; ndev = alloc_candev(sizeof(struct mcp251xfd_priv), MCP251XFD_TX_OBJ_NUM_MAX); @@ -1885,6 +2024,8 @@ static int mcp251xfd_probe(struct spi_device *spi) priv = netdev_priv(ndev); spi_set_drvdata(spi, priv); priv->can.clock.freq = freq; + if (pll_enable) + priv->can.clock.freq *= MCP251XFD_OSC_PLL_MULTIPLIER; priv->can.do_set_mode = mcp251xfd_set_mode; priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = 
&mcp251xfd_bittiming_const; @@ -1897,6 +2038,7 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->spi = spi; priv->rx_int = rx_int; priv->clk = clk; + priv->pll_enable = pll_enable; priv->reg_vdd = reg_vdd; priv->reg_xceiver = reg_xceiver; @@ -1934,7 +2076,16 @@ static int mcp251xfd_probe(struct spi_device *spi) * */ priv->spi_max_speed_hz_orig = spi->max_speed_hz; - spi->max_speed_hz = min(spi->max_speed_hz, freq / 2 / 1000 * 850); + priv->spi_max_speed_hz_slow = min(spi->max_speed_hz, + freq / 2 / 1000 * 850); + if (priv->pll_enable) + priv->spi_max_speed_hz_fast = min(spi->max_speed_hz, + freq * + MCP251XFD_OSC_PLL_MULTIPLIER / + 2 / 1000 * 850); + else + priv->spi_max_speed_hz_fast = priv->spi_max_speed_hz_slow; + spi->max_speed_hz = priv->spi_max_speed_hz_slow; spi->bits_per_word = 8; spi->rt = true; err = spi_setup(spi); @@ -1951,8 +2102,11 @@ static int mcp251xfd_probe(struct spi_device *spi) goto out_free_candev; err = mcp251xfd_register(priv); - if (err) + if (err) { + dev_err_probe(&spi->dev, err, "Failed to detect %s.\n", + mcp251xfd_get_model_str(priv)); goto out_can_rx_offload_del; + } return 0; @@ -1966,7 +2120,7 @@ static int mcp251xfd_probe(struct spi_device *spi) return err; } -static int mcp251xfd_remove(struct spi_device *spi) +static void mcp251xfd_remove(struct spi_device *spi) { struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; @@ -1975,8 +2129,6 @@ static int mcp251xfd_remove(struct spi_device *spi) mcp251xfd_unregister(priv); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); - - return 0; } static int __maybe_unused mcp251xfd_runtime_suspend(struct device *device) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c index ffae8fdd3af0..c991b30bc9f0 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c @@ -207,10 +207,10 @@ static void mcp251xfd_dump_tx_ring(const 
struct mcp251xfd_priv *priv, .val = tx->base, }, { .key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR, - .val = 0, + .val = tx->nr, }, { .key = MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR, - .val = MCP251XFD_TX_FIFO, + .val = tx->fifo_nr, }, { .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM, .val = tx->obj_num, diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c index 7b120c716228..217510c12af5 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c @@ -2,8 +2,8 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2019, 2020 Pengutronix, -// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2019, 2020, 2021 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> // #include "mcp251xfd.h" @@ -47,22 +47,32 @@ mcp251xfd_regmap_nocrc_gather_write(void *context, return spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer)); } -static inline bool mcp251xfd_update_bits_read_reg(unsigned int reg) +static inline bool +mcp251xfd_update_bits_read_reg(const struct mcp251xfd_priv *priv, + unsigned int reg) { + struct mcp251xfd_rx_ring *ring; + int n; + switch (reg) { case MCP251XFD_REG_INT: case MCP251XFD_REG_TEFCON: - case MCP251XFD_REG_FIFOCON(MCP251XFD_RX_FIFO(0)): case MCP251XFD_REG_FLTCON(0): case MCP251XFD_REG_ECCSTAT: case MCP251XFD_REG_CRC: return false; case MCP251XFD_REG_CON: - case MCP251XFD_REG_FIFOSTA(MCP251XFD_RX_FIFO(0)): case MCP251XFD_REG_OSC: case MCP251XFD_REG_ECCCON: return true; default: + mcp251xfd_for_each_rx_ring(priv, ring, n) { + if (reg == MCP251XFD_REG_FIFOCON(ring->fifo_nr)) + return false; + if (reg == MCP251XFD_REG_FIFOSTA(ring->fifo_nr)) + return true; + } + WARN(1, "Status of reg 0x%04x unknown.\n", reg); } @@ -92,7 +102,7 @@ mcp251xfd_regmap_nocrc_update_bits(void *context, unsigned int reg, last_byte = mcp251xfd_last_byte_set(mask); len = last_byte - first_byte + 1; - if 
(mcp251xfd_update_bits_read_reg(reg)) { + if (mcp251xfd_update_bits_read_reg(priv, reg)) { struct spi_transfer xfer[2] = { }; struct spi_message msg; @@ -368,7 +378,7 @@ mcp251xfd_regmap_crc_read(void *context, * to the caller. It will take care of both cases. * */ - if (reg == MCP251XFD_REG_OSC) { + if (reg == MCP251XFD_REG_OSC && val_len == sizeof(__le32)) { err = 0; goto out; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 92f9e9b01289..848b8b2ecb5f 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -53,6 +53,49 @@ mcp251xfd_cmd_prepare_write_reg(const struct mcp251xfd_priv *priv, } static void +mcp251xfd_ring_init_tef(struct mcp251xfd_priv *priv, u16 *base) +{ + struct mcp251xfd_tef_ring *tef_ring; + struct spi_transfer *xfer; + u32 val; + u16 addr; + u8 len; + int i; + + /* TEF */ + tef_ring = priv->tef; + tef_ring->head = 0; + tef_ring->tail = 0; + + /* TEF- and TX-FIFO have same number of objects */ + *base = mcp251xfd_get_tef_obj_addr(priv->tx->obj_num); + + /* FIFO increment TEF tail pointer */ + addr = MCP251XFD_REG_TEFCON; + val = MCP251XFD_REG_TEFCON_UINC; + len = mcp251xfd_cmd_prepare_write_reg(priv, &tef_ring->uinc_buf, + addr, val, val); + + for (i = 0; i < ARRAY_SIZE(tef_ring->uinc_xfer); i++) { + xfer = &tef_ring->uinc_xfer[i]; + xfer->tx_buf = &tef_ring->uinc_buf; + xfer->len = len; + xfer->cs_change = 1; + xfer->cs_change_delay.value = 0; + xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS; + } + + /* "cs_change == 1" on the last transfer results in an active + * chip select after the complete SPI message. This causes the + * controller to interpret the next register access as + * data. Set "cs_change" of the last transfer to "0" to + * properly deactivate the chip select at the end of the + * message. 
+ */ + xfer->cs_change = 0; +} + +static void mcp251xfd_tx_ring_init_tx_obj(const struct mcp251xfd_priv *priv, const struct mcp251xfd_tx_ring *ring, struct mcp251xfd_tx_obj *tx_obj, @@ -88,81 +131,55 @@ mcp251xfd_tx_ring_init_tx_obj(const struct mcp251xfd_priv *priv, ARRAY_SIZE(tx_obj->xfer)); } -void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) +static void +mcp251xfd_ring_init_tx(struct mcp251xfd_priv *priv, u16 *base, u8 *fifo_nr) { - struct mcp251xfd_tef_ring *tef_ring; struct mcp251xfd_tx_ring *tx_ring; - struct mcp251xfd_rx_ring *rx_ring, *prev_rx_ring = NULL; struct mcp251xfd_tx_obj *tx_obj; - struct spi_transfer *xfer; u32 val; u16 addr; u8 len; - int i, j; - - netdev_reset_queue(priv->ndev); - - /* TEF */ - tef_ring = priv->tef; - tef_ring->head = 0; - tef_ring->tail = 0; - - /* FIFO increment TEF tail pointer */ - addr = MCP251XFD_REG_TEFCON; - val = MCP251XFD_REG_TEFCON_UINC; - len = mcp251xfd_cmd_prepare_write_reg(priv, &tef_ring->uinc_buf, - addr, val, val); - - for (j = 0; j < ARRAY_SIZE(tef_ring->uinc_xfer); j++) { - xfer = &tef_ring->uinc_xfer[j]; - xfer->tx_buf = &tef_ring->uinc_buf; - xfer->len = len; - xfer->cs_change = 1; - xfer->cs_change_delay.value = 0; - xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS; - } - - /* "cs_change == 1" on the last transfer results in an active - * chip select after the complete SPI message. This causes the - * controller to interpret the next register access as - * data. Set "cs_change" of the last transfer to "0" to - * properly deactivate the chip select at the end of the - * message. 
- */ - xfer->cs_change = 0; + int i; - /* TX */ tx_ring = priv->tx; tx_ring->head = 0; tx_ring->tail = 0; - tx_ring->base = mcp251xfd_get_tef_obj_addr(tx_ring->obj_num); + tx_ring->base = *base; + tx_ring->nr = 0; + tx_ring->fifo_nr = *fifo_nr; + + *base = mcp251xfd_get_tx_obj_addr(tx_ring, tx_ring->obj_num); + *fifo_nr += 1; /* FIFO request to send */ - addr = MCP251XFD_REG_FIFOCON(MCP251XFD_TX_FIFO); + addr = MCP251XFD_REG_FIFOCON(tx_ring->fifo_nr); val = MCP251XFD_REG_FIFOCON_TXREQ | MCP251XFD_REG_FIFOCON_UINC; len = mcp251xfd_cmd_prepare_write_reg(priv, &tx_ring->rts_buf, addr, val, val); mcp251xfd_for_each_tx_obj(tx_ring, tx_obj, i) mcp251xfd_tx_ring_init_tx_obj(priv, tx_ring, tx_obj, len, i); +} + +static void +mcp251xfd_ring_init_rx(struct mcp251xfd_priv *priv, u16 *base, u8 *fifo_nr) +{ + struct mcp251xfd_rx_ring *rx_ring; + struct spi_transfer *xfer; + u32 val; + u16 addr; + u8 len; + int i, j; - /* RX */ mcp251xfd_for_each_rx_ring(priv, rx_ring, i) { rx_ring->head = 0; rx_ring->tail = 0; + rx_ring->base = *base; rx_ring->nr = i; - rx_ring->fifo_nr = MCP251XFD_RX_FIFO(i); - - if (!prev_rx_ring) - rx_ring->base = - mcp251xfd_get_tx_obj_addr(tx_ring, - tx_ring->obj_num); - else - rx_ring->base = prev_rx_ring->base + - prev_rx_ring->obj_size * - prev_rx_ring->obj_num; + rx_ring->fifo_nr = *fifo_nr; - prev_rx_ring = rx_ring; + *base = mcp251xfd_get_rx_obj_addr(rx_ring, rx_ring->obj_num); + *fifo_nr += 1; /* FIFO increment RX tail pointer */ addr = MCP251XFD_REG_FIFOCON(rx_ring->fifo_nr); @@ -190,6 +207,74 @@ void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) } } +int mcp251xfd_ring_init(struct mcp251xfd_priv *priv) +{ + const struct mcp251xfd_rx_ring *rx_ring; + u16 base = 0, ram_used; + u8 fifo_nr = 1; + int i; + + netdev_reset_queue(priv->ndev); + + mcp251xfd_ring_init_tef(priv, &base); + mcp251xfd_ring_init_rx(priv, &base, &fifo_nr); + mcp251xfd_ring_init_tx(priv, &base, &fifo_nr); + + /* mcp251xfd_handle_rxif() will iterate over all RX rings. 
+ * Rings with their corresponding bit set in + * priv->regs_status.rxif are read out. + * + * If the chip is configured for only 1 RX-FIFO, and if there + * is an RX interrupt pending (RXIF in INT register is set), + * it must be the 1st RX-FIFO. + * + * We mark the RXIF of the 1st FIFO as pending here, so that + * we can skip the read of the RXIF register in + * mcp251xfd_read_regs_status() for the 1 RX-FIFO only case. + * + * If we use more than 1 RX-FIFO, this value gets overwritten + * in mcp251xfd_read_regs_status(), so set it unconditionally + * here. + */ + priv->regs_status.rxif = BIT(priv->rx[0]->fifo_nr); + + netdev_dbg(priv->ndev, + "FIFO setup: TEF: 0x%03x: %2d*%zu bytes = %4zu bytes\n", + mcp251xfd_get_tef_obj_addr(0), + priv->tx->obj_num, sizeof(struct mcp251xfd_hw_tef_obj), + priv->tx->obj_num * sizeof(struct mcp251xfd_hw_tef_obj)); + + mcp251xfd_for_each_rx_ring(priv, rx_ring, i) { + netdev_dbg(priv->ndev, + "FIFO setup: RX-%u: FIFO %u/0x%03x: %2u*%u bytes = %4u bytes\n", + rx_ring->nr, rx_ring->fifo_nr, + mcp251xfd_get_rx_obj_addr(rx_ring, 0), + rx_ring->obj_num, rx_ring->obj_size, + rx_ring->obj_num * rx_ring->obj_size); + } + + netdev_dbg(priv->ndev, + "FIFO setup: TX: FIFO %u/0x%03x: %2u*%u bytes = %4u bytes\n", + priv->tx->fifo_nr, + mcp251xfd_get_tx_obj_addr(priv->tx, 0), + priv->tx->obj_num, priv->tx->obj_size, + priv->tx->obj_num * priv->tx->obj_size); + + netdev_dbg(priv->ndev, + "FIFO setup: free: %4u bytes\n", + MCP251XFD_RAM_SIZE - (base - MCP251XFD_RAM_START)); + + ram_used = base - MCP251XFD_RAM_START; + if (ram_used > MCP251XFD_RAM_SIZE) { + netdev_err(priv->ndev, + "Error during ring configuration, using more RAM (%u bytes) than available (%u bytes).\n", + ram_used, MCP251XFD_RAM_SIZE); + return -ENOMEM; + } + + return 0; +} + void mcp251xfd_ring_free(struct mcp251xfd_priv *priv) { int i; @@ -249,21 +334,5 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) } priv->rx_ring_num = i; - netdev_dbg(priv->ndev, - "FIFO setup: TEF: 
%d*%d bytes = %d bytes, TX: %d*%d bytes = %d bytes\n", - tx_obj_num, tef_obj_size, tef_obj_size * tx_obj_num, - tx_obj_num, tx_obj_size, tx_obj_size * tx_obj_num); - - mcp251xfd_for_each_rx_ring(priv, rx_ring, i) { - netdev_dbg(priv->ndev, - "FIFO setup: RX-%d: %d*%d bytes = %d bytes\n", - i, rx_ring->obj_num, rx_ring->obj_size, - rx_ring->obj_size * rx_ring->obj_num); - } - - netdev_dbg(priv->ndev, - "FIFO setup: free: %d bytes\n", - ram_free); - return 0; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c index 63f2526464b3..e6d39876065a 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c @@ -19,7 +19,7 @@ static inline int mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv, const struct mcp251xfd_rx_ring *ring, - u8 *rx_head) + u8 *rx_head, bool *fifo_empty) { u32 fifo_sta; int err; @@ -30,6 +30,7 @@ mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv, return err; *rx_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta); + *fifo_empty = !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF); return 0; } @@ -84,10 +85,12 @@ mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv, { u32 new_head; u8 chip_rx_head; + bool fifo_empty; int err; - err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head); - if (err) + err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head, + &fifo_empty); + if (err || fifo_empty) return err; /* chip_rx_head, is the next RX-Object filled by the HW. 
@@ -251,6 +254,9 @@ int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv) int err, n; mcp251xfd_for_each_rx_ring(priv, ring, n) { + if (!(priv->regs_status.rxif & BIT(ring->fifo_nr))) + continue; + err = mcp251xfd_handle_rxif_ring(priv, ring); if (err) return err; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index f551c900803e..87cc13d455c1 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -2,8 +2,8 @@ * * mcp251xfd - Microchip MCP251xFD Family CAN controller driver * - * Copyright (c) 2019 Pengutronix, - * Marc Kleine-Budde <kernel@pengutronix.de> + * Copyright (c) 2019, 2020, 2021 Pengutronix, + * Marc Kleine-Budde <kernel@pengutronix.de> * Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org> */ @@ -383,8 +383,6 @@ #endif #define MCP251XFD_NAPI_WEIGHT 32 -#define MCP251XFD_TX_FIFO 1 -#define MCP251XFD_RX_FIFO(x) (MCP251XFD_TX_FIFO + 1 + (x)) /* SPI commands */ #define MCP251XFD_SPI_INSTRUCTION_RESET 0x0000 @@ -412,6 +410,15 @@ static_assert(MCP251XFD_TIMESTAMP_WORK_DELAY_SEC < #define MCP251XFD_SANITIZE_SPI 1 #define MCP251XFD_SANITIZE_CAN 1 +/* FIFO and Ring */ +#define MCP251XFD_FIFO_TEF_NUM 1U +#define MCP251XFD_FIFO_RX_NUM_MAX 1U +#define MCP251XFD_FIFO_TX_NUM 1U + +static_assert(MCP251XFD_FIFO_TEF_NUM == 1U); +static_assert(MCP251XFD_FIFO_TEF_NUM == MCP251XFD_FIFO_TX_NUM); +static_assert(MCP251XFD_FIFO_RX_NUM_MAX <= 4U); + /* Silence TX MAB overflow warnings */ #define MCP251XFD_QUIRK_MAB_NO_WARN BIT(0) /* Use CRC to access registers */ @@ -521,6 +528,8 @@ struct mcp251xfd_tx_ring { unsigned int tail; u16 base; + u8 nr; + u8 fifo_nr; u8 obj_num; u8 obj_size; @@ -561,6 +570,7 @@ struct mcp251xfd_ecc { struct mcp251xfd_regs_status { u32 intf; + u32 rxif; }; enum mcp251xfd_model { @@ -592,10 +602,12 @@ struct mcp251xfd_priv { struct spi_device *spi; u32 spi_max_speed_hz_orig; + u32 spi_max_speed_hz_fast; + u32 spi_max_speed_hz_slow; - struct 
mcp251xfd_tef_ring tef[1]; - struct mcp251xfd_tx_ring tx[1]; - struct mcp251xfd_rx_ring *rx[1]; + struct mcp251xfd_tef_ring tef[MCP251XFD_FIFO_TEF_NUM]; + struct mcp251xfd_rx_ring *rx[MCP251XFD_FIFO_RX_NUM_MAX]; + struct mcp251xfd_tx_ring tx[MCP251XFD_FIFO_TX_NUM]; u8 rx_ring_num; @@ -608,6 +620,7 @@ struct mcp251xfd_priv { struct gpio_desc *rx_int; struct clk *clk; + bool pll_enable; struct regulator *reg_vdd; struct regulator *reg_xceiver; @@ -776,7 +789,7 @@ mcp251xfd_tx_tail_get_from_chip(const struct mcp251xfd_priv *priv, int err; err = regmap_read(priv->map_reg, - MCP251XFD_REG_FIFOSTA(MCP251XFD_TX_FIFO), + MCP251XFD_REG_FIFOSTA(priv->tx->fifo_nr), &fifo_sta); if (err) return err; @@ -879,7 +892,7 @@ u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size, const void *data, size_t data_size); u16 mcp251xfd_crc16_compute(const void *data, size_t data_size); int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv); -void mcp251xfd_ring_init(struct mcp251xfd_priv *priv); +int mcp251xfd_ring_init(struct mcp251xfd_priv *priv); void mcp251xfd_ring_free(struct mcp251xfd_priv *priv); int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv); int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv); diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 2ed2370a3166..2d73ebbf3836 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -1787,7 +1787,7 @@ static int es58x_open(struct net_device *netdev) struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; int ret; - if (atomic_inc_return(&es58x_dev->opened_channel_cnt) == 1) { + if (!es58x_dev->opened_channel_cnt) { ret = es58x_alloc_rx_urbs(es58x_dev); if (ret) return ret; @@ -1805,12 +1805,13 @@ static int es58x_open(struct net_device *netdev) if (ret) goto free_urbs; + es58x_dev->opened_channel_cnt++; netif_start_queue(netdev); return ret; free_urbs: - if 
(atomic_dec_and_test(&es58x_dev->opened_channel_cnt)) + if (!es58x_dev->opened_channel_cnt) es58x_free_urbs(es58x_dev); netdev_err(netdev, "%s: Could not open the network device: %pe\n", __func__, ERR_PTR(ret)); @@ -1845,7 +1846,8 @@ static int es58x_stop(struct net_device *netdev) es58x_flush_pending_tx_msg(netdev); - if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt)) + es58x_dev->opened_channel_cnt--; + if (!es58x_dev->opened_channel_cnt) es58x_free_urbs(es58x_dev); return 0; @@ -2215,7 +2217,6 @@ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, init_usb_anchor(&es58x_dev->tx_urbs_idle); init_usb_anchor(&es58x_dev->tx_urbs_busy); atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0); - atomic_set(&es58x_dev->opened_channel_cnt, 0); usb_set_intfdata(intf, es58x_dev); es58x_dev->rx_pipe = usb_rcvbulkpipe(es58x_dev->udev, diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h index 826a15871573..e5033cb5e695 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.h +++ b/drivers/net/can/usb/etas_es58x/es58x_core.h @@ -373,8 +373,6 @@ struct es58x_operators { * queue wake/stop logic should prevent this URB from getting * empty. Please refer to es58x_get_tx_urb() for more details. * @tx_urbs_idle_cnt: number of urbs in @tx_urbs_idle. - * @opened_channel_cnt: number of channels opened (c.f. es58x_open() - * and es58x_stop()). * @ktime_req_ns: kernel timestamp when es58x_set_realtime_diff_ns() * was called. * @realtime_diff_ns: difference in nanoseconds between the clocks of @@ -384,6 +382,10 @@ struct es58x_operators { * in RX branches. * @rx_max_packet_size: Maximum length of bulk-in URB. * @num_can_ch: Number of CAN channel (i.e. number of elements of @netdev). + * @opened_channel_cnt: number of channels opened. Free of race + * conditions because its two users (net_device_ops:ndo_open() + * and net_device_ops:ndo_close()) guarantee that the network + * stack big kernel lock (a.k.a. 
rtnl_mutex) is being hold. * @rx_cmd_buf_len: Length of @rx_cmd_buf. * @rx_cmd_buf: The device might split the URB commands in an * arbitrary amount of pieces. This buffer is used to concatenate @@ -406,7 +408,6 @@ struct es58x_device { struct usb_anchor tx_urbs_busy; struct usb_anchor tx_urbs_idle; atomic_t tx_urbs_idle_cnt; - atomic_t opened_channel_cnt; u64 ktime_req_ns; s64 realtime_diff_ns; @@ -415,6 +416,7 @@ struct es58x_device { u16 rx_max_packet_size; u8 num_can_ch; + u8 opened_channel_cnt; u16 rx_cmd_buf_len; union es58x_urb_cmd rx_cmd_buf; diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c index ec87126e1a7d..88d2540abbbe 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_fd.c +++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c @@ -69,7 +69,8 @@ static int es58x_fd_echo_msg(struct net_device *netdev, int i, num_element; u32 rcv_packet_idx; - const u32 mask = GENMASK(31, sizeof(echo_msg->packet_idx) * 8); + const u32 mask = GENMASK(BITS_PER_TYPE(mask) - 1, + BITS_PER_TYPE(echo_msg->packet_idx)); num_element = es58x_msg_num_element(es58x_dev->dev, es58x_fd_urb_cmd->echo_msg, diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index b487e3fe770a..d35749fad1ef 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -191,8 +191,8 @@ struct gs_can { struct gs_usb { struct gs_can *canch[GS_MAX_INTF]; struct usb_anchor rx_submitted; - atomic_t active_channels; struct usb_device *udev; + u8 active_channels; }; /* 'allocate' a tx context. 
@@ -589,7 +589,7 @@ static int gs_can_open(struct net_device *netdev) if (rc) return rc; - if (atomic_add_return(1, &parent->active_channels) == 1) { + if (!parent->active_channels) { for (i = 0; i < GS_MAX_RX_URBS; i++) { struct urb *urb; u8 *buf; @@ -690,6 +690,7 @@ static int gs_can_open(struct net_device *netdev) dev->can.state = CAN_STATE_ERROR_ACTIVE; + parent->active_channels++; if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) netif_start_queue(netdev); @@ -705,7 +706,8 @@ static int gs_can_close(struct net_device *netdev) netif_stop_queue(netdev); /* Stop polling */ - if (atomic_dec_and_test(&parent->active_channels)) + parent->active_channels--; + if (!parent->active_channels) usb_kill_anchored_urbs(&parent->rx_submitted); /* Stop sending URBs */ @@ -984,8 +986,6 @@ static int gs_usb_probe(struct usb_interface *intf, init_usb_anchor(&dev->rx_submitted); - atomic_set(&dev->active_channels, 0); - usb_set_intfdata(intf, dev); dev->udev = interface_to_usbdev(intf); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index c4b4d3d0a387..e67658b53d02 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -205,12 +205,10 @@ MODULE_DEVICE_TABLE(usb, kvaser_usb_table); int kvaser_usb_send_cmd(const struct kvaser_usb *dev, void *cmd, int len) { - int actual_len; /* Not used */ - return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), - cmd, len, &actual_len, KVASER_USB_TIMEOUT); + cmd, len, NULL, KVASER_USB_TIMEOUT); } int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len, diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 1674b561c9a2..e562c5ab1149 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1215,10 +1215,11 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) } if (work_done < quota) { - 
napi_complete_done(napi, work_done); - ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier |= xcan_rx_int_mask(priv); - priv->write_reg(priv, XCAN_IER_OFFSET, ier); + if (napi_complete_done(napi, work_done)) { + ier = priv->read_reg(priv, XCAN_IER_OFFSET); + ier |= xcan_rx_int_mask(priv); + priv->write_reg(priv, XCAN_IER_OFFSET, ier); + } } return work_done; } diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a3b98992f180..122e63762979 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1309,46 +1309,50 @@ void b53_port_event(struct dsa_switch *ds, int port) } EXPORT_SYMBOL(b53_port_event); -void b53_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void b53_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { struct b53_device *dev = ds->priv; - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - if (dev->ops->serdes_phylink_validate) - dev->ops->serdes_phylink_validate(dev, port, mask, state); + /* Internal ports need GMII for PHYLIB */ + __set_bit(PHY_INTERFACE_MODE_GMII, config->supported_interfaces); + + /* These switches appear to support MII and RevMII too, but beyond + * this, the code gives very few clues. FIXME: We probably need more + * interface modes here. + * + * According to b53_srab_mux_init(), ports 3..5 can support: + * SGMII, MII, GMII, RGMII or INTERNAL depending on the MUX setting. + * However, the interface mode read from the MUX configuration is + * not passed back to DSA, so phylink uses NA. + * DT can specify RGMII for ports 0, 1. + * For MDIO, port 8 can be RGMII_TXID. 
+ */ + __set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_REVMII, config->supported_interfaces); - /* Allow all the expected bits */ - phylink_set(mask, Autoneg); - phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100; - /* With the exclusion of 5325/5365, MII, Reverse MII and 802.3z, we - * support Gigabit, including Half duplex. + /* 5325/5365 are not capable of gigabit speeds, everything else is. + * Note: the original code also exclulded Gigagbit for MII, RevMII + * and 802.3z modes. MII and RevMII are not able to work above 100M, + * so will be excluded by the generic validator implementation. + * However, the exclusion of Gigabit for 802.3z just seems wrong. */ - if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII && - !phy_interface_mode_is_8023z(state->interface) && - !(is5325(dev) || is5365(dev))) { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseT_Half); - } + if (!(is5325(dev) || is5365(dev))) + config->mac_capabilities |= MAC_1000; - if (!phy_interface_mode_is_8023z(state->interface)) { - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - } + /* Get the implementation specific capabilities */ + if (dev->ops->phylink_get_caps) + dev->ops->phylink_get_caps(dev, port, config); - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); - - phylink_helper_basex_speed(state); + /* This driver does not make use of the speed, duplex, pause or the + * advertisement in its mac_config, so it is safe to mark this driver + * as non-legacy. 
+ */ + config->legacy_pre_march2020 = false; } -EXPORT_SYMBOL(b53_phylink_validate); int b53_phylink_mac_link_state(struct dsa_switch *ds, int port, struct phylink_link_state *state) @@ -1704,7 +1708,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, } int b53_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct b53_device *priv = ds->priv; int ret; @@ -1724,7 +1729,8 @@ int b53_fdb_add(struct dsa_switch *ds, int port, EXPORT_SYMBOL(b53_fdb_add); int b53_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct b53_device *priv = ds->priv; int ret; @@ -1825,7 +1831,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, EXPORT_SYMBOL(b53_fdb_dump); int b53_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct b53_device *priv = ds->priv; int ret; @@ -1845,7 +1852,8 @@ int b53_mdb_add(struct dsa_switch *ds, int port, EXPORT_SYMBOL(b53_mdb_add); int b53_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct b53_device *priv = ds->priv; int ret; @@ -1861,7 +1869,7 @@ int b53_mdb_del(struct dsa_switch *ds, int port, EXPORT_SYMBOL(b53_mdb_del); int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; @@ -2259,7 +2267,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_read = b53_phy_read16, .phy_write = b53_phy_write16, .adjust_link = b53_adjust_link, - .phylink_validate = b53_phylink_validate, + .phylink_get_caps = b53_phylink_get_caps, .phylink_mac_link_state = 
b53_phylink_mac_link_state, .phylink_mac_config = b53_phylink_mac_config, .phylink_mac_an_restart = b53_phylink_mac_an_restart, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index b41dc8ac2ca8..86e7eb7924e7 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -46,6 +46,8 @@ struct b53_io_ops { int (*phy_write16)(struct b53_device *dev, int addr, int reg, u16 value); int (*irq_enable)(struct b53_device *dev, int port); void (*irq_disable)(struct b53_device *dev, int port); + void (*phylink_get_caps)(struct b53_device *dev, int port, + struct phylink_config *config); u8 (*serdes_map_lane)(struct b53_device *dev, int port); int (*serdes_link_state)(struct b53_device *dev, int port, struct phylink_link_state *state); @@ -56,9 +58,6 @@ struct b53_io_ops { void (*serdes_link_set)(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up); - void (*serdes_phylink_validate)(struct b53_device *dev, int port, - unsigned long *supported, - struct phylink_link_state *state); }; #define B53_INVALID_LANE 0xff @@ -325,7 +324,7 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds, int port, int sset); void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload); + bool *tx_fwd_offload, struct netlink_ext_ack *extack); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state); void b53_br_fast_age(struct dsa_switch *ds, int port); @@ -337,9 +336,6 @@ int b53_br_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); -void b53_phylink_validate(struct dsa_switch *ds, int port, - unsigned 
long *supported, - struct phylink_link_state *state); int b53_phylink_mac_link_state(struct dsa_switch *ds, int port, struct phylink_link_state *state); void b53_phylink_mac_config(struct dsa_switch *ds, int port, @@ -363,15 +359,19 @@ int b53_vlan_add(struct dsa_switch *ds, int port, int b53_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); int b53_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int b53_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int b53_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); int b53_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int b53_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int b53_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress); enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/b53/b53_serdes.c b/drivers/net/dsa/b53/b53_serdes.c index 5ae3d9783b68..555e5b372321 100644 --- a/drivers/net/dsa/b53/b53_serdes.c +++ b/drivers/net/dsa/b53/b53_serdes.c @@ -158,9 +158,8 @@ void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, } EXPORT_SYMBOL(b53_serdes_link_set); -void b53_serdes_phylink_validate(struct b53_device *dev, int port, - unsigned long *supported, - struct phylink_link_state *state) +void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, + struct phylink_config *config) { u8 lane = b53_serdes_map_lane(dev, port); @@ -169,16 +168,24 @@ void b53_serdes_phylink_validate(struct b53_device *dev, int port, switch (lane) { case 0: - 
phylink_set(supported, 2500baseX_Full); + /* It appears lane 0 supports 2500base-X and 1000base-X */ + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + config->supported_interfaces); + config->mac_capabilities |= MAC_2500FD; fallthrough; case 1: - phylink_set(supported, 1000baseX_Full); + /* It appears lane 1 only supports 1000base-X and SGMII */ + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + config->mac_capabilities |= MAC_1000FD; break; default: break; } } -EXPORT_SYMBOL(b53_serdes_phylink_validate); +EXPORT_SYMBOL(b53_serdes_phylink_get_caps); int b53_serdes_init(struct b53_device *dev, int port) { diff --git a/drivers/net/dsa/b53/b53_serdes.h b/drivers/net/dsa/b53/b53_serdes.h index 55d280fe38e4..f47d5caa7557 100644 --- a/drivers/net/dsa/b53/b53_serdes.h +++ b/drivers/net/dsa/b53/b53_serdes.h @@ -115,9 +115,8 @@ void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode, void b53_serdes_an_restart(struct b53_device *dev, int port); void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up); -void b53_serdes_phylink_validate(struct b53_device *dev, int port, - unsigned long *supported, - struct phylink_link_state *state); +void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, + struct phylink_config *config); #if IS_ENABLED(CONFIG_B53_SERDES) int b53_serdes_init(struct b53_device *dev, int port); #else diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 2b88f03e5252..0e54b2a0c211 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -314,7 +314,7 @@ static int b53_spi_probe(struct spi_device *spi) return 0; } -static int b53_spi_remove(struct spi_device *spi) +static void b53_spi_remove(struct spi_device *spi) { struct b53_device *dev = spi_get_drvdata(spi); @@ -322,8 +322,6 @@ static int b53_spi_remove(struct spi_device *spi) 
b53_switch_remove(dev); spi_set_drvdata(spi, NULL); - - return 0; } static void b53_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 4591bb1c05d2..c51b716657db 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -443,6 +443,39 @@ static void b53_srab_irq_disable(struct b53_device *dev, int port) } } +static void b53_srab_phylink_get_caps(struct b53_device *dev, int port, + struct phylink_config *config) +{ + struct b53_srab_priv *priv = dev->priv; + struct b53_srab_port_priv *p = &priv->port_intrs[port]; + + switch (p->mode) { + case PHY_INTERFACE_MODE_SGMII: +#if IS_ENABLED(CONFIG_B53_SERDES) + /* If p->mode indicates SGMII mode, that essentially means we + * are using a serdes. As the serdes for the capabilities. + */ + b53_serdes_phylink_get_caps(dev, port, config); +#endif + break; + + case PHY_INTERFACE_MODE_NA: + break; + + case PHY_INTERFACE_MODE_RGMII: + /* If we support RGMII, support all RGMII modes, since + * that dictates the PHY delay settings. + */ + phy_interface_set_rgmii(config->supported_interfaces); + break; + + default: + /* Some other mode (e.g. 
MII, GMII etc) */ + __set_bit(p->mode, config->supported_interfaces); + break; + } +} + static const struct b53_io_ops b53_srab_ops = { .read8 = b53_srab_read8, .read16 = b53_srab_read16, @@ -456,13 +489,13 @@ static const struct b53_io_ops b53_srab_ops = { .write64 = b53_srab_write64, .irq_enable = b53_srab_irq_enable, .irq_disable = b53_srab_irq_disable, + .phylink_get_caps = b53_srab_phylink_get_caps, #if IS_ENABLED(CONFIG_B53_SERDES) .serdes_map_lane = b53_srab_serdes_map_lane, .serdes_link_state = b53_serdes_link_state, .serdes_config = b53_serdes_config, .serdes_an_restart = b53_serdes_an_restart, .serdes_link_set = b53_serdes_link_set, - .serdes_phylink_validate = b53_serdes_phylink_validate, #endif }; diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 33daaf10c488..263e41191c29 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -168,7 +168,8 @@ static int dsa_loop_phy_write(struct dsa_switch *ds, int port, static int dsa_loop_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { dev_dbg(ds->dev, "%s: port: %d, bridge: %s\n", __func__, port, bridge.dev->name); diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 726f267cb228..ac1f3b3a7040 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -675,7 +675,8 @@ static int hellcreek_bridge_flags(struct dsa_switch *ds, int port, static int hellcreek_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct hellcreek *hellcreek = ds->priv; @@ -827,7 +828,8 @@ static int hellcreek_fdb_get(struct hellcreek *hellcreek, } static int hellcreek_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct 
dsa_db db) { struct hellcreek_fdb_entry entry = { 0 }; struct hellcreek *hellcreek = ds->priv; @@ -872,7 +874,8 @@ out: } static int hellcreek_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct hellcreek_fdb_entry entry = { 0 }; struct hellcreek *hellcreek = ds->priv; diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c index b3bc948d6145..ffd06cf8c44f 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c @@ -331,7 +331,7 @@ static void hellcreek_get_rxts(struct hellcreek *hellcreek, shwt = skb_hwtstamps(skb); memset(shwt, 0, sizeof(*shwt)); shwt->hwtstamp = ns_to_ktime(ns); - netif_rx_ni(skb); + netif_rx(skb); } } diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 3969d89fa4db..e03ff1f267bb 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1111,7 +1111,8 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port) static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct lan9303 *chip = ds->priv; @@ -1188,7 +1189,8 @@ static void lan9303_port_fast_age(struct dsa_switch *ds, int port) } static int lan9303_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct lan9303 *chip = ds->priv; @@ -1200,8 +1202,8 @@ static int lan9303_port_fdb_add(struct dsa_switch *ds, int port, } static int lan9303_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) - + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct lan9303 *chip = ds->priv; @@ -1245,7 +1247,8 @@ static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, } 
static int lan9303_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct lan9303 *chip = ds->priv; int err; @@ -1260,7 +1263,8 @@ static int lan9303_port_mdb_add(struct dsa_switch *ds, int port, } static int lan9303_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct lan9303 *chip = ds->priv; diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 8a7a8093a156..a8bd233f3cb9 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1152,7 +1152,8 @@ static int gswip_vlan_remove(struct gswip_priv *priv, static int gswip_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct net_device *br = bridge.dev; struct gswip_priv *priv = ds->priv; @@ -1389,13 +1390,15 @@ static int gswip_port_fdb(struct dsa_switch *ds, int port, } static int gswip_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { return gswip_port_fdb(ds, port, addr, vid, true); } static int gswip_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { return gswip_port_fdb(ds, port, addr, vid, false); } diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index 866767b70d65..673589dc88ab 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -87,7 +87,7 @@ static int ksz8795_spi_probe(struct spi_device *spi) return 0; } -static int ksz8795_spi_remove(struct spi_device *spi) +static void ksz8795_spi_remove(struct spi_device *spi) { struct ksz_device *dev = 
spi_get_drvdata(spi); @@ -95,8 +95,6 @@ static int ksz8795_spi_remove(struct spi_device *spi) ksz_switch_remove(dev); spi_set_drvdata(spi, NULL); - - return 0; } static void ksz8795_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index a85d990896b0..94ad6d9504f4 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -64,6 +64,100 @@ static const struct { { 0x83, "tx_discards" }, }; +struct ksz9477_stats_raw { + u64 rx_hi; + u64 rx_undersize; + u64 rx_fragments; + u64 rx_oversize; + u64 rx_jabbers; + u64 rx_symbol_err; + u64 rx_crc_err; + u64 rx_align_err; + u64 rx_mac_ctrl; + u64 rx_pause; + u64 rx_bcast; + u64 rx_mcast; + u64 rx_ucast; + u64 rx_64_or_less; + u64 rx_65_127; + u64 rx_128_255; + u64 rx_256_511; + u64 rx_512_1023; + u64 rx_1024_1522; + u64 rx_1523_2000; + u64 rx_2001; + u64 tx_hi; + u64 tx_late_col; + u64 tx_pause; + u64 tx_bcast; + u64 tx_mcast; + u64 tx_ucast; + u64 tx_deferred; + u64 tx_total_col; + u64 tx_exc_col; + u64 tx_single_col; + u64 tx_mult_col; + u64 rx_total; + u64 tx_total; + u64 rx_discards; + u64 tx_discards; +}; + +static void ksz9477_r_mib_stats64(struct ksz_device *dev, int port) +{ + struct rtnl_link_stats64 *stats; + struct ksz9477_stats_raw *raw; + struct ksz_port_mib *mib; + + mib = &dev->ports[port].mib; + stats = &mib->stats64; + raw = (struct ksz9477_stats_raw *)mib->counters; + + spin_lock(&mib->stats64_lock); + + stats->rx_packets = raw->rx_bcast + raw->rx_mcast + raw->rx_ucast; + stats->tx_packets = raw->tx_bcast + raw->tx_mcast + raw->tx_ucast; + + /* HW counters are counting bytes + FCS which is not acceptable + * for rtnl_link_stats64 interface + */ + stats->rx_bytes = raw->rx_total - stats->rx_packets * ETH_FCS_LEN; + stats->tx_bytes = raw->tx_total - stats->tx_packets * ETH_FCS_LEN; + + stats->rx_length_errors = raw->rx_undersize + raw->rx_fragments + + raw->rx_oversize; + + stats->rx_crc_errors = 
raw->rx_crc_err; + stats->rx_frame_errors = raw->rx_align_err; + stats->rx_dropped = raw->rx_discards; + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_dropped; + + stats->tx_window_errors = raw->tx_late_col; + stats->tx_fifo_errors = raw->tx_discards; + stats->tx_aborted_errors = raw->tx_exc_col; + stats->tx_errors = stats->tx_window_errors + stats->tx_fifo_errors + + stats->tx_aborted_errors; + + stats->multicast = raw->rx_mcast; + stats->collisions = raw->tx_total_col; + + spin_unlock(&mib->stats64_lock); +} + +static void ksz9477_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s) +{ + struct ksz_device *dev = ds->priv; + struct ksz_port_mib *mib; + + mib = &dev->ports[port].mib; + + spin_lock(&mib->stats64_lock); + memcpy(s, &mib->stats64, sizeof(*s)); + spin_unlock(&mib->stats64_lock); +} + static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { regmap_update_bits(dev->regmap[0], addr, bits, set ? 
bits : 0); @@ -546,7 +640,8 @@ static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, } static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct ksz_device *dev = ds->priv; u32 alu_table[4]; @@ -603,7 +698,8 @@ exit: } static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct ksz_device *dev = ds->priv; u32 alu_table[4]; @@ -745,7 +841,8 @@ exit: } static int ksz9477_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct ksz_device *dev = ds->priv; u32 static_table[4]; @@ -820,7 +917,8 @@ exit: } static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct ksz_device *dev = ds->priv; u32 static_table[4]; @@ -1365,6 +1463,7 @@ static const struct dsa_switch_ops ksz9477_switch_ops = { .port_mdb_del = ksz9477_port_mdb_del, .port_mirror_add = ksz9477_port_mirror_add, .port_mirror_del = ksz9477_port_mirror_del, + .get_stats64 = ksz9477_get_stats64, }; static u32 ksz9477_get_port_addr(int port, int offset) @@ -1524,6 +1623,7 @@ static int ksz9477_switch_init(struct ksz_device *dev) if (!dev->ports) return -ENOMEM; for (i = 0; i < dev->port_cnt; i++) { + spin_lock_init(&dev->ports[i].mib.stats64_lock); mutex_init(&dev->ports[i].mib.cnt_mutex); dev->ports[i].mib.counters = devm_kzalloc(dev->dev, @@ -1552,6 +1652,7 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { .port_setup = ksz9477_port_setup, .r_mib_cnt = ksz9477_r_mib_cnt, .r_mib_pkt = ksz9477_r_mib_pkt, + .r_mib_stat64 = ksz9477_r_mib_stats64, .freeze_mib = ksz9477_freeze_mib, .port_init_cnt = ksz9477_port_init_cnt, .shutdown = ksz9477_reset_switch, diff 
--git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index f3afb8b8c4cc..cbc0b20e7e1b 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -92,6 +92,7 @@ static const struct of_device_id ksz9477_dt_ids[] = { { .compatible = "microchip,ksz9893" }, { .compatible = "microchip,ksz9563" }, { .compatible = "microchip,ksz9567" }, + { .compatible = "microchip,ksz8563" }, {}, }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index e3cb0e6c9f6f..940bb9665f15 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -65,7 +65,7 @@ static int ksz9477_spi_probe(struct spi_device *spi) return 0; } -static int ksz9477_spi_remove(struct spi_device *spi) +static void ksz9477_spi_remove(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); @@ -73,8 +73,6 @@ static int ksz9477_spi_remove(struct spi_device *spi) ksz_switch_remove(dev); spi_set_drvdata(spi, NULL); - - return 0; } static void ksz9477_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7e33ec73f803..8014b18d9391 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -26,7 +26,7 @@ void ksz_update_port_member(struct ksz_device *dev, int port) struct dsa_switch *ds = dev->ds; u8 port_member = 0, cpu_port; const struct dsa_port *dp; - int i; + int i, j; if (!dsa_is_user_port(ds, port)) return; @@ -45,13 +45,33 @@ void ksz_update_port_member(struct ksz_device *dev, int port) continue; if (!dsa_port_bridge_same(dp, other_dp)) continue; + if (other_p->stp_state != BR_STATE_FORWARDING) + continue; - if (other_p->stp_state == BR_STATE_FORWARDING && - p->stp_state == BR_STATE_FORWARDING) { + if (p->stp_state == BR_STATE_FORWARDING) { val |= BIT(port); port_member |= 
BIT(i); } + /* Retain port [i]'s relationship to other ports than [port] */ + for (j = 0; j < ds->num_ports; j++) { + const struct dsa_port *third_dp; + struct ksz_port *third_p; + + if (j == i) + continue; + if (j == port) + continue; + if (!dsa_is_user_port(ds, j)) + continue; + third_p = &dev->ports[j]; + if (third_p->stp_state != BR_STATE_FORWARDING) + continue; + third_dp = dsa_to_port(ds, j); + if (dsa_port_bridge_same(other_dp, third_dp)) + val |= BIT(j); + } + dev->dev_ops->cfg_port_member(dev, i, val | cpu_port); } @@ -110,6 +130,10 @@ static void ksz_mib_read_work(struct work_struct *work) } port_r_cnt(dev, i); p->read = false; + + if (dev->dev_ops->r_mib_stat64) + dev->dev_ops->r_mib_stat64(dev, i); + mutex_unlock(&mib->cnt_mutex); } @@ -193,7 +217,8 @@ EXPORT_SYMBOL_GPL(ksz_get_ethtool_stats); int ksz_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { /* port_stp_state_set() will be called after to put the port in * appropriate state so there is no need to do anything. @@ -252,7 +277,8 @@ int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, EXPORT_SYMBOL_GPL(ksz_port_fdb_dump); int ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct ksz_device *dev = ds->priv; struct alu_struct alu; @@ -297,7 +323,8 @@ int ksz_port_mdb_add(struct dsa_switch *ds, int port, EXPORT_SYMBOL_GPL(ksz_port_mdb_add); int ksz_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct ksz_device *dev = ds->priv; struct alu_struct alu; @@ -449,7 +476,7 @@ int ksz_switch_register(struct ksz_device *dev, } /* Read MIB counters every 30 seconds to avoid overflow. 
*/ - dev->mib_read_interval = msecs_to_jiffies(30000); + dev->mib_read_interval = msecs_to_jiffies(5000); /* Start the MIB timer. */ schedule_delayed_work(&dev->mib_read, 0); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 3db63f62f0a1..4ff0a159ce3c 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -22,6 +22,8 @@ struct ksz_port_mib { struct mutex cnt_mutex; /* structure access */ u8 cnt_ptr; u64 *counters; + struct rtnl_link_stats64 stats64; + struct spinlock stats64_lock; }; struct ksz_port { @@ -128,6 +130,7 @@ struct ksz_dev_ops { u64 *cnt); void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr, u64 *dropped, u64 *cnt); + void (*r_mib_stat64)(struct ksz_device *dev, int port); void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze); void (*port_init_cnt)(struct ksz_device *dev, int port); int (*shutdown)(struct ksz_device *dev); @@ -156,16 +159,19 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, int ksz_sset_count(struct dsa_switch *ds, int port, int sset); void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf); int ksz_port_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, bool *tx_fwd_offload); + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void ksz_port_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void ksz_port_fast_age(struct dsa_switch *ds, int port); int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); int ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int ksz_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int ksz_enable_port(struct dsa_switch *ds, 
int port, struct phy_device *phy); /* Common register access functions */ diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index f74f25f479ed..66b00c19ebe0 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1186,7 +1186,8 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port, static int mt7530_port_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, bool *tx_fwd_offload) + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_to_port(ds, port), *other_dp; u32 port_bitmap = BIT(MT7530_CPU_PORT); @@ -1349,7 +1350,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, static int mt7530_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct mt7530_priv *priv = ds->priv; int ret; @@ -1365,7 +1367,8 @@ mt7530_port_fdb_add(struct dsa_switch *ds, int port, static int mt7530_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct mt7530_priv *priv = ds->priv; int ret; @@ -1416,7 +1419,8 @@ err: static int mt7530_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct mt7530_priv *priv = ds->priv; const u8 *addr = mdb->addr; @@ -1442,7 +1446,8 @@ mt7530_port_mdb_add(struct dsa_switch *ds, int port, static int mt7530_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct mt7530_priv *priv = ds->priv; const u8 *addr = mdb->addr; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 7d5e72cdc125..84b90fc36c58 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1625,15 +1625,16 @@ 
static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port) ds = dsa_switch_find(dst->index, dev); dp = ds ? dsa_to_port(ds, port) : NULL; - if (dp && dp->lag_dev) { + if (dp && dp->lag) { /* As the PVT is used to limit flooding of * FORWARD frames, which use the LAG ID as the * source port, we must translate dev/port to * the special "LAG device" in the PVT, using - * the LAG ID as the port number. + * the LAG ID (one-based) as the port number + * (zero-based). */ dev = MV88E6XXX_G2_PVT_ADDR_DEV_TRUNK; - port = dsa_lag_id(dst, dp->lag_dev); + port = dsa_port_lag_id_get(dp) - 1; } } @@ -1671,7 +1672,7 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) struct mv88e6xxx_chip *chip = ds->priv; int err; - if (dsa_to_port(ds, port)->lag_dev) + if (dsa_to_port(ds, port)->lag) /* Hardware is incapable of fast-aging a LAG through a * regular ATU move operation. Until we have something * more fancy in place this is a no-op. @@ -2455,7 +2456,8 @@ unlock: } static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -2469,7 +2471,8 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, } static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -2615,7 +2618,8 @@ static int mv88e6xxx_map_virtual_bridge_to_pvt(struct dsa_switch *ds, static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -2681,7 +2685,8 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, static int mv88e6xxx_crosschip_bridge_join(struct dsa_switch 
*ds, int tree_index, int sw_index, - int port, struct dsa_bridge bridge) + int port, struct dsa_bridge bridge, + struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -6001,7 +6006,8 @@ static int mv88e6xxx_change_tag_protocol(struct dsa_switch *ds, int port, } static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -6015,7 +6021,8 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, } static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -6103,7 +6110,7 @@ static int mv88e6xxx_port_pre_bridge_flags(struct dsa_switch *ds, int port, const struct mv88e6xxx_ops *ops; if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | - BR_BCAST_FLOOD)) + BR_BCAST_FLOOD | BR_PORT_LOCKED)) return -EINVAL; ops = chip->info->ops; @@ -6161,6 +6168,13 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, goto out; } + if (flags.mask & BR_PORT_LOCKED) { + bool locked = !!(flags.val & BR_PORT_LOCKED); + + err = mv88e6xxx_port_set_lock(chip, port, locked); + if (err) + goto out; + } out: mv88e6xxx_reg_unlock(chip); @@ -6168,21 +6182,20 @@ out: } static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, - struct net_device *lag, + struct dsa_lag lag, struct netdev_lag_upper_info *info) { struct mv88e6xxx_chip *chip = ds->priv; struct dsa_port *dp; - int id, members = 0; + int members = 0; if (!mv88e6xxx_has_lag(chip)) return false; - id = dsa_lag_id(ds->dst, lag); - if (id < 0 || id >= ds->num_lag_ids) + if (!lag.id) return false; - dsa_lag_foreach_port(dp, ds->dst, lag) + dsa_lag_foreach_port(dp, ds->dst, &lag) /* Includes the port joining the LAG */ members++; @@ 
-6202,20 +6215,21 @@ static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, return true; } -static int mv88e6xxx_lag_sync_map(struct dsa_switch *ds, struct net_device *lag) +static int mv88e6xxx_lag_sync_map(struct dsa_switch *ds, struct dsa_lag lag) { struct mv88e6xxx_chip *chip = ds->priv; struct dsa_port *dp; u16 map = 0; int id; - id = dsa_lag_id(ds->dst, lag); + /* DSA LAG IDs are one-based, hardware is zero-based */ + id = lag.id - 1; /* Build the map of all ports to distribute flows destined for * this LAG. This can be either a local user port, or a DSA * port if the LAG port is on a remote chip. */ - dsa_lag_foreach_port(dp, ds->dst, lag) + dsa_lag_foreach_port(dp, ds->dst, &lag) map |= BIT(dsa_towards_port(ds, dp->ds->index, dp->index)); return mv88e6xxx_g2_trunk_mapping_write(chip, id, map); @@ -6260,8 +6274,8 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; unsigned int id, num_tx; - struct net_device *lag; struct dsa_port *dp; + struct dsa_lag *lag; int i, err, nth; u16 mask[8]; u16 ivec; @@ -6270,8 +6284,8 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds) ivec = BIT(mv88e6xxx_num_ports(chip)) - 1; /* Disable all masks for ports that _are_ members of a LAG. */ - list_for_each_entry(dp, &ds->dst->ports, list) { - if (!dp->lag_dev || dp->ds != ds) + dsa_switch_for_each_port(dp, ds) { + if (!dp->lag) continue; ivec &= ~BIT(dp->index); @@ -6284,7 +6298,7 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds) * are in the Tx set. 
*/ dsa_lags_foreach_id(id, ds->dst) { - lag = dsa_lag_dev(ds->dst, id); + lag = dsa_lag_by_id(ds->dst, id); if (!lag) continue; @@ -6320,7 +6334,7 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds) } static int mv88e6xxx_lag_sync_masks_map(struct dsa_switch *ds, - struct net_device *lag) + struct dsa_lag lag) { int err; @@ -6344,7 +6358,7 @@ static int mv88e6xxx_port_lag_change(struct dsa_switch *ds, int port) } static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port, - struct net_device *lag, + struct dsa_lag lag, struct netdev_lag_upper_info *info) { struct mv88e6xxx_chip *chip = ds->priv; @@ -6353,7 +6367,8 @@ static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port, if (!mv88e6xxx_lag_can_offload(ds, lag, info)) return -EOPNOTSUPP; - id = dsa_lag_id(ds->dst, lag); + /* DSA LAG IDs are one-based */ + id = lag.id - 1; mv88e6xxx_reg_lock(chip); @@ -6376,7 +6391,7 @@ err_unlock: } static int mv88e6xxx_port_lag_leave(struct dsa_switch *ds, int port, - struct net_device *lag) + struct dsa_lag lag) { struct mv88e6xxx_chip *chip = ds->priv; int err_sync, err_trunk; @@ -6401,7 +6416,7 @@ static int mv88e6xxx_crosschip_lag_change(struct dsa_switch *ds, int sw_index, } static int mv88e6xxx_crosschip_lag_join(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag, + int port, struct dsa_lag lag, struct netdev_lag_upper_info *info) { struct mv88e6xxx_chip *chip = ds->priv; @@ -6424,7 +6439,7 @@ unlock: } static int mv88e6xxx_crosschip_lag_leave(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag) + int port, struct dsa_lag lag) { struct mv88e6xxx_chip *chip = ds->priv; int err_sync, err_pvt; diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c index 389f8a6ec0ab..331b4ca089ff 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -301,7 +301,7 @@ static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip, shwt->hwtstamp = 
ns_to_ktime(ns); status &= ~MV88E6XXX_PTP_TS_VALID; } - netif_rx_ni(skb); + netif_rx(skb); } } diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index ceb450113f88..795b3128768f 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -550,6 +550,9 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port, mode = PHY_INTERFACE_MODE_1000BASEX; switch (mode) { + case PHY_INTERFACE_MODE_RMII: + cmode = MV88E6XXX_PORT_STS_CMODE_RMII; + break; case PHY_INTERFACE_MODE_1000BASEX: cmode = MV88E6XXX_PORT_STS_CMODE_1000BASEX; break; @@ -610,6 +613,8 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port, chip->ports[port].cmode = cmode; lane = mv88e6xxx_serdes_get_lane(chip, port); + if (lane == -ENODEV) + return 0; if (lane < 0) return lane; @@ -1234,6 +1239,35 @@ int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port, return err; } +int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port, + bool locked) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg); + if (err) + return err; + + reg &= ~MV88E6XXX_PORT_CTL0_SA_FILT_MASK; + if (locked) + reg |= MV88E6XXX_PORT_CTL0_SA_FILT_DROP_ON_LOCK; + + err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL0, reg); + if (err) + return err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR, &reg); + if (err) + return err; + + reg &= ~MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT; + if (locked) + reg |= MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT; + + return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR, reg); +} + int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, u16 mode) { diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 3a13db2ec27b..e0a705d82019 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -147,7 +147,11 @@ /* Offset 0x04: Port Control 
Register */ #define MV88E6XXX_PORT_CTL0 0x04 #define MV88E6XXX_PORT_CTL0_USE_CORE_TAG 0x8000 -#define MV88E6XXX_PORT_CTL0_DROP_ON_LOCK 0x4000 +#define MV88E6XXX_PORT_CTL0_SA_FILT_MASK 0xc000 +#define MV88E6XXX_PORT_CTL0_SA_FILT_DISABLED 0x0000 +#define MV88E6XXX_PORT_CTL0_SA_FILT_DROP_ON_LOCK 0x4000 +#define MV88E6XXX_PORT_CTL0_SA_FILT_DROP_ON_UNLOCK 0x8000 +#define MV88E6XXX_PORT_CTL0_SA_FILT_DROP_ON_CPU 0xc000 #define MV88E6XXX_PORT_CTL0_EGRESS_MODE_MASK 0x3000 #define MV88E6XXX_PORT_CTL0_EGRESS_MODE_UNMODIFIED 0x0000 #define MV88E6XXX_PORT_CTL0_EGRESS_MODE_UNTAGGED 0x1000 @@ -370,6 +374,9 @@ int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid); int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid); int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid); +int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port, + bool locked); + int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, u16 mode); int mv88e6095_port_tag_remap(struct mv88e6xxx_chip *chip, int port); diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9ffd5491bf2d..7cc67097948b 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -25,21 +25,151 @@ #include <net/dsa.h> #include "felix.h" -static int felix_tag_8021q_rxvlan_add(struct felix *felix, int port, u16 vid, - bool pvid, bool untagged) +/* Translate the DSA database API into the ocelot switch library API, + * which uses VID 0 for all ports that aren't part of a bridge, + * and expects the bridge_dev to be NULL in that case. + */ +static struct net_device *felix_classify_db(struct dsa_db db) +{ + switch (db.type) { + case DSA_DB_PORT: + case DSA_DB_LAG: + return NULL; + case DSA_DB_BRIDGE: + return db.bridge.dev; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +/* We are called before felix_npi_port_init(), so ocelot->npi is -1. 
*/ +static int felix_migrate_fdbs_to_npi_port(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct net_device *bridge_dev = felix_classify_db(db); + struct ocelot *ocelot = ds->priv; + int cpu = ocelot->num_phys_ports; + int err; + + err = ocelot_fdb_del(ocelot, port, addr, vid, bridge_dev); + if (err) + return err; + + return ocelot_fdb_add(ocelot, cpu, addr, vid, bridge_dev); +} + +static int felix_migrate_mdbs_to_npi_port(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct net_device *bridge_dev = felix_classify_db(db); + struct switchdev_obj_port_mdb mdb; + struct ocelot *ocelot = ds->priv; + int cpu = ocelot->num_phys_ports; + int err; + + memset(&mdb, 0, sizeof(mdb)); + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = ocelot_port_mdb_del(ocelot, port, &mdb, bridge_dev); + if (err) + return err; + + return ocelot_port_mdb_add(ocelot, cpu, &mdb, bridge_dev); +} + +static void felix_migrate_pgid_bit(struct dsa_switch *ds, int from, int to, + int pgid) +{ + struct ocelot *ocelot = ds->priv; + bool on; + u32 val; + + val = ocelot_read_rix(ocelot, ANA_PGID_PGID, pgid); + on = !!(val & BIT(from)); + val &= ~BIT(from); + if (on) + val |= BIT(to); + else + val &= ~BIT(to); + + ocelot_write_rix(ocelot, val, ANA_PGID_PGID, pgid); +} + +static void felix_migrate_flood_to_npi_port(struct dsa_switch *ds, int port) +{ + struct ocelot *ocelot = ds->priv; + + felix_migrate_pgid_bit(ds, port, ocelot->num_phys_ports, PGID_UC); + felix_migrate_pgid_bit(ds, port, ocelot->num_phys_ports, PGID_MC); + felix_migrate_pgid_bit(ds, port, ocelot->num_phys_ports, PGID_BC); +} + +static void +felix_migrate_flood_to_tag_8021q_port(struct dsa_switch *ds, int port) +{ + struct ocelot *ocelot = ds->priv; + + felix_migrate_pgid_bit(ds, ocelot->num_phys_ports, port, PGID_UC); + felix_migrate_pgid_bit(ds, ocelot->num_phys_ports, port, PGID_MC); + felix_migrate_pgid_bit(ds, 
ocelot->num_phys_ports, port, PGID_BC); +} + +/* ocelot->npi was already set to -1 by felix_npi_port_deinit, so + * ocelot_fdb_add() will not redirect FDB entries towards the + * CPU port module here, which is what we want. + */ +static int +felix_migrate_fdbs_to_tag_8021q_port(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct net_device *bridge_dev = felix_classify_db(db); + struct ocelot *ocelot = ds->priv; + int cpu = ocelot->num_phys_ports; + int err; + + err = ocelot_fdb_del(ocelot, cpu, addr, vid, bridge_dev); + if (err) + return err; + + return ocelot_fdb_add(ocelot, port, addr, vid, bridge_dev); +} + +static int +felix_migrate_mdbs_to_tag_8021q_port(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct net_device *bridge_dev = felix_classify_db(db); + struct switchdev_obj_port_mdb mdb; + struct ocelot *ocelot = ds->priv; + int cpu = ocelot->num_phys_ports; + int err; + + memset(&mdb, 0, sizeof(mdb)); + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = ocelot_port_mdb_del(ocelot, cpu, &mdb, bridge_dev); + if (err) + return err; + + return ocelot_port_mdb_add(ocelot, port, &mdb, bridge_dev); +} + +/* Set up VCAP ES0 rules for pushing a tag_8021q VLAN towards the CPU such that + * the tagger can perform RX source port identification. 
+ */ +static int felix_tag_8021q_vlan_add_rx(struct felix *felix, int port, u16 vid) { struct ocelot_vcap_filter *outer_tagging_rule; struct ocelot *ocelot = &felix->ocelot; struct dsa_switch *ds = felix->ds; int key_length, upstream, err; - /* We don't need to install the rxvlan into the other ports' filtering - * tables, because we're just pushing the rxvlan when sending towards - * the CPU - */ - if (!pvid) - return 0; - key_length = ocelot->vcap[VCAP_ES0].keys[VCAP_ES0_IGR_PORT].length; upstream = dsa_upstream_port(ds, port); @@ -71,21 +201,32 @@ static int felix_tag_8021q_rxvlan_add(struct felix *felix, int port, u16 vid, return err; } -static int felix_tag_8021q_txvlan_add(struct felix *felix, int port, u16 vid, - bool pvid, bool untagged) +static int felix_tag_8021q_vlan_del_rx(struct felix *felix, int port, u16 vid) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot_vcap_block *block_vcap_es0; + struct ocelot *ocelot = &felix->ocelot; + + block_vcap_es0 = &ocelot->block[VCAP_ES0]; + + outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, + port, false); + if (!outer_tagging_rule) + return -ENOENT; + + return ocelot_vcap_filter_del(ocelot, outer_tagging_rule); +} + +/* Set up VCAP IS1 rules for stripping the tag_8021q VLAN on TX and VCAP IS2 + * rules for steering those tagged packets towards the correct destination port + */ +static int felix_tag_8021q_vlan_add_tx(struct felix *felix, int port, u16 vid) { struct ocelot_vcap_filter *untagging_rule, *redirect_rule; struct ocelot *ocelot = &felix->ocelot; struct dsa_switch *ds = felix->ds; int upstream, err; - /* tag_8021q.c assumes we are implementing this via port VLAN - * membership, which we aren't. So we don't need to add any VCAP filter - * for the CPU port. 
- */ - if (ocelot->ports[port]->is_dsa_8021q_cpu) - return 0; - untagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL); if (!untagging_rule) return -ENOMEM; @@ -142,49 +283,7 @@ static int felix_tag_8021q_txvlan_add(struct felix *felix, int port, u16 vid, return 0; } -static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, - u16 flags) -{ - bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = flags & BRIDGE_VLAN_INFO_PVID; - struct ocelot *ocelot = ds->priv; - - if (vid_is_dsa_8021q_rxvlan(vid)) - return felix_tag_8021q_rxvlan_add(ocelot_to_felix(ocelot), - port, vid, pvid, untagged); - - if (vid_is_dsa_8021q_txvlan(vid)) - return felix_tag_8021q_txvlan_add(ocelot_to_felix(ocelot), - port, vid, pvid, untagged); - - return 0; -} - -static int felix_tag_8021q_rxvlan_del(struct felix *felix, int port, u16 vid) -{ - struct ocelot_vcap_filter *outer_tagging_rule; - struct ocelot_vcap_block *block_vcap_es0; - struct ocelot *ocelot = &felix->ocelot; - - block_vcap_es0 = &ocelot->block[VCAP_ES0]; - - outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, - port, false); - /* In rxvlan_add, we had the "if (!pvid) return 0" logic to avoid - * installing outer tagging ES0 rules where they weren't needed. - * But in rxvlan_del, the API doesn't give us the "flags" anymore, - * so that forces us to be slightly sloppy here, and just assume that - * if we didn't find an outer_tagging_rule it means that there was - * none in the first place, i.e. rxvlan_del is called on a non-pvid - * port. This is most probably true though. 
- */ - if (!outer_tagging_rule) - return 0; - - return ocelot_vcap_filter_del(ocelot, outer_tagging_rule); -} - -static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid) +static int felix_tag_8021q_vlan_del_tx(struct felix *felix, int port, u16 vid) { struct ocelot_vcap_filter *untagging_rule, *redirect_rule; struct ocelot_vcap_block *block_vcap_is1; @@ -192,16 +291,13 @@ static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid) struct ocelot *ocelot = &felix->ocelot; int err; - if (ocelot->ports[port]->is_dsa_8021q_cpu) - return 0; - block_vcap_is1 = &ocelot->block[VCAP_IS1]; block_vcap_is2 = &ocelot->block[VCAP_IS2]; untagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, port, false); if (!untagging_rule) - return 0; + return -ENOENT; err = ocelot_vcap_filter_del(ocelot, untagging_rule); if (err) @@ -210,22 +306,54 @@ static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid) redirect_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, port, false); if (!redirect_rule) - return 0; + return -ENOENT; return ocelot_vcap_filter_del(ocelot, redirect_rule); } +static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, + u16 flags) +{ + struct ocelot *ocelot = ds->priv; + int err; + + /* tag_8021q.c assumes we are implementing this via port VLAN + * membership, which we aren't. So we don't need to add any VCAP filter + * for the CPU port. 
+ */ + if (!dsa_is_user_port(ds, port)) + return 0; + + err = felix_tag_8021q_vlan_add_rx(ocelot_to_felix(ocelot), port, vid); + if (err) + return err; + + err = felix_tag_8021q_vlan_add_tx(ocelot_to_felix(ocelot), port, vid); + if (err) { + felix_tag_8021q_vlan_del_rx(ocelot_to_felix(ocelot), port, vid); + return err; + } + + return 0; +} + static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { struct ocelot *ocelot = ds->priv; + int err; + + if (!dsa_is_user_port(ds, port)) + return 0; - if (vid_is_dsa_8021q_rxvlan(vid)) - return felix_tag_8021q_rxvlan_del(ocelot_to_felix(ocelot), - port, vid); + err = felix_tag_8021q_vlan_del_rx(ocelot_to_felix(ocelot), port, vid); + if (err) + return err; - if (vid_is_dsa_8021q_txvlan(vid)) - return felix_tag_8021q_txvlan_del(ocelot_to_felix(ocelot), - port, vid); + err = felix_tag_8021q_vlan_del_tx(ocelot_to_felix(ocelot), port, vid); + if (err) { + felix_tag_8021q_vlan_add_rx(ocelot_to_felix(ocelot), port, vid); + return err; + } return 0; } @@ -241,8 +369,7 @@ static void felix_8021q_cpu_port_init(struct ocelot *ocelot, int port) { mutex_lock(&ocelot->fwd_domain_lock); - ocelot->ports[port]->is_dsa_8021q_cpu = true; - ocelot->npi = -1; + ocelot_port_set_dsa_8021q_cpu(ocelot, port); /* Overwrite PGID_CPU with the non-tagging port */ ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, PGID_CPU); @@ -256,7 +383,7 @@ static void felix_8021q_cpu_port_deinit(struct ocelot *ocelot, int port) { mutex_lock(&ocelot->fwd_domain_lock); - ocelot->ports[port]->is_dsa_8021q_cpu = false; + ocelot_port_unset_dsa_8021q_cpu(ocelot, port); /* Restore PGID_CPU */ ocelot_write_rix(ocelot, BIT(ocelot->num_phys_ports), ANA_PGID_PGID, @@ -333,10 +460,9 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds, return 0; } -static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) +static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu, bool change) { struct ocelot *ocelot = ds->priv; - unsigned 
long cpu_flood; struct dsa_port *dp; int err; @@ -358,22 +484,27 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) ANA_PORT_CPU_FWD_BPDU_CFG, dp->index); } - /* In tag_8021q mode, the CPU port module is unused, except for PTP - * frames. So we want to disable flooding of any kind to the CPU port - * module, since packets going there will end in a black hole. - */ - cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)); - ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_UC); - ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC); - ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC); - err = dsa_tag_8021q_register(ds, htons(ETH_P_8021AD)); if (err) return err; + if (change) { + err = dsa_port_walk_fdbs(ds, cpu, + felix_migrate_fdbs_to_tag_8021q_port); + if (err) + goto out_tag_8021q_unregister; + + err = dsa_port_walk_mdbs(ds, cpu, + felix_migrate_mdbs_to_tag_8021q_port); + if (err) + goto out_migrate_fdbs; + + felix_migrate_flood_to_tag_8021q_port(ds, cpu); + } + err = felix_update_trapping_destinations(ds, true); if (err) - goto out_tag_8021q_unregister; + goto out_migrate_flood; /* The ownership of the CPU port module's queues might have just been * transferred to the tag_8021q tagger from the NPI-based tagger. 
@@ -386,6 +517,14 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) return 0; +out_migrate_flood: + if (change) + felix_migrate_flood_to_npi_port(ds, cpu); + if (change) + dsa_port_walk_mdbs(ds, cpu, felix_migrate_mdbs_to_npi_port); +out_migrate_fdbs: + if (change) + dsa_port_walk_fdbs(ds, cpu, felix_migrate_fdbs_to_npi_port); out_tag_8021q_unregister: dsa_tag_8021q_unregister(ds); return err; @@ -460,30 +599,35 @@ static void felix_npi_port_deinit(struct ocelot *ocelot, int port) ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1); } -static int felix_setup_tag_npi(struct dsa_switch *ds, int cpu) +static int felix_setup_tag_npi(struct dsa_switch *ds, int cpu, bool change) { struct ocelot *ocelot = ds->priv; - unsigned long cpu_flood; + int err; - felix_npi_port_init(ocelot, cpu); + if (change) { + err = dsa_port_walk_fdbs(ds, cpu, + felix_migrate_fdbs_to_npi_port); + if (err) + return err; - /* Include the CPU port module (and indirectly, the NPI port) - * in the forwarding mask for unknown unicast - the hardware - * default value for ANA_FLOODING_FLD_UNICAST excludes - * BIT(ocelot->num_phys_ports), and so does ocelot_init, - * since Ocelot relies on whitelisting MAC addresses towards - * PGID_CPU. - * We do this because DSA does not yet perform RX filtering, - * and the NPI port does not perform source address learning, - * so traffic sent to Linux is effectively unknown from the - * switch's perspective. 
- */ - cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)); - ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_UC); - ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_MC); - ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_BC); + err = dsa_port_walk_mdbs(ds, cpu, + felix_migrate_mdbs_to_npi_port); + if (err) + goto out_migrate_fdbs; + + felix_migrate_flood_to_npi_port(ds, cpu); + } + + felix_npi_port_init(ocelot, cpu); return 0; + +out_migrate_fdbs: + if (change) + dsa_port_walk_fdbs(ds, cpu, + felix_migrate_fdbs_to_tag_8021q_port); + + return err; } static void felix_teardown_tag_npi(struct dsa_switch *ds, int cpu) @@ -494,17 +638,17 @@ static void felix_teardown_tag_npi(struct dsa_switch *ds, int cpu) } static int felix_set_tag_protocol(struct dsa_switch *ds, int cpu, - enum dsa_tag_protocol proto) + enum dsa_tag_protocol proto, bool change) { int err; switch (proto) { case DSA_TAG_PROTO_SEVILLE: case DSA_TAG_PROTO_OCELOT: - err = felix_setup_tag_npi(ds, cpu); + err = felix_setup_tag_npi(ds, cpu, change); break; case DSA_TAG_PROTO_OCELOT_8021Q: - err = felix_setup_tag_8021q(ds, cpu); + err = felix_setup_tag_8021q(ds, cpu, change); break; default: err = -EPROTONOSUPPORT; @@ -548,9 +692,9 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, felix_del_tag_protocol(ds, cpu, old_proto); - err = felix_set_tag_protocol(ds, cpu, proto); + err = felix_set_tag_protocol(ds, cpu, proto, true); if (err) { - felix_set_tag_protocol(ds, cpu, old_proto); + felix_set_tag_protocol(ds, cpu, old_proto, true); return err; } @@ -599,35 +743,81 @@ static int felix_fdb_dump(struct dsa_switch *ds, int port, } static int felix_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { + struct net_device *bridge_dev = felix_classify_db(db); struct ocelot *ocelot = ds->priv; - return ocelot_fdb_add(ocelot, port, addr, vid); + if 
(IS_ERR(bridge_dev)) + return PTR_ERR(bridge_dev); + + return ocelot_fdb_add(ocelot, port, addr, vid, bridge_dev); } static int felix_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { + struct net_device *bridge_dev = felix_classify_db(db); struct ocelot *ocelot = ds->priv; - return ocelot_fdb_del(ocelot, port, addr, vid); + if (IS_ERR(bridge_dev)) + return PTR_ERR(bridge_dev); + + return ocelot_fdb_del(ocelot, port, addr, vid, bridge_dev); +} + +static int felix_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct net_device *bridge_dev = felix_classify_db(db); + struct ocelot *ocelot = ds->priv; + + if (IS_ERR(bridge_dev)) + return PTR_ERR(bridge_dev); + + return ocelot_lag_fdb_add(ocelot, lag.dev, addr, vid, bridge_dev); +} + +static int felix_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct net_device *bridge_dev = felix_classify_db(db); + struct ocelot *ocelot = ds->priv; + + if (IS_ERR(bridge_dev)) + return PTR_ERR(bridge_dev); + + return ocelot_lag_fdb_del(ocelot, lag.dev, addr, vid, bridge_dev); } static int felix_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { + struct net_device *bridge_dev = felix_classify_db(db); struct ocelot *ocelot = ds->priv; - return ocelot_port_mdb_add(ocelot, port, mdb); + if (IS_ERR(bridge_dev)) + return PTR_ERR(bridge_dev); + + return ocelot_port_mdb_add(ocelot, port, mdb, bridge_dev); } static int felix_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { + struct net_device *bridge_dev = felix_classify_db(db); struct ocelot *ocelot = ds->priv; - return ocelot_port_mdb_del(ocelot, port, mdb); + if 
(IS_ERR(bridge_dev)) + return PTR_ERR(bridge_dev); + + return ocelot_port_mdb_del(ocelot, port, mdb, bridge_dev); } static void felix_bridge_stp_state_set(struct dsa_switch *ds, int port, @@ -659,13 +849,13 @@ static int felix_bridge_flags(struct dsa_switch *ds, int port, } static int felix_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, bool *tx_fwd_offload) + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; - ocelot_port_bridge_join(ocelot, port, bridge.dev); - - return 0; + return ocelot_port_bridge_join(ocelot, port, bridge.dev, bridge.num, + extack); } static void felix_bridge_leave(struct dsa_switch *ds, int port, @@ -677,20 +867,20 @@ static void felix_bridge_leave(struct dsa_switch *ds, int port, } static int felix_lag_join(struct dsa_switch *ds, int port, - struct net_device *bond, + struct dsa_lag lag, struct netdev_lag_upper_info *info) { struct ocelot *ocelot = ds->priv; - return ocelot_port_lag_join(ocelot, port, bond, info); + return ocelot_port_lag_join(ocelot, port, lag.dev, info); } static int felix_lag_leave(struct dsa_switch *ds, int port, - struct net_device *bond) + struct dsa_lag lag) { struct ocelot *ocelot = ds->priv; - ocelot_port_lag_leave(ocelot, port, bond); + ocelot_port_lag_leave(ocelot, port, lag.dev); return 0; } @@ -762,6 +952,21 @@ static int felix_vlan_del(struct dsa_switch *ds, int port, return ocelot_vlan_del(ocelot, port, vlan->vid); } +static void felix_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) +{ + struct ocelot *ocelot = ds->priv; + + /* This driver does not make use of the speed, duplex, pause or the + * advertisement in its mac_config, so it is safe to mark this driver + * as non-legacy. 
+ */ + config->legacy_pre_march2020 = false; + + __set_bit(ocelot->ports[port]->phy_mode, + config->supported_interfaces); +} + static void felix_phylink_validate(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state) @@ -773,16 +978,18 @@ static void felix_phylink_validate(struct dsa_switch *ds, int port, felix->info->phylink_validate(ocelot, port, supported, state); } -static void felix_phylink_mac_config(struct dsa_switch *ds, int port, - unsigned int link_an_mode, - const struct phylink_link_state *state) +static struct phylink_pcs *felix_phylink_mac_select_pcs(struct dsa_switch *ds, + int port, + phy_interface_t iface) { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); - struct dsa_port *dp = dsa_to_port(ds, port); + struct phylink_pcs *pcs = NULL; if (felix->pcs && felix->pcs[port]) - phylink_set_pcs(dp->pl, felix->pcs[port]); + pcs = felix->pcs[port]; + + return pcs; } static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port, @@ -864,11 +1071,28 @@ static int felix_get_ts_info(struct dsa_switch *ds, int port, return ocelot_get_ts_info(ocelot, port, info); } +static const u32 felix_phy_match_table[PHY_INTERFACE_MODE_MAX] = { + [PHY_INTERFACE_MODE_INTERNAL] = OCELOT_PORT_MODE_INTERNAL, + [PHY_INTERFACE_MODE_SGMII] = OCELOT_PORT_MODE_SGMII, + [PHY_INTERFACE_MODE_QSGMII] = OCELOT_PORT_MODE_QSGMII, + [PHY_INTERFACE_MODE_USXGMII] = OCELOT_PORT_MODE_USXGMII, + [PHY_INTERFACE_MODE_2500BASEX] = OCELOT_PORT_MODE_2500BASEX, +}; + +static int felix_validate_phy_mode(struct felix *felix, int port, + phy_interface_t phy_mode) +{ + u32 modes = felix->info->port_modes[port]; + + if (felix_phy_match_table[phy_mode] & modes) + return 0; + return -EOPNOTSUPP; +} + static int felix_parse_ports_node(struct felix *felix, struct device_node *ports_node, phy_interface_t *port_phy_modes) { - struct ocelot *ocelot = &felix->ocelot; struct device *dev = felix->ocelot.dev; struct device_node *child; @@ 
-895,7 +1119,7 @@ static int felix_parse_ports_node(struct felix *felix, return -ENODEV; } - err = felix->info->prevalidate_phy_mode(ocelot, port, phy_mode); + err = felix_validate_phy_mode(felix, port, phy_mode); if (err < 0) { dev_err(dev, "Unsupported PHY mode %s on port %d\n", phy_modes(phy_mode), port); @@ -1169,12 +1393,14 @@ static int felix_setup(struct dsa_switch *ds) /* The initial tag protocol is NPI which always returns 0, so * there's no real point in checking for errors. */ - felix_set_tag_protocol(ds, dp->index, felix->tag_proto); + felix_set_tag_protocol(ds, dp->index, felix->tag_proto, false); break; } ds->mtu_enforcement_ingress = true; ds->assisted_learning_on_cpu_port = true; + ds->fdb_isolation = true; + ds->max_num_bridges = ds->num_ports; return 0; @@ -1239,10 +1465,10 @@ static int felix_hwtstamp_set(struct dsa_switch *ds, int port, return felix_update_trapping_destinations(ds, using_tag_8021q); } -static bool felix_check_xtr_pkt(struct ocelot *ocelot, unsigned int ptp_type) +static bool felix_check_xtr_pkt(struct ocelot *ocelot) { struct felix *felix = ocelot_to_felix(ocelot); - int err, grp = 0; + int err = 0, grp = 0; if (felix->tag_proto != DSA_TAG_PROTO_OCELOT_8021Q) return false; @@ -1250,9 +1476,6 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot, unsigned int ptp_type) if (!felix->info->quirk_no_xtr_irq) return false; - if (ptp_type == PTP_CLASS_NONE) - return false; - while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; unsigned int type; @@ -1282,8 +1505,12 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot, unsigned int ptp_type) } out: - if (err < 0) + if (err < 0) { + dev_err_ratelimited(ocelot->dev, + "Error during packet extraction: %pe\n", + ERR_PTR(err)); ocelot_drain_cpu_queue(ocelot, 0); + } return true; } @@ -1303,7 +1530,7 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port, * MMIO in the CPU port module, and inject that into the stack from * ocelot_xtr_poll(). 
*/ - if (felix_check_xtr_pkt(ocelot, type)) { + if (felix_check_xtr_pkt(ocelot)) { kfree_skb(skb); return true; } @@ -1571,14 +1798,17 @@ const struct dsa_switch_ops felix_switch_ops = { .get_ethtool_stats = felix_get_ethtool_stats, .get_sset_count = felix_get_sset_count, .get_ts_info = felix_get_ts_info, + .phylink_get_caps = felix_phylink_get_caps, .phylink_validate = felix_phylink_validate, - .phylink_mac_config = felix_phylink_mac_config, + .phylink_mac_select_pcs = felix_phylink_mac_select_pcs, .phylink_mac_link_down = felix_phylink_mac_link_down, .phylink_mac_link_up = felix_phylink_mac_link_up, .port_fast_age = felix_port_fast_age, .port_fdb_dump = felix_fdb_dump, .port_fdb_add = felix_fdb_add, .port_fdb_del = felix_fdb_del, + .lag_fdb_add = felix_lag_fdb_add, + .lag_fdb_del = felix_lag_fdb_del, .port_mdb_add = felix_mdb_add, .port_mdb_del = felix_mdb_del, .port_pre_bridge_flags = felix_pre_bridge_flags, diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 9395ac119d33..f083b06fdfe9 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -7,6 +7,12 @@ #define ocelot_to_felix(o) container_of((o), struct felix, ocelot) #define FELIX_MAC_QUIRKS OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION +#define OCELOT_PORT_MODE_INTERNAL BIT(0) +#define OCELOT_PORT_MODE_SGMII BIT(1) +#define OCELOT_PORT_MODE_QSGMII BIT(2) +#define OCELOT_PORT_MODE_2500BASEX BIT(3) +#define OCELOT_PORT_MODE_USXGMII BIT(4) + /* Platform-specific information */ struct felix_info { const struct resource *target_io_res; @@ -15,6 +21,7 @@ struct felix_info { const struct reg_field *regfields; const u32 *const *map; const struct ocelot_ops *ops; + const u32 *port_modes; int num_mact_rows; const struct ocelot_stat_layout *stats_layout; unsigned int num_stats; @@ -44,8 +51,6 @@ struct felix_info { void (*phylink_validate)(struct ocelot *ocelot, int port, unsigned long *supported, struct phylink_link_state *state); - int 
(*prevalidate_phy_mode)(struct ocelot *ocelot, int port, - phy_interface_t phy_mode); int (*port_setup_tc)(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data); void (*port_sched_speed_set)(struct ocelot *ocelot, int port, diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 33f0ceae381d..ead3316742f6 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -18,12 +18,27 @@ #include <linux/pci.h> #include "felix.h" +#define VSC9959_NUM_PORTS 6 + #define VSC9959_TAS_GCL_ENTRY_MAX 63 #define VSC9959_VCAP_POLICER_BASE 63 #define VSC9959_VCAP_POLICER_MAX 383 #define VSC9959_SWITCH_PCI_BAR 4 #define VSC9959_IMDIO_PCI_BAR 0 +#define VSC9959_PORT_MODE_SERDES (OCELOT_PORT_MODE_SGMII | \ + OCELOT_PORT_MODE_QSGMII | \ + OCELOT_PORT_MODE_2500BASEX | \ + OCELOT_PORT_MODE_USXGMII) + +static const u32 vsc9959_port_modes[VSC9959_NUM_PORTS] = { + VSC9959_PORT_MODE_SERDES, + VSC9959_PORT_MODE_SERDES, + VSC9959_PORT_MODE_SERDES, + VSC9959_PORT_MODE_SERDES, + [4 ... 5] = OCELOT_PORT_MODE_INTERNAL, +}; + static const u32 vsc9959_ana_regmap[] = { REG(ANA_ADVLEARN, 0x0089a0), REG(ANA_VLANMASK, 0x0089a4), @@ -944,15 +959,8 @@ static void vsc9959_phylink_validate(struct ocelot *ocelot, int port, unsigned long *supported, struct phylink_link_state *state) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != ocelot_port->phy_mode) { - linkmode_zero(supported); - return; - } - phylink_set_port_modes(mask); phylink_set(mask, Autoneg); phylink_set(mask, Pause); @@ -975,27 +983,6 @@ static void vsc9959_phylink_validate(struct ocelot *ocelot, int port, linkmode_and(state->advertising, state->advertising, mask); } -static int vsc9959_prevalidate_phy_mode(struct ocelot *ocelot, int port, - phy_interface_t phy_mode) -{ - switch (phy_mode) { - case PHY_INTERFACE_MODE_INTERNAL:
- if (port != 4 && port != 5) - return -ENOTSUPP; - return 0; - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_USXGMII: - case PHY_INTERFACE_MODE_2500BASEX: - /* Not supported on internal to-CPU ports */ - if (port == 4 || port == 5) - return -ENOTSUPP; - return 0; - default: - return -ENOTSUPP; - } -} - /* Watermark encode * Bit 8: Unit; 0:1, 1:16 * Bit 7-0: Value to be multiplied with unit @@ -2231,14 +2218,14 @@ static const struct felix_info felix_info_vsc9959 = { .vcap_pol_base2 = 0, .vcap_pol_max2 = 0, .num_mact_rows = 2048, - .num_ports = 6, + .num_ports = VSC9959_NUM_PORTS, .num_tx_queues = OCELOT_NUM_TC, .quirk_no_xtr_irq = true, .ptp_caps = &vsc9959_ptp_caps, .mdio_bus_alloc = vsc9959_mdio_bus_alloc, .mdio_bus_free = vsc9959_mdio_bus_free, .phylink_validate = vsc9959_phylink_validate, - .prevalidate_phy_mode = vsc9959_prevalidate_phy_mode, + .port_modes = vsc9959_port_modes, .port_setup_tc = vsc9959_port_setup_tc, .port_sched_speed_set = vsc9959_sched_speed_set, .init_regmap = ocelot_regmap_init, diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index f2f1608a476c..68ef8f111bbe 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -14,11 +14,29 @@ #include <linux/iopoll.h> #include "felix.h" +#define VSC9953_NUM_PORTS 10 + #define VSC9953_VCAP_POLICER_BASE 11 #define VSC9953_VCAP_POLICER_MAX 31 #define VSC9953_VCAP_POLICER_BASE2 120 #define VSC9953_VCAP_POLICER_MAX2 161 +#define VSC9953_PORT_MODE_SERDES (OCELOT_PORT_MODE_SGMII | \ + OCELOT_PORT_MODE_QSGMII) + +static const u32 vsc9953_port_modes[VSC9953_NUM_PORTS] = { + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + VSC9953_PORT_MODE_SERDES, + OCELOT_PORT_MODE_INTERNAL, + OCELOT_PORT_MODE_INTERNAL, +}; + static const u32 
vsc9953_ana_regmap[] = { REG(ANA_ADVLEARN, 0x00b500), REG(ANA_VLANMASK, 0x00b504), @@ -917,15 +935,8 @@ static void vsc9953_phylink_validate(struct ocelot *ocelot, int port, unsigned long *supported, struct phylink_link_state *state) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != ocelot_port->phy_mode) { - linkmode_zero(supported); - return; - } - phylink_set_port_modes(mask); phylink_set(mask, Autoneg); phylink_set(mask, Pause); @@ -945,25 +956,6 @@ static void vsc9953_phylink_validate(struct ocelot *ocelot, int port, linkmode_and(state->advertising, state->advertising, mask); } -static int vsc9953_prevalidate_phy_mode(struct ocelot *ocelot, int port, - phy_interface_t phy_mode) -{ - switch (phy_mode) { - case PHY_INTERFACE_MODE_INTERNAL: - if (port != 8 && port != 9) - return -ENOTSUPP; - return 0; - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - /* Not supported on internal to-CPU ports */ - if (port == 8 || port == 9) - return -ENOTSUPP; - return 0; - default: - return -ENOTSUPP; - } -} - /* Watermark encode * Bit 9: Unit; 0:1, 1:16 * Bit 8-0: Value to be multiplied with unit @@ -1101,12 +1093,12 @@ static const struct felix_info seville_info_vsc9953 = { .vcap_pol_base2 = VSC9953_VCAP_POLICER_BASE2, .vcap_pol_max2 = VSC9953_VCAP_POLICER_MAX2, .num_mact_rows = 2048, - .num_ports = 10, + .num_ports = VSC9953_NUM_PORTS, .num_tx_queues = OCELOT_NUM_TC, .mdio_bus_alloc = vsc9953_mdio_bus_alloc, .mdio_bus_free = vsc9953_mdio_bus_free, .phylink_validate = vsc9953_phylink_validate, - .prevalidate_phy_mode = vsc9953_prevalidate_phy_mode, + .port_modes = vsc9953_port_modes, .init_regmap = ocelot_regmap_init, }; diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 04fa21e37dfa..ee0dbf324268 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1937,6 +1937,7 @@ static int 
qca8k_pcs_config(struct phylink_pcs *pcs, unsigned int mode, default: WARN_ON(1); + return -EINVAL; } /* Enable/disable SerDes auto-negotiation as necessary */ @@ -2246,7 +2247,8 @@ qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) static int qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; int port_mask, cpu_port; @@ -2397,7 +2399,8 @@ qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr, static int qca8k_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; u16 port_mask = BIT(port); @@ -2407,7 +2410,8 @@ qca8k_port_fdb_add(struct dsa_switch *ds, int port, static int qca8k_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; u16 port_mask = BIT(port); @@ -2444,7 +2448,8 @@ qca8k_port_fdb_dump(struct dsa_switch *ds, int port, static int qca8k_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct qca8k_priv *priv = ds->priv; const u8 *addr = mdb->addr; @@ -2455,7 +2460,8 @@ qca8k_port_mdb_add(struct dsa_switch *ds, int port, static int qca8k_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct qca8k_priv *priv = ds->priv; const u8 *addr = mdb->addr; @@ -2646,18 +2652,16 @@ qca8k_get_tag_protocol(struct dsa_switch *ds, int port, } static bool -qca8k_lag_can_offload(struct dsa_switch *ds, - struct net_device *lag, +qca8k_lag_can_offload(struct dsa_switch *ds, struct 
dsa_lag lag, struct netdev_lag_upper_info *info) { struct dsa_port *dp; - int id, members = 0; + int members = 0; - id = dsa_lag_id(ds->dst, lag); - if (id < 0 || id >= ds->num_lag_ids) + if (!lag.id) return false; - dsa_lag_foreach_port(dp, ds->dst, lag) + dsa_lag_foreach_port(dp, ds->dst, &lag) /* Includes the port joining the LAG */ members++; @@ -2675,16 +2679,14 @@ qca8k_lag_can_offload(struct dsa_switch *ds, } static int -qca8k_lag_setup_hash(struct dsa_switch *ds, - struct net_device *lag, +qca8k_lag_setup_hash(struct dsa_switch *ds, struct dsa_lag lag, struct netdev_lag_upper_info *info) { + struct net_device *lag_dev = lag.dev; struct qca8k_priv *priv = ds->priv; bool unique_lag = true; + unsigned int i; u32 hash = 0; - int i, id; - - id = dsa_lag_id(ds->dst, lag); switch (info->hash_type) { case NETDEV_LAG_HASH_L23: @@ -2701,7 +2703,7 @@ qca8k_lag_setup_hash(struct dsa_switch *ds, /* Check if we are the unique configured LAG */ dsa_lags_foreach_id(i, ds->dst) - if (i != id && dsa_lag_dev(ds->dst, i)) { + if (i != lag.id && dsa_lag_by_id(ds->dst, i)) { unique_lag = false; break; } @@ -2716,7 +2718,7 @@ qca8k_lag_setup_hash(struct dsa_switch *ds, if (unique_lag) { priv->lag_hash_mode = hash; } else if (priv->lag_hash_mode != hash) { - netdev_err(lag, "Error: Mismatched Hash Mode across different lag is not supported\n"); + netdev_err(lag_dev, "Error: Mismatched Hash Mode across different lag is not supported\n"); return -EOPNOTSUPP; } @@ -2726,13 +2728,14 @@ qca8k_lag_setup_hash(struct dsa_switch *ds, static int qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port, - struct net_device *lag, bool delete) + struct dsa_lag lag, bool delete) { struct qca8k_priv *priv = ds->priv; int ret, id, i; u32 val; - id = dsa_lag_id(ds->dst, lag); + /* DSA LAG IDs are one-based, hardware is zero-based */ + id = lag.id - 1; /* Read current port member */ ret = regmap_read(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL0, &val); @@ -2794,8 +2797,7 @@ 
qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port, } static int -qca8k_port_lag_join(struct dsa_switch *ds, int port, - struct net_device *lag, +qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, struct netdev_lag_upper_info *info) { int ret; @@ -2812,7 +2814,7 @@ qca8k_port_lag_join(struct dsa_switch *ds, int port, static int qca8k_port_lag_leave(struct dsa_switch *ds, int port, - struct net_device *lag) + struct dsa_lag lag) { return qca8k_lag_refresh_portmap(ds, port, lag, true); } diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c index 0308be95d00a..31e1f100e48e 100644 --- a/drivers/net/dsa/realtek/realtek-mdio.c +++ b/drivers/net/dsa/realtek/realtek-mdio.c @@ -98,6 +98,20 @@ out_unlock: return ret; } +static void realtek_mdio_lock(void *ctx) +{ + struct realtek_priv *priv = ctx; + + mutex_lock(&priv->map_lock); +} + +static void realtek_mdio_unlock(void *ctx) +{ + struct realtek_priv *priv = ctx; + + mutex_unlock(&priv->map_lock); +} + static const struct regmap_config realtek_mdio_regmap_config = { .reg_bits = 10, /* A4..A0 R4..R0 */ .val_bits = 16, @@ -108,6 +122,21 @@ static const struct regmap_config realtek_mdio_regmap_config = { .reg_read = realtek_mdio_read, .reg_write = realtek_mdio_write, .cache_type = REGCACHE_NONE, + .lock = realtek_mdio_lock, + .unlock = realtek_mdio_unlock, +}; + +static const struct regmap_config realtek_mdio_nolock_regmap_config = { + .reg_bits = 10, /* A4..A0 R4..R0 */ + .val_bits = 16, + .reg_stride = 1, + /* PHY regs are at 0x8000 */ + .max_register = 0xffff, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .reg_read = realtek_mdio_read, + .reg_write = realtek_mdio_write, + .cache_type = REGCACHE_NONE, + .disable_locking = true, }; static int realtek_mdio_probe(struct mdio_device *mdiodev) @@ -115,8 +144,9 @@ static int realtek_mdio_probe(struct mdio_device *mdiodev) struct realtek_priv *priv; struct device *dev = &mdiodev->dev; const struct realtek_variant 
*var; - int ret; + struct regmap_config rc; struct device_node *np; + int ret; var = of_device_get_match_data(dev); if (!var) @@ -126,13 +156,25 @@ static int realtek_mdio_probe(struct mdio_device *mdiodev) if (!priv) return -ENOMEM; - priv->map = devm_regmap_init(dev, NULL, priv, &realtek_mdio_regmap_config); + mutex_init(&priv->map_lock); + + rc = realtek_mdio_regmap_config; + rc.lock_arg = priv; + priv->map = devm_regmap_init(dev, NULL, priv, &rc); if (IS_ERR(priv->map)) { ret = PTR_ERR(priv->map); dev_err(dev, "regmap init failed: %d\n", ret); return ret; } + rc = realtek_mdio_nolock_regmap_config; + priv->map_nolock = devm_regmap_init(dev, NULL, priv, &rc); + if (IS_ERR(priv->map_nolock)) { + ret = PTR_ERR(priv->map_nolock); + dev_err(dev, "regmap init failed: %d\n", ret); + return ret; + } + priv->mdio_addr = mdiodev->addr; priv->bus = mdiodev->bus; priv->dev = &mdiodev->dev; diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index 8806b74bd7a8..2243d3da55b2 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -311,7 +311,21 @@ static int realtek_smi_read(void *ctx, u32 reg, u32 *val) return realtek_smi_read_reg(priv, reg, val); } -static const struct regmap_config realtek_smi_mdio_regmap_config = { +static void realtek_smi_lock(void *ctx) +{ + struct realtek_priv *priv = ctx; + + mutex_lock(&priv->map_lock); +} + +static void realtek_smi_unlock(void *ctx) +{ + struct realtek_priv *priv = ctx; + + mutex_unlock(&priv->map_lock); +} + +static const struct regmap_config realtek_smi_regmap_config = { .reg_bits = 10, /* A4..A0 R4..R0 */ .val_bits = 16, .reg_stride = 1, @@ -321,6 +335,21 @@ static const struct regmap_config realtek_smi_mdio_regmap_config = { .reg_read = realtek_smi_read, .reg_write = realtek_smi_write, .cache_type = REGCACHE_NONE, + .lock = realtek_smi_lock, + .unlock = realtek_smi_unlock, +}; + +static const struct regmap_config realtek_smi_nolock_regmap_config = 
{ + .reg_bits = 10, /* A4..A0 R4..R0 */ + .val_bits = 16, + .reg_stride = 1, + /* PHY regs are at 0x8000 */ + .max_register = 0xffff, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .reg_read = realtek_smi_read, + .reg_write = realtek_smi_write, + .cache_type = REGCACHE_NONE, + .disable_locking = true, }; static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum) @@ -385,6 +414,7 @@ static int realtek_smi_probe(struct platform_device *pdev) const struct realtek_variant *var; struct device *dev = &pdev->dev; struct realtek_priv *priv; + struct regmap_config rc; struct device_node *np; int ret; @@ -395,14 +425,26 @@ static int realtek_smi_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; priv->chip_data = (void *)priv + sizeof(*priv); - priv->map = devm_regmap_init(dev, NULL, priv, - &realtek_smi_mdio_regmap_config); + + mutex_init(&priv->map_lock); + + rc = realtek_smi_regmap_config; + rc.lock_arg = priv; + priv->map = devm_regmap_init(dev, NULL, priv, &rc); if (IS_ERR(priv->map)) { ret = PTR_ERR(priv->map); dev_err(dev, "regmap init failed: %d\n", ret); return ret; } + rc = realtek_smi_nolock_regmap_config; + priv->map_nolock = devm_regmap_init(dev, NULL, priv, &rc); + if (IS_ERR(priv->map_nolock)) { + ret = PTR_ERR(priv->map_nolock); + dev_err(dev, "regmap init failed: %d\n", ret); + return ret; + } + /* Link forward and backward */ priv->dev = dev; priv->clk_delay = var->clk_delay; diff --git a/drivers/net/dsa/realtek/realtek.h b/drivers/net/dsa/realtek/realtek.h index e7d3e1bcf8b8..4fa7c6ba874a 100644 --- a/drivers/net/dsa/realtek/realtek.h +++ b/drivers/net/dsa/realtek/realtek.h @@ -52,6 +52,8 @@ struct realtek_priv { struct gpio_desc *mdc; struct gpio_desc *mdio; struct regmap *map; + struct regmap *map_nolock; + struct mutex map_lock; struct mii_bus *slave_mii_bus; struct mii_bus *bus; int mdio_addr; diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index 2ed592147c20..c39d6b744597 100644 --- 
a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -590,7 +590,7 @@ static int rtl8365mb_phy_poll_busy(struct realtek_priv *priv) { u32 val; - return regmap_read_poll_timeout(priv->map, + return regmap_read_poll_timeout(priv->map_nolock, RTL8365MB_INDIRECT_ACCESS_STATUS_REG, val, !val, 10, 100); } @@ -604,7 +604,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_priv *priv, int phy, /* Set OCP prefix */ val = FIELD_GET(RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK, ocp_addr); ret = regmap_update_bits( - priv->map, RTL8365MB_GPHY_OCP_MSB_0_REG, + priv->map_nolock, RTL8365MB_GPHY_OCP_MSB_0_REG, RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, FIELD_PREP(RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, val)); if (ret) @@ -617,8 +617,8 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_priv *priv, int phy, ocp_addr >> 1); val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK, ocp_addr >> 6); - ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, - val); + ret = regmap_write(priv->map_nolock, + RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, val); if (ret) return ret; @@ -631,36 +631,42 @@ static int rtl8365mb_phy_ocp_read(struct realtek_priv *priv, int phy, u32 val; int ret; + mutex_lock(&priv->map_lock); + ret = rtl8365mb_phy_poll_busy(priv); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr); if (ret) - return ret; + goto out; /* Execute read operation */ val = FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ); - ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + ret = regmap_write(priv->map_nolock, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, + val); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_poll_busy(priv); if (ret) - return ret; + goto out; /* Get PHY register data */ - ret = regmap_read(priv->map, 
RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, - &val); + ret = regmap_read(priv->map_nolock, + RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, &val); if (ret) - return ret; + goto out; *data = val & 0xFFFF; - return 0; +out: + mutex_unlock(&priv->map_lock); + + return ret; } static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy, @@ -669,32 +675,38 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy, u32 val; int ret; + mutex_lock(&priv->map_lock); + ret = rtl8365mb_phy_poll_busy(priv); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr); if (ret) - return ret; + goto out; /* Set PHY register data */ - ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, - data); + ret = regmap_write(priv->map_nolock, + RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, data); if (ret) - return ret; + goto out; /* Execute write operation */ val = FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE); - ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + ret = regmap_write(priv->map_nolock, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, + val); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_poll_busy(priv); if (ret) - return ret; + goto out; + +out: + mutex_unlock(&priv->map_lock); - return 0; + return ret; } diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c index fb6565e68401..1a3406b9e64c 100644 --- a/drivers/net/dsa/realtek/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -1189,7 +1189,8 @@ rtl8366rb_port_disable(struct dsa_switch *ds, int port) static int rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct realtek_priv *priv = ds->priv; unsigned int port_bitmap = 0; diff --git
a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c index 7dcdd784aea4..fad5afe3819c 100644 --- a/drivers/net/dsa/sja1105/sja1105_flower.c +++ b/drivers/net/dsa/sja1105/sja1105_flower.c @@ -300,6 +300,46 @@ static int sja1105_flower_parse_key(struct sja1105_private *priv, return -EOPNOTSUPP; } +static int sja1105_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + int sja1105_cls_flower_add(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress) { @@ -321,12 +361,9 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port, flow_action_for_each(i, act, &rule->action) { switch (act->id) { case FLOW_ACTION_POLICE: - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - rc = -EOPNOTSUPP; + rc = sja1105_policer_validate(&rule->action, act, extack); + if (rc) goto out; - } rc = 
sja1105_flower_policer(priv, port, extack, cookie, &key, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b513713be610..3358e979342c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -393,10 +393,8 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv) .start_dynspc = 0, /* 2^8 + 2^5 + 2^3 + 2^2 + 2^1 + 1 in Koopman notation */ .poly = 0x97, - /* This selects between Independent VLAN Learning (IVL) and - * Shared VLAN Learning (SVL) - */ - .shared_learn = true, + /* Always use Independent VLAN Learning (IVL) */ + .shared_learn = false, /* Don't discard management traffic based on ENFPORT - * we don't perform SMAC port enforcement anyway, so * what we are setting here doesn't matter. @@ -1358,37 +1356,16 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port, return sja1105_clocking_setup_port(priv, port); } -/* The SJA1105 MAC programming model is through the static config (the xMII - * Mode table cannot be dynamically reconfigured), and we have to program - * that early (earlier than PHYLINK calls us, anyway). - * So just error out in case the connected PHY attempts to change the initial - * system interface MII protocol from what is defined in the DT, at least for - * now. 
- */ -static bool sja1105_phy_mode_mismatch(struct sja1105_private *priv, int port, - phy_interface_t interface) -{ - return priv->phy_mode[port] != interface; -} - -static void sja1105_mac_config(struct dsa_switch *ds, int port, - unsigned int mode, - const struct phylink_link_state *state) +static struct phylink_pcs * +sja1105_mac_select_pcs(struct dsa_switch *ds, int port, phy_interface_t iface) { - struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; - struct dw_xpcs *xpcs; - - if (sja1105_phy_mode_mismatch(priv, port, state->interface)) { - dev_err(ds->dev, "Changing PHY mode to %s not supported!\n", - phy_modes(state->interface)); - return; - } - - xpcs = priv->xpcs[port]; + struct dw_xpcs *xpcs = priv->xpcs[port]; if (xpcs) - phylink_set_pcs(dp->pl, &xpcs->pcs); + return &xpcs->pcs; + + return NULL; } static void sja1105_mac_link_down(struct dsa_switch *ds, int port, @@ -1412,48 +1389,53 @@ static void sja1105_mac_link_up(struct dsa_switch *ds, int port, sja1105_inhibit_tx(priv, BIT(port), false); } -static void sja1105_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void sja1105_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - /* Construct a new mask which exhaustively contains all link features - * supported by the MAC, and then apply that (logical AND) to what will - * be sent to the PHY for "marketing". - */ - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct sja1105_private *priv = ds->priv; struct sja1105_xmii_params_entry *mii; + phy_interface_t phy_mode; - mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries; - - /* include/linux/phylink.h says: - * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink - * expects the MAC driver to return all supported link modes. 
+ /* This driver does not make use of the speed, duplex, pause or the + * advertisement in its mac_config, so it is safe to mark this driver + * as non-legacy. */ - if (state->interface != PHY_INTERFACE_MODE_NA && - sja1105_phy_mode_mismatch(priv, port, state->interface)) { - linkmode_zero(supported); - return; + config->legacy_pre_march2020 = false; + + phy_mode = priv->phy_mode[port]; + if (phy_mode == PHY_INTERFACE_MODE_SGMII || + phy_mode == PHY_INTERFACE_MODE_2500BASEX) { + /* Changing the PHY mode on SERDES ports is possible and makes + * sense, because that is done through the XPCS. We allow + * changes between SGMII and 2500base-X. + */ + if (priv->info->supports_sgmii[port]) + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + + if (priv->info->supports_2500basex[port]) + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + config->supported_interfaces); + } else { + /* The SJA1105 MAC programming model is through the static + * config (the xMII Mode table cannot be dynamically + * reconfigured), and we have to program that early. + */ + __set_bit(phy_mode, config->supported_interfaces); } /* The MAC does not support pause frames, and also doesn't * support half-duplex traffic modes. 
*/ - phylink_set(mask, Autoneg); - phylink_set(mask, MII); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Full); - phylink_set(mask, 100baseT1_Full); + config->mac_capabilities = MAC_10FD | MAC_100FD; + + mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries; if (mii->xmii_mode[port] == XMII_MODE_RGMII || mii->xmii_mode[port] == XMII_MODE_SGMII) - phylink_set(mask, 1000baseT_Full); - if (priv->info->supports_2500basex[port]) { - phylink_set(mask, 2500baseT_Full); - phylink_set(mask, 2500baseX_Full); - } + config->mac_capabilities |= MAC_1000FD; - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); + if (priv->info->supports_2500basex[port]) + config->mac_capabilities |= MAC_2500FD; } static int @@ -1819,25 +1801,52 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, } static int sja1105_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct sja1105_private *priv = ds->priv; + if (!vid) { + switch (db.type) { + case DSA_DB_PORT: + vid = dsa_tag_8021q_standalone_vid(db.dp); + break; + case DSA_DB_BRIDGE: + vid = dsa_tag_8021q_bridge_vid(db.bridge.num); + break; + default: + return -EOPNOTSUPP; + } + } + return priv->info->fdb_add_cmd(ds, port, addr, vid); } static int sja1105_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct sja1105_private *priv = ds->priv; + if (!vid) { + switch (db.type) { + case DSA_DB_PORT: + vid = dsa_tag_8021q_standalone_vid(db.dp); + break; + case DSA_DB_BRIDGE: + vid = dsa_tag_8021q_bridge_vid(db.bridge.num); + break; + default: + return -EOPNOTSUPP; + } + } + return priv->info->fdb_del_cmd(ds, port, addr, vid); } static int sja1105_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { - struct dsa_port *dp = dsa_to_port(ds, port); struct 
sja1105_private *priv = ds->priv; struct device *dev = ds->dev; int i; @@ -1874,7 +1883,7 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, u64_to_ether_addr(l2_lookup.macaddr, macaddr); /* We need to hide the dsa_8021q VLANs from the user. */ - if (!dsa_port_is_vlan_filtering(dp)) + if (vid_is_dsa_8021q(l2_lookup.vlanid)) l2_lookup.vlanid = 0; rc = cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); if (rc) @@ -1885,7 +1894,15 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, static void sja1105_fast_age(struct dsa_switch *ds, int port) { + struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = { + .dev = dsa_port_bridge_dev_get(dp), + .num = dsa_port_bridge_num_get(dp), + }, + }; int i; for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) { @@ -1913,7 +1930,7 @@ static void sja1105_fast_age(struct dsa_switch *ds, int port) u64_to_ether_addr(l2_lookup.macaddr, macaddr); - rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid); + rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid, db); if (rc) { dev_err(ds->dev, "Failed to delete FDB entry %pM vid %lld: %pe\n", @@ -1924,15 +1941,17 @@ static void sja1105_fast_age(struct dsa_switch *ds, int port) } static int sja1105_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { - return sja1105_fdb_add(ds, port, mdb->addr, mdb->vid); + return sja1105_fdb_add(ds, port, mdb->addr, mdb->vid, db); } static int sja1105_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { - return sja1105_fdb_del(ds, port, mdb->addr, mdb->vid); + return sja1105_fdb_del(ds, port, mdb->addr, mdb->vid, db); } /* Common function for unicast and broadcast flood configuration. 
@@ -2075,7 +2094,8 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, static int sja1105_bridge_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload) + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { int rc; @@ -2083,7 +2103,7 @@ static int sja1105_bridge_join(struct dsa_switch *ds, int port, if (rc) return rc; - rc = dsa_tag_8021q_bridge_tx_fwd_offload(ds, port, bridge); + rc = dsa_tag_8021q_bridge_join(ds, port, bridge); if (rc) { sja1105_bridge_member(ds, port, bridge, false); return rc; @@ -2097,7 +2117,7 @@ static int sja1105_bridge_join(struct dsa_switch *ds, int port, static void sja1105_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { - dsa_tag_8021q_bridge_tx_fwd_unoffload(ds, port, bridge); + dsa_tag_8021q_bridge_leave(ds, port, bridge); sja1105_bridge_member(ds, port, bridge, false); } @@ -2357,7 +2377,6 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port, int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct netlink_ext_ack *extack) { - struct sja1105_l2_lookup_params_entry *l2_lookup_params; struct sja1105_general_params_entry *general_params; struct sja1105_private *priv = ds->priv; struct sja1105_table *table; @@ -2395,28 +2414,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, general_params->incl_srcpt1 = enabled; general_params->incl_srcpt0 = enabled; - /* VLAN filtering => independent VLAN learning. - * No VLAN filtering (or best effort) => shared VLAN learning. - * - * In shared VLAN learning mode, untagged traffic still gets - * pvid-tagged, and the FDB table gets populated with entries - * containing the "real" (pvid or from VLAN tag) VLAN ID. - * However the switch performs a masked L2 lookup in the FDB, - * effectively only looking up a frame's DMAC (and not VID) for the - * forwarding decision. 
- * - * This is extremely convenient for us, because in modes with - * vlan_filtering=0, dsa_8021q actually installs unique pvid's into - * each front panel port. This is good for identification but breaks - * learning badly - the VID of the learnt FDB entry is unique, aka - * no frames coming from any other port are going to have it. So - * for forwarding purposes, this is as though learning was broken - * (all frames get flooded). - */ - table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS]; - l2_lookup_params = table->entries; - l2_lookup_params->shared_learn = !enabled; - for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) continue; @@ -2525,7 +2522,7 @@ static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port, */ if (vid_is_dsa_8021q(vlan->vid)) { NL_SET_ERR_MSG_MOD(extack, - "Range 1024-3071 reserved for dsa_8021q operation"); + "Range 3072-4095 reserved for dsa_8021q operation"); return -EBUSY; } @@ -3102,6 +3099,7 @@ static int sja1105_setup(struct dsa_switch *ds) */ ds->vlan_filtering_is_global = true; ds->untag_bridge_pvid = true; + ds->fdb_isolation = true; /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */ ds->max_num_bridges = 7; @@ -3152,8 +3150,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .set_ageing_time = sja1105_set_ageing_time, .port_change_mtu = sja1105_change_mtu, .port_max_mtu = sja1105_get_max_mtu, - .phylink_validate = sja1105_phylink_validate, - .phylink_mac_config = sja1105_mac_config, + .phylink_get_caps = sja1105_phylink_get_caps, + .phylink_mac_select_pcs = sja1105_mac_select_pcs, .phylink_mac_link_up = sja1105_mac_link_up, .phylink_mac_link_down = sja1105_mac_link_down, .get_strings = sja1105_get_strings, @@ -3346,18 +3344,16 @@ static int sja1105_probe(struct spi_device *spi) return dsa_register_switch(priv->ds); } -static int sja1105_remove(struct spi_device *spi) +static void sja1105_remove(struct spi_device *spi) { struct sja1105_private *priv = 
spi_get_drvdata(spi); if (!priv) - return 0; + return; dsa_unregister_switch(priv->ds); spi_set_drvdata(spi, NULL); - - return 0; } static void sja1105_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index be3068a935af..30fb2cc40164 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -399,7 +399,7 @@ static long sja1105_rxtstamp_work(struct ptp_clock_info *ptp) ts = sja1105_tstamp_reconstruct(ds, ticks, ts); shwt->hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts)); - netif_rx_ni(skb); + netif_rx(skb); } if (ptp_data->extts_enabled) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index f5dca6a9b0f9..b7e95d60a6e4 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -296,6 +296,19 @@ static bool sja1105_vl_key_lower(struct sja1105_vl_lookup_entry *a, return false; } +/* FIXME: this should change when the bridge upper of the port changes. 
*/ +static u16 sja1105_port_get_tag_8021q_vid(struct dsa_port *dp) +{ + unsigned long bridge_num; + + if (!dp->bridge) + return dsa_tag_8021q_standalone_vid(dp); + + bridge_num = dsa_port_bridge_num_get(dp); + + return dsa_tag_8021q_bridge_vid(bridge_num); +} + static int sja1105_init_virtual_links(struct sja1105_private *priv, struct netlink_ext_ack *extack) { @@ -394,8 +407,9 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv, vl_lookup[k].vlanid = rule->key.vl.vid; vl_lookup[k].vlanprior = rule->key.vl.pcp; } else { + /* FIXME */ struct dsa_port *dp = dsa_to_port(priv->ds, port); - u16 vid = dsa_tag_8021q_rx_vid(dp); + u16 vid = sja1105_port_get_tag_8021q_vid(dp); vl_lookup[k].vlanid = vid; vl_lookup[k].vlanprior = 0; diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 645398901e05..3110895358d8 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -159,18 +159,16 @@ static int vsc73xx_spi_probe(struct spi_device *spi) return vsc73xx_probe(&vsc_spi->vsc); } -static int vsc73xx_spi_remove(struct spi_device *spi) +static void vsc73xx_spi_remove(struct spi_device *spi) { struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi); if (!vsc_spi) - return 0; + return; vsc73xx_remove(&vsc_spi->vsc); spi_set_drvdata(spi, NULL); - - return 0; } static void vsc73xx_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index bc06fe6bac6b..3887ed33c5fe 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -534,7 +534,8 @@ static int xrs700x_bridge_common(struct dsa_switch *ds, int port, } static int xrs700x_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, bool *tx_fwd_offload) + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { return xrs700x_bridge_common(ds, port, bridge, true); } diff --git 
a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index db3ec4768159..bd4cb9d7c35d 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -78,6 +78,7 @@ source "drivers/net/ethernet/ezchip/Kconfig" source "drivers/net/ethernet/faraday/Kconfig" source "drivers/net/ethernet/freescale/Kconfig" source "drivers/net/ethernet/fujitsu/Kconfig" +source "drivers/net/ethernet/fungible/Kconfig" source "drivers/net/ethernet/google/Kconfig" source "drivers/net/ethernet/hisilicon/Kconfig" source "drivers/net/ethernet/huawei/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 8a87c1083d1d..8ef43e0c33c0 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NET_VENDOR_EZCHIP) += ezchip/ obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/ obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/ +obj-$(CONFIG_NET_VENDOR_FUNGIBLE) += fungible/ obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/ obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c index e7a9f9863258..6ba5b024a7be 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.c +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -433,7 +433,7 @@ ax88796c_skb_return(struct ax88796c_device *ax_local, netif_info(ax_local, rx_status, ndev, "< rx, len %zu, type 0x%x\n", skb->len + sizeof(struct ethhdr), skb->protocol); - status = netif_rx_ni(skb); + status = netif_rx(skb); if (status != NET_RX_SUCCESS && net_ratelimit()) netif_info(ax_local, rx_err, ndev, "netif_rx status %d\n", status); @@ -1102,7 +1102,7 @@ err: return ret; } -static int ax88796c_remove(struct spi_device *spi) +static void ax88796c_remove(struct spi_device *spi) { struct ax88796c_device *ax_local = dev_get_drvdata(&spi->dev); struct net_device *ndev = ax_local->ndev; @@ -1112,8 
+1112,6 @@ static int ax88796c_remove(struct spi_device *spi) netif_info(ax_local, probe, ndev, "removing network device %s %s\n", dev_driver_string(&spi->dev), dev_name(&spi->dev)); - - return 0; } #ifdef CONFIG_OF diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index e20aafeb4ca9..b97ed9b5f685 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8216,7 +8216,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) rc = dma_set_coherent_mask(&pdev->dev, persist_dma_mask); if (rc) { dev_err(&pdev->dev, - "pci_set_consistent_dma_mask failed, aborting\n"); + "dma_set_coherent_mask failed, aborting\n"); goto err_out_unmap; } } else if ((rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 774c1f1a57c3..eedb48d945ed 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -100,6 +100,9 @@ MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME_E1); MODULE_FIRMWARE(FW_FILE_NAME_E1H); MODULE_FIRMWARE(FW_FILE_NAME_E2); +MODULE_FIRMWARE(FW_FILE_NAME_E1_V15); +MODULE_FIRMWARE(FW_FILE_NAME_E1H_V15); +MODULE_FIRMWARE(FW_FILE_NAME_E2_V15); int bnx2x_num_queues; module_param_named(num_queues, bnx2x_num_queues, int, 0444); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c313221348c5..37facef47846 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4776,8 +4776,10 @@ static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id) return rc; req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); - req->num_mc_entries = cpu_to_le32(vnic->mc_list_count); - req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping); + if (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST) { + req->num_mc_entries = 
cpu_to_le32(vnic->mc_list_count); + req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping); + } req->mask = cpu_to_le32(vnic->rx_mask); return hwrm_req_send_silent(bp, req); } @@ -7820,6 +7822,19 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) return 0; } +static void bnxt_remap_fw_health_regs(struct bnxt *bp) +{ + if (!bp->fw_health) + return; + + if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) { + bp->fw_health->status_reliable = true; + bp->fw_health->resets_reliable = true; + } else { + bnxt_try_map_fw_health_reg(bp); + } +} + static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -8672,6 +8687,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) vnic->uc_filter_count = 1; vnic->rx_mask = 0; + if (test_bit(BNXT_STATE_HALF_OPEN, &bp->state)) + goto skip_rx_mask; + if (bp->dev->flags & IFF_BROADCAST) vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; @@ -8681,7 +8699,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (bp->dev->flags & IFF_ALLMULTI) { vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; - } else { + } else if (bp->dev->flags & IFF_MULTICAST) { u32 mask = 0; bnxt_mc_list_updated(bp, &mask); @@ -8692,6 +8710,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (rc) goto err_out; +skip_rx_mask: rc = bnxt_hwrm_set_coal(bp); if (rc) netdev_warn(bp->dev, "HWRM set coalescing failure rc: %x\n", @@ -9883,8 +9902,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) resc_reinit = true; if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) fw_reset = true; - else if (bp->fw_health && !bp->fw_health->status_reliable) - bnxt_try_map_fw_health_reg(bp); + else + bnxt_remap_fw_health_regs(bp); if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) { netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n"); @@ -10364,13 +10383,15 @@ int bnxt_half_open_nic(struct bnxt *bp) goto half_open_err; } - rc = 
bnxt_alloc_mem(bp, false); + rc = bnxt_alloc_mem(bp, true); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); goto half_open_err; } - rc = bnxt_init_nic(bp, false); + set_bit(BNXT_STATE_HALF_OPEN, &bp->state); + rc = bnxt_init_nic(bp, true); if (rc) { + clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); goto half_open_err; } @@ -10378,7 +10399,7 @@ int bnxt_half_open_nic(struct bnxt *bp) half_open_err: bnxt_free_skbs(bp); - bnxt_free_mem(bp, false); + bnxt_free_mem(bp, true); dev_close(bp->dev); return rc; } @@ -10388,9 +10409,10 @@ half_open_err: */ void bnxt_half_close_nic(struct bnxt *bp) { - bnxt_hwrm_resource_free(bp, false, false); + bnxt_hwrm_resource_free(bp, false, true); bnxt_free_skbs(bp); - bnxt_free_mem(bp, false); + bnxt_free_mem(bp, true); + clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); } void bnxt_reenable_sriov(struct bnxt *bp) @@ -10806,7 +10828,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_ALLMULTI) { mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; - } else { + } else if (dev->flags & IFF_MULTICAST) { mc_update = bnxt_mc_list_updated(bp, &mask); } @@ -10883,9 +10905,10 @@ skip_uc: !bnxt_promisc_ok(bp)) vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); - if (rc && vnic->mc_list_count) { + if (rc && (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST)) { netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", rc); + vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_MCAST; vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 4b023e35c765..802ec1e9956d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1921,6 +1921,7 @@ struct bnxt { 
#define BNXT_STATE_RECOVER 12 #define BNXT_STATE_FW_NON_FATAL_COND 13 #define BNXT_STATE_FW_ACTIVATE_RESET 14 +#define BNXT_STATE_HALF_OPEN 15 /* For offline ethtool tests */ #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 4da31b1b84f9..f6e21fac0e69 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -367,6 +367,16 @@ bnxt_dl_livepatch_report_err(struct bnxt *bp, struct netlink_ext_ack *extack, } } +/* Live patch status in NVM */ +#define BNXT_LIVEPATCH_NOT_INSTALLED 0 +#define BNXT_LIVEPATCH_INSTALLED FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL +#define BNXT_LIVEPATCH_REMOVED FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE +#define BNXT_LIVEPATCH_MASK (FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL | \ + FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) +#define BNXT_LIVEPATCH_ACTIVATED BNXT_LIVEPATCH_MASK + +#define BNXT_LIVEPATCH_STATE(flags) ((flags) & BNXT_LIVEPATCH_MASK) + static int bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) { @@ -374,8 +384,9 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) struct hwrm_fw_livepatch_query_input *query_req; struct hwrm_fw_livepatch_output *patch_resp; struct hwrm_fw_livepatch_input *patch_req; + u16 flags, live_patch_state; + bool activated = false; u32 installed = 0; - u16 flags; u8 target; int rc; @@ -394,7 +405,6 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) hwrm_req_drop(bp, query_req); return rc; } - patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE; patch_req->loadtype = FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL; patch_resp = hwrm_req_hold(bp, patch_req); @@ -407,12 +417,20 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) } flags = le16_to_cpu(query_resp->status_flags); - if 
(~flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL) + live_patch_state = BNXT_LIVEPATCH_STATE(flags); + + if (live_patch_state == BNXT_LIVEPATCH_NOT_INSTALLED) continue; - if ((flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) && - !strncmp(query_resp->active_ver, query_resp->install_ver, - sizeof(query_resp->active_ver))) + + if (live_patch_state == BNXT_LIVEPATCH_ACTIVATED) { + activated = true; continue; + } + + if (live_patch_state == BNXT_LIVEPATCH_INSTALLED) + patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE; + else if (live_patch_state == BNXT_LIVEPATCH_REMOVED) + patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE; patch_req->fw_target = target; rc = hwrm_req_send(bp, patch_req); @@ -424,8 +442,13 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) } if (!rc && !installed) { - NL_SET_ERR_MSG_MOD(extack, "No live patches found"); - rc = -ENOENT; + if (activated) { + NL_SET_ERR_MSG_MOD(extack, "Live patch already activated"); + rc = -EEXIST; + } else { + NL_SET_ERR_MSG_MOD(extack, "No live patches found"); + rc = -ENOENT; + } } hwrm_req_drop(bp, query_req); hwrm_req_drop(bp, patch_req); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 5edbee92f5c4..fecb03b49f01 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -26,6 +26,7 @@ #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_hwrm.h" +#include "bnxt_ulp.h" #include "bnxt_xdp.h" #include "bnxt_ptp.h" #include "bnxt_ethtool.h" @@ -1972,6 +1973,9 @@ static int bnxt_get_fecparam(struct net_device *dev, case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: fec->active_fec |= ETHTOOL_FEC_LLRS; break; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE: + fec->active_fec |= ETHTOOL_FEC_OFF; + break; } return 0; } @@ -3457,7 +3461,7 @@ static int bnxt_run_loopback(struct bnxt *bp) if (!skb) return -ENOMEM; data = skb_put(skb, 
pkt_size); - eth_broadcast_addr(data); + ether_addr_copy(&data[i], bp->dev->dev_addr); i += ETH_ALEN; ether_addr_copy(&data[i], bp->dev->dev_addr); i += ETH_ALEN; @@ -3551,9 +3555,12 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!offline) { bnxt_run_fw_tests(bp, test_mask, &test_results); } else { - rc = bnxt_close_nic(bp, false, false); - if (rc) + bnxt_ulp_stop(bp); + rc = bnxt_close_nic(bp, true, false); + if (rc) { + bnxt_ulp_start(bp, rc); return; + } bnxt_run_fw_tests(bp, test_mask, &test_results); buf[BNXT_MACLPBK_TEST_IDX] = 1; @@ -3563,6 +3570,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (rc) { bnxt_hwrm_mac_loopback(bp, false); etest->flags |= ETH_TEST_FL_FAILED; + bnxt_ulp_start(bp, rc); return; } if (bnxt_run_loopback(bp)) @@ -3588,7 +3596,8 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); - rc = bnxt_open_nic(bp, false, true); + rc = bnxt_open_nic(bp, true, true); + bnxt_ulp_start(bp, rc); } if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c index 566c9487ef55..b01d42928a53 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c @@ -644,17 +644,23 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) /* Last byte of resp contains valid bit */ valid = ((u8 *)ctx->resp) + len - 1; - for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) { + for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; ) { /* make sure we read from updated DMA memory */ dma_rmb(); if (*valid) break; - usleep_range(1, 5); + if (j < 10) { + udelay(1); + j++; + } else { + usleep_range(20, 30); + j += 20; + } } if (j >= HWRM_VALID_BIT_DELAY_USEC) { hwrm_err(bp, ctx, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n", - 
hwrm_total_timeout(i), req_type, + hwrm_total_timeout(i) + j, req_type, le16_to_cpu(ctx->req->seq_id), len, *valid); goto exit; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h index d52bd2d63aec..c98032e38188 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h @@ -90,7 +90,7 @@ static inline unsigned int hwrm_total_timeout(unsigned int n) } -#define HWRM_VALID_BIT_DELAY_USEC 150 +#define HWRM_VALID_BIT_DELAY_USEC 50000 static inline bool bnxt_cfa_hwrm_message(u16 req_type) { diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index da41eee2f25c..a06003bfa04b 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -3613,6 +3613,8 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, MAC_STATS_ACCUM_SECS : (MAC_STATS_ACCUM_SECS * 10); adapter->params.pci.vpd_cap_addr = pci_find_capability(adapter->pdev, PCI_CAP_ID_VPD); + if (!adapter->params.pci.vpd_cap_addr) + return -ENODEV; ret = get_vpd_params(adapter, &adapter->params.vpd); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 28fd2de9e4cf..1672d3afe5be 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -8,6 +8,46 @@ #include "cxgb4_filter.h" #include "cxgb4_tc_flower.h" +static int cxgb4_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != 
FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int cxgb4_matchall_egress_validate(struct net_device *dev, struct tc_cls_matchall_offload *cls) { @@ -48,11 +88,10 @@ static int cxgb4_matchall_egress_validate(struct net_device *dev, flow_action_for_each(i, entry, actions) { switch (entry->id) { case FLOW_ACTION_POLICE: - if (entry->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + ret = cxgb4_policer_validate(actions, entry, extack); + if (ret) + return ret; + /* Convert bytes per second to bits per second */ if (entry->police.rate_bytes_ps * 8 > max_link_rate) { NL_SET_ERR_MSG_MOD(extack, @@ -150,11 +189,11 @@ static int cxgb4_matchall_alloc_tc(struct net_device *dev, flow_action_for_each(i, entry, &cls->rule->action) if (entry->id == FLOW_ACTION_POLICE) break; - if (entry->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + + ret = cxgb4_policer_validate(&cls->rule->action, entry, extack); + if (ret) + return ret; + /* Convert from bytes per second to Kbps */ p.u.params.maxrate = div_u64(entry->police.rate_bytes_ps * 8, 1000); p.u.params.channel = pi->tx_chan; diff --git a/drivers/net/ethernet/davicom/dm9051.c 
b/drivers/net/ethernet/davicom/dm9051.c index 6a2bcfbfe891..a523ddda7609 100644 --- a/drivers/net/ethernet/davicom/dm9051.c +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -804,7 +804,7 @@ static int dm9051_loop_rx(struct board_info *db) skb->protocol = eth_type_trans(skb, db->ndev); if (db->ndev->features & NETIF_F_RXCSUM) skb_checksum_none_assert(skb); - netif_rx_ni(skb); + netif_rx(skb); db->ndev->stats.rx_bytes += rxlen; db->ndev->stats.rx_packets++; scanrr++; @@ -845,17 +845,19 @@ static int dm9051_loop_tx(struct board_info *db) while (!skb_queue_empty(&db->txq)) { struct sk_buff *skb; + unsigned int len; skb = skb_dequeue(&db->txq); if (skb) { ntx++; ret = dm9051_single_tx(db, skb->data, skb->len); + len = skb->len; dev_kfree_skb(skb); if (ret < 0) { db->bc.tx_err_counter++; return 0; } - ndev->stats.tx_bytes += skb->len; + ndev->stats.tx_bytes += len; ndev->stats.tx_packets++; } @@ -1223,15 +1225,13 @@ static int dm9051_probe(struct spi_device *spi) return 0; } -static int dm9051_drv_remove(struct spi_device *spi) +static void dm9051_drv_remove(struct spi_device *spi) { struct device *dev = &spi->dev; struct net_device *ndev = dev_get_drvdata(dev); struct board_info *db = to_dm9051_board(ndev); phy_disconnect(db->phydev); - - return 0; } static const struct of_device_id dm9051_match_table[] = { diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 691605c15265..d5356db7539a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -989,117 +989,6 @@ static int ftgmac100_alloc_rx_buffers(struct ftgmac100 *priv) return 0; } -static void ftgmac100_adjust_link(struct net_device *netdev) -{ - struct ftgmac100 *priv = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - bool tx_pause, rx_pause; - int new_speed; - - /* We store "no link" as speed 0 */ - if (!phydev->link) - new_speed = 0; - else - new_speed = phydev->speed; - - /* Grab pause settings from 
PHY if configured to do so */ - if (priv->aneg_pause) { - rx_pause = tx_pause = phydev->pause; - if (phydev->asym_pause) - tx_pause = !rx_pause; - } else { - rx_pause = priv->rx_pause; - tx_pause = priv->tx_pause; - } - - /* Link hasn't changed, do nothing */ - if (phydev->speed == priv->cur_speed && - phydev->duplex == priv->cur_duplex && - rx_pause == priv->rx_pause && - tx_pause == priv->tx_pause) - return; - - /* Print status if we have a link or we had one and just lost it, - * don't print otherwise. - */ - if (new_speed || priv->cur_speed) - phy_print_status(phydev); - - priv->cur_speed = new_speed; - priv->cur_duplex = phydev->duplex; - priv->rx_pause = rx_pause; - priv->tx_pause = tx_pause; - - /* Link is down, do nothing else */ - if (!new_speed) - return; - - /* Disable all interrupts */ - iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); - - /* Reset the adapter asynchronously */ - schedule_work(&priv->reset_task); -} - -static int ftgmac100_mii_probe(struct net_device *netdev) -{ - struct ftgmac100 *priv = netdev_priv(netdev); - struct platform_device *pdev = to_platform_device(priv->dev); - struct device_node *np = pdev->dev.of_node; - struct phy_device *phydev; - phy_interface_t phy_intf; - int err; - - /* Default to RGMII. It's a gigabit part after all */ - err = of_get_phy_mode(np, &phy_intf); - if (err) - phy_intf = PHY_INTERFACE_MODE_RGMII; - - /* Aspeed only supports these. I don't know about other IP - * block vendors so I'm going to just let them through for - * now. Note that this is only a warning if for some obscure - * reason the DT really means to lie about it or it's a newer - * part we don't know about. - * - * On the Aspeed SoC there are additionally straps and SCU - * control bits that could tell us what the interface is - * (or allow us to configure it while the IP block is held - * in reset). 
For now I chose to keep this driver away from - * those SoC specific bits and assume the device-tree is - * right and the SCU has been configured properly by pinmux - * or the firmware. - */ - if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) { - netdev_warn(netdev, - "Unsupported PHY mode %s !\n", - phy_modes(phy_intf)); - } - - phydev = phy_find_first(priv->mii_bus); - if (!phydev) { - netdev_info(netdev, "%s: no PHY found\n", netdev->name); - return -ENODEV; - } - - phydev = phy_connect(netdev, phydev_name(phydev), - &ftgmac100_adjust_link, phy_intf); - - if (IS_ERR(phydev)) { - netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name); - return PTR_ERR(phydev); - } - - /* Indicate that we support PAUSE frames (see comment in - * Documentation/networking/phy.rst) - */ - phy_support_asym_pause(phydev); - - /* Display what we found */ - phy_attached_info(phydev); - - return 0; -} - static int ftgmac100_mdiobus_read(struct mii_bus *bus, int phy_addr, int regnum) { struct net_device *netdev = bus->priv; @@ -1410,10 +1299,8 @@ static int ftgmac100_init_all(struct ftgmac100 *priv, bool ignore_alloc_err) return err; } -static void ftgmac100_reset_task(struct work_struct *work) +static void ftgmac100_reset(struct ftgmac100 *priv) { - struct ftgmac100 *priv = container_of(work, struct ftgmac100, - reset_task); struct net_device *netdev = priv->netdev; int err; @@ -1459,6 +1346,134 @@ static void ftgmac100_reset_task(struct work_struct *work) rtnl_unlock(); } +static void ftgmac100_reset_task(struct work_struct *work) +{ + struct ftgmac100 *priv = container_of(work, struct ftgmac100, + reset_task); + + ftgmac100_reset(priv); +} + +static void ftgmac100_adjust_link(struct net_device *netdev) +{ + struct ftgmac100 *priv = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + bool tx_pause, rx_pause; + int new_speed; + + /* We store "no link" as speed 0 */ + if (!phydev->link) + new_speed = 0; + else + new_speed = phydev->speed; + + /* 
Grab pause settings from PHY if configured to do so */ + if (priv->aneg_pause) { + rx_pause = tx_pause = phydev->pause; + if (phydev->asym_pause) + tx_pause = !rx_pause; + } else { + rx_pause = priv->rx_pause; + tx_pause = priv->tx_pause; + } + + /* Link hasn't changed, do nothing */ + if (phydev->speed == priv->cur_speed && + phydev->duplex == priv->cur_duplex && + rx_pause == priv->rx_pause && + tx_pause == priv->tx_pause) + return; + + /* Print status if we have a link or we had one and just lost it, + * don't print otherwise. + */ + if (new_speed || priv->cur_speed) + phy_print_status(phydev); + + priv->cur_speed = new_speed; + priv->cur_duplex = phydev->duplex; + priv->rx_pause = rx_pause; + priv->tx_pause = tx_pause; + + /* Link is down, do nothing else */ + if (!new_speed) + return; + + /* Disable all interrupts */ + iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); + + /* Release phy lock to allow ftgmac100_reset to acquire it, keeping lock + * order consistent to prevent deadlock. + */ + if (netdev->phydev) + mutex_unlock(&netdev->phydev->lock); + + ftgmac100_reset(priv); + + if (netdev->phydev) + mutex_lock(&netdev->phydev->lock); + +} + +static int ftgmac100_mii_probe(struct net_device *netdev) +{ + struct ftgmac100 *priv = netdev_priv(netdev); + struct platform_device *pdev = to_platform_device(priv->dev); + struct device_node *np = pdev->dev.of_node; + struct phy_device *phydev; + phy_interface_t phy_intf; + int err; + + /* Default to RGMII. It's a gigabit part after all */ + err = of_get_phy_mode(np, &phy_intf); + if (err) + phy_intf = PHY_INTERFACE_MODE_RGMII; + + /* Aspeed only supports these. I don't know about other IP + * block vendors so I'm going to just let them through for + * now. Note that this is only a warning if for some obscure + * reason the DT really means to lie about it or it's a newer + * part we don't know about.
+ * + * On the Aspeed SoC there are additionally straps and SCU + * control bits that could tell us what the interface is + * (or allow us to configure it while the IP block is held + * in reset). For now I chose to keep this driver away from + * those SoC specific bits and assume the device-tree is + * right and the SCU has been configured properly by pinmux + * or the firmware. + */ + if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) { + netdev_warn(netdev, + "Unsupported PHY mode %s !\n", + phy_modes(phy_intf)); + } + + phydev = phy_find_first(priv->mii_bus); + if (!phydev) { + netdev_info(netdev, "%s: no PHY found\n", netdev->name); + return -ENODEV; + } + + phydev = phy_connect(netdev, phydev_name(phydev), + &ftgmac100_adjust_link, phy_intf); + + if (IS_ERR(phydev)) { + netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name); + return PTR_ERR(phydev); + } + + /* Indicate that we support PAUSE frames (see comment in + * Documentation/networking/phy.rst) + */ + phy_support_asym_pause(phydev); + + /* Display what we found */ + phy_attached_info(phydev); + + return 0; +} + static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 6b5484543d09..939fa9db6a2e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -35,6 +35,75 @@ MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver"); struct ptp_qoriq *dpaa2_ptp; EXPORT_SYMBOL(dpaa2_ptp); +static void dpaa2_eth_detect_features(struct dpaa2_eth_priv *priv) +{ + priv->features = 0; + + if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_PTP_ONESTEP_VER_MAJOR, + DPNI_PTP_ONESTEP_VER_MINOR) >= 0) + priv->features |= DPAA2_ETH_FEATURE_ONESTEP_CFG_DIRECT; +} + +static void dpaa2_update_ptp_onestep_indirect(struct dpaa2_eth_priv *priv, + u32 offset, u8 udp) +{ + struct dpni_single_step_cfg 
cfg; + + cfg.en = 1; + cfg.ch_update = udp; + cfg.offset = offset; + cfg.peer_delay = 0; + + if (dpni_set_single_step_cfg(priv->mc_io, 0, priv->mc_token, &cfg)) + WARN_ONCE(1, "Failed to set single step register"); +} + +static void dpaa2_update_ptp_onestep_direct(struct dpaa2_eth_priv *priv, + u32 offset, u8 udp) +{ + u32 val = 0; + + val = DPAA2_PTP_SINGLE_STEP_ENABLE | + DPAA2_PTP_SINGLE_CORRECTION_OFF(offset); + + if (udp) + val |= DPAA2_PTP_SINGLE_STEP_CH; + + if (priv->onestep_reg_base) + writel(val, priv->onestep_reg_base); +} + +static void dpaa2_ptp_onestep_reg_update_method(struct dpaa2_eth_priv *priv) +{ + struct device *dev = priv->net_dev->dev.parent; + struct dpni_single_step_cfg ptp_cfg; + + priv->dpaa2_set_onestep_params_cb = dpaa2_update_ptp_onestep_indirect; + + if (!(priv->features & DPAA2_ETH_FEATURE_ONESTEP_CFG_DIRECT)) + return; + + if (dpni_get_single_step_cfg(priv->mc_io, 0, + priv->mc_token, &ptp_cfg)) { + dev_err(dev, "dpni_get_single_step_cfg cannot retrieve onestep reg, falling back to indirect update\n"); + return; + } + + if (!ptp_cfg.ptp_onestep_reg_base) { + dev_err(dev, "1588 onestep reg not available, falling back to indirect update\n"); + return; + } + + priv->onestep_reg_base = ioremap(ptp_cfg.ptp_onestep_reg_base, + sizeof(u32)); + if (!priv->onestep_reg_base) { + dev_err(dev, "1588 onestep reg cannot be mapped, falling back to indirect update\n"); + return; + } + + priv->dpaa2_set_onestep_params_cb = dpaa2_update_ptp_onestep_direct; +} + static void *dpaa2_iova_to_virt(struct iommu_domain *domain, dma_addr_t iova_addr) { @@ -696,7 +765,6 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, struct sk_buff *skb) { struct ptp_tstamp origin_timestamp; - struct dpni_single_step_cfg cfg; u8 msgtype, twostep, udp; struct dpaa2_faead *faead; struct dpaa2_fas *fas; @@ -750,14 +818,12 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, htonl(origin_timestamp.sec_lsb); *(__be32 *)(data + offset2 + 6) = 
htonl(origin_timestamp.nsec); - cfg.en = 1; - cfg.ch_update = udp; - cfg.offset = offset1; - cfg.peer_delay = 0; + if (priv->ptp_correction_off == offset1) + return; + + priv->dpaa2_set_onestep_params_cb(priv, offset1, udp); + priv->ptp_correction_off = offset1; - if (dpni_set_single_step_cfg(priv->mc_io, 0, priv->mc_token, - &cfg)) - WARN_ONCE(1, "Failed to set single step register"); } } @@ -2407,6 +2473,9 @@ static int dpaa2_eth_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) config.rx_filter = HWTSTAMP_FILTER_ALL; } + if (priv->tx_tstamp_type == HWTSTAMP_TX_ONESTEP_SYNC) + dpaa2_ptp_onestep_reg_update_method(priv); + return copy_to_user(rq->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } @@ -4300,6 +4369,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) return err; } + dpaa2_eth_detect_features(priv); + /* Capabilities listing */ supported |= IFF_LIVE_ADDR_CHANGE; @@ -4758,6 +4829,8 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) dpaa2_eth_free_dpbp(priv); dpaa2_eth_free_dpio(priv); dpaa2_eth_free_dpni(priv); + if (priv->onestep_reg_base) + iounmap(priv->onestep_reg_base); fsl_mc_portal_free(priv->mc_io); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index b79831cd1a94..447718483ef4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -526,12 +526,15 @@ struct dpaa2_eth_priv { u8 num_channels; struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS]; struct dpaa2_eth_sgt_cache __percpu *sgt_cache; - + unsigned long features; struct dpni_attr dpni_attrs; u16 dpni_ver_major; u16 dpni_ver_minor; u16 tx_data_offset; - + void __iomem *onestep_reg_base; + u8 ptp_correction_off; + void (*dpaa2_set_onestep_params_cb)(struct dpaa2_eth_priv *priv, + u32 offset, u8 udp); struct fsl_mc_device *dpbp_dev; u16 rx_buf_size; u16 bpid; @@ -673,6 +676,13 @@ enum dpaa2_eth_rx_dist { #define 
DPAA2_ETH_DIST_L4DST BIT(8) #define DPAA2_ETH_DIST_ALL (~0ULL) +#define DPNI_PTP_ONESTEP_VER_MAJOR 8 +#define DPNI_PTP_ONESTEP_VER_MINOR 2 +#define DPAA2_ETH_FEATURE_ONESTEP_CFG_DIRECT BIT(0) +#define DPAA2_PTP_SINGLE_STEP_ENABLE BIT(31) +#define DPAA2_PTP_SINGLE_STEP_CH BIT(7) +#define DPAA2_PTP_SINGLE_CORRECTION_OFF(v) ((v) << 8) + #define DPNI_PAUSE_VER_MAJOR 7 #define DPNI_PAUSE_VER_MINOR 13 #define dpaa2_eth_has_pause_support(priv) \ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h index 9f80bdfeedec..828f538097af 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h @@ -98,7 +98,7 @@ #define DPNI_CMDID_GET_LINK_CFG DPNI_CMD(0x278) #define DPNI_CMDID_SET_SINGLE_STEP_CFG DPNI_CMD(0x279) -#define DPNI_CMDID_GET_SINGLE_STEP_CFG DPNI_CMD(0x27a) +#define DPNI_CMDID_GET_SINGLE_STEP_CFG DPNI_CMD_V2(0x27a) /* Macros for accessing command fields smaller than 1byte */ #define DPNI_MASK(field) \ @@ -658,12 +658,16 @@ struct dpni_cmd_single_step_cfg { __le16 flags; __le16 offset; __le32 peer_delay; + __le32 ptp_onestep_reg_base; + __le32 pad0; }; struct dpni_rsp_single_step_cfg { __le16 flags; __le16 offset; __le32 peer_delay; + __le32 ptp_onestep_reg_base; + __le32 pad0; }; struct dpni_cmd_enable_vlan_filter { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c index d6afada99fb6..6c3b36f20fb8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c @@ -2136,6 +2136,8 @@ int dpni_get_single_step_cfg(struct fsl_mc_io *mc_io, ptp_cfg->ch_update = dpni_get_field(le16_to_cpu(rsp_params->flags), PTP_CH_UPDATE) ? 
1 : 0; ptp_cfg->peer_delay = le32_to_cpu(rsp_params->peer_delay); + ptp_cfg->ptp_onestep_reg_base = + le32_to_cpu(rsp_params->ptp_onestep_reg_base); return err; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index 7de0562bbf59..6fffd519aa00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -1074,12 +1074,18 @@ int dpni_set_tx_shaping(struct fsl_mc_io *mc_io, * @peer_delay: For peer-to-peer transparent clocks add this value to the * correction field in addition to the transient time update. * The value expresses nanoseconds. + * @ptp_onestep_reg_base: 1588 SINGLE_STEP register base address. This address + * is used to update directly the register contents. + * User has to create an address mapping for it. + * + * */ struct dpni_single_step_cfg { u8 en; u8 ch_update; u16 offset; u32 peer_delay; + u32 ptp_onestep_reg_base; }; int dpni_set_single_step_cfg(struct fsl_mc_io *mc_io, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 5a3eea1a718b..79afb1d7289b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -1021,6 +1021,46 @@ static struct actions_fwd *enetc_check_flow_actions(u64 acts, return NULL; } +static int enetc_psfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + 
!flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, struct flow_cls_offload *f) { @@ -1177,11 +1217,10 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, /* Flow meter and max frame size */ if (entryp) { - if (entryp->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); - err = -EOPNOTSUPP; + err = enetc_psfp_policer_validate(&rule->action, entryp, extack); + if (err) goto free_sfi; - } + if (entryp->police.burst) { fmi = kzalloc(sizeof(*fmi), GFP_KERNEL); if (!fmi) { diff --git a/drivers/net/ethernet/fungible/Kconfig b/drivers/net/ethernet/fungible/Kconfig new file mode 100644 index 000000000000..2ff5138d0448 --- /dev/null +++ b/drivers/net/ethernet/fungible/Kconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Fungible network driver configuration +# + +config NET_VENDOR_FUNGIBLE + bool "Fungible devices" + default y + help + If you have a Fungible network device, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Fungible cards. If you say Y, you will be asked + for your specific card in the following questions. + +if NET_VENDOR_FUNGIBLE + +config FUN_CORE + tristate + help + A service module offering basic common services to Fungible + device drivers. 
+ +source "drivers/net/ethernet/fungible/funeth/Kconfig" + +endif # NET_VENDOR_FUNGIBLE diff --git a/drivers/net/ethernet/fungible/Makefile b/drivers/net/ethernet/fungible/Makefile new file mode 100644 index 000000000000..df759f1585a1 --- /dev/null +++ b/drivers/net/ethernet/fungible/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +# +# Makefile for the Fungible network device drivers. +# + +obj-$(CONFIG_FUN_CORE) += funcore/ +obj-$(CONFIG_FUN_ETH) += funeth/ diff --git a/drivers/net/ethernet/fungible/funcore/Makefile b/drivers/net/ethernet/fungible/funcore/Makefile new file mode 100644 index 000000000000..bc16b264b53e --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +obj-$(CONFIG_FUN_CORE) += funcore.o + +funcore-y := fun_dev.o fun_queue.o diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c new file mode 100644 index 000000000000..5d7aef73df61 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/aer.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/nvme.h> +#include <linux/pci.h> +#include <linux/wait.h> +#include <linux/sched/signal.h> + +#include "fun_queue.h" +#include "fun_dev.h" + +#define FUN_ADMIN_CMD_TO_MS 3000 + +enum { + AQA_ASQS_SHIFT = 0, + AQA_ACQS_SHIFT = 16, + AQA_MIN_QUEUE_SIZE = 2, + AQA_MAX_QUEUE_SIZE = 4096 +}; + +/* context for admin commands */ +struct fun_cmd_ctx { + fun_admin_callback_t cb; /* callback to invoke on completion */ + void *cb_data; /* user data provided to callback */ + int cpu; /* CPU where the cmd's tag was allocated */ +}; + +/* Context for synchronous admin 
commands. */ +struct fun_sync_cmd_ctx { + struct completion compl; + u8 *rsp_buf; /* caller provided response buffer */ + unsigned int rsp_len; /* response buffer size */ + u8 rsp_status; /* command response status */ +}; + +/* Wait for the CSTS.RDY bit to match @enabled. */ +static int fun_wait_ready(struct fun_dev *fdev, bool enabled) +{ + unsigned int cap_to = NVME_CAP_TIMEOUT(fdev->cap_reg); + u32 bit = enabled ? NVME_CSTS_RDY : 0; + unsigned long deadline; + + deadline = ((cap_to + 1) * HZ / 2) + jiffies; /* CAP.TO is in 500ms */ + + for (;;) { + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + + if (csts == ~0) { + dev_err(fdev->dev, "CSTS register read %#x\n", csts); + return -EIO; + } + + if ((csts & NVME_CSTS_RDY) == bit) + return 0; + + if (time_is_before_jiffies(deadline)) + break; + + msleep(100); + } + + dev_err(fdev->dev, + "Timed out waiting for device to indicate RDY %u; aborting %s\n", + enabled, enabled ? "initialization" : "reset"); + return -ETIMEDOUT; +} + +/* Check CSTS and return an error if it is unreadable or has unexpected + * RDY value. + */ +static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy) +{ + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + u32 actual_rdy = csts & NVME_CSTS_RDY; + + if (csts == ~0) { + dev_err(fdev->dev, "CSTS register read %#x\n", csts); + return -EIO; + } + if (actual_rdy != expected_rdy) { + dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", actual_rdy); + return -EINVAL; + } + return 0; +} + +/* Check that CSTS RDY has the expected value. Then write a new value to the CC + * register and wait for CSTS RDY to match the new CC ENABLE state. 
+ */ +static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy) +{ + int rc = fun_check_csts_rdy(fdev, initial_rdy); + + if (rc) + return rc; + writel(fdev->cc_reg, fdev->bar + NVME_REG_CC); + return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE)); +} + +static int fun_disable_ctrl(struct fun_dev *fdev) +{ + fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE); + return fun_update_cc_enable(fdev, 1); +} + +static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2, + u32 admin_sqesz_log2) +{ + fdev->cc_reg = (admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT) | + (admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT) | + ((PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT) | + NVME_CC_ENABLE; + + return fun_update_cc_enable(fdev, 0); +} + +static int fun_map_bars(struct fun_dev *fdev, const char *name) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + int err; + + err = pci_request_mem_regions(pdev, name); + if (err) { + dev_err(&pdev->dev, + "Couldn't get PCI memory resources, err %d\n", err); + return err; + } + + fdev->bar = pci_ioremap_bar(pdev, 0); + if (!fdev->bar) { + dev_err(&pdev->dev, "Couldn't map BAR 0\n"); + pci_release_mem_regions(pdev); + return -ENOMEM; + } + + return 0; +} + +static void fun_unmap_bars(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + + if (fdev->bar) { + iounmap(fdev->bar); + fdev->bar = NULL; + pci_release_mem_regions(pdev); + } +} + +static int fun_set_dma_masks(struct device *dev) +{ + int err; + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (err) + dev_err(dev, "DMA mask configuration failed, err %d\n", err); + return err; +} + +static irqreturn_t fun_admin_irq(int irq, void *data) +{ + struct fun_queue *funq = data; + + return fun_process_cq(funq, 0) ? 
IRQ_HANDLED : IRQ_NONE; +} + +static void fun_complete_admin_cmd(struct fun_queue *funq, void *data, + void *entry, const struct fun_cqe_info *info) +{ + const struct fun_admin_rsp_common *rsp_common = entry; + struct fun_dev *fdev = funq->fdev; + struct fun_cmd_ctx *cmd_ctx; + int cpu; + u16 cid; + + if (info->sqhd == cpu_to_be16(0xffff)) { + dev_dbg(fdev->dev, "adminq event"); + if (fdev->adminq_cb) + fdev->adminq_cb(fdev, entry); + return; + } + + cid = be16_to_cpu(rsp_common->cid); + dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid, + rsp_common->op, rsp_common->ret); + + cmd_ctx = &fdev->cmd_ctx[cid]; + if (cmd_ctx->cpu < 0) { + dev_err(fdev->dev, + "admin CQE with CID=%u, op=%u does not match a pending command\n", + cid, rsp_common->op); + return; + } + + if (cmd_ctx->cb) + cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL)); + + cpu = cmd_ctx->cpu; + cmd_ctx->cpu = -1; + sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu); +} + +static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags) +{ + unsigned int i; + + fdev->cmd_ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL); + if (!fdev->cmd_ctx) + return -ENOMEM; + + for (i = 0; i < ntags; i++) + fdev->cmd_ctx[i].cpu = -1; + + return 0; +} + +/* Allocate and enable an admin queue and assign it the first IRQ vector. 
*/ +static int fun_enable_admin_queue(struct fun_dev *fdev, + const struct fun_dev_params *areq) +{ + struct fun_queue_alloc_req qreq = { + .cqe_size_log2 = areq->cqe_size_log2, + .sqe_size_log2 = areq->sqe_size_log2, + .cq_depth = areq->cq_depth, + .sq_depth = areq->sq_depth, + .rq_depth = areq->rq_depth, + }; + unsigned int ntags = areq->sq_depth - 1; + struct fun_queue *funq; + int rc; + + if (fdev->admin_q) + return -EEXIST; + + if (areq->sq_depth < AQA_MIN_QUEUE_SIZE || + areq->sq_depth > AQA_MAX_QUEUE_SIZE || + areq->cq_depth < AQA_MIN_QUEUE_SIZE || + areq->cq_depth > AQA_MAX_QUEUE_SIZE) + return -EINVAL; + + fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq); + if (!fdev->admin_q) + return -ENOMEM; + + rc = fun_init_cmd_ctx(fdev, ntags); + if (rc) + goto free_q; + + rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false, + GFP_KERNEL, dev_to_node(fdev->dev)); + if (rc) + goto free_cmd_ctx; + + funq = fdev->admin_q; + funq->cq_vector = 0; + rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq); + if (rc) + goto free_sbq; + + fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL); + fdev->adminq_cb = areq->event_cb; + + writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT | + (funq->cq_depth - 1) << AQA_ACQS_SHIFT, + fdev->bar + NVME_REG_AQA); + + writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ); + writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ); + + rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2); + if (rc) + goto free_irq; + + if (areq->rq_depth) { + rc = fun_create_rq(funq); + if (rc) + goto disable_ctrl; + + funq_rq_post(funq); + } + + return 0; + +disable_ctrl: + fun_disable_ctrl(fdev); +free_irq: + fun_free_irq(funq); +free_sbq: + sbitmap_queue_free(&fdev->admin_sbq); +free_cmd_ctx: + kvfree(fdev->cmd_ctx); + fdev->cmd_ctx = NULL; +free_q: + fun_free_queue(fdev->admin_q); + fdev->admin_q = NULL; + return rc; +} + +static void fun_disable_admin_queue(struct fun_dev *fdev) +{ + struct fun_queue *admq = 
fdev->admin_q; + + if (!admq) + return; + + fun_disable_ctrl(fdev); + + fun_free_irq(admq); + __fun_process_cq(admq, 0); + + sbitmap_queue_free(&fdev->admin_sbq); + + kvfree(fdev->cmd_ctx); + fdev->cmd_ctx = NULL; + + fun_free_queue(admq); + fdev->admin_q = NULL; +} + +/* Return %true if the admin queue has stopped servicing commands as can be + * detected through registers. This isn't exhaustive and may provide false + * negatives. + */ +static bool fun_adminq_stopped(struct fun_dev *fdev) +{ + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + + return (csts & (NVME_CSTS_CFS | NVME_CSTS_RDY)) != NVME_CSTS_RDY; +} + +static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup) +{ + struct sbitmap_queue *sbq = &fdev->admin_sbq; + struct sbq_wait_state *ws = &sbq->ws[0]; + DEFINE_SBQ_WAIT(wait); + int tag; + + for (;;) { + sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE); + if (fdev->suppress_cmds) { + tag = -ESHUTDOWN; + break; + } + tag = sbitmap_queue_get(sbq, cpup); + if (tag >= 0) + break; + schedule(); + } + + sbitmap_finish_wait(sbq, ws, &wait); + return tag; +} + +/* Submit an asynchronous admin command. Caller is responsible for implementing + * any waiting or timeout. Upon command completion the callback @cb is called. 
+ */ +int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd, + fun_admin_callback_t cb, void *cb_data, bool wait_ok) +{ + struct fun_queue *funq = fdev->admin_q; + unsigned int cmdsize = cmd->len8 * 8; + struct fun_cmd_ctx *cmd_ctx; + int tag, cpu, rc = 0; + + if (WARN_ON(cmdsize > (1 << funq->sqe_size_log2))) + return -EMSGSIZE; + + tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu); + if (tag < 0) { + if (!wait_ok) + return -EAGAIN; + tag = fun_wait_for_tag(fdev, &cpu); + if (tag < 0) + return tag; + } + + cmd->cid = cpu_to_be16(tag); + + cmd_ctx = &fdev->cmd_ctx[tag]; + cmd_ctx->cb = cb; + cmd_ctx->cb_data = cb_data; + + spin_lock(&funq->sq_lock); + + if (unlikely(fdev->suppress_cmds)) { + rc = -ESHUTDOWN; + sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu); + } else { + cmd_ctx->cpu = cpu; + memcpy(fun_sqe_at(funq, funq->sq_tail), cmd, cmdsize); + + dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", funq->sq_tail, + cmd); + + if (++funq->sq_tail == funq->sq_depth) + funq->sq_tail = 0; + writel(funq->sq_tail, funq->sq_db); + } + spin_unlock(&funq->sq_lock); + return rc; +} + +/* Abandon a pending admin command by clearing the issuer's callback data. + * Failure indicates that the command either has already completed or its + * completion is racing with this call. + */ +static bool fun_abandon_admin_cmd(struct fun_dev *fd, + const struct fun_admin_req_common *cmd, + void *cb_data) +{ + u16 cid = be16_to_cpu(cmd->cid); + struct fun_cmd_ctx *cmd_ctx = &fd->cmd_ctx[cid]; + + return cmpxchg(&cmd_ctx->cb_data, cb_data, NULL) == cb_data; +} + +/* Stop submission of new admin commands and wake up any processes waiting for + * tags. Already submitted commands are left to complete or time out. 
+ */
+static void fun_admin_stop(struct fun_dev *fdev)
+{
+	struct fun_queue *admq = fdev->admin_q;
+
+	/* The flag is changed under the SQ lock, the same lock
+	 * fun_submit_admin_cmd() holds while testing it.
+	 */
+	spin_lock(&admq->sq_lock);
+	fdev->suppress_cmds = true;
+	spin_unlock(&admq->sq_lock);
+
+	sbitmap_queue_wake_all(&fdev->admin_sbq);
+}
+
+/* Completion callback used for synchronous admin commands. Copies the
+ * response, truncated to the issuer's buffer size if necessary, records the
+ * response status and signals the issuer's completion.
+ */
+static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data)
+{
+	const struct fun_admin_rsp_common *hdr = rsp;
+	struct fun_sync_cmd_ctx *sync = cb_data;
+	unsigned int nbytes;
+
+	/* NULL callback data: the issuer timed out and abandoned the command */
+	if (!sync)
+		return;
+
+	if (sync->rsp_buf) {
+		nbytes = hdr->len8 * 8;
+		if (unlikely(nbytes > sync->rsp_len)) {
+			dev_err(fd->dev,
+				"response for op %u is %uB > response buffer %uB\n",
+				hdr->op, nbytes, sync->rsp_len);
+			nbytes = sync->rsp_len;
+		}
+		memcpy(sync->rsp_buf, rsp, nbytes);
+	}
+
+	sync->rsp_status = hdr->ret;
+	complete(&sync->compl);
+}
+
+/* Issue an admin command and sleep until its response arrives or the wait
+ * times out. @timeout is in milliseconds; 0 selects FUN_ADMIN_CMD_TO_MS.
+ * If @rsp is not NULL, up to @rspsize bytes of the response are copied to it.
+ *
+ * Return: 0 on success, the error from fun_submit_admin_cmd() if submission
+ * fails, -ETIMEDOUT if the command timed out and was abandoned, or the negated
+ * response status if the device reported a failure.
+ */
+int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
+			      struct fun_admin_req_common *cmd, void *rsp,
+			      size_t rspsize, unsigned int timeout)
+{
+	struct fun_sync_cmd_ctx sync = {
+		.compl = COMPLETION_INITIALIZER_ONSTACK(sync.compl),
+		.rsp_buf = rsp,
+		.rsp_len = rspsize,
+	};
+	unsigned int wait_ms = timeout ? timeout : FUN_ADMIN_CMD_TO_MS;
+	unsigned int cmdlen = cmd->len8 * 8;
+	int err;
+
+	err = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &sync,
+				   true);
+	if (err)
+		return err;
+
+	if (!wait_for_completion_timeout(&sync.compl,
+					 msecs_to_jiffies(wait_ms))) {
+		/* Timed out. Try to abandon the command so that we may
+		 * return. If that fails the command is completing right now
+		 * and we wait for the callback to finish.
+		 */
+		if (fun_abandon_admin_cmd(fdev, cmd, &sync)) {
+			dev_err(fdev->dev, "admin command timed out: %*ph\n",
+				cmdlen, cmd);
+			fun_admin_stop(fdev);
+			/* was the timeout caused by a failed queue? */
+			if (fun_adminq_stopped(fdev))
+				dev_err(fdev->dev,
+					"device does not accept admin commands\n");
+
+			return -ETIMEDOUT;
+		}
+		wait_for_completion(&sync.compl);
+	}
+
+	if (sync.rsp_status)
+		dev_err(fdev->dev, "admin command failed, err %d: %*ph\n",
+			sync.rsp_status, cmdlen, cmd);
+
+	return -sync.rsp_status;
+}
+EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd);
+
+/* Ask the device how many resources of type @res it has.
+ *
+ * Return: the count reported by the device, or the non-zero error returned by
+ * fun_submit_admin_sync_cmd().
+ */
+int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res)
+{
+	/* request and response share the same storage */
+	union {
+		struct fun_admin_res_count_req req;
+		struct fun_admin_res_count_rsp rsp;
+	} buf;
+	int err;
+
+	buf.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(buf.req));
+	buf.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT,
+						    0, 0);
+
+	err = fun_submit_admin_sync_cmd(fdev, &buf.req.common, &buf.rsp,
+					sizeof(buf), 0);
+	if (err)
+		return err;
+
+	return be32_to_cpu(buf.rsp.count.data);
+}
+EXPORT_SYMBOL_GPL(fun_get_res_count);
+
+/* Send a DESTROY sub-operation for the instance of resource @res identified
+ * by @id. No response payload is requested.
+ */
+int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
+		    unsigned int flags, u32 id)
+{
+	struct fun_admin_generic_destroy_req cmd = {
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd)),
+		.destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY,
+						       flags, id)
+	};
+
+	return fun_submit_admin_sync_cmd(fdev, &cmd.common, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(fun_res_destroy);
+
+/* Bind two entities of the given types and IDs.
*/
+int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
+	     unsigned int id0, enum fun_admin_bind_type type1,
+	     unsigned int id1)
+{
+	/* A bind request is the common header followed by the entries; this
+	 * local struct supplies storage for two of them.
+	 */
+	struct {
+		struct fun_admin_bind_req req;
+		struct fun_admin_bind_entry entry[2];
+	} bind = {
+		.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND,
+							 sizeof(bind)),
+		.entry = {
+			[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0),
+			[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1),
+		},
+	};
+
+	/* no response payload is requested */
+	return fun_submit_admin_sync_cmd(fdev, &bind.req.common, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(fun_bind);
+
+/* Query the device's CQ and SQ counts and derive the largest usable queue ID
+ * from them and from the number of doorbells that fit in BAR 0.
+ *
+ * Return: 0 on success, the error from fun_get_res_count(), or -EINVAL if the
+ * device has too few queues.
+ */
+static int fun_get_dev_limits(struct fun_dev *fdev)
+{
+	struct pci_dev *pdev = to_pci_dev(fdev->dev);
+	unsigned int ncqs, nsqs, ndbs, min_sqs;
+	int rc;
+
+	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ);
+	if (rc < 0)
+		return rc;
+	ncqs = rc;
+
+	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ);
+	if (rc < 0)
+		return rc;
+	nsqs = rc;
+
+	/* The admin queue takes 1 CQ and at least 1 SQ, and one more SQ is
+	 * required when its rq_depth is non-zero. The device is usable only
+	 * if it provides queues beyond those.
+	 */
+	if (ncqs < 2)
+		return -EINVAL;
+	min_sqs = fdev->admin_q->rq_depth ? 3 : 2;
+	if (nsqs < min_sqs)
+		return -EINVAL;
+
+	/* Doorbells start at offset NVME_REG_DBS of BAR 0 and are
+	 * db_stride * 4 bytes apart. SQ and CQ doorbells alternate, so each
+	 * queue ID needs a pair of them.
+	 */
+	ndbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) /
+	       (fdev->db_stride * 4);
+	fdev->max_qid = min3(ncqs, nsqs, ndbs / 2) - 1;
+	fdev->kern_end_qid = fdev->max_qid + 1;
+	return 0;
+}
+
+/* Allocate all MSI-X vectors available on a function and at least @min_vecs.
*/
+static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs)
+{
+	int avail = pci_msix_vec_count(pdev);
+	int got;
+
+	if (avail < 0)
+		return avail;
+	if (min_vecs > avail)
+		return -ERANGE;
+
+	/* request everything the function advertises */
+	got = pci_alloc_irq_vectors(pdev, min_vecs, avail, PCI_IRQ_MSIX);
+	if (got <= 0) {
+		dev_err(&pdev->dev,
+			"Unable to allocate at least %u IRQ vectors\n",
+			min_vecs);
+		return got;
+	}
+
+	dev_info(&pdev->dev, "Allocated %d IRQ vectors of %d requested\n",
+		 got, avail);
+	return got;
+}
+
+/* Set up the IRQ manager: a zeroed bitmap with one bit per IRQ, its lock, and
+ * the count of available IRQs. Returns 0, or -ENOMEM if the bitmap cannot be
+ * allocated.
+ */
+static int fun_alloc_irq_mgr(struct fun_dev *fdev)
+{
+	unsigned long *map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL);
+
+	fdev->irq_map = map;
+	if (!map)
+		return -ENOMEM;
+
+	spin_lock_init(&fdev->irqmgr_lock);
+	/* IRQ 0 belongs to the admin queue, so it starts out allocated */
+	__set_bit(0, map);
+	fdev->irqs_avail = fdev->num_irqs - 1;
+	return 0;
+}
+
+/* Reserve @nirqs of the currently available IRQs and store their indices in
+ * @irq_indices.
+ *
+ * Return: the number of IRQs reserved (0 when @nirqs is 0), or -ENOSPC if
+ * fewer than @nirqs IRQs are available, in which case nothing is reserved.
+ */
+int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices)
+{
+	unsigned int bit, taken = 0;
+	int ret = -ENOSPC;
+
+	if (!nirqs)
+		return 0;
+
+	spin_lock(&fdev->irqmgr_lock);
+	if (nirqs <= fdev->irqs_avail) {
+		/* claim clear bits in ascending order until we have enough */
+		for_each_clear_bit(bit, fdev->irq_map, fdev->num_irqs) {
+			__set_bit(bit, fdev->irq_map);
+			irq_indices[taken] = bit;
+			taken++;
+			if (taken >= nirqs)
+				break;
+		}
+
+		WARN_ON(taken < nirqs);
+		fdev->irqs_avail -= taken;
+		ret = taken;
+	}
+	spin_unlock(&fdev->irqmgr_lock);
+	return ret;
+}
+EXPORT_SYMBOL(fun_reserve_irqs);
+
+/* Release @nirqs previously allocated IRQS with the supplied indices.
*/
+void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
+		      u16 *irq_indices)
+{
+	unsigned int k = 0;
+
+	spin_lock(&fdev->irqmgr_lock);
+	while (k < nirqs) {
+		__clear_bit(irq_indices[k], fdev->irq_map);
+		k++;
+	}
+	fdev->irqs_avail += nirqs;
+	spin_unlock(&fdev->irqmgr_lock);
+}
+EXPORT_SYMBOL(fun_release_irqs);
+
+/* Work handler of the service task. Calls the device's service callback, if
+ * one is set, unless the service task is disabled.
+ */
+static void fun_serv_handler(struct work_struct *work)
+{
+	struct fun_dev *fd = container_of(work, struct fun_dev, service_task);
+
+	if (!test_bit(FUN_SERV_DISABLED, &fd->service_flags) && fd->serv_cb)
+		fd->serv_cb(fd);
+}
+
+/* Mark the service task disabled, then cancel its work item synchronously. */
+void fun_serv_stop(struct fun_dev *fd)
+{
+	set_bit(FUN_SERV_DISABLED, &fd->service_flags);
+	cancel_work_sync(&fd->service_task);
+}
+EXPORT_SYMBOL_GPL(fun_serv_stop);
+
+/* Re-enable the service task and schedule it if any service flag remains set
+ * once FUN_SERV_DISABLED has been cleared.
+ */
+void fun_serv_restart(struct fun_dev *fd)
+{
+	clear_bit(FUN_SERV_DISABLED, &fd->service_flags);
+	if (!fd->service_flags)
+		return;
+
+	schedule_work(&fd->service_task);
+}
+EXPORT_SYMBOL_GPL(fun_serv_restart);
+
+/* Schedule the service task unless it is disabled. */
+void fun_serv_sched(struct fun_dev *fd)
+{
+	if (test_bit(FUN_SERV_DISABLED, &fd->service_flags))
+		return;
+
+	schedule_work(&fd->service_task);
+}
+EXPORT_SYMBOL_GPL(fun_serv_sched);
+
+/* Bring the device to the state expected at the start of initialization,
+ * CSTS.RDY = CC.EN = 0, caching the CAP and CC registers along the way.
+ * Returns 0 or the error from waiting for or disabling the controller.
+ */
+static int sanitize_dev(struct fun_dev *fdev)
+{
+	int rc;
+
+	fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP);
+	fdev->cc_reg = readl(fdev->bar + NVME_REG_CC);
+
+	/* EN may have changed recently; let RDY catch up with its current
+	 * value before doing anything else.
+	 */
+	rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE);
+	if (rc)
+		return rc;
+
+	/* nothing more to do if the device is not enabled */
+	if (!(fdev->cc_reg & NVME_CC_ENABLE))
+		return 0;
+
+	/* EN is 1, reset the device */
+	return fun_disable_ctrl(fdev);
+}
+
+/* Undo the device initialization of fun_dev_enable().
*/ +void fun_dev_disable(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + + pci_set_drvdata(pdev, NULL); + + if (fdev->fw_handle != FUN_HCI_ID_INVALID) { + fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0, + fdev->fw_handle); + fdev->fw_handle = FUN_HCI_ID_INVALID; + } + + fun_disable_admin_queue(fdev); + + bitmap_free(fdev->irq_map); + pci_free_irq_vectors(pdev); + + pci_clear_master(pdev); + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + + fun_unmap_bars(fdev); +} +EXPORT_SYMBOL(fun_dev_disable); + +/* Perform basic initialization of a device, including + * - PCI config space setup and BAR0 mapping + * - interrupt management initialization + * - 1 admin queue setup + * - determination of some device limits, such as number of queues. + */ +int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, + const struct fun_dev_params *areq, const char *name) +{ + int rc; + + fdev->dev = &pdev->dev; + rc = fun_map_bars(fdev, name); + if (rc) + return rc; + + rc = fun_set_dma_masks(fdev->dev); + if (rc) + goto unmap; + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc); + goto unmap; + } + + pci_enable_pcie_error_reporting(pdev); + + rc = sanitize_dev(fdev); + if (rc) + goto disable_dev; + + fdev->fw_handle = FUN_HCI_ID_INVALID; + fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1; + fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg); + fdev->dbs = fdev->bar + NVME_REG_DBS; + + INIT_WORK(&fdev->service_task, fun_serv_handler); + fdev->service_flags = FUN_SERV_DISABLED; + fdev->serv_cb = areq->serv_cb; + + rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */ + if (rc < 0) + goto disable_dev; + fdev->num_irqs = rc; + + rc = fun_alloc_irq_mgr(fdev); + if (rc) + goto free_irqs; + + pci_set_master(pdev); + rc = fun_enable_admin_queue(fdev, areq); + if (rc) + goto free_irq_mgr; + + rc = fun_get_dev_limits(fdev); + if (rc < 0) + goto disable_admin; + + 
pci_save_state(pdev); + pci_set_drvdata(pdev, fdev); + pcie_print_link_status(pdev); + dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n", + fdev->q_depth, fdev->db_stride, fdev->max_qid, + fdev->kern_end_qid); + return 0; + +disable_admin: + fun_disable_admin_queue(fdev); +free_irq_mgr: + pci_clear_master(pdev); + bitmap_free(fdev->irq_map); +free_irqs: + pci_free_irq_vectors(pdev); +disable_dev: + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); +unmap: + fun_unmap_bars(fdev); + return rc; +} +EXPORT_SYMBOL(fun_dev_enable); + +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>"); +MODULE_DESCRIPTION("Core services driver for Fungible devices"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.h b/drivers/net/ethernet/fungible/funcore/fun_dev.h new file mode 100644 index 000000000000..9e8c17ce8887 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNDEV_H +#define _FUNDEV_H + +#include <linux/sbitmap.h> +#include <linux/spinlock_types.h> +#include <linux/workqueue.h> +#include "fun_hci.h" + +struct pci_dev; +struct fun_dev; +struct fun_queue; +struct fun_cmd_ctx; +struct fun_queue_alloc_req; + +/* doorbell fields */ +enum { + FUN_DB_QIDX_S = 0, + FUN_DB_INTCOAL_ENTRIES_S = 16, + FUN_DB_INTCOAL_ENTRIES_M = 0x7f, + FUN_DB_INTCOAL_USEC_S = 23, + FUN_DB_INTCOAL_USEC_M = 0x7f, + FUN_DB_IRQ_S = 30, + FUN_DB_IRQ_F = 1 << FUN_DB_IRQ_S, + FUN_DB_IRQ_ARM_S = 31, + FUN_DB_IRQ_ARM_F = 1U << FUN_DB_IRQ_ARM_S +}; + +/* Callback for asynchronous admin commands. + * Invoked on reception of command response. + */ +typedef void (*fun_admin_callback_t)(struct fun_dev *fdev, void *rsp, + void *cb_data); + +/* Callback for events/notifications received by an admin queue. 
*/
+typedef void (*fun_admin_event_cb)(struct fun_dev *fdev, void *cqe);
+
+/* Handler invoked by the service task to process pending work. */
+typedef void (*fun_serv_cb)(struct fun_dev *fd);
+
+/* Bit numbers within fun_dev.service_flags */
+enum {
+	FUN_SERV_DISABLED,	/* while set the service task does not run */
+	FUN_SERV_FIRST_AVAIL
+};
+
+/* Per PCI function driver state. */
+struct fun_dev {
+	struct device *dev;
+
+	void __iomem *bar;	/* base of the BAR0 mapping */
+	u32 __iomem *dbs;	/* first doorbell within the BAR0 mapping */
+
+	/* admin queue state */
+	struct fun_queue *admin_q;
+	struct sbitmap_queue admin_sbq;
+	struct fun_cmd_ctx *cmd_ctx;
+	fun_admin_event_cb adminq_cb;
+	bool suppress_cmds;	/* when set no commands are written to the SQ */
+
+	/* distance between consecutive doorbells, in units of 4B */
+	unsigned int db_stride;
+
+	/* cached copies of device registers */
+	u32 cc_reg;		/* CC register */
+	u64 cap_reg;		/* CAPability register */
+
+	unsigned int q_depth;	/* largest queue depth the device supports */
+	unsigned int max_qid;	/* = #queues - 1, separately for SQs and CQs */
+	unsigned int kern_end_qid; /* 1 + last qid of the kernel range */
+
+	unsigned int fw_handle;
+
+	/* IRQ manager state */
+	unsigned int num_irqs;
+	unsigned int irqs_avail;
+	spinlock_t irqmgr_lock;
+	unsigned long *irq_map;
+
+	/* The service task runs work that requires process context */
+	struct work_struct service_task;
+	unsigned long service_flags;
+	fun_serv_cb serv_cb;
+};
+
+/* Parameters a driver passes to fun_dev_enable(). */
+struct fun_dev_params {
+	u8  cqe_size_log2;	/* CQE size of the admin queue */
+	u8  sqe_size_log2;	/* SQE size of the admin queue */
+
+	/* depths of the admin queue's component queues */
+	u16 cq_depth;
+	u16 sq_depth;
+	u16 rq_depth;
+
+	u16 min_msix;	/* minimum number of vectors the driver needs */
+
+	fun_admin_event_cb event_cb;
+	fun_serv_cb serv_cb;
+};
+
+/* Return the BAR address of a doorbell.
*/ +static inline u32 __iomem *fun_db_addr(const struct fun_dev *fdev, + unsigned int db_index) +{ + return &fdev->dbs[db_index * fdev->db_stride]; +} + +/* Return the BAR address of an SQ doorbell. SQ and CQ DBs alternate, + * SQs have even DB indices. + */ +static inline u32 __iomem *fun_sq_db_addr(const struct fun_dev *fdev, + unsigned int sqid) +{ + return fun_db_addr(fdev, sqid * 2); +} + +static inline u32 __iomem *fun_cq_db_addr(const struct fun_dev *fdev, + unsigned int cqid) +{ + return fun_db_addr(fdev, cqid * 2 + 1); +} + +int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res); +int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res, + unsigned int flags, u32 id); +int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0, + unsigned int id0, enum fun_admin_bind_type type1, + unsigned int id1); + +int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd, + fun_admin_callback_t cb, void *cb_data, bool wait_ok); +int fun_submit_admin_sync_cmd(struct fun_dev *fdev, + struct fun_admin_req_common *cmd, void *rsp, + size_t rspsize, unsigned int timeout); + +int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, + const struct fun_dev_params *areq, const char *name); +void fun_dev_disable(struct fun_dev *fdev); + +int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices); +void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices); + +void fun_serv_stop(struct fun_dev *fd); +void fun_serv_restart(struct fun_dev *fd); +void fun_serv_sched(struct fun_dev *fd); + +#endif /* _FUNDEV_H */ diff --git a/drivers/net/ethernet/fungible/funcore/fun_hci.h b/drivers/net/ethernet/fungible/funcore/fun_hci.h new file mode 100644 index 000000000000..257203e94b68 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_hci.h @@ -0,0 +1,1202 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef __FUN_HCI_H +#define __FUN_HCI_H + +enum { + 
FUN_HCI_ID_INVALID = 0xffffffff, +}; + +enum fun_admin_op { + FUN_ADMIN_OP_BIND = 0x1, + FUN_ADMIN_OP_EPCQ = 0x11, + FUN_ADMIN_OP_EPSQ = 0x12, + FUN_ADMIN_OP_PORT = 0x13, + FUN_ADMIN_OP_ETH = 0x14, + FUN_ADMIN_OP_VI = 0x15, + FUN_ADMIN_OP_SWUPGRADE = 0x1f, + FUN_ADMIN_OP_RSS = 0x21, + FUN_ADMIN_OP_ADI = 0x25, + FUN_ADMIN_OP_KTLS = 0x26, +}; + +enum { + FUN_REQ_COMMON_FLAG_RSP = 0x1, + FUN_REQ_COMMON_FLAG_HEAD_WB = 0x2, + FUN_REQ_COMMON_FLAG_INT = 0x4, + FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF = 0x8, +}; + +struct fun_admin_req_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 rsvd0; + __be16 cid; +}; + +#define FUN_ADMIN_REQ_COMMON_INIT(_op, _len8, _flags, _suboff8, _cid) \ + (struct fun_admin_req_common) { \ + .op = (_op), .len8 = (_len8), .flags = cpu_to_be16(_flags), \ + .suboff8 = (_suboff8), .cid = cpu_to_be16(_cid), \ + } + +#define FUN_ADMIN_REQ_COMMON_INIT2(_op, _len) \ + (struct fun_admin_req_common) { \ + .op = (_op), .len8 = (_len) / 8, \ + } + +struct fun_admin_rsp_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 ret; + __be16 cid; +}; + +struct fun_admin_write48_req { + __be64 key_to_data; +}; + +#define FUN_ADMIN_WRITE48_REQ_KEY_S 56U +#define FUN_ADMIN_WRITE48_REQ_KEY_M 0xff +#define FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_WRITE48_REQ_KEY_S) + +#define FUN_ADMIN_WRITE48_REQ_DATA_S 0U +#define FUN_ADMIN_WRITE48_REQ_DATA_M 0xffffffffffff +#define FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_WRITE48_REQ_DATA_S) + +#define FUN_ADMIN_WRITE48_REQ_INIT(key, data) \ + (struct fun_admin_write48_req) { \ + .key_to_data = cpu_to_be64( \ + FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(key) | \ + FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(data)), \ + } + +struct fun_admin_write48_rsp { + __be64 key_to_data; +}; + +struct fun_admin_read48_req { + __be64 key_pack; +}; + +#define FUN_ADMIN_READ48_REQ_KEY_S 56U +#define FUN_ADMIN_READ48_REQ_KEY_M 0xff +#define FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(x) \ 
+ (((__u64)x) << FUN_ADMIN_READ48_REQ_KEY_S) + +#define FUN_ADMIN_READ48_REQ_INIT(key) \ + (struct fun_admin_read48_req) { \ + .key_pack = \ + cpu_to_be64(FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(key)), \ + } + +struct fun_admin_read48_rsp { + __be64 key_to_data; +}; + +#define FUN_ADMIN_READ48_RSP_KEY_S 56U +#define FUN_ADMIN_READ48_RSP_KEY_M 0xff +#define FUN_ADMIN_READ48_RSP_KEY_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_KEY_S) & \ + FUN_ADMIN_READ48_RSP_KEY_M) + +#define FUN_ADMIN_READ48_RSP_RET_S 48U +#define FUN_ADMIN_READ48_RSP_RET_M 0xff +#define FUN_ADMIN_READ48_RSP_RET_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_RET_S) & \ + FUN_ADMIN_READ48_RSP_RET_M) + +#define FUN_ADMIN_READ48_RSP_DATA_S 0U +#define FUN_ADMIN_READ48_RSP_DATA_M 0xffffffffffff +#define FUN_ADMIN_READ48_RSP_DATA_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_DATA_S) & \ + FUN_ADMIN_READ48_RSP_DATA_M) + +enum fun_admin_bind_type { + FUN_ADMIN_BIND_TYPE_EPCQ = 0x1, + FUN_ADMIN_BIND_TYPE_EPSQ = 0x2, + FUN_ADMIN_BIND_TYPE_PORT = 0x3, + FUN_ADMIN_BIND_TYPE_RSS = 0x4, + FUN_ADMIN_BIND_TYPE_VI = 0x5, + FUN_ADMIN_BIND_TYPE_ETH = 0x6, +}; + +struct fun_admin_bind_entry { + __u8 type; + __u8 rsvd0[3]; + __be32 id; +}; + +#define FUN_ADMIN_BIND_ENTRY_INIT(_type, _id) \ + (struct fun_admin_bind_entry) { \ + .type = (_type), .id = cpu_to_be32(_id), \ + } + +struct fun_admin_bind_req { + struct fun_admin_req_common common; + struct fun_admin_bind_entry entry[]; +}; + +struct fun_admin_bind_rsp { + struct fun_admin_rsp_common bind_rsp_common; +}; + +struct fun_admin_simple_subop { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 data; +}; + +#define FUN_ADMIN_SIMPLE_SUBOP_INIT(_subop, _flags, _data) \ + (struct fun_admin_simple_subop) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .data = cpu_to_be32(_data), \ + } + +enum fun_admin_subop { + FUN_ADMIN_SUBOP_CREATE = 0x10, + FUN_ADMIN_SUBOP_DESTROY = 0x11, + FUN_ADMIN_SUBOP_MODIFY = 0x12, + FUN_ADMIN_SUBOP_RES_COUNT = 0x14, + 
FUN_ADMIN_SUBOP_READ = 0x15, + FUN_ADMIN_SUBOP_WRITE = 0x16, + FUN_ADMIN_SUBOP_NOTIFY = 0x17, +}; + +enum { + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR = 0x1, +}; + +struct fun_admin_generic_destroy_req { + struct fun_admin_req_common common; + struct fun_admin_simple_subop destroy; +}; + +struct fun_admin_generic_create_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; +}; + +struct fun_admin_res_count_req { + struct fun_admin_req_common common; + struct fun_admin_simple_subop count; +}; + +struct fun_admin_res_count_rsp { + struct fun_admin_rsp_common common; + struct fun_admin_simple_subop count; +}; + +enum { + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_EPCQ = 0x2, + FUN_ADMIN_EPCQ_CREATE_FLAG_ENTRY_WR_TPH = 0x4, + FUN_ADMIN_EPCQ_CREATE_FLAG_SL_WR_TPH = 0x8, + FUN_ADMIN_EPCQ_CREATE_FLAG_RQ = 0x80, + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_IQ = 0x100, + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_NOARM = 0x200, + FUN_ADMIN_EPCQ_CREATE_FLAG_DROP_ON_OVERFLOW = 0x400, +}; + +struct fun_admin_epcq_req { + struct fun_admin_req_common common; + union epcq_req_subop { + struct fun_admin_epcq_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 epsqid; + __u8 rsvd1; + __u8 entry_size_log2; + __be16 nentries; + + __be64 address; + + __be16 tailroom; /* per packet tailroom in bytes */ + __u8 headroom; /* per packet headroom in 2B units */ + __u8 intcoal_kbytes; + __u8 intcoal_holdoff_nentries; + __u8 intcoal_holdoff_usecs; + __be16 intid; + + __be32 scan_start_id; + __be32 scan_end_id; + + __be16 tph_cpuid; + __u8 rsvd3[6]; + } create; + + struct fun_admin_epcq_modify_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be16 headroom; /* headroom in bytes */ + __u8 rsvd1[6]; + } modify; + } u; +}; + +#define FUN_ADMIN_EPCQ_CREATE_REQ_INIT( \ + _subop, _flags, _id, _epsqid, _entry_size_log2, _nentries, _address, \ + _tailroom, _headroom, _intcoal_kbytes, _intcoal_holdoff_nentries, \ + _intcoal_holdoff_usecs, _intid, 
_scan_start_id, _scan_end_id, \ + _tph_cpuid) \ + (struct fun_admin_epcq_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .epsqid = cpu_to_be32(_epsqid), \ + .entry_size_log2 = _entry_size_log2, \ + .nentries = cpu_to_be16(_nentries), \ + .address = cpu_to_be64(_address), \ + .tailroom = cpu_to_be16(_tailroom), .headroom = _headroom, \ + .intcoal_kbytes = _intcoal_kbytes, \ + .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \ + .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \ + .intid = cpu_to_be16(_intid), \ + .scan_start_id = cpu_to_be32(_scan_start_id), \ + .scan_end_id = cpu_to_be32(_scan_end_id), \ + .tph_cpuid = cpu_to_be16(_tph_cpuid), \ + } + +#define FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(_subop, _flags, _id, _headroom) \ + (struct fun_admin_epcq_modify_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .headroom = cpu_to_be16(_headroom), \ + } + +enum { + FUN_ADMIN_EPSQ_CREATE_FLAG_INT_EPSQ = 0x2, + FUN_ADMIN_EPSQ_CREATE_FLAG_ENTRY_RD_TPH = 0x4, + FUN_ADMIN_EPSQ_CREATE_FLAG_GL_RD_TPH = 0x8, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS = 0x10, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS_TPH = 0x20, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_EPCQ = 0x40, + FUN_ADMIN_EPSQ_CREATE_FLAG_RQ = 0x80, + FUN_ADMIN_EPSQ_CREATE_FLAG_INT_IQ = 0x100, + FUN_ADMIN_EPSQ_CREATE_FLAG_NO_CMPL = 0x200, +}; + +struct fun_admin_epsq_req { + struct fun_admin_req_common common; + + union epsq_req_subop { + struct fun_admin_epsq_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 epcqid; + __u8 rsvd1; + __u8 entry_size_log2; + __be16 nentries; + + __be64 address; /* DMA address of epsq */ + + __u8 rsvd2[3]; + __u8 intcoal_kbytes; + __u8 intcoal_holdoff_nentries; + __u8 intcoal_holdoff_usecs; + __be16 intid; + + __be32 scan_start_id; + __be32 scan_end_id; + + __u8 rsvd3[4]; + __be16 tph_cpuid; + __u8 buf_size_log2; /* log2 of RQ buffer size */ + __u8 head_wb_size_log2; /* log2 
of head write back size */ + + __be64 head_wb_address; /* DMA address for head writeback */ + } create; + } u; +}; + +#define FUN_ADMIN_EPSQ_CREATE_REQ_INIT( \ + _subop, _flags, _id, _epcqid, _entry_size_log2, _nentries, _address, \ + _intcoal_kbytes, _intcoal_holdoff_nentries, _intcoal_holdoff_usecs, \ + _intid, _scan_start_id, _scan_end_id, _tph_cpuid, _buf_size_log2, \ + _head_wb_size_log2, _head_wb_address) \ + (struct fun_admin_epsq_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .epcqid = cpu_to_be32(_epcqid), \ + .entry_size_log2 = _entry_size_log2, \ + .nentries = cpu_to_be16(_nentries), \ + .address = cpu_to_be64(_address), \ + .intcoal_kbytes = _intcoal_kbytes, \ + .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \ + .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \ + .intid = cpu_to_be16(_intid), \ + .scan_start_id = cpu_to_be32(_scan_start_id), \ + .scan_end_id = cpu_to_be32(_scan_end_id), \ + .tph_cpuid = cpu_to_be16(_tph_cpuid), \ + .buf_size_log2 = _buf_size_log2, \ + .head_wb_size_log2 = _head_wb_size_log2, \ + .head_wb_address = cpu_to_be64(_head_wb_address), \ + } + +enum { + FUN_PORT_CAP_OFFLOADS = 0x1, + FUN_PORT_CAP_STATS = 0x2, + FUN_PORT_CAP_LOOPBACK = 0x4, + FUN_PORT_CAP_VPORT = 0x8, + FUN_PORT_CAP_TX_PAUSE = 0x10, + FUN_PORT_CAP_RX_PAUSE = 0x20, + FUN_PORT_CAP_AUTONEG = 0x40, + FUN_PORT_CAP_RSS = 0x80, + FUN_PORT_CAP_VLAN_OFFLOADS = 0x100, + FUN_PORT_CAP_ENCAP_OFFLOADS = 0x200, + FUN_PORT_CAP_1000_X = 0x1000, + FUN_PORT_CAP_10G_R = 0x2000, + FUN_PORT_CAP_40G_R4 = 0x4000, + FUN_PORT_CAP_25G_R = 0x8000, + FUN_PORT_CAP_50G_R2 = 0x10000, + FUN_PORT_CAP_50G_R = 0x20000, + FUN_PORT_CAP_100G_R4 = 0x40000, + FUN_PORT_CAP_100G_R2 = 0x80000, + FUN_PORT_CAP_200G_R4 = 0x100000, + FUN_PORT_CAP_FEC_NONE = 0x10000000, + FUN_PORT_CAP_FEC_FC = 0x20000000, + FUN_PORT_CAP_FEC_RS = 0x40000000, +}; + +enum fun_port_brkout_mode { + FUN_PORT_BRKMODE_NA = 0x0, + FUN_PORT_BRKMODE_NONE = 0x1, + 
FUN_PORT_BRKMODE_2X = 0x2, + FUN_PORT_BRKMODE_4X = 0x3, +}; + +enum { + FUN_PORT_SPEED_AUTO = 0x0, + FUN_PORT_SPEED_10M = 0x1, + FUN_PORT_SPEED_100M = 0x2, + FUN_PORT_SPEED_1G = 0x4, + FUN_PORT_SPEED_10G = 0x8, + FUN_PORT_SPEED_25G = 0x10, + FUN_PORT_SPEED_40G = 0x20, + FUN_PORT_SPEED_50G = 0x40, + FUN_PORT_SPEED_100G = 0x80, + FUN_PORT_SPEED_200G = 0x100, +}; + +enum fun_port_duplex_mode { + FUN_PORT_FULL_DUPLEX = 0x0, + FUN_PORT_HALF_DUPLEX = 0x1, +}; + +enum { + FUN_PORT_FEC_NA = 0x0, + FUN_PORT_FEC_OFF = 0x1, + FUN_PORT_FEC_RS = 0x2, + FUN_PORT_FEC_FC = 0x4, + FUN_PORT_FEC_AUTO = 0x8, +}; + +enum fun_port_link_status { + FUN_PORT_LINK_UP = 0x0, + FUN_PORT_LINK_UP_WITH_ERR = 0x1, + FUN_PORT_LINK_DOWN = 0x2, +}; + +enum fun_port_led_type { + FUN_PORT_LED_OFF = 0x0, + FUN_PORT_LED_AMBER = 0x1, + FUN_PORT_LED_GREEN = 0x2, + FUN_PORT_LED_BEACON_ON = 0x3, + FUN_PORT_LED_BEACON_OFF = 0x4, +}; + +enum { + FUN_PORT_FLAG_MAC_DOWN = 0x1, + FUN_PORT_FLAG_MAC_UP = 0x2, + FUN_PORT_FLAG_NH_DOWN = 0x4, + FUN_PORT_FLAG_NH_UP = 0x8, +}; + +enum { + FUN_PORT_FLAG_ENABLE_NOTIFY = 0x1, +}; + +enum fun_port_lane_attr { + FUN_PORT_LANE_1 = 0x1, + FUN_PORT_LANE_2 = 0x2, + FUN_PORT_LANE_4 = 0x4, + FUN_PORT_LANE_SPEED_10G = 0x100, + FUN_PORT_LANE_SPEED_25G = 0x200, + FUN_PORT_LANE_SPEED_50G = 0x400, + FUN_PORT_LANE_SPLIT = 0x8000, +}; + +enum fun_admin_port_subop { + FUN_ADMIN_PORT_SUBOP_INETADDR_EVENT = 0x24, +}; + +enum fun_admin_port_key { + FUN_ADMIN_PORT_KEY_ILLEGAL = 0x0, + FUN_ADMIN_PORT_KEY_MTU = 0x1, + FUN_ADMIN_PORT_KEY_FEC = 0x2, + FUN_ADMIN_PORT_KEY_SPEED = 0x3, + FUN_ADMIN_PORT_KEY_DEBOUNCE = 0x4, + FUN_ADMIN_PORT_KEY_DUPLEX = 0x5, + FUN_ADMIN_PORT_KEY_MACADDR = 0x6, + FUN_ADMIN_PORT_KEY_LINKMODE = 0x7, + FUN_ADMIN_PORT_KEY_BREAKOUT = 0x8, + FUN_ADMIN_PORT_KEY_ENABLE = 0x9, + FUN_ADMIN_PORT_KEY_DISABLE = 0xa, + FUN_ADMIN_PORT_KEY_ERR_DISABLE = 0xb, + FUN_ADMIN_PORT_KEY_CAPABILITIES = 0xc, + FUN_ADMIN_PORT_KEY_LP_CAPABILITIES = 0xd, + FUN_ADMIN_PORT_KEY_STATS_DMA_LOW = 0xe, 
+ FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH = 0xf, + FUN_ADMIN_PORT_KEY_LANE_ATTRS = 0x10, + FUN_ADMIN_PORT_KEY_LED = 0x11, + FUN_ADMIN_PORT_KEY_ADVERT = 0x12, +}; + +struct fun_subop_imm { + __u8 subop; /* see fun_data_subop enum */ + __u8 flags; + __u8 nsgl; + __u8 rsvd0; + __be32 len; + + __u8 data[]; +}; + +enum fun_subop_sgl_flags { + FUN_SUBOP_SGL_USE_OFF8 = 0x1, + FUN_SUBOP_FLAG_FREE_BUF = 0x2, + FUN_SUBOP_FLAG_IS_REFBUF = 0x4, + FUN_SUBOP_SGL_FLAG_LOCAL = 0x8, +}; + +enum fun_data_op { + FUN_DATAOP_INVALID = 0x0, + FUN_DATAOP_SL = 0x1, /* scatter */ + FUN_DATAOP_GL = 0x2, /* gather */ + FUN_DATAOP_SGL = 0x3, /* scatter-gather */ + FUN_DATAOP_IMM = 0x4, /* immediate data */ + FUN_DATAOP_RQBUF = 0x8, /* rq buffer */ +}; + +struct fun_dataop_gl { + __u8 subop; + __u8 flags; + __be16 sgl_off; + __be32 sgl_len; + + __be64 sgl_data; +}; + +static inline void fun_dataop_gl_init(struct fun_dataop_gl *s, u8 flags, + u16 sgl_off, u32 sgl_len, u64 sgl_data) +{ + s->subop = FUN_DATAOP_GL; + s->flags = flags; + s->sgl_off = cpu_to_be16(sgl_off); + s->sgl_len = cpu_to_be32(sgl_len); + s->sgl_data = cpu_to_be64(sgl_data); +} + +struct fun_dataop_imm { + __u8 subop; + __u8 flags; + __be16 rsvd0; + __be32 sgl_len; +}; + +struct fun_subop_sgl { + __u8 subop; + __u8 flags; + __u8 nsgl; + __u8 rsvd0; + __be32 sgl_len; + + __be64 sgl_data; +}; + +#define FUN_SUBOP_SGL_INIT(_subop, _flags, _nsgl, _sgl_len, _sgl_data) \ + (struct fun_subop_sgl) { \ + .subop = (_subop), .flags = (_flags), .nsgl = (_nsgl), \ + .sgl_len = cpu_to_be32(_sgl_len), \ + .sgl_data = cpu_to_be64(_sgl_data), \ + } + +struct fun_dataop_rqbuf { + __u8 subop; + __u8 rsvd0; + __be16 cid; + __be32 bufoff; +}; + +struct fun_dataop_hdr { + __u8 nsgl; + __u8 flags; + __u8 ngather; + __u8 nscatter; + __be32 total_len; + + struct fun_dataop_imm imm[]; +}; + +#define FUN_DATAOP_HDR_INIT(_nsgl, _flags, _ngather, _nscatter, _total_len) \ + (struct fun_dataop_hdr) { \ + .nsgl = _nsgl, .flags = _flags, .ngather = _ngather, \ + 
.nscatter = _nscatter, .total_len = cpu_to_be32(_total_len), \ + } + +enum fun_port_inetaddr_event_type { + FUN_PORT_INETADDR_ADD = 0x1, + FUN_PORT_INETADDR_DEL = 0x2, +}; + +enum fun_port_inetaddr_addr_family { + FUN_PORT_INETADDR_IPV4 = 0x1, + FUN_PORT_INETADDR_IPV6 = 0x2, +}; + +struct fun_admin_port_req { + struct fun_admin_req_common common; + + union port_req_subop { + struct fun_admin_port_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_port_write_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; /* portid */ + + struct fun_admin_write48_req write48[]; + } write; + struct fun_admin_port_read_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; /* portid */ + + struct fun_admin_read48_req read48[]; + } read; + struct fun_admin_port_inetaddr_event_req { + __u8 subop; + __u8 rsvd0; + __u8 event_type; + __u8 addr_family; + __be32 id; + + __u8 addr[]; + } inetaddr_event; + } u; +}; + +#define FUN_ADMIN_PORT_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_WRITE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_write_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_READ_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_read_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +struct fun_admin_port_rsp { + struct fun_admin_rsp_common common; + + union port_rsp_subop { + struct fun_admin_port_create_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be16 lport; + __u8 rsvd1[6]; + } create; + struct fun_admin_port_write_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + + struct fun_admin_write48_rsp write48[]; + } write; + struct fun_admin_port_read_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* 
portid */ + + struct fun_admin_read48_rsp read48[]; + } read; + struct fun_admin_port_inetaddr_event_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + } inetaddr_event; + } u; +}; + +enum fun_xcvr_type { + FUN_XCVR_BASET = 0x0, + FUN_XCVR_CU = 0x1, + FUN_XCVR_SMF = 0x2, + FUN_XCVR_MMF = 0x3, + FUN_XCVR_AOC = 0x4, + FUN_XCVR_SFPP = 0x10, /* SFP+ or later */ + FUN_XCVR_QSFPP = 0x11, /* QSFP+ or later */ + FUN_XCVR_QSFPDD = 0x12, /* QSFP-DD */ +}; + +struct fun_admin_port_notif { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0; + __be16 id; + __be32 speed; /* in 10 Mbps units */ + + __u8 link_state; + __u8 missed_events; + __u8 link_down_reason; + __u8 xcvr_type; + __u8 flow_ctrl; + __u8 fec; + __u8 active_lanes; + __u8 rsvd1; + + __be64 advertising; + + __be64 lp_advertising; +}; + +enum fun_eth_rss_const { + FUN_ETH_RSS_MAX_KEY_SIZE = 0x28, + FUN_ETH_RSS_MAX_INDIR_ENT = 0x40, +}; + +enum fun_eth_hash_alg { + FUN_ETH_RSS_ALG_INVALID = 0x0, + FUN_ETH_RSS_ALG_TOEPLITZ = 0x1, + FUN_ETH_RSS_ALG_CRC32 = 0x2, +}; + +struct fun_admin_rss_req { + struct fun_admin_req_common common; + + union rss_req_subop { + struct fun_admin_rss_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 viid; /* VI flow id */ + + __be64 metadata[1]; + + __u8 alg; + __u8 keylen; + __u8 indir_nent; + __u8 rsvd2; + __be16 key_off; + __be16 indir_off; + + struct fun_dataop_hdr dataop; + } create; + } u; +}; + +#define FUN_ADMIN_RSS_CREATE_REQ_INIT(_subop, _flags, _id, _viid, _alg, \ + _keylen, _indir_nent, _key_off, \ + _indir_off) \ + (struct fun_admin_rss_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .viid = cpu_to_be32(_viid), \ + .alg = _alg, .keylen = _keylen, .indir_nent = _indir_nent, \ + .key_off = cpu_to_be16(_key_off), \ + .indir_off = cpu_to_be16(_indir_off), \ + } + +struct fun_admin_vi_req { + struct fun_admin_req_common common; + + union vi_req_subop { + struct 
fun_admin_vi_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 portid; /* port flow id */ + } create; + } u; +}; + +#define FUN_ADMIN_VI_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \ + (struct fun_admin_vi_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \ + } + +struct fun_admin_eth_req { + struct fun_admin_req_common common; + + union eth_req_subop { + struct fun_admin_eth_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 portid; /* port flow id */ + } create; + } u; +}; + +#define FUN_ADMIN_ETH_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \ + (struct fun_admin_eth_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \ + } + +enum { + FUN_ADMIN_SWU_UPGRADE_FLAG_INIT = 0x10, + FUN_ADMIN_SWU_UPGRADE_FLAG_COMPLETE = 0x20, + FUN_ADMIN_SWU_UPGRADE_FLAG_DOWNGRADE = 0x40, + FUN_ADMIN_SWU_UPGRADE_FLAG_ACTIVE_IMAGE = 0x80, + FUN_ADMIN_SWU_UPGRADE_FLAG_ASYNC = 0x1, +}; + +enum fun_admin_swu_subop { + FUN_ADMIN_SWU_SUBOP_GET_VERSION = 0x20, + FUN_ADMIN_SWU_SUBOP_UPGRADE = 0x21, + FUN_ADMIN_SWU_SUBOP_UPGRADE_DATA = 0x22, + FUN_ADMIN_SWU_SUBOP_GET_ALL_VERSIONS = 0x23, +}; + +struct fun_admin_swu_req { + struct fun_admin_req_common common; + + union swu_req_subop { + struct fun_admin_swu_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_swu_upgrade_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 fourcc; + __be32 rsvd1; + + __be64 image_size; /* upgrade image length */ + } upgrade; + struct fun_admin_swu_upgrade_data_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 offset; /* offset of data in this command */ + __be32 size; /* total size of data in this command */ + } upgrade_data; + } u; + + struct fun_subop_sgl sgl[]; /* in, out 
buffers through sgl */ +}; + +#define FUN_ADMIN_SWU_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_swu_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_SWU_UPGRADE_REQ_INIT(_subop, _flags, _id, _fourcc, \ + _image_size) \ + (struct fun_admin_swu_upgrade_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .fourcc = cpu_to_be32(_fourcc), \ + .image_size = cpu_to_be64(_image_size), \ + } + +#define FUN_ADMIN_SWU_UPGRADE_DATA_REQ_INIT(_subop, _flags, _id, _offset, \ + _size) \ + (struct fun_admin_swu_upgrade_data_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .offset = cpu_to_be32(_offset), \ + .size = cpu_to_be32(_size), \ + } + +struct fun_admin_swu_rsp { + struct fun_admin_rsp_common common; + + union swu_rsp_subop { + struct fun_admin_swu_create_rsp { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_swu_upgrade_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be32 fourcc; + __be32 status; + + __be32 progress; + __be32 unused; + } upgrade; + struct fun_admin_swu_upgrade_data_rsp { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 offset; + __be32 size; + } upgrade_data; + } u; +}; + +enum fun_ktls_version { + FUN_KTLS_TLSV2 = 0x20, + FUN_KTLS_TLSV3 = 0x30, +}; + +enum fun_ktls_cipher { + FUN_KTLS_CIPHER_AES_GCM_128 = 0x33, + FUN_KTLS_CIPHER_AES_GCM_256 = 0x34, + FUN_KTLS_CIPHER_AES_CCM_128 = 0x35, + FUN_KTLS_CIPHER_CHACHA20_POLY1305 = 0x36, +}; + +enum fun_ktls_modify_flags { + FUN_KTLS_MODIFY_REMOVE = 0x1, +}; + +struct fun_admin_ktls_create_req { + struct fun_admin_req_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; +}; + +#define FUN_ADMIN_KTLS_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_ktls_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +struct 
fun_admin_ktls_create_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0[3]; + __be32 id; +}; + +struct fun_admin_ktls_modify_req { + struct fun_admin_req_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be64 tlsid; + + __be32 tcp_seq; + __u8 version; + __u8 cipher; + __u8 rsvd1[2]; + + __u8 record_seq[8]; + + __u8 key[32]; + + __u8 iv[16]; + + __u8 salt[8]; +}; + +#define FUN_ADMIN_KTLS_MODIFY_REQ_INIT(_subop, _flags, _id, _tlsid, _tcp_seq, \ + _version, _cipher) \ + (struct fun_admin_ktls_modify_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .tlsid = cpu_to_be64(_tlsid), \ + .tcp_seq = cpu_to_be32(_tcp_seq), .version = _version, \ + .cipher = _cipher, \ + } + +struct fun_admin_ktls_modify_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be64 tlsid; +}; + +struct fun_req_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 rsvd0; + __be16 cid; +}; + +struct fun_rsp_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 ret; + __be16 cid; +}; + +struct fun_cqe_info { + __be16 sqhd; + __be16 sqid; + __be16 cid; + __be16 sf_p; +}; + +enum fun_eprq_def { + FUN_EPRQ_PKT_ALIGN = 0x80, +}; + +struct fun_eprq_rqbuf { + __be64 bufaddr; +}; + +#define FUN_EPRQ_RQBUF_INIT(_bufaddr) \ + (struct fun_eprq_rqbuf) { \ + .bufaddr = cpu_to_be64(_bufaddr), \ + } + +enum fun_eth_op { + FUN_ETH_OP_TX = 0x1, + FUN_ETH_OP_RX = 0x2, +}; + +enum { + FUN_ETH_OFFLOAD_EN = 0x8000, + FUN_ETH_OUTER_EN = 0x4000, + FUN_ETH_INNER_LSO = 0x2000, + FUN_ETH_INNER_TSO = 0x1000, + FUN_ETH_OUTER_IPV6 = 0x800, + FUN_ETH_OUTER_UDP = 0x400, + FUN_ETH_INNER_IPV6 = 0x200, + FUN_ETH_INNER_UDP = 0x100, + FUN_ETH_UPDATE_OUTER_L3_LEN = 0x80, + FUN_ETH_UPDATE_OUTER_L3_CKSUM = 0x40, + FUN_ETH_UPDATE_OUTER_L4_LEN = 0x20, + FUN_ETH_UPDATE_OUTER_L4_CKSUM = 0x10, + FUN_ETH_UPDATE_INNER_L3_LEN = 0x8, + FUN_ETH_UPDATE_INNER_L3_CKSUM = 0x4, + 
FUN_ETH_UPDATE_INNER_L4_LEN = 0x2, + FUN_ETH_UPDATE_INNER_L4_CKSUM = 0x1, +}; + +struct fun_eth_offload { + __be16 flags; /* combination of above flags */ + __be16 mss; /* TSO max seg size */ + __be16 tcp_doff_flags; /* TCP data offset + flags 16b word */ + __be16 vlan; + + __be16 inner_l3_off; /* Inner L3 header offset */ + __be16 inner_l4_off; /* Inner L4 header offset */ + __be16 outer_l3_off; /* Outer L3 header offset */ + __be16 outer_l4_off; /* Outer L4 header offset */ +}; + +static inline void fun_eth_offload_init(struct fun_eth_offload *s, u16 flags, + u16 mss, __be16 tcp_doff_flags, + __be16 vlan, u16 inner_l3_off, + u16 inner_l4_off, u16 outer_l3_off, + u16 outer_l4_off) +{ + s->flags = cpu_to_be16(flags); + s->mss = cpu_to_be16(mss); + s->tcp_doff_flags = tcp_doff_flags; + s->vlan = vlan; + s->inner_l3_off = cpu_to_be16(inner_l3_off); + s->inner_l4_off = cpu_to_be16(inner_l4_off); + s->outer_l3_off = cpu_to_be16(outer_l3_off); + s->outer_l4_off = cpu_to_be16(outer_l4_off); +} + +struct fun_eth_tls { + __be64 tlsid; +}; + +enum { + FUN_ETH_TX_TLS = 0x8000, +}; + +struct fun_eth_tx_req { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 repr_idn; + __be16 encap_proto; + + struct fun_eth_offload offload; + + struct fun_dataop_hdr dataop; +}; + +struct fun_eth_rx_cv { + __be16 il4_prot_to_l2_type; +}; + +#define FUN_ETH_RX_CV_IL4_PROT_S 13U +#define FUN_ETH_RX_CV_IL4_PROT_M 0x3 + +#define FUN_ETH_RX_CV_IL3_PROT_S 11U +#define FUN_ETH_RX_CV_IL3_PROT_M 0x3 + +#define FUN_ETH_RX_CV_OL4_PROT_S 8U +#define FUN_ETH_RX_CV_OL4_PROT_M 0x7 + +#define FUN_ETH_RX_CV_ENCAP_TYPE_S 6U +#define FUN_ETH_RX_CV_ENCAP_TYPE_M 0x3 + +#define FUN_ETH_RX_CV_OL3_PROT_S 4U +#define FUN_ETH_RX_CV_OL3_PROT_M 0x3 + +#define FUN_ETH_RX_CV_VLAN_TYPE_S 3U +#define FUN_ETH_RX_CV_VLAN_TYPE_M 0x1 + +#define FUN_ETH_RX_CV_L2_TYPE_S 2U +#define FUN_ETH_RX_CV_L2_TYPE_M 0x1 + +enum fun_rx_cv { + FUN_RX_CV_NONE = 0x0, + FUN_RX_CV_IP = 0x2, + FUN_RX_CV_IP6 = 0x3, + FUN_RX_CV_TCP = 
0x2, + FUN_RX_CV_UDP = 0x3, + FUN_RX_CV_VXLAN = 0x2, + FUN_RX_CV_MPLS = 0x3, +}; + +struct fun_eth_cqe { + __u8 op; + __u8 len8; + __u8 nsgl; + __u8 repr_idn; + __be32 pkt_len; + + __be64 timestamp; + + __be16 pkt_cv; + __be16 rsvd0; + __be32 hash; + + __be16 encap_proto; + __be16 vlan; + __be32 rsvd1; + + __be32 buf_offset; + __be16 headroom; + __be16 csum; +}; + +enum fun_admin_adi_attr { + FUN_ADMIN_ADI_ATTR_MACADDR = 0x1, + FUN_ADMIN_ADI_ATTR_VLAN = 0x2, + FUN_ADMIN_ADI_ATTR_RATE = 0x3, +}; + +struct fun_adi_param { + union adi_param { + struct fun_adi_mac { + __be64 addr; + } mac; + struct fun_adi_vlan { + __be32 rsvd; + __be16 eth_type; + __be16 tci; + } vlan; + struct fun_adi_rate { + __be32 rsvd; + __be32 tx_mbps; + } rate; + } u; +}; + +#define FUN_ADI_MAC_INIT(_addr) \ + (struct fun_adi_mac) { \ + .addr = cpu_to_be64(_addr), \ + } + +#define FUN_ADI_VLAN_INIT(_eth_type, _tci) \ + (struct fun_adi_vlan) { \ + .eth_type = cpu_to_be16(_eth_type), .tci = cpu_to_be16(_tci), \ + } + +#define FUN_ADI_RATE_INIT(_tx_mbps) \ + (struct fun_adi_rate) { \ + .tx_mbps = cpu_to_be32(_tx_mbps), \ + } + +struct fun_admin_adi_req { + struct fun_admin_req_common common; + + union adi_req_subop { + struct fun_admin_adi_write_req { + __u8 subop; + __u8 attribute; + __be16 rsvd; + __be32 id; + + struct fun_adi_param param; + } write; + } u; +}; + +#define FUN_ADMIN_ADI_WRITE_REQ_INIT(_subop, _attribute, _id) \ + (struct fun_admin_adi_write_req) { \ + .subop = (_subop), .attribute = (_attribute), \ + .id = cpu_to_be32(_id), \ + } + +#endif /* __FUN_HCI_H */ diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.c b/drivers/net/ethernet/fungible/funcore/fun_queue.c new file mode 100644 index 000000000000..8ab9f68434f5 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_queue.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/log2.h> +#include <linux/mm.h> 
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "fun_dev.h"
+#include "fun_queue.h"
+
+/* Allocate memory for a queue. This includes the memory for the HW descriptor
+ * ring, an optional 64b HW write-back area, and an optional SW state ring.
+ * Returns the virtual and DMA addresses of the HW ring, the VA of the SW ring,
+ * and the VA of the write-back area.
+ *
+ * The HW ring holds depth descriptors of hw_desc_sz bytes and is allocated
+ * as coherent DMA memory; its DMA address is stored in *dma_addr. If wb is
+ * set, sizeof(u64) extra bytes are appended to the ring and their VA is
+ * stored in *wb_va. If sw_desc_sz is non-zero a zeroed array of depth entries
+ * of that size is allocated and stored in *sw_va. *sw_va and *wb_va are not
+ * touched when the corresponding option is off, so they may be NULL then.
+ *
+ * numa_node selects the node for both allocations; NUMA_NO_NODE means the
+ * node of dma_dev.
+ *
+ * Returns the VA of the HW ring, or NULL if either allocation fails (in which
+ * case nothing is left allocated).
+ */
+void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth,
+			 size_t hw_desc_sz, size_t sw_desc_sz, bool wb,
+			 int numa_node, dma_addr_t *dma_addr, void **sw_va,
+			 volatile __be64 **wb_va)
+{
+	int dev_node = dev_to_node(dma_dev);
+	size_t dma_sz;
+	void *va;
+
+	if (numa_node == NUMA_NO_NODE)
+		numa_node = dev_node;
+
+	/* Place optional write-back area at end of descriptor ring. */
+	dma_sz = hw_desc_sz * depth;
+	if (wb)
+		dma_sz += sizeof(u64);
+
+	/* Temporarily retarget the device's node so the coherent allocation
+	 * is made for numa_node, then restore the original node.
+	 */
+	set_dev_node(dma_dev, numa_node);
+	va = dma_alloc_coherent(dma_dev, dma_sz, dma_addr, GFP_KERNEL);
+	set_dev_node(dma_dev, dev_node);
+	if (!va)
+		return NULL;
+
+	if (sw_desc_sz) {
+		*sw_va = kvzalloc_node(sw_desc_sz * depth, GFP_KERNEL,
+				       numa_node);
+		if (!*sw_va) {
+			/* Undo the HW ring allocation; *sw_va is NULL here. */
+			dma_free_coherent(dma_dev, dma_sz, va, *dma_addr);
+			return NULL;
+		}
+	}
+
+	/* The write-back word is the last sizeof(u64) bytes of the area. */
+	if (wb)
+		*wb_va = va + dma_sz - sizeof(u64);
+	return va;
+}
+EXPORT_SYMBOL_GPL(fun_alloc_ring_mem);
+
+/* Free the memory obtained from fun_alloc_ring_mem(). depth, hw_desc_sz and
+ * wb must match the values used at allocation since they determine the size
+ * of the DMA area being released. A NULL hw_va skips the DMA free; sw_va is
+ * passed to kvfree() unconditionally.
+ */
+void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz,
+		       bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va)
+{
+	if (hw_va) {
+		size_t sz = depth * hw_desc_sz;
+
+		if (wb)
+			sz += sizeof(u64);
+		dma_free_coherent(dma_dev, sz, hw_va, dma_addr);
+	}
+	kvfree(sw_va);
+}
+EXPORT_SYMBOL_GPL(fun_free_ring_mem);
+
+/* Prepare and issue an admin command to create an SQ on the device with the
+ * provided parameters. If the queue ID is auto-allocated by the device it is
+ * returned in *sqidp.
+ */
+int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid,
+		  u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr,
+		  u8 coal_nentries, u8 coal_usec, u32 irq_num,
+		  u32 scan_start_id, u32 scan_end_id,
+		  u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp)
+{
+	/* The request and its response share the same storage. */
+	union {
+		struct fun_admin_epsq_req req;
+		struct fun_admin_generic_create_rsp rsp;
+	} msg;
+	u8 desc_size_log2 = sqe_size_log2;
+	dma_addr_t head_wb_addr;
+	u32 dev_qid;
+	int err;
+
+	if (sq_depth > fdev->q_depth)
+		return -EINVAL;
+
+	/* RQ descriptors have a fixed size, whatever the caller passed. */
+	if (flags & FUN_ADMIN_EPSQ_CREATE_FLAG_RQ)
+		desc_size_log2 = ilog2(sizeof(struct fun_eprq_rqbuf));
+
+	/* The head write-back word follows the last descriptor of the ring. */
+	head_wb_addr = dma_addr + (sq_depth << desc_size_log2);
+
+	msg.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPSQ,
+						    sizeof(msg.req));
+	msg.req.u.create =
+		FUN_ADMIN_EPSQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
+					       sqid, cqid, desc_size_log2,
+					       sq_depth - 1, dma_addr, 0,
+					       coal_nentries, coal_usec,
+					       irq_num, scan_start_id,
+					       scan_end_id, 0,
+					       rq_buf_size_log2,
+					       ilog2(sizeof(u64)),
+					       head_wb_addr);
+
+	err = fun_submit_admin_sync_cmd(fdev, &msg.req.common, &msg.rsp,
+					sizeof(msg.rsp), 0);
+	if (err)
+		return err;
+
+	/* The doorbell is looked up from the ID the device reports back; the
+	 * ID itself is only handed to the caller when the device chose it.
+	 */
+	dev_qid = be32_to_cpu(msg.rsp.id);
+	*dbp = fun_sq_db_addr(fdev, dev_qid);
+	if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
+		*sqidp = dev_qid;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fun_sq_create);
+
+/* Prepare and issue an admin command to create a CQ on the device with the
+ * provided parameters. If the queue ID is auto-allocated by the device it is
+ * returned in *cqidp.
+ */
+int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid,
+		  u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr,
+		  u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec,
+		  u32 irq_num, u32 scan_start_id, u32 scan_end_id, u32 *cqidp,
+		  u32 __iomem **dbp)
+{
+	/* The request and its response share the same storage. */
+	union {
+		struct fun_admin_epcq_req req;
+		struct fun_admin_generic_create_rsp rsp;
+	} cmd;
+	u32 hw_qid;
+	int rc;
+
+	if (cq_depth > fdev->q_depth)
+		return -EINVAL;
+
+	/* NOTE(review): the depth is sent as cq_depth - 1 and the headroom as
+	 * headroom / 2; presumably these are the device's encodings of those
+	 * fields - confirm against the HCI definition.
+	 */
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
+						    sizeof(cmd.req));
+	cmd.req.u.create =
+		FUN_ADMIN_EPCQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
+					       cqid, rqid, cqe_size_log2,
+					       cq_depth - 1, dma_addr, tailroom,
+					       headroom / 2, 0, coal_nentries,
+					       coal_usec, irq_num,
+					       scan_start_id, scan_end_id, 0);
+
+	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
+				       &cmd.rsp, sizeof(cmd.rsp), 0);
+	if (rc)
+		return rc;
+
+	/* The doorbell is looked up from the ID reported by the device. */
+	hw_qid = be32_to_cpu(cmd.rsp.id);
+	*dbp = fun_cq_db_addr(fdev, hw_qid);
+	if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
+		*cqidp = hw_qid;
+	return rc;
+}
+EXPORT_SYMBOL_GPL(fun_cq_create);
+
+/* Return true if the funq's SQ flags request a head write-back address. */
+static bool fun_sq_is_head_wb(const struct fun_queue *funq)
+{
+	return funq->sq_flags & FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS;
+}
+
+/* Unmap and release every page still attached to the RQ. Slots without a
+ * page are skipped, so this is safe on a partially filled RQ.
+ */
+static void fun_clean_rq(struct fun_queue *funq)
+{
+	struct fun_dev *fdev = funq->fdev;
+	struct fun_rq_info *rqinfo;
+	unsigned int i;
+
+	for (i = 0; i < funq->rq_depth; i++) {
+		rqinfo = &funq->rq_info[i];
+		if (rqinfo->page) {
+			dma_unmap_page(fdev->dev, rqinfo->dma, PAGE_SIZE,
+				       DMA_FROM_DEVICE);
+			put_page(rqinfo->page);
+			rqinfo->page = NULL;
+		}
+	}
+}
+
+/* Give every RQ entry one page: allocate it on the device's node, map it for
+ * device-to-CPU DMA and write its DMA address into the matching descriptor.
+ * Returns 0 on success or -ENOMEM. On failure the pages set up by earlier
+ * iterations stay attached to rq_info for fun_clean_rq() to release.
+ */
+static int fun_fill_rq(struct fun_queue *funq)
+{
+	struct device *dev = funq->fdev->dev;
+	int i, node = dev_to_node(dev);
+	struct fun_rq_info *rqinfo;
+
+	for (i = 0; i < funq->rq_depth; i++) {
+		rqinfo = &funq->rq_info[i];
+		rqinfo->page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (unlikely(!rqinfo->page))
+			return -ENOMEM;
+
+		rqinfo->dma = dma_map_page(dev, rqinfo->page, 0,
+					   PAGE_SIZE, DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(dev, rqinfo->dma))) {
+			/* Drop the unmapped page so fun_clean_rq() does not
+			 * try to unmap it.
+			 */
+			put_page(rqinfo->page);
+			rqinfo->page = NULL;
+			return -ENOMEM;
+		}
+
+		funq->rqes[i] = FUN_EPRQ_RQBUF_INIT(rqinfo->dma);
+	}
+
+	funq->rq_tail = funq->rq_depth - 1;
+	return 0;
+}
+
+/* Record the buffer offset of the data being consumed. An offset that is not
+ * larger than the previous one is treated as the start of a new RQ buffer:
+ * the current buffer is synced back for the device, counted as needing to be
+ * reposted, and the buffer index advances with wrap-around.
+ */
+static void fun_rq_update_pos(struct fun_queue *funq, int buf_offset)
+{
+	if (buf_offset <= funq->rq_buf_offset) {
+		struct fun_rq_info *rqinfo = &funq->rq_info[funq->rq_buf_idx];
+		struct device *dev = funq->fdev->dev;
+
+		dma_sync_single_for_device(dev, rqinfo->dma, PAGE_SIZE,
+					   DMA_FROM_DEVICE);
+		funq->num_rqe_to_fill++;
+		if (++funq->rq_buf_idx == funq->rq_depth)
+			funq->rq_buf_idx = 0;
+	}
+	funq->rq_buf_offset = buf_offset;
+}
+
+/* Given a command response with data scattered across >= 1 RQ buffers return
+ * a pointer to a contiguous buffer containing all the data. If the data is in
+ * one RQ buffer the start address within that buffer is returned, otherwise a
+ * new buffer is allocated and the data is gathered into it.
+ *
+ * *need_free is set to false in the single-buffer case and to true when a
+ * gather buffer was allocated; if that allocation fails it is left as the
+ * caller initialized it and NULL is returned, with the RQ position still
+ * advanced past the data.
+ */
+static void *fun_data_from_rq(struct fun_queue *funq,
+			      const struct fun_rsp_common *rsp, bool *need_free)
+{
+	u32 bufoff, total_len, remaining, fragsize, dataoff;
+	struct device *dma_dev = funq->fdev->dev;
+	const struct fun_dataop_rqbuf *databuf;
+	const struct fun_dataop_hdr *dataop;
+	const struct fun_rq_info *rqinfo;
+	void *data;
+
+	/* suboff8 locates the dataop header in units of 8 bytes. */
+	dataop = (void *)rsp + rsp->suboff8 * 8;
+	total_len = be32_to_cpu(dataop->total_len);
+
+	if (likely(dataop->nsgl == 1)) {
+		databuf = (struct fun_dataop_rqbuf *)dataop->imm;
+		bufoff = be32_to_cpu(databuf->bufoff);
+		fun_rq_update_pos(funq, bufoff);
+		rqinfo = &funq->rq_info[funq->rq_buf_idx];
+		dma_sync_single_for_cpu(dma_dev, rqinfo->dma + bufoff,
+					total_len, DMA_FROM_DEVICE);
+		*need_free = false;
+		return page_address(rqinfo->page) + bufoff;
+	}
+
+	/* For scattered completions gather the fragments into one buffer. */
+
+	data = kmalloc(total_len, GFP_ATOMIC);
+	/* NULL is OK here. In case of failure we still need to consume the data
+	 * for proper buffer accounting but indicate an error in the response.
+	 */
+	if (likely(data))
+		*need_free = true;
+
+	/* Each fragment starts at offset 0 of the next RQ buffer and holds at
+	 * most PAGE_SIZE bytes.
+	 */
+	dataoff = 0;
+	for (remaining = total_len; remaining; remaining -= fragsize) {
+		fun_rq_update_pos(funq, 0);
+		fragsize = min_t(unsigned int, PAGE_SIZE, remaining);
+		if (data) {
+			rqinfo = &funq->rq_info[funq->rq_buf_idx];
+			dma_sync_single_for_cpu(dma_dev, rqinfo->dma, fragsize,
+						DMA_FROM_DEVICE);
+			memcpy(data + dataoff, page_address(rqinfo->page),
+			       fragsize);
+			dataoff += fragsize;
+		}
+	}
+	return data;
+}
+
+/* Process up to max new CQEs (cq_depth - 1 if max is 0), invoking the funq's
+ * CQ callback, if set, for each. Returns the number of CQEs processed. No
+ * doorbell is written here; see fun_process_cq().
+ */
+unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max)
+{
+	const struct fun_cqe_info *info;
+	struct fun_rsp_common *rsp;
+	unsigned int new_cqes;
+	u16 sf_p, flags;
+	bool need_free;
+	void *cqe;
+
+	if (!max)
+		max = funq->cq_depth - 1;
+
+	for (new_cqes = 0; new_cqes < max; new_cqes++) {
+		cqe = funq->cqes + (funq->cq_head << funq->cqe_size_log2);
+		info = funq_cqe_info(funq, cqe);
+		sf_p = be16_to_cpu(info->sf_p);
+
+		/* Bit 0 of sf_p is the phase tag; a mismatch with the
+		 * expected phase means this CQE has not been written yet.
+		 */
+		if ((sf_p & 1) != funq->cq_phase)
+			break;
+
+		/* ensure the phase tag is read before other CQE fields */
+		dma_rmb();
+
+		/* The expected phase flips each time the head wraps. */
+		if (++funq->cq_head == funq->cq_depth) {
+			funq->cq_head = 0;
+			funq->cq_phase = !funq->cq_phase;
+		}
+
+		rsp = cqe;
+		flags = be16_to_cpu(rsp->flags);
+
+		need_free = false;
+		if (unlikely(flags & FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF)) {
+			rsp = fun_data_from_rq(funq, rsp, &need_free);
+			if (!rsp) {
+				/* Gathering failed: hand the CQE itself to the
+				 * callback and report ENOMEM unless an error
+				 * is already recorded in it.
+				 */
+				rsp = cqe;
+				rsp->len8 = 1;
+				if (rsp->ret == 0)
+					rsp->ret = ENOMEM;
+			}
+		}
+
+		if (funq->cq_cb)
+			funq->cq_cb(funq, funq->cb_data, rsp, info);
+		if (need_free)
+			kfree(rsp);
+	}
+
+	dev_dbg(funq->fdev->dev, "CQ %u, new CQEs %u/%u, head %u, phase %u\n",
+		funq->cqid, new_cqes, max, funq->cq_head, funq->cq_phase);
+	return new_cqes;
+}
+
+/* Process new CQEs as __fun_process_cq() does, then return any consumed RQ
+ * buffers to the device through the RQ doorbell and write the new CQ head,
+ * with FUN_DB_IRQ_ARM_F set, to the CQ doorbell. Returns the number of CQEs
+ * processed.
+ */
+unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max)
+{
+	unsigned int processed;
+	u32 db;
+
+	processed = __fun_process_cq(funq, max);
+
+	if (funq->num_rqe_to_fill) {
+		funq->rq_tail = (funq->rq_tail + funq->num_rqe_to_fill) %
+				funq->rq_depth;
+		funq->num_rqe_to_fill = 0;
+		writel(funq->rq_tail, funq->rq_db);
+	}
+
+	db = funq->cq_head | FUN_DB_IRQ_ARM_F;
+	writel(db, funq->cq_db);
+	return processed;
+}
+
+/* Allocate the SQ descriptor ring, with a head write-back area if the SQ
+ * flags ask for one. Returns 0 or -ENOMEM.
+ */
+static int fun_alloc_sqes(struct fun_queue *funq)
+{
+	funq->sq_cmds = fun_alloc_ring_mem(funq->fdev->dev, funq->sq_depth,
+					   1 << funq->sqe_size_log2, 0,
+					   fun_sq_is_head_wb(funq),
+					   NUMA_NO_NODE, &funq->sq_dma_addr,
+					   NULL, &funq->sq_head);
+	return funq->sq_cmds ? 0 : -ENOMEM;
+}
+
+/* Allocate the CQ descriptor ring. Returns 0 or -ENOMEM. */
+static int fun_alloc_cqes(struct fun_queue *funq)
+{
+	funq->cqes = fun_alloc_ring_mem(funq->fdev->dev, funq->cq_depth,
+					1 << funq->cqe_size_log2, 0, false,
+					NUMA_NO_NODE, &funq->cq_dma_addr, NULL,
+					NULL);
+	return funq->cqes ? 0 : -ENOMEM;
+}
+
+/* Allocate the RQ descriptor ring together with the rq_info SW state array
+ * that tracks the page behind each descriptor. Returns 0 or -ENOMEM.
+ */
+static int fun_alloc_rqes(struct fun_queue *funq)
+{
+	funq->rqes = fun_alloc_ring_mem(funq->fdev->dev, funq->rq_depth,
+					sizeof(*funq->rqes),
+					sizeof(*funq->rq_info), false,
+					NUMA_NO_NODE, &funq->rq_dma_addr,
+					(void **)&funq->rq_info, NULL);
+	return funq->rqes ? 0 : -ENOMEM;
+}
+
+/* Free a queue's structures: the CQ and SQ rings, the RQ pages and ring if an
+ * RQ ring was allocated, and the funq itself. Rings that were never allocated
+ * are skipped, so this also serves a partially constructed funq.
+ */
+void fun_free_queue(struct fun_queue *funq)
+{
+	struct device *dev = funq->fdev->dev;
+
+	fun_free_ring_mem(dev, funq->cq_depth, 1 << funq->cqe_size_log2, false,
+			  funq->cqes, funq->cq_dma_addr, NULL);
+	fun_free_ring_mem(dev, funq->sq_depth, 1 << funq->sqe_size_log2,
+			  fun_sq_is_head_wb(funq), funq->sq_cmds,
+			  funq->sq_dma_addr, NULL);
+
+	if (funq->rqes) {
+		fun_clean_rq(funq);
+		fun_free_ring_mem(dev, funq->rq_depth, sizeof(*funq->rqes),
+				  false, funq->rqes, funq->rq_dma_addr,
+				  funq->rq_info);
+	}
+
+	kfree(funq);
+}
+
+/* Allocate and initialize a funq's structures.
*/
+/* Only host memory is set up here: the CQ and SQ rings and, when
+ * req->rq_depth is non-zero, the RQ ring and its pages. Returns the new funq,
+ * or NULL if any allocation fails, in which case whatever had been allocated
+ * is released again.
+ */
+struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid,
+				  const struct fun_queue_alloc_req *req)
+{
+	const bool has_rq = req->rq_depth != 0;
+	struct fun_queue *q;
+
+	q = kzalloc(sizeof(*q), GFP_KERNEL);
+	if (!q)
+		return NULL;
+
+	q->fdev = fdev;
+	q->qid = qid;
+	spin_lock_init(&q->sq_lock);
+
+	/* Pick the initial component queue IDs. */
+	if (!has_rq) {
+		/* No RQ: the CQ and SQ both use the funq's index. */
+		q->cqid = qid;
+		q->sqid = qid;
+	} else if (q->qid) {
+		/* I/O queue with RQ: RQ shares the CQ's ID, SQ is next. */
+		q->cqid = 2 * qid;
+		q->rqid = q->cqid;
+		q->sqid = q->rqid + 1;
+	} else {
+		/* Admin queue with RQ: SQ is always 0, RQ takes ID 1. */
+		q->cqid = 2 * qid;
+		q->sqid = 0;
+		q->rqid = 1;
+	}
+
+	/* CQ parameters */
+	q->cq_flags = req->cq_flags;
+	q->cqe_size_log2 = req->cqe_size_log2;
+	q->cq_depth = req->cq_depth;
+	q->cq_intcoal_nentries = req->cq_intcoal_nentries;
+	q->cq_intcoal_usec = req->cq_intcoal_usec;
+	q->cq_phase = 1;
+	q->cq_vector = -1;
+	/* The CQE info block occupies the tail end of each CQE. */
+	q->cqe_info_offset = (1 << q->cqe_size_log2) - sizeof(struct fun_cqe_info);
+
+	/* SQ parameters */
+	q->sq_flags = req->sq_flags;
+	q->sqe_size_log2 = req->sqe_size_log2;
+	q->sq_depth = req->sq_depth;
+	q->sq_intcoal_nentries = req->sq_intcoal_nentries;
+	q->sq_intcoal_usec = req->sq_intcoal_usec;
+
+	if (fun_alloc_cqes(q) || fun_alloc_sqes(q))
+		goto err_free;
+
+	if (has_rq) {
+		q->rq_flags = req->rq_flags | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ;
+		q->rq_depth = req->rq_depth;
+		q->rq_buf_offset = -1;
+
+		if (fun_alloc_rqes(q) || fun_fill_rq(q))
+			goto err_free;
+	}
+
+	/* SQ 0 and CQ 0 exist without an explicit create command, so their
+	 * doorbells are set up here; all other queues get theirs when they
+	 * are created on the device.
+	 */
+	if (q->sqid == 0)
+		q->sq_db = fun_sq_db_addr(fdev, 0);
+	if (q->cqid == 0)
+		q->cq_db = fun_cq_db_addr(fdev, 0);
+
+	return q;
+
+err_free:
+	fun_free_queue(q);
+	return NULL;
+}
+
+/* Create a funq's CQ on the device.
*/ +static int fun_create_cq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + unsigned int rqid; + int rc; + + rqid = funq->cq_flags & FUN_ADMIN_EPCQ_CREATE_FLAG_RQ ? + funq->rqid : FUN_HCI_ID_INVALID; + rc = fun_cq_create(fdev, funq->cq_flags, funq->cqid, rqid, + funq->cqe_size_log2, funq->cq_depth, + funq->cq_dma_addr, 0, 0, funq->cq_intcoal_nentries, + funq->cq_intcoal_usec, funq->cq_vector, 0, 0, + &funq->cqid, &funq->cq_db); + if (!rc) + dev_dbg(fdev->dev, "created CQ %u\n", funq->cqid); + + return rc; +} + +/* Create a funq's SQ on the device. */ +static int fun_create_sq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + int rc; + + rc = fun_sq_create(fdev, funq->sq_flags, funq->sqid, funq->cqid, + funq->sqe_size_log2, funq->sq_depth, + funq->sq_dma_addr, funq->sq_intcoal_nentries, + funq->sq_intcoal_usec, funq->cq_vector, 0, 0, + 0, &funq->sqid, &funq->sq_db); + if (!rc) + dev_dbg(fdev->dev, "created SQ %u\n", funq->sqid); + + return rc; +} + +/* Create a funq's RQ on the device. */ +int fun_create_rq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + int rc; + + rc = fun_sq_create(fdev, funq->rq_flags, funq->rqid, funq->cqid, 0, + funq->rq_depth, funq->rq_dma_addr, 0, 0, + funq->cq_vector, 0, 0, PAGE_SHIFT, &funq->rqid, + &funq->rq_db); + if (!rc) + dev_dbg(fdev->dev, "created RQ %u\n", funq->rqid); + + return rc; +} + +static unsigned int funq_irq(struct fun_queue *funq) +{ + return pci_irq_vector(to_pci_dev(funq->fdev->dev), funq->cq_vector); +} + +int fun_request_irq(struct fun_queue *funq, const char *devname, + irq_handler_t handler, void *data) +{ + int rc; + + if (funq->cq_vector < 0) + return -EINVAL; + + funq->irq_handler = handler; + funq->irq_data = data; + + snprintf(funq->irqname, sizeof(funq->irqname), + funq->qid ? 
"%s-q[%d]" : "%s-adminq", devname, funq->qid); + + rc = request_irq(funq_irq(funq), handler, 0, funq->irqname, data); + if (rc) + funq->irq_handler = NULL; + + return rc; +} + +/* Create all component queues of a funq on the device. */ +int fun_create_queue(struct fun_queue *funq) +{ + int rc; + + rc = fun_create_cq(funq); + if (rc) + return rc; + + if (funq->rq_depth) { + rc = fun_create_rq(funq); + if (rc) + goto release_cq; + } + + rc = fun_create_sq(funq); + if (rc) + goto release_rq; + + return 0; + +release_rq: + fun_destroy_sq(funq->fdev, funq->rqid); +release_cq: + fun_destroy_cq(funq->fdev, funq->cqid); + return rc; +} + +void fun_free_irq(struct fun_queue *funq) +{ + if (funq->irq_handler) { + unsigned int vector = funq_irq(funq); + + free_irq(vector, funq->irq_data); + funq->irq_handler = NULL; + funq->irq_data = NULL; + } +} diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.h b/drivers/net/ethernet/fungible/funcore/fun_queue.h new file mode 100644 index 000000000000..7fb53d0ae8b0 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_queue.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_QEUEUE_H +#define _FUN_QEUEUE_H + +#include <linux/interrupt.h> +#include <linux/io.h> + +struct device; +struct fun_dev; +struct fun_queue; +struct fun_cqe_info; +struct fun_rsp_common; + +typedef void (*cq_callback_t)(struct fun_queue *funq, void *data, void *msg, + const struct fun_cqe_info *info); + +struct fun_rq_info { + dma_addr_t dma; + struct page *page; +}; + +/* A queue group consisting of an SQ, a CQ, and an optional RQ. 
*/ +struct fun_queue { + struct fun_dev *fdev; + spinlock_t sq_lock; + + dma_addr_t cq_dma_addr; + dma_addr_t sq_dma_addr; + dma_addr_t rq_dma_addr; + + u32 __iomem *cq_db; + u32 __iomem *sq_db; + u32 __iomem *rq_db; + + void *cqes; + void *sq_cmds; + struct fun_eprq_rqbuf *rqes; + struct fun_rq_info *rq_info; + + u32 cqid; + u32 sqid; + u32 rqid; + + u32 cq_depth; + u32 sq_depth; + u32 rq_depth; + + u16 cq_head; + u16 sq_tail; + u16 rq_tail; + + u8 cqe_size_log2; + u8 sqe_size_log2; + + u16 cqe_info_offset; + + u16 rq_buf_idx; + int rq_buf_offset; + u16 num_rqe_to_fill; + + u8 cq_intcoal_usec; + u8 cq_intcoal_nentries; + u8 sq_intcoal_usec; + u8 sq_intcoal_nentries; + + u16 cq_flags; + u16 sq_flags; + u16 rq_flags; + + /* SQ head writeback */ + u16 sq_comp; + + volatile __be64 *sq_head; + + cq_callback_t cq_cb; + void *cb_data; + + irq_handler_t irq_handler; + void *irq_data; + s16 cq_vector; + u8 cq_phase; + + /* I/O q index */ + u16 qid; + + char irqname[24]; +}; + +static inline void *fun_sqe_at(const struct fun_queue *funq, unsigned int pos) +{ + return funq->sq_cmds + (pos << funq->sqe_size_log2); +} + +static inline void funq_sq_post_tail(struct fun_queue *funq, u16 tail) +{ + if (++tail == funq->sq_depth) + tail = 0; + funq->sq_tail = tail; + writel(tail, funq->sq_db); +} + +static inline struct fun_cqe_info *funq_cqe_info(const struct fun_queue *funq, + void *cqe) +{ + return cqe + funq->cqe_info_offset; +} + +static inline void funq_rq_post(struct fun_queue *funq) +{ + writel(funq->rq_tail, funq->rq_db); +} + +struct fun_queue_alloc_req { + u8 cqe_size_log2; + u8 sqe_size_log2; + + u16 cq_flags; + u16 sq_flags; + u16 rq_flags; + + u32 cq_depth; + u32 sq_depth; + u32 rq_depth; + + u8 cq_intcoal_usec; + u8 cq_intcoal_nentries; + u8 sq_intcoal_usec; + u8 sq_intcoal_nentries; +}; + +int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid, + u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr, + u8 coal_nentries, u8 coal_usec, u32 irq_num, + u32 
scan_start_id, u32 scan_end_id, + u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp); +int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid, + u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr, + u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec, + u32 irq_num, u32 scan_start_id, u32 scan_end_id, + u32 *cqidp, u32 __iomem **dbp); +void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth, + size_t hw_desc_sz, size_t sw_desc_size, bool wb, + int numa_node, dma_addr_t *dma_addr, void **sw_va, + volatile __be64 **wb_va); +void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz, + bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va); + +#define fun_destroy_sq(fdev, sqid) \ + fun_res_destroy((fdev), FUN_ADMIN_OP_EPSQ, 0, (sqid)) +#define fun_destroy_cq(fdev, cqid) \ + fun_res_destroy((fdev), FUN_ADMIN_OP_EPCQ, 0, (cqid)) + +struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid, + const struct fun_queue_alloc_req *req); +void fun_free_queue(struct fun_queue *funq); + +static inline void fun_set_cq_callback(struct fun_queue *funq, cq_callback_t cb, + void *cb_data) +{ + funq->cq_cb = cb; + funq->cb_data = cb_data; +} + +int fun_create_rq(struct fun_queue *funq); +int fun_create_queue(struct fun_queue *funq); + +void fun_free_irq(struct fun_queue *funq); +int fun_request_irq(struct fun_queue *funq, const char *devname, + irq_handler_t handler, void *data); + +unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max); +unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max); + +#endif /* _FUN_QEUEUE_H */ diff --git a/drivers/net/ethernet/fungible/funeth/Kconfig b/drivers/net/ethernet/fungible/funeth/Kconfig new file mode 100644 index 000000000000..c72ad9386400 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Fungible Ethernet driver configuration +# + +config FUN_ETH + tristate "Fungible Ethernet 
device driver" + depends on PCI && PCI_MSI + depends on TLS && TLS_DEVICE || TLS_DEVICE=n + select NET_DEVLINK + select FUN_CORE + help + This driver supports the Ethernet functionality of Fungible adapters. + It works with both physical and virtual functions. + + To compile this driver as a module, choose M here. The module + will be called funeth. diff --git a/drivers/net/ethernet/fungible/funeth/Makefile b/drivers/net/ethernet/fungible/funeth/Makefile new file mode 100644 index 000000000000..646d69595b4f --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +ccflags-y += -I$(srctree)/$(src)/../funcore -I$(srctree)/$(src) + +obj-$(CONFIG_FUN_ETH) += funeth.o + +funeth-y := funeth_main.o funeth_rx.o funeth_tx.o funeth_devlink.o \ + funeth_ethtool.o + +funeth-$(CONFIG_TLS_DEVICE) += funeth_ktls.o diff --git a/drivers/net/ethernet/fungible/funeth/fun_port.h b/drivers/net/ethernet/fungible/funeth/fun_port.h new file mode 100644 index 000000000000..0f9da44e3786 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/fun_port.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_PORT_H +#define _FUN_PORT_H + +enum port_mac_rx_stats { + PORT_MAC_RX_etherStatsOctets = 0x0, + PORT_MAC_RX_OctetsReceivedOK = 0x1, + PORT_MAC_RX_aAlignmentErrors = 0x2, + PORT_MAC_RX_aPAUSEMACCtrlFramesReceived = 0x3, + PORT_MAC_RX_aFrameTooLongErrors = 0x4, + PORT_MAC_RX_aInRangeLengthErrors = 0x5, + PORT_MAC_RX_aFramesReceivedOK = 0x6, + PORT_MAC_RX_aFrameCheckSequenceErrors = 0x7, + PORT_MAC_RX_VLANReceivedOK = 0x8, + PORT_MAC_RX_ifInErrors = 0x9, + PORT_MAC_RX_ifInUcastPkts = 0xa, + PORT_MAC_RX_ifInMulticastPkts = 0xb, + PORT_MAC_RX_ifInBroadcastPkts = 0xc, + PORT_MAC_RX_etherStatsDropEvents = 0xd, + PORT_MAC_RX_etherStatsPkts = 0xe, + PORT_MAC_RX_etherStatsUndersizePkts = 0xf, + PORT_MAC_RX_etherStatsPkts64Octets = 0x10, + 
PORT_MAC_RX_etherStatsPkts65to127Octets = 0x11, + PORT_MAC_RX_etherStatsPkts128to255Octets = 0x12, + PORT_MAC_RX_etherStatsPkts256to511Octets = 0x13, + PORT_MAC_RX_etherStatsPkts512to1023Octets = 0x14, + PORT_MAC_RX_etherStatsPkts1024to1518Octets = 0x15, + PORT_MAC_RX_etherStatsPkts1519toMaxOctets = 0x16, + PORT_MAC_RX_etherStatsOversizePkts = 0x17, + PORT_MAC_RX_etherStatsJabbers = 0x18, + PORT_MAC_RX_etherStatsFragments = 0x19, + PORT_MAC_RX_CBFCPAUSEFramesReceived_0 = 0x1a, + PORT_MAC_RX_CBFCPAUSEFramesReceived_1 = 0x1b, + PORT_MAC_RX_CBFCPAUSEFramesReceived_2 = 0x1c, + PORT_MAC_RX_CBFCPAUSEFramesReceived_3 = 0x1d, + PORT_MAC_RX_CBFCPAUSEFramesReceived_4 = 0x1e, + PORT_MAC_RX_CBFCPAUSEFramesReceived_5 = 0x1f, + PORT_MAC_RX_CBFCPAUSEFramesReceived_6 = 0x20, + PORT_MAC_RX_CBFCPAUSEFramesReceived_7 = 0x21, + PORT_MAC_RX_CBFCPAUSEFramesReceived_8 = 0x22, + PORT_MAC_RX_CBFCPAUSEFramesReceived_9 = 0x23, + PORT_MAC_RX_CBFCPAUSEFramesReceived_10 = 0x24, + PORT_MAC_RX_CBFCPAUSEFramesReceived_11 = 0x25, + PORT_MAC_RX_CBFCPAUSEFramesReceived_12 = 0x26, + PORT_MAC_RX_CBFCPAUSEFramesReceived_13 = 0x27, + PORT_MAC_RX_CBFCPAUSEFramesReceived_14 = 0x28, + PORT_MAC_RX_CBFCPAUSEFramesReceived_15 = 0x29, + PORT_MAC_RX_MACControlFramesReceived = 0x2a, + PORT_MAC_RX_STATS_MAX = 0x2b, +}; + +enum port_mac_tx_stats { + PORT_MAC_TX_etherStatsOctets = 0x0, + PORT_MAC_TX_OctetsTransmittedOK = 0x1, + PORT_MAC_TX_aPAUSEMACCtrlFramesTransmitted = 0x2, + PORT_MAC_TX_aFramesTransmittedOK = 0x3, + PORT_MAC_TX_VLANTransmittedOK = 0x4, + PORT_MAC_TX_ifOutErrors = 0x5, + PORT_MAC_TX_ifOutUcastPkts = 0x6, + PORT_MAC_TX_ifOutMulticastPkts = 0x7, + PORT_MAC_TX_ifOutBroadcastPkts = 0x8, + PORT_MAC_TX_etherStatsPkts64Octets = 0x9, + PORT_MAC_TX_etherStatsPkts65to127Octets = 0xa, + PORT_MAC_TX_etherStatsPkts128to255Octets = 0xb, + PORT_MAC_TX_etherStatsPkts256to511Octets = 0xc, + PORT_MAC_TX_etherStatsPkts512to1023Octets = 0xd, + PORT_MAC_TX_etherStatsPkts1024to1518Octets = 0xe, + 
PORT_MAC_TX_etherStatsPkts1519toMaxOctets = 0xf, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_0 = 0x10, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_1 = 0x11, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_2 = 0x12, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_3 = 0x13, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_4 = 0x14, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_5 = 0x15, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_6 = 0x16, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_7 = 0x17, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_8 = 0x18, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_9 = 0x19, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_10 = 0x1a, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_11 = 0x1b, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_12 = 0x1c, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_13 = 0x1d, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_14 = 0x1e, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_15 = 0x1f, + PORT_MAC_TX_MACControlFramesTransmitted = 0x20, + PORT_MAC_TX_etherStatsPkts = 0x21, + PORT_MAC_TX_STATS_MAX = 0x22, +}; + +enum port_mac_fec_stats { + PORT_MAC_FEC_Correctable = 0x0, + PORT_MAC_FEC_Uncorrectable = 0x1, + PORT_MAC_FEC_STATS_MAX = 0x2, +}; + +#endif /* _FUN_PORT_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h new file mode 100644 index 000000000000..1250e10d21db --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNETH_H +#define _FUNETH_H + +#include <uapi/linux/if_ether.h> +#include <uapi/linux/net_tstamp.h> +#include <linux/mutex.h> +#include <linux/seqlock.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include "fun_dev.h" + +#define ADMIN_SQE_SIZE SZ_128 +#define ADMIN_CQE_SIZE SZ_64 +#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info)) + +#define FUN_MAX_MTU 9024 + +#define SQ_DEPTH 512U +#define CQ_DEPTH 1024U +#define RQ_DEPTH (512U / (PAGE_SIZE / 4096)) + +#define CQ_INTCOAL_USEC 10 
+#define CQ_INTCOAL_NPKT 16 +#define SQ_INTCOAL_USEC 10 +#define SQ_INTCOAL_NPKT 16 + +#define INVALID_LPORT 0xffff + +#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE) + +struct fun_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + __be16 vlan_proto; + u8 qos; + u8 spoofchk:1; + u8 trusted:1; + unsigned int max_rate; +}; + +/* "subclass" of fun_dev for Ethernet functions */ +struct fun_ethdev { + struct fun_dev fdev; + + /* the function's network ports */ + struct net_device **netdevs; + unsigned int num_ports; + + /* configuration for the function's virtual ports */ + unsigned int num_vports; + struct fun_vport_info *vport_info; + + struct mutex state_mutex; /* nests inside RTNL if both taken */ + + unsigned int nsqs_per_port; +}; + +static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p) +{ + return container_of(p, struct fun_ethdev, fdev); +} + +struct fun_qset { + struct funeth_rxq **rxqs; + struct funeth_txq **txqs; + struct funeth_txq **xdpqs; + unsigned int nrxqs; + unsigned int ntxqs; + unsigned int nxdpqs; + unsigned int rxq_start; + unsigned int txq_start; + unsigned int xdpq_start; + unsigned int cq_depth; + unsigned int rq_depth; + unsigned int sq_depth; + int state; +}; + +/* Per netdevice driver state, i.e., netdev_priv. */ +struct funeth_priv { + struct fun_dev *fdev; + struct pci_dev *pdev; + struct net_device *netdev; + + struct funeth_rxq * __rcu *rxqs; + struct funeth_txq **txqs; + struct funeth_txq * __rcu *xdpqs; + + struct xarray irqs; + unsigned int num_tx_irqs; + unsigned int num_rx_irqs; + unsigned int rx_irq_ofst; + + unsigned int lane_attrs; + u16 lport; + + /* link settings */ + u64 port_caps; + u64 advertising; + u64 lp_advertising; + unsigned int link_speed; + u8 xcvr_type; + u8 active_fc; + u8 active_fec; + u8 link_down_reason; + seqcount_t link_seq; + + u32 msg_enable; + + unsigned int num_xdpqs; + + /* ethtool, etc. 
config parameters */ + unsigned int sq_depth; + unsigned int rq_depth; + unsigned int cq_depth; + unsigned int cq_irq_db; + u8 tx_coal_usec; + u8 tx_coal_count; + u8 rx_coal_usec; + u8 rx_coal_count; + + struct hwtstamp_config hwtstamp_cfg; + + /* cumulative queue stats from earlier queue instances */ + u64 tx_packets; + u64 tx_bytes; + u64 tx_dropped; + u64 rx_packets; + u64 rx_bytes; + u64 rx_dropped; + + /* RSS */ + unsigned int rss_hw_id; + enum fun_eth_hash_alg hash_algo; + u8 rss_key[FUN_ETH_RSS_MAX_KEY_SIZE]; + unsigned int indir_table_nentries; + u32 indir_table[FUN_ETH_RSS_MAX_INDIR_ENT]; + dma_addr_t rss_dma_addr; + void *rss_cfg; + + /* DMA area for port stats */ + dma_addr_t stats_dma_addr; + __be64 *stats; + + struct bpf_prog *xdp_prog; + + struct devlink_port dl_port; + + /* kTLS state */ + unsigned int ktls_id; + atomic64_t tx_tls_add; + atomic64_t tx_tls_del; + atomic64_t tx_tls_resync; +}; + +void fun_set_ethtool_ops(struct net_device *netdev); +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data); +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data); +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid); +int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs, + struct netlink_ext_ack *extack); +int fun_change_num_queues(struct net_device *dev, unsigned int ntx, + unsigned int nrx); +void fun_set_ring_count(struct net_device *netdev, unsigned int ntx, + unsigned int nrx); +int fun_config_rss(struct net_device *dev, int algo, const u8 *key, + const u32 *qtable, u8 op); + +#endif /* _FUNETH_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c new file mode 100644 index 000000000000..a849b3c6b01f --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include "funeth.h" +#include "funeth_devlink.h" + +static int 
fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + return 0; +} + +static const struct devlink_ops fun_dl_ops = { + .info_get = fun_dl_info_get, +}; + +struct devlink *fun_devlink_alloc(struct device *dev) +{ + return devlink_alloc(&fun_dl_ops, sizeof(struct fun_ethdev), dev); +} + +void fun_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +void fun_devlink_register(struct devlink *devlink) +{ + devlink_register(devlink); +} + +void fun_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.h b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h new file mode 100644 index 000000000000..e40464d57ff4 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef __FUNETH_DEVLINK_H +#define __FUNETH_DEVLINK_H + +#include <net/devlink.h> + +struct devlink *fun_devlink_alloc(struct device *dev); +void fun_devlink_free(struct devlink *devlink); +void fun_devlink_register(struct devlink *devlink); +void fun_devlink_unregister(struct devlink *devlink); + +#endif /* __FUNETH_DEVLINK_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c new file mode 100644 index 000000000000..d081168c95fa --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -0,0 +1,1162 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/ethtool.h> +#include <linux/linkmode.h> +#include <linux/netdevice.h> +#include <linux/nvme.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/pci.h> +#include <linux/rtnetlink.h> +#include "funeth.h" +#include "fun_port.h" +#include "funeth_txrx.h" + 
+/* Min queue depth. The smallest power-of-2 supporting jumbo frames with 4K + * pages is 8. Require it for all types of queues though some could work with + * fewer entries. + */ +#define FUNETH_MIN_QDEPTH 8 + +static const char mac_tx_stat_names[][ETH_GSTRING_LEN] = { + "mac_tx_octets_total", + "mac_tx_frames_total", + "mac_tx_vlan_frames_ok", + "mac_tx_unicast_frames", + "mac_tx_multicast_frames", + "mac_tx_broadcast_frames", + "mac_tx_errors", + "mac_tx_CBFCPAUSE0", + "mac_tx_CBFCPAUSE1", + "mac_tx_CBFCPAUSE2", + "mac_tx_CBFCPAUSE3", + "mac_tx_CBFCPAUSE4", + "mac_tx_CBFCPAUSE5", + "mac_tx_CBFCPAUSE6", + "mac_tx_CBFCPAUSE7", + "mac_tx_CBFCPAUSE8", + "mac_tx_CBFCPAUSE9", + "mac_tx_CBFCPAUSE10", + "mac_tx_CBFCPAUSE11", + "mac_tx_CBFCPAUSE12", + "mac_tx_CBFCPAUSE13", + "mac_tx_CBFCPAUSE14", + "mac_tx_CBFCPAUSE15", +}; + +static const char mac_rx_stat_names[][ETH_GSTRING_LEN] = { + "mac_rx_octets_total", + "mac_rx_frames_total", + "mac_rx_VLAN_frames_ok", + "mac_rx_unicast_frames", + "mac_rx_multicast_frames", + "mac_rx_broadcast_frames", + "mac_rx_drop_events", + "mac_rx_errors", + "mac_rx_alignment_errors", + "mac_rx_CBFCPAUSE0", + "mac_rx_CBFCPAUSE1", + "mac_rx_CBFCPAUSE2", + "mac_rx_CBFCPAUSE3", + "mac_rx_CBFCPAUSE4", + "mac_rx_CBFCPAUSE5", + "mac_rx_CBFCPAUSE6", + "mac_rx_CBFCPAUSE7", + "mac_rx_CBFCPAUSE8", + "mac_rx_CBFCPAUSE9", + "mac_rx_CBFCPAUSE10", + "mac_rx_CBFCPAUSE11", + "mac_rx_CBFCPAUSE12", + "mac_rx_CBFCPAUSE13", + "mac_rx_CBFCPAUSE14", + "mac_rx_CBFCPAUSE15", +}; + +static const char * const txq_stat_names[] = { + "tx_pkts", + "tx_bytes", + "tx_cso", + "tx_tso", + "tx_encapsulated_tso", + "tx_more", + "tx_queue_stops", + "tx_queue_restarts", + "tx_mapping_errors", + "tx_tls_encrypted_packets", + "tx_tls_encrypted_bytes", + "tx_tls_ooo", + "tx_tls_drop_no_sync_data", +}; + +static const char * const xdpq_stat_names[] = { + "tx_xdp_pkts", + "tx_xdp_bytes", + "tx_xdp_full", + "tx_xdp_mapping_errors", +}; + +static const char * const rxq_stat_names[] = 
{ + "rx_pkts", + "rx_bytes", + "rx_cso", + "gro_pkts", + "gro_merged", + "rx_xdp_tx", + "rx_xdp_redir", + "rx_xdp_drops", + "rx_buffers", + "rx_page_allocs", + "rx_drops", + "rx_budget_exhausted", + "rx_mapping_errors", +}; + +static const char * const tls_stat_names[] = { + "tx_tls_ctx", + "tx_tls_del", + "tx_tls_resync", +}; + +static void fun_link_modes_to_ethtool(u64 modes, + unsigned long *ethtool_modes_map) +{ +#define ADD_LINK_MODE(mode) \ + __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, ethtool_modes_map) + + if (modes & FUN_PORT_CAP_AUTONEG) + ADD_LINK_MODE(Autoneg); + if (modes & FUN_PORT_CAP_1000_X) + ADD_LINK_MODE(1000baseX_Full); + if (modes & FUN_PORT_CAP_10G_R) { + ADD_LINK_MODE(10000baseCR_Full); + ADD_LINK_MODE(10000baseSR_Full); + ADD_LINK_MODE(10000baseLR_Full); + ADD_LINK_MODE(10000baseER_Full); + } + if (modes & FUN_PORT_CAP_25G_R) { + ADD_LINK_MODE(25000baseCR_Full); + ADD_LINK_MODE(25000baseSR_Full); + } + if (modes & FUN_PORT_CAP_40G_R4) { + ADD_LINK_MODE(40000baseCR4_Full); + ADD_LINK_MODE(40000baseSR4_Full); + ADD_LINK_MODE(40000baseLR4_Full); + } + if (modes & FUN_PORT_CAP_50G_R2) { + ADD_LINK_MODE(50000baseCR2_Full); + ADD_LINK_MODE(50000baseSR2_Full); + } + if (modes & FUN_PORT_CAP_50G_R) { + ADD_LINK_MODE(50000baseCR_Full); + ADD_LINK_MODE(50000baseSR_Full); + ADD_LINK_MODE(50000baseLR_ER_FR_Full); + } + if (modes & FUN_PORT_CAP_100G_R4) { + ADD_LINK_MODE(100000baseCR4_Full); + ADD_LINK_MODE(100000baseSR4_Full); + ADD_LINK_MODE(100000baseLR4_ER4_Full); + } + if (modes & FUN_PORT_CAP_100G_R2) { + ADD_LINK_MODE(100000baseCR2_Full); + ADD_LINK_MODE(100000baseSR2_Full); + ADD_LINK_MODE(100000baseLR2_ER2_FR2_Full); + } + if (modes & FUN_PORT_CAP_FEC_NONE) + ADD_LINK_MODE(FEC_NONE); + if (modes & FUN_PORT_CAP_FEC_FC) + ADD_LINK_MODE(FEC_BASER); + if (modes & FUN_PORT_CAP_FEC_RS) + ADD_LINK_MODE(FEC_RS); + if (modes & FUN_PORT_CAP_RX_PAUSE) + ADD_LINK_MODE(Pause); + +#undef ADD_LINK_MODE +} + +static void set_asym_pause(u64 advertising, struct 
ethtool_link_ksettings *ks) +{ + bool rx_pause, tx_pause; + + rx_pause = advertising & FUN_PORT_CAP_RX_PAUSE; + tx_pause = advertising & FUN_PORT_CAP_TX_PAUSE; + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); +} + +static unsigned int fun_port_type(unsigned int xcvr) +{ + if (!xcvr) + return PORT_NONE; + + switch (xcvr & 7) { + case FUN_XCVR_BASET: + return PORT_TP; + case FUN_XCVR_CU: + return PORT_DA; + default: + return PORT_FIBRE; + } +} + +static int fun_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int seq, speed, xcvr; + u64 lp_advertising; + bool link_up; + + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); + ethtool_link_ksettings_zero_link_mode(ks, lp_advertising); + + /* Link settings change asynchronously, take a consistent snapshot */ + do { + seq = read_seqcount_begin(&fp->link_seq); + link_up = netif_carrier_ok(netdev); + speed = fp->link_speed; + xcvr = fp->xcvr_type; + lp_advertising = fp->lp_advertising; + } while (read_seqcount_retry(&fp->link_seq, seq)); + + if (link_up) { + ks->base.speed = speed; + ks->base.duplex = DUPLEX_FULL; + fun_link_modes_to_ethtool(lp_advertising, + ks->link_modes.lp_advertising); + } else { + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; + } + + ks->base.autoneg = (fp->advertising & FUN_PORT_CAP_AUTONEG) ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE; + ks->base.port = fun_port_type(xcvr); + + fun_link_modes_to_ethtool(fp->port_caps, ks->link_modes.supported); + if (fp->port_caps & (FUN_PORT_CAP_RX_PAUSE | FUN_PORT_CAP_TX_PAUSE)) + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause); + + fun_link_modes_to_ethtool(fp->advertising, ks->link_modes.advertising); + set_asym_pause(fp->advertising, ks); + return 0; +} + +static u64 fun_advert_modes(const struct ethtool_link_ksettings *ks) +{ + u64 modes = 0; + +#define HAS_MODE(mode) \ + ethtool_link_ksettings_test_link_mode(ks, advertising, mode) + + if (HAS_MODE(1000baseX_Full)) + modes |= FUN_PORT_CAP_1000_X; + if (HAS_MODE(10000baseCR_Full) || HAS_MODE(10000baseSR_Full) || + HAS_MODE(10000baseLR_Full) || HAS_MODE(10000baseER_Full)) + modes |= FUN_PORT_CAP_10G_R; + if (HAS_MODE(25000baseCR_Full) || HAS_MODE(25000baseSR_Full)) + modes |= FUN_PORT_CAP_25G_R; + if (HAS_MODE(40000baseCR4_Full) || HAS_MODE(40000baseSR4_Full) || + HAS_MODE(40000baseLR4_Full)) + modes |= FUN_PORT_CAP_40G_R4; + if (HAS_MODE(50000baseCR2_Full) || HAS_MODE(50000baseSR2_Full)) + modes |= FUN_PORT_CAP_50G_R2; + if (HAS_MODE(50000baseCR_Full) || HAS_MODE(50000baseSR_Full) || + HAS_MODE(50000baseLR_ER_FR_Full)) + modes |= FUN_PORT_CAP_50G_R; + if (HAS_MODE(100000baseCR4_Full) || HAS_MODE(100000baseSR4_Full) || + HAS_MODE(100000baseLR4_ER4_Full)) + modes |= FUN_PORT_CAP_100G_R4; + if (HAS_MODE(100000baseCR2_Full) || HAS_MODE(100000baseSR2_Full) || + HAS_MODE(100000baseLR2_ER2_FR2_Full)) + modes |= FUN_PORT_CAP_100G_R2; + + return modes; +#undef HAS_MODE +} + +static u64 fun_speed_to_link_mode(unsigned int speed) +{ + switch (speed) { + case SPEED_100000: + return FUN_PORT_CAP_100G_R4 | FUN_PORT_CAP_100G_R2; + case SPEED_50000: + return FUN_PORT_CAP_50G_R | FUN_PORT_CAP_50G_R2; + case SPEED_40000: + return FUN_PORT_CAP_40G_R4; + case SPEED_25000: + return FUN_PORT_CAP_25G_R; + case SPEED_10000: + return FUN_PORT_CAP_10G_R; + case SPEED_1000: + return 
FUN_PORT_CAP_1000_X; + default: + return 0; + } +} + +static int fun_change_advert(struct funeth_priv *fp, u64 new_advert) +{ + int err; + + if (new_advert == fp->advertising) + return 0; + + err = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT, new_advert); + if (!err) + fp->advertising = new_advert; + return err; +} + +#define FUN_PORT_CAP_FEC_MASK \ + (FUN_PORT_CAP_FEC_NONE | FUN_PORT_CAP_FEC_FC | FUN_PORT_CAP_FEC_RS) + +static int fun_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *ks) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {}; + struct funeth_priv *fp = netdev_priv(netdev); + u64 new_advert; + + /* eswitch ports don't support mode changes */ + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + + if (ks->base.duplex == DUPLEX_HALF) + return -EINVAL; + if (ks->base.autoneg == AUTONEG_ENABLE && + !(fp->port_caps & FUN_PORT_CAP_AUTONEG)) + return -EINVAL; + + if (ks->base.autoneg == AUTONEG_ENABLE) { + if (linkmode_empty(ks->link_modes.advertising)) + return -EINVAL; + + fun_link_modes_to_ethtool(fp->port_caps, supported); + if (!linkmode_subset(ks->link_modes.advertising, supported)) + return -EINVAL; + + new_advert = fun_advert_modes(ks) | FUN_PORT_CAP_AUTONEG; + } else { + new_advert = fun_speed_to_link_mode(ks->base.speed); + new_advert &= fp->port_caps; + if (!new_advert) + return -EINVAL; + } + new_advert |= fp->advertising & + (FUN_PORT_CAP_PAUSE_MASK | FUN_PORT_CAP_FEC_MASK); + + return fun_change_advert(fp, new_advert); +} + +static void fun_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + u8 active_pause = fp->active_fc; + + pause->rx_pause = !!(active_pause & FUN_PORT_CAP_RX_PAUSE); + pause->tx_pause = !!(active_pause & FUN_PORT_CAP_TX_PAUSE); + pause->autoneg = !!(fp->advertising & FUN_PORT_CAP_AUTONEG); +} + +static int fun_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) 
+{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 new_advert; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + /* Forcing PAUSE settings with AN enabled is unsupported. */ + if (!pause->autoneg && (fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EOPNOTSUPP; + if (pause->autoneg && !(fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EINVAL; + if (pause->tx_pause && !(fp->port_caps & FUN_PORT_CAP_TX_PAUSE)) + return -EINVAL; + if (pause->rx_pause && !(fp->port_caps & FUN_PORT_CAP_RX_PAUSE)) + return -EINVAL; + + new_advert = fp->advertising & ~FUN_PORT_CAP_PAUSE_MASK; + if (pause->tx_pause) + new_advert |= FUN_PORT_CAP_TX_PAUSE; + if (pause->rx_pause) + new_advert |= FUN_PORT_CAP_RX_PAUSE; + + return fun_change_advert(fp, new_advert); +} + +static int fun_restart_an(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EOPNOTSUPP; + + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT, + FUN_PORT_CAP_AUTONEG); +} + +static int fun_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct funeth_priv *fp = netdev_priv(netdev); + unsigned int beacon; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + if (state != ETHTOOL_ID_ACTIVE && state != ETHTOOL_ID_INACTIVE) + return -EOPNOTSUPP; + + beacon = state == ETHTOOL_ID_ACTIVE ? 
FUN_PORT_LED_BEACON_ON : + FUN_PORT_LED_BEACON_OFF; + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_LED, beacon); +} + +static void fun_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(fp->pdev), sizeof(info->bus_info)); +} + +static u32 fun_get_msglevel(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return fp->msg_enable; +} + +static void fun_set_msglevel(struct net_device *netdev, u32 value) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + fp->msg_enable = value; +} + +static int fun_get_regs_len(struct net_device *dev) +{ + return NVME_REG_ACQ + sizeof(u64); +} + +static void fun_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *buf) +{ + const struct funeth_priv *fp = netdev_priv(dev); + void __iomem *bar = fp->fdev->bar; + + regs->version = 0; + *(u64 *)(buf + NVME_REG_CAP) = readq(bar + NVME_REG_CAP); + *(u32 *)(buf + NVME_REG_VS) = readl(bar + NVME_REG_VS); + *(u32 *)(buf + NVME_REG_INTMS) = readl(bar + NVME_REG_INTMS); + *(u32 *)(buf + NVME_REG_INTMC) = readl(bar + NVME_REG_INTMC); + *(u32 *)(buf + NVME_REG_CC) = readl(bar + NVME_REG_CC); + *(u32 *)(buf + NVME_REG_CSTS) = readl(bar + NVME_REG_CSTS); + *(u32 *)(buf + NVME_REG_AQA) = readl(bar + NVME_REG_AQA); + *(u64 *)(buf + NVME_REG_ASQ) = readq(bar + NVME_REG_ASQ); + *(u64 *)(buf + NVME_REG_ACQ) = readq(bar + NVME_REG_ACQ); +} + +static int fun_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kcoal, + struct netlink_ext_ack *ext_ack) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + coal->rx_coalesce_usecs = fp->rx_coal_usec; + coal->rx_max_coalesced_frames = fp->rx_coal_count; + coal->use_adaptive_rx_coalesce = !fp->cq_irq_db; + coal->tx_coalesce_usecs = fp->tx_coal_usec; + 
coal->tx_max_coalesced_frames = fp->tx_coal_count; + return 0; +} + +static int fun_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kcoal, + struct netlink_ext_ack *ext_ack) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_rxq **rxqs; + unsigned int i, db_val; + + if (coal->rx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M || + coal->rx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M || + (coal->rx_coalesce_usecs | coal->rx_max_coalesced_frames) == 0 || + coal->tx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M || + coal->tx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M || + (coal->tx_coalesce_usecs | coal->tx_max_coalesced_frames) == 0) + return -EINVAL; + + /* a timer is required if there's any coalescing */ + if ((coal->rx_max_coalesced_frames > 1 && !coal->rx_coalesce_usecs) || + (coal->tx_max_coalesced_frames > 1 && !coal->tx_coalesce_usecs)) + return -EINVAL; + + fp->rx_coal_usec = coal->rx_coalesce_usecs; + fp->rx_coal_count = coal->rx_max_coalesced_frames; + fp->tx_coal_usec = coal->tx_coalesce_usecs; + fp->tx_coal_count = coal->tx_max_coalesced_frames; + + db_val = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count); + WRITE_ONCE(fp->cq_irq_db, db_val); + + rxqs = rtnl_dereference(fp->rxqs); + if (!rxqs) + return 0; + + for (i = 0; i < netdev->real_num_rx_queues; i++) + WRITE_ONCE(rxqs[i]->irq_db_val, db_val); + + db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, fp->tx_coal_count); + for (i = 0; i < netdev->real_num_tx_queues; i++) + WRITE_ONCE(fp->txqs[i]->irq_db_val, db_val); + + return 0; +} + +static void fun_get_channels(struct net_device *netdev, + struct ethtool_channels *chan) +{ + chan->max_rx = netdev->num_rx_queues; + chan->rx_count = netdev->real_num_rx_queues; + + chan->max_tx = netdev->num_tx_queues; + chan->tx_count = netdev->real_num_tx_queues; +} + +static int fun_set_channels(struct net_device *netdev, + struct ethtool_channels *chan) +{ + if (!chan->tx_count || !chan->rx_count) + 
return -EINVAL; + + if (chan->tx_count == netdev->real_num_tx_queues && + chan->rx_count == netdev->real_num_rx_queues) + return 0; + + if (netif_running(netdev)) + return fun_change_num_queues(netdev, chan->tx_count, + chan->rx_count); + + fun_set_ring_count(netdev, chan->tx_count, chan->rx_count); + return 0; +} + +static void fun_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *extack) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int max_depth = fp->fdev->q_depth; + + /* We size CQs to be twice the RQ depth so max RQ depth is half the + * max queue depth. + */ + ring->rx_max_pending = max_depth / 2; + ring->tx_max_pending = max_depth; + + ring->rx_pending = fp->rq_depth; + ring->tx_pending = fp->sq_depth; + + kring->rx_buf_len = PAGE_SIZE; + kring->cqe_size = FUNETH_CQE_SIZE; +} + +static int fun_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *extack) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* queue depths must be powers-of-2 */ + if (!is_power_of_2(ring->rx_pending) || + !is_power_of_2(ring->tx_pending)) + return -EINVAL; + + if (ring->rx_pending < FUNETH_MIN_QDEPTH || + ring->tx_pending < FUNETH_MIN_QDEPTH) + return -EINVAL; + + if (fp->sq_depth == ring->tx_pending && + fp->rq_depth == ring->rx_pending) + return 0; + + if (netif_running(netdev)) { + struct fun_qset req = { + .cq_depth = 2 * ring->rx_pending, + .rq_depth = ring->rx_pending, + .sq_depth = ring->tx_pending + }; + + rc = fun_replace_queues(netdev, &req, extack); + if (rc) + return rc; + } + + fp->sq_depth = ring->tx_pending; + fp->rq_depth = ring->rx_pending; + fp->cq_depth = 2 * fp->rq_depth; + return 0; +} + +static int fun_get_sset_count(struct net_device *dev, int sset) +{ + const struct 
funeth_priv *fp = netdev_priv(dev); + int n; + + switch (sset) { + case ETH_SS_STATS: + n = (dev->real_num_tx_queues + 1) * ARRAY_SIZE(txq_stat_names) + + (dev->real_num_rx_queues + 1) * ARRAY_SIZE(rxq_stat_names) + + (fp->num_xdpqs + 1) * ARRAY_SIZE(xdpq_stat_names) + + ARRAY_SIZE(tls_stat_names); + if (fp->port_caps & FUN_PORT_CAP_STATS) { + n += ARRAY_SIZE(mac_tx_stat_names) + + ARRAY_SIZE(mac_rx_stat_names); + } + return n; + default: + break; + } + return 0; +} + +static void fun_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int i, j; + u8 *p = data; + + switch (sset) { + case ETH_SS_STATS: + if (fp->port_caps & FUN_PORT_CAP_STATS) { + memcpy(p, mac_tx_stat_names, sizeof(mac_tx_stat_names)); + p += sizeof(mac_tx_stat_names); + memcpy(p, mac_rx_stat_names, sizeof(mac_rx_stat_names)); + p += sizeof(mac_rx_stat_names); + } + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", txq_stat_names[j], + i); + } + for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++) + ethtool_sprintf(&p, txq_stat_names[j]); + + for (i = 0; i < fp->num_xdpqs; i++) { + for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", + xdpq_stat_names[j], i); + } + for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++) + ethtool_sprintf(&p, xdpq_stat_names[j]); + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", rxq_stat_names[j], + i); + } + for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++) + ethtool_sprintf(&p, rxq_stat_names[j]); + + for (j = 0; j < ARRAY_SIZE(tls_stat_names); j++) + ethtool_sprintf(&p, tls_stat_names[j]); + break; + default: + break; + } +} + +static u64 *get_mac_stats(const struct funeth_priv *fp, u64 *data) +{ +#define TX_STAT(s) \ + *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s]) + + 
TX_STAT(etherStatsOctets); + TX_STAT(etherStatsPkts); + TX_STAT(VLANTransmittedOK); + TX_STAT(ifOutUcastPkts); + TX_STAT(ifOutMulticastPkts); + TX_STAT(ifOutBroadcastPkts); + TX_STAT(ifOutErrors); + TX_STAT(CBFCPAUSEFramesTransmitted_0); + TX_STAT(CBFCPAUSEFramesTransmitted_1); + TX_STAT(CBFCPAUSEFramesTransmitted_2); + TX_STAT(CBFCPAUSEFramesTransmitted_3); + TX_STAT(CBFCPAUSEFramesTransmitted_4); + TX_STAT(CBFCPAUSEFramesTransmitted_5); + TX_STAT(CBFCPAUSEFramesTransmitted_6); + TX_STAT(CBFCPAUSEFramesTransmitted_7); + TX_STAT(CBFCPAUSEFramesTransmitted_8); + TX_STAT(CBFCPAUSEFramesTransmitted_9); + TX_STAT(CBFCPAUSEFramesTransmitted_10); + TX_STAT(CBFCPAUSEFramesTransmitted_11); + TX_STAT(CBFCPAUSEFramesTransmitted_12); + TX_STAT(CBFCPAUSEFramesTransmitted_13); + TX_STAT(CBFCPAUSEFramesTransmitted_14); + TX_STAT(CBFCPAUSEFramesTransmitted_15); + +#define RX_STAT(s) *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_##s]) + + RX_STAT(etherStatsOctets); + RX_STAT(etherStatsPkts); + RX_STAT(VLANReceivedOK); + RX_STAT(ifInUcastPkts); + RX_STAT(ifInMulticastPkts); + RX_STAT(ifInBroadcastPkts); + RX_STAT(etherStatsDropEvents); + RX_STAT(ifInErrors); + RX_STAT(aAlignmentErrors); + RX_STAT(CBFCPAUSEFramesReceived_0); + RX_STAT(CBFCPAUSEFramesReceived_1); + RX_STAT(CBFCPAUSEFramesReceived_2); + RX_STAT(CBFCPAUSEFramesReceived_3); + RX_STAT(CBFCPAUSEFramesReceived_4); + RX_STAT(CBFCPAUSEFramesReceived_5); + RX_STAT(CBFCPAUSEFramesReceived_6); + RX_STAT(CBFCPAUSEFramesReceived_7); + RX_STAT(CBFCPAUSEFramesReceived_8); + RX_STAT(CBFCPAUSEFramesReceived_9); + RX_STAT(CBFCPAUSEFramesReceived_10); + RX_STAT(CBFCPAUSEFramesReceived_11); + RX_STAT(CBFCPAUSEFramesReceived_12); + RX_STAT(CBFCPAUSEFramesReceived_13); + RX_STAT(CBFCPAUSEFramesReceived_14); + RX_STAT(CBFCPAUSEFramesReceived_15); + + return data; + +#undef TX_STAT +#undef RX_STAT +} + +static void fun_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + const struct funeth_priv *fp = 
netdev_priv(netdev); + struct funeth_txq_stats txs; + struct funeth_rxq_stats rxs; + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i, start; + u64 *totals, *tot; + + if (fp->port_caps & FUN_PORT_CAP_STATS) + data = get_mac_stats(fp, data); + + rxqs = rtnl_dereference(fp->rxqs); + if (!rxqs) + return; + +#define ADD_STAT(cnt) do { \ + *data = (cnt); *tot++ += *data++; \ +} while (0) + + /* Tx queues */ + totals = data + netdev->real_num_tx_queues * ARRAY_SIZE(txq_stat_names); + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + tot = totals; + + FUN_QSTAT_READ(fp->txqs[i], start, txs); + + ADD_STAT(txs.tx_pkts); + ADD_STAT(txs.tx_bytes); + ADD_STAT(txs.tx_cso); + ADD_STAT(txs.tx_tso); + ADD_STAT(txs.tx_encap_tso); + ADD_STAT(txs.tx_more); + ADD_STAT(txs.tx_nstops); + ADD_STAT(txs.tx_nrestarts); + ADD_STAT(txs.tx_map_err); + ADD_STAT(txs.tx_tls_pkts); + ADD_STAT(txs.tx_tls_bytes); + ADD_STAT(txs.tx_tls_fallback); + ADD_STAT(txs.tx_tls_drops); + } + data += ARRAY_SIZE(txq_stat_names); + + /* XDP Tx queues */ + xdpqs = rtnl_dereference(fp->xdpqs); + totals = data + fp->num_xdpqs * ARRAY_SIZE(xdpq_stat_names); + + for (i = 0; i < fp->num_xdpqs; i++) { + tot = totals; + + FUN_QSTAT_READ(xdpqs[i], start, txs); + + ADD_STAT(txs.tx_pkts); + ADD_STAT(txs.tx_bytes); + ADD_STAT(txs.tx_xdp_full); + ADD_STAT(txs.tx_map_err); + } + data += ARRAY_SIZE(xdpq_stat_names); + + /* Rx queues */ + totals = data + netdev->real_num_rx_queues * ARRAY_SIZE(rxq_stat_names); + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + tot = totals; + + FUN_QSTAT_READ(rxqs[i], start, rxs); + + ADD_STAT(rxs.rx_pkts); + ADD_STAT(rxs.rx_bytes); + ADD_STAT(rxs.rx_cso); + ADD_STAT(rxs.gro_pkts); + ADD_STAT(rxs.gro_merged); + ADD_STAT(rxs.xdp_tx); + ADD_STAT(rxs.xdp_redir); + ADD_STAT(rxs.xdp_drops); + ADD_STAT(rxs.rx_bufs); + ADD_STAT(rxs.rx_page_alloc); + ADD_STAT(rxs.rx_mem_drops + rxs.xdp_err); + ADD_STAT(rxs.rx_budget); + ADD_STAT(rxs.rx_map_err); + } + data += 
ARRAY_SIZE(rxq_stat_names); +#undef ADD_STAT + + *data++ = atomic64_read(&fp->tx_tls_add); + *data++ = atomic64_read(&fp->tx_tls_del); + *data++ = atomic64_read(&fp->tx_tls_resync); +} + +#define RX_STAT(fp, s) be64_to_cpu((fp)->stats[PORT_MAC_RX_##s]) +#define TX_STAT(fp, s) \ + be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s]) +#define FEC_STAT(fp, s) \ + be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + \ + PORT_MAC_TX_STATS_MAX + PORT_MAC_FEC_##s]) + +static void fun_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->tx_pause_frames = TX_STAT(fp, aPAUSEMACCtrlFramesTransmitted); + stats->rx_pause_frames = RX_STAT(fp, aPAUSEMACCtrlFramesReceived); +} + +static void fun_get_802_3_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->FramesTransmittedOK = TX_STAT(fp, aFramesTransmittedOK); + stats->FramesReceivedOK = RX_STAT(fp, aFramesReceivedOK); + stats->FrameCheckSequenceErrors = RX_STAT(fp, aFrameCheckSequenceErrors); + stats->OctetsTransmittedOK = TX_STAT(fp, OctetsTransmittedOK); + stats->OctetsReceivedOK = RX_STAT(fp, OctetsReceivedOK); + stats->InRangeLengthErrors = RX_STAT(fp, aInRangeLengthErrors); + stats->FrameTooLongErrors = RX_STAT(fp, aFrameTooLongErrors); +} + +static void fun_get_802_3_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->MACControlFramesTransmitted = TX_STAT(fp, MACControlFramesTransmitted); + stats->MACControlFramesReceived = RX_STAT(fp, MACControlFramesReceived); +} + +static void fun_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *stats, + const struct 
ethtool_rmon_hist_range **ranges) +{ + static const struct ethtool_rmon_hist_range rmon_ranges[] = { + { 64, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 32767 }, + {} + }; + + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->undersize_pkts = RX_STAT(fp, etherStatsUndersizePkts); + stats->oversize_pkts = RX_STAT(fp, etherStatsOversizePkts); + stats->fragments = RX_STAT(fp, etherStatsFragments); + stats->jabbers = RX_STAT(fp, etherStatsJabbers); + + stats->hist[0] = RX_STAT(fp, etherStatsPkts64Octets); + stats->hist[1] = RX_STAT(fp, etherStatsPkts65to127Octets); + stats->hist[2] = RX_STAT(fp, etherStatsPkts128to255Octets); + stats->hist[3] = RX_STAT(fp, etherStatsPkts256to511Octets); + stats->hist[4] = RX_STAT(fp, etherStatsPkts512to1023Octets); + stats->hist[5] = RX_STAT(fp, etherStatsPkts1024to1518Octets); + stats->hist[6] = RX_STAT(fp, etherStatsPkts1519toMaxOctets); + + stats->hist_tx[0] = TX_STAT(fp, etherStatsPkts64Octets); + stats->hist_tx[1] = TX_STAT(fp, etherStatsPkts65to127Octets); + stats->hist_tx[2] = TX_STAT(fp, etherStatsPkts128to255Octets); + stats->hist_tx[3] = TX_STAT(fp, etherStatsPkts256to511Octets); + stats->hist_tx[4] = TX_STAT(fp, etherStatsPkts512to1023Octets); + stats->hist_tx[5] = TX_STAT(fp, etherStatsPkts1024to1518Octets); + stats->hist_tx[6] = TX_STAT(fp, etherStatsPkts1519toMaxOctets); + + *ranges = rmon_ranges; +} + +static void fun_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->corrected_blocks.total = FEC_STAT(fp, Correctable); + stats->uncorrectable_blocks.total = FEC_STAT(fp, Uncorrectable); +} + +#undef RX_STAT +#undef TX_STAT +#undef FEC_STAT + +static int fun_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + switch 
(cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = netdev->real_num_rx_queues; + return 0; + default: + break; + } + return -EOPNOTSUPP; +} + +static int fun_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + return 0; +} + +static u32 fun_get_rxfh_indir_size(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return fp->indir_table_nentries; +} + +static u32 fun_get_rxfh_key_size(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return sizeof(fp->rss_key); +} + +static int fun_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!fp->rss_cfg) + return -EOPNOTSUPP; + + if (indir) + memcpy(indir, fp->indir_table, + sizeof(u32) * fp->indir_table_nentries); + + if (key) + memcpy(key, fp->rss_key, sizeof(fp->rss_key)); + + if (hfunc) + *hfunc = fp->hash_algo == FUN_ETH_RSS_ALG_TOEPLITZ ? + ETH_RSS_HASH_TOP : ETH_RSS_HASH_CRC32; + + return 0; +} + +static int fun_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct funeth_priv *fp = netdev_priv(netdev); + const u32 *rss_indir = indir ? indir : fp->indir_table; + const u8 *rss_key = key ? key : fp->rss_key; + enum fun_eth_hash_alg algo; + + if (!fp->rss_cfg) + return -EOPNOTSUPP; + + if (hfunc == ETH_RSS_HASH_NO_CHANGE) + algo = fp->hash_algo; + else if (hfunc == ETH_RSS_HASH_CRC32) + algo = FUN_ETH_RSS_ALG_CRC32; + else if (hfunc == ETH_RSS_HASH_TOP) + algo = FUN_ETH_RSS_ALG_TOEPLITZ; + else + return -EINVAL; + + /* If the port is enabled try to reconfigure RSS and keep the new + * settings if successful. If it is down we update the RSS settings + * and apply them at the next UP time. 
+ */ + if (netif_running(netdev)) { + int rc = fun_config_rss(netdev, algo, rss_key, rss_indir, + FUN_ADMIN_SUBOP_MODIFY); + if (rc) + return rc; + } + + fp->hash_algo = algo; + if (key) + memcpy(fp->rss_key, key, sizeof(fp->rss_key)); + if (indir) + memcpy(fp->indir_table, indir, + sizeof(u32) * fp->indir_table_nentries); + return 0; +} + +static int fun_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = -1; + info->tx_types = BIT(HWTSTAMP_TX_OFF); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + return 0; +} + +static unsigned int to_ethtool_fec(unsigned int fun_fec) +{ + unsigned int fec = 0; + + if (fun_fec == FUN_PORT_FEC_NA) + fec |= ETHTOOL_FEC_NONE; + if (fun_fec & FUN_PORT_FEC_OFF) + fec |= ETHTOOL_FEC_OFF; + if (fun_fec & FUN_PORT_FEC_RS) + fec |= ETHTOOL_FEC_RS; + if (fun_fec & FUN_PORT_FEC_FC) + fec |= ETHTOOL_FEC_BASER; + if (fun_fec & FUN_PORT_FEC_AUTO) + fec |= ETHTOOL_FEC_AUTO; + return fec; +} + +static int fun_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 fec_data; + int rc; + + rc = fun_port_read_cmd(fp, FUN_ADMIN_PORT_KEY_FEC, &fec_data); + if (rc) + return rc; + + fec->active_fec = to_ethtool_fec(fec_data & 0xff); + fec->fec = to_ethtool_fec(fec_data >> 8); + return 0; +} + +static int fun_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 fec_mode; + + switch (fec->fec) { + case ETHTOOL_FEC_AUTO: + fec_mode = FUN_PORT_FEC_AUTO; + break; + case ETHTOOL_FEC_OFF: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_NONE)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_OFF; + break; + case ETHTOOL_FEC_BASER: + if (!(fp->port_caps & 
FUN_PORT_CAP_FEC_FC)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_FC; + break; + case ETHTOOL_FEC_RS: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_RS)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_RS; + break; + default: + return -EINVAL; + } + + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_FEC, fec_mode); +} + +static const struct ethtool_ops fun_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, + .get_link_ksettings = fun_get_link_ksettings, + .set_link_ksettings = fun_set_link_ksettings, + .set_phys_id = fun_set_phys_id, + .get_drvinfo = fun_get_drvinfo, + .get_msglevel = fun_get_msglevel, + .set_msglevel = fun_set_msglevel, + .get_regs_len = fun_get_regs_len, + .get_regs = fun_get_regs, + .get_link = ethtool_op_get_link, + .get_coalesce = fun_get_coalesce, + .set_coalesce = fun_set_coalesce, + .get_ts_info = fun_get_ts_info, + .get_ringparam = fun_get_ringparam, + .set_ringparam = fun_set_ringparam, + .get_sset_count = fun_get_sset_count, + .get_strings = fun_get_strings, + .get_ethtool_stats = fun_get_ethtool_stats, + .get_rxnfc = fun_get_rxnfc, + .set_rxnfc = fun_set_rxnfc, + .get_rxfh_indir_size = fun_get_rxfh_indir_size, + .get_rxfh_key_size = fun_get_rxfh_key_size, + .get_rxfh = fun_get_rxfh, + .set_rxfh = fun_set_rxfh, + .get_channels = fun_get_channels, + .set_channels = fun_set_channels, + .get_fecparam = fun_get_fecparam, + .set_fecparam = fun_set_fecparam, + .get_pauseparam = fun_get_pauseparam, + .set_pauseparam = fun_set_pauseparam, + .nway_reset = fun_restart_an, + .get_pause_stats = fun_get_pause_stats, + .get_fec_stats = fun_get_fec_stats, + .get_eth_mac_stats = fun_get_802_3_stats, + .get_eth_ctrl_stats = fun_get_802_3_ctrl_stats, + .get_rmon_stats = fun_get_rmon_stats, +}; + +void fun_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &fun_ethtool_ops; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.c b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c new 
file mode 100644 index 000000000000..f871def70d70 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include "funeth.h" +#include "funeth_ktls.h" + +static int fun_admin_ktls_create(struct funeth_priv *fp, unsigned int id) +{ + struct fun_admin_ktls_create_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + sizeof(req)), + .subop = FUN_ADMIN_SUBOP_CREATE, + .id = cpu_to_be32(id), + }; + + return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); +} + +static int fun_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + sizeof(req)), + .subop = FUN_ADMIN_SUBOP_MODIFY, + .id = cpu_to_be32(fp->ktls_id), + .tcp_seq = cpu_to_be32(start_offload_tcp_sn), + }; + struct fun_admin_ktls_modify_rsp rsp; + struct fun_ktls_tx_ctx *tx_ctx; + int rc; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EOPNOTSUPP; + + if (crypto_info->version == TLS_1_2_VERSION) + req.version = FUN_KTLS_TLSV2; + else + return -EOPNOTSUPP; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *c = (void *)crypto_info; + + req.cipher = FUN_KTLS_CIPHER_AES_GCM_128; + memcpy(req.key, c->key, sizeof(c->key)); + memcpy(req.iv, c->iv, sizeof(c->iv)); + memcpy(req.salt, c->salt, sizeof(c->salt)); + memcpy(req.record_seq, c->rec_seq, sizeof(c->rec_seq)); + break; + } + default: + return -EOPNOTSUPP; + } + + rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, &rsp, + sizeof(rsp), 0); + memzero_explicit(&req, sizeof(req)); + if (rc) + return rc; + + tx_ctx = tls_driver_ctx(sk, direction); + tx_ctx->tlsid = rsp.tlsid; + tx_ctx->next_seq = start_offload_tcp_sn; + 
atomic64_inc(&fp->tx_tls_add); + return 0; +} + +static void fun_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req; + struct fun_ktls_tx_ctx *tx_ctx; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return; + + tx_ctx = __tls_driver_ctx(tls_ctx, direction); + + req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + offsetof(struct fun_admin_ktls_modify_req, tcp_seq)); + req.subop = FUN_ADMIN_SUBOP_MODIFY; + req.flags = cpu_to_be16(FUN_KTLS_MODIFY_REMOVE); + req.id = cpu_to_be32(fp->ktls_id); + req.tlsid = tx_ctx->tlsid; + + fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); + atomic64_inc(&fp->tx_tls_del); +} + +static int fun_ktls_resync(struct net_device *netdev, struct sock *sk, u32 seq, + u8 *rcd_sn, enum tls_offload_ctx_dir direction) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req; + struct fun_ktls_tx_ctx *tx_ctx; + int rc; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EOPNOTSUPP; + + tx_ctx = tls_driver_ctx(sk, direction); + + req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + offsetof(struct fun_admin_ktls_modify_req, key)); + req.subop = FUN_ADMIN_SUBOP_MODIFY; + req.flags = 0; + req.id = cpu_to_be32(fp->ktls_id); + req.tlsid = tx_ctx->tlsid; + req.tcp_seq = cpu_to_be32(seq); + req.version = 0; + req.cipher = 0; + memcpy(req.record_seq, rcd_sn, sizeof(req.record_seq)); + + atomic64_inc(&fp->tx_tls_resync); + rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); + if (!rc) + tx_ctx->next_seq = seq; + return rc; +} + +static const struct tlsdev_ops fun_ktls_ops = { + .tls_dev_add = fun_ktls_add, + .tls_dev_del = fun_ktls_del, + .tls_dev_resync = fun_ktls_resync, +}; + +int fun_ktls_init(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + rc = fun_admin_ktls_create(fp, netdev->dev_port); + 
if (rc) + return rc; + + fp->ktls_id = netdev->dev_port; + netdev->tlsdev_ops = &fun_ktls_ops; + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + return 0; +} + +void fun_ktls_cleanup(struct funeth_priv *fp) +{ + if (fp->ktls_id == FUN_HCI_ID_INVALID) + return; + + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_KTLS, 0, fp->ktls_id); + fp->ktls_id = FUN_HCI_ID_INVALID; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.h b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h new file mode 100644 index 000000000000..1b21cccf1278 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_KTLS_H +#define _FUN_KTLS_H + +struct net_device; +struct funeth_priv; + +#ifdef CONFIG_TLS_DEVICE +#include <net/tls.h> + +struct fun_ktls_tx_ctx { + __be64 tlsid; + u32 next_seq; +}; + +int fun_ktls_init(struct net_device *netdev); +void fun_ktls_cleanup(struct funeth_priv *fp); + +#else + +static inline void fun_ktls_init(struct net_device *netdev) +{ +} + +static inline void fun_ktls_cleanup(struct funeth_priv *fp) +{ +} +#endif + +#endif /* _FUN_KTLS_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c new file mode 100644 index 000000000000..c58b10c216ef --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -0,0 +1,2091 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/bpf.h> +#include <linux/crash_dump.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/idr.h> +#include <linux/if_vlan.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> + +#include "funeth.h" +#include "funeth_devlink.h" +#include "funeth_ktls.h" +#include "fun_port.h" +#include "fun_queue.h" +#include 
"funeth_txrx.h" + +#define ADMIN_SQ_DEPTH 32 +#define ADMIN_CQ_DEPTH 64 +#define ADMIN_RQ_DEPTH 16 + +/* Default number of Tx/Rx queues. */ +#define FUN_DFLT_QUEUES 16U + +enum { + FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL, + FUN_SERV_DEL_PORTS, +}; + +static const struct pci_device_id funeth_id_table[] = { + { PCI_VDEVICE(FUNGIBLE, 0x0101) }, + { PCI_VDEVICE(FUNGIBLE, 0x0181) }, + { 0, } +}; + +/* Issue a port write admin command with @n key/value pairs. */ +static int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n, + const int *keys, const u64 *data) +{ + unsigned int cmd_size, i; + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + u8 v[ADMIN_SQE_SIZE]; + } cmd; + + cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) + + n * sizeof(struct fun_admin_write48_req); + if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + cmd_size); + cmd.req.u.write = + FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0, + fp->netdev->dev_port); + for (i = 0; i < n; i++) + cmd.req.u.write.write48[i] = + FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]); + + return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, cmd_size, 0); +} + +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data) +{ + return fun_port_write_cmds(fp, 1, &key, &data); +} + +/* Issue a port read admin command with @n key/value pairs. 
*/ +static int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n, + const int *keys, u64 *data) +{ + const struct fun_admin_read48_rsp *r48rsp; + unsigned int cmd_size, i; + int rc; + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + u8 v[ADMIN_SQE_SIZE]; + } cmd; + + cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) + + n * sizeof(struct fun_admin_read48_req); + if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + cmd_size); + cmd.req.u.read = + FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0, + fp->netdev->dev_port); + for (i = 0; i < n; i++) + cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, cmd_size, 0); + if (rc) + return rc; + + for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) { + data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data); + dev_dbg(fp->fdev->dev, + "port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld", + fp->lport, r48rsp->key_to_data, keys[i], data[i], + FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data)); + } + return 0; +} + +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data) +{ + return fun_port_read_cmds(fp, 1, &key, data); +} + +static void fun_report_link(struct net_device *netdev) +{ + if (netif_carrier_ok(netdev)) { + const struct funeth_priv *fp = netdev_priv(netdev); + const char *fec = "", *pause = ""; + int speed = fp->link_speed; + char unit = 'M'; + + if (fp->link_speed >= SPEED_1000) { + speed /= 1000; + unit = 'G'; + } + + if (fp->active_fec & FUN_PORT_FEC_RS) + fec = ", RS-FEC"; + else if (fp->active_fec & FUN_PORT_FEC_FC) + fec = ", BASER-FEC"; + + if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK) + pause = ", Tx/Rx PAUSE"; + else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE) + pause = ", Rx PAUSE"; + else if (fp->active_fc 
& FUN_PORT_CAP_TX_PAUSE) + pause = ", Tx PAUSE"; + + netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s\n", + speed, unit, pause, fec); + } else { + netdev_info(netdev, "Link down\n"); + } +} + +static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr, + unsigned int adi_id, const struct fun_adi_param *param) +{ + struct fun_admin_adi_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI, + sizeof(req)), + .u.write.subop = FUN_ADMIN_SUBOP_WRITE, + .u.write.attribute = attr, + .u.write.id = cpu_to_be32(adi_id), + .u.write.param = *param + }; + + return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0); +} + +/* Configure RSS for the given port. @op determines whether a new RSS context + * is to be created or whether an existing one should be reconfigured. The + * remaining parameters specify the hashing algorithm, key, and indirection + * table. + * + * This initiates packet delivery to the Rx queues set in the indirection + * table. + */ +int fun_config_rss(struct net_device *dev, int algo, const u8 *key, + const u32 *qtable, u8 op) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int table_len = fp->indir_table_nentries; + unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len; + struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs); + union { + struct { + struct fun_admin_rss_req req; + struct fun_dataop_gl gl; + }; + struct fun_admin_generic_create_rsp rsp; + } cmd; + __be32 *indir_tab; + u16 flags; + int rc; + + if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID) + return -EINVAL; + + flags = op == FUN_ADMIN_SUBOP_CREATE ? 
+ FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0; + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS, + sizeof(cmd)); + cmd.req.u.create = + FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id, + dev->dev_port, algo, + FUN_ETH_RSS_MAX_KEY_SIZE, + table_len, 0, + FUN_ETH_RSS_MAX_KEY_SIZE); + cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len); + fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr); + + /* write the key and indirection table into the RSS DMA area */ + memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE); + indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE; + for (rc = 0; rc < table_len; rc++) + *indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (!rc && op == FUN_ADMIN_SUBOP_CREATE) + fp->rss_hw_id = be32_to_cpu(cmd.rsp.id); + return rc; +} + +/* Destroy the HW RSS conntext associated with the given port. This also stops + * all packet delivery to our Rx queues. + */ +static void fun_destroy_rss(struct funeth_priv *fp) +{ + if (fp->rss_hw_id != FUN_HCI_ID_INVALID) { + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id); + fp->rss_hw_id = FUN_HCI_ID_INVALID; + } +} + +static void fun_irq_aff_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify); + + cpumask_copy(&p->affinity_mask, mask); +} + +static void fun_irq_aff_release(struct kref __always_unused *ref) +{ +} + +/* Allocate an IRQ structure, assign an MSI-X index and initial affinity to it, + * and add it to the IRQ XArray. 
+ */ +static struct fun_irq *fun_alloc_qirq(struct funeth_priv *fp, unsigned int idx, + int node, unsigned int xa_idx_offset) +{ + struct fun_irq *irq; + int cpu, res; + + cpu = cpumask_local_spread(idx, node); + node = local_memory_node(cpu_to_node(cpu)); + + irq = kzalloc_node(sizeof(*irq), GFP_KERNEL, node); + if (!irq) + return ERR_PTR(-ENOMEM); + + res = fun_reserve_irqs(fp->fdev, 1, &irq->irq_idx); + if (res != 1) + goto free_irq; + + res = xa_insert(&fp->irqs, idx + xa_idx_offset, irq, GFP_KERNEL); + if (res) + goto release_irq; + + irq->irq = pci_irq_vector(fp->pdev, irq->irq_idx); + cpumask_set_cpu(cpu, &irq->affinity_mask); + irq->aff_notify.notify = fun_irq_aff_notify; + irq->aff_notify.release = fun_irq_aff_release; + irq->state = FUN_IRQ_INIT; + return irq; + +release_irq: + fun_release_irqs(fp->fdev, 1, &irq->irq_idx); +free_irq: + kfree(irq); + return ERR_PTR(res); +} + +static void fun_free_qirq(struct funeth_priv *fp, struct fun_irq *irq) +{ + netif_napi_del(&irq->napi); + fun_release_irqs(fp->fdev, 1, &irq->irq_idx); + kfree(irq); +} + +/* Release the IRQs reserved for Tx/Rx queues that aren't being used. */ +static void fun_prune_queue_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int nreleased = 0; + struct fun_irq *irq; + unsigned long idx; + + xa_for_each(&fp->irqs, idx, irq) { + if (irq->txq || irq->rxq) /* skip those in use */ + continue; + + xa_erase(&fp->irqs, idx); + fun_free_qirq(fp, irq); + nreleased++; + if (idx < fp->rx_irq_ofst) + fp->num_tx_irqs--; + else + fp->num_rx_irqs--; + } + netif_info(fp, intr, dev, "Released %u queue IRQs\n", nreleased); +} + +/* Reserve IRQs, one per queue, to acommodate the requested queue numbers @ntx + * and @nrx. IRQs are added incrementally to those we already have. + * We hold on to allocated IRQs until garbage collection of unused IRQs is + * separately requested. 
+ */ +static int fun_alloc_queue_irqs(struct net_device *dev, unsigned int ntx, + unsigned int nrx) +{ + struct funeth_priv *fp = netdev_priv(dev); + int node = dev_to_node(&fp->pdev->dev); + struct fun_irq *irq; + unsigned int i; + + for (i = fp->num_tx_irqs; i < ntx; i++) { + irq = fun_alloc_qirq(fp, i, node, 0); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + fp->num_tx_irqs++; + netif_tx_napi_add(dev, &irq->napi, fun_txq_napi_poll, + NAPI_POLL_WEIGHT); + } + + for (i = fp->num_rx_irqs; i < nrx; i++) { + irq = fun_alloc_qirq(fp, i, node, fp->rx_irq_ofst); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + fp->num_rx_irqs++; + netif_napi_add(dev, &irq->napi, fun_rxq_napi_poll, + NAPI_POLL_WEIGHT); + } + + netif_info(fp, intr, dev, "Reserved %u/%u IRQs for Tx/Rx queues\n", + ntx, nrx); + return 0; +} + +static void free_txqs(struct funeth_txq **txqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && txqs[i]; i++) + txqs[i] = funeth_txq_free(txqs[i], state); +} + +static int alloc_txqs(struct net_device *dev, struct funeth_txq **txqs, + unsigned int nqs, unsigned int depth, unsigned int start, + int state) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i; + int err; + + for (i = start; i < nqs; i++) { + err = funeth_txq_create(dev, i, depth, xa_load(&fp->irqs, i), + state, &txqs[i]); + if (err) { + free_txqs(txqs, nqs, start, FUN_QSTATE_DESTROYED); + return err; + } + } + return 0; +} + +static void free_rxqs(struct funeth_rxq **rxqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && rxqs[i]; i++) + rxqs[i] = funeth_rxq_free(rxqs[i], state); +} + +static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs, + unsigned int nqs, unsigned int ncqe, unsigned int nrqe, + unsigned int start, int state) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i; + int err; + + for (i = start; i < nqs; i++) { + err = funeth_rxq_create(dev, i, 
ncqe, nrqe, + xa_load(&fp->irqs, i + fp->rx_irq_ofst), + state, &rxqs[i]); + if (err) { + free_rxqs(rxqs, nqs, start, FUN_QSTATE_DESTROYED); + return err; + } + } + return 0; +} + +static void free_xdpqs(struct funeth_txq **xdpqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && xdpqs[i]; i++) + xdpqs[i] = funeth_txq_free(xdpqs[i], state); + + if (state == FUN_QSTATE_DESTROYED) + kfree(xdpqs); +} + +static struct funeth_txq **alloc_xdpqs(struct net_device *dev, unsigned int nqs, + unsigned int depth, unsigned int start, + int state) +{ + struct funeth_txq **xdpqs; + unsigned int i; + int err; + + xdpqs = kcalloc(nqs, sizeof(*xdpqs), GFP_KERNEL); + if (!xdpqs) + return ERR_PTR(-ENOMEM); + + for (i = start; i < nqs; i++) { + err = funeth_txq_create(dev, i, depth, NULL, state, &xdpqs[i]); + if (err) { + free_xdpqs(xdpqs, nqs, start, FUN_QSTATE_DESTROYED); + return ERR_PTR(err); + } + } + return xdpqs; +} + +static void fun_free_rings(struct net_device *netdev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq **xdpqs = qset->xdpqs; + struct funeth_rxq **rxqs = qset->rxqs; + + /* qset may not specify any queues to operate on. In that case the + * currently installed queues are implied. 
+ */ + if (!rxqs) { + rxqs = rtnl_dereference(fp->rxqs); + xdpqs = rtnl_dereference(fp->xdpqs); + qset->txqs = fp->txqs; + qset->nrxqs = netdev->real_num_rx_queues; + qset->ntxqs = netdev->real_num_tx_queues; + qset->nxdpqs = fp->num_xdpqs; + } + if (!rxqs) + return; + + if (rxqs == rtnl_dereference(fp->rxqs)) { + rcu_assign_pointer(fp->rxqs, NULL); + rcu_assign_pointer(fp->xdpqs, NULL); + synchronize_net(); + fp->txqs = NULL; + } + + free_rxqs(rxqs, qset->nrxqs, qset->rxq_start, qset->state); + free_txqs(qset->txqs, qset->ntxqs, qset->txq_start, qset->state); + free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, qset->state); + if (qset->state == FUN_QSTATE_DESTROYED) + kfree(rxqs); + + /* Tell the caller which queues were operated on. */ + qset->rxqs = rxqs; + qset->xdpqs = xdpqs; +} + +static int fun_alloc_rings(struct net_device *netdev, struct fun_qset *qset) +{ + struct funeth_txq **xdpqs = NULL, **txqs; + struct funeth_rxq **rxqs; + int err; + + err = fun_alloc_queue_irqs(netdev, qset->ntxqs, qset->nrxqs); + if (err) + return err; + + rxqs = kcalloc(qset->ntxqs + qset->nrxqs, sizeof(*rxqs), GFP_KERNEL); + if (!rxqs) + return -ENOMEM; + + if (qset->nxdpqs) { + xdpqs = alloc_xdpqs(netdev, qset->nxdpqs, qset->sq_depth, + qset->xdpq_start, qset->state); + if (IS_ERR(xdpqs)) { + err = PTR_ERR(xdpqs); + goto free_qvec; + } + } + + txqs = (struct funeth_txq **)&rxqs[qset->nrxqs]; + err = alloc_txqs(netdev, txqs, qset->ntxqs, qset->sq_depth, + qset->txq_start, qset->state); + if (err) + goto free_xdpqs; + + err = alloc_rxqs(netdev, rxqs, qset->nrxqs, qset->cq_depth, + qset->rq_depth, qset->rxq_start, qset->state); + if (err) + goto free_txqs; + + qset->rxqs = rxqs; + qset->txqs = txqs; + qset->xdpqs = xdpqs; + return 0; + +free_txqs: + free_txqs(txqs, qset->ntxqs, qset->txq_start, FUN_QSTATE_DESTROYED); +free_xdpqs: + free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, FUN_QSTATE_DESTROYED); +free_qvec: + kfree(rxqs); + return err; +} + +/* Take queues to the next 
level. Presently this means creating them on the + * device. + */ +static int fun_advance_ring_state(struct net_device *dev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(dev); + int i, err; + + for (i = 0; i < qset->nrxqs; i++) { + err = fun_rxq_create_dev(qset->rxqs[i], + xa_load(&fp->irqs, + i + fp->rx_irq_ofst)); + if (err) + goto out; + } + + for (i = 0; i < qset->ntxqs; i++) { + err = fun_txq_create_dev(qset->txqs[i], xa_load(&fp->irqs, i)); + if (err) + goto out; + } + + for (i = 0; i < qset->nxdpqs; i++) { + err = fun_txq_create_dev(qset->xdpqs[i], NULL); + if (err) + goto out; + } + + return 0; + +out: + fun_free_rings(dev, qset); + return err; +} + +static int fun_port_create(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + } cmd; + int rc; + + if (fp->lport != INVALID_LPORT) + return 0; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0, + netdev->dev_port); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + + if (!rc) + fp->lport = be16_to_cpu(cmd.rsp.u.create.lport); + return rc; +} + +static int fun_port_destroy(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + if (fp->lport == INVALID_LPORT) + return 0; + + fp->lport = INVALID_LPORT; + return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0, + netdev->dev_port); +} + +static int fun_eth_create(struct funeth_priv *fp) +{ + union { + struct fun_admin_eth_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + int rc; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH, + sizeof(cmd.req)); + cmd.req.u.create = FUN_ADMIN_ETH_CREATE_REQ_INIT( + FUN_ADMIN_SUBOP_CREATE, + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, + 0, fp->netdev->dev_port); + + rc = 
fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + return rc ? rc : be32_to_cpu(cmd.rsp.id); +} + +static int fun_vi_create(struct funeth_priv *fp) +{ + struct fun_admin_vi_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI, + sizeof(req)), + .u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, + 0, + fp->netdev->dev_port, + fp->netdev->dev_port) + }; + + return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); +} + +/* Helper to create an ETH flow and bind an SQ to it. + * Returns the ETH id (>= 0) on success or a negative error. + */ +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid) +{ + int rc, ethid; + + ethid = fun_eth_create(fp); + if (ethid >= 0) { + rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid, + FUN_ADMIN_BIND_TYPE_ETH, ethid); + if (rc) { + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid); + ethid = rc; + } + } + return ethid; +} + +static irqreturn_t fun_queue_irq_handler(int irq, void *data) +{ + struct fun_irq *p = data; + + if (p->rxq) { + prefetch(p->rxq->next_cqe_info); + p->rxq->irq_cnt++; + } + napi_schedule_irqoff(&p->napi); + return IRQ_HANDLED; +} + +static int fun_enable_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned long idx, last; + unsigned int qidx; + struct fun_irq *p; + const char *qtype; + int err; + + xa_for_each(&fp->irqs, idx, p) { + if (p->txq) { + qtype = "tx"; + qidx = p->txq->qidx; + } else if (p->rxq) { + qtype = "rx"; + qidx = p->rxq->qidx; + } else { + continue; + } + + if (p->state != FUN_IRQ_INIT) + continue; + + snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name, + qtype, qidx); + err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p); + if (err) { + netdev_err(dev, "Failed to allocate IRQ %u, err %d\n", + p->irq, err); + goto unroll; + } + p->state = FUN_IRQ_REQUESTED; + } + + xa_for_each(&fp->irqs, idx, p) { + if (p->state != FUN_IRQ_REQUESTED) + continue; + 
irq_set_affinity_notifier(p->irq, &p->aff_notify); + irq_set_affinity_and_hint(p->irq, &p->affinity_mask); + napi_enable(&p->napi); + p->state = FUN_IRQ_ENABLED; + } + + return 0; + +unroll: + last = idx - 1; + xa_for_each_range(&fp->irqs, idx, p, 0, last) + if (p->state == FUN_IRQ_REQUESTED) { + free_irq(p->irq, p); + p->state = FUN_IRQ_INIT; + } + + return err; +} + +static void fun_disable_one_irq(struct fun_irq *irq) +{ + napi_disable(&irq->napi); + irq_set_affinity_notifier(irq->irq, NULL); + irq_update_affinity_hint(irq->irq, NULL); + free_irq(irq->irq, irq); + irq->state = FUN_IRQ_INIT; +} + +static void fun_disable_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_irq *p; + unsigned long idx; + + xa_for_each(&fp->irqs, idx, p) + if (p->state == FUN_IRQ_ENABLED) + fun_disable_one_irq(p); +} + +static void fun_down(struct net_device *dev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(dev); + + /* If we don't have queues the data path is already down. + * Note netif_running(dev) may be true. + */ + if (!rcu_access_pointer(fp->rxqs)) + return; + + /* It is also down if the queues aren't on the device. 
*/ + if (fp->txqs[0]->init_state >= FUN_QSTATE_INIT_FULL) { + netif_info(fp, ifdown, dev, + "Tearing down data path on device\n"); + fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0); + + netif_carrier_off(dev); + netif_tx_disable(dev); + + fun_destroy_rss(fp); + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port); + fun_disable_irqs(dev); + } + + fun_free_rings(dev, qset); +} + +static int fun_up(struct net_device *dev, struct fun_qset *qset) +{ + static const int port_keys[] = { + FUN_ADMIN_PORT_KEY_STATS_DMA_LOW, + FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH, + FUN_ADMIN_PORT_KEY_ENABLE + }; + + struct funeth_priv *fp = netdev_priv(dev); + u64 vals[] = { + lower_32_bits(fp->stats_dma_addr), + upper_32_bits(fp->stats_dma_addr), + FUN_PORT_FLAG_ENABLE_NOTIFY + }; + int err; + + netif_info(fp, ifup, dev, "Setting up data path on device\n"); + + if (qset->rxqs[0]->init_state < FUN_QSTATE_INIT_FULL) { + err = fun_advance_ring_state(dev, qset); + if (err) + return err; + } + + err = fun_vi_create(fp); + if (err) + goto free_queues; + + fp->txqs = qset->txqs; + rcu_assign_pointer(fp->rxqs, qset->rxqs); + rcu_assign_pointer(fp->xdpqs, qset->xdpqs); + + err = fun_enable_irqs(dev); + if (err) + goto destroy_vi; + + if (fp->rss_cfg) { + err = fun_config_rss(dev, fp->hash_algo, fp->rss_key, + fp->indir_table, FUN_ADMIN_SUBOP_CREATE); + } else { + /* The non-RSS case has only 1 queue. 
*/ + err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI, dev->dev_port, + FUN_ADMIN_BIND_TYPE_EPCQ, + qset->rxqs[0]->hw_cqid); + } + if (err) + goto disable_irqs; + + err = fun_port_write_cmds(fp, 3, port_keys, vals); + if (err) + goto free_rss; + + netif_tx_start_all_queues(dev); + return 0; + +free_rss: + fun_destroy_rss(fp); +disable_irqs: + fun_disable_irqs(dev); +destroy_vi: + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port); +free_queues: + fun_free_rings(dev, qset); + return err; +} + +static int funeth_open(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_qset qset = { + .nrxqs = netdev->real_num_rx_queues, + .ntxqs = netdev->real_num_tx_queues, + .nxdpqs = fp->num_xdpqs, + .cq_depth = fp->cq_depth, + .rq_depth = fp->rq_depth, + .sq_depth = fp->sq_depth, + .state = FUN_QSTATE_INIT_FULL, + }; + int rc; + + rc = fun_alloc_rings(netdev, &qset); + if (rc) + return rc; + + rc = fun_up(netdev, &qset); + if (rc) { + qset.state = FUN_QSTATE_DESTROYED; + fun_free_rings(netdev, &qset); + } + + return rc; +} + +static int funeth_close(struct net_device *netdev) +{ + struct fun_qset qset = { .state = FUN_QSTATE_DESTROYED }; + + fun_down(netdev, &qset); + return 0; +} + +static void fun_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i, start; + + stats->tx_packets = fp->tx_packets; + stats->tx_bytes = fp->tx_bytes; + stats->tx_dropped = fp->tx_dropped; + + stats->rx_packets = fp->rx_packets; + stats->rx_bytes = fp->rx_bytes; + stats->rx_dropped = fp->rx_dropped; + + rcu_read_lock(); + rxqs = rcu_dereference(fp->rxqs); + if (!rxqs) + goto unlock; + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + struct funeth_txq_stats txs; + + FUN_QSTAT_READ(fp->txqs[i], start, txs); + stats->tx_packets += txs.tx_pkts; + stats->tx_bytes += txs.tx_bytes; + stats->tx_dropped += 
txs.tx_map_err; + } + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + struct funeth_rxq_stats rxs; + + FUN_QSTAT_READ(rxqs[i], start, rxs); + stats->rx_packets += rxs.rx_pkts; + stats->rx_bytes += rxs.rx_bytes; + stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops; + } + + xdpqs = rcu_dereference(fp->xdpqs); + if (!xdpqs) + goto unlock; + + for (i = 0; i < fp->num_xdpqs; i++) { + struct funeth_txq_stats txs; + + FUN_QSTAT_READ(xdpqs[i], start, txs); + stats->tx_packets += txs.tx_pkts; + stats->tx_bytes += txs.tx_bytes; + } +unlock: + rcu_read_unlock(); +} + +static int fun_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu); + if (!rc) + netdev->mtu = new_mtu; + return rc; +} + +static int fun_set_macaddr(struct net_device *netdev, void *addr) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct sockaddr *saddr = addr; + int rc; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + if (ether_addr_equal(netdev->dev_addr, saddr->sa_data)) + return 0; + + rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR, + ether_addr_to_u64(saddr->sa_data)); + if (!rc) + eth_hw_addr_set(netdev, saddr->sa_data); + return rc; +} + +static int fun_get_port_attributes(struct net_device *netdev) +{ + static const int keys[] = { + FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES, + FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU + }; + static const int phys_keys[] = { + FUN_ADMIN_PORT_KEY_LANE_ATTRS, + }; + + struct funeth_priv *fp = netdev_priv(netdev); + u64 data[ARRAY_SIZE(keys)]; + u8 mac[ETH_ALEN]; + int i, rc; + + rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data); + if (rc) + return rc; + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + switch (keys[i]) { + case FUN_ADMIN_PORT_KEY_MACADDR: + u64_to_ether_addr(data[i], mac); + if (is_zero_ether_addr(mac)) { + eth_hw_addr_random(netdev); + } else if 
(is_valid_ether_addr(mac)) { + eth_hw_addr_set(netdev, mac); + } else { + netdev_err(netdev, + "device provided a bad MAC address %pM\n", + mac); + return -EINVAL; + } + break; + + case FUN_ADMIN_PORT_KEY_CAPABILITIES: + fp->port_caps = data[i]; + break; + + case FUN_ADMIN_PORT_KEY_ADVERT: + fp->advertising = data[i]; + break; + + case FUN_ADMIN_PORT_KEY_MTU: + netdev->mtu = data[i]; + break; + } + } + + if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) { + rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys, + data); + if (rc) + return rc; + + fp->lane_attrs = data[0]; + } + + if (netdev->addr_assign_type == NET_ADDR_RANDOM) + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR, + ether_addr_to_u64(netdev->dev_addr)); + return 0; +} + +static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + const struct funeth_priv *fp = netdev_priv(dev); + + return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg, + sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0; +} + +static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct hwtstamp_config cfg; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + /* no TX HW timestamps */ + cfg.tx_type = HWTSTAMP_TX_OFF; + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + 
fp->hwtstamp_cfg = cfg; + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCSHWTSTAMP: + return fun_hwtstamp_set(dev, ifr); + case SIOCGHWTSTAMP: + return fun_hwtstamp_get(dev, ifr); + default: + return -EOPNOTSUPP; + } +} + +/* Prepare the queues for XDP. */ +static int fun_enter_xdp(struct net_device *dev, struct bpf_prog *prog) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i, nqs = num_online_cpus(); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + int err; + + xdpqs = alloc_xdpqs(dev, nqs, fp->sq_depth, 0, FUN_QSTATE_INIT_FULL); + if (IS_ERR(xdpqs)) + return PTR_ERR(xdpqs); + + rxqs = rtnl_dereference(fp->rxqs); + for (i = 0; i < dev->real_num_rx_queues; i++) { + err = fun_rxq_set_bpf(rxqs[i], prog); + if (err) + goto out; + } + + fp->num_xdpqs = nqs; + rcu_assign_pointer(fp->xdpqs, xdpqs); + return 0; +out: + while (i--) + fun_rxq_set_bpf(rxqs[i], NULL); + + free_xdpqs(xdpqs, nqs, 0, FUN_QSTATE_DESTROYED); + return err; +} + +/* Set the queues for non-XDP operation. 
*/ +static void fun_end_xdp(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i; + + xdpqs = rtnl_dereference(fp->xdpqs); + rcu_assign_pointer(fp->xdpqs, NULL); + synchronize_net(); + /* at this point both Rx and Tx XDP processing has ended */ + + free_xdpqs(xdpqs, fp->num_xdpqs, 0, FUN_QSTATE_DESTROYED); + fp->num_xdpqs = 0; + + rxqs = rtnl_dereference(fp->rxqs); + for (i = 0; i < dev->real_num_rx_queues; i++) + fun_rxq_set_bpf(rxqs[i], NULL); +} + +#define XDP_MAX_MTU \ + (PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM) + +static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct bpf_prog *old_prog, *prog = xdp->prog; + struct funeth_priv *fp = netdev_priv(dev); + int i, err; + + /* XDP uses at most one buffer */ + if (prog && dev->mtu > XDP_MAX_MTU) { + netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu); + NL_SET_ERR_MSG_MOD(xdp->extack, + "Device MTU too large for XDP"); + return -EINVAL; + } + + if (!netif_running(dev)) { + fp->num_xdpqs = prog ? num_online_cpus() : 0; + } else if (prog && !fp->xdp_prog) { + err = fun_enter_xdp(dev, prog); + if (err) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Failed to set queues for XDP."); + return err; + } + } else if (!prog && fp->xdp_prog) { + fun_end_xdp(dev); + } else { + struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs); + + for (i = 0; i < dev->real_num_rx_queues; i++) + WRITE_ONCE(rxqs[i]->xdp_prog, prog); + } + + dev->max_mtu = prog ? 
XDP_MAX_MTU : FUN_MAX_MTU; + old_prog = xchg(&fp->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return fun_xdp_setup(dev, xdp); + default: + return -EINVAL; + } +} + +static struct devlink_port *fun_get_devlink_port(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + return &fp->dl_port; +} + +static int fun_init_vports(struct fun_ethdev *ed, unsigned int n) +{ + if (ed->num_vports) + return -EINVAL; + + ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL); + if (!ed->vport_info) + return -ENOMEM; + ed->num_vports = n; + return 0; +} + +static void fun_free_vports(struct fun_ethdev *ed) +{ + kvfree(ed->vport_info); + ed->vport_info = NULL; + ed->num_vports = 0; +} + +static struct fun_vport_info *fun_get_vport(struct fun_ethdev *ed, + unsigned int vport) +{ + if (!ed->vport_info || vport >= ed->num_vports) + return NULL; + + return ed->vport_info + vport; +} + +static int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param mac_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (is_multicast_ether_addr(mac)) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac)); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1, + &mac_param); + if (!rc) + ether_addr_copy(vi->mac, mac); +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param vlan_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = 
to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (vlan > 4095 || qos > 7) + return -EINVAL; + if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) && + vlan_proto != htons(ETH_P_8021AD)) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto), + ((u16)qos << VLAN_PRIO_SHIFT) | vlan); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param); + if (!rc) { + vi->vlan = vlan; + vi->qos = qos; + vi->vlan_proto = vlan_proto; + } +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param rate_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (min_tx_rate) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param); + if (!rc) + vi->max_rate = max_tx_rate; +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_get_vf_config(struct net_device *dev, int vf, + struct ifla_vf_info *ivi) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_ethdev *ed = to_fun_ethdev(fp->fdev); + const struct fun_vport_info *vi; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vf; + ether_addr_copy(ivi->mac, vi->mac); + ivi->vlan = vi->vlan; + ivi->qos = vi->qos; + ivi->vlan_proto = vi->vlan_proto; + ivi->max_tx_rate = vi->max_rate; + ivi->spoofchk = vi->spoofchk; +unlock: + mutex_unlock(&ed->state_mutex); + return vi ? 
0 : -EINVAL; +} + +static void fun_uninit(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + + fun_prune_queue_irqs(dev); + xa_destroy(&fp->irqs); +} + +static const struct net_device_ops fun_netdev_ops = { + .ndo_open = funeth_open, + .ndo_stop = funeth_close, + .ndo_start_xmit = fun_start_xmit, + .ndo_get_stats64 = fun_get_stats64, + .ndo_change_mtu = fun_change_mtu, + .ndo_set_mac_address = fun_set_macaddr, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = fun_ioctl, + .ndo_uninit = fun_uninit, + .ndo_bpf = fun_xdp, + .ndo_xdp_xmit = fun_xdp_xmit_frames, + .ndo_set_vf_mac = fun_set_vf_mac, + .ndo_set_vf_vlan = fun_set_vf_vlan, + .ndo_set_vf_rate = fun_set_vf_rate, + .ndo_get_vf_config = fun_get_vf_config, + .ndo_get_devlink_port = fun_get_devlink_port, +}; + +#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) +#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \ + GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA) + +static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx) +{ + unsigned int i; + + for (i = 0; i < fp->indir_table_nentries; i++) + fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx); +} + +/* Reset the RSS indirection table to equal distribution across the current + * number of Rx queues. Called at init time and whenever the number of Rx + * queues changes subsequently. Note that this may also resize the indirection + * table. + */ +static void fun_reset_rss_indir(struct net_device *dev, unsigned int nrx) +{ + struct funeth_priv *fp = netdev_priv(dev); + + if (!fp->rss_cfg) + return; + + /* Set the table size to the max possible that allows an equal number + * of occurrences of each CQ. 
+ */ + fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT, nrx); + fun_dflt_rss_indir(fp, nrx); +} + +/* Update the RSS LUT to contain only queues in [0, nrx). Normally this will + * update the LUT to an equal distribution among nrx queues, If @only_if_needed + * is set the LUT is left unchanged if it already does not reference any queues + * >= nrx. + */ +static int fun_rss_set_qnum(struct net_device *dev, unsigned int nrx, + bool only_if_needed) +{ + struct funeth_priv *fp = netdev_priv(dev); + u32 old_lut[FUN_ETH_RSS_MAX_INDIR_ENT]; + unsigned int i, oldsz; + int err; + + if (!fp->rss_cfg) + return 0; + + if (only_if_needed) { + for (i = 0; i < fp->indir_table_nentries; i++) + if (fp->indir_table[i] >= nrx) + break; + + if (i >= fp->indir_table_nentries) + return 0; + } + + memcpy(old_lut, fp->indir_table, sizeof(old_lut)); + oldsz = fp->indir_table_nentries; + fun_reset_rss_indir(dev, nrx); + + err = fun_config_rss(dev, fp->hash_algo, fp->rss_key, + fp->indir_table, FUN_ADMIN_SUBOP_MODIFY); + if (!err) + return 0; + + memcpy(fp->indir_table, old_lut, sizeof(old_lut)); + fp->indir_table_nentries = oldsz; + return err; +} + +/* Allocate the DMA area for the RSS configuration commands to the device, and + * initialize the hash, hash key, indirection table size and its entries to + * their defaults. The indirection table defaults to equal distribution across + * the Rx queues. 
+ */ +static int fun_init_rss(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table); + + fp->rss_hw_id = FUN_HCI_ID_INVALID; + if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS)) + return 0; + + fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size, + &fp->rss_dma_addr, GFP_KERNEL); + if (!fp->rss_cfg) + return -ENOMEM; + + fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ; + netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key)); + fun_reset_rss_indir(dev, dev->real_num_rx_queues); + return 0; +} + +static void fun_free_rss(struct funeth_priv *fp) +{ + if (fp->rss_cfg) { + dma_free_coherent(&fp->pdev->dev, + sizeof(fp->rss_key) + sizeof(fp->indir_table), + fp->rss_cfg, fp->rss_dma_addr); + fp->rss_cfg = NULL; + } +} + +void fun_set_ring_count(struct net_device *netdev, unsigned int ntx, + unsigned int nrx) +{ + netif_set_real_num_tx_queues(netdev, ntx); + if (nrx != netdev->real_num_rx_queues) { + netif_set_real_num_rx_queues(netdev, nrx); + fun_reset_rss_indir(netdev, nrx); + } +} + +static int fun_init_stats_area(struct funeth_priv *fp) +{ + unsigned int nstats; + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return 0; + + nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX + + PORT_MAC_FEC_STATS_MAX; + + fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64), + &fp->stats_dma_addr, GFP_KERNEL); + if (!fp->stats) + return -ENOMEM; + return 0; +} + +static void fun_free_stats_area(struct funeth_priv *fp) +{ + unsigned int nstats; + + if (fp->stats) { + nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX; + dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64), + fp->stats, fp->stats_dma_addr); + fp->stats = NULL; + } +} + +static int fun_dl_port_register(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct devlink *dl = priv_to_devlink(fp->fdev); + struct devlink_port_attrs attrs = {}; + unsigned int idx; + + if (fp->port_caps & 
FUN_PORT_CAP_VPORT) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + idx = fp->lport; + } else { + idx = netdev->dev_port; + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.lanes = fp->lane_attrs & 7; + if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) { + attrs.split = 1; + attrs.phys.port_number = fp->lport & ~3; + attrs.phys.split_subport_number = fp->lport & 3; + } else { + attrs.phys.port_number = fp->lport; + } + } + + devlink_port_attrs_set(&fp->dl_port, &attrs); + + return devlink_port_register(dl, &fp->dl_port, idx); +} + +/* Determine the max Tx/Rx queues for a port. */ +static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx, + unsigned int *nrx) +{ + int neth; + + if (ed->num_ports > 1 || is_kdump_kernel()) { + *ntx = 1; + *nrx = 1; + return 0; + } + + neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH); + if (neth < 0) + return neth; + + /* We determine the max number of queues based on the CPU + * cores, device interrupts and queues, RSS size, and device Tx flows. + * + * - At least 1 Rx and 1 Tx queues. + * - At most 1 Rx/Tx queue per core. + * - Each Rx/Tx queue needs 1 SQ. + */ + *ntx = min(ed->nsqs_per_port - 1, num_online_cpus()); + *nrx = *ntx; + if (*ntx > neth) + *ntx = neth; + if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT) + *nrx = FUN_ETH_RSS_MAX_INDIR_ENT; + return 0; +} + +static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs) +{ + unsigned int ntx, nrx; + + ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES); + nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES); + if (ntx <= nrx) { + ntx = min(ntx, nsqs / 2); + nrx = min(nrx, nsqs - ntx); + } else { + nrx = min(nrx, nsqs / 2); + ntx = min(ntx, nsqs - nrx); + } + + netif_set_real_num_tx_queues(dev, ntx); + netif_set_real_num_rx_queues(dev, nrx); +} + +/* Replace the existing Rx/Tx/XDP queues with equal number of queues with + * different settings, e.g. depth. 
This is a disruptive replacement that + * temporarily shuts down the data path and should be limited to changes that + * can't be applied to live queues. The old queues are always discarded. + */ +int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs, + struct netlink_ext_ack *extack) +{ + struct fun_qset oldqs = { .state = FUN_QSTATE_DESTROYED }; + struct funeth_priv *fp = netdev_priv(dev); + int err; + + newqs->nrxqs = dev->real_num_rx_queues; + newqs->ntxqs = dev->real_num_tx_queues; + newqs->nxdpqs = fp->num_xdpqs; + newqs->state = FUN_QSTATE_INIT_SW; + err = fun_alloc_rings(dev, newqs); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to allocate memory for new queues, keeping current settings"); + return err; + } + + fun_down(dev, &oldqs); + + err = fun_up(dev, newqs); + if (!err) + return 0; + + /* The new queues couldn't be installed. We do not retry the old queues + * as they are the same to the device as the new queues and would + * similarly fail. + */ + newqs->state = FUN_QSTATE_DESTROYED; + fun_free_rings(dev, newqs); + NL_SET_ERR_MSG_MOD(extack, "Unable to restore the data path with the new queues."); + return err; +} + +/* Change the number of Rx/Tx queues of a device while it is up. This is done + * by incrementally adding/removing queues to meet the new requirements while + * handling ongoing traffic. 
+ */ +int fun_change_num_queues(struct net_device *dev, unsigned int ntx, + unsigned int nrx) +{ + unsigned int keep_tx = min(dev->real_num_tx_queues, ntx); + unsigned int keep_rx = min(dev->real_num_rx_queues, nrx); + struct funeth_priv *fp = netdev_priv(dev); + struct fun_qset oldqs = { + .rxqs = rtnl_dereference(fp->rxqs), + .txqs = fp->txqs, + .nrxqs = dev->real_num_rx_queues, + .ntxqs = dev->real_num_tx_queues, + .rxq_start = keep_rx, + .txq_start = keep_tx, + .state = FUN_QSTATE_DESTROYED + }; + struct fun_qset newqs = { + .nrxqs = nrx, + .ntxqs = ntx, + .rxq_start = keep_rx, + .txq_start = keep_tx, + .cq_depth = fp->cq_depth, + .rq_depth = fp->rq_depth, + .sq_depth = fp->sq_depth, + .state = FUN_QSTATE_INIT_FULL + }; + int i, err; + + err = fun_alloc_rings(dev, &newqs); + if (err) + goto free_irqs; + + err = fun_enable_irqs(dev); /* of any newly added queues */ + if (err) + goto free_rings; + + /* copy the queues we are keeping to the new set */ + memcpy(newqs.rxqs, oldqs.rxqs, keep_rx * sizeof(*oldqs.rxqs)); + memcpy(newqs.txqs, fp->txqs, keep_tx * sizeof(*fp->txqs)); + + if (nrx < dev->real_num_rx_queues) { + err = fun_rss_set_qnum(dev, nrx, true); + if (err) + goto disable_tx_irqs; + + for (i = nrx; i < dev->real_num_rx_queues; i++) + fun_disable_one_irq(container_of(oldqs.rxqs[i]->napi, + struct fun_irq, napi)); + + netif_set_real_num_rx_queues(dev, nrx); + } + + if (ntx < dev->real_num_tx_queues) + netif_set_real_num_tx_queues(dev, ntx); + + rcu_assign_pointer(fp->rxqs, newqs.rxqs); + fp->txqs = newqs.txqs; + synchronize_net(); + + if (ntx > dev->real_num_tx_queues) + netif_set_real_num_tx_queues(dev, ntx); + + if (nrx > dev->real_num_rx_queues) { + netif_set_real_num_rx_queues(dev, nrx); + fun_rss_set_qnum(dev, nrx, false); + } + + /* disable interrupts of any excess Tx queues */ + for (i = keep_tx; i < oldqs.ntxqs; i++) + fun_disable_one_irq(oldqs.txqs[i]->irq); + + fun_free_rings(dev, &oldqs); + fun_prune_queue_irqs(dev); + return 0; + 
+disable_tx_irqs: + for (i = oldqs.ntxqs; i < ntx; i++) + fun_disable_one_irq(newqs.txqs[i]->irq); +free_rings: + newqs.state = FUN_QSTATE_DESTROYED; + fun_free_rings(dev, &newqs); +free_irqs: + fun_prune_queue_irqs(dev); + return err; +} + +static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid) +{ + struct fun_dev *fdev = &ed->fdev; + struct net_device *netdev; + struct funeth_priv *fp; + unsigned int ntx, nrx; + int rc; + + rc = fun_max_qs(ed, &ntx, &nrx); + if (rc) + return rc; + + netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx); + if (!netdev) { + rc = -ENOMEM; + goto done; + } + + netdev->dev_port = portid; + fun_queue_defaults(netdev, ed->nsqs_per_port); + + fp = netdev_priv(netdev); + fp->fdev = fdev; + fp->pdev = to_pci_dev(fdev->dev); + fp->netdev = netdev; + xa_init(&fp->irqs); + fp->rx_irq_ofst = ntx; + seqcount_init(&fp->link_seq); + + fp->lport = INVALID_LPORT; + rc = fun_port_create(netdev); + if (rc) + goto free_netdev; + + /* bind port to admin CQ for async events */ + rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid, + FUN_ADMIN_BIND_TYPE_EPCQ, 0); + if (rc) + goto destroy_port; + + rc = fun_get_port_attributes(netdev); + if (rc) + goto destroy_port; + + rc = fun_init_rss(netdev); + if (rc) + goto destroy_port; + + rc = fun_init_stats_area(fp); + if (rc) + goto free_rss; + + SET_NETDEV_DEV(netdev, fdev->dev); + netdev->netdev_ops = &fun_netdev_ops; + + netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM; + if (fp->port_caps & FUN_PORT_CAP_OFFLOADS) + netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS; + if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS) + netdev->hw_features |= GSO_ENCAP_FLAGS; + + netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA; + netdev->vlan_features = netdev->features & VLAN_FEAT; + netdev->mpls_features = netdev->vlan_features; + netdev->hw_enc_features = netdev->hw_features; + + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = FUN_MAX_MTU; + + fun_set_ethtool_ops(netdev); + + 
/* configurable parameters */ + fp->sq_depth = min(SQ_DEPTH, fdev->q_depth); + fp->cq_depth = min(CQ_DEPTH, fdev->q_depth); + fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth); + fp->rx_coal_usec = CQ_INTCOAL_USEC; + fp->rx_coal_count = CQ_INTCOAL_NPKT; + fp->tx_coal_usec = SQ_INTCOAL_USEC; + fp->tx_coal_count = SQ_INTCOAL_NPKT; + fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count); + + rc = fun_dl_port_register(netdev); + if (rc) + goto free_stats; + + fp->ktls_id = FUN_HCI_ID_INVALID; + fun_ktls_init(netdev); /* optional, failure OK */ + + netif_carrier_off(netdev); + ed->netdevs[portid] = netdev; + rc = register_netdev(netdev); + if (rc) + goto unreg_devlink; + + if (fp->dl_port.devlink) + devlink_port_type_eth_set(&fp->dl_port, netdev); + + return 0; + +unreg_devlink: + ed->netdevs[portid] = NULL; + fun_ktls_cleanup(fp); + if (fp->dl_port.devlink) + devlink_port_unregister(&fp->dl_port); +free_stats: + fun_free_stats_area(fp); +free_rss: + fun_free_rss(fp); +destroy_port: + fun_port_destroy(netdev); +free_netdev: + free_netdev(netdev); +done: + dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc); + return rc; +} + +static void fun_destroy_netdev(struct net_device *netdev) +{ + struct funeth_priv *fp; + + fp = netdev_priv(netdev); + if (fp->dl_port.devlink) { + devlink_port_type_clear(&fp->dl_port); + devlink_port_unregister(&fp->dl_port); + } + unregister_netdev(netdev); + fun_ktls_cleanup(fp); + fun_free_stats_area(fp); + fun_free_rss(fp); + fun_port_destroy(netdev); + free_netdev(netdev); +} + +static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports) +{ + struct fun_dev *fd = &ed->fdev; + int i, rc; + + /* The admin queue takes 1 IRQ and 2 SQs. 
*/ + ed->nsqs_per_port = min(fd->num_irqs - 1, + fd->kern_end_qid - 2) / nports; + if (ed->nsqs_per_port < 2) { + dev_err(fd->dev, "Too few SQs for %u ports", nports); + return -EINVAL; + } + + ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL); + if (!ed->netdevs) + return -ENOMEM; + + ed->num_ports = nports; + for (i = 0; i < nports; i++) { + rc = fun_create_netdev(ed, i); + if (rc) + goto free_netdevs; + } + + return 0; + +free_netdevs: + while (i) + fun_destroy_netdev(ed->netdevs[--i]); + kfree(ed->netdevs); + ed->netdevs = NULL; + ed->num_ports = 0; + return rc; +} + +static void fun_destroy_ports(struct fun_ethdev *ed) +{ + unsigned int i; + + for (i = 0; i < ed->num_ports; i++) + fun_destroy_netdev(ed->netdevs[i]); + + kfree(ed->netdevs); + ed->netdevs = NULL; + ed->num_ports = 0; +} + +static void fun_update_link_state(const struct fun_ethdev *ed, + const struct fun_admin_port_notif *notif) +{ + unsigned int port_idx = be16_to_cpu(notif->id); + struct net_device *netdev; + struct funeth_priv *fp; + + if (port_idx >= ed->num_ports) + return; + + netdev = ed->netdevs[port_idx]; + fp = netdev_priv(netdev); + + write_seqcount_begin(&fp->link_seq); + fp->link_speed = be32_to_cpu(notif->speed) * 10; /* 10 Mbps->Mbps */ + fp->active_fc = notif->flow_ctrl; + fp->active_fec = notif->fec; + fp->xcvr_type = notif->xcvr_type; + fp->link_down_reason = notif->link_down_reason; + fp->lp_advertising = be64_to_cpu(notif->lp_advertising); + + if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN) + netif_carrier_off(netdev); + if (notif->link_state & FUN_PORT_FLAG_MAC_UP) + netif_carrier_on(netdev); + + write_seqcount_end(&fp->link_seq); + fun_report_link(netdev); +} + +/* handler for async events delivered through the admin CQ */ +static void fun_event_cb(struct fun_dev *fdev, void *entry) +{ + u8 op = ((struct fun_admin_rsp_common *)entry)->op; + + if (op == FUN_ADMIN_OP_PORT) { + const struct fun_admin_port_notif *rsp = entry; + + if 
(rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) { + fun_update_link_state(to_fun_ethdev(fdev), rsp); + } else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) { + const struct fun_admin_res_count_rsp *r = entry; + + if (r->count.data) + set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags); + else + set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags); + fun_serv_sched(fdev); + } else { + dev_info(fdev->dev, "adminq event unexpected op %u subop %u", + op, rsp->subop); + } + } else { + dev_info(fdev->dev, "adminq event unexpected op %u", op); + } +} + +/* handler for pending work managed by the service task */ +static void fun_service_cb(struct fun_dev *fdev) +{ + struct fun_ethdev *ed = to_fun_ethdev(fdev); + int rc; + + if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags)) + fun_destroy_ports(ed); + + if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags)) + return; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT); + if (rc < 0 || rc == ed->num_ports) + return; + + if (ed->num_ports) + fun_destroy_ports(ed); + if (rc) + fun_create_ports(ed, rc); +} + +static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs) +{ + struct fun_dev *fdev = pci_get_drvdata(pdev); + struct fun_ethdev *ed = to_fun_ethdev(fdev); + int rc; + + if (nvfs == 0) { + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot disable SR-IOV while VFs are assigned\n"); + return -EPERM; + } + + mutex_lock(&ed->state_mutex); + fun_free_vports(ed); + mutex_unlock(&ed->state_mutex); + pci_disable_sriov(pdev); + return 0; + } + + rc = pci_enable_sriov(pdev, nvfs); + if (rc) + return rc; + + mutex_lock(&ed->state_mutex); + rc = fun_init_vports(ed, nvfs); + mutex_unlock(&ed->state_mutex); + if (rc) { + pci_disable_sriov(pdev); + return rc; + } + + return nvfs; +} + +static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct fun_dev_params aqreq = { + .cqe_size_log2 = ilog2(ADMIN_CQE_SIZE), + .sqe_size_log2 = ilog2(ADMIN_SQE_SIZE), + .cq_depth = 
ADMIN_CQ_DEPTH, + .sq_depth = ADMIN_SQ_DEPTH, + .rq_depth = ADMIN_RQ_DEPTH, + .min_msix = 2, /* 1 Rx + 1 Tx */ + .event_cb = fun_event_cb, + .serv_cb = fun_service_cb, + }; + struct devlink *devlink; + struct fun_ethdev *ed; + struct fun_dev *fdev; + int rc; + + devlink = fun_devlink_alloc(&pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink alloc failed\n"); + return -ENOMEM; + } + + ed = devlink_priv(devlink); + mutex_init(&ed->state_mutex); + + fdev = &ed->fdev; + rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME); + if (rc) + goto free_devlink; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT); + if (rc > 0) + rc = fun_create_ports(ed, rc); + if (rc < 0) + goto disable_dev; + + fun_serv_restart(fdev); + fun_devlink_register(devlink); + return 0; + +disable_dev: + fun_dev_disable(fdev); +free_devlink: + mutex_destroy(&ed->state_mutex); + fun_devlink_free(devlink); + return rc; +} + +static void funeth_remove(struct pci_dev *pdev) +{ + struct fun_dev *fdev = pci_get_drvdata(pdev); + struct devlink *devlink; + struct fun_ethdev *ed; + + ed = to_fun_ethdev(fdev); + devlink = priv_to_devlink(ed); + fun_devlink_unregister(devlink); + +#ifdef CONFIG_PCI_IOV + funeth_sriov_configure(pdev, 0); +#endif + + fun_serv_stop(fdev); + fun_destroy_ports(ed); + fun_dev_disable(fdev); + mutex_destroy(&ed->state_mutex); + + fun_devlink_free(devlink); +} + +static struct pci_driver funeth_driver = { + .name = KBUILD_MODNAME, + .id_table = funeth_id_table, + .probe = funeth_probe, + .remove = funeth_remove, + .shutdown = funeth_remove, + .sriov_configure = funeth_sriov_configure, +}; + +module_pci_driver(funeth_driver); + +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>"); +MODULE_DESCRIPTION("Fungible Ethernet Network Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DEVICE_TABLE(pci, funeth_id_table); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c new file mode 100644 index 
000000000000..0f6a549b9f67 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -0,0 +1,826 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/bpf_trace.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/filter.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include "funeth_txrx.h" +#include "funeth.h" +#include "fun_queue.h" + +#define CREATE_TRACE_POINTS +#include "funeth_trace.h" + +/* Given the device's max supported MTU and pages of at least 4KB a packet can + * be scattered into at most 4 buffers. + */ +#define RX_MAX_FRAGS 4 + +/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */ +#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) + +/* We try to reuse pages for our buffers. To avoid frequent page ref writes we + * take EXTRA_PAGE_REFS references at once and then hand them out one per packet + * occupying the buffer. + */ +#define EXTRA_PAGE_REFS 1000000 +#define MIN_PAGE_REFS 1000 + +enum { + FUN_XDP_FLUSH_REDIR = 1, + FUN_XDP_FLUSH_TX = 2, +}; + +/* See if a page is running low on refs we are holding and if so take more. */ +static void refresh_refs(struct funeth_rxbuf *buf) +{ + if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) { + buf->pg_refs += EXTRA_PAGE_REFS; + page_ref_add(buf->page, EXTRA_PAGE_REFS); + } +} + +/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its + * page is worth retaining and there's room for it. Otherwise the page is + * unmapped and our references released. 
+ */
+static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
+{
+	struct funeth_rx_cache *cache = &q->cache;
+
+	/* No free slot, or the buffer's recorded node is not numa_mem_id():
+	 * unmap the page and drop the references we hold on it.
+	 */
+	if (cache->prod_cnt - cache->cons_cnt > cache->mask ||
+	    buf->node != numa_mem_id()) {
+		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
+				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+		__page_frag_cache_drain(buf->page, buf->pg_refs);
+		return;
+	}
+
+	/* copy the buffer into the producer slot and advance the producer */
+	cache->bufs[cache->prod_cnt++ & cache->mask] = *buf;
+}
+
+/* Try to take the buffer at the cache's consumer position. Only that one
+ * entry is examined, and it is handed out only when the page's reference
+ * count equals the number of references we hold.
+ *
+ * Returns true with *rb filled in on success, false otherwise.
+ */
+static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
+{
+	struct funeth_rx_cache *cache = &q->cache;
+	struct funeth_rxbuf *head;
+
+	/* nothing cached */
+	if (cache->cons_cnt == cache->prod_cnt)
+		return false;
+
+	head = &cache->bufs[cache->cons_cnt & cache->mask];
+
+	if (page_ref_count(head->page) != head->pg_refs) {
+		/* We don't own all of the head page's references so it can't
+		 * be reused now. Evict it only when the cache is full.
+		 */
+		if (cache->prod_cnt - cache->cons_cnt > cache->mask) {
+			dma_unmap_page_attrs(q->dma_dev, head->dma_addr,
+					     PAGE_SIZE, DMA_FROM_DEVICE,
+					     DMA_ATTR_SKIP_CPU_SYNC);
+			__page_frag_cache_drain(head->page, head->pg_refs);
+			head->page = NULL;
+			cache->cons_cnt++;
+		}
+		return false;
+	}
+
+	/* Reusable: sync the page for the device, move it to the caller and
+	 * clear the cache slot.
+	 */
+	dma_sync_single_for_device(q->dma_dev, head->dma_addr, PAGE_SIZE,
+				   DMA_FROM_DEVICE);
+	*rb = *head;
+	head->page = NULL;
+	refresh_refs(rb);
+	cache->cons_cnt++;
+	return true;
+}
+
+/* Allocate and DMA-map a page for receive.
*/
+static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
+			     int node, gfp_t gfp)
+{
+	struct page *pg;
+
+	/* a buffer obtained from the queue's cache is returned as is */
+	if (cache_get(q, rb))
+		return 0;
+
+	pg = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
+	if (unlikely(!pg))
+		return -ENOMEM;
+
+	rb->dma_addr = dma_map_page(q->dma_dev, pg, 0, PAGE_SIZE,
+				    DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr)))
+		goto map_failed;
+
+	FUN_QSTAT_INC(q, rx_page_alloc);
+
+	/* start with the single reference the allocation gave us, then let
+	 * refresh_refs() top it up
+	 */
+	rb->page = pg;
+	rb->pg_refs = 1;
+	refresh_refs(rb);
+
+	/* pfmemalloc pages are recorded with node -1 */
+	if (page_is_pfmemalloc(pg))
+		rb->node = -1;
+	else
+		rb->node = page_to_nid(pg);
+	return 0;
+
+map_failed:
+	FUN_QSTAT_INC(q, rx_map_err);
+	__free_page(pg);
+	return -ENOMEM;
+}
+
+/* Unmap an Rx buffer's page and drop the references held on it. Buffers
+ * without a page are left alone.
+ */
+static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
+{
+	if (!rb->page)
+		return;
+
+	dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+	__page_frag_cache_drain(rb->page, rb->pg_refs);
+	rb->page = NULL;
+}
+
+/* Run the XDP program assigned to an Rx queue.
+ * Return %NULL if the buffer is consumed, or the virtual address of the packet
+ * to turn into an skb.
+ */
+static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
+			 int ref_ok, struct funeth_txq *xdp_q)
+{
+	struct bpf_prog *prog;
+	struct xdp_buff xdp;
+	u32 verdict;
+
+	/* buf_va points at the headroom; the frag size covers headroom,
+	 * packet and tailroom
+	 */
+	xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
+		      &q->xdp_rxq);
+	xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM,
+			 skb_frag_size(frags) -
+			 (FUN_RX_TAILROOM + FUN_XDP_HEADROOM),
+			 false);
+
+	prog = READ_ONCE(q->xdp_prog);
+	verdict = bpf_prog_run_xdp(prog, &xdp);
+
+	switch (verdict) {
+	case XDP_PASS:
+		/* Describe the packet as the program left it; the headroom
+		 * need not be FUN_XDP_HEADROOM any more.
+		 */
+		skb_frag_size_set(frags, xdp.data_end - xdp.data);
+		skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
+		return xdp.data;
+	case XDP_TX:
+		if (unlikely(!ref_ok))
+			return xdp.data;
+		if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data))
+			break;	/* counted as an XDP error below */
+		FUN_QSTAT_INC(q, xdp_tx);
+		q->xdp_flush |= FUN_XDP_FLUSH_TX;
+		return NULL;
+	case XDP_REDIRECT:
+		if (unlikely(!ref_ok))
+			return xdp.data;
+		if (unlikely(xdp_do_redirect(q->netdev, &xdp, prog)))
+			break;	/* counted as an XDP error below */
+		FUN_QSTAT_INC(q, xdp_redir);
+		q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
+		return NULL;
+	case XDP_DROP:
+		q->cur_buf->pg_refs++;
+		FUN_QSTAT_INC(q, xdp_drops);
+		return NULL;
+	default:
+		bpf_warn_invalid_xdp_action(q->netdev, prog, verdict);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(q->netdev, prog, verdict);
+		break;
+	}
+
+	/* Failed XDP_TX or XDP_REDIRECT, XDP_ABORTED, or an unknown action:
+	 * take back the frag's page reference and count the error.
+	 */
+	q->cur_buf->pg_refs++;
+	FUN_QSTAT_INC(q, xdp_err);
+	return NULL;
+}
+
+/* A CQE carries a fixed completion structure, optionally preceded by metadata
+ * and even packet data. The fixed structure sits at the end of the CQE; map
+ * the CQE's start address to the address of that structure.
+ */
+static const void *cqe_to_info(const void *cqe)
+{
+	const void *info = cqe + FUNETH_CQE_INFO_OFFSET;
+
+	return info;
+}
+
+/* The inverse of cqe_to_info().
*/ +static const void *info_to_cqe(const void *cqe_info) +{ + return cqe_info - FUNETH_CQE_INFO_OFFSET; +} + +/* Return the type of hash provided by the device based on the L3 and L4 + * protocols it parsed for the packet. + */ +static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse) +{ + static const enum pkt_hash_types htype_map[] = { + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3 + }; + u16 key; + + /* Build the key from the TCP/UDP and IP/IPv6 bits */ + key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) | + ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1); + + return htype_map[key]; +} + +/* Each received packet can be scattered across several Rx buffers or can + * share a buffer with previously received packets depending on the buffer + * and packet sizes and the room available in the most recently used buffer. + * + * The rules are: + * - If the buffer at the head of an RQ has not been used it gets (part of) the + * next incoming packet. + * - Otherwise, if the packet fully fits in the buffer's remaining space the + * packet is written there. + * - Otherwise, the packet goes into the next Rx buffer. + * + * This function returns the Rx buffer for a packet or fragment thereof of the + * given length. If it isn't @buf it either recycles or frees that buffer + * before advancing the queue to the next buffer. + * + * If called repeatedly with the remaining length of a packet it will walk + * through all the buffers containing the packet. + */ +static struct funeth_rxbuf * +get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len) +{ + if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset) + return buf; /* @buf holds (part of) the packet */ + + /* The packet occupies part of the next buffer. Move there after + * replenishing the current buffer slot either with the spare page or + * by reusing the slot's existing page. 
Note that if a spare page isn't + * available and the current packet occupies @buf it is a multi-frag + * packet that will be dropped leaving @buf available for reuse. + */ + if ((page_ref_count(buf->page) == buf->pg_refs && + buf->node == numa_mem_id()) || !q->spare_buf.page) { + dma_sync_single_for_device(q->dma_dev, buf->dma_addr, + PAGE_SIZE, DMA_FROM_DEVICE); + refresh_refs(buf); + } else { + cache_offer(q, buf); + *buf = q->spare_buf; + q->spare_buf.page = NULL; + q->rqes[q->rq_cons & q->rq_mask] = + FUN_EPRQ_RQBUF_INIT(buf->dma_addr); + } + q->buf_offset = 0; + q->rq_cons++; + return &q->bufs[q->rq_cons & q->rq_mask]; +} + +/* Gather the page fragments making up the first Rx packet on @q. Its total + * length @tot_len includes optional head- and tail-rooms. + * + * Return 0 if the device retains ownership of at least some of the pages. + * In this case the caller may only copy the packet. + * + * A non-zero return value gives the caller permission to use references to the + * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least + * one of the pages is PF_MEMALLOC. + * + * Regardless of outcome the caller is granted a reference to each of the pages. + */ +static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, + skb_frag_t *frags) +{ + struct funeth_rxbuf *buf = q->cur_buf; + unsigned int frag_len; + int ref_ok = 1; + + for (;;) { + buf = get_buf(q, buf, tot_len); + + /* We always keep the RQ full of buffers so before we can give + * one of our pages to the stack we require that we can obtain + * a replacement page. If we can't the packet will either be + * copied or dropped so we can retain ownership of the page and + * reuse it. 
+ */ + if (!q->spare_buf.page && + funeth_alloc_page(q, &q->spare_buf, numa_mem_id(), + GFP_ATOMIC | __GFP_MEMALLOC)) + ref_ok = 0; + + frag_len = min_t(unsigned int, tot_len, + PAGE_SIZE - q->buf_offset); + dma_sync_single_for_cpu(q->dma_dev, + buf->dma_addr + q->buf_offset, + frag_len, DMA_FROM_DEVICE); + buf->pg_refs--; + if (ref_ok) + ref_ok |= buf->node; + + __skb_frag_set_page(frags, buf->page); + skb_frag_off_set(frags, q->buf_offset); + skb_frag_size_set(frags++, frag_len); + + tot_len -= frag_len; + if (!tot_len) + break; + + q->buf_offset = PAGE_SIZE; + } + q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN); + q->cur_buf = buf; + return ref_ok; +} + +static bool rx_hwtstamp_enabled(const struct net_device *dev) +{ + const struct funeth_priv *d = netdev_priv(dev); + + return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL; +} + +/* Advance the CQ pointers and phase tag to the next CQE. */ +static void advance_cq(struct funeth_rxq *q) +{ + if (unlikely(q->cq_head == q->cq_mask)) { + q->cq_head = 0; + q->phase ^= 1; + q->next_cqe_info = cqe_to_info(q->cqes); + } else { + q->cq_head++; + q->next_cqe_info += FUNETH_CQE_SIZE; + } + prefetch(q->next_cqe_info); +} + +/* Process the packet represented by the head CQE of @q. Gather the packet's + * fragments, run it through the optional XDP program, and if needed construct + * an skb and pass it to the stack. 
+ */ +static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q) +{ + const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info); + unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len); + struct net_device *ndev = q->netdev; + skb_frag_t frags[RX_MAX_FRAGS]; + struct skb_shared_info *si; + unsigned int headroom; + gro_result_t gro_res; + struct sk_buff *skb; + int ref_ok; + void *va; + u16 cv; + + u64_stats_update_begin(&q->syncp); + q->stats.rx_pkts++; + q->stats.rx_bytes += pkt_len; + u64_stats_update_end(&q->syncp); + + advance_cq(q); + + /* account for head- and tail-room, present only for 1-buffer packets */ + tot_len = pkt_len; + headroom = be16_to_cpu(rxreq->headroom); + if (likely(headroom)) + tot_len += FUN_RX_TAILROOM + headroom; + + ref_ok = fun_gather_pkt(q, tot_len, frags); + va = skb_frag_address(frags); + if (xdp_q && headroom == FUN_XDP_HEADROOM) { + va = fun_run_xdp(q, frags, va, ref_ok, xdp_q); + if (!va) + return; + headroom = 0; /* XDP_PASS trims it */ + } + if (unlikely(!ref_ok)) + goto no_mem; + + if (likely(headroom)) { + /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */ + prefetch(va + headroom); + skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN)); + if (unlikely(!skb)) + goto no_mem; + + skb_reserve(skb, headroom); + __skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, ndev); + } else { + prefetch(va); + skb = napi_get_frags(q->napi); + if (unlikely(!skb)) + goto no_mem; + + if (ref_ok < 0) + skb->pfmemalloc = 1; + + si = skb_shinfo(skb); + si->nr_frags = rxreq->nsgl; + for (i = 0; i < si->nr_frags; i++) + si->frags[i] = frags[i]; + + skb->len = pkt_len; + skb->data_len = pkt_len; + skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN); + } + + skb_record_rx_queue(skb, q->qidx); + cv = be16_to_cpu(rxreq->pkt_cv); + if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash)) + skb_set_hash(skb, be32_to_cpu(rxreq->hash), + cqe_to_pkt_hash_type(cv)); + if 
(likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) { + FUN_QSTAT_INC(q, rx_cso); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = be16_to_cpu(rxreq->csum) - 1; + } + if (unlikely(rx_hwtstamp_enabled(q->netdev))) + skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp); + + trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv); + + gro_res = skb->data_len ? napi_gro_frags(q->napi) : + napi_gro_receive(q->napi, skb); + if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE) + FUN_QSTAT_INC(q, gro_merged); + else if (gro_res == GRO_HELD) + FUN_QSTAT_INC(q, gro_pkts); + return; + +no_mem: + FUN_QSTAT_INC(q, rx_mem_drops); + + /* Release the references we've been granted for the frag pages. + * We return the ref of the last frag and free the rest. + */ + q->cur_buf->pg_refs++; + for (i = 0; i < rxreq->nsgl - 1; i++) + __free_page(skb_frag_page(frags + i)); +} + +/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations + * indicating the CQE is new. + */ +static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase) +{ + u16 sf_p = be16_to_cpu(ci->sf_p); + + return (sf_p & 1) ^ phase; +} + +/* Walk through a CQ identifying and processing fresh CQEs up to the given + * budget. Return the remaining budget. + */ +static int fun_process_cqes(struct funeth_rxq *q, int budget) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct funeth_txq **xdpqs, *xdp_q = NULL; + + xdpqs = rcu_dereference_bh(fp->xdpqs); + if (xdpqs) + xdp_q = xdpqs[smp_processor_id()]; + + while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) { + /* access other descriptor fields after the phase check */ + dma_rmb(); + + fun_handle_cqe_pkt(q, xdp_q); + budget--; + } + + if (unlikely(q->xdp_flush)) { + if (q->xdp_flush & FUN_XDP_FLUSH_TX) + fun_txq_wr_db(xdp_q); + if (q->xdp_flush & FUN_XDP_FLUSH_REDIR) + xdp_do_flush(); + q->xdp_flush = 0; + } + + return budget; +} + +/* NAPI handler for Rx queues. 
Calls the CQE processing loop and writes RQ/CQ
+ * doorbells as needed.
+ */
+int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct funeth_rxq *q = container_of(napi, struct fun_irq, napi)->rxq;
+	int work_done;
+	u32 cq_db;
+
+	work_done = budget - fun_process_cqes(q, budget);
+
+	/* The CQ doorbell carries the head as left by the processing loop;
+	 * q->irq_db_val is OR'ed in only when NAPI completes.
+	 */
+	cq_db = q->cq_head;
+	if (unlikely(work_done >= budget))
+		FUN_QSTAT_INC(q, rx_budget);
+	else if (napi_complete_done(napi, work_done))
+		cq_db |= q->irq_db_val;
+
+	/* Ring the RQ doorbell once the number of buffers consumed since the
+	 * previous ring reaches the threshold.
+	 */
+	if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
+		u64_stats_update_begin(&q->syncp);
+		q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
+		u64_stats_update_end(&q->syncp);
+
+		q->rq_cons_db = q->rq_cons;
+		writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
+	}
+
+	writel(cq_db, q->cq_db);
+	return work_done;
+}
+
+/* Release the page of every RQ buffer slot and of the spare buffer. */
+static void fun_rxq_free_bufs(struct funeth_rxq *q)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx <= q->rq_mask; idx++)
+		funeth_free_page(q, &q->bufs[idx]);
+
+	funeth_free_page(q, &q->spare_buf);
+	q->cur_buf = NULL;
+}
+
+/* Give every RQ slot a page and write the matching RQE. If any allocation
+ * fails all buffers are released and -ENOMEM is returned.
+ */
+static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx <= q->rq_mask; idx++) {
+		struct funeth_rxbuf *rxb = &q->bufs[idx];
+
+		if (funeth_alloc_page(q, rxb, node, GFP_KERNEL))
+			goto undo;
+		q->rqes[idx] = FUN_EPRQ_RQBUF_INIT(rxb->dma_addr);
+	}
+
+	q->cur_buf = q->bufs;
+	return 0;
+
+undo:
+	fun_rxq_free_bufs(q);
+	return -ENOMEM;
+}
+
+/* Set up a used-buffer cache with @depth zeroed entries allocated on @node.
+ * The mask is derived as @depth - 1.
+ */
+static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
+			      int node)
+{
+	c->mask = depth - 1;
+	c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
+	if (!c->bufs)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Deallocate an Rx queue's used-buffer cache and its contents.
*/ +static void fun_rxq_free_cache(struct funeth_rxq *q) +{ + struct funeth_rxbuf *b = q->cache.bufs; + unsigned int i; + + for (i = 0; i <= q->cache.mask; i++, b++) + funeth_free_page(q, b); + + kvfree(q->cache.bufs); + q->cache.bufs = NULL; +} + +int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct fun_admin_epcq_req cmd; + u16 headroom; + int err; + + headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; + if (headroom != q->headroom) { + cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ, + sizeof(cmd)); + cmd.u.modify = + FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY, + 0, q->hw_cqid, headroom); + err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0, + 0); + if (err) + return err; + q->headroom = headroom; + } + + WRITE_ONCE(q->xdp_prog, prog); + return 0; +} + +/* Create an Rx queue, allocating the host memory it needs. */ +static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev, + unsigned int qidx, + unsigned int ncqe, + unsigned int nrqe, + struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_rxq *q; + int err = -ENOMEM; + int numa_node; + + numa_node = fun_irq_node(irq); + q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); + if (!q) + goto err; + + q->qidx = qidx; + q->netdev = dev; + q->cq_mask = ncqe - 1; + q->rq_mask = nrqe - 1; + q->numa_node = numa_node; + q->rq_db_thres = nrqe / 4; + u64_stats_init(&q->syncp); + q->dma_dev = &fp->pdev->dev; + + q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), + sizeof(*q->bufs), false, numa_node, + &q->rq_dma_addr, (void **)&q->bufs, NULL); + if (!q->rqes) + goto free_q; + + q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0, + false, numa_node, &q->cq_dma_addr, NULL, + NULL); + if (!q->cqes) + goto free_rqes; + + err = fun_rxq_init_cache(&q->cache, nrqe, numa_node); + if (err) + goto free_cqes; + + err = fun_rxq_alloc_bufs(q, numa_node); + if 
(err) + goto free_cache; + + q->stats.rx_bufs = q->rq_mask; + q->init_state = FUN_QSTATE_INIT_SW; + return q; + +free_cache: + fun_rxq_free_cache(q); +free_cqes: + dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes, + q->cq_dma_addr); +free_rqes: + fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes, + q->rq_dma_addr, q->bufs); +free_q: + kfree(q); +err: + netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx); + return ERR_PTR(err); +} + +static void fun_rxq_free_sw(struct funeth_rxq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + fun_rxq_free_cache(q); + fun_rxq_free_bufs(q); + fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false, + q->rqes, q->rq_dma_addr, q->bufs); + dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE, + q->cqes, q->cq_dma_addr); + + /* Before freeing the queue transfer key counters to the device. */ + fp->rx_packets += q->stats.rx_pkts; + fp->rx_bytes += q->stats.rx_bytes; + fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops; + + kfree(q); +} + +/* Create an Rx queue's resources on the device. */ +int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + unsigned int ncqe = q->cq_mask + 1; + unsigned int nrqe = q->rq_mask + 1; + int err; + + err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx, + irq->napi.napi_id); + if (err) + goto out; + + err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + goto xdp_unreg; + + q->phase = 1; + q->irq_cnt = 0; + q->cq_head = 0; + q->rq_cons = 0; + q->rq_cons_db = 0; + q->buf_offset = 0; + q->napi = &irq->napi; + q->irq_db_val = fp->cq_irq_db; + q->next_cqe_info = cqe_to_info(q->cqes); + + q->xdp_prog = fp->xdp_prog; + q->headroom = fp->xdp_prog ? 
FUN_XDP_HEADROOM : FUN_RX_HEADROOM; + + err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | + FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0, + FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0, + 0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT, + &q->hw_sqid, &q->rq_db); + if (err) + goto xdp_unreg; + + err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | + FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0, + q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe, + q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0, + irq->irq_idx, 0, fp->fdev->kern_end_qid, + &q->hw_cqid, &q->cq_db); + if (err) + goto free_rq; + + irq->rxq = q; + writel(q->rq_mask, q->rq_db); + q->init_state = FUN_QSTATE_INIT_FULL; + + netif_info(fp, ifup, q->netdev, + "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n", + q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx, + q->numa_node, q->headroom); + return 0; + +free_rq: + fun_destroy_sq(fp->fdev, q->hw_sqid); +xdp_unreg: + xdp_rxq_info_unreg(&q->xdp_rxq); +out: + netdev_err(q->netdev, + "Failed to create Rx queue %u on device, error %d\n", + q->qidx, err); + return err; +} + +static void fun_rxq_free_dev(struct funeth_rxq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct fun_irq *irq; + + if (q->init_state < FUN_QSTATE_INIT_FULL) + return; + + irq = container_of(q->napi, struct fun_irq, napi); + netif_info(fp, ifdown, q->netdev, + "Freeing Rx queue %u (id %u/%u), IRQ %u\n", + q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx); + + irq->rxq = NULL; + xdp_rxq_info_unreg(&q->xdp_rxq); + fun_destroy_sq(fp->fdev, q->hw_sqid); + fun_destroy_cq(fp->fdev, q->hw_cqid); + q->init_state = FUN_QSTATE_INIT_SW; +} + +/* Create or advance an Rx queue, allocating all the host and device resources + * needed to reach the target state. 
+ */ +int funeth_rxq_create(struct net_device *dev, unsigned int qidx, + unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, + int state, struct funeth_rxq **qp) +{ + struct funeth_rxq *q = *qp; + int err; + + if (!q) { + q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq); + if (IS_ERR(q)) + return PTR_ERR(q); + } + + if (q->init_state >= state) + goto out; + + err = fun_rxq_create_dev(q, irq); + if (err) { + if (!*qp) + fun_rxq_free_sw(q); + return err; + } + +out: + *qp = q; + return 0; +} + +/* Free Rx queue resources until it reaches the target state. */ +struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state) +{ + if (state < FUN_QSTATE_INIT_FULL) + fun_rxq_free_dev(q); + + if (state == FUN_QSTATE_DESTROYED) { + fun_rxq_free_sw(q); + q = NULL; + } + + return q; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_trace.h b/drivers/net/ethernet/fungible/funeth/funeth_trace.h new file mode 100644 index 000000000000..9e58dfec19d5 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_trace.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM funeth + +#if !defined(_TRACE_FUNETH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FUNETH_H + +#include <linux/tracepoint.h> + +#include "funeth_txrx.h" + +TRACE_EVENT(funeth_tx, + + TP_PROTO(const struct funeth_txq *txq, + u32 len, + u32 sqe_idx, + u32 ngle), + + TP_ARGS(txq, len, sqe_idx, ngle), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, len) + __field(u32, sqe_idx) + __field(u32, ngle) + __string(devname, txq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = txq->qidx; + __entry->len = len; + __entry->sqe_idx = sqe_idx; + __entry->ngle = ngle; + __assign_str(devname, txq->netdev->name); + ), + + TP_printk("%s: Txq %u, SQE idx %u, len %u, num GLEs %u", + __get_str(devname), __entry->qidx, __entry->sqe_idx, + __entry->len, __entry->ngle) +); + +TRACE_EVENT(funeth_tx_free, + + 
TP_PROTO(const struct funeth_txq *txq, + u32 sqe_idx, + u32 num_sqes, + u32 hw_head), + + TP_ARGS(txq, sqe_idx, num_sqes, hw_head), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, sqe_idx) + __field(u32, num_sqes) + __field(u32, hw_head) + __string(devname, txq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = txq->qidx; + __entry->sqe_idx = sqe_idx; + __entry->num_sqes = num_sqes; + __entry->hw_head = hw_head; + __assign_str(devname, txq->netdev->name); + ), + + TP_printk("%s: Txq %u, SQE idx %u, SQEs %u, HW head %u", + __get_str(devname), __entry->qidx, __entry->sqe_idx, + __entry->num_sqes, __entry->hw_head) +); + +TRACE_EVENT(funeth_rx, + + TP_PROTO(const struct funeth_rxq *rxq, + u32 num_rqes, + u32 pkt_len, + u32 hash, + u32 cls_vec), + + TP_ARGS(rxq, num_rqes, pkt_len, hash, cls_vec), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, cq_head) + __field(u32, num_rqes) + __field(u32, len) + __field(u32, hash) + __field(u32, cls_vec) + __string(devname, rxq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = rxq->qidx; + __entry->cq_head = rxq->cq_head; + __entry->num_rqes = num_rqes; + __entry->len = pkt_len; + __entry->hash = hash; + __entry->cls_vec = cls_vec; + __assign_str(devname, rxq->netdev->name); + ), + + TP_printk("%s: Rxq %u, CQ head %u, RQEs %u, len %u, hash %u, CV %#x", + __get_str(devname), __entry->qidx, __entry->cq_head, + __entry->num_rqes, __entry->len, __entry->hash, + __entry->cls_vec) +); + +#endif /* _TRACE_FUNETH_H */ + +/* Below must be outside protection. */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE funeth_trace + +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c new file mode 100644 index 000000000000..46684afa97a0 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -0,0 +1,762 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/dma-mapping.h> +#include <linux/ip.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/tcp.h> +#include <uapi/linux/udp.h> +#include "funeth.h" +#include "funeth_txrx.h" +#include "funeth_trace.h" +#include "fun_queue.h" + +#define FUN_XDP_CLEAN_THRES 32 +#define FUN_XDP_CLEAN_BATCH 16 + +/* DMA-map a packet and return the (length, DMA_address) pairs for its + * segments. If a mapping error occurs -ENOMEM is returned. + */ +static int map_skb(const struct sk_buff *skb, struct device *dev, + dma_addr_t *addr, unsigned int *len) +{ + const struct skb_shared_info *si; + const skb_frag_t *fp, *end; + + *len = skb_headlen(skb); + *addr = dma_map_single(dev, skb->data, *len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, *addr)) + return -ENOMEM; + + si = skb_shinfo(skb); + end = &si->frags[si->nr_frags]; + + for (fp = si->frags; fp < end; fp++) { + *++len = skb_frag_size(fp); + *++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, *addr)) + goto unwind; + } + return 0; + +unwind: + while (fp-- > si->frags) + dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); + + dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE); + return -ENOMEM; +} + +/* Return the address just past the end of a Tx queue's descriptor ring. + * It exploits the fact that the HW writeback area is just after the end + * of the descriptor ring. 
+ */ +static void *txq_end(const struct funeth_txq *q) +{ + return (void *)q->hw_wb; +} + +/* Return the amount of space within a Tx ring from the given address to the + * end. + */ +static unsigned int txq_to_end(const struct funeth_txq *q, void *p) +{ + return txq_end(q) - p; +} + +/* Return the number of Tx descriptors occupied by a Tx request. */ +static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req) +{ + return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8); +} + +static __be16 tcp_hdr_doff_flags(const struct tcphdr *th) +{ + return *(__be16 *)&tcp_flag_word(th); +} + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +#include "funeth_ktls.h" + +static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q, + unsigned int *tls_len) +{ + const struct fun_ktls_tx_ctx *tls_ctx; + u32 datalen, seq; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + return skb; + + if (likely(!tls_offload_tx_resync_pending(skb->sk))) { + seq = ntohl(tcp_hdr(skb)->seq); + tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + + if (likely(tls_ctx->next_seq == seq)) { + *tls_len = datalen; + return skb; + } + if (seq - tls_ctx->next_seq < U32_MAX / 4) { + tls_offload_tx_resync_request(skb->sk, seq, + tls_ctx->next_seq); + } + } + + FUN_QSTAT_INC(q, tx_tls_fallback); + skb = tls_encrypt_skb(skb); + if (!skb) + FUN_QSTAT_INC(q, tx_tls_drops); + + return skb; +} +#endif + +/* Write as many descriptors as needed for the supplied skb starting at the + * current producer location. The caller has made certain enough descriptors + * are available. + * + * Returns the number of descriptors written, 0 on error. 
+ */ +static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, + unsigned int tls_len) +{ + unsigned int extra_bytes = 0, extra_pkts = 0; + unsigned int idx = q->prod_cnt & q->mask; + const struct skb_shared_info *shinfo; + unsigned int lens[MAX_SKB_FRAGS + 1]; + dma_addr_t addrs[MAX_SKB_FRAGS + 1]; + struct fun_eth_tx_req *req; + struct fun_dataop_gl *gle; + const struct tcphdr *th; + unsigned int ngle, i; + u16 flags; + + if (unlikely(map_skb(skb, q->dma_dev, addrs, lens))) { + FUN_QSTAT_INC(q, tx_map_err); + return 0; + } + + req = fun_tx_desc_addr(q, idx); + req->op = FUN_ETH_OP_TX; + req->len8 = 0; + req->flags = 0; + req->suboff8 = offsetof(struct fun_eth_tx_req, dataop); + req->repr_idn = 0; + req->encap_proto = 0; + + shinfo = skb_shinfo(skb); + if (likely(shinfo->gso_size)) { + if (skb->encapsulation) { + u16 ol4_ofst; + + flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_OUTER_L3_LEN; + if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)) { + flags |= FUN_ETH_UPDATE_OUTER_L4_LEN | + FUN_ETH_OUTER_UDP; + if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) + flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM; + ol4_ofst = skb_transport_offset(skb); + } else { + ol4_ofst = skb_inner_network_offset(skb); + } + + if (ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM; + else + flags |= FUN_ETH_OUTER_IPV6; + + if (skb->inner_network_header) { + if (inner_ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM | + FUN_ETH_UPDATE_INNER_L3_LEN; + else + flags |= FUN_ETH_INNER_IPV6 | + FUN_ETH_UPDATE_INNER_L3_LEN; + } + th = inner_tcp_hdr(skb); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + tcp_hdr_doff_flags(th), 0, + skb_inner_network_offset(skb), + skb_inner_transport_offset(skb), + skb_network_offset(skb), ol4_ofst); + FUN_QSTAT_INC(q, tx_encap_tso); + } else { + /* HW considers one set of headers as inner */ + flags = FUN_ETH_INNER_LSO | + 
FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_INNER_L3_LEN; + if (shinfo->gso_type & SKB_GSO_TCPV6) + flags |= FUN_ETH_INNER_IPV6; + else + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM; + th = tcp_hdr(skb); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + tcp_hdr_doff_flags(th), 0, + skb_network_offset(skb), + skb_transport_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_tso); + } + + u64_stats_update_begin(&q->syncp); + q->stats.tx_cso += shinfo->gso_segs; + u64_stats_update_end(&q->syncp); + + extra_pkts = shinfo->gso_segs - 1; + extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) + + __tcp_hdrlen(th)) * extra_pkts; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + flags = FUN_ETH_UPDATE_INNER_L4_CKSUM; + if (skb->csum_offset == offsetof(struct udphdr, check)) + flags |= FUN_ETH_INNER_UDP; + fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0, + skb_checksum_start_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_cso); + } else { + fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0); + } + + ngle = shinfo->nr_frags + 1; + req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8; + req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len); + + for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm; + i < ngle && txq_to_end(q, gle); i++, gle++) + fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]); + + if (txq_to_end(q, gle) == 0) { + gle = (struct fun_dataop_gl *)q->desc; + for ( ; i < ngle; i++, gle++) + fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]); + } + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) { + struct fun_eth_tls *tls = (struct fun_eth_tls *)gle; + struct fun_ktls_tx_ctx *tls_ctx; + + req->len8 += FUNETH_TLS_SZ / 8; + req->flags = cpu_to_be16(FUN_ETH_TX_TLS); + + tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + tls->tlsid = tls_ctx->tlsid; + tls_ctx->next_seq += tls_len; + + u64_stats_update_begin(&q->syncp); + q->stats.tx_tls_bytes += tls_len; + q->stats.tx_tls_pkts += 1 + extra_pkts; + 
u64_stats_update_end(&q->syncp); + } + + u64_stats_update_begin(&q->syncp); + q->stats.tx_bytes += skb->len + extra_bytes; + q->stats.tx_pkts += 1 + extra_pkts; + u64_stats_update_end(&q->syncp); + + q->info[idx].skb = skb; + + trace_funeth_tx(q, skb->len, idx, req->dataop.ngather); + return tx_req_ndesc(req); +} + +/* Return the number of available descriptors of a Tx queue. + * HW assumes head==tail means the ring is empty so we need to keep one + * descriptor unused. + */ +static unsigned int fun_txq_avail(const struct funeth_txq *q) +{ + return q->mask - q->prod_cnt + q->cons_cnt; +} + +/* Stop a queue if it can't handle another worst-case packet. */ +static void fun_tx_check_stop(struct funeth_txq *q) +{ + if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC)) + return; + + netif_tx_stop_queue(q->ndq); + + /* NAPI reclaim is freeing packets in parallel with us and we may race. + * We have stopped the queue but check again after synchronizing with + * reclaim. + */ + smp_mb(); + if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC)) + FUN_QSTAT_INC(q, tx_nstops); + else + netif_tx_start_queue(q->ndq); +} + +/* Return true if a queue has enough space to restart. Current condition is + * that the queue must be >= 1/4 empty. 
+ */ +static bool fun_txq_may_restart(struct funeth_txq *q) +{ + return fun_txq_avail(q) >= q->mask / 4; +} + +netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + unsigned int qid = skb_get_queue_mapping(skb); + struct funeth_txq *q = fp->txqs[qid]; + unsigned int tls_len = 0; + unsigned int ndesc; + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk && + tls_is_sk_tx_device_offloaded(skb->sk)) { + skb = fun_tls_tx(skb, q, &tls_len); + if (unlikely(!skb)) + goto dropped; + } + + ndesc = write_pkt_desc(skb, q, tls_len); + if (unlikely(!ndesc)) { + dev_kfree_skb_any(skb); + goto dropped; + } + + q->prod_cnt += ndesc; + fun_tx_check_stop(q); + + skb_tx_timestamp(skb); + + if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more())) + fun_txq_wr_db(q); + else + FUN_QSTAT_INC(q, tx_more); + + return NETDEV_TX_OK; + +dropped: + /* A dropped packet may be the last one in a xmit_more train, + * ring the doorbell just in case. + */ + if (!netdev_xmit_more()) + fun_txq_wr_db(q); + return NETDEV_TX_OK; +} + +/* Return a Tx queue's HW head index written back to host memory. */ +static u16 txq_hw_head(const struct funeth_txq *q) +{ + return (u16)be64_to_cpu(*q->hw_wb); +} + +/* Unmap the Tx packet starting at the given descriptor index and + * return the number of Tx descriptors it occupied. 
+ */ +static unsigned int unmap_skb(const struct funeth_txq *q, unsigned int idx) +{ + const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx); + unsigned int ngle = req->dataop.ngather; + struct fun_dataop_gl *gle; + + if (ngle) { + gle = (struct fun_dataop_gl *)req->dataop.imm; + dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE); + + for (gle++; --ngle && txq_to_end(q, gle); gle++) + dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), + DMA_TO_DEVICE); + + for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++) + dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), + DMA_TO_DEVICE); + } + + return tx_req_ndesc(req); +} + +/* Reclaim completed Tx descriptors and free their packets. Restart a stopped + * queue if we freed enough descriptors. + * + * Return true if we exhausted the budget while there is more work to be done. + */ +static bool fun_txq_reclaim(struct funeth_txq *q, int budget) +{ + unsigned int npkts = 0, nbytes = 0, ndesc = 0; + unsigned int head, limit, reclaim_idx; + + /* budget may be 0, e.g., netpoll */ + limit = budget ? budget : UINT_MAX; + + for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask; + head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) { + /* The HW head is continually updated, ensure we don't read + * descriptor state before the head tells us to reclaim it. + * On the enqueue side the doorbell is an implicit write + * barrier. 
+ */ + rmb(); + + do { + unsigned int pkt_desc = unmap_skb(q, reclaim_idx); + struct sk_buff *skb = q->info[reclaim_idx].skb; + + trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head); + + nbytes += skb->len; + napi_consume_skb(skb, budget); + ndesc += pkt_desc; + reclaim_idx = (reclaim_idx + pkt_desc) & q->mask; + npkts++; + } while (reclaim_idx != head && npkts < limit); + } + + q->cons_cnt += ndesc; + netdev_tx_completed_queue(q->ndq, npkts, nbytes); + smp_mb(); /* pairs with the one in fun_tx_check_stop() */ + + if (unlikely(netif_tx_queue_stopped(q->ndq) && + fun_txq_may_restart(q))) { + netif_tx_wake_queue(q->ndq); + FUN_QSTAT_INC(q, tx_nrestarts); + } + + return reclaim_idx != head; +} + +/* The NAPI handler for Tx queues. */ +int fun_txq_napi_poll(struct napi_struct *napi, int budget) +{ + struct fun_irq *irq = container_of(napi, struct fun_irq, napi); + struct funeth_txq *q = irq->txq; + unsigned int db_val; + + if (fun_txq_reclaim(q, budget)) + return budget; /* exhausted budget */ + + napi_complete(napi); /* exhausted pending work */ + db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask); + writel(db_val, q->db); + return 0; +} + +static void fun_xdp_unmap(const struct funeth_txq *q, unsigned int idx) +{ + const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx); + const struct fun_dataop_gl *gle; + + gle = (const struct fun_dataop_gl *)req->dataop.imm; + dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE); +} + +/* Reclaim up to @budget completed Tx descriptors from a TX XDP queue. */ +static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget) +{ + unsigned int npkts = 0, head, reclaim_idx; + + for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask; + head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) { + /* The HW head is continually updated, ensure we don't read + * descriptor state before the head tells us to reclaim it. 
+		 * On the enqueue side the doorbell is an implicit write
+		 * barrier.
+		 */
+		rmb();
+
+		do {
+			fun_xdp_unmap(q, reclaim_idx);
+			page_frag_free(q->info[reclaim_idx].vaddr);
+
+			trace_funeth_tx_free(q, reclaim_idx, 1, head);
+
+			reclaim_idx = (reclaim_idx + 1) & q->mask;
+			npkts++;
+		} while (reclaim_idx != head && npkts < budget);
+	}
+
+	q->cons_cnt += npkts;
+	return npkts;
+}
+
+/* Queue the @len-byte buffer at @data on XDP Tx queue @q. When fewer than
+ * FUN_XDP_CLEAN_THRES descriptors are available, up to FUN_XDP_CLEAN_BATCH
+ * completed ones are reclaimed first. This function does not write the
+ * doorbell.
+ *
+ * Returns true if a descriptor was queued, false (after bumping tx_xdp_full
+ * or tx_map_err) if no descriptor is available or the DMA mapping failed.
+ */
+bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
+{
+	struct fun_eth_tx_req *req;
+	struct fun_dataop_gl *gle;
+	unsigned int idx;
+	dma_addr_t dma;
+
+	if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
+		fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH);
+
+	/* The hint covers the whole "queue is full" test: having no free
+	 * descriptor left is the rare case.
+	 */
+	if (unlikely(!fun_txq_avail(q))) {
+		FUN_QSTAT_INC(q, tx_xdp_full);
+		return false;
+	}
+
+	dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
+		FUN_QSTAT_INC(q, tx_map_err);
+		return false;
+	}
+
+	idx = q->prod_cnt & q->mask;
+	req = fun_tx_desc_addr(q, idx);
+	req->op = FUN_ETH_OP_TX;
+	req->len8 = (sizeof(*req) + sizeof(*gle)) / 8;
+	req->flags = 0;
+	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
+	req->repr_idn = 0;
+	req->encap_proto = 0;
+	fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
+	req->dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
+
+	gle = (struct fun_dataop_gl *)req->dataop.imm;
+	fun_dataop_gl_init(gle, 0, 0, len, dma);
+
+	/* Remember the buffer start so reclaim can page_frag_free() it. */
+	q->info[idx].vaddr = data;
+
+	u64_stats_update_begin(&q->syncp);
+	q->stats.tx_bytes += len;
+	q->stats.tx_pkts++;
+	u64_stats_update_end(&q->syncp);
+
+	trace_funeth_tx(q, len, idx, 1);
+	q->prod_cnt++;
+
+	return true;
+}
+
+int fun_xdp_xmit_frames(struct net_device *dev, int n,
+			struct xdp_frame **frames, u32 flags)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct funeth_txq *q, **xdpqs;
+	int i, q_idx;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	xdpqs = rcu_dereference_bh(fp->xdpqs);
+	if (unlikely(!xdpqs))
+		return -ENETDOWN;
+
+	q_idx = smp_processor_id();
+	if
(unlikely(q_idx >= fp->num_xdpqs)) + return -ENXIO; + + for (q = xdpqs[q_idx], i = 0; i < n; i++) { + const struct xdp_frame *xdpf = frames[i]; + + if (!fun_xdp_tx(q, xdpf->data, xdpf->len)) + break; + } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + fun_txq_wr_db(q); + return i; +} + +/* Purge a Tx queue of any queued packets. Should be called once HW access + * to the packets has been revoked, e.g., after the queue has been disabled. + */ +static void fun_txq_purge(struct funeth_txq *q) +{ + while (q->cons_cnt != q->prod_cnt) { + unsigned int idx = q->cons_cnt & q->mask; + + q->cons_cnt += unmap_skb(q, idx); + dev_kfree_skb_any(q->info[idx].skb); + } + netdev_tx_reset_queue(q->ndq); +} + +static void fun_xdpq_purge(struct funeth_txq *q) +{ + while (q->cons_cnt != q->prod_cnt) { + unsigned int idx = q->cons_cnt & q->mask; + + fun_xdp_unmap(q, idx); + page_frag_free(q->info[idx].vaddr); + q->cons_cnt++; + } +} + +/* Create a Tx queue, allocating all the host resources needed. */ +static struct funeth_txq *fun_txq_create_sw(struct net_device *dev, + unsigned int qidx, + unsigned int ndesc, + struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq *q; + int numa_node; + + if (irq) + numa_node = fun_irq_node(irq); /* skb Tx queue */ + else + numa_node = cpu_to_node(qidx); /* XDP Tx queue */ + + q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); + if (!q) + goto err; + + q->dma_dev = &fp->pdev->dev; + q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE, + sizeof(*q->info), true, numa_node, + &q->dma_addr, (void **)&q->info, + &q->hw_wb); + if (!q->desc) + goto free_q; + + q->netdev = dev; + q->mask = ndesc - 1; + q->qidx = qidx; + q->numa_node = numa_node; + u64_stats_init(&q->syncp); + q->init_state = FUN_QSTATE_INIT_SW; + return q; + +free_q: + kfree(q); +err: + netdev_err(dev, "Can't allocate memory for %s queue %u\n", + irq ? 
"Tx" : "XDP", qidx); + return NULL; +} + +static void fun_txq_free_sw(struct funeth_txq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true, + q->desc, q->dma_addr, q->info); + + fp->tx_packets += q->stats.tx_pkts; + fp->tx_bytes += q->stats.tx_bytes; + fp->tx_dropped += q->stats.tx_map_err; + + kfree(q); +} + +/* Allocate the device portion of a Tx queue. */ +int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + unsigned int irq_idx, ndesc = q->mask + 1; + int err; + + q->irq = irq; + *q->hw_wb = 0; + q->prod_cnt = 0; + q->cons_cnt = 0; + irq_idx = irq ? irq->irq_idx : 0; + + err = fun_sq_create(fp->fdev, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS | + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0, + FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc, + q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec, + irq_idx, 0, fp->fdev->kern_end_qid, 0, + &q->hw_qid, &q->db); + if (err) + goto out; + + err = fun_create_and_bind_tx(fp, q->hw_qid); + if (err < 0) + goto free_devq; + q->ethid = err; + + if (irq) { + irq->txq = q; + q->ndq = netdev_get_tx_queue(q->netdev, q->qidx); + q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, + fp->tx_coal_count); + writel(q->irq_db_val, q->db); + } + + q->init_state = FUN_QSTATE_INIT_FULL; + netif_info(fp, ifup, q->netdev, + "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n", + irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx, + q->ethid, q->numa_node); + return 0; + +free_devq: + fun_destroy_sq(fp->fdev, q->hw_qid); +out: + netdev_err(q->netdev, + "Failed to create %s queue %u on device, error %d\n", + irq ? 
"Tx" : "XDP", q->qidx, err); + return err; +} + +static void fun_txq_free_dev(struct funeth_txq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + if (q->init_state < FUN_QSTATE_INIT_FULL) + return; + + netif_info(fp, ifdown, q->netdev, + "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n", + q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid, + q->irq ? q->irq->irq_idx : 0, q->ethid); + + fun_destroy_sq(fp->fdev, q->hw_qid); + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid); + + if (q->irq) { + q->irq->txq = NULL; + fun_txq_purge(q); + } else { + fun_xdpq_purge(q); + } + + q->init_state = FUN_QSTATE_INIT_SW; +} + +/* Create or advance a Tx queue, allocating all the host and device resources + * needed to reach the target state. + */ +int funeth_txq_create(struct net_device *dev, unsigned int qidx, + unsigned int ndesc, struct fun_irq *irq, int state, + struct funeth_txq **qp) +{ + struct funeth_txq *q = *qp; + int err; + + if (!q) + q = fun_txq_create_sw(dev, qidx, ndesc, irq); + if (!q) + return -ENOMEM; + + if (q->init_state >= state) + goto out; + + err = fun_txq_create_dev(q, irq); + if (err) { + if (!*qp) + fun_txq_free_sw(q); + return err; + } + +out: + *qp = q; + return 0; +} + +/* Free Tx queue resources until it reaches the target state. + * The queue must be already disconnected from the stack. 
+ */ +struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state) +{ + if (state < FUN_QSTATE_INIT_FULL) + fun_txq_free_dev(q); + + if (state == FUN_QSTATE_DESTROYED) { + fun_txq_free_sw(q); + q = NULL; + } + + return q; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h new file mode 100644 index 000000000000..7aed0561aeac --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNETH_TXRX_H +#define _FUNETH_TXRX_H + +#include <linux/netdevice.h> +#include <linux/u64_stats_sync.h> + +/* Tx descriptor size */ +#define FUNETH_SQE_SIZE 64U + +/* Size of device headers per Tx packet */ +#define FUNETH_FUNOS_HDR_SZ (sizeof(struct fun_eth_tx_req)) + +/* Number of gather list entries per Tx descriptor */ +#define FUNETH_GLE_PER_DESC (FUNETH_SQE_SIZE / sizeof(struct fun_dataop_gl)) + +/* Max gather list size in bytes for an sk_buff. */ +#define FUNETH_MAX_GL_SZ ((MAX_SKB_FRAGS + 1) * sizeof(struct fun_dataop_gl)) + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +# define FUNETH_TLS_SZ sizeof(struct fun_eth_tls) +#else +# define FUNETH_TLS_SZ 0 +#endif + +/* Max number of Tx descriptors for an sk_buff using a gather list. */ +#define FUNETH_MAX_GL_DESC \ + DIV_ROUND_UP((FUNETH_FUNOS_HDR_SZ + FUNETH_MAX_GL_SZ + FUNETH_TLS_SZ), \ + FUNETH_SQE_SIZE) + +/* Max number of Tx descriptors for any packet. */ +#define FUNETH_MAX_PKT_DESC FUNETH_MAX_GL_DESC + +/* Rx CQ descriptor size. */ +#define FUNETH_CQE_SIZE 64U + +/* Offset of cqe_info within a CQE. */ +#define FUNETH_CQE_INFO_OFFSET (FUNETH_CQE_SIZE - sizeof(struct fun_cqe_info)) + +/* Construct the IRQ portion of a CQ doorbell. The resulting value arms the + * interrupt with the supplied time delay and packet count moderation settings. 
+ */ +#define FUN_IRQ_CQ_DB(usec, pkts) \ + (FUN_DB_IRQ_ARM_F | ((usec) << FUN_DB_INTCOAL_USEC_S) | \ + ((pkts) << FUN_DB_INTCOAL_ENTRIES_S)) + +/* As above for SQ doorbells. */ +#define FUN_IRQ_SQ_DB(usec, pkts) \ + (FUN_DB_IRQ_ARM_F | \ + ((usec) << FUN_DB_INTCOAL_USEC_S) | \ + ((pkts) << FUN_DB_INTCOAL_ENTRIES_S)) + +/* Per packet tailroom. Present only for 1-frag packets. */ +#define FUN_RX_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + +/* Per packet headroom for XDP. Preferred over XDP_PACKET_HEADROOM to + * accommodate two packets per buffer for 4K pages and 1500B MTUs. + */ +#define FUN_XDP_HEADROOM 192 + +/* Initialization state of a queue. */ +enum { + FUN_QSTATE_DESTROYED, /* what queue? */ + FUN_QSTATE_INIT_SW, /* exists in SW, not on the device */ + FUN_QSTATE_INIT_FULL, /* exists both in SW and on device */ +}; + +/* Initialization state of an interrupt. */ +enum { + FUN_IRQ_INIT, /* initialized and in the XArray but inactive */ + FUN_IRQ_REQUESTED, /* request_irq() done */ + FUN_IRQ_ENABLED, /* processing enabled */ + FUN_IRQ_DISABLED, /* processing disabled */ +}; + +struct bpf_prog; + +struct funeth_txq_stats { /* per Tx queue SW counters */ + u64 tx_pkts; /* # of Tx packets */ + u64 tx_bytes; /* total bytes of Tx packets */ + u64 tx_cso; /* # of packets with checksum offload */ + u64 tx_tso; /* # of non-encapsulated TSO super-packets */ + u64 tx_encap_tso; /* # of encapsulated TSO super-packets */ + u64 tx_more; /* # of DBs elided due to xmit_more */ + u64 tx_nstops; /* # of times the queue has stopped */ + u64 tx_nrestarts; /* # of times the queue has restarted */ + u64 tx_map_err; /* # of packets dropped due to DMA mapping errors */ + u64 tx_xdp_full; /* # of XDP packets that could not be enqueued */ + u64 tx_tls_pkts; /* # of Tx TLS packets offloaded to HW */ + u64 tx_tls_bytes; /* Tx bytes of HW-handled TLS payload */ + u64 tx_tls_fallback; /* attempted Tx TLS offloads punted to SW */ + u64 tx_tls_drops; /* attempted Tx TLS offloads 
dropped */ +}; + +struct funeth_tx_info { /* per Tx descriptor state */ + union { + struct sk_buff *skb; /* associated packet */ + void *vaddr; /* start address for XDP */ + }; +}; + +struct funeth_txq { + /* RO cacheline of frequently accessed data */ + u32 mask; /* queue depth - 1 */ + u32 hw_qid; /* device ID of the queue */ + void *desc; /* base address of descriptor ring */ + struct funeth_tx_info *info; + struct device *dma_dev; /* device for DMA mappings */ + volatile __be64 *hw_wb; /* HW write-back location */ + u32 __iomem *db; /* SQ doorbell register address */ + struct netdev_queue *ndq; + dma_addr_t dma_addr; /* DMA address of descriptor ring */ + /* producer R/W cacheline */ + u16 qidx; /* queue index within net_device */ + u16 ethid; + u32 prod_cnt; /* producer counter */ + struct funeth_txq_stats stats; + /* shared R/W cacheline, primarily accessed by consumer */ + u32 irq_db_val; /* value written to IRQ doorbell */ + u32 cons_cnt; /* consumer (cleanup) counter */ + struct net_device *netdev; + struct fun_irq *irq; + int numa_node; + u8 init_state; /* queue initialization state */ + struct u64_stats_sync syncp; +}; + +struct funeth_rxq_stats { /* per Rx queue SW counters */ + u64 rx_pkts; /* # of received packets, including SW drops */ + u64 rx_bytes; /* total size of received packets */ + u64 rx_cso; /* # of packets with checksum offload */ + u64 rx_bufs; /* total # of Rx buffers provided to device */ + u64 gro_pkts; /* # of GRO superpackets */ + u64 gro_merged; /* # of pkts merged into existing GRO superpackets */ + u64 rx_page_alloc; /* # of page allocations for Rx buffers */ + u64 rx_budget; /* NAPI iterations that exhausted their budget */ + u64 rx_mem_drops; /* # of packets dropped due to memory shortage */ + u64 rx_map_err; /* # of page DMA mapping errors */ + u64 xdp_drops; /* XDP_DROPped packets */ + u64 xdp_tx; /* successful XDP transmits */ + u64 xdp_redir; /* successful XDP redirects */ + u64 xdp_err; /* packets dropped due to XDP errors 
*/ +}; + +struct funeth_rxbuf { /* per Rx buffer state */ + struct page *page; /* associated page */ + dma_addr_t dma_addr; /* DMA address of page start */ + int pg_refs; /* page refs held by driver */ + int node; /* page node, or -1 if it is PF_MEMALLOC */ +}; + +struct funeth_rx_cache { /* cache of DMA-mapped previously used buffers */ + struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */ + unsigned int prod_cnt; /* producer counter */ + unsigned int cons_cnt; /* consumer counter */ + unsigned int mask; /* depth - 1 */ +}; + +/* An Rx queue consists of a CQ and an SQ used to provide Rx buffers. */ +struct funeth_rxq { + struct net_device *netdev; + struct napi_struct *napi; + struct device *dma_dev; /* device for DMA mappings */ + void *cqes; /* base of CQ descriptor ring */ + const void *next_cqe_info; /* fun_cqe_info of next CQE */ + u32 __iomem *cq_db; /* CQ doorbell register address */ + unsigned int cq_head; /* CQ head index */ + unsigned int cq_mask; /* CQ depth - 1 */ + u16 phase; /* CQ phase tag */ + u16 qidx; /* queue index within net_device */ + unsigned int irq_db_val; /* IRQ info for CQ doorbell */ + struct fun_eprq_rqbuf *rqes; /* base of RQ descriptor ring */ + struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */ + struct funeth_rxbuf *cur_buf; /* currently active buffer */ + u32 __iomem *rq_db; /* RQ doorbell register address */ + unsigned int rq_cons; /* RQ consumer counter */ + unsigned int rq_mask; /* RQ depth - 1 */ + unsigned int buf_offset; /* offset of next pkt in head buffer */ + u8 xdp_flush; /* XDP flush types needed at NAPI end */ + u8 init_state; /* queue initialization state */ + u16 headroom; /* per packet headroom */ + unsigned int rq_cons_db; /* value of rq_cons at last RQ db */ + unsigned int rq_db_thres; /* # of new buffers needed to write RQ db */ + struct funeth_rxbuf spare_buf; /* spare for next buffer replacement */ + struct funeth_rx_cache cache; /* used buffer cache */ + struct bpf_prog *xdp_prog; /* 
optional XDP BPF program */ + struct funeth_rxq_stats stats; + dma_addr_t cq_dma_addr; /* DMA address of CQE ring */ + dma_addr_t rq_dma_addr; /* DMA address of RQE ring */ + u16 irq_cnt; + u32 hw_cqid; /* device ID of the queue's CQ */ + u32 hw_sqid; /* device ID of the queue's SQ */ + int numa_node; + struct u64_stats_sync syncp; + struct xdp_rxq_info xdp_rxq; +}; + +#define FUN_QSTAT_INC(q, counter) \ + do { \ + u64_stats_update_begin(&(q)->syncp); \ + (q)->stats.counter++; \ + u64_stats_update_end(&(q)->syncp); \ + } while (0) + +#define FUN_QSTAT_READ(q, seq, stats_copy) \ + do { \ + seq = u64_stats_fetch_begin(&(q)->syncp); \ + stats_copy = (q)->stats; \ + } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) + +#define FUN_INT_NAME_LEN (IFNAMSIZ + 16) + +struct fun_irq { + struct napi_struct napi; + struct funeth_txq *txq; + struct funeth_rxq *rxq; + u8 state; + u16 irq_idx; /* index of MSI-X interrupt */ + int irq; /* Linux IRQ vector */ + cpumask_t affinity_mask; /* IRQ affinity */ + struct irq_affinity_notify aff_notify; + char name[FUN_INT_NAME_LEN]; +} ____cacheline_internodealigned_in_smp; + +/* Return the start address of the idx-th Tx descriptor. 
*/ +static inline void *fun_tx_desc_addr(const struct funeth_txq *q, + unsigned int idx) +{ + return q->desc + idx * FUNETH_SQE_SIZE; +} + +static inline void fun_txq_wr_db(const struct funeth_txq *q) +{ + unsigned int tail = q->prod_cnt & q->mask; + + writel(tail, q->db); +} + +static inline int fun_irq_node(const struct fun_irq *p) +{ + return local_memory_node(cpu_to_node(cpumask_first(&p->affinity_mask))); +} + +int fun_rxq_napi_poll(struct napi_struct *napi, int budget); +int fun_txq_napi_poll(struct napi_struct *napi, int budget); +netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev); +bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len); +int fun_xdp_xmit_frames(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags); + +int funeth_txq_create(struct net_device *dev, unsigned int qidx, + unsigned int ndesc, struct fun_irq *irq, int state, + struct funeth_txq **qp); +int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq); +struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state); +int funeth_rxq_create(struct net_device *dev, unsigned int qidx, + unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, + int state, struct funeth_rxq **qp); +int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq); +struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state); +int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog); + +#endif /* _FUNETH_TXRX_H */ diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 61975cb9c1a4..869a4fe17c7c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2214,6 +2214,19 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) } /* + * Initialize the init_done completion and return code values. We + * can get a transport event just after registering the CRQ and the + * tasklet will use this to communicate the transport event. 
To ensure + * we don't miss the notification/error, initialize these _before_ + * registering the CRQ. + */ +static inline void reinit_init_done(struct ibmvnic_adapter *adapter) +{ + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; +} + +/* * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. */ @@ -2319,6 +2332,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; + reinit_init_done(adapter); + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { rc = init_crq_queue(adapter); } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { @@ -2462,7 +2477,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; - reinit_completion(&adapter->init_done); + reinit_init_done(adapter); + rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, @@ -2603,23 +2619,82 @@ out: static void __ibmvnic_reset(struct work_struct *work) { struct ibmvnic_adapter *adapter; - bool saved_state = false; + unsigned int timeout = 5000; struct ibmvnic_rwi *tmprwi; + bool saved_state = false; struct ibmvnic_rwi *rwi; unsigned long flags; - u32 reset_state; + struct device *dev; + bool need_reset; int num_fails = 0; + u32 reset_state; int rc = 0; adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); + dev = &adapter->vdev->dev; - if (test_and_set_bit_lock(0, &adapter->resetting)) { + /* Wait for ibmvnic_probe() to complete. If probe is taking too long + * or if another reset is in progress, defer work for now. If probe + * eventually fails it will flush and terminate our work. + * + * Three possibilities here: + * 1. Adapter being removed - just return + * 2. Timed out on probe or another reset in progress - delay the work + * 3. 
Completed probe - perform any resets in queue + */ + if (adapter->state == VNIC_PROBING && + !wait_for_completion_timeout(&adapter->probe_done, timeout)) { + dev_err(dev, "Reset thread timed out on probe"); queue_delayed_work(system_long_wq, &adapter->ibmvnic_delayed_reset, IBMVNIC_RESET_DELAY); return; } + /* adapter is done with probe (i.e. state is never VNIC_PROBING now) */ + if (adapter->state == VNIC_REMOVING) + return; + + /* ->rwi_list is stable now (no one else is removing entries) */ + + /* ibmvnic_probe() may have purged the reset queue after we were + * scheduled to process a reset so there may be no resets to process. + * Before setting the ->resetting bit though, we have to make sure + * that there is in fact a reset to process. Otherwise we may race + * with ibmvnic_open() and end up leaving the vnic down: + * + * __ibmvnic_reset() ibmvnic_open() + * ----------------- -------------- + * + * set ->resetting bit + * find ->resetting bit is set + * set ->state to IBMVNIC_OPEN (i.e. + * assume reset will open device) + * return + * find reset queue empty + * return + * + * Neither performed vnic login/open and vnic stays down + * + * If we hold the lock and conditionally set the bit, either we + * or ibmvnic_open() will complete the open. 
+ */ + need_reset = false; + spin_lock(&adapter->rwi_lock); + if (!list_empty(&adapter->rwi_list)) { + if (test_and_set_bit_lock(0, &adapter->resetting)) { + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + } else { + need_reset = true; + } + } + spin_unlock(&adapter->rwi_lock); + + if (!need_reset) + return; + rwi = get_next_rwi(adapter); while (rwi) { spin_lock_irqsave(&adapter->state_lock, flags); @@ -2736,12 +2811,23 @@ static void __ibmvnic_delayed_reset(struct work_struct *work) __ibmvnic_reset(&adapter->ibmvnic_reset); } +static void flush_reset_queue(struct ibmvnic_adapter *adapter) +{ + struct list_head *entry, *tmp_entry; + + if (!list_empty(&adapter->rwi_list)) { + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { + list_del(entry); + kfree(list_entry(entry, struct ibmvnic_rwi, list)); + } + } +} + static int ibmvnic_reset(struct ibmvnic_adapter *adapter, enum ibmvnic_reset_reason reason) { - struct list_head *entry, *tmp_entry; - struct ibmvnic_rwi *rwi, *tmp; struct net_device *netdev = adapter->netdev; + struct ibmvnic_rwi *rwi, *tmp; unsigned long flags; int ret; @@ -2760,13 +2846,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, goto err; } - if (adapter->state == VNIC_PROBING) { - netdev_warn(netdev, "Adapter reset during probe\n"); - adapter->init_done_rc = -EAGAIN; - ret = EAGAIN; - goto err; - } - list_for_each_entry(tmp, &adapter->rwi_list, list) { if (tmp->reset_reason == reason) { netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", @@ -2784,10 +2863,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, /* if we just received a transport event, * flush reset queue and process this reset */ - if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) { - list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) - list_del(entry); - } + if (adapter->force_reset_recovery) + flush_reset_queue(adapter); + rwi->reset_reason = reason; 
list_add_tail(&rwi->list, &adapter->rwi_list); netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", @@ -5338,9 +5416,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, } if (!completion_done(&adapter->init_done)) { - complete(&adapter->init_done); if (!adapter->init_done_rc) adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); } break; @@ -5363,6 +5441,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, adapter->fw_done_rc = -EIO; complete(&adapter->fw_done); } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + if (!completion_done(&adapter->stats_done)) complete(&adapter->stats_done); if (test_bit(0, &adapter->resetting)) @@ -5679,10 +5764,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) adapter->from_passive_init = false; - if (reset) - reinit_completion(&adapter->init_done); - - adapter->init_done_rc = 0; rc = ibmvnic_send_crq_init(adapter); if (rc) { dev_err(dev, "Send crq init failed with error %d\n", rc); @@ -5696,12 +5777,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) if (adapter->init_done_rc) { release_crq_queue(adapter); + dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); return adapter->init_done_rc; } if (adapter->from_passive_init) { adapter->state = VNIC_OPEN; adapter->from_passive_init = false; + dev_err(dev, "CRQ-init failed, passive-init\n"); return -EINVAL; } @@ -5741,6 +5824,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct ibmvnic_adapter *adapter; struct net_device *netdev; unsigned char *mac_addr_p; + unsigned long flags; bool init_success; int rc; @@ -5785,6 +5869,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) spin_lock_init(&adapter->rwi_lock); spin_lock_init(&adapter->state_lock); mutex_init(&adapter->fw_lock); + 
init_completion(&adapter->probe_done); init_completion(&adapter->init_done); init_completion(&adapter->fw_done); init_completion(&adapter->reset_done); @@ -5795,6 +5880,33 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) init_success = false; do { + reinit_init_done(adapter); + + /* clear any failovers we got in the previous pass + * since we are reinitializing the CRQ + */ + adapter->failover_pending = false; + + /* If we had already initialized CRQ, we may have one or + * more resets queued already. Discard those and release + * the CRQ before initializing the CRQ again. + */ + release_crq_queue(adapter); + + /* Since we are still in PROBING state, __ibmvnic_reset() + * will not access the ->rwi_list and since we released CRQ, + * we won't get _new_ transport events. But there maybe an + * ongoing ibmvnic_reset() call. So serialize access to + * rwi_list. If we win the race, ibvmnic_reset() could add + * a reset after we purged but thats ok - we just may end + * up with an extra reset (i.e similar to having two or more + * resets in the queue at once). + * CHECK. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + rc = init_crq_queue(adapter); if (rc) { dev_err(&dev->dev, "Couldn't initialize crq. 
rc=%d\n", @@ -5826,12 +5938,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_dev_file_err; netif_carrier_off(netdev); - rc = register_netdev(netdev); - if (rc) { - dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); - goto ibmvnic_register_fail; - } - dev_info(&dev->dev, "ibmvnic registered\n"); if (init_success) { adapter->state = VNIC_PROBED; @@ -5844,6 +5950,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->wait_for_reset = false; adapter->last_reset_time = jiffies; + + rc = register_netdev(netdev); + if (rc) { + dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); + goto ibmvnic_register_fail; + } + dev_info(&dev->dev, "ibmvnic registered\n"); + + complete(&adapter->probe_done); + return 0; ibmvnic_register_fail: @@ -5858,6 +5974,17 @@ ibmvnic_stats_fail: ibmvnic_init_fail: release_sub_crqs(adapter, 1); release_crq_queue(adapter); + + /* cleanup worker thread after releasing CRQ so we don't get + * transport events (i.e new work items for the worker thread). 
+ */ + adapter->state = VNIC_REMOVING; + complete(&adapter->probe_done); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + flush_reset_queue(adapter); + mutex_destroy(&adapter->fw_lock); free_netdev(netdev); @@ -5934,10 +6061,14 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, be64_to_cpu(session_token)); rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_SESSION_ERR_DETECTED, session_token, 0, 0); - if (rc) + if (rc) { netdev_err(netdev, "H_VIOCTL initiated failover failed, rc %ld\n", rc); + goto last_resort; + } + + return count; last_resort: netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4a7a56ff74ce..fa2d607a7b1b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -930,6 +930,7 @@ struct ibmvnic_adapter { struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_pool *tso_pool; + struct completion probe_done; struct completion init_done; int init_done_rc; diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index bcf680e83811..13382df2f2ef 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -630,6 +630,7 @@ struct e1000_phy_info { bool disable_polarity_correction; bool is_mdix; bool polarity_correction; + bool reset_disable; bool speed_downgraded; bool autoneg_wait_to_complete; }; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index c908c84b86d2..d60e2016d03c 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2050,6 +2050,10 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) bool blocked = false; int i = 0; + /* Check the PHY (LCD) reset flag */ + if (hw->phy.reset_disable) + return true; + while ((blocked = 
!(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) usleep_range(10000, 11000); @@ -4136,9 +4140,9 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) return ret_val; if (!(data & valid_csum_mask)) { - e_dbg("NVM Checksum Invalid\n"); + e_dbg("NVM Checksum valid bit not set\n"); - if (hw->mac.type < e1000_pch_cnp) { + if (hw->mac.type < e1000_pch_tgp) { data |= valid_csum_mask; ret_val = e1000_write_nvm(hw, word, 1, &data); if (ret_val) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 2504b11c3169..638a3ddd7ada 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -271,6 +271,7 @@ #define I217_CGFREG_ENABLE_MTA_RESET 0x0002 #define I217_MEMPWR PHY_REG(772, 26) #define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010 +#define I217_MEMPWR_MOEM 0x1000 /* Receive Address Initial CRC Calculation */ #define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4)) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6fb3437f68e0..fa06f68c8c80 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6987,8 +6987,21 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); + struct e1000_hw *hw = &adapter->hw; + u16 phy_data; int rc; + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && + hw->mac.type >= e1000_pch_adp) { + /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */ + e1e_rphy(hw, I217_MEMPWR, &phy_data); + phy_data |= I217_MEMPWR_MOEM; + e1e_wphy(hw, I217_MEMPWR, phy_data); + + /* Disable LCD reset */ + hw->phy.reset_disable = true; + } + e1000e_flush_lpic(pdev); e1000e_pm_freeze(dev); @@ -7010,6 +7023,8 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct 
net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); + struct e1000_hw *hw = &adapter->hw; + u16 phy_data; int rc; /* Introduce S0ix implementation */ @@ -7020,6 +7035,17 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) if (rc) return rc; + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && + hw->mac.type >= e1000_pch_adp) { + /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */ + e1e_rphy(hw, I217_MEMPWR, &phy_data); + phy_data &= ~I217_MEMPWR_MOEM; + e1e_wphy(hw, I217_MEMPWR, phy_data); + + /* Enable LCD reset */ + hw->phy.reset_disable = false; + } + return e1000e_pm_thaw(dev); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9b7ce6d9a92b..6778df2177a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5386,15 +5386,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, /* There is no need to reset BW when mqprio mode is on. 
*/ if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - - if (!vsi->mqprio_qopt.qopt.hw) { - if (pf->flags & I40E_FLAG_DCB_ENABLED) - goto skip_reset; - - if (IS_ENABLED(CONFIG_I40E_DCB) && - i40e_dcb_hw_get_num_tc(&pf->hw) == 1) - goto skip_reset; - + if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, @@ -5402,8 +5394,6 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, vsi->seid); return ret; } - -skip_reset: memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 5a997b0d07d8..c1d25b0b0ca2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -218,7 +218,6 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) ntu += nb_buffs; if (ntu == rx_ring->count) { rx_desc = I40E_RX_DESC(rx_ring, 0); - xdp = i40e_rx_bi(rx_ring, 0); ntu = 0; } @@ -328,11 +327,11 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); u16 next_to_clean = rx_ring->next_to_clean; u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; bool failure = false; + u16 cleaned_count; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 59806d1f7e79..bed2ebf15627 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -44,6 +44,9 @@ #define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define PFX "iavf: " +int iavf_status_to_errno(enum iavf_status status); +int virtchnl_status_to_errno(enum 
virtchnl_status_code v_status); + /* VSI state flags shared with common code */ enum iavf_vsi_state_t { __IAVF_VSI_DOWN, @@ -188,7 +191,7 @@ enum iavf_state_t { __IAVF_REMOVE, /* driver is being unloaded */ __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ - __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS, + __IAVF_INIT_EXTENDED_CAPS, /* process extended caps which require aq msg exchange */ __IAVF_INIT_CONFIG_ADAPTER, __IAVF_INIT_SW, /* got resources, setting up structs */ __IAVF_INIT_FAILED, /* init failed, restarting procedure */ @@ -201,6 +204,10 @@ enum iavf_state_t { __IAVF_RUNNING, /* opened, working */ }; +enum iavf_critical_section_t { + __IAVF_IN_REMOVE_TASK, /* device being removed */ +}; + #define IAVF_CLOUD_FIELD_OMAC 0x01 #define IAVF_CLOUD_FIELD_IMAC 0x02 #define IAVF_CLOUD_FIELD_IVLAN 0x04 @@ -246,7 +253,6 @@ struct iavf_adapter { struct list_head mac_filter_list; struct mutex crit_lock; struct mutex client_lock; - struct mutex remove_lock; /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; @@ -284,6 +290,7 @@ struct iavf_adapter { #define IAVF_FLAG_LEGACY_RX BIT(15) #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) +#define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ @@ -329,6 +336,21 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION BIT_ULL(37) #define IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION BIT_ULL(38) + /* flags for processing extended capability messages during + * __IAVF_INIT_EXTENDED_CAPS. Each capability exchange requires + * both a SEND and a RECV step, which must be processed in sequence. + * + * During the __IAVF_INIT_EXTENDED_CAPS state, the driver will + * process one flag at a time during each state loop. 
+ */ + u64 extended_caps; +#define IAVF_EXTENDED_CAP_SEND_VLAN_V2 BIT_ULL(0) +#define IAVF_EXTENDED_CAP_RECV_VLAN_V2 BIT_ULL(1) + +#define IAVF_EXTENDED_CAPS \ + (IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \ + IAVF_EXTENDED_CAP_RECV_VLAN_V2) + /* OS defined structs */ struct net_device *netdev; struct pci_dev *pdev; @@ -510,7 +532,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter); void iavf_del_vlans(struct iavf_adapter *adapter); void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags); void iavf_request_stats(struct iavf_adapter *adapter); -void iavf_request_reset(struct iavf_adapter *adapter); +int iavf_request_reset(struct iavf_adapter *adapter); void iavf_get_hena(struct iavf_adapter *adapter); void iavf_set_hena(struct iavf_adapter *adapter); void iavf_set_rss_key(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index e9cc7f6ddc46..34e46a23894f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -131,8 +131,8 @@ const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err) return "IAVF_ERR_INVALID_MAC_ADDR"; case IAVF_ERR_DEVICE_NOT_SUPPORTED: return "IAVF_ERR_DEVICE_NOT_SUPPORTED"; - case IAVF_ERR_MASTER_REQUESTS_PENDING: - return "IAVF_ERR_MASTER_REQUESTS_PENDING"; + case IAVF_ERR_PRIMARY_REQUESTS_PENDING: + return "IAVF_ERR_PRIMARY_REQUESTS_PENDING"; case IAVF_ERR_INVALID_LINK_SETTINGS: return "IAVF_ERR_INVALID_LINK_SETTINGS"; case IAVF_ERR_AUTONEG_NOT_COMPLETE: diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b0bd95c85480..45a1e88c9276 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -51,6 +51,113 @@ MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; struct workqueue_struct *iavf_wq; +int iavf_status_to_errno(enum iavf_status status) +{ + switch 
(status) { + case IAVF_SUCCESS: + return 0; + case IAVF_ERR_PARAM: + case IAVF_ERR_MAC_TYPE: + case IAVF_ERR_INVALID_MAC_ADDR: + case IAVF_ERR_INVALID_LINK_SETTINGS: + case IAVF_ERR_INVALID_PD_ID: + case IAVF_ERR_INVALID_QP_ID: + case IAVF_ERR_INVALID_CQ_ID: + case IAVF_ERR_INVALID_CEQ_ID: + case IAVF_ERR_INVALID_AEQ_ID: + case IAVF_ERR_INVALID_SIZE: + case IAVF_ERR_INVALID_ARP_INDEX: + case IAVF_ERR_INVALID_FPM_FUNC_ID: + case IAVF_ERR_QP_INVALID_MSG_SIZE: + case IAVF_ERR_INVALID_FRAG_COUNT: + case IAVF_ERR_INVALID_ALIGNMENT: + case IAVF_ERR_INVALID_PUSH_PAGE_INDEX: + case IAVF_ERR_INVALID_IMM_DATA_SIZE: + case IAVF_ERR_INVALID_VF_ID: + case IAVF_ERR_INVALID_HMCFN_ID: + case IAVF_ERR_INVALID_PBLE_INDEX: + case IAVF_ERR_INVALID_SD_INDEX: + case IAVF_ERR_INVALID_PAGE_DESC_INDEX: + case IAVF_ERR_INVALID_SD_TYPE: + case IAVF_ERR_INVALID_HMC_OBJ_INDEX: + case IAVF_ERR_INVALID_HMC_OBJ_COUNT: + case IAVF_ERR_INVALID_SRQ_ARM_LIMIT: + return -EINVAL; + case IAVF_ERR_NVM: + case IAVF_ERR_NVM_CHECKSUM: + case IAVF_ERR_PHY: + case IAVF_ERR_CONFIG: + case IAVF_ERR_UNKNOWN_PHY: + case IAVF_ERR_LINK_SETUP: + case IAVF_ERR_ADAPTER_STOPPED: + case IAVF_ERR_PRIMARY_REQUESTS_PENDING: + case IAVF_ERR_AUTONEG_NOT_COMPLETE: + case IAVF_ERR_RESET_FAILED: + case IAVF_ERR_BAD_PTR: + case IAVF_ERR_SWFW_SYNC: + case IAVF_ERR_QP_TOOMANY_WRS_POSTED: + case IAVF_ERR_QUEUE_EMPTY: + case IAVF_ERR_FLUSHED_QUEUE: + case IAVF_ERR_OPCODE_MISMATCH: + case IAVF_ERR_CQP_COMPL_ERROR: + case IAVF_ERR_BACKING_PAGE_ERROR: + case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE: + case IAVF_ERR_MEMCPY_FAILED: + case IAVF_ERR_SRQ_ENABLED: + case IAVF_ERR_ADMIN_QUEUE_ERROR: + case IAVF_ERR_ADMIN_QUEUE_FULL: + case IAVF_ERR_BAD_IWARP_CQE: + case IAVF_ERR_NVM_BLANK_MODE: + case IAVF_ERR_PE_DOORBELL_NOT_ENABLED: + case IAVF_ERR_DIAG_TEST_FAILED: + case IAVF_ERR_FIRMWARE_API_VERSION: + case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR: + return -EIO; + case IAVF_ERR_DEVICE_NOT_SUPPORTED: + return -ENODEV; + case IAVF_ERR_NO_AVAILABLE_VSI: 
+ case IAVF_ERR_RING_FULL: + return -ENOSPC; + case IAVF_ERR_NO_MEMORY: + return -ENOMEM; + case IAVF_ERR_TIMEOUT: + case IAVF_ERR_ADMIN_QUEUE_TIMEOUT: + return -ETIMEDOUT; + case IAVF_ERR_NOT_IMPLEMENTED: + case IAVF_NOT_SUPPORTED: + return -EOPNOTSUPP; + case IAVF_ERR_ADMIN_QUEUE_NO_WORK: + return -EALREADY; + case IAVF_ERR_NOT_READY: + return -EBUSY; + case IAVF_ERR_BUF_TOO_SHORT: + return -EMSGSIZE; + } + + return -EIO; +} + +int virtchnl_status_to_errno(enum virtchnl_status_code v_status) +{ + switch (v_status) { + case VIRTCHNL_STATUS_SUCCESS: + return 0; + case VIRTCHNL_STATUS_ERR_PARAM: + case VIRTCHNL_STATUS_ERR_INVALID_VF_ID: + return -EINVAL; + case VIRTCHNL_STATUS_ERR_NO_MEMORY: + return -ENOMEM; + case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH: + case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR: + case VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR: + return -EIO; + case VIRTCHNL_STATUS_ERR_NOT_SUPPORTED: + return -EOPNOTSUPP; + } + + return -EIO; +} + /** * iavf_pdev_to_adapter - go from pci_dev to adapter * @pdev: pci_dev pointer @@ -302,8 +409,9 @@ static irqreturn_t iavf_msix_aq(int irq, void *data) rd32(hw, IAVF_VFINT_ICR01); rd32(hw, IAVF_VFINT_ICR0_ENA1); - /* schedule work on the private workqueue */ - queue_work(iavf_wq, &adapter->adminq_task); + if (adapter->state != __IAVF_REMOVE) + /* schedule work on the private workqueue */ + queue_work(iavf_wq, &adapter->adminq_task); return IRQ_HANDLED; } @@ -876,6 +984,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; f->is_new_mac = true; + f->is_primary = false; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; } else { f->remove = false; @@ -909,17 +1018,22 @@ static int iavf_set_mac(struct net_device *netdev, void *p) f = iavf_find_filter(adapter, hw->mac.addr); if (f) { f->remove = true; + f->is_primary = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } f = iavf_add_filter(adapter, addr->sa_data); - - 
spin_unlock_bh(&adapter->mac_vlan_list_lock); - if (f) { + f->is_primary = true; ether_addr_copy(hw->mac.addr, addr->sa_data); } + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + /* schedule the watchdog task to immediately process the request */ + if (f) + queue_work(iavf_wq, &adapter->watchdog_task.work); + return (f == NULL) ? -ENOMEM : 0; } @@ -1136,8 +1250,7 @@ void iavf_down(struct iavf_adapter *adapter) rss->state = IAVF_ADV_RSS_DEL_REQUEST; spin_unlock_bh(&adapter->adv_rss_lock); - if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && - adapter->state != __IAVF_RESETTING) { + if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) { /* cancel any current operation */ adapter->current_op = VIRTCHNL_OP_UNKNOWN; /* Schedule operations to close down the HW. Don't wait @@ -1421,7 +1534,7 @@ static int iavf_config_rss_aq(struct iavf_adapter *adapter) struct iavf_aqc_get_set_rss_key_data *rss_key = (struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key; struct iavf_hw *hw = &adapter->hw; - int ret = 0; + enum iavf_status status; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -1430,24 +1543,25 @@ static int iavf_config_rss_aq(struct iavf_adapter *adapter) return -EBUSY; } - ret = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); - if (ret) { + status = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); + if (status) { dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", - iavf_stat_str(hw, ret), + iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); - return ret; + return iavf_status_to_errno(status); } - ret = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, - adapter->rss_lut, adapter->rss_lut_size); - if (ret) { + status = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, + adapter->rss_lut, adapter->rss_lut_size); + if (status) { dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n", - iavf_stat_str(hw, ret), + iavf_stat_str(hw, status), iavf_aq_str(hw, 
hw->aq.asq_last_status)); + return iavf_status_to_errno(status); } - return ret; + return 0; } @@ -1517,7 +1631,6 @@ static void iavf_fill_rss_lut(struct iavf_adapter *adapter) static int iavf_init_rss(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; - int ret; if (!RSS_PF(adapter)) { /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ @@ -1533,9 +1646,8 @@ static int iavf_init_rss(struct iavf_adapter *adapter) iavf_fill_rss_lut(adapter); netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size); - ret = iavf_config_rss(adapter); - return ret; + return iavf_config_rss(adapter); } /** @@ -2003,23 +2115,24 @@ static void iavf_startup(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; - int err; + enum iavf_status status; + int ret; WARN_ON(adapter->state != __IAVF_STARTUP); /* driver loaded, probe complete */ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - err = iavf_set_mac_type(hw); - if (err) { - dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err); + status = iavf_set_mac_type(hw); + if (status) { + dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", status); goto err; } - err = iavf_check_reset_complete(hw); - if (err) { + ret = iavf_check_reset_complete(hw); + if (ret) { dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", - err); + ret); goto err; } hw->aq.num_arq_entries = IAVF_AQ_LEN; @@ -2027,14 +2140,15 @@ static void iavf_startup(struct iavf_adapter *adapter) hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - err = iavf_init_adminq(hw); - if (err) { - dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", err); + status = iavf_init_adminq(hw); + if (status) { + dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", + status); goto err; } - err = iavf_send_api_ver(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); + ret = 
iavf_send_api_ver(adapter); + if (ret) { + dev_err(&pdev->dev, "Unable to send to PF (%d)\n", ret); iavf_shutdown_adminq(hw); goto err; } @@ -2070,7 +2184,7 @@ static void iavf_init_version_check(struct iavf_adapter *adapter) /* aq msg sent, awaiting reply */ err = iavf_verify_api_ver(adapter); if (err) { - if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) + if (err == -EALREADY) err = iavf_send_api_ver(adapter); else dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", @@ -2171,11 +2285,11 @@ static void iavf_init_get_resources(struct iavf_adapter *adapter) } } err = iavf_get_vf_config(adapter); - if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) { + if (err == -EALREADY) { err = iavf_send_vf_config_msg(adapter); goto err_alloc; - } else if (err == IAVF_ERR_PARAM) { - /* We only get ERR_PARAM if the device is in a very bad + } else if (err == -EINVAL) { + /* We only get -EINVAL if the device is in a very bad * state or if we've been disabled for previous bad * behavior. Either way, we're done now. */ @@ -2189,26 +2303,18 @@ static void iavf_init_get_resources(struct iavf_adapter *adapter) } err = iavf_parse_vf_resource_msg(adapter); - if (err) - goto err_alloc; - - err = iavf_send_vf_offload_vlan_v2_msg(adapter); - if (err == -EOPNOTSUPP) { - /* underlying PF doesn't support VIRTCHNL_VF_OFFLOAD_VLAN_V2, so - * go directly to finishing initialization - */ - iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); - return; - } else if (err) { - dev_err(&pdev->dev, "Unable to send offload vlan v2 request (%d)\n", + if (err) { + dev_err(&pdev->dev, "Failed to parse VF resource message from PF (%d)\n", err); goto err_alloc; } - - /* underlying PF supports VIRTCHNL_VF_OFFLOAD_VLAN_V2, so update the - * state accordingly + /* Some features require additional messages to negotiate extended + * capabilities. These are processed in sequence by the + * __IAVF_INIT_EXTENDED_CAPS driver state. 
*/ - iavf_change_state(adapter, __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS); + adapter->extended_caps = IAVF_EXTENDED_CAPS; + + iavf_change_state(adapter, __IAVF_INIT_EXTENDED_CAPS); return; err_alloc: @@ -2219,35 +2325,93 @@ err: } /** - * iavf_init_get_offload_vlan_v2_caps - part of driver startup + * iavf_init_send_offload_vlan_v2_caps - part of initializing VLAN V2 caps * @adapter: board private structure * - * Function processes __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS driver state if the - * VF negotiates VIRTCHNL_VF_OFFLOAD_VLAN_V2. If VIRTCHNL_VF_OFFLOAD_VLAN_V2 is - * not negotiated, then this state will never be entered. + * Function processes send of the extended VLAN V2 capability message to the + * PF. Must clear IAVF_EXTENDED_CAP_RECV_VLAN_V2 if the message is not sent, + * e.g. due to PF not negotiating VIRTCHNL_VF_OFFLOAD_VLAN_V2. + */ +static void iavf_init_send_offload_vlan_v2_caps(struct iavf_adapter *adapter) +{ + int ret; + + WARN_ON(!(adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_VLAN_V2)); + + ret = iavf_send_vf_offload_vlan_v2_msg(adapter); + if (ret && ret == -EOPNOTSUPP) { + /* PF does not support VIRTCHNL_VF_OFFLOAD_V2. In this case, + * we did not send the capability exchange message and do not + * expect a response. + */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_VLAN_V2; + } + + /* We sent the message, so move on to the next step */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_VLAN_V2; +} + +/** + * iavf_init_recv_offload_vlan_v2_caps - part of initializing VLAN V2 caps + * @adapter: board private structure + * + * Function processes receipt of the extended VLAN V2 capability message from + * the PF. 
**/ -static void iavf_init_get_offload_vlan_v2_caps(struct iavf_adapter *adapter) +static void iavf_init_recv_offload_vlan_v2_caps(struct iavf_adapter *adapter) { int ret; - WARN_ON(adapter->state != __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS); + WARN_ON(!(adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_VLAN_V2)); memset(&adapter->vlan_v2_caps, 0, sizeof(adapter->vlan_v2_caps)); ret = iavf_get_vf_vlan_v2_caps(adapter); - if (ret) { - if (ret == IAVF_ERR_ADMIN_QUEUE_NO_WORK) - iavf_send_vf_offload_vlan_v2_msg(adapter); + if (ret) goto err; - } - iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); + /* We've processed receipt of the VLAN V2 caps message */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_VLAN_V2; return; err: + /* We didn't receive a reply. Make sure we try sending again when + * __IAVF_INIT_FAILED attempts to recover. + */ + adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_VLAN_V2; iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** + * iavf_init_process_extended_caps - Part of driver startup + * @adapter: board private structure + * + * Function processes __IAVF_INIT_EXTENDED_CAPS driver state. This state + * handles negotiating capabilities for features which require an additional + * message. + * + * Once all extended capabilities exchanges are finished, the driver will + * transition into __IAVF_INIT_CONFIG_ADAPTER. 
+ */ +static void iavf_init_process_extended_caps(struct iavf_adapter *adapter) +{ + WARN_ON(adapter->state != __IAVF_INIT_EXTENDED_CAPS); + + /* Process capability exchange for VLAN V2 */ + if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_VLAN_V2) { + iavf_init_send_offload_vlan_v2_caps(adapter); + return; + } else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_VLAN_V2) { + iavf_init_recv_offload_vlan_v2_caps(adapter); + return; + } + + /* When we reach here, no further extended capabilities exchanges are + * necessary, so we finally transition into __IAVF_INIT_CONFIG_ADAPTER + */ + iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); +} + +/** * iavf_init_config_adapter - last part of driver startup * @adapter: board private structure * @@ -2374,17 +2538,22 @@ static void iavf_watchdog_task(struct work_struct *work) struct iavf_hw *hw = &adapter->hw; u32 reg_val; - if (!mutex_trylock(&adapter->crit_lock)) + if (!mutex_trylock(&adapter->crit_lock)) { + if (adapter->state == __IAVF_REMOVE) + return; + goto restart_watchdog; + } if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); - if (adapter->flags & IAVF_FLAG_RESET_NEEDED && - adapter->state != __IAVF_RESETTING) { - iavf_change_state(adapter, __IAVF_RESETTING); + if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; + mutex_unlock(&adapter->crit_lock); + queue_work(iavf_wq, &adapter->reset_task); + return; } switch (adapter->state) { @@ -2406,8 +2575,8 @@ static void iavf_watchdog_task(struct work_struct *work) queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(1)); return; - case __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS: - iavf_init_get_offload_vlan_v2_caps(adapter); + case __IAVF_INIT_EXTENDED_CAPS: + iavf_init_process_extended_caps(adapter); mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(1)); @@ -2419,6 +2588,15 @@ static 
void iavf_watchdog_task(struct work_struct *work) msecs_to_jiffies(1)); return; case __IAVF_INIT_FAILED: + if (test_bit(__IAVF_IN_REMOVE_TASK, + &adapter->crit_section)) { + /* Do not update the state and do not reschedule + * watchdog task, iavf_remove should handle this state + * as it can loop forever + */ + mutex_unlock(&adapter->crit_lock); + return; + } if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, "Failed to communicate with PF; waiting before retry\n"); @@ -2435,6 +2613,17 @@ static void iavf_watchdog_task(struct work_struct *work) queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ); return; case __IAVF_COMM_FAILED: + if (test_bit(__IAVF_IN_REMOVE_TASK, + &adapter->crit_section)) { + /* Set state to __IAVF_INIT_FAILED and perform remove + * steps. Remove IAVF_FLAG_PF_COMMS_FAILED so the task + * doesn't bring the state back to __IAVF_COMM_FAILED. + */ + iavf_change_state(adapter, __IAVF_INIT_FAILED); + adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; + mutex_unlock(&adapter->crit_lock); + return; + } reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; if (reg_val == VIRTCHNL_VFR_VFACTIVE || @@ -2507,7 +2696,8 @@ static void iavf_watchdog_task(struct work_struct *work) schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); mutex_unlock(&adapter->crit_lock); restart_watchdog: - queue_work(iavf_wq, &adapter->adminq_task); + if (adapter->state >= __IAVF_DOWN) + queue_work(iavf_wq, &adapter->adminq_task); if (adapter->aq_required) queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(20)); @@ -2594,6 +2784,7 @@ static void iavf_reset_task(struct work_struct *work) struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f, *ftmp; struct iavf_cloud_filter *cf; + enum iavf_status status; u32 reg_val; int i = 0, err; bool running; @@ -2601,13 +2792,13 @@ static void iavf_reset_task(struct work_struct *work) /* When device is being removed it doesn't make sense to run the reset * 
task, just return in such a case. */ - if (mutex_is_locked(&adapter->remove_lock)) - return; + if (!mutex_trylock(&adapter->crit_lock)) { + if (adapter->state != __IAVF_REMOVE) + queue_work(iavf_wq, &adapter->reset_task); - if (iavf_lock_timeout(&adapter->crit_lock, 200)) { - schedule_work(&adapter->reset_task); return; } + while (!mutex_trylock(&adapter->client_lock)) usleep_range(500, 1000); if (CLIENT_ENABLED(adapter)) { @@ -2662,6 +2853,7 @@ static void iavf_reset_task(struct work_struct *work) reg_val); iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -2670,8 +2862,7 @@ continue_reset: * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. */ - running = ((adapter->state == __IAVF_RUNNING) || - (adapter->state == __IAVF_RESETTING)); + running = adapter->state == __IAVF_RUNNING; if (running) { netdev->flags &= ~IFF_UP; @@ -2695,10 +2886,12 @@ continue_reset: /* kill and reinit the admin queue */ iavf_shutdown_adminq(hw); adapter->current_op = VIRTCHNL_OP_UNKNOWN; - err = iavf_init_adminq(hw); - if (err) + status = iavf_init_adminq(hw); + if (status) { dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", - err); + status); + goto reset_err; + } adapter->aq_required = 0; if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) { @@ -2826,13 +3019,19 @@ static void iavf_adminq_task(struct work_struct *work) if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) goto out; + if (!mutex_trylock(&adapter->crit_lock)) { + if (adapter->state == __IAVF_REMOVE) + return; + + queue_work(iavf_wq, &adapter->adminq_task); + goto out; + } + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); if (!event.msg_buf) goto out; - if (iavf_lock_timeout(&adapter->crit_lock, 200)) - goto freedom; do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum 
virtchnl_ops)le32_to_cpu(event.desc.cookie_high); @@ -2848,6 +3047,24 @@ static void iavf_adminq_task(struct work_struct *work) } while (pending); mutex_unlock(&adapter->crit_lock); + if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) { + if (adapter->netdev_registered || + !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { + struct net_device *netdev = adapter->netdev; + + rtnl_lock(); + netdev_update_features(netdev); + rtnl_unlock(); + /* Request VLAN offload settings */ + if (VLAN_V2_ALLOWED(adapter)) + iavf_set_vlan_offload_features + (adapter, 0, netdev->features); + + iavf_set_queue_vlan_tag_loc(adapter); + } + + adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; + } if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || adapter->state == __IAVF_RESETTING) @@ -3800,11 +4017,12 @@ static int iavf_close(struct net_device *netdev) struct iavf_adapter *adapter = netdev_priv(netdev); int status; - if (adapter->state <= __IAVF_DOWN_PENDING) - return 0; + mutex_lock(&adapter->crit_lock); - while (!mutex_trylock(&adapter->crit_lock)) - usleep_range(500, 1000); + if (adapter->state <= __IAVF_DOWN_PENDING) { + mutex_unlock(&adapter->crit_lock); + return 0; + } set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); if (CLIENT_ENABLED(adapter)) @@ -3853,8 +4071,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) iavf_notify_client_l2_params(&adapter->vsi); adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; } - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + queue_work(iavf_wq, &adapter->reset_task); + } return 0; } @@ -4428,7 +4649,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ mutex_init(&adapter->crit_lock); mutex_init(&adapter->client_lock); - mutex_init(&adapter->remove_lock); mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); @@ -4544,7 +4764,6 @@ 
static int __maybe_unused iavf_resume(struct device *dev_d) static void iavf_remove(struct pci_dev *pdev) { struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - enum iavf_state_t prev_state = adapter->last_state; struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; @@ -4553,14 +4772,30 @@ static void iavf_remove(struct pci_dev *pdev) struct iavf_cloud_filter *cf, *cftmp; struct iavf_hw *hw = &adapter->hw; int err; - /* Indicate we are in remove and not to run reset_task */ - mutex_lock(&adapter->remove_lock); - cancel_work_sync(&adapter->reset_task); + + set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); + /* Wait until port initialization is complete. + * There are flows where register/unregister netdev may race. + */ + while (1) { + mutex_lock(&adapter->crit_lock); + if (adapter->state == __IAVF_RUNNING || + adapter->state == __IAVF_DOWN || + adapter->state == __IAVF_INIT_FAILED) { + mutex_unlock(&adapter->crit_lock); + break; + } + + mutex_unlock(&adapter->crit_lock); + usleep_range(500, 1000); + } cancel_delayed_work_sync(&adapter->watchdog_task); - cancel_delayed_work_sync(&adapter->client_task); + if (adapter->netdev_registered) { - unregister_netdev(netdev); + rtnl_lock(); + unregister_netdevice(netdev); adapter->netdev_registered = false; + rtnl_unlock(); } if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_del_device(adapter); @@ -4569,6 +4804,10 @@ static void iavf_remove(struct pci_dev *pdev) err); } + mutex_lock(&adapter->crit_lock); + dev_info(&adapter->pdev->dev, "Remove device\n"); + iavf_change_state(adapter, __IAVF_REMOVE); + iavf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. 
*/ @@ -4576,37 +4815,24 @@ static void iavf_remove(struct pci_dev *pdev) iavf_request_reset(adapter); msleep(50); } - if (iavf_lock_timeout(&adapter->crit_lock, 5000)) - dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); - dev_info(&adapter->pdev->dev, "Removing device\n"); + iavf_misc_irq_disable(adapter); /* Shut down all the garbage mashers on the detention level */ - iavf_change_state(adapter, __IAVF_REMOVE); + cancel_work_sync(&adapter->reset_task); + cancel_delayed_work_sync(&adapter->watchdog_task); + cancel_work_sync(&adapter->adminq_task); + cancel_delayed_work_sync(&adapter->client_task); + adapter->aq_required = 0; adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); - iavf_misc_irq_disable(adapter); iavf_free_misc_irq(adapter); - /* In case we enter iavf_remove from erroneous state, free traffic irqs - * here, so as to not cause a kernel crash, when calling - * iavf_reset_interrupt_capability. 
- */ - if ((adapter->last_state == __IAVF_RESETTING && - prev_state != __IAVF_DOWN) || - (adapter->last_state == __IAVF_RUNNING && - !(netdev->flags & IFF_UP))) - iavf_free_traffic_irqs(adapter); - iavf_reset_interrupt_capability(adapter); iavf_free_q_vectors(adapter); - cancel_delayed_work_sync(&adapter->watchdog_task); - - cancel_work_sync(&adapter->adminq_task); - iavf_free_rss(adapter); if (hw->aq.asq.count) @@ -4618,8 +4844,6 @@ static void iavf_remove(struct pci_dev *pdev) mutex_destroy(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); mutex_destroy(&adapter->crit_lock); - mutex_unlock(&adapter->remove_lock); - mutex_destroy(&adapter->remove_lock); iounmap(hw->hw_addr); pci_release_regions(pdev); @@ -4689,8 +4913,6 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { - int ret; - pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -4701,8 +4923,7 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - ret = pci_register_driver(&iavf_driver); - return ret; + return pci_register_driver(&iavf_driver); } module_init(iavf_init_module); diff --git a/drivers/net/ethernet/intel/iavf/iavf_status.h b/drivers/net/ethernet/intel/iavf/iavf_status.h index 46e3d1f6b604..2ea5c7c339bc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_status.h +++ b/drivers/net/ethernet/intel/iavf/iavf_status.h @@ -18,7 +18,7 @@ enum iavf_status { IAVF_ERR_ADAPTER_STOPPED = -9, IAVF_ERR_INVALID_MAC_ADDR = -10, IAVF_ERR_DEVICE_NOT_SUPPORTED = -11, - IAVF_ERR_MASTER_REQUESTS_PENDING = -12, + IAVF_ERR_PRIMARY_REQUESTS_PENDING = -12, IAVF_ERR_INVALID_LINK_SETTINGS = -13, IAVF_ERR_AUTONEG_NOT_COMPLETE = -14, IAVF_ERR_RESET_FAILED = -15, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 8cbe7ad1347c..978f651c6b09 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ 
b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -374,29 +374,60 @@ static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector, return &q_vector->rx == rc; } -static inline unsigned int iavf_itr_divisor(struct iavf_q_vector *q_vector) +#define IAVF_AIM_MULTIPLIER_100G 2560 +#define IAVF_AIM_MULTIPLIER_50G 1280 +#define IAVF_AIM_MULTIPLIER_40G 1024 +#define IAVF_AIM_MULTIPLIER_20G 512 +#define IAVF_AIM_MULTIPLIER_10G 256 +#define IAVF_AIM_MULTIPLIER_1G 32 + +static unsigned int iavf_mbps_itr_multiplier(u32 speed_mbps) { - unsigned int divisor; + switch (speed_mbps) { + case SPEED_100000: + return IAVF_AIM_MULTIPLIER_100G; + case SPEED_50000: + return IAVF_AIM_MULTIPLIER_50G; + case SPEED_40000: + return IAVF_AIM_MULTIPLIER_40G; + case SPEED_25000: + case SPEED_20000: + return IAVF_AIM_MULTIPLIER_20G; + case SPEED_10000: + default: + return IAVF_AIM_MULTIPLIER_10G; + case SPEED_1000: + case SPEED_100: + return IAVF_AIM_MULTIPLIER_1G; + } +} - switch (q_vector->adapter->link_speed) { +static unsigned int +iavf_virtchnl_itr_multiplier(enum virtchnl_link_speed speed_virtchnl) +{ + switch (speed_virtchnl) { case VIRTCHNL_LINK_SPEED_40GB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 1024; - break; + return IAVF_AIM_MULTIPLIER_40G; case VIRTCHNL_LINK_SPEED_25GB: case VIRTCHNL_LINK_SPEED_20GB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 512; - break; - default: + return IAVF_AIM_MULTIPLIER_20G; case VIRTCHNL_LINK_SPEED_10GB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 256; - break; + default: + return IAVF_AIM_MULTIPLIER_10G; case VIRTCHNL_LINK_SPEED_1GB: case VIRTCHNL_LINK_SPEED_100MB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 32; - break; + return IAVF_AIM_MULTIPLIER_1G; } +} - return divisor; +static unsigned int iavf_itr_divisor(struct iavf_adapter *adapter) +{ + if (ADV_LINK_SUPPORT(adapter)) + return IAVF_ITR_ADAPTIVE_MIN_INC * + iavf_mbps_itr_multiplier(adapter->link_speed_mbps); + else + return IAVF_ITR_ADAPTIVE_MIN_INC * + 
iavf_virtchnl_itr_multiplier(adapter->link_speed); } /** @@ -586,8 +617,9 @@ adjust_by_size: * Use addition as we have already recorded the new latency flag * for the ITR value. */ - itr += DIV_ROUND_UP(avg_wire_size, iavf_itr_divisor(q_vector)) * - IAVF_ITR_ADAPTIVE_MIN_INC; + itr += DIV_ROUND_UP(avg_wire_size, + iavf_itr_divisor(q_vector->adapter)) * + IAVF_ITR_ADAPTIVE_MIN_INC; if ((itr & IAVF_ITR_MASK) > IAVF_ITR_ADAPTIVE_MAX_USECS) { itr &= IAVF_ITR_ADAPTIVE_LATENCY; diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 5ee1d118fd30..c6f52f8ef678 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -22,17 +22,17 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, enum virtchnl_ops op, u8 *msg, u16 len) { struct iavf_hw *hw = &adapter->hw; - enum iavf_status err; + enum iavf_status status; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) return 0; /* nothing to see here, move along */ - err = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); - if (err) - dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", - op, iavf_stat_str(hw, err), + status = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); + if (status) + dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, status %s, aq_err %s\n", + op, iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); - return err; + return iavf_status_to_errno(status); } /** @@ -55,6 +55,41 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) } /** + * iavf_poll_virtchnl_msg + * @hw: HW configuration structure + * @event: event to populate on success + * @op_to_poll: requested virtchnl op to poll for + * + * Initialize poll for virtchnl msg matching the requested_op. Returns 0 + * if a message of the correct opcode is in the queue or an error code + * if no message matching the op code is waiting and other failures. 
+ */ +static int +iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, + enum virtchnl_ops op_to_poll) +{ + enum virtchnl_ops received_op; + enum iavf_status status; + u32 v_retval; + + while (1) { + /* When the AQ is empty, iavf_clean_arq_element will return + * nonzero and this loop will terminate. + */ + status = iavf_clean_arq_element(hw, event, NULL); + if (status != IAVF_SUCCESS) + return iavf_status_to_errno(status); + received_op = + (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + if (op_to_poll == received_op) + break; + } + + v_retval = le32_to_cpu(event->desc.cookie_low); + return virtchnl_status_to_errno((enum virtchnl_status_code)v_retval); +} + +/** * iavf_verify_api_ver * @adapter: adapter structure * @@ -65,55 +100,28 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) **/ int iavf_verify_api_ver(struct iavf_adapter *adapter) { - struct virtchnl_version_info *pf_vvi; - struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; - enum virtchnl_ops op; - enum iavf_status err; + int err; event.buf_len = IAVF_MAX_AQ_BUF_SIZE; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - err = iavf_clean_arq_element(hw, &event, NULL); - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. 
- */ - if (err) - goto out_alloc; - op = - (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_VERSION) - break; - } - + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; - err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); - if (err) - goto out_alloc; + err = iavf_poll_virtchnl_msg(&adapter->hw, &event, VIRTCHNL_OP_VERSION); + if (!err) { + struct virtchnl_version_info *pf_vvi = + (struct virtchnl_version_info *)event.msg_buf; + adapter->pf_version = *pf_vvi; - if (op != VIRTCHNL_OP_VERSION) { - dev_info(&adapter->pdev->dev, "Invalid reply type %d from PF\n", - op); - err = -EIO; - goto out_alloc; + if (pf_vvi->major > VIRTCHNL_VERSION_MAJOR || + (pf_vvi->major == VIRTCHNL_VERSION_MAJOR && + pf_vvi->minor > VIRTCHNL_VERSION_MINOR)) + err = -EIO; } - pf_vvi = (struct virtchnl_version_info *)event.msg_buf; - adapter->pf_version = *pf_vvi; - - if ((pf_vvi->major > VIRTCHNL_VERSION_MAJOR) || - ((pf_vvi->major == VIRTCHNL_VERSION_MAJOR) && - (pf_vvi->minor > VIRTCHNL_VERSION_MINOR))) - err = -EIO; - -out_alloc: kfree(event.msg_buf); -out: + return err; } @@ -208,33 +216,17 @@ int iavf_get_vf_config(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; - enum virtchnl_ops op; - enum iavf_status err; u16 len; + int err; - len = sizeof(struct virtchnl_vf_resource) + + len = sizeof(struct virtchnl_vf_resource) + IAVF_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource); event.buf_len = len; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } + event.msg_buf = kzalloc(len, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; - while (1) { - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. 
- */ - err = iavf_clean_arq_element(hw, &event, NULL); - if (err) - goto out_alloc; - op = - (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_GET_VF_RESOURCES) - break; - } - - err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); + err = iavf_poll_virtchnl_msg(hw, &event, VIRTCHNL_OP_GET_VF_RESOURCES); memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len)); /* some PFs send more queues than we should have so validate that @@ -243,48 +235,32 @@ int iavf_get_vf_config(struct iavf_adapter *adapter) if (!err) iavf_validate_num_queues(adapter); iavf_vf_parse_hw_config(hw, adapter->vf_res); -out_alloc: + kfree(event.msg_buf); -out: + return err; } int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) { - struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; - enum virtchnl_ops op; - enum iavf_status err; + int err; u16 len; - len = sizeof(struct virtchnl_vlan_caps); + len = sizeof(struct virtchnl_vlan_caps); event.buf_len = len; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. 
- */ - err = iavf_clean_arq_element(hw, &event, NULL); - if (err) - goto out_alloc; - op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS) - break; - } + event.msg_buf = kzalloc(len, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; - err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); - if (err) - goto out_alloc; + err = iavf_poll_virtchnl_msg(&adapter->hw, &event, + VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS); + if (!err) + memcpy(&adapter->vlan_v2_caps, event.msg_buf, + min(event.msg_len, len)); - memcpy(&adapter->vlan_v2_caps, event.msg_buf, min(event.msg_len, len)); -out_alloc: kfree(event.msg_buf); -out: + return err; } @@ -454,6 +430,20 @@ void iavf_map_queues(struct iavf_adapter *adapter) } /** + * iavf_set_mac_addr_type - Set the correct request type from the filter type + * @virtchnl_ether_addr: pointer to requested list element + * @filter: pointer to requested filter + **/ +static void +iavf_set_mac_addr_type(struct virtchnl_ether_addr *virtchnl_ether_addr, + const struct iavf_mac_filter *filter) +{ + virtchnl_ether_addr->type = filter->is_primary ? + VIRTCHNL_ETHER_ADDR_PRIMARY : + VIRTCHNL_ETHER_ADDR_EXTRA; +} + +/** * iavf_add_ether_addrs * @adapter: adapter structure * @@ -508,6 +498,7 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter) list_for_each_entry(f, &adapter->mac_filter_list, list) { if (f->add) { ether_addr_copy(veal->list[i].addr, f->macaddr); + iavf_set_mac_addr_type(&veal->list[i], f); i++; f->add = false; if (i == count) @@ -577,6 +568,7 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { if (f->remove) { ether_addr_copy(veal->list[i].addr, f->macaddr); + iavf_set_mac_addr_type(&veal->list[i], f); i++; list_del(&f->list); kfree(f); @@ -1827,11 +1819,13 @@ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter) * * Request that the PF reset this VF. No response is expected. 
**/ -void iavf_request_reset(struct iavf_adapter *adapter) +int iavf_request_reset(struct iavf_adapter *adapter) { + int err; /* Don't check CURRENT_OP - this is always higher priority */ - iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); + err = iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); adapter->current_op = VIRTCHNL_OP_UNKNOWN; + return err; } /** @@ -2146,29 +2140,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, sizeof(adapter->vlan_v2_caps))); iavf_process_config(adapter); - - /* unlock crit_lock before acquiring rtnl_lock as other - * processes holding rtnl_lock could be waiting for the same - * crit_lock - */ - mutex_unlock(&adapter->crit_lock); - /* VLAN capabilities can change during VFR, so make sure to - * update the netdev features with the new capabilities - */ - rtnl_lock(); - netdev_update_features(netdev); - rtnl_unlock(); - if (iavf_lock_timeout(&adapter->crit_lock, 10000)) - dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", - __FUNCTION__); - - /* Request VLAN offload settings */ - if (VLAN_V2_ALLOWED(adapter)) - iavf_set_vlan_offload_features(adapter, 0, - netdev->features); - - iavf_set_queue_vlan_tag_loc(adapter); - + adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; } break; case VIRTCHNL_OP_ENABLE_QUEUES: diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 389fff70d22e..44b8464b7663 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -40,6 +40,7 @@ ice-$(CONFIG_PCI_IOV) += \ ice_vf_vsi_vlan_ops.o \ ice_virtchnl_pf.o ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o +ice-$(CONFIG_TTY) += ice_gnss.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8f40f6f9b8eb..dc42ff92dbad 100644 --- 
a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -73,6 +73,7 @@ #include "ice_eswitch.h" #include "ice_lag.h" #include "ice_vsi_vlan_ops.h" +#include "ice_gnss.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -108,7 +109,6 @@ /* All VF control VSIs share the same IRQ, so assign a unique ID for them */ #define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff -#define ICE_INVAL_VFID 256 #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ @@ -184,6 +184,7 @@ enum ice_feature { ICE_F_DSCP, ICE_F_SMA_CTRL, + ICE_F_GNSS, ICE_F_MAX }; @@ -281,7 +282,6 @@ enum ice_pf_state { ICE_VFLR_EVENT_PENDING, ICE_FLTR_OVERFLOW_PROMISC, ICE_VF_DIS, - ICE_VF_DEINIT_IN_PROGRESS, ICE_CFG_BUSY, ICE_SERVICE_SCHED, ICE_SERVICE_DIS, @@ -332,7 +332,7 @@ struct ice_vsi { u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ - s16 vf_id; /* VF ID for SR-IOV VSIs */ + struct ice_vf *vf; /* VF associated with this VSI */ u16 ethtype; /* Ethernet protocol for pause frame */ u16 num_gfltr; @@ -487,6 +487,7 @@ enum ice_pf_flags { ICE_FLAG_VF_VLAN_PRUNING, ICE_FLAG_LINK_LENIENT_MODE_ENA, ICE_FLAG_PLUG_AUX_DEV, + ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -527,15 +528,7 @@ struct ice_pf { struct ice_vsi **vsi; /* VSIs created by the driver */ struct ice_sw *first_sw; /* first switch created by firmware */ u16 eswitch_mode; /* current mode of eswitch */ - /* Virtchnl/SR-IOV config info */ - struct ice_vf *vf; - u16 num_alloc_vfs; /* actual number of VFs allocated */ - u16 num_vfs_supported; /* num VFs supported for this PF */ - u16 num_qps_per_vf; - u16 num_msix_per_vf; - /* used to ratelimit the MDD event logging */ - unsigned long last_printed_mdd_jiffies; - DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); + struct ice_vfs vfs; DECLARE_BITMAP(features, ICE_F_MAX); DECLARE_BITMAP(state, ICE_STATE_NBITS); 
DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); @@ -550,6 +543,9 @@ struct ice_pf { struct mutex tc_mutex; /* lock to protect TC changes */ u32 msg_enable; struct ice_ptp ptp; + struct tty_driver *ice_gnss_tty_driver; + struct tty_port gnss_tty_port; + struct gnss_serial *gnss_serial; u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */ u16 rdma_base_vector; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index fd8ee5b7f596..b25e27c4d887 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1401,6 +1401,24 @@ struct ice_aqc_get_link_topo { u8 rsvd[9]; }; +/* Read I2C (direct, 0x06E2) */ +struct ice_aqc_i2c { + struct ice_aqc_link_topo_addr topo_addr; + __le16 i2c_addr; + u8 i2c_params; +#define ICE_AQC_I2C_DATA_SIZE_M GENMASK(3, 0) +#define ICE_AQC_I2C_USE_REPEATED_START BIT(7) + + u8 rsvd; + __le16 i2c_bus_addr; + u8 rsvd2[4]; +}; + +/* Read I2C Response (direct, 0x06E2) */ +struct ice_aqc_read_i2c_resp { + u8 i2c_data[16]; +}; + /* Set Port Identification LED (direct, 0x06E9) */ struct ice_aqc_set_port_id_led { u8 lport_num; @@ -2112,6 +2130,8 @@ struct ice_aq_desc { struct ice_aqc_get_link_status get_link_status; struct ice_aqc_event_lan_overflow lan_overflow; struct ice_aqc_get_link_topo get_link_topo; + struct ice_aqc_i2c read_i2c; + struct ice_aqc_read_i2c_resp read_i2c_resp; } params; }; @@ -2226,6 +2246,7 @@ enum ice_adminq_opc { ice_aqc_opc_set_event_mask = 0x0613, ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_get_link_topo = 0x06E0, + ice_aqc_opc_read_i2c = 0x06E2, ice_aqc_opc_set_port_id_led = 0x06E9, ice_aqc_opc_set_gpio = 0x06EC, ice_aqc_opc_get_gpio = 0x06ED, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 2360e6abdb1e..a3094470d31d 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -323,7 +323,7 @@ 
ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf break; case ICE_VSI_VF: /* Firmware expects vmvf_num to be absolute VF ID */ - tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; + tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; case ICE_VSI_SWITCHDEV_CTRL: @@ -429,7 +429,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) */ if (ice_is_dvm_ena(hw)) if (vsi->type == ICE_VSI_VF && - ice_vf_is_port_vlan_ena(&vsi->back->vf[vsi->vf_id])) + ice_vf_is_port_vlan_ena(vsi->vf)) rlan_ctx.l2tsel = 1; else rlan_ctx.l2tsel = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index c57e5fc41cf8..9619bdb9e49a 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3379,7 +3379,7 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) && !ice_fw_supports_report_dflt_cfg(hw)) { - struct ice_link_default_override_tlv tlv; + struct ice_link_default_override_tlv tlv = { 0 }; status = ice_get_link_default_override(&tlv, pi); if (status) @@ -4798,6 +4798,59 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, } /** + * ice_aq_read_i2c + * @hw: pointer to the hw struct + * @topo_addr: topology address for a device to communicate with + * @bus_addr: 7-bit I2C bus address + * @addr: I2C memory address (I2C offset) with up to 16 bits + * @params: I2C parameters: bit [7] - Repeated start, + * bits [6:5] data offset size, + * bit [4] - I2C address type, + * bits [3:0] - data size to read (0-16 bytes) + * @data: pointer to data (0 to 16 bytes) to be read from the I2C device + * @cd: pointer to command details structure or NULL + * + * Read I2C (0x06E2) + */ +int +ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, 
u8 *data, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc = { 0 }; + struct ice_aqc_i2c *cmd; + u8 data_size; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_read_i2c); + cmd = &desc.params.read_i2c; + + if (!data) + return -EINVAL; + + data_size = FIELD_GET(ICE_AQC_I2C_DATA_SIZE_M, params); + + cmd->i2c_bus_addr = cpu_to_le16(bus_addr); + cmd->topo_addr = topo_addr; + cmd->i2c_params = params; + cmd->i2c_addr = addr; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) { + struct ice_aqc_read_i2c_resp *resp; + u8 i; + + resp = &desc.params.read_i2c_resp; + for (i = 0; i < data_size; i++) { + *data = resp->i2c_data[i]; + data++; + } + } + + return status; +} + +/** * ice_aq_set_driver_param - Set driver parameter to share via firmware * @hw: pointer to the HW struct * @idx: parameter index to set diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index d28749edd92f..1efe6b2c32f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -4,6 +4,8 @@ #ifndef _ICE_COMMON_H_ #define _ICE_COMMON_H_ +#include <linux/bitfield.h> + #include "ice.h" #include "ice_type.h" #include "ice_nvm.h" @@ -208,5 +210,9 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw); int ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add); +int +ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data, + struct ice_sq_cd *cd); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index e1cb6682eee2..9a84d746a6c4 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -44,6 +44,7 @@ 
ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) ctrl_vsi->rxq_map[vf->vf_id]; rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; rule_info.flags_info.act_valid = true; + rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, vf->repr->mac_rule); @@ -175,10 +176,20 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) int q_id; ice_for_each_txq(vsi, q_id) { - struct ice_repr *repr = pf->vf[q_id].repr; - struct ice_q_vector *q_vector = repr->q_vector; - struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id]; - struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id]; + struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + struct ice_repr *repr; + struct ice_vf *vf; + + vf = ice_get_vf_by_id(pf, q_id); + if (WARN_ON(!vf)) + continue; + + repr = vf->repr; + q_vector = repr->q_vector; + tx_ring = vsi->tx_rings[q_id]; + rx_ring = vsi->rx_rings[q_id]; q_vector->vsi = vsi; q_vector->reg_idx = vsi->q_vectors[0]->reg_idx; @@ -198,6 +209,38 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) rx_ring->q_vector = q_vector; rx_ring->next = NULL; rx_ring->netdev = repr->netdev; + + ice_put_vf(vf); + } +} + +/** + * ice_eswitch_release_reprs - clear PR VSIs configuration + * @pf: poiner to PF struct + * @ctrl_vsi: pointer to switchdev control VSI + */ +static void +ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) +{ + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); + + ice_for_each_vf(pf, bkt, vf) { + struct ice_vsi *vsi = vf->repr->src_vsi; + + /* Skip VFs that aren't configured */ + if (!vf->repr->dst) + continue; + + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + metadata_dst_free(vf->repr->dst); + vf->repr->dst = NULL; + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + + netif_napi_del(&vf->repr->q_vector->napi); } } @@ -209,11 +252,13 
@@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) { struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; int max_vsi_num = 0; - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); - ice_for_each_vf(pf, i) { - struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; - struct ice_vf *vf = &pf->vf[i]; + ice_for_each_vf(pf, bkt, vf) { + struct ice_vsi *vsi = vf->repr->src_vsi; ice_remove_vsi_fltr(&pf->hw, vsi->idx); vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, @@ -230,6 +275,7 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); metadata_dst_free(vf->repr->dst); + vf->repr->dst = NULL; goto err; } @@ -238,6 +284,7 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); metadata_dst_free(vf->repr->dst); + vf->repr->dst = NULL; ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); goto err; } @@ -251,8 +298,8 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) netif_keep_dst(vf->repr->netdev); } - ice_for_each_vf(pf, i) { - struct ice_repr *repr = pf->vf[i].repr; + ice_for_each_vf(pf, bkt, vf) { + struct ice_repr *repr = vf->repr; struct ice_vsi *vsi = repr->src_vsi; struct metadata_dst *dst; @@ -265,43 +312,12 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) return 0; err: - for (i = i - 1; i >= 0; i--) { - struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; - struct ice_vf *vf = &pf->vf[i]; - - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - metadata_dst_free(vf->repr->dst); - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, - ICE_FWD_TO_VSI); - } + ice_eswitch_release_reprs(pf, ctrl_vsi); return -ENODEV; } /** - * ice_eswitch_release_reprs - clear PR VSIs configuration - * @pf: poiner to PF struct - * @ctrl_vsi: pointer to switchdev control VSI - */ -static void -ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) -{ - int i; - - ice_for_each_vf(pf, i) { - struct ice_vsi *vsi = 
pf->vf[i].repr->src_vsi; - struct ice_vf *vf = &pf->vf[i]; - - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - metadata_dst_free(vf->repr->dst); - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, - ICE_FWD_TO_VSI); - - netif_napi_del(&vf->repr->q_vector->napi); - } -} - -/** * ice_eswitch_update_repr - reconfigure VF port representor * @vsi: VF VSI for which port representor is configured */ @@ -315,7 +331,7 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) if (!ice_is_switchdev_running(pf)) return; - vf = &pf->vf[vsi->vf_id]; + vf = vsi->vf; repr = vf->repr; repr->src_vsi = vsi; repr->dst->u.port_info.port_id = vsi->vsi_num; @@ -323,7 +339,8 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); if (ret) { ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); - dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", vsi->vf_id); + dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", + vsi->vf->vf_id); } } @@ -407,7 +424,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) static struct ice_vsi * ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, NULL, NULL); } /** @@ -416,10 +433,13 @@ ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) */ static void ice_eswitch_napi_del(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; - ice_for_each_vf(pf, i) - netif_napi_del(&pf->vf[i].repr->q_vector->napi); + lockdep_assert_held(&pf->vfs.table_lock); + + ice_for_each_vf(pf, bkt, vf) + netif_napi_del(&vf->repr->q_vector->napi); } /** @@ -428,10 +448,13 @@ static void ice_eswitch_napi_del(struct ice_pf *pf) */ static void ice_eswitch_napi_enable(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; + + 
lockdep_assert_held(&pf->vfs.table_lock); - ice_for_each_vf(pf, i) - napi_enable(&pf->vf[i].repr->q_vector->napi); + ice_for_each_vf(pf, bkt, vf) + napi_enable(&vf->repr->q_vector->napi); } /** @@ -440,10 +463,13 @@ static void ice_eswitch_napi_enable(struct ice_pf *pf) */ static void ice_eswitch_napi_disable(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); - ice_for_each_vf(pf, i) - napi_disable(&pf->vf[i].repr->q_vector->napi); + ice_for_each_vf(pf, bkt, vf) + napi_disable(&vf->repr->q_vector->napi); } /** @@ -521,7 +547,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, if (pf->eswitch_mode == mode) return 0; - if (pf->num_alloc_vfs) { + if (ice_has_vfs(pf)) { dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there is no VFs created"); NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there is no VFs created"); return -EOPNOTSUPP; @@ -612,16 +638,17 @@ int ice_eswitch_configure(struct ice_pf *pf) */ static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) { - struct ice_repr *repr; - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); if (test_bit(ICE_DOWN, pf->state)) return; - ice_for_each_vf(pf, i) { - repr = pf->vf[i].repr; - if (repr) - ice_repr_start_tx_queues(repr); + ice_for_each_vf(pf, bkt, vf) { + if (vf->repr) + ice_repr_start_tx_queues(vf->repr); } } @@ -631,16 +658,17 @@ static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) */ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { - struct ice_repr *repr; - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); if (test_bit(ICE_DOWN, pf->state)) return; - ice_for_each_vf(pf, i) { - repr = pf->vf[i].repr; - if (repr) - ice_repr_stop_tx_queues(repr); + ice_for_each_vf(pf, bkt, vf) { + if (vf->repr) + ice_repr_stop_tx_queues(vf->repr); } } diff --git 
a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a3492754d0d3..399625892f9e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -316,16 +316,20 @@ out: */ static bool ice_active_vfs(struct ice_pf *pf) { - unsigned int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + bool active = false; + struct ice_vf *vf; + unsigned int bkt; - if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - return true; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + active = true; + break; + } } + rcu_read_unlock(); - return false; + return active; } /** @@ -1298,7 +1302,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) } if (test_bit(ICE_FLAG_VF_VLAN_PRUNING, change_flags) && - pf->num_alloc_vfs) { + ice_has_vfs(pf)) { dev_err(dev, "vf-vlan-pruning: VLAN pruning cannot be changed while VFs are active.\n"); /* toggle bit back to previous state */ change_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags); diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c new file mode 100644 index 000000000000..755e1580f368 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include <linux/tty_driver.h> + +/** + * ice_gnss_read - Read data from internal GNSS module + * @work: GNSS read work structure + * + * Read the data from internal GNSS receiver, number of bytes read will be + * returned in *read_data parameter. 
+ */ +static void ice_gnss_read(struct kthread_work *work) +{ + struct gnss_serial *gnss = container_of(work, struct gnss_serial, + read_work.work); + struct ice_aqc_link_topo_addr link_topo; + u8 i2c_params, bytes_read; + struct tty_port *port; + struct ice_pf *pf; + struct ice_hw *hw; + __be16 data_len_b; + char *buf = NULL; + u16 i, data_len; + int err = 0; + + pf = gnss->back; + if (!pf || !gnss->tty || !gnss->tty->port) { + err = -EFAULT; + goto exit; + } + + hw = &pf->hw; + port = gnss->tty->port; + + buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto exit; + } + + memset(&link_topo, 0, sizeof(struct ice_aqc_link_topo_addr)); + link_topo.topo_params.index = ICE_E810T_GNSS_I2C_BUS; + link_topo.topo_params.node_type_ctx |= + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE); + + i2c_params = ICE_GNSS_UBX_DATA_LEN_WIDTH | + ICE_AQC_I2C_USE_REPEATED_START; + + /* Read data length in a loop, when it's not 0 the data is ready */ + for (i = 0; i < ICE_MAX_UBX_READ_TRIES; i++) { + err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR, + cpu_to_le16(ICE_GNSS_UBX_DATA_LEN_H), + i2c_params, (u8 *)&data_len_b, NULL); + if (err) + goto exit_buf; + + data_len = be16_to_cpu(data_len_b); + if (data_len != 0 && data_len != U16_MAX) + break; + + mdelay(10); + } + + data_len = min(data_len, (u16)PAGE_SIZE); + data_len = tty_buffer_request_room(port, data_len); + if (!data_len) { + err = -ENOMEM; + goto exit_buf; + } + + /* Read received data */ + for (i = 0; i < data_len; i += bytes_read) { + u16 bytes_left = data_len - i; + + bytes_read = bytes_left < ICE_MAX_I2C_DATA_SIZE ? bytes_left : + ICE_MAX_I2C_DATA_SIZE; + + err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR, + cpu_to_le16(ICE_GNSS_UBX_EMPTY_DATA), + bytes_read, &buf[i], NULL); + if (err) + goto exit_buf; + } + + /* Send the data to the tty layer for users to read. 
This doesn't + * actually push the data through unless tty->low_latency is set. + */ + tty_insert_flip_string(port, buf, i); + tty_flip_buffer_push(port); + +exit_buf: + free_page((unsigned long)buf); + kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, + ICE_GNSS_TIMER_DELAY_TIME); +exit: + if (err) + dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err); +} + +/** + * ice_gnss_struct_init - Initialize GNSS structure for the TTY + * @pf: Board private structure + */ +static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct kthread_worker *kworker; + struct gnss_serial *gnss; + + gnss = kzalloc(sizeof(*gnss), GFP_KERNEL); + if (!gnss) + return NULL; + + mutex_init(&gnss->gnss_mutex); + gnss->open_count = 0; + gnss->back = pf; + pf->gnss_serial = gnss; + + kthread_init_delayed_work(&gnss->read_work, ice_gnss_read); + /* Allocate a kworker for handling work required for the GNSS TTY + * writes. + */ + kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev)); + if (!kworker) { + kfree(gnss); + return NULL; + } + + gnss->kworker = kworker; + + return gnss; +} + +/** + * ice_gnss_tty_open - Initialize GNSS structures on TTY device open + * @tty: pointer to the tty_struct + * @filp: pointer to the file + * + * This routine is mandatory. If this routine is not filled in, the attempted + * open will fail with ENODEV. 
+ */ +static int ice_gnss_tty_open(struct tty_struct *tty, struct file *filp) +{ + struct gnss_serial *gnss; + struct ice_pf *pf; + + pf = (struct ice_pf *)tty->driver->driver_state; + if (!pf) + return -EFAULT; + + /* Clear the pointer in case something fails */ + tty->driver_data = NULL; + + /* Get the serial object associated with this tty pointer */ + gnss = pf->gnss_serial; + if (!gnss) { + /* Initialize GNSS struct on the first device open */ + gnss = ice_gnss_struct_init(pf); + if (!gnss) + return -ENOMEM; + } + + mutex_lock(&gnss->gnss_mutex); + + /* Save our structure within the tty structure */ + tty->driver_data = gnss; + gnss->tty = tty; + gnss->open_count++; + kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, 0); + + mutex_unlock(&gnss->gnss_mutex); + + return 0; +} + +/** + * ice_gnss_tty_close - Cleanup GNSS structures on tty device close + * @tty: pointer to the tty_struct + * @filp: pointer to the file + */ +static void ice_gnss_tty_close(struct tty_struct *tty, struct file *filp) +{ + struct gnss_serial *gnss = tty->driver_data; + struct ice_pf *pf; + + if (!gnss) + return; + + pf = (struct ice_pf *)tty->driver->driver_state; + if (!pf) + return; + + mutex_lock(&gnss->gnss_mutex); + + if (!gnss->open_count) { + /* Port was never opened */ + dev_err(ice_pf_to_dev(pf), "GNSS port not opened\n"); + goto exit; + } + + gnss->open_count--; + if (gnss->open_count <= 0) { + /* Port is in shutdown state */ + kthread_cancel_delayed_work_sync(&gnss->read_work); + } +exit: + mutex_unlock(&gnss->gnss_mutex); +} + +/** + * ice_gnss_tty_write - Dummy TTY write function to avoid kernel panic + * @tty: pointer to the tty_struct + * @buf: pointer to the user data + * @cnt: the number of characters that was able to be sent to the hardware (or + * queued to be sent at a later time) + */ +static int +ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int cnt) +{ + return 0; +} + +/** + * ice_gnss_tty_write_room - Dummy TTY write_room 
function to avoid kernel panic + * @tty: pointer to the tty_struct + */ +static unsigned int ice_gnss_tty_write_room(struct tty_struct *tty) +{ + return 0; +} + +static const struct tty_operations tty_gps_ops = { + .open = ice_gnss_tty_open, + .close = ice_gnss_tty_close, + .write = ice_gnss_tty_write, + .write_room = ice_gnss_tty_write_room, +}; + +/** + * ice_gnss_create_tty_driver - Create a TTY driver for GNSS + * @pf: Board private structure + */ +static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + const int ICE_TTYDRV_NAME_MAX = 14; + struct tty_driver *tty_driver; + char *ttydrv_name; + int err; + + tty_driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW); + if (!tty_driver) { + dev_err(ice_pf_to_dev(pf), "Failed to allocate memory for GNSS TTY\n"); + return NULL; + } + + ttydrv_name = kzalloc(ICE_TTYDRV_NAME_MAX, GFP_KERNEL); + if (!ttydrv_name) { + tty_driver_kref_put(tty_driver); + return NULL; + } + + snprintf(ttydrv_name, ICE_TTYDRV_NAME_MAX, "ttyGNSS_%02x%02x_", + (u8)pf->pdev->bus->number, (u8)PCI_SLOT(pf->pdev->devfn)); + + /* Initialize the tty driver*/ + tty_driver->owner = THIS_MODULE; + tty_driver->driver_name = dev_driver_string(dev); + tty_driver->name = (const char *)ttydrv_name; + tty_driver->type = TTY_DRIVER_TYPE_SERIAL; + tty_driver->subtype = SERIAL_TYPE_NORMAL; + tty_driver->init_termios = tty_std_termios; + tty_driver->init_termios.c_iflag &= ~INLCR; + tty_driver->init_termios.c_iflag |= IGNCR; + tty_driver->init_termios.c_oflag &= ~OPOST; + tty_driver->init_termios.c_lflag &= ~ICANON; + tty_driver->init_termios.c_cflag &= ~(CSIZE | CBAUD | CBAUDEX); + /* baud rate 9600 */ + tty_termios_encode_baud_rate(&tty_driver->init_termios, 9600, 9600); + tty_driver->driver_state = pf; + tty_set_operations(tty_driver, &tty_gps_ops); + + pf->gnss_serial = NULL; + + tty_port_init(&pf->gnss_tty_port); + tty_port_link_device(&pf->gnss_tty_port, tty_driver, 0); + + err = 
tty_register_driver(tty_driver); + if (err) { + dev_err(ice_pf_to_dev(pf), "Failed to register TTY driver err=%d\n", + err); + + tty_port_destroy(&pf->gnss_tty_port); + kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; + } + + return tty_driver; +} + +/** + * ice_gnss_init - Initialize GNSS TTY support + * @pf: Board private structure + */ +void ice_gnss_init(struct ice_pf *pf) +{ + struct tty_driver *tty_driver; + + tty_driver = ice_gnss_create_tty_driver(pf); + if (!tty_driver) + return; + + pf->ice_gnss_tty_driver = tty_driver; + + set_bit(ICE_FLAG_GNSS, pf->flags); + dev_info(ice_pf_to_dev(pf), "GNSS TTY init successful\n"); +} + +/** + * ice_gnss_exit - Disable GNSS TTY support + * @pf: Board private structure + */ +void ice_gnss_exit(struct ice_pf *pf) +{ + if (!test_bit(ICE_FLAG_GNSS, pf->flags) || !pf->ice_gnss_tty_driver) + return; + + tty_port_destroy(&pf->gnss_tty_port); + + if (pf->gnss_serial) { + struct gnss_serial *gnss = pf->gnss_serial; + + kthread_cancel_delayed_work_sync(&gnss->read_work); + kfree(gnss); + pf->gnss_serial = NULL; + } + + tty_unregister_driver(pf->ice_gnss_tty_driver); + kfree(pf->ice_gnss_tty_driver->name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + pf->ice_gnss_tty_driver = NULL; +} + +/** + * ice_gnss_is_gps_present - Check if GPS HW is present + * @hw: pointer to HW struct + */ +bool ice_gnss_is_gps_present(struct ice_hw *hw) +{ + if (!hw->func_caps.ts_func_info.src_tmr_owned) + return false; + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) + if (ice_is_e810t(hw)) { + int err; + u8 data; + + err = ice_read_pca9575_reg_e810t(hw, ICE_PCA9575_P0_IN, &data); + if (err || !!(data & ICE_E810T_P0_GNSS_PRSNT_N)) + return false; + } else { + return false; + } +#else + if (!ice_is_e810t(hw)) + return false; +#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ + + return true; +} diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h new file mode 100644 index 
000000000000..9211adb2372c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#ifndef _ICE_GNSS_H_ +#define _ICE_GNSS_H_ + +#include <linux/tty.h> +#include <linux/tty_flip.h> + +#define ICE_E810T_GNSS_I2C_BUS 0x2 +#define ICE_GNSS_UBX_I2C_BUS_ADDR 0x42 +/* Data length register is big endian */ +#define ICE_GNSS_UBX_DATA_LEN_H 0xFD +#define ICE_GNSS_UBX_DATA_LEN_WIDTH 2 +#define ICE_GNSS_UBX_EMPTY_DATA 0xFF +#define ICE_GNSS_TIMER_DELAY_TIME (HZ / 10) /* 0.1 second per message */ +#define ICE_MAX_I2C_DATA_SIZE FIELD_MAX(ICE_AQC_I2C_DATA_SIZE_M) +#define ICE_MAX_UBX_READ_TRIES 255 + +/** + * struct gnss_serial - data used to initialize GNSS TTY port + * @back: back pointer to PF + * @tty: pointer to the tty for this device + * @open_count: number of times this port has been opened + * @gnss_mutex: gnss_mutex used to protect GNSS serial operations + * @kworker: kwork thread for handling periodic work + * @read_work: read_work function for handling GNSS reads + */ +struct gnss_serial { + struct ice_pf *back; + struct tty_struct *tty; + int open_count; + struct mutex gnss_mutex; /* protects GNSS serial structure */ + struct kthread_worker *kworker; + struct kthread_delayed_work read_work; +}; + +#if IS_ENABLED(CONFIG_TTY) +void ice_gnss_init(struct ice_pf *pf); +void ice_gnss_exit(struct ice_pf *pf); +bool ice_gnss_is_gps_present(struct ice_hw *hw); +#else +static inline void ice_gnss_init(struct ice_pf *pf) { } +static inline void ice_gnss_exit(struct ice_pf *pf) { } +static inline bool ice_gnss_is_gps_present(struct ice_hw *hw) +{ + return false; +} +#endif /* IS_ENABLED(CONFIG_TTY) */ +#endif /* _ICE_GNSS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 2d15bb73a074..113a2c56c14c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ 
-166,21 +166,19 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) /** * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI * @vsi: the VSI being configured - * @vf_id: ID of the VF being configured + * @vf: the VF associated with this VSI, if any * * Return 0 on success and a negative value on error */ -static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) +static void ice_vsi_set_num_qs(struct ice_vsi *vsi, struct ice_vf *vf) { + enum ice_vsi_type vsi_type = vsi->type; struct ice_pf *pf = vsi->back; - struct ice_vf *vf = NULL; - if (vsi->type == ICE_VSI_VF) - vsi->vf_id = vf_id; - else - vsi->vf_id = ICE_INVAL_VFID; + if (WARN_ON(vsi_type == ICE_VSI_VF && !vf)) + return; - switch (vsi->type) { + switch (vsi_type) { case ICE_VSI_PF: if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; @@ -217,22 +215,21 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) /* The number of queues for ctrl VSI is equal to number of VFs. * Each ring is associated to the corresponding VF_PR netdev. */ - vsi->alloc_txq = pf->num_alloc_vfs; - vsi->alloc_rxq = pf->num_alloc_vfs; + vsi->alloc_txq = ice_get_num_vfs(pf); + vsi->alloc_rxq = vsi->alloc_txq; vsi->num_q_vectors = 1; break; case ICE_VSI_VF: - vf = &pf->vf[vsi->vf_id]; if (vf->num_req_qs) vf->num_vf_qs = vf->num_req_qs; vsi->alloc_txq = vf->num_vf_qs; vsi->alloc_rxq = vf->num_vf_qs; - /* pf->num_msix_per_vf includes (VF miscellaneous vector + + /* pf->vfs.num_msix_per includes (VF miscellaneous vector + * data queue interrupts). 
Since vsi->num_q_vectors is number * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the * original vector count */ - vsi->num_q_vectors = pf->num_msix_per_vf - ICE_NONQ_VECS_VF; + vsi->num_q_vectors = pf->vfs.num_msix_per - ICE_NONQ_VECS_VF; break; case ICE_VSI_CTRL: vsi->alloc_txq = 1; @@ -248,7 +245,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; break; default: - dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type); + dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type); break; } @@ -299,7 +296,7 @@ void ice_vsi_delete(struct ice_vsi *vsi) return; if (vsi->type == ICE_VSI_VF) - ctxt->vf_num = vsi->vf_id; + ctxt->vf_num = vsi->vf->vf_id; ctxt->vsi_num = vsi->vsi_num; memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info)); @@ -384,8 +381,7 @@ int ice_vsi_clear(struct ice_vsi *vsi) pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL) pf->next_vsi = vsi->idx; - if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && - vsi->vf_id != ICE_INVAL_VFID) + if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && vsi->vf) pf->next_vsi = vsi->idx; ice_vsi_free_arrays(vsi); @@ -437,13 +433,16 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; struct ice_pf *pf = q_vector->vsi->back; - int i; + struct ice_vf *vf; + unsigned int bkt; if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; - ice_for_each_vf(pf, i) - napi_schedule(&pf->vf[i].repr->q_vector->napi); + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + napi_schedule(&vf->repr->q_vector->napi); + rcu_read_unlock(); return IRQ_HANDLED; } @@ -453,17 +452,24 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d * @pf: board private structure * @vsi_type: type of VSI * @ch: ptr to channel - * @vf_id: ID of the VF being configured + * @vf: VF for ICE_VSI_VF and ICE_VSI_CTRL + * 
+ * The VF pointer is used for ICE_VSI_VF and ICE_VSI_CTRL. For ICE_VSI_CTRL, + * it may be NULL in the case there is no association with a VF. For + * ICE_VSI_VF the VF pointer *must not* be NULL. * * returns a pointer to a VSI on success, NULL on failure. */ static struct ice_vsi * ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, - struct ice_channel *ch, u16 vf_id) + struct ice_channel *ch, struct ice_vf *vf) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; + if (WARN_ON(vsi_type == ICE_VSI_VF && !vf)) + return NULL; + /* Need to protect the allocation of the VSIs at the PF level */ mutex_lock(&pf->sw_mutex); @@ -485,9 +491,9 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, set_bit(ICE_VSI_DOWN, vsi->state); if (vsi_type == ICE_VSI_VF) - ice_vsi_set_num_qs(vsi, vf_id); + ice_vsi_set_num_qs(vsi, vf); else if (vsi_type != ICE_VSI_CHNL) - ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); + ice_vsi_set_num_qs(vsi, NULL); switch (vsi->type) { case ICE_VSI_SWITCHDEV_CTRL: @@ -510,10 +516,16 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, /* Setup ctrl VSI MSIX irq handler */ vsi->irq_handler = ice_msix_clean_ctrl_vsi; + + /* For the PF control VSI this is NULL, for the VF control VSI + * this will be the first VF to allocate it. 
+ */ + vsi->vf = vf; break; case ICE_VSI_VF: if (ice_vsi_alloc_arrays(vsi)) goto err_rings; + vsi->vf = vf; break; case ICE_VSI_CHNL: if (!ch) @@ -531,7 +543,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, goto unlock_pf; } - if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) { + if (vsi->type == ICE_VSI_CTRL && !vf) { /* Use the last VSI slot as the index for PF control VSI */ vsi->idx = pf->num_alloc_vsi - 1; pf->ctrl_vsi_idx = vsi->idx; @@ -546,8 +558,8 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, pf->next_vsi); } - if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID) - pf->vf[vf_id].ctrl_vsi_idx = vsi->idx; + if (vsi->type == ICE_VSI_CTRL && vf) + vf->ctrl_vsi_idx = vsi->idx; goto unlock_pf; err_rings: @@ -1130,7 +1142,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) case ICE_VSI_VF: ctxt->flags = ICE_AQ_VSI_TYPE_VF; /* VF number here is the absolute VF number (0-255) */ - ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id; + ctxt->vf_num = vsi->vf->vf_id + hw->func_caps.vf_base_id; break; default: ret = -ENODEV; @@ -1322,6 +1334,36 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) } /** + * ice_get_vf_ctrl_res - Get VF control VSI resource + * @pf: pointer to the PF structure + * @vsi: the VSI to allocate a resource for + * + * Look up whether another VF has already allocated the control VSI resource. + * If so, re-use this resource so that we share it among all VFs. + * + * Otherwise, allocate the resource and return it. 
+ */ +static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) +{ + struct ice_vf *vf; + unsigned int bkt; + int base; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { + base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; + rcu_read_unlock(); + return base; + } + } + rcu_read_unlock(); + + return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors, + ICE_RES_VF_CTRL_VEC_ID); +} + +/** * ice_vsi_setup_vector_base - Set up the base vector for the given VSI * @vsi: ptr to the VSI * @@ -1353,20 +1395,8 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { - base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; - break; - } - } - if (i == pf->num_alloc_vfs) - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, - ICE_RES_VF_CTRL_VEC_ID); + if (vsi->type == ICE_VSI_CTRL && vsi->vf) { + base = ice_get_vf_ctrl_res(pf, vsi); } else { base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); @@ -2218,7 +2248,7 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) } if (vsi->type == ICE_VSI_VF) { - struct ice_vf *vf = &vsi->back->vf[vsi->vf_id]; + struct ice_vf *vf = vsi->vf; q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector); } else { @@ -2403,9 +2433,8 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) * @pf: board private structure * @pi: pointer to the port_info instance * @vsi_type: VSI type - * @vf_id: defines VF ID to which this VSI connects. This field is meant to be - * used only for ICE_VSI_VF VSI type. For other VSI types, should - * fill-in ICE_INVAL_VFID as input. + * @vf: pointer to VF to which this VSI connects. This field is used primarily + * for the ICE_VSI_VF type. 
Other VSI types should pass NULL. * @ch: ptr to channel * * This allocates the sw VSI structure and its queue resources. @@ -2415,7 +2444,8 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) */ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch) + enum ice_vsi_type vsi_type, struct ice_vf *vf, + struct ice_channel *ch) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = ice_pf_to_dev(pf); @@ -2423,11 +2453,11 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, int ret, i; if (vsi_type == ICE_VSI_CHNL) - vsi = ice_vsi_alloc(pf, vsi_type, ch, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, ch, NULL); else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) - vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf_id); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf); else - vsi = ice_vsi_alloc(pf, vsi_type, NULL, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, NULL); if (!vsi) { dev_err(dev, "could not allocate VSI\n"); @@ -2439,9 +2469,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (vsi->type == ICE_VSI_PF) vsi->ethtype = ETH_P_PAUSE; - if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL) - vsi->vf_id = vf_id; - ice_alloc_fd_res(vsi); if (vsi_type != ICE_VSI_CHNL) { @@ -2862,6 +2889,37 @@ void ice_napi_del(struct ice_vsi *vsi) } /** + * ice_free_vf_ctrl_res - Free the VF control VSI resource + * @pf: pointer to PF structure + * @vsi: the VSI to free resources for + * + * Check if the VF control VSI resource is still in use. If no VF is using it + * any more, release the VSI resource. Otherwise, leave it to be cleaned up + * once no other VF uses it. 
+ */ +static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) +{ + struct ice_vf *vf; + unsigned int bkt; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + + /* No other VFs left that have control VSI. It is now safe to reclaim + * SW interrupts back to the common pool. + */ + ice_free_res(pf->irq_tracker, vsi->base_vector, + ICE_RES_VF_CTRL_VEC_ID); + pf->num_avail_sw_msix += vsi->num_q_vectors; +} + +/** * ice_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed * @@ -2904,23 +2962,8 @@ int ice_vsi_release(struct ice_vsi *vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) - break; - } - if (i == pf->num_alloc_vfs) { - /* No other VFs left that have control VSI, reclaim SW - * interrupts back to the common pool - */ - ice_free_res(pf->irq_tracker, vsi->base_vector, - ICE_RES_VF_CTRL_VEC_ID); - pf->num_avail_sw_msix += vsi->num_q_vectors; - } + if (vsi->type == ICE_VSI_CTRL && vsi->vf) { + ice_free_vf_ctrl_res(pf, vsi); } else if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); @@ -3104,7 +3147,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_coalesce_stored *coalesce; int prev_num_q_vectors = 0; - struct ice_vf *vf = NULL; enum ice_vsi_type vtype; struct ice_pf *pf; int ret, i; @@ -3114,8 +3156,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) pf = vsi->back; vtype = vsi->type; - if (vtype == ICE_VSI_VF) - vf = &pf->vf[vsi->vf_id]; + if (WARN_ON(vtype == ICE_VSI_VF) && !vsi->vf) + 
return -EINVAL; ice_vsi_init_vlan_ops(vsi); @@ -3154,9 +3196,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); if (vtype == ICE_VSI_VF) - ice_vsi_set_num_qs(vsi, vf->vf_id); + ice_vsi_set_num_qs(vsi, vsi->vf); else - ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); + ice_vsi_set_num_qs(vsi, NULL); ret = ice_vsi_alloc_arrays(vsi); if (ret < 0) @@ -4013,9 +4055,14 @@ static u16 ice_vsi_num_zero_vlans(struct ice_vsi *vsi) #define ICE_DVM_NUM_ZERO_VLAN_FLTRS 2 #define ICE_SVM_NUM_ZERO_VLAN_FLTRS 1 /* no VLAN 0 filter is created when a port VLAN is active */ - if (vsi->type == ICE_VSI_VF && - ice_vf_is_port_vlan_ena(&vsi->back->vf[vsi->vf_id])) - return 0; + if (vsi->type == ICE_VSI_VF) { + if (WARN_ON(!vsi->vf)) + return 0; + + if (ice_vf_is_port_vlan_ena(vsi->vf)) + return 0; + } + if (ice_is_dvm_ena(&vsi->back->hw)) return ICE_DVM_NUM_ZERO_VLAN_FLTRS; else @@ -4094,8 +4141,11 @@ void ice_init_feature_support(struct ice_pf *pf) case ICE_DEV_ID_E810C_QSFP: case ICE_DEV_ID_E810C_SFP: ice_set_feature_support(pf, ICE_F_DSCP); - if (ice_is_e810t(&pf->hw)) + if (ice_is_e810t(&pf->hw)) { ice_set_feature_support(pf, ICE_F_SMA_CTRL); + if (ice_gnss_is_gps_present(&pf->hw)) + ice_set_feature_support(pf, ICE_F_GNSS); + } break; default: break; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 491f13f98797..0095329949d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -52,7 +52,8 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch); + enum ice_vsi_type vsi_type, struct ice_vf *vf, + struct ice_channel *ch); void ice_napi_del(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 
cff476f735ef..289e5c99e313 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -505,7 +505,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_hw *hw = &pf->hw; struct ice_vsi *vsi; - unsigned int i; + struct ice_vf *vf; + unsigned int bkt; dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type); @@ -520,8 +521,10 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) ice_vc_notify_reset(pf); /* Disable VFs until reset is completed */ - ice_for_each_vf(pf, i) - ice_set_vf_state_qs_dis(&pf->vf[i]); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) + ice_set_vf_state_qs_dis(vf); + mutex_unlock(&pf->vfs.table_lock); if (ice_is_eswitch_mode_switchdev(pf)) { if (reset_type != ICE_RESET_PFR) @@ -568,6 +571,9 @@ skip: if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_prepare_for_reset(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_exit(pf); + if (hw->port_info) ice_sched_clear_port(hw->port_info); @@ -1663,7 +1669,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - unsigned int i; + struct ice_vf *vf; + unsigned int bkt; u32 reg; if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) { @@ -1751,47 +1758,46 @@ static void ice_handle_mdd_event(struct ice_pf *pf) /* Check to see if one of the VFs caused an MDD event, and then * increment counters and set print pending */ - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - reg = rd32(hw, VP_MDET_TX_PQM(i)); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id)); if (reg & VP_MDET_TX_PQM_VALID_M) { - wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); + wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event 
TX_PQM detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_TX_TCLAN(i)); + reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id)); if (reg & VP_MDET_TX_TCLAN_VALID_M) { - wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); + wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_TX_TDPU(i)); + reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id)); if (reg & VP_MDET_TX_TDPU_VALID_M) { - wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); + wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_RX(i)); + reg = rd32(hw, VP_MDET_RX(vf->vf_id)); if (reg & VP_MDET_RX_VALID_M) { - wr32(hw, VP_MDET_RX(i), 0xFFFF); + wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF); vf->mdd_rx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", - i); + vf->vf_id); /* Since the queue is disabled on VF Rx MDD events, the * PF can be configured to reset the VF through ethtool @@ -1802,10 +1808,13 @@ static void ice_handle_mdd_event(struct ice_pf *pf) * reset, so print the event prior to reset. 
*/ ice_print_vf_rx_mdd_event(vf); - ice_reset_vf(&pf->vf[i], false); + mutex_lock(&vf->cfg_lock); + ice_reset_vf(vf, false); + mutex_unlock(&vf->cfg_lock); } } } + mutex_unlock(&pf->vfs.table_lock); ice_print_vfs_mdd_events(pf); } @@ -2434,7 +2443,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) + if (vsi->type == ICE_VSI_CTRL && vsi->vf) err = devm_request_irq(dev, irq_num, vsi->irq_handler, IRQF_SHARED, q_vector->name, q_vector); @@ -3381,14 +3390,14 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL); } static struct ice_vsi * ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_channel *ch) { - return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch); + return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch); } /** @@ -3402,7 +3411,7 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL); } /** @@ -3416,7 +3425,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL); } /** @@ -3675,6 +3684,7 @@ static void ice_deinit_pf(struct ice_pf *pf) mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); + mutex_destroy(&pf->vfs.table_lock); if (pf->avail_txqs) { bitmap_free(pf->avail_txqs); @@ -3707,7 +3717,7 @@ static 
void ice_set_pf_caps(struct ice_pf *pf) clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); if (func_caps->common_cap.sr_iov_1_1) { set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); - pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs, + pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs, ICE_MAX_VF_COUNT); } clear_bit(ICE_FLAG_RSS_ENA, pf->flags); @@ -3774,6 +3784,9 @@ static int ice_init_pf(struct ice_pf *pf) return -ENOMEM; } + mutex_init(&pf->vfs.table_lock); + hash_init(pf->vfs.table); + return 0; } @@ -4700,6 +4713,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_init(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_init(pf); + /* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) dev_err(dev, "could not initialize flow director\n"); @@ -4875,6 +4891,8 @@ static void ice_remove(struct pci_dev *pdev) ice_deinit_lag(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_exit(pf); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); @@ -6918,6 +6936,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_reset(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_init(pf); + /* rebuild PF VSI */ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); if (err) { diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index dc1b0e9e6df5..695b6dd61dc2 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -47,6 +47,7 @@ enum ice_protocol_type { enum ice_sw_tunnel_type { ICE_NON_TUN = 0, + ICE_SW_TUN_AND_NON_TUN, ICE_SW_TUN_VXLAN, ICE_SW_TUN_GENEVE, ICE_SW_TUN_NVGRE, diff --git 
a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ae291d442539..000c39d163a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1533,9 +1533,12 @@ exit: static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta) { struct timespec64 now, then; + int ret; then = ns_to_timespec64(delta); - ice_ptp_gettimex64(info, &now, NULL); + ret = ice_ptp_gettimex64(info, &now, NULL); + if (ret) + return ret; now = timespec64_add(now, then); return ice_ptp_settime64(info, (const struct timespec64 *)&now); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index ec8450f034e6..6dff97d53d81 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -3251,6 +3251,37 @@ int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data) } /** + * ice_read_pca9575_reg_e810t + * @hw: pointer to the hw struct + * @offset: GPIO controller register offset + * @data: pointer to data to be read from the GPIO controller + * + * Read the register from the GPIO controller + */ +int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data) +{ + struct ice_aqc_link_topo_addr link_topo; + __le16 addr; + u16 handle; + int err; + + memset(&link_topo, 0, sizeof(link_topo)); + + err = ice_get_pca9575_handle(hw, &handle); + if (err) + return err; + + link_topo.handle = cpu_to_le16(handle); + link_topo.topo_params.node_type_ctx = + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); + + addr = cpu_to_le16((u16)offset); + + return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); +} + +/** * ice_is_pca9575_present * @hw: pointer to the hw struct * diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 519e75462e67..1246e4ee4b5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ 
b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -191,6 +191,7 @@ int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port); int ice_ptp_init_phy_e810(struct ice_hw *hw); int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data); int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data); +int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data); bool ice_is_pca9575_present(struct ice_hw *hw); #define PFTSYN_SEM_BYTES 4 @@ -443,4 +444,10 @@ bool ice_is_pca9575_present(struct ice_hw *hw); #define ICE_SMA_MAX_BIT_E810T 7 #define ICE_PCA9575_P1_OFFSET 8 +/* E810T PCA9575 IO controller registers */ +#define ICE_PCA9575_P0_IN 0x0 + +/* E810T PCA9575 IO controller pin control */ +#define ICE_E810T_P0_GNSS_PRSNT_N BIT(4) + #endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index dcc310e29300..2adfaf21e056 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -284,6 +284,8 @@ static int ice_repr_add(struct ice_vf *vf) devlink_port_type_eth_set(&vf->devlink_port, repr->netdev); + ice_vc_change_ops_to_repr(&vf->vc_ops); + return 0; err_netdev: @@ -311,6 +313,9 @@ err_alloc_rule: */ static void ice_repr_rem(struct ice_vf *vf) { + if (!vf->repr) + return; + ice_devlink_destroy_vf_port(vf); kfree(vf->repr->q_vector); vf->repr->q_vector = NULL; @@ -323,6 +328,23 @@ static void ice_repr_rem(struct ice_vf *vf) #endif kfree(vf->repr); vf->repr = NULL; + + ice_vc_set_dflt_vf_ops(&vf->vc_ops); +} + +/** + * ice_repr_rem_from_all_vfs - remove port representor for all VFs + * @pf: pointer to PF structure + */ +void ice_repr_rem_from_all_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); + + ice_for_each_vf(pf, bkt, vf) + ice_repr_rem(vf); } /** @@ -331,49 +353,27 @@ static void ice_repr_rem(struct ice_vf *vf) */ int ice_repr_add_for_all_vfs(struct ice_pf *pf) { + struct ice_vf *vf; + 
unsigned int bkt; int err; - int i; - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + lockdep_assert_held(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { err = ice_repr_add(vf); if (err) goto err; - - ice_vc_change_ops_to_repr(&vf->vc_ops); } return 0; err: - for (i = i - 1; i >= 0; i--) { - struct ice_vf *vf = &pf->vf[i]; - - ice_repr_rem(vf); - ice_vc_set_dflt_vf_ops(&vf->vc_ops); - } + ice_repr_rem_from_all_vfs(pf); return err; } /** - * ice_repr_rem_from_all_vfs - remove port representor for all VFs - * @pf: pointer to PF structure - */ -void ice_repr_rem_from_all_vfs(struct ice_pf *pf) -{ - int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - ice_repr_rem(vf); - ice_vc_set_dflt_vf_ops(&vf->vc_ops); - } -} - -/** * ice_repr_start_tx_queues - start Tx queues of port representor * @repr: pointer to repr structure */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 4143728a1919..9c40a8d58c71 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4617,6 +4617,7 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, case ICE_SW_TUN_NVGRE: prof_type = ICE_PROF_TUN_GRE; break; + case ICE_SW_TUN_AND_NON_TUN: default: prof_type = ICE_PROF_ALL; break; @@ -5385,7 +5386,8 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_ice_add_adv_rule; - if (rinfo->tun_type != ICE_NON_TUN) { + if (rinfo->tun_type != ICE_NON_TUN && + rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) { status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->pdata.lkup_tx_rx.hdr, pkt_offsets); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index e8aab664270a..65cf32eb4046 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -709,7 +709,7 @@ ice_tc_set_port(struct 
flow_match_ports match, fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT; else fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; - fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; + headers->l4_key.dst_port = match.key->dst; headers->l4_mask.dst_port = match.mask->dst; } @@ -718,7 +718,7 @@ ice_tc_set_port(struct flow_match_ports match, fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT; else fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; - fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; + headers->l4_key.src_port = match.key->src; headers->l4_mask.src_port = match.mask->src; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index ff93ec71aed6..853f57a9589a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1165,7 +1165,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) struct ice_vsi *ctrl_vsi = rx_ring->vsi; if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && - ctrl_vsi->vf_id != ICE_INVAL_VFID) + ctrl_vsi->vf) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index 39f2d36cabba..b16f946185f2 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -34,9 +34,10 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) { struct ice_vsi_vlan_ops *vlan_ops; struct ice_pf *pf = vsi->back; - struct ice_vf *vf; + struct ice_vf *vf = vsi->vf; - vf = &pf->vf[vsi->vf_id]; + if (WARN_ON(!vf)) + return; if (ice_is_dvm_ena(&pf->hw)) { vlan_ops = &vsi->outer_vlan_ops; @@ -126,9 +127,14 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) */ void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi) { - struct ice_vf *vf = &vsi->back->vf[vsi->vf_id]; - struct device *dev = ice_pf_to_dev(vf->pf); struct ice_vsi_vlan_ops *vlan_ops; + struct 
ice_vf *vf = vsi->vf; + struct device *dev; + + if (WARN_ON(!vf)) + return; + + dev = ice_pf_to_dev(vf->pf); if (!ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf)) return; @@ -192,7 +198,10 @@ void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi) */ void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi) { - struct ice_vf *vf = &vsi->back->vf[vsi->vf_id]; + struct ice_vf *vf = vsi->vf; + + if (WARN_ON(!vf)) + return; if (ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf)) return; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index d64df81d4893..07989f1d08ef 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1288,15 +1288,16 @@ ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { struct ice_pf *pf = ctrl_vsi->back; + struct ice_vf *vf = ctrl_vsi->vf; struct ice_vf_fdir_ctx *ctx_done; struct ice_vf_fdir_ctx *ctx_irq; struct ice_vf_fdir *fdir; unsigned long flags; struct device *dev; - struct ice_vf *vf; int ret; - vf = &pf->vf[ctrl_vsi->vf_id]; + if (WARN_ON(!vf)) + return; fdir = &vf->fdir; ctx_done = &fdir->ctx_done; @@ -1571,15 +1572,16 @@ err_exit: */ void ice_flush_fdir_ctx(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state)) return; - ice_for_each_vf(pf, i) { + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { struct device *dev = ice_pf_to_dev(pf); enum virtchnl_fdir_prgm_status status; - struct ice_vf *vf = &pf->vf[i]; struct ice_vf_fdir_ctx *ctx; unsigned long flags; int ret; @@ -1633,6 +1635,7 @@ err_exit: ctx->flags &= ~ICE_VF_FDIR_CTX_VALID; spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags); } + mutex_unlock(&pf->vfs.table_lock); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c 
index 02a8c15d2bf3..4840570c494d 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -175,18 +175,107 @@ struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) } /** - * ice_validate_vf_id - helper to check if VF ID is valid - * @pf: pointer to the PF structure - * @vf_id: the ID of the VF to check + * ice_get_vf_by_id - Get pointer to VF by ID + * @pf: the PF private structure + * @vf_id: the VF ID to locate + * + * Locate and return a pointer to the VF structure associated with a given ID. + * Returns NULL if the ID does not have a valid VF structure associated with + * it. + * + * This function takes a reference to the VF, which must be released by + * calling ice_put_vf() once the caller is finished accessing the VF structure + * returned. */ -static int ice_validate_vf_id(struct ice_pf *pf, u16 vf_id) +struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) { - /* vf_id range is only valid for 0-255, and should always be unsigned */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %u\n", vf_id); - return -EINVAL; + struct ice_vf *vf; + + rcu_read_lock(); + hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) { + if (vf->vf_id == vf_id) { + struct ice_vf *found; + + if (kref_get_unless_zero(&vf->refcnt)) + found = vf; + else + found = NULL; + + rcu_read_unlock(); + return found; + } } - return 0; + rcu_read_unlock(); + + return NULL; +} + +/** + * ice_release_vf - Release VF associated with a refcount + * @ref: the kref decremented to zero + * + * Callback function for kref_put to release a VF once its reference count has + * hit zero. 
+ */ +static void ice_release_vf(struct kref *ref) +{ + struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt); + + mutex_destroy(&vf->cfg_lock); + + kfree_rcu(vf, rcu); +} + +/** + * ice_put_vf - Release a reference to a VF + * @vf: the VF structure to decrease reference count on + * + * This must be called after ice_get_vf_by_id() once the reference to the VF + * structure is no longer used. Otherwise, the VF structure will never be + * freed. + */ +void ice_put_vf(struct ice_vf *vf) +{ + kref_put(&vf->refcnt, ice_release_vf); +} + +/** + * ice_has_vfs - Return true if the PF has any associated VFs + * @pf: the PF private structure + * + * Return whether or not the PF has any allocated VFs. + * + * Note that this function only guarantees that there are no VFs at the point + * of calling it. It does not guarantee that no more VFs will be added. + */ +bool ice_has_vfs(struct ice_pf *pf) +{ + /* A simple check that the hash table is not empty does not require + * the mutex or rcu_read_lock. + */ + return !hash_empty(pf->vfs.table); +} + +/** + * ice_get_num_vfs - Get number of allocated VFs + * @pf: the PF private structure + * + * Return the total number of allocated VFs. NOTE: VF IDs are not guaranteed + * to be contiguous. Do not assume that a VF ID is guaranteed to be less than + * the output of this function. + */ +u16 ice_get_num_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + u16 num_vfs = 0; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + num_vfs++; + rcu_read_unlock(); + + return num_vfs; } /** @@ -205,6 +294,32 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) } /** + * ice_free_vf_entries - Free all VF entries from the hash table + * @pf: pointer to the PF structure + * + * Iterate over the VF hash table, removing and releasing all VF entries. + * Called during VF teardown or as cleanup during failed VF initialization. 
+ */ +static void ice_free_vf_entries(struct ice_pf *pf) +{ + struct ice_vfs *vfs = &pf->vfs; + struct hlist_node *tmp; + struct ice_vf *vf; + unsigned int bkt; + + /* Remove all VFs from the hash table and release their main + * reference. Once all references to the VF are dropped, ice_put_vf() + * will call ice_release_vf which will remove the VF memory. + */ + lockdep_assert_held(&vfs->table_lock); + + hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) { + hash_del_rcu(&vf->entry); + ice_put_vf(vf); + } +} + +/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -217,11 +332,11 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { struct ice_hw *hw = &pf->hw; - unsigned int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + struct ice_vf *vf; + unsigned int bkt; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { /* Not all vfs are enabled so skip the ones that are not */ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) @@ -233,6 +348,7 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); } + mutex_unlock(&pf->vfs.table_lock); } /** @@ -381,7 +497,7 @@ static void ice_free_vf_res(struct ice_vf *vf) vf->num_mac = 0; } - last_vector_idx = vf->first_vector_idx + pf->num_msix_per_vf - 1; + last_vector_idx = vf->first_vector_idx + pf->vfs.num_msix_per - 1; /* clear VF MDD event information */ memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); @@ -417,7 +533,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); first = vf->first_vector_idx; - last = first + pf->num_msix_per_vf - 1; + last = first + pf->vfs.num_msix_per - 1; for (v = first; v <= last; v++) { u32 reg; @@ -499,16 +615,14 @@ static 
void ice_dis_vf_qs(struct ice_vf *vf) void ice_free_vfs(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); + struct ice_vfs *vfs = &pf->vfs; struct ice_hw *hw = &pf->hw; - unsigned int tmp, i; - - set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); + struct ice_vf *vf; + unsigned int bkt; - if (!pf->vf) + if (!ice_has_vfs(pf)) return; - ice_eswitch_release(pf); - while (test_and_set_bit(ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); @@ -521,58 +635,48 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - /* Avoid wait time by stopping all VFs at the same time */ - ice_for_each_vf(pf, i) - ice_dis_vf_qs(&pf->vf[i]); + mutex_lock(&vfs->table_lock); - tmp = pf->num_alloc_vfs; - pf->num_qps_per_vf = 0; - pf->num_alloc_vfs = 0; - for (i = 0; i < tmp; i++) { - if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { - /* disable VF qp mappings and set VF disable state */ - ice_dis_vf_mappings(&pf->vf[i]); - set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); - ice_free_vf_res(&pf->vf[i]); - } - - mutex_destroy(&pf->vf[i].cfg_lock); - } + ice_eswitch_release(pf); - if (ice_sriov_free_msix_res(pf)) - dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); + ice_for_each_vf(pf, bkt, vf) { + mutex_lock(&vf->cfg_lock); - devm_kfree(dev, pf->vf); - pf->vf = NULL; + ice_dis_vf_qs(vf); - /* This check is for when the driver is unloaded while VFs are - * assigned. Setting the number of VFs to 0 through sysfs is caught - * before this function ever gets called. - */ - if (!pci_vfs_assigned(pf->pdev)) { - unsigned int vf_id; + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + /* disable VF qp mappings and set VF disable state */ + ice_dis_vf_mappings(vf); + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_free_vf_res(vf); + } - /* Acknowledge VFLR for all VFs. Without this, VFs will fail to - * work correctly when SR-IOV gets re-enabled. 
- */ - for (vf_id = 0; vf_id < tmp; vf_id++) { + if (!pci_vfs_assigned(pf->pdev)) { u32 reg_idx, bit_idx; - reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } - } - /* clear malicious info if the VFs are getting released */ - for (i = 0; i < tmp; i++) - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, - ICE_MAX_VF_COUNT, i)) + /* clear malicious info since the VF is getting released */ + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_VF_COUNT, vf->vf_id)) dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - i); + vf->vf_id); + + mutex_unlock(&vf->cfg_lock); + } + + if (ice_sriov_free_msix_res(pf)) + dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); + + vfs->num_qps_per = 0; + ice_free_vf_entries(pf); + + mutex_unlock(&vfs->table_lock); clear_bit(ICE_VF_DIS, pf->state); - clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -666,7 +770,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id, NULL); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); @@ -693,7 +797,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id, NULL); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); ice_vf_ctrl_invalidate_vsi(vf); @@ -716,7 +820,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) */ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) { - return pf->sriov_base_vector 
+ vf->vf_id * pf->num_msix_per_vf; + return pf->sriov_base_vector + vf->vf_id * pf->vfs.num_msix_per; } /** @@ -973,12 +1077,12 @@ static void ice_ena_vf_msix_mappings(struct ice_vf *vf) hw = &pf->hw; pf_based_first_msix = vf->first_vector_idx; - pf_based_last_msix = (pf_based_first_msix + pf->num_msix_per_vf) - 1; + pf_based_last_msix = (pf_based_first_msix + pf->vfs.num_msix_per) - 1; device_based_first_msix = pf_based_first_msix + pf->hw.func_caps.common_cap.msix_vector_first_id; device_based_last_msix = - (device_based_first_msix + pf->num_msix_per_vf) - 1; + (device_based_first_msix + pf->vfs.num_msix_per) - 1; device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id; reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) & @@ -1069,45 +1173,6 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) } /** - * ice_determine_res - * @pf: pointer to the PF structure - * @avail_res: available resources in the PF structure - * @max_res: maximum resources that can be given per VF - * @min_res: minimum resources that can be given per VF - * - * Returns non-zero value if resources (queues/vectors) are available or - * returns zero if PF cannot accommodate for all num_alloc_vfs. - */ -static int -ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res) -{ - bool checked_min_res = false; - int res; - - /* start by checking if PF can assign max number of resources for - * all num_alloc_vfs. 
- * if yes, return number per VF - * If no, divide by 2 and roundup, check again - * repeat the loop till we reach a point where even minimum resources - * are not available, in that case return 0 - */ - res = max_res; - while ((res >= min_res) && !checked_min_res) { - int num_all_res; - - num_all_res = pf->num_alloc_vfs * res; - if (num_all_res <= avail_res) - return res; - - if (res == min_res) - checked_min_res = true; - - res = DIV_ROUND_UP(res, 2); - } - return 0; -} - -/** * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space * @vf: VF to calculate the register index for * @q_vector: a q_vector associated to the VF @@ -1122,7 +1187,7 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) pf = vf->pf; /* always add one to account for the OICR being the first MSIX */ - return pf->sriov_base_vector + pf->num_msix_per_vf * vf->vf_id + + return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id + q_vector->v_idx + 1; } @@ -1186,6 +1251,7 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) /** * ice_set_per_vf_res - check if vectors and queues are available * @pf: pointer to the PF structure + * @num_vfs: the number of SR-IOV VFs being configured * * First, determine HW interrupts from common pool. If we allocate fewer VFs, we * get more vectors and can enable more queues per VF. Note that this does not @@ -1204,20 +1270,22 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used * by each VF during VF initialization and reset. 
*/ -static int ice_set_per_vf_res(struct ice_pf *pf) +static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); - u16 num_msix_per_vf, num_txq, num_rxq; - if (!pf->num_alloc_vfs || max_valid_res_idx < 0) + lockdep_assert_held(&pf->vfs.table_lock); + + if (!num_vfs || max_valid_res_idx < 0) return -EINVAL; /* determine MSI-X resources per VF */ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - pf->irq_tracker->num_entries; - msix_avail_per_vf = msix_avail_for_sriov / pf->num_alloc_vfs; + msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) { @@ -1229,40 +1297,43 @@ static int ice_set_per_vf_res(struct ice_pf *pf) } else { dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. 
Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n", msix_avail_for_sriov, ICE_MIN_INTR_PER_VF, - pf->num_alloc_vfs); + num_vfs); return -EIO; } - /* determine queue resources per VF */ - num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf), - min_t(u16, - num_msix_per_vf - ICE_NONQ_VECS_VF, - ICE_MAX_RSS_QS_PER_VF), - ICE_MIN_QS_PER_VF); + num_txq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, + ICE_MAX_RSS_QS_PER_VF); + avail_qs = ice_get_avail_txq_count(pf) / num_vfs; + if (!avail_qs) + num_txq = 0; + else if (num_txq > avail_qs) + num_txq = rounddown_pow_of_two(avail_qs); - num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf), - min_t(u16, - num_msix_per_vf - ICE_NONQ_VECS_VF, - ICE_MAX_RSS_QS_PER_VF), - ICE_MIN_QS_PER_VF); + num_rxq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, + ICE_MAX_RSS_QS_PER_VF); + avail_qs = ice_get_avail_rxq_count(pf) / num_vfs; + if (!avail_qs) + num_rxq = 0; + else if (num_rxq > avail_qs) + num_rxq = rounddown_pow_of_two(avail_qs); - if (!num_txq || !num_rxq) { + if (num_txq < ICE_MIN_QS_PER_VF || num_rxq < ICE_MIN_QS_PER_VF) { dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n", - ICE_MIN_QS_PER_VF, pf->num_alloc_vfs); + ICE_MIN_QS_PER_VF, num_vfs); return -EIO; } - if (ice_sriov_set_msix_res(pf, num_msix_per_vf * pf->num_alloc_vfs)) { + if (ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs)) { dev_err(dev, "Unable to set MSI-X resources for %d VFs\n", - pf->num_alloc_vfs); + num_vfs); return -EINVAL; } /* only allow equal Tx/Rx queue count (i.e. 
queue pairs) */ - pf->num_qps_per_vf = min_t(int, num_txq, num_rxq); - pf->num_msix_per_vf = num_msix_per_vf; + pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq); + pf->vfs.num_msix_per = num_msix_per_vf; dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n", - pf->num_alloc_vfs, pf->num_msix_per_vf, pf->num_qps_per_vf); + num_vfs, pf->vfs.num_msix_per, pf->vfs.num_qps_per); return 0; } @@ -1509,24 +1580,30 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct ice_vf *vf; - int v, i; + unsigned int bkt; /* If we don't have any VFs, then there is nothing to reset */ - if (!pf->num_alloc_vfs) + if (!ice_has_vfs(pf)) return false; + mutex_lock(&pf->vfs.table_lock); + /* clear all malicious info if the VFs are getting reset */ - ice_for_each_vf(pf, i) - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, i)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); + ice_for_each_vf(pf, bkt, vf) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_VF_COUNT, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + vf->vf_id); /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(ICE_VF_DIS, pf->state)) + if (test_and_set_bit(ICE_VF_DIS, pf->state)) { + mutex_unlock(&pf->vfs.table_lock); return false; + } /* Begin reset on all VFs at once */ - ice_for_each_vf(pf, v) - ice_trigger_vf_reset(&pf->vf[v], is_vflr, true); + ice_for_each_vf(pf, bkt, vf) + ice_trigger_vf_reset(vf, is_vflr, true); /* HW requires some time to make sure it can flush the FIFO for a VF * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in @@ -1534,35 +1611,35 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * the VFs using a simple iterator that increments once that VF has * finished resetting. 
*/ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { - /* Check each VF in sequence */ - while (v < pf->num_alloc_vfs) { - u32 reg; - - vf = &pf->vf[v]; - reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); - if (!(reg & VPGEN_VFRSTAT_VFRD_M)) { - /* only delay if the check failed */ - usleep_range(10, 20); + ice_for_each_vf(pf, bkt, vf) { + bool done = false; + unsigned int i; + u32 reg; + + for (i = 0; i < 10; i++) { + reg = rd32(&pf->hw, VPGEN_VFRSTAT(vf->vf_id)); + if (reg & VPGEN_VFRSTAT_VFRD_M) { + done = true; break; } - /* If the current VF has finished resetting, move on - * to the next VF in sequence. + /* only delay if check failed */ + usleep_range(10, 20); + } + + if (!done) { + /* Display a warning if at least one VF didn't manage + * to reset in time, but continue on with the + * operation. */ - v++; + dev_warn(dev, "VF %u reset check timeout\n", vf->vf_id); + break; } } - /* Display a warning if at least one VF didn't manage to reset in - * time, but continue on with the operation. 
- */ - if (v < pf->num_alloc_vfs) - dev_warn(dev, "VF reset check timeout\n"); - /* free VF resources to begin resetting the VSI state */ - ice_for_each_vf(pf, v) { - vf = &pf->vf[v]; + ice_for_each_vf(pf, bkt, vf) { + mutex_lock(&vf->cfg_lock); vf->driver_caps = 0; ice_vc_set_default_allowlist(vf); @@ -1578,6 +1655,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_vf_pre_vsi_rebuild(vf); ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); + + mutex_unlock(&vf->cfg_lock); } if (ice_is_eswitch_mode_switchdev(pf)) @@ -1587,6 +1666,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_flush(hw); clear_bit(ICE_VF_DIS, pf->state); + mutex_unlock(&pf->vfs.table_lock); + return true; } @@ -1628,6 +1709,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) u32 reg; int i; + lockdep_assert_held(&vf->cfg_lock); + dev = ice_pf_to_dev(pf); if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { @@ -1721,7 +1804,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, vf->vf_id)) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_VF_COUNT, vf->vf_id)) dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); return true; @@ -1733,10 +1817,13 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) */ void ice_vc_notify_link_state(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; - ice_for_each_vf(pf, i) - ice_vc_notify_vf_link_state(&pf->vf[i]); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) + ice_vc_notify_vf_link_state(vf); + mutex_unlock(&pf->vfs.table_lock); } /** @@ -1749,7 +1836,7 @@ void ice_vc_notify_reset(struct ice_pf *pf) { struct virtchnl_pf_event pfe; - if (!pf->num_alloc_vfs) + if (!ice_has_vfs(pf)) return; pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; @@ -1765,14 +1852,7 @@ void ice_vc_notify_reset(struct ice_pf 
*pf) static void ice_vc_notify_vf_reset(struct ice_vf *vf) { struct virtchnl_pf_event pfe; - struct ice_pf *pf; - - if (!vf) - return; - - pf = vf->pf; - if (ice_validate_vf_id(pf, vf->vf_id)) - return; + struct ice_pf *pf = vf->pf; /* Bail out if VF is in disabled state, neither initialized, nor active * state - otherwise proceed with notifications @@ -1858,11 +1938,14 @@ release_vsi: static int ice_start_vfs(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - int retval, i; + unsigned int bkt, it_cnt; + struct ice_vf *vf; + int retval; - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + lockdep_assert_held(&pf->vfs.table_lock); + it_cnt = 0; + ice_for_each_vf(pf, bkt, vf) { ice_clear_vf_reset_trigger(vf); retval = ice_init_vf_vsi_res(vf); @@ -1875,40 +1958,63 @@ static int ice_start_vfs(struct ice_pf *pf) set_bit(ICE_VF_STATE_INIT, vf->vf_states); ice_ena_vf_mappings(vf); wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); + it_cnt++; } ice_flush(hw); return 0; teardown: - for (i = i - 1; i >= 0; i--) { - struct ice_vf *vf = &pf->vf[i]; + ice_for_each_vf(pf, bkt, vf) { + if (it_cnt == 0) + break; ice_dis_vf_mappings(vf); ice_vf_vsi_release(vf); + it_cnt--; } return retval; } /** - * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation - * @pf: PF holding reference to all VFs for default configuration + * ice_create_vf_entries - Allocate and insert VF entries + * @pf: pointer to the PF structure + * @num_vfs: the number of VFs to allocate + * + * Allocate new VF entries and insert them into the hash table. Set some + * basic default fields for initializing the new VFs. + * + * After this function exits, the hash table will have num_vfs entries + * inserted. + * + * Returns 0 on success or an integer error code on failure. 
*/ -static void ice_set_dflt_settings_vfs(struct ice_pf *pf) +static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs) { - int i; + struct ice_vfs *vfs = &pf->vfs; + struct ice_vf *vf; + u16 vf_id; + int err; - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + lockdep_assert_held(&vfs->table_lock); + + for (vf_id = 0; vf_id < num_vfs; vf_id++) { + vf = kzalloc(sizeof(*vf), GFP_KERNEL); + if (!vf) { + err = -ENOMEM; + goto err_free_entries; + } + kref_init(&vf->refcnt); vf->pf = pf; - vf->vf_id = i; + vf->vf_id = vf_id; + vf->vf_sw_id = pf->first_sw; /* assign default capabilities */ set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps); vf->spoofchk = true; - vf->num_vf_qs = pf->num_qps_per_vf; + vf->num_vf_qs = pf->vfs.num_qps_per; ice_vc_set_default_allowlist(vf); /* ctrl_vsi_idx will be set to a valid value only when VF @@ -1920,27 +2026,15 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) ice_vc_set_dflt_vf_ops(&vf->vc_ops); mutex_init(&vf->cfg_lock); - } -} - -/** - * ice_alloc_vfs - allocate num_vfs in the PF structure - * @pf: PF to store the allocated VFs in - * @num_vfs: number of VFs to allocate - */ -static int ice_alloc_vfs(struct ice_pf *pf, int num_vfs) -{ - struct ice_vf *vfs; - - vfs = devm_kcalloc(ice_pf_to_dev(pf), num_vfs, sizeof(*vfs), - GFP_KERNEL); - if (!vfs) - return -ENOMEM; - pf->vf = vfs; - pf->num_alloc_vfs = num_vfs; + hash_add_rcu(vfs->table, &vf->entry, vf_id); + } return 0; + +err_free_entries: + ice_free_vf_entries(pf); + return err; } /** @@ -1961,28 +2055,29 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) ice_flush(hw); ret = pci_enable_sriov(pf->pdev, num_vfs); - if (ret) { - pf->num_alloc_vfs = 0; + if (ret) goto err_unroll_intr; - } - ret = ice_alloc_vfs(pf, num_vfs); - if (ret) - goto err_pci_disable_sriov; + mutex_lock(&pf->vfs.table_lock); - if (ice_set_per_vf_res(pf)) { + if (ice_set_per_vf_res(pf, num_vfs)) { dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n", 
num_vfs); ret = -ENOSPC; goto err_unroll_sriov; } - ice_set_dflt_settings_vfs(pf); + ret = ice_create_vf_entries(pf, num_vfs); + if (ret) { + dev_err(dev, "Failed to allocate VF entries for %d VFs\n", + num_vfs); + goto err_unroll_sriov; + } if (ice_start_vfs(pf)) { dev_err(dev, "Failed to start VF(s)\n"); ret = -EAGAIN; - goto err_unroll_sriov; + goto err_unroll_vf_entries; } clear_bit(ICE_VF_DIS, pf->state); @@ -1995,13 +2090,14 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) ice_irq_dynamic_ena(hw, NULL, NULL); + mutex_unlock(&pf->vfs.table_lock); + return 0; +err_unroll_vf_entries: + ice_free_vf_entries(pf); err_unroll_sriov: - devm_kfree(dev, pf->vf); - pf->vf = NULL; - pf->num_alloc_vfs = 0; -err_pci_disable_sriov: + mutex_unlock(&pf->vfs.table_lock); pci_disable_sriov(pf->pdev); err_unroll_intr: /* rearm interrupts here */ @@ -2028,9 +2124,9 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) else if (pre_existing_vfs && pre_existing_vfs == num_vfs) return 0; - if (num_vfs > pf->num_vfs_supported) { + if (num_vfs > pf->vfs.num_supported) { dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", - num_vfs, pf->num_vfs_supported); + num_vfs, pf->vfs.num_supported); return -EOPNOTSUPP; } @@ -2128,25 +2224,30 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) void ice_process_vflr_event(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - unsigned int vf_id; + struct ice_vf *vf; + unsigned int bkt; u32 reg; if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || - !pf->num_alloc_vfs) + !ice_has_vfs(pf)) return; - ice_for_each_vf(pf, vf_id) { - struct ice_vf *vf = &pf->vf[vf_id]; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { u32 reg_idx, bit_idx; - reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id 
+ vf->vf_id) % 32; /* read GLGEN_VFLRSTAT register to find out the flr VFs */ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); - if (reg & BIT(bit_idx)) + if (reg & BIT(bit_idx)) { /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ + mutex_lock(&vf->cfg_lock); ice_reset_vf(vf, true); + mutex_unlock(&vf->cfg_lock); + } } + mutex_unlock(&pf->vfs.table_lock); } /** @@ -2166,22 +2267,36 @@ static void ice_vc_reset_vf(struct ice_vf *vf) * * If no VF is found who owns the pfq then return NULL, otherwise return a * pointer to the VF who owns the pfq + * + * If this function returns non-NULL, it acquires a reference count of the VF + * structure. The caller is responsible for calling ice_put_vf() to drop this + * reference. */ static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq) { - unsigned int vf_id; + struct ice_vf *vf; + unsigned int bkt; - ice_for_each_vf(pf, vf_id) { - struct ice_vf *vf = &pf->vf[vf_id]; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { struct ice_vsi *vsi; u16 rxq_idx; vsi = ice_get_vf_vsi(vf); ice_for_each_rxq(vsi, rxq_idx) - if (vsi->rxq_map[rxq_idx] == pfq) - return vf; + if (vsi->rxq_map[rxq_idx] == pfq) { + struct ice_vf *found; + + if (kref_get_unless_zero(&vf->refcnt)) + found = vf; + else + found = NULL; + rcu_read_unlock(); + return found; + } } + rcu_read_unlock(); return NULL; } @@ -2222,7 +2337,11 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) if (!vf) return; + mutex_lock(&vf->cfg_lock); ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); + + ice_put_vf(vf); } /** @@ -2243,13 +2362,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, struct ice_pf *pf; int aq_ret; - if (!vf) - return -EINVAL; - pf = vf->pf; - if (ice_validate_vf_id(pf, vf->vf_id)) - return -EINVAL; - dev = ice_pf_to_dev(pf); /* single place to detect unsuccessful return values */ @@ -2440,7 +2553,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->num_vsis = 1; /* Tx and Rx queue are 
equal for VF */ vfres->num_queue_pairs = vsi->num_txq; - vfres->max_vectors = pf->num_msix_per_vf; + vfres->max_vectors = pf->vfs.num_msix_per; vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE; vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vfres->max_mtu = ice_vc_get_max_frame_size(vf); @@ -2514,7 +2627,7 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) vsi = ice_find_vsi_from_id(pf, vsi_id); - return (vsi && (vsi->vf_id == vf->vf_id)); + return (vsi && (vsi->vf == vf)); } /** @@ -3005,30 +3118,34 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) int ret; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; vf_vsi = ice_get_vf_vsi(vf); if (!vf_vsi) { netdev_err(netdev, "VSI %d for VF %d is null\n", vf->lan_vsi_idx, vf->vf_id); - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } if (vf_vsi->type != ICE_VSI_VF) { netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); - return -ENODEV; + ret = -ENODEV; + goto out_put_vf; } if (ena == vf->spoofchk) { dev_dbg(dev, "VF spoofchk already %s\n", ena ? 
"ON" : "OFF"); - return 0; + ret = 0; + goto out_put_vf; } if (ena) @@ -3041,6 +3158,8 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) else vf->spoofchk = ena; +out_put_vf: + ice_put_vf(vf); return ret; } @@ -3053,18 +3172,22 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) */ bool ice_is_any_vf_in_promisc(struct ice_pf *pf) { - int vf_idx; - - ice_for_each_vf(pf, vf_idx) { - struct ice_vf *vf = &pf->vf[vf_idx]; + bool is_vf_promisc = false; + struct ice_vf *vf; + unsigned int bkt; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { /* found a VF that has promiscuous mode configured */ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || - test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) - return true; + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { + is_vf_promisc = true; + break; + } } + rcu_read_unlock(); - return false; + return is_vf_promisc; } /** @@ -3584,7 +3707,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) * there is actually at least a single VF queue vector mapped */ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - pf->num_msix_per_vf < num_q_vectors_mapped || + pf->vfs.num_msix_per < num_q_vectors_mapped || !num_q_vectors_mapped) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3606,7 +3729,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) /* vector_id is always 0-based for each VF, and can never be * larger than or equal to the max allowed interrupts per VF */ - if (!(vector_id < pf->num_msix_per_vf) || + if (!(vector_id < pf->vfs.num_msix_per) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || (!vector_id && (map->rxq_map || map->txq_map))) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -4197,8 +4320,6 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, int ret; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) - return -EINVAL; if (vlan_id >= VLAN_N_VID || qos > 7) { dev_err(dev, "Invalid Port VLAN 
parameters for VF %d, ID %d, QoS %d\n", @@ -4212,10 +4333,13 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, return -EPROTONOSUPPORT; } - vf = &pf->vf[vf_id]; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; if (ice_vf_get_port_vlan_prio(vf) == qos && ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto && @@ -4223,7 +4347,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, /* duplicate request, so just return success */ dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n", vlan_id, qos, local_vlan_proto); - return 0; + ret = 0; + goto out_put_vf; } mutex_lock(&vf->cfg_lock); @@ -4238,7 +4363,9 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, ice_vc_reset_vf(vf); mutex_unlock(&vf->cfg_lock); - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -5759,17 +5886,14 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) struct device *dev; int err = 0; - /* if de-init is underway, don't process messages from VF */ - if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state)) - return; - dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) { - err = -EINVAL; - goto error_handler; - } - vf = &pf->vf[vf_id]; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) { + dev_err(dev, "Unable to locate VF for message from VF ID %d, opcode %d, len %d\n", + vf_id, v_opcode, msglen); + return; + } /* Check if VF is disabled. 
*/ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) { @@ -5792,6 +5916,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0); + ice_put_vf(vf); return; } @@ -5801,6 +5926,7 @@ error_handler: NULL, 0); dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); + ice_put_vf(vf); return; } @@ -5810,6 +5936,7 @@ error_handler: if (!mutex_trylock(&vf->cfg_lock)) { dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", vf->vf_id); + ice_put_vf(vf); return; } @@ -5924,6 +6051,7 @@ error_handler: } mutex_unlock(&vf->cfg_lock); + ice_put_vf(vf); } /** @@ -5939,14 +6067,15 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; ivi->vf = vf_id; ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr); @@ -5967,7 +6096,10 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; ivi->max_tx_rate = vf->max_tx_rate; ivi->min_tx_rate = vf->min_tx_rate; - return 0; + +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -6017,28 +6149,31 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct ice_vf *vf; int ret; - if (ice_validate_vf_id(pf, vf_id)) - return -EINVAL; - if (is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); return -EINVAL; } - vf = &pf->vf[vf_id]; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + /* nothing left to do, unicast MAC already set */ if 
(ether_addr_equal(vf->dev_lan_addr.addr, mac) && - ether_addr_equal(vf->hw_lan_addr.addr, mac)) - return 0; + ether_addr_equal(vf->hw_lan_addr.addr, mac)) { + ret = 0; + goto out_put_vf; + } ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; if (ice_unicast_mac_exists(pf, mac)) { netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n", mac, vf_id, mac); - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } mutex_lock(&vf->cfg_lock); @@ -6062,7 +6197,10 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) ice_vc_reset_vf(vf); mutex_unlock(&vf->cfg_lock); - return 0; + +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -6084,17 +6222,19 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) return -EOPNOTSUPP; } - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; /* Check if already trusted */ - if (trusted == vf->trusted) - return 0; + if (trusted == vf->trusted) { + ret = 0; + goto out_put_vf; + } mutex_lock(&vf->cfg_lock); @@ -6105,7 +6245,9 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) mutex_unlock(&vf->cfg_lock); - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -6122,13 +6264,13 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) struct ice_vf *vf; int ret; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: @@ -6143,12 +6285,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) vf->link_up = false; break; default: - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } 
ice_vc_notify_vf_link_state(vf); - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -6157,10 +6302,14 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) */ static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) { - int rate = 0, i; + struct ice_vf *vf; + unsigned int bkt; + int rate = 0; - ice_for_each_vf(pf, i) - rate += pf->vf[i].min_tx_rate; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + rate += vf->min_tx_rate; + rcu_read_unlock(); return rate; } @@ -6215,13 +6364,14 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int ret; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; vsi = ice_get_vf_vsi(vf); @@ -6231,23 +6381,27 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, if (max_tx_rate && min_tx_rate > max_tx_rate) { dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", min_tx_rate, max_tx_rate); - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } if (min_tx_rate && ice_is_dcb_active(pf)) { dev_err(dev, "DCB on PF is currently enabled. 
VF min Tx rate limiting not allowed on this PF.\n"); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + goto out_put_vf; } - if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) - return -EINVAL; + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) { + ret = -EINVAL; + goto out_put_vf; + } if (vf->min_tx_rate != (unsigned int)min_tx_rate) { ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); if (ret) { dev_err(dev, "Unable to set min-tx-rate for VF %d\n", vf->vf_id); - return ret; + goto out_put_vf; } vf->min_tx_rate = min_tx_rate; @@ -6258,13 +6412,15 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, if (ret) { dev_err(dev, "Unable to set max-tx-rate for VF %d\n", vf->vf_id); - return ret; + goto out_put_vf; } vf->max_tx_rate = max_tx_rate; } - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -6282,17 +6438,19 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, struct ice_vf *vf; int ret; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; + if (!vsi) { + ret = -EINVAL; + goto out_put_vf; + } ice_update_eth_stats(vsi); stats = &vsi->eth_stats; @@ -6310,7 +6468,9 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, vf_stats->rx_dropped = stats->rx_discards; vf_stats->tx_dropped = stats->tx_discards; - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -6341,21 +6501,21 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - int i; + struct ice_vf *vf; + unsigned int bkt; /* check that there are pending MDD events to print */ if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) return; /* VF MDD event logs are rate limited to one second intervals */ - if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1)) 
+ if (time_is_after_jiffies(pf->vfs.last_printed_mdd_jiffies + HZ * 1)) return; - pf->last_printed_mdd_jiffies = jiffies; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + pf->vfs.last_printed_mdd_jiffies = jiffies; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { /* only print Rx MDD event message if there are new events */ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) { vf->mdd_rx_events.last_printed = @@ -6369,10 +6529,11 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) vf->mdd_tx_events.count; dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", - vf->mdd_tx_events.count, hw->pf_id, i, + vf->mdd_tx_events.count, hw->pf_id, vf->vf_id, vf->dev_lan_addr.addr); } } + mutex_unlock(&pf->vfs.table_lock); } /** @@ -6424,13 +6585,12 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, struct ice_vf *vf; int status; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return false; - vf = &pf->vf[vf_id]; - /* Check if VF is disabled. 
*/ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) - return false; + goto out_put_vf; mbxdata.num_msg_proc = num_msg_proc; mbxdata.num_pending_arq = num_msg_pending; @@ -6441,7 +6601,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, /* check to see if we have a malicious VF */ status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf); if (status) - return false; + goto out_put_vf; if (malvf) { bool report_vf = false; @@ -6449,7 +6609,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, /* if the VF is malicious and we haven't let the user * know about it, then let them know now */ - status = ice_mbx_report_malvf(&pf->hw, pf->malvfs, + status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs, ICE_MAX_VF_COUNT, vf_id, &report_vf); if (status) @@ -6463,12 +6623,9 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, &vf->dev_lan_addr.addr[0], pf_vsi->netdev->dev_addr); } - - return true; } - /* if there was an error in detection or the VF is not malicious then - * return false - */ - return false; +out_put_vf: + ice_put_vf(vf); + return malvf; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 4f4961043638..02e3d306f6dd 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -39,8 +39,50 @@ #define ICE_MAX_VF_RESET_TRIES 40 #define ICE_MAX_VF_RESET_SLEEP_MS 20 -#define ice_for_each_vf(pf, i) \ - for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) +/* VF Hash Table access functions + * + * These functions provide abstraction for interacting with the VF hash table. + * In general, direct access to the hash table should be avoided outside of + * these functions where possible. + * + * The VF entries in the hash table are protected by reference counting to + * track lifetime of accesses from the table. 
The ice_get_vf_by_id() function + * obtains a reference to the VF structure which must be dropped by using + * ice_put_vf(). + */ + +/** + * ice_for_each_vf - Iterate over each VF entry + * @pf: pointer to the PF private structure + * @bkt: bucket index used for iteration + * @vf: pointer to the VF entry currently being processed in the loop. + * + * The bkt variable is an unsigned integer iterator used to traverse the VF + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. + * Use vf->vf_id to get the id number if needed. + * + * The caller is expected to be under the table_lock mutex for the entire + * loop. Use this iterator if your loop is long or if it might sleep. + */ +#define ice_for_each_vf(pf, bkt, vf) \ + hash_for_each((pf)->vfs.table, (bkt), (vf), entry) + +/** + * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU + * @pf: pointer to the PF private structure + * @bkt: bucket index used for iteration + * @vf: pointer to the VF entry currently being processed in the loop. + * + * The bkt variable is an unsigned integer iterator used to traverse the VF + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. + * Use vf->vf_id to get the id number if needed. + * + * The caller is expected to be under rcu_read_lock() for the entire loop. + * Only use this iterator if your loop is short and you can guarantee it does + * not sleep. 
+ */ +#define ice_for_each_vf_rcu(pf, bkt, vf) \ + hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry) /* Specific VF states */ enum ice_vf_states { @@ -104,8 +146,22 @@ struct ice_vc_vf_ops { int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); }; +/* Virtchnl/SR-IOV config info */ +struct ice_vfs { + DECLARE_HASHTABLE(table, 8); /* table of VF entries */ + struct mutex table_lock; /* Lock for protecting the hash table */ + u16 num_supported; /* max supported VFs on this PF */ + u16 num_qps_per; /* number of queue pairs per VF */ + u16 num_msix_per; /* number of MSI-X vectors per VF */ + unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */ + DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); /* malicious VF indicator */ +}; + /* VF information structure */ struct ice_vf { + struct hlist_node entry; + struct rcu_head rcu; + struct kref refcnt; struct ice_pf *pf; /* Used during virtchnl message handling and NDO ops against the VF @@ -162,6 +218,10 @@ struct ice_vf { }; #ifdef CONFIG_PCI_IOV +struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id); +void ice_put_vf(struct ice_vf *vf); +bool ice_has_vfs(struct ice_pf *pf); +u16 ice_get_num_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); void ice_process_vflr_event(struct ice_pf *pf); int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); @@ -221,6 +281,25 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); bool ice_vf_is_port_vlan_ena(struct ice_vf *vf); #else /* CONFIG_PCI_IOV */ +static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) +{ + return NULL; +} + +static inline void ice_put_vf(struct ice_vf *vf) +{ +} + +static inline bool ice_has_vfs(struct ice_pf *pf) +{ + return false; +} + +static inline u16 ice_get_num_vfs(struct ice_pf *pf) +{ + return 0; +} + static inline void ice_process_vflr_event(struct ice_pf *pf) { } static inline void ice_free_vfs(struct ice_pf *pf) { 
} static inline diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 5cad31c3c7b0..40dbf4b43234 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -746,8 +746,6 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) if (ret_val) return ret_val; ret_val = igc_write_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr, @@ -779,8 +777,6 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) if (ret_val) return ret_val; ret_val = igc_read_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index e90b5047e695..4c26c4b92f07 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -30,7 +30,7 @@ static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data); static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, u16 offset); -static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw); +static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw); /* Base table for registers values that change by MAC */ const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT] = { @@ -746,10 +746,10 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) usleep_range(1000, 2000); /* - * Prevent the PCI-E bus from from hanging by disabling PCI-E master + * Prevent the PCI-E bus from hanging by disabling PCI-E primary * access and verify no pending requests */ - return ixgbe_disable_pcie_master(hw); + return ixgbe_disable_pcie_primary(hw); } /** @@ -2506,15 +2506,15 @@ static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw) } /** - * 
ixgbe_disable_pcie_master - Disable PCI-express master access + * ixgbe_disable_pcie_primary - Disable PCI-express primary access * @hw: pointer to hardware structure * - * Disables PCI-Express master access and verifies there are no pending - * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable - * bit hasn't caused the master requests to be disabled, else 0 - * is returned signifying master requests disabled. + * Disables PCI-Express primary access and verifies there are no pending + * requests. IXGBE_ERR_PRIMARY_REQUESTS_PENDING is returned if primary disable + * bit hasn't caused the primary requests to be disabled, else 0 + * is returned signifying primary requests disabled. **/ -static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) +static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw) { u32 i, poll; u16 value; @@ -2523,23 +2523,23 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); /* Poll for bit to read as set */ - for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + for (i = 0; i < IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT; i++) { if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS) break; usleep_range(100, 120); } - if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) { + if (i >= IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT) { hw_dbg(hw, "GIO disable did not set - requesting resets\n"); goto gio_disable_fail; } - /* Exit if master requests are blocked */ + /* Exit if primary requests are blocked */ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) || ixgbe_removed(hw->hw_addr)) return 0; - /* Poll for master request bit to clear */ - for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + /* Poll for primary request bit to clear */ + for (i = 0; i < IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT; i++) { udelay(100); if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) return 0; @@ -2547,13 +2547,13 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) /* * Two 
consecutive resets are required via CTRL.RST per datasheet - * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine - * of this need. The first reset prevents new master requests from + * 5.2.5.3.2 Primary Disable. We set a flag to inform the reset routine + * of this need. The first reset prevents new primary requests from * being issued by our device. We then must wait 1usec or more for any * remaining completions from the PCIe bus to trickle in, and then reset * again to clear out any effects they may have had on our device. */ - hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); + hw_dbg(hw, "GIO Primary Disable bit didn't clear - requesting resets\n"); gio_disable_fail: hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; @@ -2575,7 +2575,7 @@ gio_disable_fail: } hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); - return IXGBE_ERR_MASTER_REQUESTS_PENDING; + return IXGBE_ERR_PRIMARY_REQUESTS_PENDING; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2c8a4a06f56a..60a72af39ff7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5948,8 +5948,8 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) case IXGBE_ERR_SFP_NOT_PRESENT: case IXGBE_ERR_SFP_NOT_SUPPORTED: break; - case IXGBE_ERR_MASTER_REQUESTS_PENDING: - e_dev_err("master disable timed out\n"); + case IXGBE_ERR_PRIMARY_REQUESTS_PENDING: + e_dev_err("primary disable timed out\n"); break; case IXGBE_ERR_EEPROM_VERSION: /* We are running on a pre-production device, log a warning */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2647937f7f4d..6da9880d766a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1247,7 +1247,7 @@ struct ixgbe_nvm_version { #define IXGBE_PSRTYPE_RQPL_SHIFT 29 /* CTRL Bit Masks */ 
-#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */ +#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Primary Disable bit */ #define IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */ #define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */ #define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST) @@ -1811,7 +1811,7 @@ enum { /* STATUS Bit Masks */ #define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */ #define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift*/ -#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Enable Status */ +#define IXGBE_STATUS_GIO 0x00080000 /* GIO Primary Enable Status */ #define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */ #define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */ @@ -2193,8 +2193,8 @@ enum { #define IXGBE_PCIDEVCTRL2_4_8s 0xd #define IXGBE_PCIDEVCTRL2_17_34s 0xe -/* Number of 100 microseconds we wait for PCI Express master disable */ -#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 +/* Number of 100 microseconds we wait for PCI Express primary disable */ +#define IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT 800 /* RAH */ #define IXGBE_RAH_VIND_MASK 0x003C0000 @@ -3671,7 +3671,7 @@ struct ixgbe_info { #define IXGBE_ERR_ADAPTER_STOPPED -9 #define IXGBE_ERR_INVALID_MAC_ADDR -10 #define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 -#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12 +#define IXGBE_ERR_PRIMARY_REQUESTS_PENDING -12 #define IXGBE_ERR_INVALID_LINK_SETTINGS -13 #define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 #define IXGBE_ERR_RESET_FAILED -15 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index ee28929b9c5f..dd7ff66d422f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -395,12 +395,14 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) u32 cmd_type; while (budget-- > 0) { - if (unlikely(!ixgbe_desc_unused(xdp_ring)) || - !netif_carrier_ok(xdp_ring->netdev)) { + if 
(unlikely(!ixgbe_desc_unused(xdp_ring))) { work_done = false; break; } + if (!netif_carrier_ok(xdp_ring->netdev)) + break; + if (!xsk_tx_peek_desc(pool, &desc)) break; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 17fbc450da61..84222ec2393c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2753,7 +2753,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, ring->reg_idx = reg_idx; /* assign ring to adapter */ - adapter->tx_ring[txr_idx] = ring; + adapter->tx_ring[txr_idx] = ring; /* update count and index */ txr_count--; diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 439674fc9765..b6c5122da995 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -28,6 +28,7 @@ #include <linux/udp.h> #include <linux/if_vlan.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include <net/ip6_checksum.h> #include "jme.h" @@ -2179,7 +2180,7 @@ jme_stop_queue_if_full(struct jme_adapter *jme) } if (unlikely(txbi->start_xmit && - (jiffies - txbi->start_xmit) >= TX_TIMEOUT && + time_is_before_eq_jiffies(txbi->start_xmit + TX_TIMEOUT) && txbi->skb)) { netif_stop_queue(jme->dev); netif_info(jme, tx_queued, jme->dev, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 105247582684..c31cbbae0eca 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1661,7 +1661,7 @@ mv643xx_eth_set_ringparam(struct net_device *dev, struct ethtool_ringparam *er, if (er->rx_mini_pending || er->rx_jumbo_pending) return -EINVAL; - mp->rx_ring_size = er->rx_pending < 4096 ? 
er->rx_pending : 4096; + mp->rx_ring_size = min(er->rx_pending, 4096U); mp->tx_ring_size = clamp_t(unsigned int, er->tx_pending, MV643XX_MAX_SKB_DESCS * 2, 4096); if (mp->tx_ring_size != er->tx_pending) @@ -2704,6 +2704,16 @@ MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids); static struct platform_device *port_platdev[3]; +static void mv643xx_eth_shared_of_remove(void) +{ + int n; + + for (n = 0; n < 3; n++) { + platform_device_del(port_platdev[n]); + port_platdev[n] = NULL; + } +} + static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, struct device_node *pnp) { @@ -2740,7 +2750,9 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, return -EINVAL; } - of_get_mac_address(pnp, ppd.mac_addr); + ret = of_get_mac_address(pnp, ppd.mac_addr); + if (ret) + return ret; mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); @@ -2804,21 +2816,13 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_add_port(pdev, pnp); if (ret) { of_node_put(pnp); + mv643xx_eth_shared_of_remove(); return ret; } } return 0; } -static void mv643xx_eth_shared_of_remove(void) -{ - int n; - - for (n = 0; n < 3; n++) { - platform_device_del(port_platdev[n]); - port_platdev[n] = NULL; - } -} #else static inline int mv643xx_eth_shared_of_probe(struct platform_device *pdev) { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 7cdbf8b8bbf6..1a835b48791b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6870,6 +6870,9 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE; dev->dev.of_node = port_node; + port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; + port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; + if (!mvpp2_use_acpi_compat_mode(port_fwnode)) { 
port->phylink_config.dev = &dev->dev; port->phylink_config.type = PHYLINK_NETDEV; @@ -6940,9 +6943,6 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phylink_config.supported_interfaces); } - port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; - port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; - phylink = phylink_create(&port->phylink_config, port_fwnode, phy_mode, &mvpp2_phylink_ops); if (IS_ERR(phylink)) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index e682b7bfde64..67a6821d2dff 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -25,6 +25,9 @@ #define PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP 0xB600 #define PCI_DEVID_OCTEONTX2_RST 0xA085 #define PCI_DEVID_CN10K_PTP 0xA09E +#define PCI_SUBSYS_DEVID_CN10K_A_PTP 0xB900 +#define PCI_SUBSYS_DEVID_CNF10K_A_PTP 0xBA00 +#define PCI_SUBSYS_DEVID_CNF10K_B_PTP 0xBC00 #define PCI_PTP_BAR_NO 0 @@ -46,10 +49,105 @@ #define PTP_CLOCK_HI 0xF10ULL #define PTP_CLOCK_COMP 0xF18ULL #define PTP_TIMESTAMP 0xF20ULL +#define PTP_CLOCK_SEC 0xFD0ULL + +#define CYCLE_MULT 1000 static struct ptp *first_ptp_block; static const struct pci_device_id ptp_id_table[]; +static bool cn10k_ptp_errata(struct ptp *ptp) +{ + if (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A_PTP || + ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_A_PTP) + return true; + return false; +} + +static bool is_ptp_tsfmt_sec_nsec(struct ptp *ptp) +{ + if (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A_PTP || + ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_A_PTP) + return true; + return false; +} + +static u64 read_ptp_tstmp_sec_nsec(struct ptp *ptp) +{ + u64 sec, sec1, nsec; + unsigned long flags; + + spin_lock_irqsave(&ptp->ptp_lock, flags); + sec = readq(ptp->reg_base + PTP_CLOCK_SEC) & 0xFFFFFFFFUL; + nsec = readq(ptp->reg_base + PTP_CLOCK_HI); + sec1 = readq(ptp->reg_base + PTP_CLOCK_SEC) & 
0xFFFFFFFFUL; + /* check nsec rollover */ + if (sec1 > sec) { + nsec = readq(ptp->reg_base + PTP_CLOCK_HI); + sec = sec1; + } + spin_unlock_irqrestore(&ptp->ptp_lock, flags); + + return sec * NSEC_PER_SEC + nsec; +} + +static u64 read_ptp_tstmp_nsec(struct ptp *ptp) +{ + return readq(ptp->reg_base + PTP_CLOCK_HI); +} + +static u64 ptp_calc_adjusted_comp(u64 ptp_clock_freq) +{ + u64 comp, adj = 0, cycles_per_sec, ns_drift = 0; + u32 ptp_clock_nsec, cycle_time; + int cycle; + + /* Errata: + * Issue #1: At the time of 1 sec rollover of the nano-second counter, + * the nano-second counter is set to 0. However, it should be set to + * (existing counter_value - 10^9). + * + * Issue #2: The nano-second counter rolls over at 0x3B9A_C9FF. + * It should roll over at 0x3B9A_CA00. + */ + + /* calculate ptp_clock_comp value */ + comp = ((u64)1000000000ULL << 32) / ptp_clock_freq; + /* use CYCLE_MULT to avoid accuracy loss due to integer arithmetic */ + cycle_time = NSEC_PER_SEC * CYCLE_MULT / ptp_clock_freq; + /* cycles per sec */ + cycles_per_sec = ptp_clock_freq; + + /* check whether ptp nanosecond counter rolls over early */ + cycle = cycles_per_sec - 1; + ptp_clock_nsec = (cycle * comp) >> 32; + while (ptp_clock_nsec < NSEC_PER_SEC) { + if (ptp_clock_nsec == 0x3B9AC9FF) + goto calc_adj_comp; + cycle++; + ptp_clock_nsec = (cycle * comp) >> 32; + } + /* compute nanoseconds lost per second when nsec counter rolls over */ + ns_drift = ptp_clock_nsec - NSEC_PER_SEC; + /* calculate ptp_clock_comp adjustment */ + if (ns_drift > 0) { + adj = comp * ns_drift; + adj = adj / 1000000000ULL; + } + /* speed up the ptp clock to account for nanoseconds lost */ + comp += adj; + return comp; + +calc_adj_comp: + /* slow down the ptp clock to not rollover early */ + adj = comp * cycle_time; + adj = adj / 1000000000ULL; + adj = adj / CYCLE_MULT; + comp -= adj; + + return comp; +} + struct ptp *ptp_get(void) { struct ptp *ptp = first_ptp_block; @@ -77,8 +175,8 @@ void ptp_put(struct ptp *ptp) 
static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) { bool neg_adj = false; - u64 comp; - u64 adj; + u32 freq, freq_adj; + u64 comp, adj; s64 ppb; if (scaled_ppm < 0) { @@ -100,15 +198,22 @@ static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) * where tbase is the basic compensation value calculated * initialy in the probe function. */ - comp = ((u64)1000000000ull << 32) / ptp->clock_rate; /* convert scaled_ppm to ppb */ ppb = 1 + scaled_ppm; ppb *= 125; ppb >>= 13; - adj = comp * ppb; - adj = div_u64(adj, 1000000000ull); - comp = neg_adj ? comp - adj : comp + adj; + if (cn10k_ptp_errata(ptp)) { + /* calculate the new frequency based on ppb */ + freq_adj = (ptp->clock_rate * ppb) / 1000000000ULL; + freq = neg_adj ? ptp->clock_rate + freq_adj : ptp->clock_rate - freq_adj; + comp = ptp_calc_adjusted_comp(freq); + } else { + comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + adj = comp * ppb; + adj = div_u64(adj, 1000000000ull); + comp = neg_adj ? comp - adj : comp + adj; + } writeq(comp, ptp->reg_base + PTP_CLOCK_COMP); return 0; @@ -117,7 +222,7 @@ static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) static int ptp_get_clock(struct ptp *ptp, u64 *clk) { /* Return the current PTP clock */ - *clk = readq(ptp->reg_base + PTP_CLOCK_HI); + *clk = ptp->read_ptp_tstmp(ptp); return 0; } @@ -166,7 +271,11 @@ void ptp_start(struct ptp *ptp, u64 sclk, u32 ext_clk_freq, u32 extts) writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_HI_INCR); writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_LO_INCR); - clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + if (cn10k_ptp_errata(ptp)) + clock_comp = ptp_calc_adjusted_comp(ptp->clock_rate); + else + clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + /* Initial compensation value to start the nanosecs counter */ writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP); } @@ -214,6 +323,12 @@ static int ptp_probe(struct pci_dev *pdev, if (!first_ptp_block) first_ptp_block = ptp; + 
spin_lock_init(&ptp->ptp_lock); + if (is_ptp_tsfmt_sec_nsec(ptp)) + ptp->read_ptp_tstmp = &read_ptp_tstmp_sec_nsec; + else + ptp->read_ptp_tstmp = &read_ptp_tstmp_nsec; + return 0; error_free: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h index 1b81a0493cd3..95a955159f40 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h @@ -15,6 +15,8 @@ struct ptp { struct pci_dev *pdev; void __iomem *reg_base; + u64 (*read_ptp_tstmp)(struct ptp *ptp); + spinlock_t ptp_lock; /* lock */ u32 clock_rate; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 2c9760814bc3..b9d7601138ca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1048,7 +1048,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) struct nix_lf_alloc_rsp *rsp; int err; - pfvf->qset.xqe_size = NIX_XQESZ_W16 ? 128 : 512; + pfvf->qset.xqe_size = pfvf->hw.xqe_size; /* Get memory to put this msg */ nixlf = otx2_mbox_alloc_msg_nix_lf_alloc(&pfvf->mbox); @@ -1061,7 +1061,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) nixlf->cq_cnt = pfvf->qset.cq_cnt; nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; nixlf->rss_grps = MAX_RSS_GROUPS; - nixlf->xqe_sz = NIX_XQESZ_W16; + nixlf->xqe_sz = pfvf->hw.xqe_size == 128 ? NIX_XQESZ_W16 : NIX_XQESZ_W64; /* We don't know absolute NPA LF idx attached. * AF will replace 'RVU_DEFAULT_PF_FUNC' with * NPA LF attached to this RVU PF/VF. 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 7724f17ec31f..c587c14ac2a3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -17,6 +17,7 @@ #include <linux/soc/marvell/octeontx2/asm.h> #include <net/pkt_cls.h> #include <net/devlink.h> +#include <linux/time64.h> #include <mbox.h> #include <npc.h> @@ -180,6 +181,7 @@ struct otx2_hw { #define OTX2_DEFAULT_RBUF_LEN 2048 u16 rbuf_len; + u32 xqe_size; /* NPA */ u32 stack_pg_ptrs; /* No of ptrs per stack page */ @@ -275,6 +277,8 @@ struct otx2_ptp { u64 thresh; struct ptp_pin_desc extts_config; + u64 (*convert_rx_ptp_tstmp)(u64 timestamp); + u64 (*convert_tx_ptp_tstmp)(u64 timestamp); }; #define OTX2_HW_TIMESTAMP_LEN 8 diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index abe5267210ef..fc328de5345e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -372,6 +372,7 @@ static void otx2_get_ringparam(struct net_device *netdev, ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX); ring->tx_pending = qs->sqe_cnt ? 
qs->sqe_cnt : Q_COUNT(Q_SIZE_4K); kernel_ring->rx_buf_len = pfvf->hw.rbuf_len; + kernel_ring->cqe_size = pfvf->hw.xqe_size; } static int otx2_set_ringparam(struct net_device *netdev, @@ -382,6 +383,7 @@ static int otx2_set_ringparam(struct net_device *netdev, struct otx2_nic *pfvf = netdev_priv(netdev); u32 rx_buf_len = kernel_ring->rx_buf_len; u32 old_rx_buf_len = pfvf->hw.rbuf_len; + u32 xqe_size = kernel_ring->cqe_size; bool if_up = netif_running(netdev); struct otx2_qset *qs = &pfvf->qset; u32 rx_count, tx_count; @@ -398,6 +400,12 @@ static int otx2_set_ringparam(struct net_device *netdev, return -EINVAL; } + if (xqe_size != 128 && xqe_size != 512) { + netdev_err(netdev, + "Completion event size must be 128 or 512"); + return -EINVAL; + } + /* Permitted lengths are 16 64 256 1K 4K 16K 64K 256K 1M */ rx_count = ring->rx_pending; /* On some silicon variants a skid or reserved CQEs are @@ -416,7 +424,7 @@ static int otx2_set_ringparam(struct net_device *netdev, tx_count = Q_COUNT(Q_SIZE(tx_count, 3)); if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt && - rx_buf_len == old_rx_buf_len) + rx_buf_len == old_rx_buf_len && xqe_size == pfvf->hw.xqe_size) return 0; if (if_up) @@ -427,6 +435,7 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->rqe_cnt = rx_count; pfvf->hw.rbuf_len = rx_buf_len; + pfvf->hw.xqe_size = xqe_size; if (if_up) return netdev->netdev_ops->ndo_open(netdev); @@ -1222,7 +1231,8 @@ end: static const struct ethtool_ops otx2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, - .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN, + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | + ETHTOOL_RING_USE_CQE_SIZE, .get_link = otx2_get_link, .get_drvinfo = otx2_get_drvinfo, .get_strings = otx2_get_strings, @@ -1342,7 +1352,8 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, static const struct ethtool_ops otx2vf_ethtool_ops = { .supported_coalesce_params = 
ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, - .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN, + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | + ETHTOOL_RING_USE_CQE_SIZE, .get_link = otx2_get_link, .get_drvinfo = otx2vf_get_drvinfo, .get_strings = otx2vf_get_strings, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a5369167ab54..441aafc26a08 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2585,6 +2585,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->tot_tx_queues = qcount; hw->max_queues = qcount; hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; + /* Use CQE of 128 byte descriptor size by default */ + hw->xqe_size = 128; num_vec = pci_msix_vec_count(pdev); hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index 61c20907315f..fdc2c9315b91 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -294,6 +294,14 @@ int otx2_ptp_init(struct otx2_nic *pfvf) goto error; } + if (is_dev_otx2(pfvf->pdev)) { + ptp_ptr->convert_rx_ptp_tstmp = &otx2_ptp_convert_rx_timestamp; + ptp_ptr->convert_tx_ptp_tstmp = &otx2_ptp_convert_tx_timestamp; + } else { + ptp_ptr->convert_rx_ptp_tstmp = &cn10k_ptp_convert_timestamp; + ptp_ptr->convert_tx_ptp_tstmp = &cn10k_ptp_convert_timestamp; + } + pfvf->ptp = ptp_ptr; error: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h index 6ff284211d7b..7ff41927ceaf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h @@ -8,6 +8,21 @@ #ifndef OTX2_PTP_H #define OTX2_PTP_H +static inline u64 
otx2_ptp_convert_rx_timestamp(u64 timestamp) +{ + return be64_to_cpu(*(__be64 *)&timestamp); +} + +static inline u64 otx2_ptp_convert_tx_timestamp(u64 timestamp) +{ + return timestamp; +} + +static inline u64 cn10k_ptp_convert_timestamp(u64 timestamp) +{ + return ((timestamp >> 32) * NSEC_PER_SEC) + (timestamp & 0xFFFFFFFFUL); +} + int otx2_ptp_init(struct otx2_nic *pfvf); void otx2_ptp_destroy(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 0593106d7161..28b19945d716 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -190,6 +190,40 @@ static int otx2_tc_validate_flow(struct otx2_nic *nic, return 0; } +static int otx2_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, struct tc_cls_matchall_offload *cls) { @@ -212,6 +246,10 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, 
entry = &cls->rule->action.entries[0]; switch (entry->id) { case FLOW_ACTION_POLICE: + err = otx2_policer_validate(&cls->rule->action, entry, extack); + if (err) + return err; + if (entry->police.rate_pkt_ps) { NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); return -EOPNOTSUPP; @@ -315,6 +353,7 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, u8 nr_police = 0; bool pps = false; u64 rate; + int err; int i; if (!flow_action_has_entries(flow_action)) { @@ -355,6 +394,10 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, return -EOPNOTSUPP; } + err = otx2_policer_validate(flow_action, act, extack); + if (err) + return err; + if (act->police.rate_bytes_ps > 0) { rate = act->police.rate_bytes_ps * 8; burst = act->police.burst; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 7c4068c5d1ac..c26de15b2ac3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -148,6 +148,7 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { timestamp = ((u64 *)sq->timestamps->base)[snd_comp->sqe_id]; if (timestamp != 1) { + timestamp = pfvf->ptp->convert_tx_ptp_tstmp(timestamp); err = otx2_ptp_tstamp2time(pfvf, timestamp, &tsns); if (!err) { memset(&ts, 0, sizeof(ts)); @@ -167,14 +168,15 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, static void otx2_set_rxtstamp(struct otx2_nic *pfvf, struct sk_buff *skb, void *data) { - u64 tsns; + u64 timestamp, tsns; int err; if (!(pfvf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED)) return; + timestamp = pfvf->ptp->convert_rx_ptp_tstmp(*(u64 *)data); /* The first 8 bytes is the timestamp */ - err = otx2_ptp_tstamp2time(pfvf, be64_to_cpu(*(__be64 *)data), &tsns); + err = otx2_ptp_tstamp2time(pfvf, timestamp, &tsns); if (err) return; diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index a232e202f6a4..9e87836ed8bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -572,6 +572,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->max_queues = qcount; hw->tot_tx_queues = qcount; hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; + /* Use CQE of 128 byte descriptor size by default */ + hw->xqe_size = 128; hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, GFP_KERNEL); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.c b/drivers/net/ethernet/marvell/prestera/prestera_acl.c index 06303e31b32a..e4af8a503277 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_acl.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.c @@ -97,7 +97,7 @@ int prestera_acl_chain_to_client(u32 chain_index, u32 *client) PRESTERA_HW_COUNTER_CLIENT_LOOKUP_2 }; - if (chain_index > ARRAY_SIZE(client_map)) + if (chain_index >= ARRAY_SIZE(client_map)) return -EINVAL; *client = client_map[chain_index]; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index 580fb986496a..921959a980ee 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -12,18 +12,21 @@ struct prestera_flower_template { u32 chain_index; }; +static void +prestera_flower_template_free(struct prestera_flower_template *template) +{ + prestera_acl_ruleset_put(template->ruleset); + list_del(&template->list); + kfree(template); +} + void prestera_flower_template_cleanup(struct prestera_flow_block *block) { - struct prestera_flower_template *template; - struct list_head *pos, *n; + struct prestera_flower_template *template, *tmp; /* put the reference to all rulesets kept in tmpl create */ - list_for_each_safe(pos, n, 
&block->template_list) { - template = list_entry(pos, typeof(*template), list); - prestera_acl_ruleset_put(template->ruleset); - list_del(&template->list); - kfree(template); - } + list_for_each_entry_safe(template, tmp, &block->template_list, list) + prestera_flower_template_free(template); } static int @@ -423,7 +426,14 @@ err_malloc: void prestera_flower_tmplt_destroy(struct prestera_flow_block *block, struct flow_cls_offload *f) { - prestera_flower_template_cleanup(block); + struct prestera_flower_template *template, *tmp; + + list_for_each_entry_safe(template, tmp, &block->template_list, list) + if (template->chain_index == f->common.chain_index) { + /* put the reference to the ruleset kept in create */ + prestera_flower_template_free(template); + return; + } } int prestera_flower_stats(struct prestera_flow_block *block, diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 54ebda61bfea..6c5618cf4f08 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -68,7 +68,7 @@ prestera_kern_fib_cache_find(struct prestera_switch *sw, fib_cache = rhashtable_lookup_fast(&sw->router->kern_fib_cache_ht, key, __prestera_kern_fib_cache_ht_params); - return IS_ERR(fib_cache) ? NULL : fib_cache; + return fib_cache; } static void diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index d62adb970dd5..5b0cf3be9a9e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -239,7 +239,7 @@ prestera_fib_node_find(struct prestera_switch *sw, struct prestera_fib_key *key) fib_node = rhashtable_lookup_fast(&sw->router->fib_ht, key, __prestera_fib_ht_params); - return IS_ERR(fib_node) ? 
NULL : fib_node; + return fib_node; } static void __prestera_fib_node_destruct(struct prestera_switch *sw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 17fe05809653..823d5808d5a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -190,10 +190,10 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block) int xor_len = sizeof(*block) - sizeof(block->data) - 1; if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) - return -EINVAL; + return -EHWPOISON; if (xor8_buf(block, 0, sizeof(*block)) != 0xff) - return -EINVAL; + return -EHWPOISON; return 0; } @@ -259,12 +259,12 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); if (sig != 0xff) - return -EINVAL; + return -EHWPOISON; for (i = 0; i < n && next; i++) { err = verify_block_sig(next->buf); if (err) - return err; + return -EHWPOISON; next = next->next; } @@ -479,7 +479,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_SF: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; - return -EIO; + return -ENOLINK; default: mlx5_core_err(dev, "Unknown FW command (%d)\n", op); return -EINVAL; @@ -760,44 +760,72 @@ struct mlx5_ifc_mbox_in_bits { u8 reserved_at_40[0x40]; }; -void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) { - *status = MLX5_GET(mbox_out, out, status); - *syndrome = MLX5_GET(mbox_out, out, syndrome); + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + mlx5_core_err_rl(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, cmd_status_to_err(status)); } +EXPORT_SYMBOL(mlx5_cmd_out_err); -static int 
mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) +static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { + u16 opcode, op_mod; u32 syndrome; u8 status; - u16 opcode; - u16 op_mod; u16 uid; + int err; - mlx5_cmd_mbox_status(out, &status, &syndrome); - if (!status) - return 0; + syndrome = MLX5_GET(mbox_out, out, syndrome); + status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); + err = cmd_status_to_err(status); + if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) - mlx5_core_err_rl(dev, - "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", - mlx5_command_str(opcode), opcode, op_mod, - cmd_status_str(status), status, syndrome); + mlx5_cmd_out_err(dev, opcode, op_mod, out); else mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", - mlx5_command_str(opcode), - opcode, op_mod, - cmd_status_str(status), - status, - syndrome); + "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, uid, + cmd_status_str(status), status, syndrome, err); +} + +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) +{ + /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ + if (err == -ENXIO) { + u16 opcode = MLX5_GET(mbox_in, in, opcode); + u32 syndrome; + u8 status; + + /* PCI Error, emulate command return status, for smooth reset */ + err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, syndrome); + if (!err) + return 0; + } + + /* driver or FW delivery error */ + if (err != -EREMOTEIO && err) + return err; + + /* check outbox status */ + err = cmd_status_to_err(MLX5_GET(mbox_out, out, status)); + if (err) + cmd_status_print(dev, in, out); - return cmd_status_to_err(status); + return err; 
} +EXPORT_SYMBOL(mlx5_cmd_check); static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) @@ -980,13 +1008,7 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { - u8 status = 0; - u32 drv_synd; - - ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); - MLX5_SET(mbox_out, ent->out, status, status); - MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); - + ent->ret = -ENXIO; mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); return; } @@ -1005,6 +1027,31 @@ static void cmd_work_handler(struct work_struct *work) } } +static int deliv_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } +} + static const char *deliv_status_to_str(u8 status) { switch (status) { @@ -1101,16 +1148,27 @@ out_err: /* Notes: * 1. Callback functions may not sleep * 2. page queue commands do not support asynchrous completion + * + * return value in case (!callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret > 0 : Command execution couldn't be performed by firmware + * ret == 0: Command was executed by FW, Caller must check FW outbox status. 
+ * + * return value in case (callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret == 0: Command will be submitted to FW for execution + * and the callback will be called for further status updates */ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, - void *context, int page_queue, u8 *status, + void *context, int page_queue, u8 token, bool force_polling) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; struct mlx5_cmd_stats *stats; + u8 status = 0; int err = 0; s64 ds; u16 op; @@ -1141,12 +1199,12 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, cmd_work_handler(&ent->work); } else if (!queue_work(cmd->wq, &ent->work)) { mlx5_core_warn(dev, "failed to queue work\n"); - err = -ENOMEM; + err = -EALREADY; goto out_free; } if (callback) - goto out; /* mlx5_cmd_comp_handler() will put(ent) */ + return 0; /* mlx5_cmd_comp_handler() will put(ent) */ err = wait_func(dev, ent); if (err == -ETIMEDOUT || err == -ECANCELED) @@ -1164,12 +1222,11 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", mlx5_command_str(op), ds); - *status = ent->status; out_free: + status = ent->status; cmd_ent_put(ent); -out: - return err; + return err ? 
: status; } static ssize_t dbg_write(struct file *filp, const char __user *buf, @@ -1612,15 +1669,15 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); - if (!ent->ret) { + + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->ret = -ENXIO; + + if (!ent->ret) { /* Command completed by FW */ if (!cmd->checksum_disabled) ent->ret = verify_signature(ent); - else - ent->ret = 0; - if (vec & MLX5_TRIGGERED_CMD_COMP) - ent->status = MLX5_DRIVER_STATUS_ABORTED; - else - ent->status = ent->lay->status_own >> 1; + + ent->status = ent->lay->status_own >> 1; mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); @@ -1638,21 +1695,18 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force callback = ent->callback; context = ent->context; - err = ent->ret; - if (!err) { + err = ent->ret ? : ent->status; + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (!err) err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); - err = err ? err : mlx5_cmd_check(dev, - ent->in->first.data, - ent->uout); - } - mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); - err = err ? 
err : ent->status; /* final consumer is done, release ent */ cmd_ent_put(ent); callback(err, context); @@ -1719,31 +1773,6 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) up(&cmd->sem); } -static int status_to_err(u8 status) -{ - switch (status) { - case MLX5_CMD_DELIVERY_STAT_OK: - case MLX5_DRIVER_STATUS_ABORTED: - return 0; - case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: - case MLX5_CMD_DELIVERY_STAT_TOK_ERR: - return -EBADR; - case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: - case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: - case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: - return -EFAULT; /* Bad address */ - case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: - case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: - case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: - case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: - return -ENOMSG; - case MLX5_CMD_DELIVERY_STAT_FW_ERR: - return -EIO; - default: - return -EINVAL; - } -} - static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, gfp_t gfp) { @@ -1787,27 +1816,23 @@ static int is_manage_pages(void *in) return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; } +/* Notes: + * 1. Callback functions may not sleep + * 2. 
Page queue commands do not support asynchrous completion + */ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context, bool force_polling) { - struct mlx5_cmd_msg *inb; - struct mlx5_cmd_msg *outb; + u16 opcode = MLX5_GET(mbox_in, in, opcode); + struct mlx5_cmd_msg *inb, *outb; int pages_queue; gfp_t gfp; - int err; - u8 status = 0; - u32 drv_synd; - u16 opcode; u8 token; + int err; - opcode = MLX5_GET(mbox_in, in, opcode); - if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) { - err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); - MLX5_SET(mbox_out, out, status, status); - MLX5_SET(mbox_out, out, syndrome, drv_synd); - return err; - } + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) + return -ENXIO; pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; @@ -1833,39 +1858,108 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status, token, force_polling); - if (err) - goto out_out; + pages_queue, token, force_polling); + if (callback) + return err; - mlx5_core_dbg(dev, "err %d, status %d\n", err, status); - if (status) { - err = status_to_err(status); - goto out_out; - } + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); - if (!callback) - err = mlx5_copy_from_msg(out, outb, out_size); + if (err) + goto out_out; + /* command completed by FW */ + err = mlx5_copy_from_msg(out, outb, out_size); out_out: - if (!callback) - mlx5_free_cmd_msg(dev, outb); - + mlx5_free_cmd_msg(dev, outb); out_in: - if (!callback) - free_msg(dev, inb); + free_msg(dev, inb); return err; } +/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ +static int cmd_status_err(int err, void *out) +{ + if (err) /* -EREMOTEIO is preserved */ + return err == -EREMOTEIO 
? -EIO : err; + + if (MLX5_GET(mbox_out, out, status) != MLX5_CMD_STAT_OK) + return -EREMOTEIO; + + return 0; +} + +/** + * mlx5_cmd_do - Executes a fw command, wait for completion. + * Unlike mlx5_cmd_exec, this function will not translate or intercept + * outbox.status and will return -EREMOTEIO when + * outbox.status != MLX5_CMD_STAT_OK + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: + * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK. + * Caller must check FW outbox status. + * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK. + * < 0 : Command execution couldn't be performed by firmware or driver + */ +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); + + return cmd_status_err(err, out); +} +EXPORT_SYMBOL(mlx5_cmd_do); + +/** + * mlx5_cmd_exec - Executes a fw command, wait for completion + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { - int err; + int err = mlx5_cmd_do(dev, in, in_size, out, out_size); - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); - return err ? 
: mlx5_cmd_check(dev, in, out); + return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); +/** + * mlx5_cmd_exec_polling - Executes a fw command, poll for completion + * Needed for driver force teardown, when command completion EQ + * will not be available to complete the command + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + + err = cmd_status_err(err, out); + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, struct mlx5_async_ctx *ctx) { @@ -1894,8 +1988,10 @@ EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); static void mlx5_cmd_exec_cb_handler(int status, void *_work) { struct mlx5_async_work *work = _work; - struct mlx5_async_ctx *ctx = work->ctx; + struct mlx5_async_ctx *ctx; + ctx = work->ctx; + status = cmd_status_err(status, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) wake_up(&ctx->wait); @@ -1909,6 +2005,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; + work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; ret = cmd_exec(ctx->dev, in, in_size, out, out_size, @@ -1920,17 +2017,6 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, } EXPORT_SYMBOL(mlx5_cmd_exec_cb); -int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size) -{ - int err; - - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); - - return err ? 
: mlx5_cmd_check(dev, in, out); -} -EXPORT_SYMBOL(mlx5_cmd_exec_polling); - static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct cmd_msg_cache *ch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 5371ad0a12eb..15a74966be7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -86,8 +86,9 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } -int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen, u32 *out, int outlen) +/* Callers must verify outbox status in case of err */ +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn_or_apu_element); @@ -101,7 +102,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); if (err) return err; @@ -148,6 +149,16 @@ err_cmd: mlx5_cmd_exec_in(dev, destroy_cq, din); return err; } +EXPORT_SYMBOL(mlx5_create_cq); + +/* oubox is checked and err val is normalized */ +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) +{ + int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen); + + return mlx5_cmd_check(dev, err, in, out); +} EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d1093bb2d436..057dde6f4417 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -100,15 +100,11 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli } net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); - err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack); if (err) - goto out; + return err; - err = mlx5_fw_reset_wait_reset_done(dev); -out: - if (err) - NL_SET_ERR_MSG_MOD(extack, "FW activate command failed"); - return err; + return mlx5_fw_reset_wait_reset_done(dev); } static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 519fa1056d9f..94a7cf38d6b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -18,11 +18,13 @@ struct mlx5e_tc_act_parse_state { struct netlink_ext_ack *extack; u32 actions; bool ct; + bool ct_clear; bool encap; bool decap; bool mpls_push; bool ptype_host; const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; int if_count; struct mlx5_tc_ct_priv *ct_priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 7368f95f2310..b9d38fe807df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -31,6 +31,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; int err; + /* It's redundant to do ct clear more than once. 
*/ + if (clear_action && parse_state->ct_clear) + return 0; + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, &attr->parse_attr->mod_hdr_acts, act, parse_state->extack); @@ -46,6 +50,7 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, flow_flag_set(parse_state->flow, CT); parse_state->ct = true; } + parse_state->ct_clear = clear_action; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index d08abec93704..05a42fb4ba97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -178,6 +178,12 @@ parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, return -ENOMEM; parse_state->encap = false; + + if (parse_state->mpls_push) { + memcpy(&parse_attr->mpls_info[esw_attr->out_count], + &parse_state->mpls_info, sizeof(parse_state->mpls_info)); + parse_state->mpls_push = false; + } esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; esw_attr->out_count++; /* attr->dests[].rep is resolved when we handle encap */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c index 40332949509a..96a80e03d129 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -23,6 +23,16 @@ tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, return true; } +static void +copy_mpls_info(struct mlx5e_mpls_info *mpls_info, + const struct flow_action_entry *act) +{ + mpls_info->label = act->mpls_push.label; + mpls_info->tc = act->mpls_push.tc; + mpls_info->bos = act->mpls_push.bos; + mpls_info->ttl = act->mpls_push.ttl; +} + static int tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -30,6 +40,7 @@ tc_act_parse_mpls_push(struct 
mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *attr) { parse_state->mpls_push = true; + copy_mpls_info(&parse_state->mpls_info, act); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index f76624699a8d..03c953dacb09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -35,6 +35,7 @@ enum { struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 171bc6b36aa4..5105c8018d37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -768,6 +768,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; const struct ip_tunnel_info *tun_info; + const struct mlx5e_mpls_info *mpls_info; unsigned long tbl_time_before = 0; struct mlx5e_encap_entry *e; struct mlx5e_encap_key key; @@ -778,6 +779,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; tun_info = parse_attr->tun_info[out_index]; + mpls_info = &parse_attr->mpls_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); @@ -828,6 +830,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, goto out_err_init; } e->tun_info = tun_info; + memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info)); err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err_init; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index 60952b33b568..c5b1617d556f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -30,16 +30,15 @@ static int generate_ip_tun_hdr(char buf[], struct mlx5e_encap_entry *r) { const struct ip_tunnel_key *tun_key = &r->tun_info->key; + const struct mlx5e_mpls_info *mpls_info = &r->mpls_info; struct udphdr *udp = (struct udphdr *)(buf); struct mpls_shim_hdr *mpls; - u32 tun_id; - tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id)); mpls = (struct mpls_shim_hdr *)(udp + 1); *ip_proto = IPPROTO_UDP; udp->dest = tun_key->tp_dst; - *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true); + *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos); return 0; } @@ -60,37 +59,31 @@ static int parse_tunnel(struct mlx5e_priv *priv, void *headers_v) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct flow_match_enc_keyid enc_keyid; struct flow_match_mpls match; void *misc2_c; void *misc2_v; - misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) - return 0; - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) - return 0; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - if (!enc_keyid.mask->keyid) - return 0; - if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) && !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP)) return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return -EOPNOTSUPP; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) + return 0; + flow_rule_match_mpls(rule, &match); /* Only support matching the first LSE */ if 
(match.mask->used_lses != 1) return -EOPNOTSUPP; + misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2_c, outer_first_mpls_over_udp.mpls_label, match.mask->ls[0].mpls_label); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index d964665eaa63..62cde3e87c2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -139,15 +139,6 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, return true; } -static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state) -{ -#ifdef CONFIG_MLX5_EN_IPSEC - return mlx5e_ipsec_is_tx_flow(&state->ipsec); -#else - return false; -#endif -} - static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, struct mlx5e_accel_tx_state *state) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 57d755db1cf5..6e80585d731f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1792,7 +1792,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, if (size_read < 0) { netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", __func__, size_read); - return 0; + return size_read; } i += size_read; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 0b619c7846d0..adf5cc6a7b8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -179,6 +179,13 @@ struct mlx5e_decap_entry { struct rcu_head rcu; }; +struct mlx5e_mpls_info { + u32 label; + u8 tc; + u8 bos; + u8 ttl; +}; + struct 
mlx5e_encap_entry { /* attached neigh hash entry */ struct mlx5e_neigh_hash_entry *nhe; @@ -192,6 +199,7 @@ struct mlx5e_encap_entry { struct list_head route_list; struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 4cb7c7135b6a..074a44b281b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1348,7 +1348,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, } /* True when explicitly set via priv flag, or XDP prog is loaded */ - if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) || + get_cqe_tls_offload(cqe)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 8c9163d2c646..08a75654f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -334,6 +334,7 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, netdev_info(ndev, "\t[%d] %s start..\n", i, st.name); buf[count] = st.st_func(priv); netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]); + count++; } mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 3e5d8c788026..336e4d04c5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -37,6 +37,10 @@ #include "en/ptp.h" #include "en/port.h" +#ifdef CONFIG_PAGE_POOL_STATS +#include <net/page_pool.h> +#endif + 
static unsigned int stats_grps_num(struct mlx5e_priv *priv) { return !priv->profile->stats_grps_num ? 0 : @@ -183,6 +187,19 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) }, +#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, @@ -349,6 +366,19 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_congst_umr += rq_stats->congst_umr; s->rx_arfs_err += rq_stats->arfs_err; s->rx_recover += rq_stats->recover; +#ifdef CONFIG_PAGE_POOL_STATS + s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; + s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow; + s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty; + s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill; + s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive; + s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order; + s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached; + 
s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full; + s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring; + s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full; + s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; @@ -455,6 +485,35 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, } } +#ifdef CONFIG_PAGE_POOL_STATS +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ + struct mlx5e_rq_stats *rq_stats = c->rq.stats; + struct page_pool *pool = c->rq.page_pool; + struct page_pool_stats stats = { 0 }; + + if (!page_pool_get_stats(pool, &stats)) + return; + + rq_stats->pp_alloc_fast = stats.alloc_stats.fast; + rq_stats->pp_alloc_slow = stats.alloc_stats.slow; + rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order; + rq_stats->pp_alloc_empty = stats.alloc_stats.empty; + rq_stats->pp_alloc_waive = stats.alloc_stats.waive; + rq_stats->pp_alloc_refill = stats.alloc_stats.refill; + + rq_stats->pp_recycle_cached = stats.recycle_stats.cached; + rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full; + rq_stats->pp_recycle_ring = stats.recycle_stats.ring; + rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full; + rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt; +} +#else +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ +} +#endif + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -465,8 +524,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; + int j; + mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); 
mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq); mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch); @@ -1254,9 +1316,6 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; - MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical, @@ -1272,6 +1331,9 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, void mlx5e_stats_fec_get(struct mlx5e_priv *priv, struct ethtool_fec_stats *fec_stats) { + if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + return; + fec_set_corrected_bits_total(priv, fec_stats); fec_set_block_stats(priv, fec_stats); } @@ -1887,6 +1949,19 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) }, +#endif #ifdef CONFIG_MLX5_EN_TLS { 
MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 14eaf923e7b3..a7a025d15c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -205,7 +205,19 @@ struct mlx5e_sw_stats { u64 ch_aff_change; u64 ch_force_irq; u64 ch_eq_rearm; - +#ifdef CONFIG_PAGE_POOL_STATS + u64 rx_pp_alloc_fast; + u64 rx_pp_alloc_slow; + u64 rx_pp_alloc_slow_high_order; + u64 rx_pp_alloc_empty; + u64 rx_pp_alloc_refill; + u64 rx_pp_alloc_waive; + u64 rx_pp_recycle_cached; + u64 rx_pp_recycle_cache_full; + u64 rx_pp_recycle_ring; + u64 rx_pp_recycle_ring_full; + u64 rx_pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; @@ -352,6 +364,19 @@ struct mlx5e_rq_stats { u64 congst_umr; u64 arfs_err; u64 recover; +#ifdef CONFIG_PAGE_POOL_STATS + u64 pp_alloc_fast; + u64 pp_alloc_slow; + u64 pp_alloc_slow_high_order; + u64 pp_alloc_empty; + u64 pp_alloc_refill; + u64 pp_alloc_waive; + u64 pp_recycle_cached; + u64 pp_recycle_cache_full; + u64 pp_recycle_ring; + u64 pp_recycle_ring_full; + u64 pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 76a015dfc5fc..40416e0a8bb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3316,6 +3316,18 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + + if (actions & 
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) @@ -4470,6 +4482,46 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, return err; } +static int mlx5e_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct netlink_ext_ack *extack) @@ -4497,10 +4549,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not 
support packets per second"); - return -EOPNOTSUPP; - } + err = mlx5e_policer_validate(flow_action, act, extack); + if (err) + return err; + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 39e948bc1204..a994e71e05c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -92,6 +92,7 @@ static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp; vport->ingress.offloads.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); @@ -117,6 +118,36 @@ static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw, vport->ingress.offloads.modify_metadata_rule = NULL; } +static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.fg = vport->ingress.offloads.drop_grp; + flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + + vport->ingress.offloads.drop_rule = flow_rule; +out: + return err; +} + +static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.drop_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.drop_rule); + vport->ingress.offloads.drop_rule = NULL; +} + static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw, struct 
mlx5_vport *vport) { @@ -154,6 +185,7 @@ static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw, { esw_acl_ingress_allow_rule_destroy(vport); esw_acl_ingress_mod_metadata_destroy(esw, vport); + esw_acl_ingress_src_port_drop_destroy(esw, vport); } static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -170,10 +202,29 @@ static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, if (!flow_group_in) return -ENOMEM; + if (vport->vport == MLX5_VPORT_UPLINK) { + /* This group can hold an FTE to drop all traffic. + * Need in case LAG is enabled. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto drop_err; + } + vport->ingress.offloads.drop_grp = g; + flow_index++; + } + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { /* This group is to hold FTE to match untagged packets when prio_tag * is enabled. 
*/ + memset(flow_group_in, 0, inlen); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, @@ -221,6 +272,11 @@ metadata_err: vport->ingress.offloads.metadata_prio_tag_grp = NULL; } prio_tag_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +drop_err: kvfree(flow_group_in); return ret; } @@ -236,6 +292,11 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); vport->ingress.offloads.metadata_prio_tag_grp = NULL; } + + if (vport->ingress.offloads.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } } int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, @@ -252,6 +313,8 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) num_ftes++; + if (vport->vport == MLX5_VPORT_UPLINK) + num_ftes++; if (esw_acl_ingress_prio_tag_enabled(esw, vport)) num_ftes++; @@ -320,3 +383,27 @@ out: vport->metadata = vport->default_metadata; return err; } + +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + return esw_acl_ingress_src_port_drop_create(esw, vport); +} + +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return; + } + + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index c57869b93d60..11d3d3978848 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -6,6 +6,7 @@ #include "eswitch.h" +#ifdef CONFIG_MLX5_ESWITCH /* Eswitch acl egress external APIs */ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); @@ -25,5 +26,19 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vpor void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, u32 metadata); +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); +#else /* CONFIG_MLX5_ESWITCH */ +static void +mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, + u16 vport_num) +{} + +static int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return 0; +} +#endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 11bbcd5f5b8b..694c54066955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -697,7 +697,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo } int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 min_rate, u32 max_rate) + u32 max_rate, u32 min_rate) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 44321cdfe928..973281bdb4a2 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -113,8 +113,11 @@ struct vport_ingress { * packet with metadata. */ struct mlx5_flow_group *metadata_allmatch_grp; + /* Optional group to add a drop all rule */ + struct mlx5_flow_group *drop_grp; struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; + struct mlx5_flow_handle *drop_rule; } offloads; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2b31d8bbd1b8..35cf4cb3098e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2379,60 +2379,6 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); } -static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) -{ - u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; - u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; - struct mlx5_eswitch *esw; - struct mlx5_flow_root_namespace *root; - struct mlx5_flow_namespace *ns; - struct mlx5_vport *vport; - int err; - - MLX5_SET(set_flow_table_root_in, in, opcode, - MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); - MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL); - MLX5_SET(set_flow_table_root_in, in, other_vport, 1); - MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK); - - if (master) { - esw = master->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1); - MLX5_SET(set_flow_table_root_in, in, table_vport_number, - MLX5_VPORT_UPLINK); - - ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->index); - root = find_root(&ns->node); - mutex_lock(&root->chain_lock); - - 
MLX5_SET(set_flow_table_root_in, in, - table_eswitch_owner_vhca_id_valid, 1); - MLX5_SET(set_flow_table_root_in, in, - table_eswitch_owner_vhca_id, - MLX5_CAP_GEN(master, vhca_id)); - MLX5_SET(set_flow_table_root_in, in, table_id, - root->root_ft->id); - } else { - esw = slave->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - ns = mlx5_get_flow_vport_acl_namespace(slave, - MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->index); - root = find_root(&ns->node); - mutex_lock(&root->chain_lock); - MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id); - } - - err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); - mutex_unlock(&root->chain_lock); - - return err; -} - static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, struct mlx5_core_dev *slave) { @@ -2614,15 +2560,10 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, { int err; - err = esw_set_uplink_slave_ingress_root(master_esw->dev, - slave_esw->dev); - if (err) - return -EINVAL; - err = esw_set_slave_root_fdb(master_esw->dev, slave_esw->dev); if (err) - goto err_fdb; + return err; err = esw_set_master_egress_rule(master_esw->dev, slave_esw->dev); @@ -2634,9 +2575,6 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, err_acl: esw_set_slave_root_fdb(NULL, slave_esw->dev); -err_fdb: - esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); - return err; } @@ -2645,7 +2583,6 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, { esw_unset_master_egress_rule(master_esw->dev); esw_set_slave_root_fdb(NULL, slave_esw->dev); - esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -2839,13 +2776,22 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return false; - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - mlx5_ecpf_vport_exists(esw->dev)) - return false; - 
return true; } +#define MLX5_ESW_METADATA_RSVD_UPLINK 1 + +/* Share the same metadata for uplink's. This is fine because: + * (a) In shared FDB mode (LAG) both uplink's are treated the + * same and tagged with the same metadata. + * (b) In non shared FDB mode, packets from physical port0 + * cannot hit eswitch of PF1 and vice versa. + */ +static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw) +{ + return MLX5_ESW_METADATA_RSVD_UPLINK; +} + u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; @@ -2860,8 +2806,10 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) return 0; /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ - /* Use only non-zero vport_id (1-4095) for all PF's */ - id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL); + /* Use only non-zero vport_id (2-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, + MLX5_ESW_METADATA_RSVD_UPLINK + 1, + vport_end_ida, GFP_KERNEL); if (id < 0) return 0; id = (pf_num << ESW_VPORT_BITS) | id; @@ -2879,7 +2827,11 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + if (vport->vport == MLX5_VPORT_UPLINK) + vport->default_metadata = mlx5_esw_match_metadata_reserved(esw); + else + vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + vport->metadata = vport->default_metadata; return vport->metadata ? 
0 : -ENOSPC; } @@ -2890,6 +2842,9 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, if (!vport->default_metadata) return; + if (vport->vport == MLX5_VPORT_UPLINK) + return; + WARN_ON(vport->metadata != vport->default_metadata); mlx5_esw_match_metadata_free(esw, vport->default_metadata); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 42f878e21fea..816d991f7621 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1696,6 +1696,7 @@ static void free_match_list(struct match_list *head, bool ft_locked) static int build_match_list(struct match_list *match_head, struct mlx5_flow_table *ft, const struct mlx5_flow_spec *spec, + struct mlx5_flow_group *fg, bool ft_locked) { struct rhlist_head *tmp, *list; @@ -1710,6 +1711,9 @@ static int build_match_list(struct match_list *match_head, rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; + if (fg && fg != g) + continue; + if (unlikely(!tree_get_node(&g->node))) continue; @@ -1889,6 +1893,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!check_valid_spec(spec)) return ERR_PTR(-EINVAL); + if (flow_act->fg && ft->autogroup.active) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); @@ -1898,7 +1905,7 @@ search_again_locked: version = atomic_read(&ft->node.version); /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec, take_write); + err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write); if (err) { if (take_write) up_write_ref_node(&ft->node, false); @@ -2074,6 +2081,8 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); } 
kfree(handle); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 84dbe46d5ede..4aa22dce9b77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -57,7 +57,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); } -static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, + u8 *reset_type, u8 *reset_state) { u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; @@ -71,25 +72,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r *reset_level = MLX5_GET(mfrl_reg, out, reset_level); if (reset_type) *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + if (reset_state) + *reset_state = MLX5_GET(mfrl_reg, out, reset_state); return 0; } int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) { - return mlx5_reg_mfrl_query(dev, reset_level, reset_type); + return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL); } -int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) +static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 reset_state; + + if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) + goto out; + + switch (reset_state) { + case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION: + case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS: + NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered"); + return -EBUSY; + case MLX5_MFRL_REG_RESET_STATE_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout"); + return -ETIMEDOUT; + case MLX5_MFRL_REG_RESET_STATE_NACK: + NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset"); + return 
-EPERM; + } + +out: + NL_SET_ERR_MSG_MOD(extack, "Sync reset failed"); + return -EIO; +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; int err; set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); - if (err) - clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - return err; + + MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1); + err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MFRL, 0, 1, false); + if (!err) + return 0; + + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) + return mlx5_fw_reset_get_reset_state_err(dev, extack); + + NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed"); + return mlx5_cmd_check(dev, err, in, out); } int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index 7761ee5fc7d0..694fc7cb2684 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -9,7 +9,8 @@ void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); -int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack); int 
mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 4ddf6b330a44..6cad3b72c133 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -31,15 +31,22 @@ */ #include <linux/netdevice.h> +#include <net/bonding.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> #include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" +#include "esw/acl/ofld.h" #include "lag.h" #include "mp.h" +enum { + MLX5_LAG_EGRESS_PORT_1 = 1, + MLX5_LAG_EGRESS_PORT_2, +}; + /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired * under it). @@ -193,15 +200,71 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && tracker->netdev_state[MLX5_LAG_P2].link_up; - *port1 = 1; - *port2 = 2; + *port1 = MLX5_LAG_EGRESS_PORT_1; + *port2 = MLX5_LAG_EGRESS_PORT_2; if ((!p1en && !p2en) || (p1en && p2en)) return; if (p1en) - *port2 = 1; + *port2 = MLX5_LAG_EGRESS_PORT_1; + else + *port1 = MLX5_LAG_EGRESS_PORT_2; +} + +static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) +{ + return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop; +} + +static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (!ldev->pf[i].has_drop) + continue; + + mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + MLX5_VPORT_UPLINK); + ldev->pf[i].has_drop = false; + } +} + +static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + struct 
mlx5_core_dev *inactive; + u8 v2p_port1, v2p_port2; + int inactive_idx; + int err; + + /* First delete the current drop rule so there won't be any dropped + * packets + */ + mlx5_lag_drop_rule_cleanup(ldev); + + if (!ldev->tracker.has_inactive) + return; + + mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2); + + if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) { + inactive = dev1; + inactive_idx = MLX5_LAG_P2; + } else { + inactive = dev0; + inactive_idx = MLX5_LAG_P1; + } + + err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[inactive_idx].has_drop = true; else - *port1 = 2; + mlx5_core_err(inactive, + "Failed to create lag drop rule, error: %d", err); } static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2) @@ -238,6 +301,10 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]); } + + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !(ldev->flags & MLX5_LAG_FLAG_ROCE)) + mlx5_lag_drop_rule_setup(ldev, tracker); } static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, @@ -339,6 +406,10 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return err; } + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !roce_lag) + mlx5_lag_drop_rule_setup(ldev, tracker); + ldev->flags |= flags; ldev->shared_fdb = shared_fdb; return 0; @@ -347,6 +418,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); u8 flags = ldev->flags; @@ -356,8 +428,8 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) mlx5_lag_mp_reset(ldev); if (ldev->shared_fdb) { - mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, - 
ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); + mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); ldev->shared_fdb = false; } @@ -372,11 +444,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) "Failed to deactivate VF LAG; driver restart required\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); } - } else if (flags & MLX5_LAG_FLAG_HASH_BASED) { - mlx5_lag_port_sel_destroy(ldev); + return err; } - return err; + if (flags & MLX5_LAG_FLAG_HASH_BASED) + mlx5_lag_port_sel_destroy(ldev); + if (mlx5_lag_has_drop_rule(ldev)) + mlx5_lag_drop_rule_cleanup(ldev); + + return 0; } static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) @@ -613,6 +689,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct net_device *upper = info->upper_dev, *ndev_tmp; struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded, is_in_lag, mode_supported; + bool has_inactive = 0; + struct slave *slave; int bond_status = 0; int num_slaves = 0; int changed = 0; @@ -632,8 +710,12 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx >= 0) + if (idx >= 0) { + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); bond_status |= (1 << idx); + } num_slaves++; } @@ -648,6 +730,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, tracker->hash_type = lag_upper_info->hash_type; } + tracker->has_inactive = has_inactive; /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. 
@@ -704,6 +787,38 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, return 1; } +static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev) +{ + struct net_device *ndev_tmp; + struct slave *slave; + bool has_inactive = 0; + int idx; + + if (!netif_is_lag_master(ndev)) + return 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx < 0) + continue; + + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + } + rcu_read_unlock(); + + if (tracker->has_inactive == has_inactive) + return 0; + + tracker->has_inactive = has_inactive; + + return 1; +} + static int mlx5_lag_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -712,7 +827,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, struct mlx5_lag *ldev; int changed = 0; - if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + if (event != NETDEV_CHANGEUPPER && + event != NETDEV_CHANGELOWERSTATE && + event != NETDEV_CHANGEINFODATA) return NOTIFY_DONE; ldev = container_of(this, struct mlx5_lag, nb); @@ -728,6 +845,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, changed = mlx5_handle_changelowerstate_event(ldev, &tracker, ndev, ptr); break; + case NETDEV_CHANGEINFODATA: + changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); + break; } ldev->tracker = tracker; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index e5d231c31b54..cbf9a9003e55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -28,6 +28,7 @@ enum { struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; + bool has_drop; }; /* Used for collection of netdev event info. 
*/ @@ -35,6 +36,7 @@ struct lag_tracker { enum netdev_lag_tx_type tx_type; struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; unsigned int is_bonded:1; + unsigned int has_inactive:1; enum netdev_lag_hash hash_type; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 1ca01a5b6cdd..4213208d9ef7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -50,7 +50,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, enum mlx5_lag_port_affinity port) { - struct lag_tracker tracker; + struct lag_tracker tracker = {}; if (!__mlx5_lag_is_multipath(ldev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index df58cba37930..1e8ec4f236b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h index 4bad6a5fde56..f240ffe5116c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -92,13 +92,6 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) { } - -static inline int -mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent, - void *buf, int len) -{ - return 0; -} #endif #endif /* __LIB_HV_VHCA_H__ */ diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2c774f367199..98be7050aa8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -526,7 +526,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) /* Check log_max_qp from HCA caps to set in current profile */ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { - prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + prof->log_max_qp = min_t(u8, 17, MLX5_CAP_GEN_MAX(dev, log_max_qp)); } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, @@ -736,10 +736,9 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); if (err) { - u32 syndrome; - u8 status; + u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); + u8 status = MLX5_GET(query_issi_out, query_out, status); - mlx5_cmd_mbox_status(query_out, &status, &syndrome); if (!status || syndrome == MLX5_DRIVER_SYND) { mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", err, status, syndrome); @@ -1840,10 +1839,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ + { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ { 
PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 7b16a1188aab..289b29a23418 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -33,9 +33,10 @@ #include <linux/mlx5/port.h> #include "mlx5_core.h" -int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, - int size_in, void *data_out, int size_out, - u16 reg_id, int arg, int write) +/* calling with verbose false will not print error to log */ +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose) { int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; @@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, MLX5_SET(access_register_in, in, argument, arg); MLX5_SET(access_register_in, in, register_id, reg_id); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (verbose) + err = mlx5_cmd_check(dev, err, in, out); if (err) goto out; @@ -69,6 +72,15 @@ out: kvfree(in); return err; } +EXPORT_SYMBOL_GPL(mlx5_access_reg); + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_id, int arg, int write) +{ + return mlx5_access_reg(dev, data_in, size_in, data_out, size_out, + reg_id, arg, write, true); +} EXPORT_SYMBOL_GPL(mlx5_core_access_reg); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 
7f6fd9c5e371..e289cfdbce07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,7 +4,6 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024) struct mlx5dr_icm_pool { enum mlx5dr_icm_type icm_type; @@ -136,37 +135,35 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) kvfree(icm_mr); } -static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) +static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy) { - chunk->ste_arr = kvzalloc(chunk->num_of_entries * - sizeof(chunk->ste_arr[0]), GFP_KERNEL); - if (!chunk->ste_arr) - return -ENOMEM; - - chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries * - DR_STE_SIZE_REDUCED, GFP_KERNEL); - if (!chunk->hw_ste_arr) - goto out_free_ste_arr; - - chunk->miss_list = kvmalloc(chunk->num_of_entries * - sizeof(chunk->miss_list[0]), GFP_KERNEL); - if (!chunk->miss_list) - goto out_free_hw_ste_arr; + /* We support only one type of STE size, both for ConnectX-5 and later + * devices. Once the support for match STE which has a larger tag is + * added (32B instead of 16B), the STE size for devices later than + * ConnectX-5 needs to account for that. 
+ */ + return DR_STE_SIZE_REDUCED; +} - return 0; +static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset) +{ + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + int index = offset / DR_STE_SIZE; -out_free_hw_ste_arr: - kvfree(chunk->hw_ste_arr); -out_free_ste_arr: - kvfree(chunk->ste_arr); - return -ENOMEM; + chunk->ste_arr = &buddy->ste_arr[index]; + chunk->miss_list = &buddy->miss_list[index]; + chunk->hw_ste_arr = buddy->hw_ste_arr + + index * dr_icm_buddy_get_ste_size(buddy); } static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) { - kvfree(chunk->miss_list); - kvfree(chunk->hw_ste_arr); - kvfree(chunk->ste_arr); + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + + memset(chunk->hw_ste_arr, 0, + chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy)); + memset(chunk->ste_arr, 0, + chunk->num_of_entries * sizeof(chunk->ste_arr[0])); } static enum mlx5dr_icm_type @@ -189,6 +186,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, kvfree(chunk); } +static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz); + + buddy->ste_arr = kvcalloc(num_of_entries, + sizeof(struct mlx5dr_ste), GFP_KERNEL); + if (!buddy->ste_arr) + return -ENOMEM; + + /* Preallocate full STE size on non-ConnectX-5 devices since + * we need to support both full and reduced with the same cache. 
+ */ + buddy->hw_ste_arr = kvcalloc(num_of_entries, + dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL); + if (!buddy->hw_ste_arr) + goto free_ste_arr; + + buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL); + if (!buddy->miss_list) + goto free_hw_ste_arr; + + return 0; + +free_hw_ste_arr: + kvfree(buddy->hw_ste_arr); +free_ste_arr: + kvfree(buddy->ste_arr); + return -ENOMEM; +} + +static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + kvfree(buddy->ste_arr); + kvfree(buddy->hw_ste_arr); + kvfree(buddy->miss_list); +} + static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) { struct mlx5dr_icm_buddy_mem *buddy; @@ -208,11 +243,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) buddy->icm_mr = icm_mr; buddy->pool = pool; + if (pool->icm_type == DR_ICM_TYPE_STE) { + /* Reduce allocations by preallocating and reusing the STE structures */ + if (dr_icm_buddy_init_ste_cache(buddy)) + goto err_cleanup_buddy; + } + /* add it to the -start- of the list in order to search in it first */ list_add(&buddy->list_node, &pool->buddy_mem_list); return 0; +err_cleanup_buddy: + mlx5dr_buddy_cleanup(buddy); err_free_buddy: kvfree(buddy); free_mr: @@ -234,6 +277,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) mlx5dr_buddy_cleanup(buddy); + if (buddy->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_cleanup_ste_cache(buddy); + kvfree(buddy); } @@ -261,34 +307,30 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, chunk->byte_size = mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type); chunk->seg = seg; + chunk->buddy_mem = buddy_mem_pool; - if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) { - mlx5dr_err(pool->dmn, - "Failed to init ste arrays (order: %d)\n", - chunk_size); - goto out_free_chunk; - } + if (pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_init(chunk, offset); buddy_mem_pool->used_memory += chunk->byte_size; - chunk->buddy_mem = 
buddy_mem_pool; INIT_LIST_HEAD(&chunk->chunk_list); /* chunk now is part of the used_list */ list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list); return chunk; - -out_free_chunk: - kvfree(chunk); - return NULL; } static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL) - return true; + int allow_hot_size; + + /* sync when hot memory reaches half of the pool size */ + allow_hot_size = + mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) / 2; - return false; + return pool->hot_memory_size > allow_hot_size; } static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index e87cf498c77b..38971fe1dfe1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) return (spec->dmac_47_16 || spec->dmac_15_0); } -static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec) -{ - return (spec->src_ip_127_96 || spec->src_ip_95_64 || - spec->src_ip_63_32 || spec->src_ip_31_0); -} - -static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec) -{ - return (spec->dst_ip_127_96 || spec->dst_ip_95_64 || - spec->dst_ip_63_32 || spec->dst_ip_31_0); -} - static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) { return (spec->ip_protocol || spec->frag || spec->tcp_flags || @@ -503,11 +491,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, &mask, inner, rx); if (outer_ipv == DR_RULE_IPV6) { - if (dr_mask_is_dst_addr_set(&mask.outer)) + if (DR_MASK_IS_DST_IP_SET(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_src_addr_set(&mask.outer)) + if 
(DR_MASK_IS_SRC_IP_SET(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); @@ -610,11 +598,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, &mask, inner, rx); if (inner_ipv == DR_RULE_IPV6) { - if (dr_mask_is_dst_addr_set(&mask.inner)) + if (DR_MASK_IS_DST_IP_SET(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_src_addr_set(&mask.inner)) + if (DR_MASK_IS_SRC_IP_SET(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 7e61742e58a0..187e29b409b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -602,12 +602,34 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, used_hw_action_num); } +static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn, + struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version) { + if (spec->ip_version != 0xf) { + mlx5dr_err(dmn, + "Partial ip_version mask with src/dst IP is not supported\n"); + return -EINVAL; + } + } else if (spec->ethertype != 0xffff && + (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) { + mlx5dr_err(dmn, + "Partial/no ethertype mask with src/dst IP is not supported\n"); + return -EINVAL; + } + + return 0; +} + int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, u8 match_criteria, struct mlx5dr_match_param *mask, struct mlx5dr_match_param *value) { - if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { + if (value) + return 0; + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); @@ -621,6 +643,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, } } + if 
((match_criteria & DR_MATCHER_CRITERIA_OUTER) && + dr_ste_build_pre_check_spec(dmn, &mask->outer)) + return -EINVAL; + + if ((match_criteria & DR_MATCHER_CRITERIA_INNER) && + dr_ste_build_pre_check_spec(dmn, &mask->inner)) + return -EINVAL; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 1b3d484b99be..55fcb751e24a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -798,6 +798,16 @@ struct mlx5dr_match_param { (_misc3)->icmpv4_code || \ (_misc3)->icmpv4_header_data) +#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \ + (_spec)->src_ip_95_64 || \ + (_spec)->src_ip_63_32 || \ + (_spec)->src_ip_31_0) + +#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \ + (_spec)->dst_ip_95_64 || \ + (_spec)->dst_ip_63_32 || \ + (_spec)->dst_ip_31_0) + struct mlx5dr_esw_caps { u64 drop_icm_address_rx; u64 drop_icm_address_tx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 033757bfdf64..57ffcab7294c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -233,7 +233,11 @@ static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; } -#define MLX5_FLOW_CONTEXT_ACTION_MAX 32 +/* We want to support a rule with 32 destinations, which means we need to + * account for 32 destinations plus usually a counter plus one more action + * for a multi-destination flow table. 
+ */ +#define MLX5_FLOW_CONTEXT_ACTION_MAX 34 static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *group, @@ -403,9 +407,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, enum mlx5_flow_destination_type type = dst->dest_attr.type; u32 id; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || - num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { - err = -ENOSPC; + if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; goto free_actions; } @@ -478,8 +482,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { - err = -ENOSPC; + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; goto free_actions; } @@ -499,14 +504,28 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, params.match_sz = match_sz; params.match_buf = (u64 *)fte->val; if (num_term_actions == 1) { - if (term_actions->reformat) + if (term_actions->reformat) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } actions[num_actions++] = term_actions->reformat; + } + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } actions[num_actions++] = term_actions->dest; } else if (num_term_actions > 1) { bool ignore_flow_level = !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, term_actions, num_term_actions, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h 
index c7c93131b762..dfa223415fe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -160,6 +160,11 @@ struct mlx5dr_icm_buddy_mem { * sync_ste command sets them free. */ struct list_head hot_list; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + struct list_head *miss_list; + u8 *hw_ste_arr; }; int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f45df5fbdcc0..0bf1d64644ba 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -177,17 +177,6 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_driver_priv); -bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) -{ - return mlxsw_core->driver->res_query_enabled; -} -EXPORT_SYMBOL(mlxsw_core_res_query_enabled); - -bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core) -{ - return mlxsw_core->driver->temp_warn_enabled; -} - bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev) @@ -223,6 +212,9 @@ static int mlxsw_core_trap_groups_set(struct mlxsw_core *mlxsw_core) int err; int i; + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_core_trap_groups); i++) { mlxsw_reg_htgt_pack(htgt_pl, mlxsw_core_trap_groups[i], MLXSW_REG_HTGT_INVALID_POLICER, @@ -2036,7 +2028,7 @@ static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) struct devlink_health_reporter *fw_fatal; int err; - if (!mlxsw_core->driver->fw_fatal_enabled) + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, @@ -2066,7 +2058,7 @@ err_trap_register: static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core) { - if 
(!mlxsw_core->driver->fw_fatal_enabled) + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return; mlxsw_core_health_fw_fatal_config(mlxsw_core, false); @@ -2086,7 +2078,6 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; struct mlxsw_driver *mlxsw_driver; - struct mlxsw_res *res; size_t alloc_size; int err; @@ -2112,8 +2103,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, mlxsw_core->bus_priv = bus_priv; mlxsw_core->bus_info = mlxsw_bus_info; - res = mlxsw_driver->res_query_enabled ? &mlxsw_core->res : NULL; - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, res); + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->res); if (err) goto err_bus_init; @@ -2522,6 +2513,9 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, char hpkt_pl[MLXSW_REG_HPKT_LEN]; int err; + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + err = mlxsw_core_listener_register(mlxsw_core, listener, priv, listener->enabled_on_register); if (err) @@ -2551,6 +2545,9 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, { char hpkt_pl[MLXSW_REG_HPKT_LEN]; + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return; + if (!listener->is_event) { mlxsw_reg_hpkt_pack(hpkt_pl, listener->dis_action, listener->trap_id, listener->dis_trap_group, @@ -3242,9 +3239,6 @@ int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, u16 id; int err; - if (!res) - return 0; - mlxsw_cmd_mbox_zero(mbox); for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6d304092f4e7..16ee5e90973d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -35,10 +35,6 @@ unsigned int mlxsw_core_max_ports(const 
struct mlxsw_core *mlxsw_core); void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); -bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); - -bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core); - bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev); @@ -406,9 +402,6 @@ struct mlxsw_driver { u8 txhdr_len; const struct mlxsw_config_profile *profile; - bool res_query_enabled; - bool fw_fatal_enabled; - bool temp_warn_enabled; }; int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 6ea4bf87be0b..29a74b8bd5b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -87,6 +87,7 @@ mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp, *qsfp = true; break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP: *qsfp = true; *cmis = true; break; @@ -303,6 +304,7 @@ int mlxsw_env_get_module_info(struct net_device *netdev, modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP: /* Use SFF_8636 as base type. ethtool should recognize specific * type through the identifier value. 
*/ @@ -462,9 +464,6 @@ int mlxsw_env_reset_module(struct net_device *netdev, !(req & (ETH_RESET_PHY << ETH_RESET_SHARED_SHIFT))) return 0; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); err = __mlxsw_env_validate_module_type(mlxsw_core, module); @@ -510,9 +509,6 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, u32 status_bits; int err; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); err = __mlxsw_env_validate_module_type(mlxsw_core, module); @@ -620,9 +616,6 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, bool low_power; int err = 0; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - if (policy != ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH && policy != ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO) { NL_SET_ERR_MSG_MOD(extack, "Unsupported power mode policy"); @@ -831,9 +824,6 @@ static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) - return 0; - return mlxsw_core_trap_register(mlxsw_core, &mlxsw_env_temp_warn_listener, mlxsw_env); @@ -841,9 +831,6 @@ static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) static void mlxsw_env_temp_warn_event_unregister(struct mlxsw_env *mlxsw_env) { - if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) - return; - mlxsw_core_trap_unregister(mlxsw_env->core, &mlxsw_env_temp_warn_listener, mlxsw_env); } @@ -922,9 +909,6 @@ mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) - return 0; - return mlxsw_core_trap_register(mlxsw_core, &mlxsw_env_module_plug_listener, mlxsw_env); @@ -933,9 +917,6 @@ mlxsw_env_module_plug_event_register(struct mlxsw_core 
*mlxsw_core) static void mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env) { - if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) - return; - mlxsw_core_trap_unregister(mlxsw_env->core, &mlxsw_env_module_plug_listener, mlxsw_env); @@ -966,9 +947,6 @@ mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module, { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); *p_counter = mlxsw_env->module_info[module].module_overheat_counter; mutex_unlock(&mlxsw_env->module_info_lock); @@ -981,9 +959,6 @@ void mlxsw_env_module_port_map(struct mlxsw_core *mlxsw_core, u8 module) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return; - mutex_lock(&mlxsw_env->module_info_lock); mlxsw_env->module_info[module].num_ports_mapped++; mutex_unlock(&mlxsw_env->module_info_lock); @@ -994,9 +969,6 @@ void mlxsw_env_module_port_unmap(struct mlxsw_core *mlxsw_core, u8 module) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return; - mutex_lock(&mlxsw_env->module_info_lock); mlxsw_env->module_info[module].num_ports_mapped--; mutex_unlock(&mlxsw_env->module_info_lock); @@ -1008,9 +980,6 @@ int mlxsw_env_module_port_up(struct mlxsw_core *mlxsw_core, u8 module) struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); int err = 0; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); if (mlxsw_env->module_info[module].power_mode_policy != @@ -1040,9 +1009,6 @@ void mlxsw_env_module_port_down(struct mlxsw_core *mlxsw_core, u8 module) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return; - mutex_lock(&mlxsw_env->module_info_lock); 
mlxsw_env->module_info[module].num_ports_up--; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index d41afdfbd085..8b170ad92302 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -57,14 +57,14 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; int temp, index; int err; - index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -80,14 +80,14 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; int temp_max, index; int err; - index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -103,9 +103,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct 
mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; unsigned long val; int index; @@ -117,7 +117,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, if (val != 1) return -EINVAL; - index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, index); @@ -138,13 +138,13 @@ static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mfsm_pl[MLXSW_REG_MFSM_LEN]; int err; - mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index); + mlxsw_reg_mfsm_pack(mfsm_pl, mlxsw_hwmon_attr->type_index); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); @@ -157,9 +157,9 @@ static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char fore_pl[MLXSW_REG_FORE_LEN]; bool fault; int err; @@ -169,7 +169,7 @@ static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); return err; } - mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault); + 
mlxsw_reg_fore_unpack(fore_pl, mlxsw_hwmon_attr->type_index, &fault); return sprintf(buf, "%u\n", fault); } @@ -178,13 +178,13 @@ static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mfsc_pl[MLXSW_REG_MFSC_LEN]; int err; - mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0); + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_hwmon_attr->type_index, 0); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); @@ -198,9 +198,9 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mfsc_pl[MLXSW_REG_MFSC_LEN]; unsigned long val; int err; @@ -211,7 +211,7 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, if (val > 255) return -EINVAL; - mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val); + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_hwmon_attr->type_index, val); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); @@ -224,14 +224,14 @@ static int mlxsw_hwmon_module_temp_get(struct device *dev, struct device_attribute *attr, int *p_temp) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = 
mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -261,15 +261,15 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; u8 module, fault; u16 temp; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); @@ -303,13 +303,13 @@ static int mlxsw_hwmon_module_temp_critical_get(struct device *dev, struct device_attribute *attr, int *p_temp) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; u8 module; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, SFP_TEMP_HIGH_WARN, p_temp); if (err) { @@ -337,13 +337,13 @@ static int mlxsw_hwmon_module_temp_emergency_get(struct device *dev, struct device_attribute *attr, int 
*p_temp) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; u8 module; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, SFP_TEMP_HIGH_ALARM, p_temp); if (err) { @@ -373,11 +373,11 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); return sprintf(buf, "front panel %03u\n", - mlwsw_hwmon_attr->type_index); + mlxsw_hwmon_attr->type_index); } static ssize_t @@ -385,10 +385,10 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - int index = mlwsw_hwmon_attr->type_index - + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; + int index = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->module_sensor_max + 1; return sprintf(buf, "gearbox %03u\n", index); @@ -655,9 +655,6 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) u8 module_sensor_max; int i, err; - if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) - return 0; - mlxsw_reg_mgpir_pack(mgpir_pl); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index b29824448aa8..05f54bd982c0 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -357,6 +357,10 @@ static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, return 0; } +static struct thermal_zone_params mlxsw_thermal_params = { + .no_hwmon = true, +}; + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -388,11 +392,11 @@ static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, trip->min_state, THERMAL_WEIGHT_DEFAULT); if (err < 0) - goto err_bind_cooling_device; + goto err_thermal_zone_bind_cooling_device; } return 0; -err_bind_cooling_device: +err_thermal_zone_bind_cooling_device: for (j = i - 1; j >= 0; j--) thermal_zone_unbind_cooling_device(tzdev, j, cdev); return err; @@ -678,7 +682,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - NULL, 0, + &mlxsw_thermal_params, + 0, module_tz->parent->polling_delay); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); @@ -741,9 +746,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; - if (!mlxsw_core_res_query_enabled(core)) - return 0; - mlxsw_reg_mgpir_pack(mgpir_pl); err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); if (err) @@ -761,7 +763,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, for (i = 0; i < thermal->tz_module_num; i++) { err = mlxsw_thermal_module_init(dev, core, thermal, i); if (err) - goto err_unreg_tz_module_arr; + goto err_thermal_module_init; } for (i = 0; i < thermal->tz_module_num; i++) { @@ -770,12 +772,13 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, continue; err = mlxsw_thermal_module_tz_init(module_tz); if (err) - goto err_unreg_tz_module_arr; + goto err_thermal_module_tz_init; } return 0; -err_unreg_tz_module_arr: 
+err_thermal_module_tz_init: +err_thermal_module_init: for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); @@ -787,9 +790,6 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) { int i; - if (!mlxsw_core_res_query_enabled(thermal->core)) - return; - for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); @@ -808,7 +808,7 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) MLXSW_THERMAL_TRIP_MASK, gearbox_tz, &mlxsw_thermal_gearbox_ops, - NULL, 0, + &mlxsw_thermal_params, 0, gearbox_tz->parent->polling_delay); if (IS_ERR(gearbox_tz->tzdev)) return PTR_ERR(gearbox_tz->tzdev); @@ -837,9 +837,6 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, int i; int err; - if (!mlxsw_core_res_query_enabled(core)) - return 0; - mlxsw_reg_mgpir_pack(mgpir_pl); err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); if (err) @@ -866,12 +863,12 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, gearbox_tz->parent = thermal; err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); if (err) - goto err_unreg_tz_gearbox; + goto err_thermal_gearbox_tz_init; } return 0; -err_unreg_tz_gearbox: +err_thermal_gearbox_tz_init: for (i--; i >= 0; i--) mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); kfree(thermal->tz_gearbox_arr); @@ -883,9 +880,6 @@ mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal) { int i; - if (!mlxsw_core_res_query_enabled(thermal->core)) - return; - for (i = thermal->tz_gearbox_num - 1; i >= 0; i--) mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); kfree(thermal->tz_gearbox_arr); @@ -915,7 +909,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl); if (err) { dev_err(dev, "Failed to probe PWMs\n"); - goto err_free_thermal; + goto err_reg_query; } 
mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); @@ -929,14 +923,14 @@ int mlxsw_thermal_init(struct mlxsw_core *core, err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl), mfsl_pl); if (err) - goto err_free_thermal; + goto err_reg_query; /* set the minimal RPMs to 0 */ mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0); err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl), mfsl_pl); if (err) - goto err_free_thermal; + goto err_reg_write; } } for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { @@ -949,7 +943,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (IS_ERR(cdev)) { err = PTR_ERR(cdev); dev_err(dev, "Failed to register cooling device\n"); - goto err_unreg_cdevs; + goto err_thermal_cooling_device_register; } thermal->cdevs[i] = cdev; } @@ -968,43 +962,45 @@ int mlxsw_thermal_init(struct mlxsw_core *core, MLXSW_THERMAL_TRIP_MASK, thermal, &mlxsw_thermal_ops, - NULL, 0, + &mlxsw_thermal_params, 0, thermal->polling_delay); if (IS_ERR(thermal->tzdev)) { err = PTR_ERR(thermal->tzdev); dev_err(dev, "Failed to register thermal zone\n"); - goto err_unreg_cdevs; + goto err_thermal_zone_device_register; } err = mlxsw_thermal_modules_init(dev, core, thermal); if (err) - goto err_unreg_tzdev; + goto err_thermal_modules_init; err = mlxsw_thermal_gearboxes_init(dev, core, thermal); if (err) - goto err_unreg_modules_tzdev; + goto err_thermal_gearboxes_init; err = thermal_zone_device_enable(thermal->tzdev); if (err) - goto err_unreg_gearboxes; + goto err_thermal_zone_device_enable; *p_thermal = thermal; return 0; -err_unreg_gearboxes: +err_thermal_zone_device_enable: mlxsw_thermal_gearboxes_fini(thermal); -err_unreg_modules_tzdev: +err_thermal_gearboxes_init: mlxsw_thermal_modules_fini(thermal); -err_unreg_tzdev: +err_thermal_modules_init: if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); thermal->tzdev = NULL; } -err_unreg_cdevs: +err_thermal_zone_device_register: +err_thermal_cooling_device_register: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) if 
(thermal->cdevs[i]) thermal_cooling_device_unregister(thermal->cdevs[i]); -err_free_thermal: +err_reg_write: +err_reg_query: devm_kfree(dev, thermal); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 9ac8ce01c061..060209983438 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -461,7 +461,6 @@ static struct mlxsw_driver mlxsw_m_driver = { .init = mlxsw_m_init, .fini = mlxsw_m_fini, .profile = &mlxsw_m_config_profile, - .res_query_enabled = true, }; static const struct i2c_device_id mlxsw_m_i2c_id[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index eebd0479b2bc..67b1a2f8397f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -6784,12 +6784,14 @@ static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index, set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC; else set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT; - mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); - if (egress) + if (egress) { + mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); mlxsw_reg_ritr_egress_counter_index_set(payload, index); - else + } else { + mlxsw_reg_ritr_ingress_counter_set_type_set(payload, set_type); mlxsw_reg_ritr_ingress_counter_index_set(payload, index); + } } static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) @@ -10037,6 +10039,7 @@ enum mlxsw_reg_mcia_eeprom_module_info_id { MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD = 0x18, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP = 0x19, }; enum mlxsw_reg_mcia_eeprom_module_info { @@ -11323,24 +11326,24 @@ enum mlxsw_reg_mgpir_device_type { MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE, }; -/* device_type +/* mgpir_device_type * Access: 
RO */ MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4); -/* devices_per_flash +/* mgpir_devices_per_flash * Number of devices of device_type per flash (can be shared by few devices). * Access: RO */ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); -/* num_of_devices +/* mgpir_num_of_devices * Number of devices of device_type. * Access: RO */ MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); -/* num_of_modules +/* mgpir_num_of_modules * Number of modules. * Access: RO */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a4b94eecea98..7b7b17183d10 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -45,52 +45,49 @@ #include "spectrum_ptp.h" #include "spectrum_trap.h" +#define MLXSW_SP_FWREV_MINOR 2010 +#define MLXSW_SP_FWREV_SUBMINOR 1006 + #define MLXSW_SP1_FWREV_MAJOR 13 -#define MLXSW_SP1_FWREV_MINOR 2010 -#define MLXSW_SP1_FWREV_SUBMINOR 1006 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { .major = MLXSW_SP1_FWREV_MAJOR, - .minor = MLXSW_SP1_FWREV_MINOR, - .subminor = MLXSW_SP1_FWREV_SUBMINOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, .can_reset_minor = MLXSW_SP1_FWREV_CAN_RESET_MINOR, }; #define MLXSW_SP1_FW_FILENAME \ "mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \ - "." __stringify(MLXSW_SP1_FWREV_MINOR) \ - "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2" + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." 
__stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP2_FWREV_MAJOR 29 -#define MLXSW_SP2_FWREV_MINOR 2010 -#define MLXSW_SP2_FWREV_SUBMINOR 1006 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, - .minor = MLXSW_SP2_FWREV_MINOR, - .subminor = MLXSW_SP2_FWREV_SUBMINOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, }; #define MLXSW_SP2_FW_FILENAME \ "mellanox/mlxsw_spectrum2-" __stringify(MLXSW_SP2_FWREV_MAJOR) \ - "." __stringify(MLXSW_SP2_FWREV_MINOR) \ - "." __stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2" + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP3_FWREV_MAJOR 30 -#define MLXSW_SP3_FWREV_MINOR 2010 -#define MLXSW_SP3_FWREV_SUBMINOR 1006 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = { .major = MLXSW_SP3_FWREV_MAJOR, - .minor = MLXSW_SP3_FWREV_MINOR, - .subminor = MLXSW_SP3_FWREV_SUBMINOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, }; #define MLXSW_SP3_FW_FILENAME \ "mellanox/mlxsw_spectrum3-" __stringify(MLXSW_SP3_FWREV_MAJOR) \ - "." __stringify(MLXSW_SP3_FWREV_MINOR) \ - "." __stringify(MLXSW_SP3_FWREV_SUBMINOR) ".mfa2" + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." 
__stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2"; @@ -3630,9 +3627,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp2_driver = { @@ -3670,9 +3664,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp3_driver = { @@ -3710,9 +3701,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp4_driver = { @@ -3748,9 +3736,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; bool mlxsw_sp_port_dev_check(const struct net_device *dev) @@ -4838,6 +4823,22 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; } +static bool mlxsw_sp_netdevice_event_is_router(unsigned long event) +{ + switch (event) { + case NETDEV_PRE_CHANGEADDR: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGEMTU: + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return true; + default: + return false; + } +} + static int mlxsw_sp_netdevice_event(struct 
notifier_block *nb, unsigned long event, void *ptr) { @@ -4862,9 +4863,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, event, ptr); - else if (event == NETDEV_PRE_CHANGEADDR || - event == NETDEV_CHANGEADDR || - event == NETDEV_CHANGEMTU) + else if (mlxsw_sp_netdevice_event_is_router(event)) err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 1a2fef2a5379..5d494fabf93d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -266,10 +266,10 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) if (!rif) continue; if (enable) - mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, + mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); else - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } mutex_unlock(&mlxsw_sp->router->lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index f54af3d9a03b..e91fb205e0b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -15,6 +15,46 @@ #include "spectrum.h" #include "core_acl_flex_keys.h" +static int mlxsw_sp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + 
act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_flow_block *block, struct mlxsw_sp_acl_rule_info *rulei, @@ -191,10 +231,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + err = mlxsw_sp_policer_validate(flow_action, act, extack); + if (err) + return err; /* The kernel might adjust the requested burst size so * that it is not exactly a power of two. 
Re-adjust it diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d40762cfc453..79deb19e3a19 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -225,6 +225,64 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, return 0; } +struct mlxsw_sp_rif_counter_set_basic { + u64 good_unicast_packets; + u64 good_multicast_packets; + u64 good_broadcast_packets; + u64 good_unicast_bytes; + u64 good_multicast_bytes; + u64 good_broadcast_bytes; + u64 error_packets; + u64 discard_packets; + u64 error_bytes; + u64 discard_bytes; +}; + +static int +mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + struct mlxsw_sp_rif_counter_set_basic *set) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + int err; + + if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + + if (!set) + return 0; + +#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME) \ + (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl)) + + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes); + +#undef 
MLXSW_SP_RIF_COUNTER_EXTRACT + + return 0; +} + static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index) { @@ -235,16 +293,20 @@ static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); } -int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; unsigned int *p_counter_index; int err; + if (mlxsw_sp_rif_counter_valid_get(rif, dir)) + return 0; + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); if (!p_counter_index) return -EINVAL; + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, p_counter_index); if (err) @@ -268,10 +330,10 @@ err_counter_clear: return err; } -void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; unsigned int *p_counter_index; if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) @@ -296,14 +358,12 @@ static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif) if (!devlink_dpipe_table_counter_enabled(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) return; - mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) @@ -8148,6 +8208,166 @@ u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->ul_rif_id; } +static bool 
+mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_EGRESS) && + mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static int +mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + if (err) + return err; + + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + NULL); + if (err) + goto err_clear_ingress; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + if (err) + goto err_alloc_egress; + + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + NULL); + if (err) + goto err_clear_egress; + + return 0; + +err_clear_egress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); +err_alloc_egress: +err_clear_ingress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + return err; +} + +static void +mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static void +mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return; + netdev_offload_xstats_report_used(info->report_used); +} + +static int +mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif, + struct rtnl_hw_stats64 *p_stats) +{ + struct mlxsw_sp_rif_counter_set_basic ingress; + struct mlxsw_sp_rif_counter_set_basic egress; + int err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + &ingress); + if (err) + return err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &egress); + if (err) + return err; + +#define 
MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX) \ + ((SET.good_unicast_ ## SFX) + \ + (SET.good_multicast_ ## SFX) + \ + (SET.good_broadcast_ ## SFX)) + + p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets); + p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets); + p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes); + p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes); + p_stats->rx_errors = ingress.error_packets; + p_stats->tx_errors = egress.error_packets; + p_stats->rx_dropped = ingress.discard_packets; + p_stats->tx_dropped = egress.discard_packets; + p_stats->multicast = ingress.good_multicast_packets + + ingress.good_broadcast_packets; + +#undef MLXSW_SP_ROUTER_ALL_GOOD + + return 0; +} + +static int +mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + struct rtnl_hw_stats64 stats = {}; + int err; + + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return 0; + + err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats); + if (err) + return err; + + netdev_offload_xstats_report_delta(info->report_delta, &stats); + return 0; +} + +struct mlxsw_sp_router_hwstats_notify_work { + struct work_struct work; + struct net_device *dev; +}; + +static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work = + container_of(work, struct mlxsw_sp_router_hwstats_notify_work, + work); + + rtnl_lock(); + rtnl_offload_xstats_notify(hws_work->dev); + rtnl_unlock(); + dev_put(hws_work->dev); + kfree(hws_work); +} + +static void +mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work; + + /* To collect notification payload, the core ends up sending another + * notifier block message, which would deadlock on the attempt to + * acquire the router lock again. Just postpone the notification until + * later. 
+ */ + + hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL); + if (!hws_work) + return; + + INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work); + dev_hold(dev); + hws_work->dev = dev; + mlxsw_core_schedule_work(&hws_work->work); +} + int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { return rif->dev->ifindex; @@ -8158,6 +8378,16 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) return rif->dev; } +static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) +{ + struct rtnl_hw_stats64 stats = {}; + + if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats)) + netdev_offload_xstats_push_delta(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3, + &stats); +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -8218,10 +8448,19 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_mr_rif_add; } - mlxsw_sp_rif_counters_alloc(rif); + if (netdev_offload_xstats_enabled(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + err = mlxsw_sp_router_port_l3_stats_enable(rif); + if (err) + goto err_stats_enable; + mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + } else { + mlxsw_sp_rif_counters_alloc(rif); + } return rif; +err_stats_enable: err_mr_rif_add: for (i--; i >= 0; i--) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -8251,7 +8490,15 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - mlxsw_sp_rif_counters_free(rif); + if (netdev_offload_xstats_enabled(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + mlxsw_sp_rif_push_l3_stats(rif); + mlxsw_sp_router_port_l3_stats_disable(rif); + mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + } else { + mlxsw_sp_rif_counters_free(rif); + } + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); ops->deconfigure(rif); @@ -9128,6 +9375,35 @@ static int 
mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, return -ENOBUFS; } +static int +mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + switch (info->type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + break; + default: + return 0; + } + + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + return mlxsw_sp_router_port_l3_stats_enable(rif); + case NETDEV_OFFLOAD_XSTATS_DISABLE: + mlxsw_sp_router_port_l3_stats_disable(rif); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + mlxsw_sp_router_port_l3_stats_report_used(rif, info); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return mlxsw_sp_router_port_l3_stats_report_delta(rif, info); + } + + WARN_ON_ONCE(1); + return 0; +} + int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, unsigned long event, void *ptr) { @@ -9153,6 +9429,15 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, case NETDEV_PRE_CHANGEADDR: err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); break; + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + err = mlxsw_sp_router_port_offload_xstats_cmd(rif, event, ptr); + break; + default: + WARN_ON_ONCE(1); + break; } out: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 99e8371a82a5..fa829658a11b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -159,11 +159,9 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, u64 *cnt); -void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); -int 
mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); struct mlxsw_sp_neigh_entry * mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 5459490c7790..b73466470f75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -269,7 +269,8 @@ mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev, if (!vid && WARN_ON(br_vlan_get_pvid(br_dev, &vid))) return NULL; - if (!vid || br_vlan_get_info(br_dev, vid, &vinfo)) + if (!vid || br_vlan_get_info(br_dev, vid, &vinfo) || + !(vinfo.flags & BRIDGE_VLAN_INFO_BRENTRY)) return NULL; edev = br_fdb_find_port(br_dev, dmac, vid); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 0303e727e99f..82d55fc27edc 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -293,7 +293,7 @@ static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp, */ static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) { - netif_rx_ni(skb); + netif_rx(skb); } /** @@ -452,11 +452,9 @@ static int ks8851_probe_spi(struct spi_device *spi) return ks8851_probe_common(netdev, dev, msg_enable); } -static int ks8851_remove_spi(struct spi_device *spi) +static void ks8851_remove_spi(struct spi_device *spi) { ks8851_remove_common(&spi->dev); - - return 0; } static const struct of_device_id ks8851_match_table[] = { diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 634ac7649c43..559ad94a44d0 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -975,7 +975,7 @@ static void enc28j60_hw_rx(struct net_device *ndev) /* 
update statistics */ ndev->stats.rx_packets++; ndev->stats.rx_bytes += len; - netif_rx_ni(skb); + netif_rx(skb); } } /* @@ -1612,15 +1612,13 @@ error_alloc: return ret; } -static int enc28j60_remove(struct spi_device *spi) +static void enc28j60_remove(struct spi_device *spi) { struct enc28j60_net *priv = spi_get_drvdata(spi); unregister_netdev(priv->netdev); free_irq(spi->irq, priv); free_netdev(priv->netdev); - - return 0; } static const struct of_device_id enc28j60_dt_ids[] = { diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index b90efc80fb59..dc1840cb5b10 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1093,7 +1093,7 @@ error_out: return ret; } -static int encx24j600_spi_remove(struct spi_device *spi) +static void encx24j600_spi_remove(struct spi_device *spi) { struct encx24j600_priv *priv = dev_get_drvdata(&spi->dev); @@ -1101,8 +1101,6 @@ static int encx24j600_spi_remove(struct spi_device *spi) kthread_stop(priv->kworker_task); free_netdev(priv->ndev); - - return 0; } static const struct spi_device_id encx24j600_spi_id_table[] = { diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 4e877d9859bf..ad310c95bf5c 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -600,7 +600,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) skb->offload_fwd_mark = 0; } - netif_rx_ni(skb); + netif_rx(skb); dev->stats.rx_bytes += len; dev->stats.rx_packets++; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 85099a51d4c7..e3555c94294d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -419,6 +419,9 @@ static int 
lan966x_netdevice_event(struct notifier_block *nb, return notifier_from_errno(ret); } +/* We don't offload uppers such as LAG as bridge ports, so every device except + * the bridge itself is foreign. + */ static bool lan966x_foreign_dev_check(const struct net_device *dev, const struct net_device *foreign_dev) { @@ -426,10 +429,10 @@ static bool lan966x_foreign_dev_check(const struct net_device *dev, struct lan966x *lan966x = port->lan966x; if (netif_is_bridge_master(foreign_dev)) - if (lan966x->bridge != foreign_dev) - return true; + if (lan966x->bridge == foreign_dev) + return false; - return false; + return true; } static int lan966x_switchdev_event(struct notifier_block *nb, @@ -449,8 +452,7 @@ static int lan966x_switchdev_event(struct notifier_block *nb, err = switchdev_handle_fdb_event_to_device(dev, event, ptr, lan966x_netdevice_check, lan966x_foreign_dev_check, - lan966x_handle_fdb, - NULL); + lan966x_handle_fdb); return notifier_from_errno(err); } diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile index c271e86ee292..e9dd348a6ebb 100644 --- a/drivers/net/ethernet/microchip/sparx5/Makefile +++ b/drivers/net/ethernet/microchip/sparx5/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o sparx5-switch-objs := sparx5_main.o sparx5_packet.o \ sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \ - sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o + sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o \ + sparx5_ptp.o diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 10b866e9f726..6b0febcb7fa9 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1183,6 +1183,39 @@ static void sparx5_config_port_stats(struct sparx5 *sparx5, int portno) sparx5, ANA_AC_PORT_STAT_CFG(portno, 
SPX5_PORT_POLICER_DROPS)); } +static int sparx5_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct sparx5_port *port = netdev_priv(dev); + struct sparx5 *sparx5 = port->sparx5; + struct sparx5_phc *phc; + + if (!sparx5->ptp) + return ethtool_op_get_ts_info(dev, info); + + phc = &sparx5->phc[SPARX5_PHC_PORT]; + + info->phc_index = phc->clock ? ptp_clock_index(phc->clock) : -1; + if (info->phc_index == -1) { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + return 0; + } + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | + BIT(HWTSTAMP_TX_ONESTEP_SYNC); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + const struct ethtool_ops sparx5_ethtool_ops = { .get_sset_count = sparx5_get_sset_count, .get_strings = sparx5_get_sset_strings, @@ -1194,6 +1227,7 @@ const struct ethtool_ops sparx5_ethtool_ops = { .get_eth_mac_stats = sparx5_get_eth_mac_stats, .get_eth_ctrl_stats = sparx5_get_eth_mac_ctrl_stats, .get_rmon_stats = sparx5_get_eth_rmon_stats, + .get_ts_info = sparx5_get_ts_info, }; int sparx_stats_init(struct sparx5 *sparx5) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 7436f62fa152..2dc87584023a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -240,6 +240,8 @@ static bool sparx5_fdma_rx_get_frame(struct sparx5 *sparx5, struct sparx5_rx *rx skb_pull(skb, IFH_LEN * sizeof(u32)); if (likely(!(skb->dev->features & NETIF_F_RXFCS))) skb_trim(skb, skb->len - ETH_FCS_LEN); + + sparx5_ptp_rxtstamp(sparx5, skb, fi.timestamp); skb->protocol = 
eth_type_trans(skb, skb->dev); /* Everything we see on an interface that is in the HW bridge * has already been forwarded diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 394de85d360d..5f7c7030ce03 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -190,6 +190,7 @@ static const struct sparx5_main_io_resource sparx5_main_iomap[] = { { TARGET_ASM, 0x10600000, 1 }, /* 0x610600000 */ { TARGET_GCB, 0x11010000, 2 }, /* 0x611010000 */ { TARGET_QS, 0x11030000, 2 }, /* 0x611030000 */ + { TARGET_PTP, 0x11040000, 2 }, /* 0x611040000 */ { TARGET_ANA_ACL, 0x11050000, 2 }, /* 0x611050000 */ { TARGET_LRN, 0x11060000, 2 }, /* 0x611060000 */ { TARGET_VCAP_SUPER, 0x11080000, 2 }, /* 0x611080000 */ @@ -692,6 +693,18 @@ static int sparx5_start(struct sparx5 *sparx5) } else { sparx5->xtr_irq = -ENXIO; } + + if (sparx5->ptp_irq >= 0) { + err = devm_request_threaded_irq(sparx5->dev, sparx5->ptp_irq, + NULL, sparx5_ptp_irq_handler, + IRQF_ONESHOT, "sparx5-ptp", + sparx5); + if (err) + sparx5->ptp_irq = -ENXIO; + + sparx5->ptp = 1; + } + return err; } @@ -808,6 +821,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) sparx5->fdma_irq = platform_get_irq_byname(sparx5->pdev, "fdma"); sparx5->xtr_irq = platform_get_irq_byname(sparx5->pdev, "xtr"); + sparx5->ptp_irq = platform_get_irq_byname(sparx5->pdev, "ptp"); /* Read chip ID to check CPU interface */ sparx5->chip_id = spx5_rd(sparx5, GCB_CHIP_ID); @@ -846,6 +860,12 @@ static int mchp_sparx5_probe(struct platform_device *pdev) dev_err(sparx5->dev, "Start failed\n"); goto cleanup_ports; } + + err = sparx5_ptp_init(sparx5); + if (err) { + dev_err(sparx5->dev, "PTP failed\n"); + goto cleanup_ports; + } goto cleanup_config; cleanup_ports: @@ -869,6 +889,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) disable_irq(sparx5->fdma_irq); sparx5->fdma_irq = -ENXIO; } + 
sparx5_ptp_deinit(sparx5); sparx5_fdma_stop(sparx5); sparx5_cleanup_ports(sparx5); /* Unregister netdevs */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index a1acc9b461f2..33892dfc3b2f 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -14,8 +14,12 @@ #include <linux/if_vlan.h> #include <linux/bitmap.h> #include <linux/phylink.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> #include <linux/hrtimer.h> +#include "sparx5_main_regs.h" + /* Target chip type */ enum spx5_target_chiptype { SPX5_TARGET_CT_7546 = 0x7546, /* SparX-5-64 Enterprise */ @@ -77,6 +81,18 @@ enum sparx5_vlan_port_type { #define FDMA_RX_DCB_MAX_DBS 15 #define FDMA_TX_DCB_MAX_DBS 1 +#define SPARX5_PHC_COUNT 3 +#define SPARX5_PHC_PORT 0 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_ONE_STEP_PTP 0x3 +#define IFH_REW_OP_TWO_STEP_PTP 0x4 + +#define IFH_PDU_TYPE_NONE 0x0 +#define IFH_PDU_TYPE_PTP 0x5 +#define IFH_PDU_TYPE_IPV4_UDP_PTP 0x6 +#define IFH_PDU_TYPE_IPV6_UDP_PTP 0x7 + struct sparx5; struct sparx5_db_hw { @@ -165,9 +181,12 @@ struct sparx5_port { enum sparx5_port_max_tags max_vlan_tags; enum sparx5_vlan_port_type vlan_type; u32 custom_etype; - u32 ifh[IFH_LEN]; bool vlan_aware; struct hrtimer inj_timer; + /* ptp */ + u8 ptp_cmd; + u16 ts_id; + struct sk_buff_head tx_skbs; }; enum sparx5_core_clockfreq { @@ -177,6 +196,26 @@ enum sparx5_core_clockfreq { SPX5_CORE_CLOCK_625MHZ, /* 625MHZ core clock frequency */ }; +struct sparx5_phc { + struct ptp_clock *clock; + struct ptp_clock_info info; + struct hwtstamp_config hwtstamp_config; + struct sparx5 *sparx5; + u8 index; +}; + +struct sparx5_skb_cb { + u8 rew_op; + u8 pdu_type; + u8 pdu_w16_offset; + u16 ts_id; + unsigned long jiffies; +}; + +#define SPARX5_PTP_TIMEOUT msecs_to_jiffies(10) +#define SPARX5_SKB_CB(skb) \ + ((struct sparx5_skb_cb *)((skb)->cb)) + struct sparx5 { 
struct platform_device *pdev; struct device *dev; @@ -224,6 +263,14 @@ struct sparx5 { int fdma_irq; struct sparx5_rx rx; struct sparx5_tx tx; + /* PTP */ + bool ptp; + struct sparx5_phc phc[SPARX5_PHC_COUNT]; + spinlock_t ptp_clock_lock; /* lock for phc */ + spinlock_t ptp_ts_id_lock; /* lock for ts_id */ + struct mutex ptp_lock; /* lock for ptp interface state */ + u16 ptp_skbs; + int ptp_irq; }; /* sparx5_switchdev.c */ @@ -233,6 +280,7 @@ void sparx5_unregister_notifier_blocks(struct sparx5 *sparx5); /* sparx5_packet.c */ struct frame_info { int src_port; + u32 timestamp; }; void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp); @@ -286,12 +334,30 @@ void sparx5_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats int sparx_stats_init(struct sparx5 *sparx5); /* sparx5_netdev.c */ +void sparx5_set_port_ifh_timestamp(void *ifh_hdr, u64 timestamp); +void sparx5_set_port_ifh_rew_op(void *ifh_hdr, u32 rew_op); +void sparx5_set_port_ifh_pdu_type(void *ifh_hdr, u32 pdu_type); +void sparx5_set_port_ifh_pdu_w16_offset(void *ifh_hdr, u32 pdu_w16_offset); +void sparx5_set_port_ifh(void *ifh_hdr, u16 portno); bool sparx5_netdevice_check(const struct net_device *dev); struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno); int sparx5_register_netdevs(struct sparx5 *sparx5); void sparx5_destroy_netdevs(struct sparx5 *sparx5); void sparx5_unregister_netdevs(struct sparx5 *sparx5); +/* sparx5_ptp.c */ +int sparx5_ptp_init(struct sparx5 *sparx5); +void sparx5_ptp_deinit(struct sparx5 *sparx5); +int sparx5_ptp_hwtstamp_set(struct sparx5_port *port, struct ifreq *ifr); +int sparx5_ptp_hwtstamp_get(struct sparx5_port *port, struct ifreq *ifr); +void sparx5_ptp_rxtstamp(struct sparx5 *sparx5, struct sk_buff *skb, + u64 timestamp); +int sparx5_ptp_txtstamp_request(struct sparx5_port *port, + struct sk_buff *skb); +void sparx5_ptp_txtstamp_release(struct sparx5_port *port, + struct sk_buff *skb); +irqreturn_t sparx5_ptp_irq_handler(int irq, void 
*args); + /* Clock period in picoseconds */ static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock) { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h index 5ab2373a7178..c94de436b281 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h @@ -4,8 +4,8 @@ * Copyright (c) 2021 Microchip Technology Inc. */ -/* This file is autogenerated by cml-utils 2021-05-06 13:06:37 +0200. - * Commit ID: 9ae4ec441e25e4b9003f4e514df5cb12a36b84d3 +/* This file is autogenerated by cml-utils 2022-02-26 14:15:01 +0100. + * Commit ID: 98bdd3d171cc2a1afd30d241d41a4281d471a48c (dirty) */ #ifndef _SPARX5_MAIN_REGS_H_ @@ -40,6 +40,7 @@ enum sparx5_target { TARGET_PCS25G_BR = 144, TARGET_PCS5G_BR = 160, TARGET_PORT_CONF = 173, + TARGET_PTP = 174, TARGET_QFWD = 175, TARGET_QRES = 176, TARGET_QS = 177, @@ -4156,6 +4157,249 @@ enum sparx5_target { #define PORT_CONF_USGMII_CFG_QUAD_MODE_GET(x)\ FIELD_GET(PORT_CONF_USGMII_CFG_QUAD_MODE, x) +/* DEVCPU_PTP:PTP_CFG:PTP_PIN_INTR */ +#define PTP_PTP_PIN_INTR __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 0, 0, 1, 4) + +#define PTP_PTP_PIN_INTR_INTR_PTP GENMASK(4, 0) +#define PTP_PTP_PIN_INTR_INTR_PTP_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_INTR_INTR_PTP, x) +#define PTP_PTP_PIN_INTR_INTR_PTP_GET(x)\ + FIELD_GET(PTP_PTP_PIN_INTR_INTR_PTP, x) + +/* DEVCPU_PTP:PTP_CFG:PTP_PIN_INTR_ENA */ +#define PTP_PTP_PIN_INTR_ENA __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 4, 0, 1, 4) + +#define PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA GENMASK(4, 0) +#define PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA, x) +#define PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA_GET(x)\ + FIELD_GET(PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA, x) + +/* DEVCPU_PTP:PTP_CFG:PTP_INTR_IDENT */ +#define PTP_PTP_INTR_IDENT __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 8, 0, 1, 4) + +#define PTP_PTP_INTR_IDENT_INTR_PTP_IDENT GENMASK(4, 
0) +#define PTP_PTP_INTR_IDENT_INTR_PTP_IDENT_SET(x)\ + FIELD_PREP(PTP_PTP_INTR_IDENT_INTR_PTP_IDENT, x) +#define PTP_PTP_INTR_IDENT_INTR_PTP_IDENT_GET(x)\ + FIELD_GET(PTP_PTP_INTR_IDENT_INTR_PTP_IDENT, x) + +/* DEVCPU_PTP:PTP_CFG:PTP_DOM_CFG */ +#define PTP_PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 12, 0, 1, 4) + +#define PTP_PTP_DOM_CFG_PTP_ENA GENMASK(11, 9) +#define PTP_PTP_DOM_CFG_PTP_ENA_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_ENA, x) +#define PTP_PTP_DOM_CFG_PTP_ENA_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_ENA, x) + +#define PTP_PTP_DOM_CFG_PTP_HOLD GENMASK(8, 6) +#define PTP_PTP_DOM_CFG_PTP_HOLD_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_HOLD, x) +#define PTP_PTP_DOM_CFG_PTP_HOLD_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_HOLD, x) + +#define PTP_PTP_DOM_CFG_PTP_TOD_FREEZE GENMASK(5, 3) +#define PTP_PTP_DOM_CFG_PTP_TOD_FREEZE_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_TOD_FREEZE, x) +#define PTP_PTP_DOM_CFG_PTP_TOD_FREEZE_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_TOD_FREEZE, x) + +#define PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS GENMASK(2, 0) +#define PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, x) +#define PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:CLK_PER_CFG */ +#define PTP_CLK_PER_CFG(g, r) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 0, r, 2, 4) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_NSEC */ +#define PTP_PTP_CUR_NSEC(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 8, 0, 1, 4) + +#define PTP_PTP_CUR_NSEC_PTP_CUR_NSEC GENMASK(29, 0) +#define PTP_PTP_CUR_NSEC_PTP_CUR_NSEC_SET(x)\ + FIELD_PREP(PTP_PTP_CUR_NSEC_PTP_CUR_NSEC, x) +#define PTP_PTP_CUR_NSEC_PTP_CUR_NSEC_GET(x)\ + FIELD_GET(PTP_PTP_CUR_NSEC_PTP_CUR_NSEC, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_NSEC_FRAC */ +#define PTP_PTP_CUR_NSEC_FRAC(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 12, 0, 1, 4) + +#define PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC GENMASK(7, 0) +#define 
PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC_SET(x)\ + FIELD_PREP(PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC, x) +#define PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC_GET(x)\ + FIELD_GET(PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_SEC_LSB */ +#define PTP_PTP_CUR_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 16, 0, 1, 4) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_SEC_MSB */ +#define PTP_PTP_CUR_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 20, 0, 1, 4) + +#define PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB GENMASK(15, 0) +#define PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB_SET(x)\ + FIELD_PREP(PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB, x) +#define PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB_GET(x)\ + FIELD_GET(PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:NTP_CUR_NSEC */ +#define PTP_NTP_CUR_NSEC(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 24, 0, 1, 4) + +/* DEVCPU_PTP:PTP_PINS:PTP_PIN_CFG */ +#define PTP_PTP_PIN_CFG(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 0, 0, 1, 4) + +#define PTP_PTP_PIN_CFG_PTP_PIN_ACTION GENMASK(28, 26) +#define PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_ACTION, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_ACTION_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_ACTION, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_SYNC GENMASK(25, 24) +#define PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_SYNC, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_SYNC_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_SYNC, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_INV_POL BIT(23) +#define PTP_PTP_PIN_CFG_PTP_PIN_INV_POL_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_INV_POL, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_INV_POL_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_INV_POL, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_SELECT GENMASK(22, 21) +#define PTP_PTP_PIN_CFG_PTP_PIN_SELECT_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_SELECT, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_SELECT_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_SELECT, x) + 
+#define PTP_PTP_PIN_CFG_PTP_CLK_SELECT GENMASK(20, 18) +#define PTP_PTP_PIN_CFG_PTP_CLK_SELECT_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_CLK_SELECT, x) +#define PTP_PTP_PIN_CFG_PTP_CLK_SELECT_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_CLK_SELECT, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_DOM GENMASK(17, 16) +#define PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_DOM, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_DOM_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_DOM, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_OPT GENMASK(15, 14) +#define PTP_PTP_PIN_CFG_PTP_PIN_OPT_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_OPT, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_OPT_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_OPT, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK BIT(13) +#define PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS GENMASK(12, 0) +#define PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS, x) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_SEC_MSB */ +#define PTP_PTP_TOD_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 4, 0, 1, 4) + +#define PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB GENMASK(15, 0) +#define PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB_SET(x)\ + FIELD_PREP(PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB, x) +#define PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB_GET(x)\ + FIELD_GET(PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB, x) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_SEC_LSB */ +#define PTP_PTP_TOD_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 8, 0, 1, 4) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_NSEC */ +#define PTP_PTP_TOD_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 12, 0, 1, 4) + +#define PTP_PTP_TOD_NSEC_PTP_TOD_NSEC GENMASK(29, 0) +#define PTP_PTP_TOD_NSEC_PTP_TOD_NSEC_SET(x)\ + 
FIELD_PREP(PTP_PTP_TOD_NSEC_PTP_TOD_NSEC, x) +#define PTP_PTP_TOD_NSEC_PTP_TOD_NSEC_GET(x)\ + FIELD_GET(PTP_PTP_TOD_NSEC_PTP_TOD_NSEC, x) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_NSEC_FRAC */ +#define PTP_PTP_TOD_NSEC_FRAC(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 16, 0, 1, 4) + +#define PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC GENMASK(7, 0) +#define PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC_SET(x)\ + FIELD_PREP(PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC, x) +#define PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC_GET(x)\ + FIELD_GET(PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC, x) + +/* DEVCPU_PTP:PTP_PINS:NTP_NSEC */ +#define PTP_NTP_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 20, 0, 1, 4) + +/* DEVCPU_PTP:PTP_PINS:PIN_WF_HIGH_PERIOD */ +#define PTP_PIN_WF_HIGH_PERIOD(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 24, 0, 1, 4) + +#define PTP_PIN_WF_HIGH_PERIOD_PIN_WFH GENMASK(29, 0) +#define PTP_PIN_WF_HIGH_PERIOD_PIN_WFH_SET(x)\ + FIELD_PREP(PTP_PIN_WF_HIGH_PERIOD_PIN_WFH, x) +#define PTP_PIN_WF_HIGH_PERIOD_PIN_WFH_GET(x)\ + FIELD_GET(PTP_PIN_WF_HIGH_PERIOD_PIN_WFH, x) + +/* DEVCPU_PTP:PTP_PINS:PIN_WF_LOW_PERIOD */ +#define PTP_PIN_WF_LOW_PERIOD(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 28, 0, 1, 4) + +#define PTP_PIN_WF_LOW_PERIOD_PIN_WFL GENMASK(29, 0) +#define PTP_PIN_WF_LOW_PERIOD_PIN_WFL_SET(x)\ + FIELD_PREP(PTP_PIN_WF_LOW_PERIOD_PIN_WFL, x) +#define PTP_PIN_WF_LOW_PERIOD_PIN_WFL_GET(x)\ + FIELD_GET(PTP_PIN_WF_LOW_PERIOD_PIN_WFL, x) + +/* DEVCPU_PTP:PTP_PINS:PIN_IOBOUNCH_DELAY */ +#define PTP_PIN_IOBOUNCH_DELAY(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 32, 0, 1, 4) + +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL GENMASK(18, 3) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL_SET(x)\ + FIELD_PREP(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL, x) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL_GET(x)\ + FIELD_GET(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL, x) + +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG GENMASK(2, 0) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG_SET(x)\ + 
FIELD_PREP(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG, x) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG_GET(x)\ + FIELD_GET(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG, x) + +/* DEVCPU_PTP:PHASE_DETECTOR_CTRL:PHAD_CTRL */ +#define PTP_PHAD_CTRL(g) __REG(TARGET_PTP, 0, 1, 420, g, 5, 8, 0, 0, 1, 4) + +#define PTP_PHAD_CTRL_PHAD_ENA BIT(7) +#define PTP_PHAD_CTRL_PHAD_ENA_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_PHAD_ENA, x) +#define PTP_PHAD_CTRL_PHAD_ENA_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_PHAD_ENA, x) + +#define PTP_PHAD_CTRL_PHAD_FAILED BIT(6) +#define PTP_PHAD_CTRL_PHAD_FAILED_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_PHAD_FAILED, x) +#define PTP_PHAD_CTRL_PHAD_FAILED_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_PHAD_FAILED, x) + +#define PTP_PHAD_CTRL_REDUCED_RES GENMASK(5, 3) +#define PTP_PHAD_CTRL_REDUCED_RES_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_REDUCED_RES, x) +#define PTP_PHAD_CTRL_REDUCED_RES_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_REDUCED_RES, x) + +#define PTP_PHAD_CTRL_LOCK_ACC GENMASK(2, 0) +#define PTP_PHAD_CTRL_LOCK_ACC_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_LOCK_ACC, x) +#define PTP_PHAD_CTRL_LOCK_ACC_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_LOCK_ACC, x) + +/* DEVCPU_PTP:PHASE_DETECTOR_CTRL:PHAD_CYC_STAT */ +#define PTP_PHAD_CYC_STAT(g) __REG(TARGET_PTP, 0, 1, 420, g, 5, 8, 4, 0, 1, 4) + /* QFWD:SYSTEM:SWITCH_PORT_MODE */ #define QFWD_SWITCH_PORT_MODE(r) __REG(TARGET_QFWD, 0, 1, 0, 0, 1, 340, 0, r, 70, 4) @@ -4528,6 +4772,93 @@ enum sparx5_target { #define REW_TAG_CTRL_TAG_DEI_CFG_GET(x)\ FIELD_GET(REW_TAG_CTRL_TAG_DEI_CFG, x) +/* REW:PTP_CTRL:PTP_TWOSTEP_CTRL */ +#define REW_PTP_TWOSTEP_CTRL __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 0, 0, 1, 4) + +#define REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA BIT(12) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA, x) + +#define REW_PTP_TWOSTEP_CTRL_PTP_NXT BIT(11) +#define REW_PTP_TWOSTEP_CTRL_PTP_NXT_SET(x)\ + 
FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_NXT, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_NXT_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_NXT, x) + +#define REW_PTP_TWOSTEP_CTRL_PTP_VLD BIT(10) +#define REW_PTP_TWOSTEP_CTRL_PTP_VLD_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_VLD, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_VLD_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_VLD, x) + +#define REW_PTP_TWOSTEP_CTRL_STAMP_TX BIT(9) +#define REW_PTP_TWOSTEP_CTRL_STAMP_TX_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_STAMP_TX, x) +#define REW_PTP_TWOSTEP_CTRL_STAMP_TX_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_STAMP_TX, x) + +#define REW_PTP_TWOSTEP_CTRL_STAMP_PORT GENMASK(8, 1) +#define REW_PTP_TWOSTEP_CTRL_STAMP_PORT_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_STAMP_PORT, x) +#define REW_PTP_TWOSTEP_CTRL_STAMP_PORT_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_STAMP_PORT, x) + +#define REW_PTP_TWOSTEP_CTRL_PTP_OVFL BIT(0) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVFL_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_OVFL, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVFL_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_OVFL, x) + +/* REW:PTP_CTRL:PTP_TWOSTEP_STAMP */ +#define REW_PTP_TWOSTEP_STAMP __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 4, 0, 1, 4) + +#define REW_PTP_TWOSTEP_STAMP_STAMP_NSEC GENMASK(29, 0) +#define REW_PTP_TWOSTEP_STAMP_STAMP_NSEC_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_STAMP_STAMP_NSEC, x) +#define REW_PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_STAMP_STAMP_NSEC, x) + +/* REW:PTP_CTRL:PTP_TWOSTEP_STAMP_SUBNS */ +#define REW_PTP_TWOSTEP_STAMP_SUBNS __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 8, 0, 1, 4) + +#define REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC GENMASK(7, 0) +#define REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC, x) +#define REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC, x) + +/* REW:PTP_CTRL:PTP_RSRV_NOT_ZERO */ +#define REW_PTP_RSRV_NOT_ZERO 
__REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 12, 0, 1, 4) + +/* REW:PTP_CTRL:PTP_RSRV_NOT_ZERO1 */ +#define REW_PTP_RSRV_NOT_ZERO1 __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 16, 0, 1, 4) + +/* REW:PTP_CTRL:PTP_RSRV_NOT_ZERO2 */ +#define REW_PTP_RSRV_NOT_ZERO2 __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 20, 0, 1, 4) + +#define REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2 GENMASK(5, 0) +#define REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2_SET(x)\ + FIELD_PREP(REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2, x) +#define REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2_GET(x)\ + FIELD_GET(REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2, x) + +/* REW:PTP_CTRL:PTP_GEN_STAMP_FMT */ +#define REW_PTP_GEN_STAMP_FMT(r) __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 24, r, 4, 4) + +#define REW_PTP_GEN_STAMP_FMT_RT_OFS GENMASK(6, 2) +#define REW_PTP_GEN_STAMP_FMT_RT_OFS_SET(x)\ + FIELD_PREP(REW_PTP_GEN_STAMP_FMT_RT_OFS, x) +#define REW_PTP_GEN_STAMP_FMT_RT_OFS_GET(x)\ + FIELD_GET(REW_PTP_GEN_STAMP_FMT_RT_OFS, x) + +#define REW_PTP_GEN_STAMP_FMT_RT_FMT GENMASK(1, 0) +#define REW_PTP_GEN_STAMP_FMT_RT_FMT_SET(x)\ + FIELD_PREP(REW_PTP_GEN_STAMP_FMT_RT_FMT, x) +#define REW_PTP_GEN_STAMP_FMT_RT_FMT_GET(x)\ + FIELD_GET(REW_PTP_GEN_STAMP_FMT_RT_FMT, x) + /* REW:RAM_CTRL:RAM_INIT */ #define REW_RAM_INIT __REG(TARGET_REW, 0, 1, 378696, 0, 1, 4, 0, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index e042f117dc7a..af4d3e1f1a6d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -54,7 +54,7 @@ static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) ifh_hdr[byte - 5] |= (u8)((encode & 0xFF0000000000) >> 40); } -static void sparx5_set_port_ifh(void *ifh_hdr, u16 portno) +void sparx5_set_port_ifh(void *ifh_hdr, u16 portno) { /* VSTAX.RSV = 1. 
MSBit must be 1 */ ifh_encode_bitfield(ifh_hdr, 1, VSTAX + 79, 1); @@ -74,6 +74,26 @@ static void sparx5_set_port_ifh(void *ifh_hdr, u16 portno) ifh_encode_bitfield(ifh_hdr, 1, 67, 1); } +void sparx5_set_port_ifh_rew_op(void *ifh_hdr, u32 rew_op) +{ + ifh_encode_bitfield(ifh_hdr, rew_op, VSTAX + 32, 10); +} + +void sparx5_set_port_ifh_pdu_type(void *ifh_hdr, u32 pdu_type) +{ + ifh_encode_bitfield(ifh_hdr, pdu_type, 191, 4); +} + +void sparx5_set_port_ifh_pdu_w16_offset(void *ifh_hdr, u32 pdu_w16_offset) +{ + ifh_encode_bitfield(ifh_hdr, pdu_w16_offset, 195, 6); +} + +void sparx5_set_port_ifh_timestamp(void *ifh_hdr, u64 timestamp) +{ + ifh_encode_bitfield(ifh_hdr, timestamp, 232, 40); +} + static int sparx5_port_open(struct net_device *ndev) { struct sparx5_port *port = netdev_priv(ndev); @@ -179,6 +199,24 @@ static int sparx5_get_port_parent_id(struct net_device *dev, return 0; } +static int sparx5_port_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct sparx5_port *sparx5_port = netdev_priv(dev); + struct sparx5 *sparx5 = sparx5_port->sparx5; + + if (!phy_has_hwtstamp(dev->phydev) && sparx5->ptp) { + switch (cmd) { + case SIOCSHWTSTAMP: + return sparx5_ptp_hwtstamp_set(sparx5_port, ifr); + case SIOCGHWTSTAMP: + return sparx5_ptp_hwtstamp_get(sparx5_port, ifr); + } + } + + return phy_mii_ioctl(dev->phydev, ifr, cmd); +} + static const struct net_device_ops sparx5_port_netdev_ops = { .ndo_open = sparx5_port_open, .ndo_stop = sparx5_port_stop, @@ -189,6 +227,7 @@ static const struct net_device_ops sparx5_port_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_get_stats64 = sparx5_get_stats64, .ndo_get_port_parent_id = sparx5_get_port_parent_id, + .ndo_eth_ioctl = sparx5_port_ioctl, }; bool sparx5_netdevice_check(const struct net_device *dev) @@ -210,7 +249,6 @@ struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno) spx5_port->ndev = ndev; spx5_port->sparx5 = sparx5; spx5_port->portno = portno; - 
sparx5_set_port_ifh(spx5_port->ifh, portno); ndev->netdev_ops = &sparx5_port_netdev_ops; ndev->ethtool_ops = &sparx5_ethtool_ops; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 148d431fcde4..304f84aadc36 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -44,6 +44,12 @@ void sparx5_ifh_parse(u32 *ifh, struct frame_info *info) ((u32)xtr_hdr[30] << 0); fwd = (fwd >> 5); info->src_port = FIELD_GET(GENMASK(7, 1), fwd); + + info->timestamp = + ((u64)xtr_hdr[2] << 24) | + ((u64)xtr_hdr[3] << 16) | + ((u64)xtr_hdr[4] << 8) | + ((u64)xtr_hdr[5] << 0); } static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) @@ -144,6 +150,7 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) /* Finish up skb */ skb_put(skb, byte_cnt - ETH_FCS_LEN); eth_skb_pad(skb); + sparx5_ptp_rxtstamp(sparx5, skb, fi.timestamp); skb->protocol = eth_type_trans(skb, netdev); netdev->stats.rx_bytes += skb->len; netdev->stats.rx_packets++; @@ -218,20 +225,44 @@ int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &dev->stats; struct sparx5_port *port = netdev_priv(dev); struct sparx5 *sparx5 = port->sparx5; + u32 ifh[IFH_LEN]; int ret; + memset(ifh, 0, IFH_LEN * 4); + sparx5_set_port_ifh(ifh, port->portno); + + if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + ret = sparx5_ptp_txtstamp_request(port, skb); + if (ret) + return ret; + + sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op); + sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type); + sparx5_set_port_ifh_pdu_w16_offset(ifh, SPARX5_SKB_CB(skb)->pdu_w16_offset); + sparx5_set_port_ifh_timestamp(ifh, SPARX5_SKB_CB(skb)->ts_id); + } + + skb_tx_timestamp(skb); if (sparx5->fdma_irq > 0) - ret = sparx5_fdma_xmit(sparx5, port->ifh, skb); + ret = sparx5_fdma_xmit(sparx5, 
ifh, skb); else - ret = sparx5_inject(sparx5, port->ifh, skb, dev); + ret = sparx5_inject(sparx5, ifh, skb, dev); if (ret == NETDEV_TX_OK) { stats->tx_bytes += skb->len; stats->tx_packets++; - skb_tx_timestamp(skb); + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return ret; + dev_kfree_skb_any(skb); } else { stats->tx_dropped++; + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + sparx5_ptp_txtstamp_release(port, skb); } return ret; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c new file mode 100644 index 000000000000..fa377f6e7e08 --- /dev/null +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c @@ -0,0 +1,685 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Microchip Sparx5 Switch driver + * + * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries. + * + * The Sparx5 Chip Register Model can be browsed at this location: + * https://github.com/microchip-ung/sparx-5_reginfo + */ +#include <linux/ptp_classify.h> + +#include "sparx5_main_regs.h" +#include "sparx5_main.h" + +#define SPARX5_MAX_PTP_ID 512 + +#define TOD_ACC_PIN 0x4 + +enum { + PTP_PIN_ACTION_IDLE = 0, + PTP_PIN_ACTION_LOAD, + PTP_PIN_ACTION_SAVE, + PTP_PIN_ACTION_CLOCK, + PTP_PIN_ACTION_DELTA, + PTP_PIN_ACTION_TOD +}; + +static u64 sparx5_ptp_get_1ppm(struct sparx5 *sparx5) +{ + /* Represents 1ppm adjustment in 2^59 format with 1.59687500000(625) + * 1.99609375000(500), 3.99218750000(250) as reference + * The value is calculated as following: + * (1/1000000)/((2^-59)/X) + */ + + u64 res; + + switch (sparx5->coreclock) { + case SPX5_CORE_CLOCK_250MHZ: + res = 2301339409586; + break; + case SPX5_CORE_CLOCK_500MHZ: + res = 1150669704793; + break; + case SPX5_CORE_CLOCK_625MHZ: + res = 920535763834; + break; + default: + WARN_ON("Invalid core clock"); + break; + } + + return res; +} + +static u64 
sparx5_ptp_get_nominal_value(struct sparx5 *sparx5) +{ + u64 res; + + switch (sparx5->coreclock) { + case SPX5_CORE_CLOCK_250MHZ: + res = 0x1FF0000000000000; + break; + case SPX5_CORE_CLOCK_500MHZ: + res = 0x0FF8000000000000; + break; + case SPX5_CORE_CLOCK_625MHZ: + res = 0x0CC6666666666666; + break; + default: + WARN_ON("Invalid core clock"); + break; + } + + return res; +} + +int sparx5_ptp_hwtstamp_set(struct sparx5_port *port, struct ifreq *ifr) +{ + struct sparx5 *sparx5 = port->sparx5; + struct hwtstamp_config cfg; + struct sparx5_phc *phc; + + /* For now don't allow to run ptp on ports that are part of a bridge, + * because in case of transparent clock the HW will still forward the + * frames, so there would be duplicate frames + */ + + if (test_bit(port->portno, sparx5->bridge_mask)) + return -EINVAL; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + switch (cfg.tx_type) { + case HWTSTAMP_TX_ON: + port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + port->ptp_cmd = IFH_REW_OP_ONE_STEP_PTP; + break; + case HWTSTAMP_TX_OFF: + port->ptp_cmd = IFH_REW_OP_NOOP; + break; + default: + return -ERANGE; + } + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + /* Commit back the result & save it */ + mutex_lock(&sparx5->ptp_lock); + phc = &sparx5->phc[SPARX5_PHC_PORT]; + 
memcpy(&phc->hwtstamp_config, &cfg, sizeof(cfg)); + mutex_unlock(&sparx5->ptp_lock); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +int sparx5_ptp_hwtstamp_get(struct sparx5_port *port, struct ifreq *ifr) +{ + struct sparx5 *sparx5 = port->sparx5; + struct sparx5_phc *phc; + + phc = &sparx5->phc[SPARX5_PHC_PORT]; + return copy_to_user(ifr->ifr_data, &phc->hwtstamp_config, + sizeof(phc->hwtstamp_config)) ? -EFAULT : 0; +} + +static void sparx5_ptp_classify(struct sparx5_port *port, struct sk_buff *skb, + u8 *rew_op, u8 *pdu_type, u8 *pdu_w16_offset) +{ + struct ptp_header *header; + u8 msgtype; + int type; + + if (port->ptp_cmd == IFH_REW_OP_NOOP) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + *pdu_w16_offset = 0; + return; + } + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + *pdu_w16_offset = 0; + return; + } + + header = ptp_parse_header(skb, type); + if (!header) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + *pdu_w16_offset = 0; + return; + } + + *pdu_w16_offset = 7; + if (type & PTP_CLASS_L2) + *pdu_type = IFH_PDU_TYPE_PTP; + if (type & PTP_CLASS_IPV4) + *pdu_type = IFH_PDU_TYPE_IPV4_UDP_PTP; + if (type & PTP_CLASS_IPV6) + *pdu_type = IFH_PDU_TYPE_IPV6_UDP_PTP; + + if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + *rew_op = IFH_REW_OP_TWO_STEP_PTP; + return; + } + + /* If it is sync and run 1 step then set the correct operation, + * otherwise run as 2 step + */ + msgtype = ptp_get_msgtype(header, type); + if ((msgtype & 0xf) == 0) { + *rew_op = IFH_REW_OP_ONE_STEP_PTP; + return; + } + + *rew_op = IFH_REW_OP_TWO_STEP_PTP; +} + +static void sparx5_ptp_txtstamp_old_release(struct sparx5_port *port) +{ + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if time_after(SPARX5_SKB_CB(skb)->jiffies + 
SPARX5_PTP_TIMEOUT, + jiffies) + break; + + __skb_unlink(skb, &port->tx_skbs); + dev_kfree_skb_any(skb); + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); +} + +int sparx5_ptp_txtstamp_request(struct sparx5_port *port, + struct sk_buff *skb) +{ + struct sparx5 *sparx5 = port->sparx5; + u8 rew_op, pdu_type, pdu_w16_offset; + unsigned long flags; + + sparx5_ptp_classify(port, skb, &rew_op, &pdu_type, &pdu_w16_offset); + SPARX5_SKB_CB(skb)->rew_op = rew_op; + SPARX5_SKB_CB(skb)->pdu_type = pdu_type; + SPARX5_SKB_CB(skb)->pdu_w16_offset = pdu_w16_offset; + + if (rew_op != IFH_REW_OP_TWO_STEP_PTP) + return 0; + + sparx5_ptp_txtstamp_old_release(port); + + spin_lock_irqsave(&sparx5->ptp_ts_id_lock, flags); + if (sparx5->ptp_skbs == SPARX5_MAX_PTP_ID) { + spin_unlock_irqrestore(&sparx5->ptp_ts_id_lock, flags); + return -EBUSY; + } + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + skb_queue_tail(&port->tx_skbs, skb); + SPARX5_SKB_CB(skb)->ts_id = port->ts_id; + SPARX5_SKB_CB(skb)->jiffies = jiffies; + + sparx5->ptp_skbs++; + port->ts_id++; + if (port->ts_id == SPARX5_MAX_PTP_ID) + port->ts_id = 0; + + spin_unlock_irqrestore(&sparx5->ptp_ts_id_lock, flags); + + return 0; +} + +void sparx5_ptp_txtstamp_release(struct sparx5_port *port, + struct sk_buff *skb) +{ + struct sparx5 *sparx5 = port->sparx5; + unsigned long flags; + + spin_lock_irqsave(&sparx5->ptp_ts_id_lock, flags); + port->ts_id--; + sparx5->ptp_skbs--; + skb_unlink(skb, &port->tx_skbs); + spin_unlock_irqrestore(&sparx5->ptp_ts_id_lock, flags); +} + +static void sparx5_get_hwtimestamp(struct sparx5 *sparx5, + struct timespec64 *ts, + u32 nsec) +{ + /* Read current PTP time to get seconds */ + unsigned long flags; + u32 curr_nsec; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(SPARX5_PHC_PORT) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + 
PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + ts->tv_sec = spx5_rd(sparx5, PTP_PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + curr_nsec = spx5_rd(sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + + ts->tv_nsec = nsec; + + /* Sec has incremented since the ts was registered */ + if (curr_nsec < nsec) + ts->tv_sec--; + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); +} + +irqreturn_t sparx5_ptp_irq_handler(int irq, void *args) +{ + int budget = SPARX5_MAX_PTP_ID; + struct sparx5 *sparx5 = args; + + while (budget--) { + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct skb_shared_hwtstamps shhwtstamps; + struct sparx5_port *port; + struct timespec64 ts; + unsigned long flags; + u32 val, id, txport; + u32 delay; + + val = spx5_rd(sparx5, REW_PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retrieved */ + if (!(val & REW_PTP_TWOSTEP_CTRL_PTP_VLD)) + break; + + WARN_ON(val & REW_PTP_TWOSTEP_CTRL_PTP_OVFL); + + if (!(val & REW_PTP_TWOSTEP_CTRL_STAMP_TX)) + continue; + + /* Retrieve the ts Tx port */ + txport = REW_PTP_TWOSTEP_CTRL_STAMP_PORT_GET(val); + + /* Retrieve its associated skb */ + port = sparx5->ports[txport]; + + /* Retrieve the delay */ + delay = spx5_rd(sparx5, REW_PTP_TWOSTEP_STAMP); + delay = REW_PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(delay); + + /* Get next timestamp from fifo, which needs to be the + * rx timestamp which represents the id of the frame + */ + spx5_rmw(REW_PTP_TWOSTEP_CTRL_PTP_NXT_SET(1), + REW_PTP_TWOSTEP_CTRL_PTP_NXT, + sparx5, REW_PTP_TWOSTEP_CTRL); + + val = spx5_rd(sparx5, REW_PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retried */ + if (!(val & REW_PTP_TWOSTEP_CTRL_PTP_VLD)) + break; + + /* Read RX timestamping to get the ID */ + id = spx5_rd(sparx5, REW_PTP_TWOSTEP_STAMP); + id <<= 8; + id |= spx5_rd(sparx5, REW_PTP_TWOSTEP_STAMP_SUBNS); + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if 
(SPARX5_SKB_CB(skb)->ts_id != id) + continue; + + __skb_unlink(skb, &port->tx_skbs); + skb_match = skb; + break; + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + + /* Next ts */ + spx5_rmw(REW_PTP_TWOSTEP_CTRL_PTP_NXT_SET(1), + REW_PTP_TWOSTEP_CTRL_PTP_NXT, + sparx5, REW_PTP_TWOSTEP_CTRL); + + if (WARN_ON(!skb_match)) + continue; + + spin_lock(&sparx5->ptp_ts_id_lock); + sparx5->ptp_skbs--; + spin_unlock(&sparx5->ptp_ts_id_lock); + + /* Get the h/w timestamp */ + sparx5_get_hwtimestamp(sparx5, &ts, delay); + + /* Set the timestamp into the skb */ + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_tstamp_tx(skb_match, &shhwtstamps); + + dev_kfree_skb_any(skb_match); + } + + return IRQ_HANDLED; +} + +static int sparx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + unsigned long flags; + bool neg_adj = 0; + u64 tod_inc; + u64 ref; + + if (!scaled_ppm) + return 0; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + + tod_inc = sparx5_ptp_get_nominal_value(sparx5); + + /* The multiplication is split in 2 separate additions because of + * overflow issues. If scaled_ppm with 16bit fractional part was bigger + * than 20ppm then we got overflow. + */ + ref = sparx5_ptp_get_1ppm(sparx5) * (scaled_ppm >> 16); + ref += (sparx5_ptp_get_1ppm(sparx5) * (0xffff & scaled_ppm)) >> 16; + tod_inc = neg_adj ? 
tod_inc - ref : tod_inc + ref; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(1 << BIT(phc->index)), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, + sparx5, PTP_PTP_DOM_CFG); + + spx5_wr((u32)tod_inc & 0xFFFFFFFF, sparx5, + PTP_CLK_PER_CFG(phc->index, 0)); + spx5_wr((u32)(tod_inc >> 32), sparx5, + PTP_CLK_PER_CFG(phc->index, 1)); + + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(0), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, sparx5, + PTP_PTP_DOM_CFG); + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + + return 0; +} + +static int sparx5_ptp_settime64(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + unsigned long flags; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + /* Set new value */ + spx5_wr(PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB_SET(upper_32_bits(ts->tv_sec)), + sparx5, PTP_PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + spx5_wr(lower_32_bits(ts->tv_sec), + sparx5, PTP_PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + spx5_wr(ts->tv_nsec, sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Apply new values */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_LOAD) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + + return 0; +} + +static int sparx5_ptp_gettime64(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + 
struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + unsigned long flags; + time64_t s; + s64 ns; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + s = spx5_rd(sparx5, PTP_PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + s <<= 32; + s |= spx5_rd(sparx5, PTP_PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + ns = spx5_rd(sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + ns &= PTP_PTP_TOD_NSEC_PTP_TOD_NSEC; + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + + /* Deal with negative values */ + if ((ns & 0xFFFFFFF0) == 0x3FFFFFF0) { + s--; + ns &= 0xf; + ns += 999999984; + } + + set_normalized_timespec64(ts, s, ns); + return 0; +} + +static int sparx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + + if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) { + unsigned long flags; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + spx5_wr(PTP_PTP_TOD_NSEC_PTP_TOD_NSEC_SET(delta), + sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Adjust time with the value of PTP_TOD_NSEC */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_DELTA) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + 
PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + } else { + /* Fall back using sparx5_ptp_settime64 which is not exact */ + struct timespec64 ts; + u64 now; + + sparx5_ptp_gettime64(ptp, &ts); + + now = ktime_to_ns(timespec64_to_ktime(ts)); + ts = ns_to_timespec64(now + delta); + + sparx5_ptp_settime64(ptp, &ts); + } + + return 0; +} + +static struct ptp_clock_info sparx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "sparx5 ptp", + .max_adj = 200000, + .gettime64 = sparx5_ptp_gettime64, + .settime64 = sparx5_ptp_settime64, + .adjtime = sparx5_ptp_adjtime, + .adjfine = sparx5_ptp_adjfine, +}; + +static int sparx5_ptp_phc_init(struct sparx5 *sparx5, + int index, + struct ptp_clock_info *clock_info) +{ + struct sparx5_phc *phc = &sparx5->phc[index]; + + phc->info = *clock_info; + phc->clock = ptp_clock_register(&phc->info, sparx5->dev); + if (IS_ERR(phc->clock)) + return PTR_ERR(phc->clock); + + phc->index = index; + phc->sparx5 = sparx5; + + /* PTP Rx stamping is always enabled. 
*/ + phc->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + + return 0; +} + +int sparx5_ptp_init(struct sparx5 *sparx5) +{ + u64 tod_adj = sparx5_ptp_get_nominal_value(sparx5); + struct sparx5_port *port; + int err, i; + + if (!sparx5->ptp) + return 0; + + for (i = 0; i < SPARX5_PHC_COUNT; ++i) { + err = sparx5_ptp_phc_init(sparx5, i, &sparx5_ptp_clock_info); + if (err) + return err; + } + + spin_lock_init(&sparx5->ptp_clock_lock); + spin_lock_init(&sparx5->ptp_ts_id_lock); + mutex_init(&sparx5->ptp_lock); + + /* Disable master counters */ + spx5_wr(PTP_PTP_DOM_CFG_PTP_ENA_SET(0), sparx5, PTP_PTP_DOM_CFG); + + /* Configure the nominal TOD increment per clock cycle */ + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(0x7), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, + sparx5, PTP_PTP_DOM_CFG); + + for (i = 0; i < SPARX5_PHC_COUNT; ++i) { + spx5_wr((u32)tod_adj & 0xFFFFFFFF, sparx5, + PTP_CLK_PER_CFG(i, 0)); + spx5_wr((u32)(tod_adj >> 32), sparx5, + PTP_CLK_PER_CFG(i, 1)); + } + + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(0), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, + sparx5, PTP_PTP_DOM_CFG); + + /* Enable master counters */ + spx5_wr(PTP_PTP_DOM_CFG_PTP_ENA_SET(0x7), sparx5, PTP_PTP_DOM_CFG); + + for (i = 0; i < sparx5->port_count; i++) { + port = sparx5->ports[i]; + if (!port) + continue; + + skb_queue_head_init(&port->tx_skbs); + } + + return 0; +} + +void sparx5_ptp_deinit(struct sparx5 *sparx5) +{ + struct sparx5_port *port; + int i; + + for (i = 0; i < sparx5->port_count; i++) { + port = sparx5->ports[i]; + if (!port) + continue; + + skb_queue_purge(&port->tx_skbs); + } + + for (i = 0; i < SPARX5_PHC_COUNT; ++i) + ptp_clock_unregister(sparx5->phc[i].clock); +} + +void sparx5_ptp_rxtstamp(struct sparx5 *sparx5, struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps *shhwtstamps; + struct sparx5_phc *phc; + struct timespec64 ts; + u64 full_ts_in_ns; + + if (!sparx5->ptp) + return; + + phc = &sparx5->phc[SPARX5_PHC_PORT]; + 
sparx5_ptp_gettime64(&phc->info, &ts); + + if (ts.tv_nsec < timestamp) + ts.tv_sec--; + ts.tv_nsec = timestamp; + full_ts_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec); + + shhwtstamps = skb_hwtstamps(skb); + shhwtstamps->hwtstamp = full_ts_in_ns; +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index f5271c3ec133..dacb87f49552 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -19,11 +19,27 @@ struct sparx5_switchdev_event_work { unsigned long event; }; +static int sparx5_port_attr_pre_bridge_flags(struct sparx5_port *port, + struct switchdev_brport_flags flags) +{ + if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD)) + return -EINVAL; + + return 0; +} + static void sparx5_port_attr_bridge_flags(struct sparx5_port *port, struct switchdev_brport_flags flags) { + int pgid; + if (flags.mask & BR_MCAST_FLOOD) - sparx5_pgid_update_mask(port, PGID_MC_FLOOD, true); + for (pgid = PGID_MC_FLOOD; pgid <= PGID_IPV6_MC_CTRL; pgid++) + sparx5_pgid_update_mask(port, pgid, !!(flags.val & BR_MCAST_FLOOD)); + if (flags.mask & BR_FLOOD) + sparx5_pgid_update_mask(port, PGID_UC_FLOOD, !!(flags.val & BR_FLOOD)); + if (flags.mask & BR_BCAST_FLOOD) + sparx5_pgid_update_mask(port, PGID_BCAST, !!(flags.val & BR_BCAST_FLOOD)); } static void sparx5_attr_stp_state_set(struct sparx5_port *port, @@ -72,6 +88,9 @@ static int sparx5_port_attr_set(struct net_device *dev, const void *ctx, struct sparx5_port *port = netdev_priv(dev); switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + return sparx5_port_attr_pre_bridge_flags(port, + attr->u.brport_flags); case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: sparx5_port_attr_bridge_flags(port, attr->u.brport_flags); break; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c index 
4ce490a25f33..8e56ffa1c4f7 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c @@ -58,16 +58,6 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid, struct sparx5 *sparx5 = port->sparx5; int ret; - /* Make the port a member of the VLAN */ - set_bit(port->portno, sparx5->vlan_mask[vid]); - ret = sparx5_vlant_set_mask(sparx5, vid); - if (ret) - return ret; - - /* Default ingress vlan classification */ - if (pvid) - port->pvid = vid; - /* Untagged egress vlan classification */ if (untagged && port->vid != vid) { if (port->vid) { @@ -79,6 +69,16 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid, port->vid = vid; } + /* Make the port a member of the VLAN */ + set_bit(port->portno, sparx5->vlan_mask[vid]); + ret = sparx5_vlant_set_mask(sparx5, vid); + if (ret) + return ret; + + /* Default ingress vlan classification */ + if (pvid) + port->pvid = vid; + sparx5_vlan_port_apply(sparx5, port); return 0; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 2fb713e9baa4..21134125a6e4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -13,6 +13,7 @@ #define TABLE_UPDATE_SLEEP_US 10 #define TABLE_UPDATE_TIMEOUT_US 100000 +#define OCELOT_RSV_VLAN_RANGE_START 4000 struct ocelot_mact_entry { u8 mac[ETH_ALEN]; @@ -221,6 +222,35 @@ static void ocelot_vcap_enable(struct ocelot *ocelot, int port) REW_PORT_CFG, port); } +static int ocelot_single_vlan_aware_bridge(struct ocelot *ocelot, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge = NULL; + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port || !ocelot_port->bridge || + !br_vlan_enabled(ocelot_port->bridge)) + continue; + + if (!bridge) { + bridge = ocelot_port->bridge; + continue; + } + + if (bridge == ocelot_port->bridge) + continue; 
+ + NL_SET_ERR_MSG_MOD(extack, + "Only one VLAN-aware bridge is supported"); + return -EBUSY; + } + + return 0; +} + static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot) { return ocelot_read(ocelot, ANA_TABLES_VLANACCESS); @@ -347,12 +377,45 @@ static void ocelot_port_manage_port_tag(struct ocelot *ocelot, int port) } } +int ocelot_bridge_num_find(struct ocelot *ocelot, + const struct net_device *bridge) +{ + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (ocelot_port && ocelot_port->bridge == bridge) + return ocelot_port->bridge_num; + } + + return -1; +} +EXPORT_SYMBOL_GPL(ocelot_bridge_num_find); + +static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, + const struct net_device *bridge) +{ + int bridge_num; + + /* Standalone ports use VID 0 */ + if (!bridge) + return 0; + + bridge_num = ocelot_bridge_num_find(ocelot, bridge); + if (WARN_ON(bridge_num < 0)) + return 0; + + /* VLAN-unaware bridges use a reserved VID going from 4095 downwards */ + return VLAN_N_VID - bridge_num - 1; +} + /* Default vlan to clasify for untagged frames (may be zero) */ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - u16 pvid = OCELOT_VLAN_UNAWARE_PVID; + u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); u32 val = 0; ocelot_port->pvid_vlan = pvid_vlan; @@ -466,12 +529,29 @@ static int ocelot_vlan_member_del(struct ocelot *ocelot, int port, u16 vid) return 0; } +static int ocelot_add_vlan_unaware_pvid(struct ocelot *ocelot, int port, + const struct net_device *bridge) +{ + u16 vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + return ocelot_vlan_member_add(ocelot, port, vid, true); +} + +static int ocelot_del_vlan_unaware_pvid(struct ocelot *ocelot, int port, + const struct net_device *bridge) +{ + u16 vid = ocelot_vlan_unaware_pvid(ocelot, 
bridge); + + return ocelot_vlan_member_del(ocelot, port, vid); +} + int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool vlan_aware, struct netlink_ext_ack *extack) { struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; struct ocelot_port *ocelot_port = ocelot->ports[port]; struct ocelot_vcap_filter *filter; + int err; u32 val; list_for_each_entry(filter, &block->rules, list) { @@ -483,6 +563,19 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, } } + err = ocelot_single_vlan_aware_bridge(ocelot, extack); + if (err) + return err; + + if (vlan_aware) + err = ocelot_del_vlan_unaware_pvid(ocelot, port, + ocelot_port->bridge); + else + err = ocelot_add_vlan_unaware_pvid(ocelot, port, + ocelot_port->bridge); + if (err) + return err; + ocelot_port->vlan_aware = vlan_aware; if (vlan_aware) @@ -521,6 +614,12 @@ int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, } } + if (vid > OCELOT_RSV_VLAN_RANGE_START) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN range 4000-4095 reserved for VLAN-unaware bridging"); + return -EBUSY; + } + return 0; } EXPORT_SYMBOL(ocelot_vlan_prepare); @@ -584,11 +683,11 @@ static void ocelot_vlan_init(struct ocelot *ocelot) for (vid = 1; vid < VLAN_N_VID; vid++) ocelot_vlant_set_mask(ocelot, vid, 0); - /* Because VLAN filtering is enabled, we need VID 0 to get untagged - * traffic. It is added automatically if 8021q module is loaded, but - * we can't rely on it since module may be not loaded. + /* We need VID 0 to get traffic on standalone ports. + * It is added automatically if the 8021q module is loaded, but we + * can't rely on that since it might not be. */ - ocelot_vlant_set_mask(ocelot, OCELOT_VLAN_UNAWARE_PVID, all_ports); + ocelot_vlant_set_mask(ocelot, OCELOT_STANDALONE_PVID, all_ports); /* Set vlan ingress filter mask to all ports but the CPU port by * default. 
@@ -1237,21 +1336,27 @@ void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_drain_cpu_queue); -int ocelot_fdb_add(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid) +int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge) { int pgid = port; if (port == ocelot->npi) pgid = PGID_CPU; + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + return ocelot_mact_learn(ocelot, pgid, addr, vid, ENTRYTYPE_LOCKED); } EXPORT_SYMBOL(ocelot_fdb_add); -int ocelot_fdb_del(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid) +int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge) { + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + return ocelot_mact_forget(ocelot, addr, vid); } EXPORT_SYMBOL(ocelot_fdb_del); @@ -1413,6 +1518,12 @@ int ocelot_fdb_dump(struct ocelot *ocelot, int port, is_static = (entry.type == ENTRYTYPE_LOCKED); + /* Hide the reserved VLANs used for + * VLAN-unaware bridging. + */ + if (entry.vid > OCELOT_RSV_VLAN_RANGE_START) + entry.vid = 0; + err = cb(entry.mac, entry.vid, is_static, data); if (err) break; @@ -1907,6 +2018,8 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) u32 mask = 0; int port; + lockdep_assert_held(&ocelot->fwd_domain_lock); + for (port = 0; port < ocelot->num_phys_ports; port++) { struct ocelot_port *ocelot_port = ocelot->ports[port]; @@ -1920,6 +2033,19 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) return mask; } +/* The logical port number of a LAG is equal to the lowest numbered physical + * port ID present in that LAG. It may change if that port ever leaves the LAG. 
+ */ +static int ocelot_bond_get_id(struct ocelot *ocelot, struct net_device *bond) +{ + int bond_mask = ocelot_get_bond_mask(ocelot, bond); + + if (!bond_mask) + return -ENOENT; + + return __ffs(bond_mask); +} + u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port) { struct ocelot_port *ocelot_port = ocelot->ports[src_port]; @@ -2039,6 +2165,28 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining) } EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask); +void ocelot_port_set_dsa_8021q_cpu(struct ocelot *ocelot, int port) +{ + u16 vid; + + ocelot->ports[port]->is_dsa_8021q_cpu = true; + + for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++) + ocelot_vlan_member_add(ocelot, port, vid, true); +} +EXPORT_SYMBOL_GPL(ocelot_port_set_dsa_8021q_cpu); + +void ocelot_port_unset_dsa_8021q_cpu(struct ocelot *ocelot, int port) +{ + u16 vid; + + ocelot->ports[port]->is_dsa_8021q_cpu = false; + + for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++) + ocelot_vlan_member_del(ocelot, port, vid); +} +EXPORT_SYMBOL_GPL(ocelot_port_unset_dsa_8021q_cpu); + void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) { struct ocelot_port *ocelot_port = ocelot->ports[port]; @@ -2183,7 +2331,8 @@ static void ocelot_encode_ports_to_mdb(unsigned char *addr, } int ocelot_port_mdb_add(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge) { unsigned char addr[ETH_ALEN]; struct ocelot_multicast *mc; @@ -2193,6 +2342,9 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port, if (port == ocelot->npi) port = ocelot->num_phys_ports; + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + mc = ocelot_multicast_get(ocelot, mdb->addr, vid); if (!mc) { /* New entry */ @@ -2239,7 +2391,8 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port, EXPORT_SYMBOL(ocelot_port_mdb_add); int ocelot_port_mdb_del(struct ocelot *ocelot, 
int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge) { unsigned char addr[ETH_ALEN]; struct ocelot_multicast *mc; @@ -2249,6 +2402,9 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, if (port == ocelot->npi) port = ocelot->num_phys_ports; + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + mc = ocelot_multicast_get(ocelot, mdb->addr, vid); if (!mc) return -ENOENT; @@ -2282,18 +2438,30 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_port_mdb_del); -void ocelot_port_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge) +int ocelot_port_bridge_join(struct ocelot *ocelot, int port, + struct net_device *bridge, int bridge_num, + struct netlink_ext_ack *extack) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + int err; + + err = ocelot_single_vlan_aware_bridge(ocelot, extack); + if (err) + return err; mutex_lock(&ocelot->fwd_domain_lock); ocelot_port->bridge = bridge; + ocelot_port->bridge_num = bridge_num; ocelot_apply_bridge_fwd_mask(ocelot, true); mutex_unlock(&ocelot->fwd_domain_lock); + + if (br_vlan_enabled(bridge)) + return 0; + + return ocelot_add_vlan_unaware_pvid(ocelot, port, bridge); } EXPORT_SYMBOL(ocelot_port_bridge_join); @@ -2304,7 +2472,11 @@ void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, mutex_lock(&ocelot->fwd_domain_lock); + if (!br_vlan_enabled(bridge)) + ocelot_del_vlan_unaware_pvid(ocelot, port, bridge); + ocelot_port->bridge = NULL; + ocelot_port->bridge_num = -1; ocelot_port_set_pvid(ocelot, port, NULL); ocelot_port_manage_port_tag(ocelot, port); @@ -2413,7 +2585,7 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot) bond = ocelot_port->bond; if (bond) { - int lag = __ffs(ocelot_get_bond_mask(ocelot, bond)); + int lag = ocelot_bond_get_id(ocelot, bond); ocelot_rmw_gix(ocelot, ANA_PORT_PORT_CFG_PORTID_VAL(lag), @@ -2428,6 +2600,46 @@ static void 
ocelot_setup_logical_port_ids(struct ocelot *ocelot) } } +/* Documentation for PORTID_VAL says: + * Logical port number for front port. If port is not a member of a LLAG, + * then PORTID must be set to the physical port number. + * If port is a member of a LLAG, then PORTID must be set to the common + * PORTID_VAL used for all member ports of the LLAG. + * The value must not exceed the number of physical ports on the device. + * + * This means we have little choice but to migrate FDB entries pointing towards + * a logical port when that changes. + */ +static void ocelot_migrate_lag_fdbs(struct ocelot *ocelot, + struct net_device *bond, + int lag) +{ + struct ocelot_lag_fdb *fdb; + int err; + + lockdep_assert_held(&ocelot->fwd_domain_lock); + + list_for_each_entry(fdb, &ocelot->lag_fdbs, list) { + if (fdb->bond != bond) + continue; + + err = ocelot_mact_forget(ocelot, fdb->addr, fdb->vid); + if (err) { + dev_err(ocelot->dev, + "failed to delete LAG %s FDB %pM vid %d: %pe\n", + bond->name, fdb->addr, fdb->vid, ERR_PTR(err)); + } + + err = ocelot_mact_learn(ocelot, lag, fdb->addr, fdb->vid, + ENTRYTYPE_LOCKED); + if (err) { + dev_err(ocelot->dev, + "failed to migrate LAG %s FDB %pM vid %d: %pe\n", + bond->name, fdb->addr, fdb->vid, ERR_PTR(err)); + } + } +} + int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, struct netdev_lag_upper_info *info) @@ -2452,14 +2664,23 @@ EXPORT_SYMBOL(ocelot_port_lag_join); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond) { + int old_lag_id, new_lag_id; + mutex_lock(&ocelot->fwd_domain_lock); + old_lag_id = ocelot_bond_get_id(ocelot, bond); + ocelot->ports[port]->bond = NULL; ocelot_setup_logical_port_ids(ocelot); ocelot_apply_bridge_fwd_mask(ocelot, false); ocelot_set_aggr_pgids(ocelot); + new_lag_id = ocelot_bond_get_id(ocelot, bond); + + if (new_lag_id >= 0 && old_lag_id != new_lag_id) + ocelot_migrate_lag_fdbs(ocelot, bond, new_lag_id); + 
mutex_unlock(&ocelot->fwd_domain_lock); } EXPORT_SYMBOL(ocelot_port_lag_leave); @@ -2468,13 +2689,83 @@ void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + mutex_lock(&ocelot->fwd_domain_lock); + ocelot_port->lag_tx_active = lag_tx_active; /* Rebalance the LAGs */ ocelot_set_aggr_pgids(ocelot); + + mutex_unlock(&ocelot->fwd_domain_lock); } EXPORT_SYMBOL(ocelot_port_lag_change); +int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid, + const struct net_device *bridge) +{ + struct ocelot_lag_fdb *fdb; + int lag, err; + + fdb = kzalloc(sizeof(*fdb), GFP_KERNEL); + if (!fdb) + return -ENOMEM; + + mutex_lock(&ocelot->fwd_domain_lock); + + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + ether_addr_copy(fdb->addr, addr); + fdb->vid = vid; + fdb->bond = bond; + + lag = ocelot_bond_get_id(ocelot, bond); + + err = ocelot_mact_learn(ocelot, lag, addr, vid, ENTRYTYPE_LOCKED); + if (err) { + mutex_unlock(&ocelot->fwd_domain_lock); + kfree(fdb); + return err; + } + + list_add_tail(&fdb->list, &ocelot->lag_fdbs); + mutex_unlock(&ocelot->fwd_domain_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(ocelot_lag_fdb_add); + +int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid, + const struct net_device *bridge) +{ + struct ocelot_lag_fdb *fdb, *tmp; + + mutex_lock(&ocelot->fwd_domain_lock); + + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + list_for_each_entry_safe(fdb, tmp, &ocelot->lag_fdbs, list) { + if (!ether_addr_equal(fdb->addr, addr) || fdb->vid != vid || + fdb->bond != bond) + continue; + + ocelot_mact_forget(ocelot, addr, vid); + list_del(&fdb->list); + mutex_unlock(&ocelot->fwd_domain_lock); + kfree(fdb); + + return 0; + } + + mutex_unlock(&ocelot->fwd_domain_lock); + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(ocelot_lag_fdb_del); + /* Configure the maximum 
SDU (L2 payload) on RX to the value specified in @sdu. * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG. * In the special case that it's the NPI port that we're configuring, the @@ -2595,6 +2886,9 @@ EXPORT_SYMBOL(ocelot_port_pre_bridge_flags); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags flags) { + if (port == ocelot->npi) + port = ocelot->num_phys_ports; + if (flags.mask & BR_LEARNING) ocelot_port_set_learning(ocelot, port, !!(flags.val & BR_LEARNING)); @@ -2707,7 +3001,7 @@ static void ocelot_cpu_port_init(struct ocelot *ocelot) /* Configure the CPU port to be VLAN aware */ ocelot_write_gix(ocelot, - ANA_PORT_VLAN_CFG_VLAN_VID(OCELOT_VLAN_UNAWARE_PVID) | + ANA_PORT_VLAN_CFG_VLAN_VID(OCELOT_STANDALONE_PVID) | ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1), ANA_PORT_VLAN_CFG, cpu); @@ -2769,6 +3063,7 @@ int ocelot_init(struct ocelot *ocelot) INIT_LIST_HEAD(&ocelot->multicast); INIT_LIST_HEAD(&ocelot->pgids); INIT_LIST_HEAD(&ocelot->vlans); + INIT_LIST_HEAD(&ocelot->lag_fdbs); ocelot_detect_features(ocelot); ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 5277c4b53af4..f8dc0d75eb5d 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -26,7 +26,7 @@ #include "ocelot_rew.h" #include "ocelot_qs.h" -#define OCELOT_VLAN_UNAWARE_PVID 0 +#define OCELOT_STANDALONE_PVID 0 #define OCELOT_BUFFER_CELL_SZ 60 #define OCELOT_STATS_CHECK_DELAY (2 * HZ) @@ -81,6 +81,9 @@ struct ocelot_multicast { struct ocelot_pgid *pgid; }; +int ocelot_bridge_num_find(struct ocelot *ocelot, + const struct net_device *bridge); + int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid, bool is_static, void *data); int ocelot_mact_learn(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c 
b/drivers/net/ethernet/mscc/ocelot_flower.c index 7106137f98ee..b3f5418dc622 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -6,6 +6,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <soc/mscc/ocelot_vcap.h> +#include "ocelot_police.h" #include "ocelot_vcap.h" /* Arbitrarily chosen constants for encoding the VCAP block and lookup number @@ -217,6 +218,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, bool ingress, struct flow_cls_offload *f, struct ocelot_vcap_filter *filter) { + const struct flow_action *action = &f->rule->action; struct netlink_ext_ack *extack = f->common.extack; bool allow_missing_goto_target = false; const struct flow_action_entry *a; @@ -244,7 +246,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->goto_target = -1; filter->type = OCELOT_VCAP_FILTER_DUMMY; - flow_action_for_each(i, a, &f->rule->action) { + flow_action_for_each(i, a, action) { switch (a->id) { case FLOW_ACTION_DROP: if (filter->block_id != VCAP_IS2) { @@ -297,11 +299,11 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, "Last action must be GOTO"); return -EOPNOTSUPP; } - if (a->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + + err = ocelot_policer_validate(action, a, extack); + if (err) + return err; + filter->action.police_ena = true; pol_ix = a->hw_index + ocelot->vcap_pol.base; diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c index 142e897ea2af..3ccec488a304 100644 --- a/drivers/net/ethernet/mscc/ocelot_mrp.c +++ b/drivers/net/ethernet/mscc/ocelot_mrp.c @@ -107,16 +107,16 @@ static void ocelot_mrp_save_mac(struct ocelot *ocelot, struct ocelot_port *port) { ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_test_dmac, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, 
ENTRYTYPE_LOCKED); ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_control_dmac, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); } static void ocelot_mrp_del_mac(struct ocelot *ocelot, struct ocelot_port *port) { - ocelot_mact_forget(ocelot, mrp_test_dmac, OCELOT_VLAN_UNAWARE_PVID); - ocelot_mact_forget(ocelot, mrp_control_dmac, OCELOT_VLAN_UNAWARE_PVID); + ocelot_mact_forget(ocelot, mrp_test_dmac, OCELOT_STANDALONE_PVID); + ocelot_mact_forget(ocelot, mrp_control_dmac, OCELOT_STANDALONE_PVID); } int ocelot_mrp_add(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index e271b6225b72..5767e38c0c5a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -14,6 +14,7 @@ #include <linux/phy/phy.h> #include <net/pkt_cls.h> #include "ocelot.h" +#include "ocelot_police.h" #include "ocelot_vcap.h" #include "ocelot_fdma.h" @@ -258,11 +259,10 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, return -EEXIST; } - if (action->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + err = ocelot_policer_validate(&f->rule->action, action, + extack); + if (err) + return err; pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; pol.burst = action->police.burst; @@ -419,7 +419,7 @@ static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid) * with VLAN filtering feature. We need to keep it to receive * untagged traffic. 
*/ - if (vid == OCELOT_VLAN_UNAWARE_PVID) + if (vid == OCELOT_STANDALONE_PVID) return 0; ret = ocelot_vlan_del(ocelot, port, vid); @@ -559,7 +559,7 @@ static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr) struct ocelot_mact_work_ctx w; ether_addr_copy(w.forget.addr, addr); - w.forget.vid = OCELOT_VLAN_UNAWARE_PVID; + w.forget.vid = OCELOT_STANDALONE_PVID; w.type = OCELOT_MACT_FORGET; return ocelot_enqueue_mact_action(ocelot, &w); @@ -573,7 +573,7 @@ static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr) struct ocelot_mact_work_ctx w; ether_addr_copy(w.learn.addr, addr); - w.learn.vid = OCELOT_VLAN_UNAWARE_PVID; + w.learn.vid = OCELOT_STANDALONE_PVID; w.learn.pgid = PGID_CPU; w.learn.entry_type = ENTRYTYPE_LOCKED; w.type = OCELOT_MACT_LEARN; @@ -608,9 +608,9 @@ static int ocelot_port_set_mac_address(struct net_device *dev, void *p) /* Learn the new net device MAC address in the mac table. */ ocelot_mact_learn(ocelot, PGID_CPU, addr->sa_data, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); /* Then forget the previous one. 
*/ - ocelot_mact_forget(ocelot, dev->dev_addr, OCELOT_VLAN_UNAWARE_PVID); + ocelot_mact_forget(ocelot, dev->dev_addr, OCELOT_STANDALONE_PVID); eth_hw_addr_set(dev, addr->sa_data); return 0; @@ -662,10 +662,11 @@ static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_fdb_add(ocelot, port, addr, vid); + return ocelot_fdb_add(ocelot, port, addr, vid, ocelot_port->bridge); } static int ocelot_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], @@ -673,10 +674,11 @@ static int ocelot_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 vid) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_fdb_del(ocelot, port, addr, vid); + return ocelot_fdb_del(ocelot, port, addr, vid, ocelot_port->bridge); } static int ocelot_port_fdb_dump(struct sk_buff *skb, @@ -988,7 +990,7 @@ static int ocelot_port_obj_add_mdb(struct net_device *dev, struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_port_mdb_add(ocelot, port, mdb); + return ocelot_port_mdb_add(ocelot, port, mdb, ocelot_port->bridge); } static int ocelot_port_obj_del_mdb(struct net_device *dev, @@ -999,7 +1001,7 @@ static int ocelot_port_obj_del_mdb(struct net_device *dev, struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_port_mdb_del(ocelot, port, mdb); + return ocelot_port_mdb_del(ocelot, port, mdb, ocelot_port->bridge); } static int ocelot_port_obj_mrp_add(struct net_device *dev, @@ -1173,6 +1175,33 @@ static int ocelot_switchdev_unsync(struct ocelot 
*ocelot, int port) return 0; } +static int ocelot_bridge_num_get(struct ocelot *ocelot, + const struct net_device *bridge_dev) +{ + int bridge_num = ocelot_bridge_num_find(ocelot, bridge_dev); + + if (bridge_num < 0) { + /* First port that offloads this bridge */ + bridge_num = find_first_zero_bit(&ocelot->bridges, + ocelot->num_phys_ports); + + set_bit(bridge_num, &ocelot->bridges); + } + + return bridge_num; +} + +static void ocelot_bridge_num_put(struct ocelot *ocelot, + const struct net_device *bridge_dev, + int bridge_num) +{ + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!ocelot_bridge_num_find(ocelot, bridge_dev)) + clear_bit(bridge_num, &ocelot->bridges); +} + static int ocelot_netdevice_bridge_join(struct net_device *dev, struct net_device *brport_dev, struct net_device *bridge, @@ -1182,9 +1211,14 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - int err; + int bridge_num, err; + + bridge_num = ocelot_bridge_num_get(ocelot, bridge); - ocelot_port_bridge_join(ocelot, port, bridge); + err = ocelot_port_bridge_join(ocelot, port, bridge, bridge_num, + extack); + if (err) + goto err_join; err = switchdev_bridge_port_offload(brport_dev, dev, priv, &ocelot_switchdev_nb, @@ -1205,6 +1239,8 @@ err_switchdev_sync: &ocelot_switchdev_blocking_nb); err_switchdev_offload: ocelot_port_bridge_leave(ocelot, port, bridge); +err_join: + ocelot_bridge_num_put(ocelot, bridge, bridge_num); return err; } @@ -1225,6 +1261,7 @@ static int ocelot_netdevice_bridge_leave(struct net_device *dev, struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; + int bridge_num = ocelot_port->bridge_num; int port = priv->chip_port; int err; @@ -1233,6 +1270,7 @@ static int 
ocelot_netdevice_bridge_leave(struct net_device *dev, return err; ocelot_port_bridge_leave(ocelot, port, bridge); + ocelot_bridge_num_put(ocelot, bridge, bridge_num); return 0; } @@ -1700,7 +1738,7 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, eth_hw_addr_gen(dev, ocelot->base_mac, port); ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); ocelot_init_port(ocelot, port); diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c index 6f5068c1041a..a65606bb84a0 100644 --- a/drivers/net/ethernet/mscc/ocelot_police.c +++ b/drivers/net/ethernet/mscc/ocelot_police.c @@ -154,6 +154,47 @@ int qos_policer_conf_set(struct ocelot *ocelot, int port, u32 pol_ix, return 0; } +int ocelot_policer_validate(const struct flow_action *action, + const struct flow_action_entry *a, + struct netlink_ext_ack *extack) +{ + if (a->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (a->police.notexceed.act_id != FLOW_ACTION_PIPE && + a->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (a->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, a)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but police action is not last"); + return -EOPNOTSUPP; + } + + if (a->police.peakrate_bytes_ps || + a->police.avrate || a->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (a->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "Offload does not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} 
+EXPORT_SYMBOL(ocelot_policer_validate); + int ocelot_port_policer_add(struct ocelot *ocelot, int port, struct ocelot_policer *pol) { diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h index 7adb05f71999..7552995f8b17 100644 --- a/drivers/net/ethernet/mscc/ocelot_police.h +++ b/drivers/net/ethernet/mscc/ocelot_police.h @@ -8,6 +8,7 @@ #define _MSCC_OCELOT_POLICE_H_ #include "ocelot.h" +#include <net/flow_offload.h> enum mscc_qos_rate_mode { MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */ @@ -33,4 +34,8 @@ struct qos_policer_conf { int qos_policer_conf_set(struct ocelot *ocelot, int port, u32 pol_ix, struct qos_policer_conf *conf); +int ocelot_policer_validate(const struct flow_action *action, + const struct flow_action_entry *a, + struct netlink_ext_ack *extack); + #endif /* _MSCC_OCELOT_POLICE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 852054da9db9..b976d480aeb3 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -564,9 +564,9 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, val = proto.value[0]; msk = proto.mask[0]; type = IS2_TYPE_IP_UDP_TCP; - if (msk == 0xff && (val == 6 || val == 17)) { + if (msk == 0xff && (val == IPPROTO_TCP || val == IPPROTO_UDP)) { /* UDP/TCP protocol match */ - tcp = (val == 6 ? + tcp = (val == IPPROTO_TCP ? 
OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_TCP, tcp); vcap_key_l4_port_set(vcap, &data, @@ -1195,18 +1195,16 @@ static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, struct ocelot_vcap_block *block, struct ocelot_vcap_filter *filter) { - struct ocelot_vcap_filter *tmp; - struct list_head *pos, *q; + struct ocelot_vcap_filter *tmp, *n; - list_for_each_safe(pos, q, &block->rules) { - tmp = list_entry(pos, struct ocelot_vcap_filter, list); + list_for_each_entry_safe(tmp, n, &block->rules, list) { if (ocelot_vcap_filter_equal(filter, tmp)) { if (tmp->block_id == VCAP_IS2 && tmp->action.police_ena) ocelot_vcap_policer_del(ocelot, tmp->action.pol_ix); - list_del(pos); + list_del(&tmp->list); kfree(tmp); } } diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 9cff3d48acbc..9c72b43c1581 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -31,6 +31,7 @@ nfp-objs := \ nfp_net_main.o \ nfp_net_repr.o \ nfp_net_sriov.o \ + nfp_net_xsk.o \ nfp_netvf_main.o \ nfp_port.o \ nfp_shared_buf.o \ diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index a3242b36e216..2c40a3959f94 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -922,6 +922,51 @@ nfp_fl_pedit(const struct flow_action_entry *act, } } +static struct nfp_fl_meter *nfp_fl_meter(char *act_data) +{ + size_t act_size = sizeof(struct nfp_fl_meter); + struct nfp_fl_meter *meter_act; + + meter_act = (struct nfp_fl_meter *)act_data; + + memset(meter_act, 0, act_size); + + meter_act->head.jump_id = NFP_FL_ACTION_OPCODE_METER; + meter_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return meter_act; +} + +static int +nfp_flower_meter_action(struct nfp_app *app, + const struct flow_action_entry *action, + struct nfp_fl_payload 
*nfp_fl, int *a_len, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_meter *fl_meter; + u32 meter_id; + + if (*a_len + sizeof(struct nfp_fl_meter) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload:meter action size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + meter_id = action->hw_index; + if (!nfp_flower_search_meter_entry(app, meter_id)) { + NL_SET_ERR_MSG_MOD(extack, + "can not offload flow table with unsupported police action.\n"); + return -EOPNOTSUPP; + } + + fl_meter = nfp_fl_meter(&nfp_fl->action_data[*a_len]); + *a_len += sizeof(struct nfp_fl_meter); + fl_meter->meter_id = cpu_to_be32(meter_id); + + return 0; +} + static int nfp_flower_output_action(struct nfp_app *app, const struct flow_action_entry *act, @@ -985,6 +1030,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { + struct nfp_flower_priv *fl_priv = app->priv; struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; @@ -1149,6 +1195,18 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, *pkt_host = true; break; + case FLOW_ACTION_POLICE: + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported police action in action list"); + return -EOPNOTSUPP; + } + + err = nfp_flower_meter_action(app, act, nfp_fl, a_len, netdev, + extack); + if (err) + return err; + break; default: /* Currently we do not handle any other actions. 
*/ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 1543e47456d5..68e8a2fb1a29 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -85,6 +85,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_METER 24 #define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -260,6 +261,12 @@ struct nfp_fl_set_mpls { __be32 lse; }; +struct nfp_fl_meter { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 meter_id; +}; + /* Metadata with L2 (1W/4B) * ---------------------------------------------------------------- * 3 2 1 diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 917c450a7aad..fa902ce2dd82 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -12,7 +12,9 @@ #include <linux/rhashtable.h> #include <linux/time64.h> #include <linux/types.h> +#include <net/flow_offload.h> #include <net/pkt_cls.h> +#include <net/pkt_sched.h> #include <net/tcp.h> #include <linux/workqueue.h> #include <linux/idr.h> @@ -48,6 +50,7 @@ struct nfp_app; #define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_VLAN_QINQ BIT(8) #define NFP_FL_FEATS_QOS_PPS BIT(9) +#define NFP_FL_FEATS_QOS_METER BIT(10) #define NFP_FL_FEATS_HOST_ACK BIT(31) #define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@ -63,7 +66,8 @@ struct nfp_app; NFP_FL_FEATS_PRE_TUN_RULES | \ NFP_FL_FEATS_IPV6_TUN | \ NFP_FL_FEATS_VLAN_QINQ | \ - NFP_FL_FEATS_QOS_PPS) + NFP_FL_FEATS_QOS_PPS | \ + NFP_FL_FEATS_QOS_METER) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -191,6 +195,8 @@ struct nfp_fl_internal_ports { * @qos_stats_work: Workqueue for qos 
stats processing * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates + * @meter_stats_lock: Lock on meter stats updates + * @meter_table: Hash table used to store the meter table * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded * @merge_table: Hash table to store merged flows * @ct_zone_table: Hash table used to store the different zones @@ -228,6 +234,8 @@ struct nfp_flower_priv { struct delayed_work qos_stats_work; unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ + struct mutex meter_stats_lock; /* Protect the meter stats */ + struct rhashtable meter_table; int pre_tun_rule_cnt; struct rhashtable merge_table; struct rhashtable ct_zone_table; @@ -374,6 +382,31 @@ struct nfp_fl_stats_frame { __be64 stats_cookie; }; +struct nfp_meter_stats_entry { + u64 pkts; + u64 bytes; + u64 drops; +}; + +struct nfp_meter_entry { + struct rhash_head ht_node; + u32 meter_id; + bool bps; + u32 rate; + u32 burst; + u64 used; + struct nfp_meter_stats { + u64 update; + struct nfp_meter_stats_entry curr; + struct nfp_meter_stats_entry prev; + } stats; +}; + +enum nfp_meter_op { + NFP_METER_ADD, + NFP_METER_DEL, +}; + static inline bool nfp_flower_internal_port_can_offload(struct nfp_app *app, struct net_device *netdev) @@ -569,4 +602,18 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, void nfp_flower_update_merge_stats(struct nfp_app *app, struct nfp_fl_payload *sub_flow); + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act); +int nfp_init_meter_table(struct nfp_app *app); +void nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv); +void nfp_act_stats_reply(struct nfp_app *app, void *pmsg); +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst); +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum 
nfp_meter_op op, + u32 meter_id); +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index f97eff5afd12..92e8ade4854e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1861,6 +1861,20 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, str return 0; } +static int +nfp_setup_tc_no_dev(struct nfp_app *app, enum tc_setup_type type, void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return nfp_setup_tc_act_offload(app, data); + default: + return -EOPNOTSUPP; + } +} + int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, @@ -1868,7 +1882,7 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void * void (*cleanup)(struct flow_block_cb *block_cb)) { if (!netdev) - return -EOPNOTSUPP; + return nfp_setup_tc_no_dev(cb_priv, type, data); if (!nfp_fl_is_netdev_to_offload(netdev)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 784c6dbf8bc4..3206ba83b1aa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Netronome Systems, Inc. 
*/ +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> #include <linux/math64.h> +#include <linux/vmalloc.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> @@ -11,10 +15,14 @@ #define NFP_FL_QOS_UPDATE msecs_to_jiffies(1000) #define NFP_FL_QOS_PPS BIT(15) +#define NFP_FL_QOS_METER BIT(10) struct nfp_police_cfg_head { __be32 flags_opts; - __be32 port; + union { + __be32 meter_id; + __be32 port; + }; }; enum NFP_FL_QOS_TYPES { @@ -46,7 +54,15 @@ enum NFP_FL_QOS_TYPES { * | Committed Information Rate | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * Word[0](FLag options): - * [15] p(pps) 1 for pps ,0 for bps + * [15] p(pps) 1 for pps, 0 for bps + * + * Meter control message + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | Reserved |p| Y |TYPE |E|TSHFV |P| PC|R| + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | meter ID | + * +-------------------------------+-------------------------------+ * */ struct nfp_police_config { @@ -67,6 +83,74 @@ struct nfp_police_stats_reply { __be64 drop_pkts; }; +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst) +{ + struct nfp_police_config *config; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + if (!ingress) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_METER); + + if (ingress) + config->head.port = cpu_to_be32(id); + else + config->head.meter_id = cpu_to_be32(id); + + config->bkt_tkn_p = cpu_to_be32(burst); + config->bkt_tkn_c = cpu_to_be32(burst); + config->pbs = cpu_to_be32(burst); + config->cbs = 
cpu_to_be32(burst); + config->pir = cpu_to_be32(rate); + config->cir = cpu_to_be32(rate); + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static int nfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + static int nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct tc_cls_matchall_offload *flow, @@ -77,15 +161,15 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *fl_priv = app->priv; struct flow_action_entry *action = NULL; struct nfp_flower_repr_priv *repr_priv; - struct nfp_police_config *config; u32 netdev_port_id, i; struct nfp_repr *repr; - struct sk_buff *skb; bool pps_support; u32 bps_num = 0; u32 pps_num = 0; u32 burst; + bool pps; u64 rate; + int err; if (!nfp_netdev_is_nfp_repr(netdev)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); @@ -132,6 +216,11 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, "unsupported offload: qos 
rate limit offload requires police action"); return -EOPNOTSUPP; } + + err = nfp_policer_validate(&flow->rule->action, action, extack); + if (err) + return err; + if (action->police.rate_bytes_ps > 0) { if (bps_num++) { NL_SET_ERR_MSG_MOD(extack, @@ -169,23 +258,12 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, } if (rate != 0) { - skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), - NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - config = nfp_flower_cmsg_get_data(skb); - memset(config, 0, sizeof(struct nfp_police_config)); + pps = false; if (action->police.rate_pkt_ps > 0) - config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); - config->head.port = cpu_to_be32(netdev_port_id); - config->bkt_tkn_p = cpu_to_be32(burst); - config->bkt_tkn_c = cpu_to_be32(burst); - config->pbs = cpu_to_be32(burst); - config->cbs = cpu_to_be32(burst); - config->pir = cpu_to_be32(rate); - config->cir = cpu_to_be32(rate); - nfp_ctrl_tx(repr->app->ctrl, skb); + pps = true; + nfp_flower_offload_one_police(repr->app, true, + pps, netdev_port_id, + rate, burst); } } repr_priv->qos_table.netdev_port_id = netdev_port_id; @@ -266,6 +344,9 @@ void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb) u32 netdev_port_id; msg = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(msg->head.flags_opts) & NFP_FL_QOS_METER) + return nfp_act_stats_reply(app, msg); + netdev_port_id = be32_to_cpu(msg->head.port); rcu_read_lock(); netdev = nfp_app_dev_get(app, netdev_port_id, NULL); @@ -297,7 +378,7 @@ exit_unlock_rcu: static void nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, - u32 netdev_port_id) + u32 id, bool ingress) { struct nfp_police_cfg_head *head; struct sk_buff *skb; @@ -308,10 +389,15 @@ nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, GFP_ATOMIC); if (!skb) return; - head = nfp_flower_cmsg_get_data(skb); + memset(head, 0, sizeof(struct nfp_police_cfg_head)); - 
head->port = cpu_to_be32(netdev_port_id); + if (ingress) { + head->port = cpu_to_be32(id); + } else { + head->flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + head->meter_id = cpu_to_be32(id); + } nfp_ctrl_tx(fl_priv->app->ctrl, skb); } @@ -341,7 +427,8 @@ nfp_flower_stats_rlim_request_all(struct nfp_flower_priv *fl_priv) if (!netdev_port_id) continue; - nfp_flower_stats_rlim_request(fl_priv, netdev_port_id); + nfp_flower_stats_rlim_request(fl_priv, + netdev_port_id, true); } } @@ -359,6 +446,8 @@ static void update_stats_cache(struct work_struct *work) qos_stats_work); nfp_flower_stats_rlim_request_all(fl_priv); + nfp_flower_stats_meter_request_all(fl_priv); + schedule_delayed_work(&fl_priv->qos_stats_work, NFP_FL_QOS_UPDATE); } @@ -406,6 +495,9 @@ void nfp_flower_qos_init(struct nfp_app *app) struct nfp_flower_priv *fl_priv = app->priv; spin_lock_init(&fl_priv->qos_stats_lock); + mutex_init(&fl_priv->meter_stats_lock); + nfp_init_meter_table(app); + INIT_DELAYED_WORK(&fl_priv->qos_stats_work, &update_stats_cache); } @@ -441,3 +533,333 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } } + +/* offload tc action, currently only for tc police */ + +static const struct rhashtable_params stats_meter_table_params = { + .key_offset = offsetof(struct nfp_meter_entry, meter_id), + .head_offset = offsetof(struct nfp_meter_entry, ht_node), + .key_len = sizeof(u32), +}; + +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); +} + +static struct nfp_meter_entry * +nfp_flower_add_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, + &meter_id, + stats_meter_table_params); + if (meter_entry) + return 
meter_entry; + + meter_entry = kzalloc(sizeof(*meter_entry), GFP_KERNEL); + if (!meter_entry) + return NULL; + + meter_entry->meter_id = meter_id; + meter_entry->used = jiffies; + if (rhashtable_insert_fast(&priv->meter_table, &meter_entry->ht_node, + stats_meter_table_params)) { + kfree(meter_entry); + return NULL; + } + + priv->qos_rate_limiters++; + if (priv->qos_rate_limiters == 1) + schedule_delayed_work(&priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return meter_entry; +} + +static void nfp_flower_del_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); + if (!meter_entry) + return; + + rhashtable_remove_fast(&priv->meter_table, + &meter_entry->ht_node, + stats_meter_table_params); + kfree(meter_entry); + priv->qos_rate_limiters--; + if (!priv->qos_rate_limiters) + cancel_delayed_work_sync(&priv->qos_stats_work); +} + +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + int err = 0; + + mutex_lock(&fl_priv->meter_stats_lock); + + switch (op) { + case NFP_METER_DEL: + nfp_flower_del_meter_entry(app, meter_id); + goto exit_unlock; + case NFP_METER_ADD: + meter_entry = nfp_flower_add_meter_entry(app, meter_id); + break; + default: + err = -EOPNOTSUPP; + goto exit_unlock; + } + + if (!meter_entry) { + err = -ENOMEM; + goto exit_unlock; + } + + if (action->police.rate_bytes_ps > 0) { + meter_entry->bps = true; + meter_entry->rate = action->police.rate_bytes_ps; + meter_entry->burst = action->police.burst; + } else { + meter_entry->bps = false; + meter_entry->rate = action->police.rate_pkt_ps; + meter_entry->burst = action->police.burst_pkt; + } + +exit_unlock: + 
mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_init_meter_table(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_init(&priv->meter_table, &stats_meter_table_params); +} + +void +nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct rhashtable_iter iter; + + mutex_lock(&fl_priv->meter_stats_lock); + rhashtable_walk_enter(&fl_priv->meter_table, &iter); + rhashtable_walk_start(&iter); + + while ((meter_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(meter_entry)) + continue; + nfp_flower_stats_rlim_request(fl_priv, + meter_entry->meter_id, false); + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_install_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &fl_act->action.entries[0]; + u32 action_num = fl_act->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + u32 burst, i, meter_id; + bool pps_support, pps; + bool add = false; + u64 rate; + + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + for (i = 0 ; i < action_num; i++) { + /*set qos associate data for this interface */ + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + continue; + } + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0 && pps_support) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported qos rate limit"); + continue; + } + + if (rate != 0) { + meter_id = action->hw_index; + if 
(nfp_flower_setup_meter_entry(app, action, NFP_METER_ADD, meter_id)) + continue; + + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(app, false, pps, meter_id, + rate, burst); + add = true; + } + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +nfp_act_remove_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_config *config; + struct sk_buff *skb; + u32 meter_id; + bool pps; + + /*delete qos associate data for this interface */ + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + meter_id = fl_act->index; + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) { + NL_SET_ERR_MSG_MOD(extack, + "no meter entry when delete the action index."); + return -ENOENT; + } + pps = !meter_entry->bps; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + config->head.meter_id = cpu_to_be32(meter_id); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + + nfp_ctrl_tx(app->ctrl, skb); + nfp_flower_setup_meter_entry(app, NULL, NFP_METER_DEL, meter_id); + + return 0; +} + +void +nfp_act_stats_reply(struct nfp_app *app, void *pmsg) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_stats_reply *msg = pmsg; + u32 meter_id; + + meter_id = be32_to_cpu(msg->head.meter_id); + mutex_lock(&fl_priv->meter_stats_lock); + + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) + goto exit_unlock; + + 
meter_entry->stats.curr.pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + meter_entry->stats.curr.bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + meter_entry->stats.curr.drops = be64_to_cpu(msg->drop_pkts); + if (!meter_entry->stats.update) { + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + } + + meter_entry->stats.update = jiffies; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_stats_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + u64 diff_bytes, diff_pkts, diff_drops; + int err = 0; + + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->meter_stats_lock); + meter_entry = nfp_flower_search_meter_entry(app, fl_act->index); + if (!meter_entry) { + err = -ENOENT; + goto exit_unlock; + } + diff_pkts = meter_entry->stats.curr.pkts > meter_entry->stats.prev.pkts ? + meter_entry->stats.curr.pkts - meter_entry->stats.prev.pkts : 0; + diff_bytes = meter_entry->stats.curr.bytes > meter_entry->stats.prev.bytes ? + meter_entry->stats.curr.bytes - meter_entry->stats.prev.bytes : 0; + diff_drops = meter_entry->stats.curr.drops > meter_entry->stats.prev.drops ? 
+ meter_entry->stats.curr.drops - meter_entry->stats.prev.drops : 0; + + flow_stats_update(&fl_act->stats, diff_bytes, diff_pkts, diff_drops, + meter_entry->stats.update, + FLOW_ACTION_HW_STATS_DELAYED); + + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act) +{ + struct netlink_ext_ack *extack = fl_act->extack; + struct nfp_flower_priv *fl_priv = app->priv; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) + return -EOPNOTSUPP; + + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return nfp_act_install_actions(app, fl_act, extack); + case FLOW_ACT_DESTROY: + return nfp_act_remove_actions(app, fl_act, extack); + case FLOW_ACT_STATS: + return nfp_act_stats_actions(app, fl_act, extack); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index cd50db779dda..c71bd555f482 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -922,8 +922,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, int port, bool mod) { struct nfp_flower_priv *priv = app->priv; - int ida_idx = NFP_MAX_MAC_INDEX, err; struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); @@ -942,8 +942,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, if (!nfp_mac_idx) { /* Assign a global index if non-repr or MAC is now shared. 
*/ if (entry || !port) { - ida_idx = ida_simple_get(&priv->tun.mac_off_ids, 0, - NFP_MAX_MAC_INDEX, GFP_KERNEL); + ida_idx = ida_alloc_max(&priv->tun.mac_off_ids, + NFP_MAX_MAC_INDEX, GFP_KERNEL); if (ida_idx < 0) return ida_idx; @@ -997,8 +997,8 @@ err_remove_hash: err_free_entry: kfree(entry); err_free_ida: - if (ida_idx != NFP_MAX_MAC_INDEX) - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + if (ida_idx != -1) + ida_free(&priv->tun.mac_off_ids, ida_idx); return err; } @@ -1061,7 +1061,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, } ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); entry->index = nfp_mac_idx; return 0; } @@ -1081,7 +1081,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, /* If MAC has global ID then extract and free the ida entry. */ if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); } kfree(entry); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 0b1865e9f0b5..437a19722fcf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -109,6 +109,7 @@ struct nfp_eth_table_port; struct nfp_net; struct nfp_net_r_vector; struct nfp_port; +struct xsk_buff_pool; /* Convenience macro for wrapping descriptor index on ring size */ #define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1)) @@ -170,11 +171,14 @@ struct nfp_net_tx_desc { * struct nfp_net_tx_buf - software TX buffer descriptor * @skb: normal ring, sk_buff associated with this buffer * @frag: XDP ring, page frag associated with this buffer + * @xdp: XSK buffer pool handle (for AF_XDP) * @dma_addr: DMA mapping address of the buffer * @fidx: Fragment index 
(-1 for the head and [0..nr_frags-1] for frags) * @pkt_cnt: Number of packets to be produced out of the skb associated * with this buffer (valid only on the head's buffer). * Will be 1 for all non-TSO packets. + * @is_xsk_tx: Flag if buffer is a RX buffer after a XDP_TX action and not a + * buffer from the TX queue (for AF_XDP). * @real_len: Number of bytes which to be produced out of the skb (valid only * on the head's buffer). Equal to skb->len for non-TSO packets. */ @@ -182,10 +186,18 @@ struct nfp_net_tx_buf { union { struct sk_buff *skb; void *frag; + struct xdp_buff *xdp; }; dma_addr_t dma_addr; - short int fidx; - u16 pkt_cnt; + union { + struct { + short int fidx; + u16 pkt_cnt; + }; + struct { + bool is_xsk_tx; + }; + }; u32 real_len; }; @@ -315,6 +327,16 @@ struct nfp_net_rx_buf { }; /** + * struct nfp_net_xsk_rx_buf - software RX XSK buffer descriptor + * @dma_addr: DMA mapping address of the buffer + * @xdp: XSK buffer pool handle (for AF_XDP) + */ +struct nfp_net_xsk_rx_buf { + dma_addr_t dma_addr; + struct xdp_buff *xdp; +}; + +/** * struct nfp_net_rx_ring - RX ring structure * @r_vec: Back pointer to ring vector structure * @cnt: Size of the queue in number of descriptors @@ -324,6 +346,7 @@ struct nfp_net_rx_buf { * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist * @qcp_fl: Pointer to base of the QCP freelist queue * @rxbufs: Array of transmitted FL/RX buffers + * @xsk_rxbufs: Array of transmitted FL/RX buffers (for AF_XDP) * @rxds: Virtual address of FL/RX ring in host memory * @xdp_rxq: RX-ring info avail for XDP * @dma: DMA address of the FL/RX ring @@ -342,6 +365,7 @@ struct nfp_net_rx_ring { u8 __iomem *qcp_fl; struct nfp_net_rx_buf *rxbufs; + struct nfp_net_xsk_rx_buf *xsk_rxbufs; struct nfp_net_rx_desc *rxds; struct xdp_rxq_info xdp_rxq; @@ -360,6 +384,7 @@ struct nfp_net_rx_ring { * @tx_ring: Pointer to TX ring * @rx_ring: Pointer to RX ring * @xdp_ring: Pointer to an extra TX ring for XDP + * @xsk_pool: XSK buffer 
pool active on vector queue pair (for AF_XDP) * @irq_entry: MSI-X table entry (use for talking to the device) * @event_ctr: Number of interrupt * @rx_dim: Dynamic interrupt moderation structure for RX @@ -431,6 +456,7 @@ struct nfp_net_r_vector { u64 rx_replace_buf_alloc_fail; struct nfp_net_tx_ring *xdp_ring; + struct xsk_buff_pool *xsk_pool; struct u64_stats_sync tx_sync; u64 tx_pkts; @@ -501,6 +527,7 @@ struct nfp_stat_pair { * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) * @num_rx_rings: Currently configured number of RX rings * @mtu: Device MTU + * @xsk_pools: XSK buffer pools, @max_r_vecs in size (for AF_XDP). */ struct nfp_net_dp { struct device *dev; @@ -537,6 +564,8 @@ struct nfp_net_dp { unsigned int num_rx_rings; unsigned int mtu; + + struct xsk_buff_pool **xsk_pools; }; /** @@ -965,6 +994,7 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd); void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update); int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn); +void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr); unsigned int nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, unsigned int min_irqs, unsigned int want_irqs); @@ -973,6 +1003,19 @@ void nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, unsigned int n); +void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring); +void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget); + +bool +nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len); + +void nfp_net_rx_csum(const struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, + struct sk_buff *skb); + struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, struct netlink_ext_ack *extack); diff --git 
a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 79257ec41987..00a09b9e0aee 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -38,6 +38,7 @@ #include <net/tls.h> #include <net/vxlan.h> +#include <net/xdp_sock_drv.h> #include "nfpcore/nfp_nsp.h" #include "ccm.h" @@ -45,6 +46,7 @@ #include "nfp_net_ctrl.h" #include "nfp_net.h" #include "nfp_net_sriov.h" +#include "nfp_net_xsk.h" #include "nfp_port.h" #include "crypto/crypto.h" #include "crypto/fw.h" @@ -381,7 +383,7 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) * * Clear the ICR for the IRQ entry. */ -static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) { nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); nn_pci_flush(nn); @@ -923,7 +925,7 @@ static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) #endif } -static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) +void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) { wmb(); nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); @@ -1142,7 +1144,7 @@ err_flush: * @tx_ring: TX ring structure * @budget: NAPI budget (only used as bool to determine if in NAPI context) */ -static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) +void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp; @@ -1315,6 +1317,9 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) tx_ring->rd_p++; } + if (tx_ring->is_xdp) + nfp_net_xsk_tx_bufs_free(tx_ring); + memset(tx_ring->txds, 0, tx_ring->size); tx_ring->wr_p = 0; tx_ring->rd_p = 0; @@ -1338,24 +1343,43 @@ static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int 
txqueue) /* Receive processing */ static unsigned int -nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) +nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp) { - unsigned int fl_bufsz; + unsigned int fl_bufsz = 0; - fl_bufsz = NFP_NET_RX_BUF_HEADROOM; - fl_bufsz += dp->rx_dma_off; if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) fl_bufsz += NFP_NET_MAX_PREPEND; else fl_bufsz += dp->rx_offset; fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu; + return fl_bufsz; +} + +static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz; + + fl_bufsz = NFP_NET_RX_BUF_HEADROOM; + fl_bufsz += dp->rx_dma_off; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); + fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); return fl_bufsz; } +static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz; + + fl_bufsz = XDP_PACKET_HEADROOM; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); + + return fl_bufsz; +} + static void nfp_net_free_frag(void *frag, bool xdp) { @@ -1484,10 +1508,14 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) /* Move the empty entry to the end of the list */ wr_idx = D_IDX(rx_ring, rx_ring->wr_p); last_idx = rx_ring->cnt - 1; - rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr; - rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag; - rx_ring->rxbufs[last_idx].dma_addr = 0; - rx_ring->rxbufs[last_idx].frag = NULL; + if (rx_ring->r_vec->xsk_pool) { + rx_ring->xsk_rxbufs[wr_idx] = rx_ring->xsk_rxbufs[last_idx]; + memset(&rx_ring->xsk_rxbufs[last_idx], 0, + sizeof(*rx_ring->xsk_rxbufs)); + } else { + rx_ring->rxbufs[wr_idx] = rx_ring->rxbufs[last_idx]; + memset(&rx_ring->rxbufs[last_idx], 0, sizeof(*rx_ring->rxbufs)); + } memset(rx_ring->rxds, 0, rx_ring->size); rx_ring->wr_p = 0; @@ -1509,6 +1537,9 @@ nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp, { unsigned int i; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + 
return; + for (i = 0; i < rx_ring->cnt - 1; i++) { /* NULL skb can only happen when initial filling of the ring * fails to allocate enough buffers and calls here to free @@ -1536,6 +1567,9 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_buf *rxbufs; unsigned int i; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return 0; + rxbufs = rx_ring->rxbufs; for (i = 0; i < rx_ring->cnt - 1; i++) { @@ -1560,6 +1594,9 @@ nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp, { unsigned int i; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return nfp_net_xsk_rx_ring_fill_freelist(rx_ring); + for (i = 0; i < rx_ring->cnt - 1; i++) nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, rx_ring->rxbufs[i].dma_addr); @@ -1587,10 +1624,10 @@ static int nfp_net_rx_csum_has_errors(u16 flags) * @meta: Parsed metadata prepend * @skb: Pointer to SKB */ -static void nfp_net_rx_csum(struct nfp_net_dp *dp, - struct nfp_net_r_vector *r_vec, - struct nfp_net_rx_desc *rxd, - struct nfp_meta_parsed *meta, struct sk_buff *skb) +void nfp_net_rx_csum(const struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, struct sk_buff *skb) { skb_checksum_none_assert(skb); @@ -1668,7 +1705,7 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, &rx_hash->hash); } -static bool +bool nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, void *data, void *pkt, unsigned int pkt_len, int meta_len) { @@ -2540,7 +2577,11 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) if (dp->netdev) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - kvfree(rx_ring->rxbufs); + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + kvfree(rx_ring->xsk_rxbufs); + else + kvfree(rx_ring->rxbufs); if (rx_ring->rxds) dma_free_coherent(dp->dev, rx_ring->size, @@ -2548,6 +2589,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) rx_ring->cnt = 0; 
rx_ring->rxbufs = NULL; + rx_ring->xsk_rxbufs = NULL; rx_ring->rxds = NULL; rx_ring->dma = 0; rx_ring->size = 0; @@ -2563,8 +2605,18 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) static int nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) { + enum xdp_mem_type mem_type; + size_t rxbuf_sw_desc_sz; int err; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + mem_type = MEM_TYPE_XSK_BUFF_POOL; + rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs); + } else { + mem_type = MEM_TYPE_PAGE_ORDER0; + rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs); + } + if (dp->netdev) { err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx, rx_ring->r_vec->napi.napi_id); @@ -2572,6 +2624,10 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) return err; } + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, mem_type, NULL); + if (err) + goto err_alloc; + rx_ring->cnt = dp->rxd_cnt; rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds)); rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size, @@ -2583,10 +2639,17 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) goto err_alloc; } - rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs), - GFP_KERNEL); - if (!rx_ring->rxbufs) - goto err_alloc; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->xsk_rxbufs) + goto err_alloc; + } else { + rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + } return 0; @@ -2639,6 +2702,27 @@ static void nfp_net_rx_rings_free(struct nfp_net_dp *dp) } static void +nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) +{ + if (dp->netdev) + netif_napi_add(dp->netdev, &r_vec->napi, + nfp_net_has_xsk_pool_slow(dp, idx) ? 
+ nfp_net_xsk_poll : nfp_net_poll, + NAPI_POLL_WEIGHT); + else + tasklet_enable(&r_vec->tasklet); +} + +static void +nfp_net_napi_del(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec) +{ + if (dp->netdev) + netif_napi_del(&r_vec->napi); + else + tasklet_disable(&r_vec->tasklet); +} + +static void nfp_net_vector_assign_rings(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) { @@ -2648,6 +2732,17 @@ nfp_net_vector_assign_rings(struct nfp_net_dp *dp, r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ? &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL; + + if (nfp_net_has_xsk_pool_slow(dp, idx) || r_vec->xsk_pool) { + r_vec->xsk_pool = dp->xdp_prog ? dp->xsk_pools[idx] : NULL; + + if (r_vec->xsk_pool) + xsk_pool_set_rxq_info(r_vec->xsk_pool, + &r_vec->rx_ring->xdp_rxq); + + nfp_net_napi_del(dp, r_vec); + nfp_net_napi_add(dp, r_vec, idx); + } } static int @@ -2656,23 +2751,14 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, { int err; - /* Setup NAPI */ - if (nn->dp.netdev) - netif_napi_add(nn->dp.netdev, &r_vec->napi, - nfp_net_poll, NAPI_POLL_WEIGHT); - else - tasklet_enable(&r_vec->tasklet); + nfp_net_napi_add(&nn->dp, r_vec, idx); snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nfp_net_name(nn), idx); err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name, r_vec); if (err) { - if (nn->dp.netdev) - netif_napi_del(&r_vec->napi); - else - tasklet_disable(&r_vec->tasklet); - + nfp_net_napi_del(&nn->dp, r_vec); nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector); return err; } @@ -2690,11 +2776,7 @@ static void nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) { irq_set_affinity_hint(r_vec->irq_vector, NULL); - if (nn->dp.netdev) - netif_napi_del(&r_vec->napi); - else - tasklet_disable(&r_vec->tasklet); - + nfp_net_napi_del(&nn->dp, r_vec); free_irq(r_vec->irq_vector, r_vec); } @@ -2808,8 +2890,11 @@ static void nfp_net_clear_config_and_disable(struct 
nfp_net *nn) if (err) nn_err(nn, "Could not disable device: %d\n", err); - for (r = 0; r < nn->dp.num_rx_rings; r++) + for (r = 0; r < nn->dp.num_rx_rings; r++) { nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]); + if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx)) + nfp_net_xsk_rx_bufs_free(&nn->dp.rx_rings[r]); + } for (r = 0; r < nn->dp.num_tx_rings; r++) nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]); for (r = 0; r < nn->dp.num_r_vecs; r++) @@ -3296,6 +3381,15 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) *new = nn->dp; + new->xsk_pools = kmemdup(new->xsk_pools, + array_size(nn->max_r_vecs, + sizeof(new->xsk_pools)), + GFP_KERNEL); + if (!new->xsk_pools) { + kfree(new); + return NULL; + } + /* Clear things which need to be recomputed */ new->fl_bufsz = 0; new->tx_rings = NULL; @@ -3306,10 +3400,18 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) return new; } +static void nfp_net_free_dp(struct nfp_net_dp *dp) +{ + kfree(dp->xsk_pools); + kfree(dp); +} + static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, struct netlink_ext_ack *extack) { + unsigned int r, xsk_min_fl_bufsz; + /* XDP-enabled tests */ if (!dp->xdp_prog) return 0; @@ -3322,6 +3424,18 @@ nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, return -EINVAL; } + xsk_min_fl_bufsz = nfp_net_calc_fl_bufsz_xsk(dp); + for (r = 0; r < nn->max_r_vecs; r++) { + if (!dp->xsk_pools[r]) + continue; + + if (xsk_pool_get_rx_frame_size(dp->xsk_pools[r]) < xsk_min_fl_bufsz) { + NL_SET_ERR_MSG_MOD(extack, + "XSK buffer pool chunk size too small\n"); + return -EINVAL; + } + } + return 0; } @@ -3389,7 +3503,7 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp, nfp_net_open_stack(nn); exit_free_dp: - kfree(dp); + nfp_net_free_dp(dp); return err; @@ -3398,7 +3512,7 @@ err_free_rx: err_cleanup_vecs: for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - kfree(dp); + nfp_net_free_dp(dp); 
return err; } @@ -3716,6 +3830,9 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) return nfp_net_xdp_setup_drv(nn, xdp); case XDP_SETUP_PROG_HW: return nfp_net_xdp_setup_hw(nn, xdp); + case XDP_SETUP_XSK_POOL: + return nfp_net_xsk_setup_pool(netdev, xdp->xsk.pool, + xdp->xsk.queue_id); default: return nfp_app_bpf(nn->app, nn, xdp); } @@ -3766,6 +3883,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_features_check = nfp_net_features_check, .ndo_get_phys_port_name = nfp_net_get_phys_port_name, .ndo_bpf = nfp_net_xdp, + .ndo_xsk_wakeup = nfp_net_xsk_wakeup, .ndo_get_devlink_port = nfp_devlink_get_devlink_port, }; @@ -3893,6 +4011,14 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings); nn->dp.num_r_vecs = min_t(unsigned int, nn->dp.num_r_vecs, num_online_cpus()); + nn->max_r_vecs = nn->dp.num_r_vecs; + + nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools), + GFP_KERNEL); + if (!nn->dp.xsk_pools) { + err = -ENOMEM; + goto err_free_nn; + } nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; @@ -3932,6 +4058,7 @@ void nfp_net_free(struct nfp_net *nn) WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); nfp_ccm_mbox_free(nn); + kfree(nn->dp.xsk_pools); if (nn->dp.netdev) free_netdev(nn->dp.netdev); else diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index 553c708694e8..2c74b3c5aef9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -42,13 +42,19 @@ static int nfp_rx_q_show(struct seq_file *file, void *data) seq_printf(file, "%04d: 0x%08x 0x%08x", i, rxd->vals[0], rxd->vals[1]); - frag = READ_ONCE(rx_ring->rxbufs[i].frag); - if (frag) - seq_printf(file, " frag=%p", frag); - - if (rx_ring->rxbufs[i].dma_addr) - seq_printf(file, " 
dma_addr=%pad", - &rx_ring->rxbufs[i].dma_addr); + if (!r_vec->xsk_pool) { + frag = READ_ONCE(rx_ring->rxbufs[i].frag); + if (frag) + seq_printf(file, " frag=%p", frag); + + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + } else { + if (rx_ring->xsk_rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->xsk_rxbufs[i].dma_addr); + } if (i == rx_ring->rd_p % rxd_cnt) seq_puts(file, " H_RD "); @@ -103,20 +109,23 @@ static int nfp_tx_q_show(struct seq_file *file, void *data) tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); for (i = 0; i < txd_cnt; i++) { + struct xdp_buff *xdp; + struct sk_buff *skb; + txd = &tx_ring->txds[i]; seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, txd->vals[0], txd->vals[1], txd->vals[2], txd->vals[3]); - if (tx_ring == r_vec->tx_ring) { - struct sk_buff *skb = READ_ONCE(tx_ring->txbufs[i].skb); - + if (!tx_ring->is_xdp) { + skb = READ_ONCE(tx_ring->txbufs[i].skb); if (skb) seq_printf(file, " skb->head=%p skb->data=%p", skb->head, skb->data); } else { - seq_printf(file, " frag=%p", - READ_ONCE(tx_ring->txbufs[i].frag)); + xdp = READ_ONCE(tx_ring->txbufs[i].xdp); + if (xdp) + seq_printf(file, " xdp->data=%p", xdp->data); } if (tx_ring->txbufs[i].dma_addr) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c new file mode 100644 index 000000000000..ab7243277efa --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c @@ -0,0 +1,592 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <net/xdp_sock_drv.h> +#include <trace/events/xdp.h> + +#include "nfp_app.h" +#include "nfp_net.h" +#include "nfp_net_xsk.h" + +static int nfp_net_tx_space(struct nfp_net_tx_ring *tx_ring) +{ + return tx_ring->cnt - 
tx_ring->wr_p + tx_ring->rd_p - 1; +} + +static void nfp_net_xsk_tx_free(struct nfp_net_tx_buf *txbuf) +{ + xsk_buff_free(txbuf->xdp); + + txbuf->dma_addr = 0; + txbuf->xdp = NULL; +} + +void nfp_net_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_tx_buf *txbuf; + unsigned int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = D_IDX(tx_ring, tx_ring->rd_p); + txbuf = &tx_ring->txbufs[idx]; + + txbuf->real_len = 0; + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + + if (tx_ring->r_vec->xsk_pool) { + if (txbuf->is_xsk_tx) + nfp_net_xsk_tx_free(txbuf); + + xsk_tx_completed(tx_ring->r_vec->xsk_pool, 1); + } + } +} + +static bool nfp_net_xsk_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + u32 done_pkts = 0, done_bytes = 0, reused = 0; + bool done_all; + int idx, todo; + u32 qcp_rd_p; + + if (tx_ring->wr_p == tx_ring->rd_p) + return true; + + /* Work out how many descriptors have been transmitted. */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); + + done_pkts = todo; + while (todo--) { + struct nfp_net_tx_buf *txbuf; + + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_ring->rd_p++; + + txbuf = &tx_ring->txbufs[idx]; + if (unlikely(!txbuf->real_len)) + continue; + + done_bytes += txbuf->real_len; + txbuf->real_len = 0; + + if (txbuf->is_xsk_tx) { + nfp_net_xsk_tx_free(txbuf); + reused++; + } + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, 
tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +static void nfp_net_xsk_tx(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct xdp_desc desc[NFP_NET_XSK_TX_BATCH]; + struct xsk_buff_pool *xsk_pool; + struct nfp_net_tx_desc *txd; + u32 pkts = 0, wr_idx; + u32 i, got; + + xsk_pool = r_vec->xsk_pool; + + while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) { + for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++) + if (!xsk_tx_peek_desc(xsk_pool, &desc[i])) + break; + got = i; + if (!got) + break; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + prefetchw(&tx_ring->txds[wr_idx]); + + for (i = 0; i < got; i++) + xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr, + desc[i].len); + + for (i = 0; i < got; i++) { + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + + tx_ring->txbufs[wr_idx].real_len = desc[i].len; + tx_ring->txbufs[wr_idx].is_xsk_tx = false; + + /* Build TX descriptor. */ + txd = &tx_ring->txds[wr_idx]; + nfp_desc_set_dma_addr(txd, + xsk_buff_raw_get_dma(xsk_pool, + desc[i].addr + )); + txd->offset_eop = PCIE_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(desc[i].len); + txd->data_len = cpu_to_le16(desc[i].len); + } + + tx_ring->wr_p += got; + pkts += got; + } + + if (!pkts) + return; + + xsk_tx_release(xsk_pool); + /* Ensure all records are visible before incrementing write counter. 
*/ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts); +} + +static bool +nfp_net_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len, + int pkt_off) +{ + struct xsk_buff_pool *pool = r_vec->xsk_pool; + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_desc *txd; + unsigned int wr_idx; + + if (nfp_net_tx_space(tx_ring) < 1) + return false; + + xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off, pkt_len); + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->xdp = xrxbuf->xdp; + txbuf->real_len = pkt_len; + txbuf->is_xsk_tx = true; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = PCIE_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_ptr_add++; + tx_ring->wr_p++; + + return true; +} + +static int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return rx_ring->cnt - rx_ring->wr_p + rx_ring->rd_p - 1; +} + +static void +nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx, + struct xdp_buff *xdp) +{ + unsigned int headroom; + + headroom = xsk_pool_get_headroom(rx_ring->r_vec->xsk_pool); + + rx_ring->rxds[idx].fld.reserved = 0; + rx_ring->rxds[idx].fld.meta_len_dd = 0; + + rx_ring->xsk_rxbufs[idx].xdp = xdp; + rx_ring->xsk_rxbufs[idx].dma_addr = + xsk_buff_xdp_get_frame_dma(xdp) + headroom; +} + +static void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf) +{ + rxbuf->dma_addr = 0; + rxbuf->xdp = NULL; +} + +static void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf) +{ + if (rxbuf->xdp) + xsk_buff_free(rxbuf->xdp); + + nfp_net_xsk_rx_unstash(rxbuf); +} + +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring) +{ + 
unsigned int i; + + if (!rx_ring->cnt) + return; + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_net_xsk_rx_free(&rx_ring->xsk_rxbufs[i]); +} + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct xsk_buff_pool *pool = r_vec->xsk_pool; + unsigned int wr_idx, wr_ptr_add = 0; + struct xdp_buff *xdp; + + while (nfp_net_rx_space(rx_ring)) { + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + xdp = xsk_buff_alloc(pool); + if (!xdp) + break; + + nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp); + + nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, + rx_ring->xsk_rxbufs[wr_idx].dma_addr); + + rx_ring->wr_p++; + wr_ptr_add++; + } + + /* Ensure all records are visible before incrementing write counter. */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, wr_ptr_add); +} + +static void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, + struct nfp_net_xsk_rx_buf *xrxbuf) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_xsk_rx_free(xrxbuf); +} + +static void nfp_net_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring, + const struct nfp_net_rx_desc *rxd, + struct nfp_net_xsk_rx_buf *xrxbuf, + const struct nfp_meta_parsed *meta, + unsigned int pkt_len, + bool meta_xdp, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct net_device *netdev; + struct sk_buff *skb; + + if (likely(!meta->portid)) { + netdev = dp->netdev; + } else { + struct nfp_net *nn = netdev_priv(dp->netdev); + + netdev = nfp_app_dev_get(nn->app, meta->portid, NULL); + if (unlikely(!netdev)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = napi_alloc_skb(&r_vec->napi, pkt_len); + if (!skb) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + memcpy(skb_put(skb, pkt_len), xrxbuf->xdp->data, pkt_len); + + skb->mark = meta->mark; 
+ skb_set_hash(skb, meta->hash, meta->hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_net_rx_csum(dp, r_vec, rxd, meta, skb); + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rxd->rxd.vlan)); + if (meta_xdp) + skb_metadata_set(skb, + xrxbuf->xdp->data - xrxbuf->xdp->data_meta); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + + nfp_net_xsk_rx_free(xrxbuf); + + (*skbs_polled)++; +} + +static unsigned int +nfp_net_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_redir = false; + int pkts_polled = 0; + + xdp_prog = READ_ONCE(dp->xdp_prog); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, pkt_len, pkt_off; + struct nfp_net_xsk_rx_buf *xrxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + int idx, act; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + rx_ring->rd_p++; + pkts_polled++; + + xrxbuf = &rx_ring->xsk_rxbufs[idx]; + + /* If starved of buffers "drop" it and scream. */ + if (rx_ring->rd_p >= rx_ring->wr_p) { + nn_dp_warn(dp, "Starved of RX buffers\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. 
+ */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + /* Only supporting AF_XDP with dynamic metadata so buffer layout + * is always: + * + * --------------------------------------------------------- + * | off | metadata | packet | XXXX | + * --------------------------------------------------------- + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) { + nn_dp_warn(dp, "Oversized RX packet metadata %u\n", + meta_len); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + /* Stats update. */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + xrxbuf->xdp->data += meta_len; + xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len; + xdp_set_data_meta_invalid(xrxbuf->xdp); + xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool); + net_prefetch(xrxbuf->xdp->data); + + if (meta_len) { + if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, + xrxbuf->xdp->data - + meta_len, + xrxbuf->xdp->data, + pkt_len, meta_len))) { + nn_dp_warn(dp, "Invalid RX packet metadata\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + if (unlikely(meta.portid)) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + if (meta.portid != NFP_META_PORT_ID_CTRL) { + nfp_net_xsk_rx_skb(rx_ring, rxd, xrxbuf, + &meta, pkt_len, + false, skbs_polled); + continue; + } + + nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data, + pkt_len); + nfp_net_xsk_rx_free(xrxbuf); + continue; + } + } + + act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp); + + pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data; + pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start; + + switch (act) { + case XDP_PASS: + nfp_net_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len, + true, skbs_polled); + break; + case XDP_TX: + if (!nfp_net_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring, + xrxbuf, pkt_len, 
pkt_off)) + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + else + nfp_net_xsk_rx_unstash(xrxbuf); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + } else { + nfp_net_xsk_rx_unstash(xrxbuf); + xdp_redir = true; + } + break; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + } + + nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring); + + if (xdp_redir) + xdp_do_flush_map(); + + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + + return pkts_polled; +} + +static void nfp_net_xsk_pool_unmap(struct device *dev, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, 0); +} + +static int nfp_net_xsk_pool_map(struct device *dev, struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_map(pool, dev, 0); +} + +int nfp_net_xsk_setup_pool(struct net_device *netdev, + struct xsk_buff_pool *pool, u16 queue_id) +{ + struct nfp_net *nn = netdev_priv(netdev); + + struct xsk_buff_pool *prev_pool; + struct nfp_net_dp *dp; + int err; + + /* Reject on old FWs so we can drop some checks on datapath. 
*/ + if (nn->dp.rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC) + return -EOPNOTSUPP; + if (!nn->dp.chained_metadata_format) + return -EOPNOTSUPP; + + /* Install */ + if (pool) { + err = nfp_net_xsk_pool_map(nn->dp.dev, pool); + if (err) + return err; + } + + /* Reconfig/swap */ + dp = nfp_net_clone_dp(nn); + if (!dp) { + err = -ENOMEM; + goto err_unmap; + } + + prev_pool = dp->xsk_pools[queue_id]; + dp->xsk_pools[queue_id] = pool; + + err = nfp_net_ring_reconfig(nn, dp, NULL); + if (err) + goto err_unmap; + + /* Uninstall */ + if (prev_pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, prev_pool); + + return 0; +err_unmap: + if (pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, pool); + + return err; +} + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) +{ + struct nfp_net *nn = netdev_priv(netdev); + + /* queue_id comes from a zero-copy socket, installed with XDP_SETUP_XSK_POOL, + * so it must be within our vector range. Moreover, our napi structs + * are statically allocated, so we can always kick them without worrying + * if reconfig is in progress or interface down. 
+ */ + napi_schedule(&nn->r_vecs[queue_id].napi); + + return 0; +} + +int nfp_net_xsk_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled, skbs = 0; + + pkts_polled = nfp_net_xsk_rx(r_vec->rx_ring, budget, &skbs); + + if (pkts_polled < budget) { + if (r_vec->tx_ring) + nfp_net_tx_complete(r_vec->tx_ring, budget); + + if (!nfp_net_xsk_complete(r_vec->xdp_ring)) + pkts_polled = budget; + + nfp_net_xsk_tx(r_vec->xdp_ring); + + if (pkts_polled < budget && napi_complete_done(napi, skbs)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } + + return pkts_polled; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h new file mode 100644 index 000000000000..5c8549cb3543 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#ifndef _NFP_XSK_H_ +#define _NFP_XSK_H_ + +#include <net/xdp_sock_drv.h> + +#define NFP_NET_XSK_TX_BATCH 16 /* XSK TX transmission batch size. 
*/ + +static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp, + unsigned int qid) +{ + return dp->xdp_prog && dp->xsk_pools[qid]; +} + +int nfp_net_xsk_setup_pool(struct net_device *netdev, struct xsk_buff_pool *pool, + u16 queue_id); + +void nfp_net_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring); +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring); + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring); + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); +int nfp_net_xsk_poll(struct napi_struct *napi, int budget); + +#endif /* _NFP_XSK_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 52a1b5cfd8e7..9d0514cfeb5c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -202,19 +202,25 @@ do_check_time: } } + dev_dbg(ionic->dev, "fw_status 0x%02x ready %d idev->ready %d last_hb 0x%x state 0x%02lx\n", + fw_status, fw_status_ready, idev->fw_status_ready, + idev->last_fw_hb, lif->state[0]); + /* is this a transition? 
*/ - if (fw_status_ready != idev->fw_status_ready) { + if (fw_status_ready != idev->fw_status_ready && + !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { bool trigger = false; - if (!fw_status_ready && lif && + idev->fw_status_ready = fw_status_ready; + + if (!fw_status_ready && !test_bit(IONIC_LIF_F_FW_RESET, lif->state) && !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status); trigger = true; - } else if (fw_status_ready && lif && - test_bit(IONIC_LIF_F_FW_RESET, lif->state) && - !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { + } else if (fw_status_ready && + test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { dev_info(ionic->dev, "FW running 0x%02x\n", fw_status); trigger = true; } @@ -222,8 +228,6 @@ do_check_time: if (trigger) { struct ionic_deferred_work *work; - idev->fw_status_ready = fw_status_ready; - work = kzalloc(sizeof(*work), GFP_ATOMIC); if (work) { work->type = IONIC_DW_TYPE_LIF_RESET; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 386a5cf1e224..01c22701482d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -74,10 +74,10 @@ static void ionic_get_drvinfo(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - strlcpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version, + strscpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version, sizeof(drvinfo->fw_version)); - strlcpy(drvinfo->bus_info, ionic_bus_info(ionic), + strscpy(drvinfo->bus_info, ionic_bus_info(ionic), sizeof(drvinfo->bus_info)); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 278610ed7227..4a90f611c611 100644 --- 
a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -759,7 +759,7 @@ enum ionic_txq_desc_opcode { * IONIC_TXQ_DESC_OPCODE_CSUM_HW: * Offload 16-bit checksum computation to hardware. * If @csum_l3 is set then the packet's L3 checksum is - * updated. Similarly, if @csum_l4 is set the the L4 + * updated. Similarly, if @csum_l4 is set the L4 * checksum is updated. If @encap is set then encap header * checksums are also updated. * @@ -1368,9 +1368,9 @@ union ionic_port_config { * @status: link status (enum ionic_port_oper_status) * @id: port id * @speed: link speed (in Mbps) - * @link_down_count: number of times link went from from up to down + * @link_down_count: number of times link went from up to down * @fec_type: fec type (enum ionic_port_fec_type) - * @xcvr: tranceiver status + * @xcvr: transceiver status */ struct ionic_port_status { __le32 id; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 542e395fb037..f3568901eb91 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -12,6 +12,7 @@ #include <linux/pci.h> #include <linux/cpumask.h> #include <linux/crash_dump.h> +#include <linux/vmalloc.h> #include "ionic.h" #include "ionic_bus.h" @@ -393,11 +394,11 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) ionic_qcq_intr_free(lif, qcq); if (qcq->cq.info) { - devm_kfree(dev, qcq->cq.info); + vfree(qcq->cq.info); qcq->cq.info = NULL; } if (qcq->q.info) { - devm_kfree(dev, qcq->q.info); + vfree(qcq->q.info); qcq->q.info = NULL; } } @@ -528,8 +529,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->q.dev = dev; new->flags = flags; - new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), - GFP_KERNEL); + new->q.info = vzalloc(num_descs * sizeof(*new->q.info)); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue 
info\n"); err = -ENOMEM; @@ -550,8 +550,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, if (err) goto err_out; - new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), - GFP_KERNEL); + new->cq.info = vzalloc(num_descs * sizeof(*new->cq.info)); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); err = -ENOMEM; @@ -640,14 +639,14 @@ err_out_free_cq: err_out_free_q: dma_free_coherent(dev, new->q_size, new->q_base, new->q_base_pa); err_out_free_cq_info: - devm_kfree(dev, new->cq.info); + vfree(new->cq.info); err_out_free_irq: if (flags & IONIC_QCQ_F_INTR) { devm_free_irq(dev, new->intr.vector, &new->napi); ionic_intr_free(lif->ionic, new->intr.index); } err_out_free_q_info: - devm_kfree(dev, new->q.info); + vfree(new->q.info); err_out_free_qcq: devm_kfree(dev, new); err_out: @@ -3303,7 +3302,7 @@ static void ionic_lif_set_netdev_info(struct ionic_lif *lif) }, }; - strlcpy(ctx.cmd.lif_setattr.name, lif->netdev->name, + strscpy(ctx.cmd.lif_setattr.name, lif->netdev->name, sizeof(ctx.cmd.lif_setattr.name)); ionic_adminq_post_wait(lif, &ctx); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index fd6806b4a1b9..9859a4432985 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -151,7 +151,6 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = { IONIC_RX_STAT_DESC(vlan_stripped), }; - #define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc) #define IONIC_NUM_PORT_STATS ARRAY_SIZE(ionic_port_stats_desc) #define IONIC_NUM_TX_STATS ARRAY_SIZE(ionic_tx_stats_desc) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index d197a70a49c9..f54035455ad6 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -10,7 +10,6 @@ #include 
"ionic_lif.h" #include "ionic_txrx.h" - static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell, ionic_desc_cb cb_func, void *cb_arg) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 8ac38828ba45..bf4a95186e55 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2984,12 +2984,16 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, u8 mask = QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; struct qed_filter_accept_flags *flags = ¶ms->accept_flags; struct qed_public_vf_info *vf_info; + u16 tlv_mask; + + tlv_mask = BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM) | + BIT(QED_IOV_VP_UPDATE_ACCEPT_ANY_VLAN); /* Untrusted VFs can't even be trusted to know that fact. * Simply indicate everything is configured fine, and trace * configuration 'behind their back'. */ - if (!(*tlvs & BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM))) + if (!(*tlvs & tlv_mask)) return 0; vf_info = qed_iov_get_public_vf_info(hwfn, vfid, true); @@ -3006,6 +3010,13 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, flags->tx_accept_filter &= ~mask; } + if (params->update_accept_any_vlan_flg) { + vf_info->accept_any_vlan = params->accept_any_vlan; + + if (vf_info->forced_vlan && !vf_info->is_trusted_configured) + params->accept_any_vlan = false; + } + return 0; } @@ -4715,6 +4726,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, tx_rate = vf_info->tx_rate; ivi->max_tx_rate = tx_rate ? 
tx_rate : link.speed; ivi->min_tx_rate = qed_iov_get_vf_min_rate(hwfn, vf_id); + ivi->trusted = vf_info->is_trusted_request; return 0; } @@ -5145,6 +5157,12 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.update_ctl_frame_check = 1; params.mac_chk_en = !vf_info->is_trusted_configured; + params.update_accept_any_vlan_flg = 0; + + if (vf_info->accept_any_vlan && vf_info->forced_vlan) { + params.update_accept_any_vlan_flg = 1; + params.accept_any_vlan = vf_info->accept_any_vlan; + } if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; @@ -5160,13 +5178,20 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) if (!vf_info->is_trusted_configured) { flags->rx_accept_filter &= ~mask; flags->tx_accept_filter &= ~mask; + params.accept_any_vlan = false; } if (flags->update_rx_mode_config || flags->update_tx_mode_config || - params.update_ctl_frame_check) + params.update_ctl_frame_check || + params.update_accept_any_vlan_flg) { + DP_VERBOSE(hwfn, QED_MSG_IOV, + "vport update config for %s VF[abs 0x%x rel 0x%x]\n", + vf_info->is_trusted_configured ? 
"trusted" : "untrusted", + vf->abs_vf_id, vf->relative_vf_id); qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); + } } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index f448e3dd6c8b..6ee2493de164 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -62,6 +62,7 @@ struct qed_public_vf_info { bool is_trusted_request; u8 rx_accept_mode; u8 tx_accept_mode; + bool accept_any_vlan; }; struct qed_iov_vf_init_params { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 29cdcb2285b1..bcf3746220df 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -10,6 +10,7 @@ #include <linux/ipv6.h> #include <net/checksum.h> #include <linux/printk.h> +#include <linux/jiffies.h> #include "qlcnic.h" @@ -332,7 +333,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (ether_addr_equal(tmp_fil->faddr, (u8 *)&src_addr) && tmp_fil->vlan_id == vlan_id) { - if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) + if (time_is_before_jiffies(QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); tmp_fil->ftime = jiffies; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 955cce644392..c865a4be05ee 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -435,7 +435,7 @@ qcaspi_receive(struct qcaspi *qca) qca->rx_skb->protocol = eth_type_trans( qca->rx_skb, qca->rx_skb->dev); skb_checksum_none_assert(qca->rx_skb); - netif_rx_ni(qca->rx_skb); + netif_rx(qca->rx_skb); qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, net_dev->mtu + VLAN_ETH_HLEN); if (!qca->rx_skb) { @@ -1001,7 +1001,7 @@ qca_spi_probe(struct spi_device *spi) return 0; } 
-static int +static void qca_spi_remove(struct spi_device *spi) { struct net_device *qcaspi_devs = spi_get_drvdata(spi); @@ -1011,8 +1011,6 @@ qca_spi_remove(struct spi_device *spi) unregister_netdev(qcaspi_devs); free_netdev(qcaspi_devs); - - return 0; } static const struct spi_device_id qca_spi_id[] = { diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 27c4f43176aa..26646cb6a20a 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -108,7 +108,7 @@ qca_tty_receive(struct serdev_device *serdev, const unsigned char *data, qca->rx_skb->protocol = eth_type_trans( qca->rx_skb, qca->rx_skb->dev); skb_checksum_none_assert(qca->rx_skb); - netif_rx_ni(qca->rx_skb); + netif_rx(qca->rx_skb); qca->rx_skb = netdev_alloc_skb_ip_align(netdev, netdev->mtu + VLAN_ETH_HLEN); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c index 3676976c875b..ba194698cc14 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c @@ -298,7 +298,6 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, { struct rmnet_map_header *map_header; u32 padding, map_datalen; - u8 *padbytes; map_datalen = skb->len - hdrlen; map_header = (struct rmnet_map_header *) @@ -323,8 +322,7 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, if (skb_tailroom(skb) < padding) return NULL; - padbytes = (u8 *)skb_put(skb, padding); - memset(padbytes, 0, padding); + skb_put_zero(skb, padding); done: map_header->pkt_len = htons(map_datalen + padding); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 24e2635c4c80..525d66f71f02 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -475,7 +475,7 @@ static int ravb_ring_init(struct net_device *ndev, 
int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = netdev_alloc_skb(ndev, info->max_rx_len); + skb = __netdev_alloc_skb(ndev, info->max_rx_len, GFP_KERNEL); if (!skb) goto error; ravb_set_buffer_align(skb); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 77a0d9d7e65a..407a1f8e3059 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2285,18 +2285,18 @@ static int __init sxgbe_cmdline_opt(char *str) char *opt; if (!str || !*str) - return -EINVAL; + return 1; while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; } } - return 0; + return 1; err: pr_err("%s: ERROR broken module parameter conversion\n", __func__); - return -EINVAL; + return 1; } __setup("sxgbeeth=", sxgbe_cmdline_opt); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index ead550ae2709..d6fdcdc530ca 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -78,31 +78,48 @@ static const struct efx_channel_type efx_default_channel_type = { * INTERRUPTS *************/ -static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +static unsigned int count_online_cores(struct efx_nic *efx, bool local_node) { - cpumask_var_t thread_mask; + cpumask_var_t filter_mask; unsigned int count; int cpu; + if (unlikely(!zalloc_cpumask_var(&filter_mask, GFP_KERNEL))) { + netif_warn(efx, probe, efx->net_dev, + "RSS disabled due to allocation failure\n"); + return 1; + } + + cpumask_copy(filter_mask, cpu_online_mask); + if (local_node) { + int numa_node = pcibus_to_node(efx->pci_dev->bus); + + cpumask_and(filter_mask, filter_mask, cpumask_of_node(numa_node)); + } + + count = 0; + for_each_cpu(cpu, filter_mask) { + ++count; + cpumask_andnot(filter_mask, filter_mask, topology_sibling_cpumask(cpu)); + 
} + + free_cpumask_var(filter_mask); + + return count; +} + +static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +{ + unsigned int count; + if (rss_cpus) { count = rss_cpus; } else { - if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { - netif_warn(efx, probe, efx->net_dev, - "RSS disabled due to allocation failure\n"); - return 1; - } + count = count_online_cores(efx, true); - count = 0; - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, thread_mask)) { - ++count; - cpumask_or(thread_mask, thread_mask, - topology_sibling_cpumask(cpu)); - } - } - - free_cpumask_var(thread_mask); + /* If no online CPUs in local node, fallback to any online CPUs */ + if (count == 0) + count = count_online_cores(efx, false); } if (count > EFX_MAX_RX_QUEUES) { @@ -369,12 +386,20 @@ int efx_probe_interrupts(struct efx_nic *efx) #if defined(CONFIG_SMP) void efx_set_interrupt_affinity(struct efx_nic *efx) { + int numa_node = pcibus_to_node(efx->pci_dev->bus); + const struct cpumask *numa_mask = cpumask_of_node(numa_node); struct efx_channel *channel; unsigned int cpu; + /* If no online CPUs in local node, fallback to any online CPU */ + if (cpumask_first_and(cpu_online_mask, numa_mask) >= nr_cpu_ids) + numa_mask = cpu_online_mask; + + cpu = -1; efx_for_each_channel(channel, efx) { - cpu = cpumask_local_spread(channel->channel, - pcibus_to_node(efx->pci_dev->bus)); + cpu = cpumask_next_and(cpu, cpu_online_mask, numa_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first_and(cpu_online_mask, numa_mask); irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); } } diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index be6bfd6b7ec7..50baf62b2cbc 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -163,9 +163,9 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd, /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */ spin_lock_bh(&mcdi->iface_lock); ++mcdi->seqno; + seqno = 
mcdi->seqno & SEQ_MASK; spin_unlock_bh(&mcdi->iface_lock); - seqno = mcdi->seqno & SEQ_MASK; xflags = 0; if (mcdi->mode == MCDI_MODE_EVENTS) xflags |= MCDI_HEADER_XFLAGS_EVREQ; diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 556bd353dd42..b0c5a44785fa 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1044,7 +1044,7 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) "rx failed to build skb\n"); break; } - page_pool_release_page(dring->page_pool, page); + skb_mark_for_recycle(skb); skb_reserve(skb, xdp.data - xdp.data_hard_start); skb_put(skb, xdp.data_end - xdp.data); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 5943ff9f21c2..32ef3df4e266 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -721,6 +721,7 @@ static int tgl_common_data(struct pci_dev *pdev, plat->rx_queues_to_use = 6; plat->tx_queues_to_use = 4; plat->clk_ptp_rate = 200000000; + plat->speed_mode_2500 = intel_speed_mode_2500; plat->safety_feat_cfg->tsoee = 1; plat->safety_feat_cfg->mrxpee = 0; @@ -740,7 +741,6 @@ static int tgl_sgmii_phy0_data(struct pci_dev *pdev, { plat->bus_id = 1; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); @@ -755,7 +755,6 @@ static int tgl_sgmii_phy1_data(struct pci_dev *pdev, { plat->bus_id = 2; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2ffa0a11eea5..0cc28c79cc61 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -78,6 +78,7 @@ struct ethqos_emac_por { struct ethqos_emac_driver_data { const struct ethqos_emac_por *por; unsigned int num_por; + bool rgmii_config_looback_en; }; struct qcom_ethqos { @@ -90,6 +91,7 @@ struct qcom_ethqos { const struct ethqos_emac_por *por; unsigned int num_por; + bool rgmii_config_looback_en; }; static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset) @@ -181,6 +183,22 @@ static const struct ethqos_emac_por emac_v2_3_0_por[] = { static const struct ethqos_emac_driver_data emac_v2_3_0_data = { .por = emac_v2_3_0_por, .num_por = ARRAY_SIZE(emac_v2_3_0_por), + .rgmii_config_looback_en = true, +}; + +static const struct ethqos_emac_por emac_v2_1_0_por[] = { + { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40C01343 }, + { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642C }, + { .offset = SDCC_HC_REG_DDR_CONFIG, .value = 0x00000000 }, + { .offset = SDCC_HC_REG_DLL_CONFIG2, .value = 0x00200000 }, + { .offset = SDCC_USR_CTL, .value = 0x00010800 }, + { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 }, +}; + +static const struct ethqos_emac_driver_data emac_v2_1_0_data = { + .por = emac_v2_1_0_por, + .num_por = ARRAY_SIZE(emac_v2_1_0_por), + .rgmii_config_looback_en = false, }; static int ethqos_dll_configure(struct qcom_ethqos *ethqos) @@ -297,8 +315,12 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + if (ethqos->rgmii_config_looback_en) + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + else + rgmii_updatel(ethqos, 
RGMII_CONFIG_LOOPBACK_EN, + 0, RGMII_IO_MACRO_CONFIG); break; case SPEED_100: @@ -331,8 +353,13 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + if (ethqos->rgmii_config_looback_en) + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + else + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + 0, RGMII_IO_MACRO_CONFIG); + break; case SPEED_10: @@ -504,6 +531,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) data = of_device_get_match_data(&pdev->dev); ethqos->por = data->por; ethqos->num_por = data->num_por; + ethqos->rgmii_config_looback_en = data->rgmii_config_looback_en; ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); if (IS_ERR(ethqos->rgmii_clk)) { @@ -558,6 +586,7 @@ static int qcom_ethqos_remove(struct platform_device *pdev) static const struct of_device_id qcom_ethqos_match[] = { { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, + { .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data}, { } }; MODULE_DEVICE_TABLE(of, qcom_ethqos_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b745d624b2cb..cf4e077d21ff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2209,6 +2209,23 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan) stmmac_stop_tx(priv, priv->ioaddr, chan); } +static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv) +{ + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; + u32 dma_csr_ch = max(rx_channels_count, tx_channels_count); + u32 chan; + + for (chan = 0; chan < dma_csr_ch; chan++) { + 
struct stmmac_channel *ch = &priv->channel[chan]; + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1); + spin_unlock_irqrestore(&ch->lock, flags); + } +} + /** * stmmac_start_all_dma - start all RX and TX DMA channels * @priv: driver private structure @@ -2851,8 +2868,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) stmmac_axi(priv, priv->ioaddr, priv->plat->axi); /* DMA CSR Channel configuration */ - for (chan = 0; chan < dma_csr_ch; chan++) + for (chan = 0; chan < dma_csr_ch; chan++) { stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 1); + } /* DMA RX Channel Configuration */ for (chan = 0; chan < rx_channels_count; chan++) { @@ -3708,6 +3727,7 @@ static int stmmac_open(struct net_device *dev) stmmac_enable_all_queues(priv); netif_tx_start_all_queues(priv->dev); + stmmac_enable_all_dma_irq(priv); return 0; @@ -6457,8 +6477,10 @@ int stmmac_xdp_open(struct net_device *dev) } /* DMA CSR Channel configuration */ - for (chan = 0; chan < dma_csr_ch; chan++) + for (chan = 0; chan < dma_csr_ch; chan++) { stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 1); + } /* Adjust Split header */ sph_en = (priv->hw->rx_csum > 0) && priv->sph; @@ -6519,6 +6541,7 @@ int stmmac_xdp_open(struct net_device *dev) stmmac_enable_all_queues(priv); netif_carrier_on(dev); netif_tx_start_all_queues(dev); + stmmac_enable_all_dma_irq(priv); return 0; @@ -7398,6 +7421,7 @@ int stmmac_resume(struct device *dev) stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw); stmmac_enable_all_queues(priv); + stmmac_enable_all_dma_irq(priv); mutex_unlock(&priv->lock); rtnl_unlock(); @@ -7414,7 +7438,7 @@ static int __init stmmac_cmdline_opt(char *str) char *opt; if (!str || !*str) - return -EINVAL; + return 1; while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "debug:", 6)) { 
if (kstrtoint(opt + 6, 0, &debug)) @@ -7445,11 +7469,11 @@ static int __init stmmac_cmdline_opt(char *str) goto err; } } - return 0; + return 1; err: pr_err("%s: ERROR broken module parameter conversion", __func__); - return -EINVAL; + return 1; } __setup("stmmaceth=", stmmac_cmdline_opt); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index dba9f12efa1c..153edc5eadad 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -88,6 +88,7 @@ #include <asm/io.h> #include <asm/byteorder.h> #include <linux/uaccess.h> +#include <linux/jiffies.h> #define cas_page_map(x) kmap_atomic((x)) #define cas_page_unmap(x) kunmap_atomic((x)) @@ -1234,19 +1235,6 @@ static void cas_init_rx_dma(struct cas *cp) */ readl(cp->regs + REG_INTR_STATUS_ALIAS); writel(INTR_RX_DONE | INTR_RX_BUF_UNAVAIL, cp->regs + REG_ALIAS_CLEAR); - if (cp->cas_flags & CAS_FLAG_REG_PLUS) { - for (i = 1; i < N_RX_COMP_RINGS; i++) - readl(cp->regs + REG_PLUS_INTRN_STATUS_ALIAS(i)); - - /* 2 is different from 3 and 4 */ - if (N_RX_COMP_RINGS > 1) - writel(INTR_RX_DONE_ALT | INTR_RX_BUF_UNAVAIL_1, - cp->regs + REG_PLUS_ALIASN_CLEAR(1)); - - for (i = 2; i < N_RX_COMP_RINGS; i++) - writel(INTR_RX_DONE_ALT, - cp->regs + REG_PLUS_ALIASN_CLEAR(i)); - } /* set up pause thresholds */ val = CAS_BASE(RX_PAUSE_THRESH_OFF, @@ -3508,9 +3496,6 @@ enable_rx_done: if (N_RX_DESC_RINGS > 1) writel(RX_DESC_RINGN_SIZE(1) - 4, cp->regs + REG_PLUS_RX_KICK1); - - for (i = 1; i < N_RX_COMP_RINGS; i++) - writel(0, cp->regs + REG_PLUS_RX_COMPN_TAIL(i)); } } @@ -4063,8 +4048,8 @@ static void cas_link_timer(struct timer_list *t) if (link_transition_timeout != 0 && cp->link_transition_jiffies_valid && - ((jiffies - cp->link_transition_jiffies) > - (link_transition_timeout))) { + time_is_before_jiffies(cp->link_transition_jiffies + + link_transition_timeout)) { /* One-second counter so link-down workaround doesn't * cause resets to occur so fast as to fool the switch * 
into thinking the link is down. diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 89a31783fbb4..eb39a45de012 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -362,7 +362,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) mse102x_dump_packet(__func__, skb->len, skb->data); skb->protocol = eth_type_trans(skb, mse->ndev); - netif_rx_ni(skb); + netif_rx(skb); mse->ndev->stats.rx_packets++; mse->ndev->stats.rx_bytes += rxlen; @@ -731,7 +731,7 @@ static int mse102x_probe_spi(struct spi_device *spi) return 0; } -static int mse102x_remove_spi(struct spi_device *spi) +static void mse102x_remove_spi(struct spi_device *spi) { struct mse102x_net *mse = dev_get_drvdata(&spi->dev); struct mse102x_net_spi *mses = to_mse102x_spi(mse); @@ -741,8 +741,6 @@ static int mse102x_remove_spi(struct spi_device *spi) mse102x_remove_device_debugfs(mses); unregister_netdev(mse->ndev); - - return 0; } static const struct of_device_id mse102x_match_table[] = { diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 7779a36da3c8..7c52796273a4 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -461,11 +461,9 @@ static int w5100_spi_probe(struct spi_device *spi) return w5100_probe(&spi->dev, ops, priv_size, mac, spi->irq, -EINVAL); } -static int w5100_spi_remove(struct spi_device *spi) +static void w5100_spi_remove(struct spi_device *spi) { w5100_remove(&spi->dev); - - return 0; } static const struct spi_device_id w5100_spi_ids[] = { diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index ae24d6b86803..4fd7c39e1123 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -883,7 +883,7 @@ static void w5100_rx_work(struct work_struct *work) struct sk_buff *skb; while ((skb = w5100_rx_skb(priv->ndev))) - 
netif_rx_ni(skb); + netif_rx(skb); w5100_enable_intr(priv); } diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 0a045d9e3501..869e362e09c1 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1434,6 +1434,8 @@ static int temac_probe(struct platform_device *pdev) lp->indirect_lock = devm_kmalloc(&pdev->dev, sizeof(*lp->indirect_lock), GFP_KERNEL); + if (!lp->indirect_lock) + return -ENOMEM; spin_lock_init(lp->indirect_lock); } diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index b1fc153125d9..45c3c4a1101b 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -668,11 +668,11 @@ static void sixpack_close(struct tty_struct *tty) */ netif_stop_queue(sp->dev); + unregister_netdev(sp->dev); + del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - unregister_netdev(sp->dev); - /* Free all 6pack frame buffers after unreg. */ kfree(sp->rbuff); kfree(sp->xbuff); diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 7e527499d3ad..a2a12208e3ad 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -28,6 +28,7 @@ #include <asm/io.h> #include <asm/irq.h> #include <linux/uaccess.h> +#include <linux/jiffies.h> #include <net/ax25.h> #include "z8530.h" @@ -377,7 +378,7 @@ static int __init dmascc_init(void) udelay(2000000 / TMR_0_HZ); /* Timing loop */ - while (jiffies - time < 13) { + while (time_is_after_jiffies(time + 13)) { for (i = 0; i < hw[h].num_devs; i++) if (base[i] && counting[i]) { /* Read back Timer 1: latch; read LSB; read MSB */ @@ -525,7 +526,7 @@ static int __init setup_adapter(int card_base, int type, int n) /* Wait and detect IRQ */ time = jiffies; - while (jiffies - time < 2 + HZ / TMR_0_HZ); + while (time_is_after_jiffies(time + 2 + HZ / TMR_0_HZ)); irq = probe_irq_off(irqs); /* Clear pending interrupt, disable interrupts */ @@ -1353,7 
+1354,7 @@ static void es_isr(struct scc_priv *priv) /* Switch state */ write_scc(priv, R15, 0); if (priv->tx_count && - (jiffies - priv->tx_start) < priv->param.txtimeout) { + time_is_after_jiffies(priv->tx_start + priv->param.txtimeout)) { priv->state = TX_PAUSE; start_timer(priv, priv->param.txpause, 0); } else { diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 7db9cbd0f5de..6afdf1622944 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1304,7 +1304,7 @@ err_alloc_wq: return ret; } -static int adf7242_remove(struct spi_device *spi) +static void adf7242_remove(struct spi_device *spi) { struct adf7242_local *lp = spi_get_drvdata(spi); @@ -1316,8 +1316,6 @@ static int adf7242_remove(struct spi_device *spi) ieee802154_unregister_hw(lp->hw); mutex_destroy(&lp->bmux); ieee802154_free_hw(lp->hw); - - return 0; } static const struct of_device_id adf7242_of_match[] = { diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 4f5ef8a9a9a8..549d04b5f3d4 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1768,7 +1768,7 @@ free_dev: return rc; } -static int at86rf230_remove(struct spi_device *spi) +static void at86rf230_remove(struct spi_device *spi) { struct at86rf230_local *lp = spi_get_drvdata(spi); @@ -1778,8 +1778,6 @@ static int at86rf230_remove(struct spi_device *spi) ieee802154_free_hw(lp->hw); at86rf230_debugfs_remove(); dev_dbg(&spi->dev, "unregistered at86rf230\n"); - - return 0; } static const struct of_device_id at86rf230_of_match[] = { diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 2bc730fd260e..187cbc634ce8 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -831,7 +831,7 @@ static void ca8210_rx_done(struct cas_control *cas_ctl) finish:; } -static int ca8210_remove(struct spi_device *spi_device); +static void ca8210_remove(struct 
spi_device *spi_device); /** * ca8210_spi_transfer_complete() - Called when a single spi transfer has @@ -3049,7 +3049,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv) * * Return: 0 or linux error code */ -static int ca8210_remove(struct spi_device *spi_device) +static void ca8210_remove(struct spi_device *spi_device) { struct ca8210_priv *priv; struct ca8210_platform_data *pdata; @@ -3089,8 +3089,6 @@ static int ca8210_remove(struct spi_device *spi_device) if (IS_ENABLED(CONFIG_IEEE802154_CA8210_DEBUGFS)) ca8210_test_interface_clear(priv); } - - return 0; } /** diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 89c046b204e0..1e1f40f628a0 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -1213,7 +1213,7 @@ err_hw_init: return ret; } -static int cc2520_remove(struct spi_device *spi) +static void cc2520_remove(struct spi_device *spi) { struct cc2520_private *priv = spi_get_drvdata(spi); @@ -1222,8 +1222,6 @@ static int cc2520_remove(struct spi_device *spi) ieee802154_unregister_hw(priv->hw); ieee802154_free_hw(priv->hw); - - return 0; } static const struct spi_device_id cc2520_ids[] = { diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 383231b85464..c927a5ae0d05 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1335,7 +1335,7 @@ free_dev: return ret; } -static int mcr20a_remove(struct spi_device *spi) +static void mcr20a_remove(struct spi_device *spi) { struct mcr20a_local *lp = spi_get_drvdata(spi); @@ -1343,8 +1343,6 @@ static int mcr20a_remove(struct spi_device *spi) ieee802154_unregister_hw(lp->hw); ieee802154_free_hw(lp->hw); - - return 0; } static const struct of_device_id mcr20a_of_match[] = { diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index ff83e00b77af..ee4cfbf2c5cc 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ 
-1356,7 +1356,7 @@ err_ret: return ret; } -static int mrf24j40_remove(struct spi_device *spi) +static void mrf24j40_remove(struct spi_device *spi) { struct mrf24j40 *devrec = spi_get_drvdata(spi); @@ -1366,8 +1366,6 @@ static int mrf24j40_remove(struct spi_device *spi) ieee802154_free_hw(devrec->hw); /* TODO: Will ieee802154_free_device() wait until ->xmit() is * complete? */ - - return 0; } static const struct of_device_id mrf24j40_of_match[] = { diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index d037682fb7ad..6782c2cbf542 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -2,7 +2,9 @@ config QCOM_IPA tristate "Qualcomm IPA support" depends on NET && QCOM_SMEM depends on ARCH_QCOM || COMPILE_TEST + depends on INTERCONNECT depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST) + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select QCOM_MDT_LOADER if ARCH_QCOM select QCOM_SCM select QCOM_QMI_HELPERS diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index d05f86fe78c9..720394c0639b 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -74,7 +74,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); /* do not fool net_timestamp_check() with various clock bases */ - skb->tstamp = 0; + skb_clear_tstamp(skb); skb_orphan(skb); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d87c06c317ed..33753a2fde29 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -285,7 +285,7 @@ static void macvlan_broadcast(struct sk_buff *skb, if (likely(nskb)) err = macvlan_broadcast_one(nskb, vlan, eth, mode == MACVLAN_MODE_BRIDGE) ?: - netif_rx_ni(nskb); + netif_rx(nskb); macvlan_count_rx(vlan, skb->len + ETH_HLEN, err == NET_RX_SUCCESS, true); } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6b12902a803f..cecf8c63096c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -133,11 +133,17 @@ static void macvtap_setup(struct net_device 
*dev) dev->tx_queue_len = TUN_READQ_SIZE; } +static struct net *macvtap_link_net(const struct net_device *dev) +{ + return dev_net(macvlan_dev_real_dev(dev)); +} + static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, + .get_link_net = macvtap_link_net, .priv_size = sizeof(struct macvtap_dev), }; diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig index 2929471395ae..dc71657d9184 100644 --- a/drivers/net/mctp/Kconfig +++ b/drivers/net/mctp/Kconfig @@ -21,6 +21,18 @@ config MCTP_SERIAL Say y here if you need to connect to MCTP endpoints over serial. To compile as a module, use m; the module will be called mctp-serial. +config MCTP_TRANSPORT_I2C + tristate "MCTP SMBus/I2C transport" + # i2c-mux is optional, but we must build as a module if i2c-mux is a module + depends on I2C_MUX || !I2C_MUX + depends on I2C + depends on I2C_SLAVE + select MCTP_FLOWS + help + Provides a driver to access MCTP devices over SMBus/I2C transport, + from DMTF specification DSP0237. A MCTP protocol network device is + created for each I2C bus that has been assigned a mctp-i2c device. 
+ endmenu endif diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile index d32622613ce4..1ca3e6028f77 100644 --- a/drivers/net/mctp/Makefile +++ b/drivers/net/mctp/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o +obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c new file mode 100644 index 000000000000..baf7afac7857 --- /dev/null +++ b/drivers/net/mctp/mctp-i2c.c @@ -0,0 +1,1082 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Controller Transport Protocol (MCTP) + * Implements DMTF specification + * "DSP0237 Management Component Transport Protocol (MCTP) SMBus/I2C + * Transport Binding" + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.2.0.pdf + * + * A netdev is created for each I2C bus that handles MCTP. In the case of an I2C + * mux topology a single I2C client is attached to the root of the mux topology, + * shared between all mux I2C busses underneath. For non-mux cases an I2C client + * is attached per netdev. + * + * mctp-i2c-controller.yml devicetree binding has further details. 
+ * + * Copyright (c) 2022 Code Construct + * Copyright (c) 2022 Google + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/i2c.h> +#include <linux/i2c-mux.h> +#include <linux/if_arp.h> +#include <net/mctp.h> +#include <net/mctpdevice.h> + +/* byte_count is limited to u8 */ +#define MCTP_I2C_MAXBLOCK 255 +/* One byte is taken by source_slave */ +#define MCTP_I2C_MAXMTU (MCTP_I2C_MAXBLOCK - 1) +#define MCTP_I2C_MINMTU (64 + 4) +/* Allow space for dest_address, command, byte_count, data, PEC */ +#define MCTP_I2C_BUFSZ (3 + MCTP_I2C_MAXBLOCK + 1) +#define MCTP_I2C_MINLEN 8 +#define MCTP_I2C_COMMANDCODE 0x0f +#define MCTP_I2C_TX_WORK_LEN 100 +/* Sufficient for 64kB at min mtu */ +#define MCTP_I2C_TX_QUEUE_LEN 1100 + +#define MCTP_I2C_OF_PROP "mctp-controller" + +enum { + MCTP_I2C_FLOW_STATE_NEW = 0, + MCTP_I2C_FLOW_STATE_ACTIVE, +}; + +/* List of all struct mctp_i2c_client + * Lock protects driver_clients and also prevents adding/removing adapters + * during mctp_i2c_client probe/remove. + */ +static DEFINE_MUTEX(driver_clients_lock); +static LIST_HEAD(driver_clients); + +struct mctp_i2c_client; + +/* The netdev structure. One of these per I2C adapter. */ +struct mctp_i2c_dev { + struct net_device *ndev; + struct i2c_adapter *adapter; + struct mctp_i2c_client *client; + struct list_head list; /* For mctp_i2c_client.devs */ + + size_t rx_pos; + u8 rx_buffer[MCTP_I2C_BUFSZ]; + struct completion rx_done; + + struct task_struct *tx_thread; + wait_queue_head_t tx_wq; + struct sk_buff_head tx_queue; + u8 tx_scratch[MCTP_I2C_BUFSZ]; + + /* A fake entry in our tx queue to perform an unlock operation */ + struct sk_buff unlock_marker; + + /* Spinlock protects i2c_lock_count, release_count, allow_rx */ + spinlock_t lock; + int i2c_lock_count; + int release_count; + /* Indicates that the netif is ready to receive incoming packets */ + bool allow_rx; + +}; + +/* The i2c client structure. 
One per hardware i2c bus at the top of the + * mux tree, shared by multiple netdevs + */ +struct mctp_i2c_client { + struct i2c_client *client; + u8 lladdr; + + struct mctp_i2c_dev *sel; + struct list_head devs; + spinlock_t sel_lock; /* Protects sel and devs */ + + struct list_head list; /* For driver_clients */ +}; + +/* Header on the wire. */ +struct mctp_i2c_hdr { + u8 dest_slave; + u8 command; + /* Count of bytes following byte_count, excluding PEC */ + u8 byte_count; + u8 source_slave; +}; + +static int mctp_i2c_recv(struct mctp_i2c_dev *midev); +static int mctp_i2c_slave_cb(struct i2c_client *client, + enum i2c_slave_event event, u8 *val); +static void mctp_i2c_ndo_uninit(struct net_device *dev); +static int mctp_i2c_ndo_open(struct net_device *dev); + +static struct i2c_adapter *mux_root_adapter(struct i2c_adapter *adap) +{ +#if IS_ENABLED(CONFIG_I2C_MUX) + return i2c_root_adapter(&adap->dev); +#else + /* In non-mux config all i2c adapters are root adapters */ + return adap; +#endif +} + +/* Creates a new i2c slave device attached to the root adapter. + * Sets up the slave callback. + * Must be called with a client on a root adapter. 
+ */ +static struct mctp_i2c_client *mctp_i2c_new_client(struct i2c_client *client) +{ + struct mctp_i2c_client *mcli = NULL; + struct i2c_adapter *root = NULL; + int rc; + + if (client->flags & I2C_CLIENT_TEN) { + dev_err(&client->dev, "failed, MCTP requires a 7-bit I2C address, addr=0x%x\n", + client->addr); + rc = -EINVAL; + goto err; + } + + root = mux_root_adapter(client->adapter); + if (!root) { + dev_err(&client->dev, "failed to find root adapter\n"); + rc = -ENOENT; + goto err; + } + if (root != client->adapter) { + dev_err(&client->dev, + "A mctp-i2c-controller client cannot be placed on an I2C mux adapter.\n" + " It should be placed on the mux tree root adapter\n" + " then set mctp-controller property on adapters to attach\n"); + rc = -EINVAL; + goto err; + } + + mcli = kzalloc(sizeof(*mcli), GFP_KERNEL); + if (!mcli) { + rc = -ENOMEM; + goto err; + } + spin_lock_init(&mcli->sel_lock); + INIT_LIST_HEAD(&mcli->devs); + INIT_LIST_HEAD(&mcli->list); + mcli->lladdr = client->addr & 0xff; + mcli->client = client; + i2c_set_clientdata(client, mcli); + + rc = i2c_slave_register(mcli->client, mctp_i2c_slave_cb); + if (rc < 0) { + dev_err(&client->dev, "i2c register failed %d\n", rc); + mcli->client = NULL; + i2c_set_clientdata(client, NULL); + goto err; + } + + return mcli; +err: + if (mcli) { + if (mcli->client) + i2c_unregister_device(mcli->client); + kfree(mcli); + } + return ERR_PTR(rc); +} + +static void mctp_i2c_free_client(struct mctp_i2c_client *mcli) +{ + int rc; + + WARN_ON(!mutex_is_locked(&driver_clients_lock)); + WARN_ON(!list_empty(&mcli->devs)); + WARN_ON(mcli->sel); /* sanity check, no locking */ + + rc = i2c_slave_unregister(mcli->client); + /* Leak if it fails, we can't propagate errors upwards */ + if (rc < 0) + dev_err(&mcli->client->dev, "i2c unregister failed %d\n", rc); + else + kfree(mcli); +} + +/* Switch the mctp i2c device to receive responses. 
+ * Call with sel_lock held + */ +static void __mctp_i2c_device_select(struct mctp_i2c_client *mcli, + struct mctp_i2c_dev *midev) +{ + assert_spin_locked(&mcli->sel_lock); + /* Take the reference for the new selection before dropping the one + * held for the old selection; midev may be NULL to clear it. + */ + if (midev) + dev_hold(midev->ndev); + if (mcli->sel) + dev_put(mcli->sel->ndev); + mcli->sel = midev; +} + +/* Switch the mctp i2c device to receive responses. + * Locked wrapper: takes sel_lock around __mctp_i2c_device_select(). + */ +static void mctp_i2c_device_select(struct mctp_i2c_client *mcli, + struct mctp_i2c_dev *midev) +{ + unsigned long flags; + + spin_lock_irqsave(&mcli->sel_lock, flags); + __mctp_i2c_device_select(mcli, midev); + spin_unlock_irqrestore(&mcli->sel_lock, flags); +} + +/* I2C slave event callback. Bytes written to us are accumulated in the + * rx_buffer of the currently selected device and the frame is passed to + * mctp_i2c_recv() on STOP. A netdev reference is held for the duration of + * the event. Returns 0 when no device is selected or for non-STOP events, + * otherwise the result of mctp_i2c_recv(). + */ +static int mctp_i2c_slave_cb(struct i2c_client *client, + enum i2c_slave_event event, u8 *val) +{ + struct mctp_i2c_client *mcli = i2c_get_clientdata(client); + struct mctp_i2c_dev *midev = NULL; + unsigned long flags; + int rc = 0; + + spin_lock_irqsave(&mcli->sel_lock, flags); + midev = mcli->sel; + if (midev) + dev_hold(midev->ndev); + spin_unlock_irqrestore(&mcli->sel_lock, flags); + + if (!midev) + return 0; + + switch (event) { + case I2C_SLAVE_WRITE_RECEIVED: + if (midev->rx_pos < MCTP_I2C_BUFSZ) { + midev->rx_buffer[midev->rx_pos] = *val; + midev->rx_pos++; + } else { + midev->ndev->stats.rx_over_errors++; + } + + break; + case I2C_SLAVE_WRITE_REQUESTED: + /* dest_slave as first byte */ + midev->rx_buffer[0] = mcli->lladdr << 1; + midev->rx_pos = 1; + break; + case I2C_SLAVE_STOP: + rc = mctp_i2c_recv(midev); + break; + default: + break; + } + + dev_put(midev->ndev); + return rc; +} + +/* Processes incoming data that has been accumulated by the slave cb. + * Returns -EINVAL for a malformed frame (too short, wrong command code, + * byte_count mismatch or bad PEC), -ENOMEM if no skb could be allocated, + * and 0 once the frame was either handed to netif_rx() or dropped because + * rx is currently not allowed. + */ +static int mctp_i2c_recv(struct mctp_i2c_dev *midev) +{ + struct net_device *ndev = midev->ndev; + struct mctp_i2c_hdr *hdr; + struct mctp_skb_cb *cb; + struct sk_buff *skb; + unsigned long flags; + u8 pec, calc_pec; + size_t recvlen; + int status; + + /* + 1 for the PEC */ + if (midev->rx_pos < MCTP_I2C_MINLEN + 1) { + ndev->stats.rx_length_errors++; + return -EINVAL; + } + /* recvlen excludes PEC */ + recvlen = 
midev->rx_pos - 1; + + hdr = (void *)midev->rx_buffer; + if (hdr->command != MCTP_I2C_COMMANDCODE) { + ndev->stats.rx_dropped++; + return -EINVAL; + } + + if (hdr->byte_count + offsetof(struct mctp_i2c_hdr, source_slave) != recvlen) { + ndev->stats.rx_length_errors++; + return -EINVAL; + } + + pec = midev->rx_buffer[midev->rx_pos - 1]; + calc_pec = i2c_smbus_pec(0, midev->rx_buffer, recvlen); + if (pec != calc_pec) { + ndev->stats.rx_crc_errors++; + return -EINVAL; + } + + skb = netdev_alloc_skb(ndev, recvlen); + if (!skb) { + ndev->stats.rx_dropped++; + return -ENOMEM; + } + + skb->protocol = htons(ETH_P_MCTP); + skb_put_data(skb, midev->rx_buffer, recvlen); + skb_reset_mac_header(skb); + skb_pull(skb, sizeof(struct mctp_i2c_hdr)); + skb_reset_network_header(skb); + + cb = __mctp_cb(skb); + cb->halen = 1; + cb->haddr[0] = hdr->source_slave >> 1; + + /* We need to ensure that the netif is not used once netdev + * unregister occurs. rx_done is re-armed under midev->lock and + * completed after netif_rx(), so mctp_i2c_unregister() can clear + * allow_rx and then wait for a receive that is still in flight. + */ + spin_lock_irqsave(&midev->lock, flags); + if (midev->allow_rx) { + reinit_completion(&midev->rx_done); + spin_unlock_irqrestore(&midev->lock, flags); + + status = netif_rx(skb); + complete(&midev->rx_done); + } else { + status = NET_RX_DROP; + spin_unlock_irqrestore(&midev->lock, flags); + /* netif_rx() was not called, so the skb is still ours to free */ + kfree_skb(skb); + } + + if (status == NET_RX_SUCCESS) { + ndev->stats.rx_packets++; + ndev->stats.rx_bytes += recvlen; + } else { + ndev->stats.rx_dropped++; + } + return 0; +} + +/* Result of classifying an outgoing skb against its MCTP flow key, + * see mctp_i2c_get_tx_flow_state(). + */ +enum mctp_i2c_flow_state { + MCTP_I2C_TX_FLOW_INVALID, + MCTP_I2C_TX_FLOW_NONE, + MCTP_I2C_TX_FLOW_NEW, + MCTP_I2C_TX_FLOW_EXISTING, +}; + +/* Classifies skb for transmit: NONE when it carries no SKB_EXT_MCTP + * extension or no key, INVALID when the key is no longer valid, NEW for + * the first packet of a flow (the key is moved to ACTIVE here) and + * EXISTING otherwise. midev is not used by the body. + */ +static enum mctp_i2c_flow_state +mctp_i2c_get_tx_flow_state(struct mctp_i2c_dev *midev, struct sk_buff *skb) +{ + enum mctp_i2c_flow_state state; + struct mctp_sk_key *key; + struct mctp_flow *flow; + unsigned long flags; + + flow = skb_ext_find(skb, SKB_EXT_MCTP); + if (!flow) + return MCTP_I2C_TX_FLOW_NONE; + + key = flow->key; + if (!key) + return MCTP_I2C_TX_FLOW_NONE; + + spin_lock_irqsave(&key->lock, flags); + /* If the key is present but 
invalid, we're unlikely to be able + * to handle the flow at all; just drop now + */ + if (!key->valid) { + state = MCTP_I2C_TX_FLOW_INVALID; + + } else if (key->dev_flow_state == MCTP_I2C_FLOW_STATE_NEW) { + key->dev_flow_state = MCTP_I2C_FLOW_STATE_ACTIVE; + state = MCTP_I2C_TX_FLOW_NEW; + } else { + state = MCTP_I2C_TX_FLOW_EXISTING; + } + + spin_unlock_irqrestore(&key->lock, flags); + + return state; +} + +/* We're not contending with ourselves here; we only need to exclude other + * i2c clients from using the bus. refcounts are simply to prevent + * recursive locking. + */ +static void mctp_i2c_lock_nest(struct mctp_i2c_dev *midev) +{ + unsigned long flags; + bool lock; + + spin_lock_irqsave(&midev->lock, flags); + lock = midev->i2c_lock_count == 0; + midev->i2c_lock_count++; + spin_unlock_irqrestore(&midev->lock, flags); + + if (lock) + i2c_lock_bus(midev->adapter, I2C_LOCK_SEGMENT); +} + +/* Drops one nesting level and unlocks the bus when the count reaches zero. + * NOTE(review): on the underflow WARN the count stays 0, so unlock is still + * true and i2c_unlock_bus() is called - confirm that is intended. + */ +static void mctp_i2c_unlock_nest(struct mctp_i2c_dev *midev) +{ + unsigned long flags; + bool unlock; + + spin_lock_irqsave(&midev->lock, flags); + if (!WARN_ONCE(midev->i2c_lock_count == 0, "lock count underflow!")) + midev->i2c_lock_count--; + unlock = midev->i2c_lock_count == 0; + spin_unlock_irqrestore(&midev->lock, flags); + + if (unlock) + i2c_unlock_bus(midev->adapter, I2C_LOCK_SEGMENT); +} + +/* Unlocks the bus if was previously locked, used for cleanup */ +static void mctp_i2c_unlock_reset(struct mctp_i2c_dev *midev) +{ + unsigned long flags; + bool unlock; + + spin_lock_irqsave(&midev->lock, flags); + unlock = midev->i2c_lock_count > 0; + midev->i2c_lock_count = 0; + spin_unlock_irqrestore(&midev->lock, flags); + + if (unlock) + i2c_unlock_bus(midev->adapter, I2C_LOCK_SEGMENT); +} + +/* Transmits one skb as a single i2c write message. The PEC byte is + * appended in place when the skb has tailroom, otherwise the frame is + * first copied to midev->tx_scratch. Bus locking depends on the flow + * state of the skb. The skb is not freed here; mctp_i2c_tx_thread() + * does that after the call. + */ +static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) +{ + struct net_device_stats *stats = &midev->ndev->stats; + enum mctp_i2c_flow_state fs; + struct mctp_i2c_hdr *hdr; + struct i2c_msg msg = {0}; + u8 *pecp; + int rc; + + fs = mctp_i2c_get_tx_flow_state(midev, 
skb); + + hdr = (void *)skb_mac_header(skb); + /* Sanity check that packet contents matches skb length, + * and can't exceed MCTP_I2C_BUFSZ + */ + if (skb->len != hdr->byte_count + 3) { + dev_warn_ratelimited(&midev->adapter->dev, + "Bad tx length %d vs skb %u\n", + hdr->byte_count + 3, skb->len); + return; + } + + if (skb_tailroom(skb) >= 1) { + /* Linear case with space, we can just append the PEC */ + skb_put(skb, 1); + } else { + /* Otherwise need to copy the buffer */ + skb_copy_bits(skb, 0, midev->tx_scratch, skb->len); + hdr = (void *)midev->tx_scratch; + } + + pecp = (void *)&hdr->source_slave + hdr->byte_count; + *pecp = i2c_smbus_pec(0, (u8 *)hdr, hdr->byte_count + 3); + msg.buf = (void *)&hdr->command; + /* command, bytecount, data, pec */ + msg.len = 2 + hdr->byte_count + 1; + msg.addr = hdr->dest_slave >> 1; + + switch (fs) { + case MCTP_I2C_TX_FLOW_NONE: + /* no flow: full lock & unlock */ + mctp_i2c_lock_nest(midev); + mctp_i2c_device_select(midev->client, midev); + rc = __i2c_transfer(midev->adapter, &msg, 1); + mctp_i2c_unlock_nest(midev); + break; + + case MCTP_I2C_TX_FLOW_NEW: + /* new flow: lock, tx, but don't unlock; that will happen + * on flow release + */ + mctp_i2c_lock_nest(midev); + mctp_i2c_device_select(midev->client, midev); + fallthrough; + + case MCTP_I2C_TX_FLOW_EXISTING: + /* existing flow: we already have the lock; just tx */ + rc = __i2c_transfer(midev->adapter, &msg, 1); + break; + + case MCTP_I2C_TX_FLOW_INVALID: + /* NOTE(review): like the bad-length return above, this path + * updates no tx statistics - confirm that is intended. + */ + return; + } + + if (rc < 0) { + dev_warn_ratelimited(&midev->adapter->dev, + "__i2c_transfer failed %d\n", rc); + stats->tx_errors++; + } else { + stats->tx_bytes += skb->len; + stats->tx_packets++; + } +} + +/* Runs on the tx thread when the unlock marker is dequeued: drops + * release_count nesting levels (clamped to i2c_lock_count) and unlocks + * the bus if that brings the count to zero. + */ +static void mctp_i2c_flow_release(struct mctp_i2c_dev *midev) +{ + unsigned long flags; + bool unlock; + + spin_lock_irqsave(&midev->lock, flags); + if (midev->release_count > midev->i2c_lock_count) { + WARN_ONCE(1, "release count overflow"); + midev->release_count = midev->i2c_lock_count; + } + + 
midev->i2c_lock_count -= midev->release_count; + unlock = midev->i2c_lock_count == 0 && midev->release_count > 0; + midev->release_count = 0; + spin_unlock_irqrestore(&midev->lock, flags); + + if (unlock) + i2c_unlock_bus(midev->adapter, I2C_LOCK_SEGMENT); +} + +/* header_ops.create: prepends the MCTP-over-I2C header. daddr and saddr + * each point at a one-byte link-layer address; both are shifted left by + * one and the source additionally has bit 0 set. byte_count is len + 1. + * Returns -EMSGSIZE if len exceeds MCTP_I2C_MAXMTU, -EINVAL if either + * address pointer is NULL, and 0 on success. + */ +static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct mctp_i2c_hdr *hdr; + struct mctp_hdr *mhdr; + u8 lldst, llsrc; + + if (len > MCTP_I2C_MAXMTU) + return -EMSGSIZE; + + /* Either address may be absent; refuse rather than dereference NULL */ + if (!daddr || !saddr) + return -EINVAL; + + lldst = *((u8 *)daddr); + llsrc = *((u8 *)saddr); + + skb_push(skb, sizeof(struct mctp_i2c_hdr)); + skb_reset_mac_header(skb); + hdr = (void *)skb_mac_header(skb); + mhdr = mctp_hdr(skb); + hdr->dest_slave = (lldst << 1) & 0xff; + hdr->command = MCTP_I2C_COMMANDCODE; + hdr->byte_count = len + 1; + hdr->source_slave = ((llsrc << 1) & 0xff) | 0x01; + mhdr->ver = 0x01; + + return 0; +} + +/* Kernel thread body: drains tx_queue until kthread_should_stop(), waking + * the netdev queue if it had been stopped. The unlock marker entry + * triggers mctp_i2c_flow_release() instead of a transmit; real skbs are + * freed after mctp_i2c_xmit(). + */ +static int mctp_i2c_tx_thread(void *data) +{ + struct mctp_i2c_dev *midev = data; + struct sk_buff *skb; + unsigned long flags; + + for (;;) { + if (kthread_should_stop()) + break; + + spin_lock_irqsave(&midev->tx_queue.lock, flags); + skb = __skb_dequeue(&midev->tx_queue); + if (netif_queue_stopped(midev->ndev)) + netif_wake_queue(midev->ndev); + spin_unlock_irqrestore(&midev->tx_queue.lock, flags); + + if (skb == &midev->unlock_marker) { + mctp_i2c_flow_release(midev); + + } else if (skb) { + mctp_i2c_xmit(midev, skb); + kfree_skb(skb); + + } else { + wait_event_idle(midev->tx_wq, + !skb_queue_empty(&midev->tx_queue) || + kthread_should_stop()); + } + } + + return 0; +} + +/* ndo_start_xmit: queues skb for the tx thread and stops the netdev queue + * once MCTP_I2C_TX_WORK_LEN entries are pending. + */ +static netdev_tx_t mctp_i2c_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct mctp_i2c_dev *midev = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&midev->tx_queue.lock, flags); + if (skb_queue_len(&midev->tx_queue) >= MCTP_I2C_TX_WORK_LEN) { + netif_stop_queue(dev); + spin_unlock_irqrestore(&midev->tx_queue.lock, flags); + 
netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); + return NETDEV_TX_BUSY; + } + + __skb_queue_tail(&midev->tx_queue, skb); + if (skb_queue_len(&midev->tx_queue) == MCTP_I2C_TX_WORK_LEN) + netif_stop_queue(dev); + spin_unlock_irqrestore(&midev->tx_queue.lock, flags); + + wake_up(&midev->tx_wq); + return NETDEV_TX_OK; +} + +/* mctp_netdev_ops.release_flow: records one pending release and wakes the + * tx thread. key is not used by the body. + */ +static void mctp_i2c_release_flow(struct mctp_dev *mdev, + struct mctp_sk_key *key) + +{ + struct mctp_i2c_dev *midev = netdev_priv(mdev->dev); + unsigned long flags; + + spin_lock_irqsave(&midev->lock, flags); + midev->release_count++; + spin_unlock_irqrestore(&midev->lock, flags); + + /* Ensure we have a release operation queued, through the fake + * marker skb. + * NOTE(review): tx_queue.lock is taken with plain spin_lock() here + * while the tx thread and mctp_i2c_start_xmit() use the irqsave + * variant - confirm the calling context makes that safe. + */ + spin_lock(&midev->tx_queue.lock); + if (!midev->unlock_marker.next) + __skb_queue_tail(&midev->tx_queue, &midev->unlock_marker); + spin_unlock(&midev->tx_queue.lock); + + wake_up(&midev->tx_wq); +} + +static const struct net_device_ops mctp_i2c_ops = { + .ndo_start_xmit = mctp_i2c_start_xmit, + .ndo_uninit = mctp_i2c_ndo_uninit, + .ndo_open = mctp_i2c_ndo_open, +}; + +static const struct header_ops mctp_i2c_headops = { + .create = mctp_i2c_header_create, +}; + +static const struct mctp_netdev_ops mctp_i2c_mctp_ops = { + .release_flow = mctp_i2c_release_flow, +}; + +/* Setup callback passed to alloc_netdev(): fixes the link type, MTU + * limits, header and address lengths, and installs the ops tables above. + */ +static void mctp_i2c_net_setup(struct net_device *dev) +{ + dev->type = ARPHRD_MCTP; + + dev->mtu = MCTP_I2C_MAXMTU; + dev->min_mtu = MCTP_I2C_MINMTU; + dev->max_mtu = MCTP_I2C_MAXMTU; + dev->tx_queue_len = MCTP_I2C_TX_QUEUE_LEN; + + dev->hard_header_len = sizeof(struct mctp_i2c_hdr); + dev->addr_len = 1; + + dev->netdev_ops = &mctp_i2c_ops; + dev->header_ops = &mctp_i2c_headops; +} + +/* Populates the mctp_i2c_dev priv struct for a netdev. + * Returns an error pointer on failure. 
+ */ +static struct mctp_i2c_dev *mctp_i2c_midev_init(struct net_device *dev, + struct mctp_i2c_client *mcli, + struct i2c_adapter *adap) +{ + struct mctp_i2c_dev *midev = netdev_priv(dev); + unsigned long flags; + + midev->tx_thread = kthread_create(mctp_i2c_tx_thread, midev, + "%s/tx", dev->name); + if (IS_ERR(midev->tx_thread)) + return ERR_CAST(midev->tx_thread); + + midev->ndev = dev; + get_device(&adap->dev); + midev->adapter = adap; + get_device(&mcli->client->dev); + midev->client = mcli; + INIT_LIST_HEAD(&midev->list); + spin_lock_init(&midev->lock); + midev->i2c_lock_count = 0; + midev->release_count = 0; + init_completion(&midev->rx_done); + complete(&midev->rx_done); /* rx_done starts out completed so that wait_for_completion() in mctp_i2c_unregister() does not block when no receive is in flight */ + init_waitqueue_head(&midev->tx_wq); + skb_queue_head_init(&midev->tx_queue); + + /* Add to the parent mcli */ + spin_lock_irqsave(&mcli->sel_lock, flags); + list_add(&midev->list, &mcli->devs); + /* Select a device by default: only when the client has no selection yet */ + if (!mcli->sel) + __mctp_i2c_device_select(mcli, midev); + spin_unlock_irqrestore(&mcli->sel_lock, flags); + + /* Start the worker thread; kthread_create() above only created it */ + wake_up_process(midev->tx_thread); + + return midev; +} + +/* Counterpart of mctp_i2c_midev_init. + * Stops the tx thread if it is still set, force-releases the bus lock, + * unlinks midev from its client (reselecting the first remaining device + * if midev was the selected one), purges tx_queue and drops the adapter + * and client device references. + */ +static void mctp_i2c_midev_free(struct mctp_i2c_dev *midev) +{ + struct mctp_i2c_client *mcli = midev->client; + unsigned long flags; + + if (midev->tx_thread) { + kthread_stop(midev->tx_thread); + midev->tx_thread = NULL; + } + + /* Unconditionally unlock on close */ + mctp_i2c_unlock_reset(midev); + + /* Remove the netdev from the parent i2c client. 
*/ + spin_lock_irqsave(&mcli->sel_lock, flags); + list_del(&midev->list); + if (mcli->sel == midev) { + struct mctp_i2c_dev *first; + + first = list_first_entry_or_null(&mcli->devs, struct mctp_i2c_dev, list); + __mctp_i2c_device_select(mcli, first); + } + spin_unlock_irqrestore(&mcli->sel_lock, flags); + /* NOTE(review): tx_queue may still hold &midev->unlock_marker, which is embedded in midev rather than allocated - confirm skb_queue_purge() can never be handed it */ + skb_queue_purge(&midev->tx_queue); + put_device(&midev->adapter->dev); + put_device(&mcli->client->dev); +} + +/* Stops, unregisters, and frees midev */ +static void mctp_i2c_unregister(struct mctp_i2c_dev *midev) +{ + unsigned long flags; + + /* Stop tx thread prior to unregister, it uses netif_() functions */ + kthread_stop(midev->tx_thread); + midev->tx_thread = NULL; + + /* Prevent any new rx in mctp_i2c_recv(), let any pending work finish */ + spin_lock_irqsave(&midev->lock, flags); + midev->allow_rx = false; + spin_unlock_irqrestore(&midev->lock, flags); + wait_for_completion(&midev->rx_done); + + mctp_unregister_netdev(midev->ndev); + /* midev's resources have been released now by the mctp_i2c_ndo_uninit + * callback. The structure itself is the netdev private area + * (netdev_priv()), so midev->ndev below is read before the memory + * goes away in free_netdev(). + */ + + free_netdev(midev->ndev); +} + /* ndo_uninit callback: releases everything set up by mctp_i2c_midev_init() */ +static void mctp_i2c_ndo_uninit(struct net_device *dev) +{ + struct mctp_i2c_dev *midev = netdev_priv(dev); + + /* Perform cleanup here to ensure that mcli->sel isn't holding + * a reference that would prevent unregister_netdevice() + * from completing. 
+ */ + mctp_i2c_midev_free(midev); +} + /* ndo_open callback: enables delivery of received frames; always returns 0 */ +static int mctp_i2c_ndo_open(struct net_device *dev) +{ + struct mctp_i2c_dev *midev = netdev_priv(dev); + unsigned long flags; + + /* i2c rx handler can only pass packets once the netdev is registered */ + spin_lock_irqsave(&midev->lock, flags); + midev->allow_rx = true; + spin_unlock_irqrestore(&midev->lock, flags); + + return 0; +} + /* Allocates, initialises and registers the "mctpi2c<nr>" netdev for adap under mcli. The caller is expected to hold driver_clients_lock (only WARNed about). Returns 0 on success or a negative errno. */ +static int mctp_i2c_add_netdev(struct mctp_i2c_client *mcli, + struct i2c_adapter *adap) +{ + struct mctp_i2c_dev *midev = NULL; + struct net_device *ndev = NULL; + struct i2c_adapter *root; + unsigned long flags; + char namebuf[30]; + int rc; + + root = mux_root_adapter(adap); + if (root != mcli->client->adapter) { /* NOTE(review): the message below prints the client's adapter and adap's root, not adap itself - confirm the intended wording */ + dev_err(&mcli->client->dev, + "I2C adapter %s is not a child bus of %s\n", + mcli->client->adapter->name, root->name); + return -EINVAL; + } + + WARN_ON(!mutex_is_locked(&driver_clients_lock)); + snprintf(namebuf, sizeof(namebuf), "mctpi2c%d", adap->nr); + ndev = alloc_netdev(sizeof(*midev), namebuf, NET_NAME_ENUM, mctp_i2c_net_setup); + if (!ndev) { + dev_err(&mcli->client->dev, "alloc netdev failed\n"); + rc = -ENOMEM; + goto err; + } + dev_net_set(ndev, current->nsproxy->net_ns); + SET_NETDEV_DEV(ndev, &adap->dev); + dev_addr_set(ndev, &mcli->lladdr); + + midev = mctp_i2c_midev_init(ndev, mcli, adap); + if (IS_ERR(midev)) { + rc = PTR_ERR(midev); + midev = NULL; + goto err; + } + + rc = mctp_register_netdev(ndev, &mctp_i2c_mctp_ops); + if (rc < 0) { + dev_err(&mcli->client->dev, + "register netdev \"%s\" failed %d\n", + ndev->name, rc); + goto err; + } + /* NOTE(review): allow_rx is cleared here after registration while mctp_i2c_ndo_open() sets it - confirm the device cannot be opened before this point, otherwise rx would stay disabled */ + spin_lock_irqsave(&midev->lock, flags); + midev->allow_rx = false; + spin_unlock_irqrestore(&midev->lock, flags); + + return 0; +err: /* NOTE(review): confirm a failed mctp_register_netdev() cannot already have invoked mctp_i2c_ndo_uninit(), which would make the call below a second mctp_i2c_midev_free() */ + if (midev) + mctp_i2c_midev_free(midev); + if (ndev) + free_netdev(ndev); + return rc; +} + +/* Removes any netdev for adap. 
mcli is the parent root i2c client */ +static void mctp_i2c_remove_netdev(struct mctp_i2c_client *mcli, + struct i2c_adapter *adap) +{ + struct mctp_i2c_dev *midev = NULL, *m = NULL; + unsigned long flags; + + WARN_ON(!mutex_is_locked(&driver_clients_lock)); + spin_lock_irqsave(&mcli->sel_lock, flags); + /* List size is limited by number of MCTP netdevs on a single hardware bus. + * Only the lookup runs under sel_lock; the unregister below is done + * after the lock has been dropped. + */ + list_for_each_entry(m, &mcli->devs, list) + if (m->adapter == adap) { + midev = m; + break; + } + spin_unlock_irqrestore(&mcli->sel_lock, flags); + + if (midev) + mctp_i2c_unregister(midev); +} + +/* Determines whether a device is an i2c adapter. + * Optionally returns the root i2c_adapter through ret_root. + * Returns the adapter, or NULL if dev is not an adapter or no root + * adapter could be found for it (which also triggers a one-time warning). + */ +static struct i2c_adapter *mctp_i2c_get_adapter(struct device *dev, + struct i2c_adapter **ret_root) +{ + struct i2c_adapter *root, *adap; + + if (dev->type != &i2c_adapter_type) + return NULL; + adap = to_i2c_adapter(dev); + root = mux_root_adapter(adap); + WARN_ONCE(!root, "MCTP I2C failed to find root adapter for %s\n", + dev_name(dev)); + if (!root) + return NULL; + if (ret_root) + *ret_root = root; + return adap; +} + +/* Determines whether a device is an i2c adapter with the "mctp-controller" + * devicetree property set. If adap is not an OF node, returns match_no_of + */ +static bool mctp_i2c_adapter_match(struct i2c_adapter *adap, bool match_no_of) +{ + if (!adap->dev.of_node) + return match_no_of; + return of_property_read_bool(adap->dev.of_node, MCTP_I2C_OF_PROP); +} + +/* Called for each existing i2c device (adapter or client) when a + * new mctp-i2c client is probed. 
+ */ +static int mctp_i2c_client_try_attach(struct device *dev, void *data) +{ + struct i2c_adapter *adap = NULL, *root = NULL; + struct mctp_i2c_client *mcli = data; + + adap = mctp_i2c_get_adapter(dev, &root); + if (!adap) + return 0; + if (mcli->client->adapter != root) + return 0; + /* Must either have mctp-controller property on the adapter, or + * be a root adapter if it's non-devicetree + */ + if (!mctp_i2c_adapter_match(adap, adap == root)) + return 0; + + return mctp_i2c_add_netdev(mcli, adap); +} + /* Handles a newly added i2c device: if it is an adapter with the mctp-controller property and a client is already registered on its root adapter, a netdev is created for it. Failures are only logged. */ +static void mctp_i2c_notify_add(struct device *dev) +{ + struct mctp_i2c_client *mcli = NULL, *m = NULL; + struct i2c_adapter *root = NULL, *adap = NULL; + int rc; + + adap = mctp_i2c_get_adapter(dev, &root); + if (!adap) + return; + /* Check for mctp-controller property on the adapter; adapters without an OF node never match here */ + if (!mctp_i2c_adapter_match(adap, false)) + return; + + /* Find an existing mcli for adap's root */ + mutex_lock(&driver_clients_lock); + list_for_each_entry(m, &driver_clients, list) { + if (m->client->adapter == root) { + mcli = m; + break; + } + } + + if (mcli) { + rc = mctp_i2c_add_netdev(mcli, adap); + if (rc < 0) + dev_warn(dev, "Failed adding mctp-i2c net device\n"); + } + mutex_unlock(&driver_clients_lock); +} + /* Handles removal of an i2c device: if it is an adapter, removes any netdev attached to it under the first client found on its root adapter. */ +static void mctp_i2c_notify_del(struct device *dev) +{ + struct i2c_adapter *root = NULL, *adap = NULL; + struct mctp_i2c_client *mcli = NULL; + + adap = mctp_i2c_get_adapter(dev, &root); + if (!adap) + return; + + mutex_lock(&driver_clients_lock); + list_for_each_entry(mcli, &driver_clients, list) { + if (mcli->client->adapter == root) { + mctp_i2c_remove_netdev(mcli, adap); + break; + } + } + mutex_unlock(&driver_clients_lock); +} + /* i2c probe: creates the client, adds it to driver_clients and attaches netdevs for matching adapters that already exist, all under driver_clients_lock. Returns 0 or the error from mctp_i2c_new_client(). */ +static int mctp_i2c_probe(struct i2c_client *client) +{ + struct mctp_i2c_client *mcli = NULL; + int rc; + + mutex_lock(&driver_clients_lock); + mcli = mctp_i2c_new_client(client); + if (IS_ERR(mcli)) { + rc = PTR_ERR(mcli); + mcli = NULL; + goto out; + } else { + list_add(&mcli->list, &driver_clients); + } + + /* Add a 
netdev for adapters that have a 'mctp-controller' property. + * The return value of i2c_for_each_dev() is not checked, so a failed + * attach does not fail the probe. + */ + i2c_for_each_dev(mcli, mctp_i2c_client_try_attach); + rc = 0; +out: + mutex_unlock(&driver_clients_lock); + return rc; +} + /* i2c remove: unlinks the client, unregisters every netdev attached to it and frees the client, under driver_clients_lock. */ +static int mctp_i2c_remove(struct i2c_client *client) +{ + struct mctp_i2c_client *mcli = i2c_get_clientdata(client); + struct mctp_i2c_dev *midev = NULL, *tmp = NULL; + + mutex_lock(&driver_clients_lock); + list_del(&mcli->list); + /* Remove all child adapter netdevs */ + list_for_each_entry_safe(midev, tmp, &mcli->devs, list) + mctp_i2c_unregister(midev); + + mctp_i2c_free_client(mcli); + mutex_unlock(&driver_clients_lock); + /* Callers ignore return code */ + return 0; +} + +/* We look for a 'mctp-controller' property on I2C busses as they are + * added/deleted, creating/removing netdevs as required. + * Always returns NOTIFY_DONE; other bus actions are ignored. + */ +static int mctp_i2c_notifier_call(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + mctp_i2c_notify_add(dev); + break; + case BUS_NOTIFY_DEL_DEVICE: + mctp_i2c_notify_del(dev); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block mctp_i2c_notifier = { + .notifier_call = mctp_i2c_notifier_call, +}; + +static const struct i2c_device_id mctp_i2c_id[] = { + { "mctp-i2c-interface", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, mctp_i2c_id); + +static const struct of_device_id mctp_i2c_of_match[] = { + { .compatible = "mctp-i2c-controller" }, + {}, +}; +MODULE_DEVICE_TABLE(of, mctp_i2c_of_match); + +static struct i2c_driver mctp_i2c_driver = { + .driver = { + .name = "mctp-i2c-interface", + .of_match_table = mctp_i2c_of_match, + }, + .probe_new = mctp_i2c_probe, + .remove = mctp_i2c_remove, + .id_table = mctp_i2c_id, +}; + /* Module init: registers the i2c driver first, then the i2c bus notifier; the driver is removed again if the notifier cannot be registered. */ +static __init int mctp_i2c_mod_init(void) +{ + int rc; + + pr_info("MCTP I2C interface driver\n"); + rc = i2c_add_driver(&mctp_i2c_driver); + if (rc < 0) + return rc; + rc = bus_register_notifier(&i2c_bus_type, &mctp_i2c_notifier); + if (rc < 0) { + 
i2c_del_driver(&mctp_i2c_driver); + return rc; + } + return 0; +} + /* Module exit: reverse order of mctp_i2c_mod_init() - notifier first, then the driver. */ +static __exit void mctp_i2c_mod_exit(void) +{ + int rc; + + rc = bus_unregister_notifier(&i2c_bus_type, &mctp_i2c_notifier); + if (rc < 0) + pr_warn("MCTP I2C could not unregister notifier, %d\n", rc); + i2c_del_driver(&mctp_i2c_driver); +} + +module_init(mctp_i2c_mod_init); +module_exit(mctp_i2c_mod_exit); + +MODULE_DESCRIPTION("MCTP I2C device"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Matt Johnston <matt@codeconstruct.com.au>"); diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index 5f4cd24a0241..4eba5a91075c 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -200,7 +200,11 @@ static int ipq_mdio_reset(struct mii_bus *bus) if (ret) return ret; - return clk_prepare_enable(priv->mdio_clk); + ret = clk_prepare_enable(priv->mdio_clk); + if (ret == 0) + mdelay(10); + + return ret; } static int ipq4019_mdio_probe(struct platform_device *pdev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index a7ebcdab415b..81a76322254c 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -28,6 +28,10 @@ #include <linux/of.h> #include <linux/clk.h> #include <linux/delay.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/ptp_clock.h> +#include <linux/ptp_classify.h> +#include <linux/net_tstamp.h> /* Operation Mode Strap Override */ #define MII_KSZPHY_OMSO 0x16 @@ -79,6 +83,119 @@ #define LAN8814_INTR_CTRL_REG_POLARITY BIT(1) #define LAN8814_INTR_CTRL_REG_INTR_ENABLE BIT(0) +/* Represents 1ppm adjustment in 2^32 format with + * each nsec contains 4 clock cycles. 
+ * The value is calculated as following: (1/1000000)/((2^-32)/4) + */ +#define LAN8814_1PPM_FORMAT 17179 /* 4 * 2^32 / 10^6 = 17179.87, truncated to an integer */ + +#define PTP_RX_MOD 0x024F +#define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3) +#define PTP_RX_TIMESTAMP_EN 0x024D +#define PTP_TX_TIMESTAMP_EN 0x028D + +#define PTP_TIMESTAMP_EN_SYNC_ BIT(0) +#define PTP_TIMESTAMP_EN_DREQ_ BIT(1) +#define PTP_TIMESTAMP_EN_PDREQ_ BIT(2) +#define PTP_TIMESTAMP_EN_PDRES_ BIT(3) + +#define PTP_RX_LATENCY_1000 0x0224 +#define PTP_TX_LATENCY_1000 0x0225 + +#define PTP_RX_LATENCY_100 0x0222 +#define PTP_TX_LATENCY_100 0x0223 + +#define PTP_RX_LATENCY_10 0x0220 +#define PTP_TX_LATENCY_10 0x0221 + +#define PTP_TX_PARSE_L2_ADDR_EN 0x0284 +#define PTP_RX_PARSE_L2_ADDR_EN 0x0244 + +#define PTP_TX_PARSE_IP_ADDR_EN 0x0285 +#define PTP_RX_PARSE_IP_ADDR_EN 0x0245 +#define LTC_HARD_RESET 0x023F +#define LTC_HARD_RESET_ BIT(0) + +#define TSU_HARD_RESET 0x02C1 +#define TSU_HARD_RESET_ BIT(0) + +#define PTP_CMD_CTL 0x0200 /* same offset value as PTP_TSU_INT_EN; the callers in this file pass page 4 for PTP_CMD_CTL and page 5 for PTP_TSU_INT_EN */ +#define PTP_CMD_CTL_PTP_DISABLE_ BIT(0) +#define PTP_CMD_CTL_PTP_ENABLE_ BIT(1) +#define PTP_CMD_CTL_PTP_CLOCK_READ_ BIT(3) +#define PTP_CMD_CTL_PTP_CLOCK_LOAD_ BIT(4) +#define PTP_CMD_CTL_PTP_LTC_STEP_SEC_ BIT(5) +#define PTP_CMD_CTL_PTP_LTC_STEP_NSEC_ BIT(6) + +#define PTP_CLOCK_SET_SEC_MID 0x0206 +#define PTP_CLOCK_SET_SEC_LO 0x0207 +#define PTP_CLOCK_SET_NS_HI 0x0208 +#define PTP_CLOCK_SET_NS_LO 0x0209 + +#define PTP_CLOCK_READ_SEC_MID 0x022A +#define PTP_CLOCK_READ_SEC_LO 0x022B +#define PTP_CLOCK_READ_NS_HI 0x022C +#define PTP_CLOCK_READ_NS_LO 0x022D + +#define PTP_OPERATING_MODE 0x0241 +#define PTP_OPERATING_MODE_STANDALONE_ BIT(0) + +#define PTP_TX_MOD 0x028F +#define PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_ BIT(12) +#define PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3) + +#define PTP_RX_PARSE_CONFIG 0x0242 +#define PTP_RX_PARSE_CONFIG_LAYER2_EN_ BIT(0) +#define PTP_RX_PARSE_CONFIG_IPV4_EN_ BIT(1) +#define PTP_RX_PARSE_CONFIG_IPV6_EN_ BIT(2) + +#define PTP_TX_PARSE_CONFIG 0x0282 +#define 
PTP_TX_PARSE_CONFIG_LAYER2_EN_ BIT(0) +#define PTP_TX_PARSE_CONFIG_IPV4_EN_ BIT(1) +#define PTP_TX_PARSE_CONFIG_IPV6_EN_ BIT(2) + +#define PTP_CLOCK_RATE_ADJ_HI 0x020C +#define PTP_CLOCK_RATE_ADJ_LO 0x020D +#define PTP_CLOCK_RATE_ADJ_DIR_ BIT(15) + +#define PTP_LTC_STEP_ADJ_HI 0x0212 +#define PTP_LTC_STEP_ADJ_LO 0x0213 +#define PTP_LTC_STEP_ADJ_DIR_ BIT(15) + +#define LAN8814_INTR_STS_REG 0x0033 +#define LAN8814_INTR_STS_REG_1588_TSU0_ BIT(0) +#define LAN8814_INTR_STS_REG_1588_TSU1_ BIT(1) +#define LAN8814_INTR_STS_REG_1588_TSU2_ BIT(2) +#define LAN8814_INTR_STS_REG_1588_TSU3_ BIT(3) + +#define PTP_CAP_INFO 0x022A /* NOTE(review): same offset value as PTP_CLOCK_READ_SEC_MID (0x022A); presumably selected through a different page - confirm */ +#define PTP_CAP_INFO_TX_TS_CNT_GET_(reg_val) (((reg_val) & 0x0f00) >> 8) +#define PTP_CAP_INFO_RX_TS_CNT_GET_(reg_val) ((reg_val) & 0x000f) + +#define PTP_TX_EGRESS_SEC_HI 0x0296 +#define PTP_TX_EGRESS_SEC_LO 0x0297 +#define PTP_TX_EGRESS_NS_HI 0x0294 +#define PTP_TX_EGRESS_NS_LO 0x0295 +#define PTP_TX_MSG_HEADER2 0x0299 + +#define PTP_RX_INGRESS_SEC_HI 0x0256 +#define PTP_RX_INGRESS_SEC_LO 0x0257 +#define PTP_RX_INGRESS_NS_HI 0x0254 +#define PTP_RX_INGRESS_NS_LO 0x0255 +#define PTP_RX_MSG_HEADER2 0x0259 + +#define PTP_TSU_INT_EN 0x0200 +#define PTP_TSU_INT_EN_PTP_TX_TS_OVRFL_EN_ BIT(3) +#define PTP_TSU_INT_EN_PTP_TX_TS_EN_ BIT(2) +#define PTP_TSU_INT_EN_PTP_RX_TS_OVRFL_EN_ BIT(1) +#define PTP_TSU_INT_EN_PTP_RX_TS_EN_ BIT(0) + +#define PTP_TSU_INT_STS 0x0201 +#define PTP_TSU_INT_STS_PTP_TX_TS_OVRFL_INT_ BIT(3) +#define PTP_TSU_INT_STS_PTP_TX_TS_EN_ BIT(2) +#define PTP_TSU_INT_STS_PTP_RX_TS_OVRFL_INT_ BIT(1) +#define PTP_TSU_INT_STS_PTP_RX_TS_EN_ BIT(0) + /* PHY Control 1 */ #define MII_KSZPHY_CTRL_1 0x1e #define KSZ8081_CTRL1_MDIX_STAT BIT(4) @@ -108,6 +225,7 @@ #define MII_KSZPHY_TX_DATA_PAD_SKEW 0x106 #define PS_TO_REG 200 +#define FIFO_SIZE 8 struct kszphy_hw_stat { const char *string; @@ -128,7 +246,57 @@ struct kszphy_type { bool has_rmii_ref_clk_sel; }; +/* Shared structure between the PHYs of the same package. 
*/ +struct lan8814_shared_priv { + struct phy_device *phydev; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; + + /* Reference counter to how many ports in the package are enabling the + * timestamping + */ + u8 ref; + + /* Lock for ptp_clock and ref */ + struct mutex shared_lock; +}; + /* One hardware RX timestamp together with the PTP sequence id it was captured for; kept on kszphy_ptp_priv.rx_ts_list */ +struct lan8814_ptp_rx_ts { + struct list_head list; + u32 seconds; + u32 nsec; + u16 seq_id; +}; + /* RX/TX latency values for 10/100/1000; presumably written to the PTP_*_LATENCY_* registers - that code is not visible here */ +struct kszphy_latencies { + u16 rx_10; + u16 tx_10; + u16 rx_100; + u16 tx_100; + u16 rx_1000; + u16 tx_1000; +}; + /* Per-PHY timestamping state; recovered from the embedded mii_ts with container_of() in the mii_timestamper callbacks */ +struct kszphy_ptp_priv { + struct mii_timestamper mii_ts; + struct phy_device *phydev; + + struct sk_buff_head tx_queue; + struct sk_buff_head rx_queue; + + struct list_head rx_ts_list; + /* Lock for Rx ts fifo */ + spinlock_t rx_ts_lock; + + int hwts_tx_type; + enum hwtstamp_rx_filters rx_filter; + int layer; + int version; +}; + struct kszphy_priv { + struct kszphy_ptp_priv ptp_priv; + struct kszphy_latencies latencies; const struct kszphy_type *type; int led_mode; bool rmii_ref_clk_sel; @@ -136,6 +304,14 @@ struct kszphy_priv { u64 stats[ARRAY_SIZE(kszphy_hw_stats)]; }; +static struct kszphy_latencies lan8814_latencies = { + .rx_10 = 0x22AA, + .tx_10 = 0x2E4A, + .rx_100 = 0x092A, + .tx_100 = 0x02C1, + .rx_1000 = 0x01AD, + .tx_1000 = 0x00C9, +}; static const struct kszphy_type ksz8021_type = { .led_mode_reg = MII_KSZPHY_CTRL_2, .has_broadcast_disable = true, @@ -1596,11 +1772,13 @@ static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) { u32 data; - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, - (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); - data = phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA); + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + (page 
| LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + data = __phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA); + phy_unlock_mdio_bus(phydev); return data; } @@ -1608,43 +1786,681 @@ static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) static int lanphy_write_page_reg(struct phy_device *phydev, int page, u16 addr, u16 val) { - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, - (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC); - val = phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val); - if (val) { + val = __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val); /* NOTE(review): val is the u16 parameter, so a negative error from __phy_write() is truncated before it is logged with %d and returned - presumably a separate int is wanted; confirm */ + if (val != 0) phydev_err(phydev, "Error: phy_write has returned error %d\n", val); - return val; + phy_unlock_mdio_bus(phydev); + return val; +} + /* Enables or disables the four TSU interrupt sources (TX/RX timestamp and TX/RX overflow) by writing PTP_TSU_INT_EN on page 5; returns the result of the register write */ +static int lan8814_config_ts_intr(struct phy_device *phydev, bool enable) +{ + u16 val = 0; + + if (enable) + val = PTP_TSU_INT_EN_PTP_TX_TS_EN_ | + PTP_TSU_INT_EN_PTP_TX_TS_OVRFL_EN_ | + PTP_TSU_INT_EN_PTP_RX_TS_EN_ | + PTP_TSU_INT_EN_PTP_RX_TS_OVRFL_EN_; + + return lanphy_write_page_reg(phydev, 5, PTP_TSU_INT_EN, val); +} + /* Reads one RX timestamp from page 5: seconds are assembled from the SEC_HI/SEC_LO reads, nanoseconds from the low 14 bits of NS_HI plus NS_LO, and seq_id is taken from PTP_RX_MSG_HEADER2 */ +static void lan8814_ptp_rx_ts_get(struct phy_device *phydev, + u32 *seconds, u32 *nano_seconds, u16 *seq_id) +{ + *seconds = lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_SEC_HI); + *seconds = (*seconds << 16) | + lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_SEC_LO); + + *nano_seconds = lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_NS_HI); + *nano_seconds = ((*nano_seconds & 0x3fff) << 16) | + lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_NS_LO); + + *seq_id = lanphy_read_page_reg(phydev, 5, PTP_RX_MSG_HEADER2); +} + +static void lan8814_ptp_tx_ts_get(struct 
phy_device *phydev, + u32 *seconds, u32 *nano_seconds, u16 *seq_id) +{ + *seconds = lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_SEC_HI); + *seconds = *seconds << 16 | + lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_SEC_LO); + + *nano_seconds = lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_NS_HI); + *nano_seconds = ((*nano_seconds & 0x3fff) << 16) | + lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_NS_LO); + + *seq_id = lanphy_read_page_reg(phydev, 5, PTP_TX_MSG_HEADER2); +} + /* mii_timestamper ts_info callback: reports hardware TX/RX timestamping, the PHC index of the shared PTP clock and the supported tx types / rx filters. NOTE(review): HWTSTAMP_FILTER_PTP_V1_L4_EVENT is advertised here but lan8814_hwtstamp() has no case for it and returns -ERANGE - confirm */ +static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct ethtool_ts_info *info) +{ + struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); + struct phy_device *phydev = ptp_priv->phydev; + struct lan8814_shared_priv *shared = phydev->shared->priv; + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->phc_index = ptp_clock_index(shared->ptp_clock); + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON) | + (1 << HWTSTAMP_TX_ONESTEP_SYNC); + + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + /* Reads the egress or ingress MSG_HEADER2 register FIFO_SIZE times (presumably popping one FIFO entry per read), then reads PTP_TSU_INT_STS */ +static void lan8814_flush_fifo(struct phy_device *phydev, bool egress) +{ + int i; + + for (i = 0; i < FIFO_SIZE; ++i) + lanphy_read_page_reg(phydev, 5, + egress ? 
PTP_TX_MSG_HEADER2 : PTP_RX_MSG_HEADER2); + + /* Read to clear overflow status bit */ + lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); +} + /* mii_timestamper hwtstamp callback: applies the hwtstamp_config copied from user space. Returns -EFAULT if the copy in or out fails, -ERANGE for an unhandled rx_filter, otherwise 0. */ +static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +{ + struct kszphy_ptp_priv *ptp_priv = + container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); + struct phy_device *phydev = ptp_priv->phydev; + struct lan8814_shared_priv *shared = phydev->shared->priv; + struct lan8814_ptp_rx_ts *rx_ts, *tmp; + struct hwtstamp_config config; + int txcfg = 0, rxcfg = 0; + int pkt_ts_enable; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + /* NOTE(review): both fields are stored before rx_filter is validated below, and tx_type is not validated at all - confirm */ + ptp_priv->hwts_tx_type = config.tx_type; + ptp_priv->rx_filter = config.rx_filter; + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + ptp_priv->layer = 0; + ptp_priv->version = 0; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + ptp_priv->layer = PTP_CLASS_L4; + ptp_priv->version = PTP_CLASS_V2; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + ptp_priv->layer = PTP_CLASS_L2; + ptp_priv->version = PTP_CLASS_V2; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ptp_priv->layer = PTP_CLASS_L4 | PTP_CLASS_L2; + ptp_priv->version = PTP_CLASS_V2; + break; + default: + return -ERANGE; + } + /* NOTE(review): because of the else-if, a layer of L4 | L2 enables only the layer-2 parser - confirm that is intended */ + if (ptp_priv->layer & PTP_CLASS_L2) { + rxcfg = PTP_RX_PARSE_CONFIG_LAYER2_EN_; + txcfg = PTP_TX_PARSE_CONFIG_LAYER2_EN_; + } else if (ptp_priv->layer & PTP_CLASS_L4) { + rxcfg |= PTP_RX_PARSE_CONFIG_IPV4_EN_ | PTP_RX_PARSE_CONFIG_IPV6_EN_; + txcfg |= PTP_TX_PARSE_CONFIG_IPV4_EN_ | PTP_TX_PARSE_CONFIG_IPV6_EN_; } + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_PARSE_CONFIG, rxcfg); + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_PARSE_CONFIG, txcfg); + + pkt_ts_enable = PTP_TIMESTAMP_EN_SYNC_ | 
PTP_TIMESTAMP_EN_DREQ_ | + PTP_TIMESTAMP_EN_PDREQ_ | PTP_TIMESTAMP_EN_PDRES_; + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); + + if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, + PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + + if (config.rx_filter != HWTSTAMP_FILTER_NONE) + lan8814_config_ts_intr(ptp_priv->phydev, true); + else + lan8814_config_ts_intr(ptp_priv->phydev, false); + /* NOTE(review): ref is a u8 that is incremented on every non-NONE request and decremented on every NONE request with no per-port state, so repeated calls can skew or wrap it - confirm */ + mutex_lock(&shared->shared_lock); + if (config.rx_filter != HWTSTAMP_FILTER_NONE) + shared->ref++; + else + shared->ref--; + + if (shared->ref) + lanphy_write_page_reg(ptp_priv->phydev, 4, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_ENABLE_); + else + lanphy_write_page_reg(ptp_priv->phydev, 4, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_DISABLE_); + mutex_unlock(&shared->shared_lock); + + /* In case of multiple starts and stops, these needs to be cleared. + * NOTE(review): rx_ts_list is walked here without rx_ts_lock, while + * lan8814_match_rx_ts() takes that lock around its walk - confirm. + */ + list_for_each_entry_safe(rx_ts, tmp, &ptp_priv->rx_ts_list, list) { + list_del(&rx_ts->list); + kfree(rx_ts); + } + skb_queue_purge(&ptp_priv->rx_queue); + skb_queue_purge(&ptp_priv->tx_queue); + + lan8814_flush_fifo(ptp_priv->phydev, false); + lan8814_flush_fifo(ptp_priv->phydev, true); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
-EFAULT : 0; +} + /* Returns true when skb parses as a PTP message whose message type (low nibble) is 0, false when no PTP header is found */ +static bool is_sync(struct sk_buff *skb, int type) +{ + struct ptp_header *hdr; + + hdr = ptp_parse_header(skb, type); + if (!hdr) + return false; + + return ((ptp_get_msgtype(hdr, type) & 0xf) == 0); +} + /* mii_timestamper txtstamp callback: with one-step sync enabled, sync frames are freed here; otherwise, when TX timestamping is on, the skb is flagged SKBTX_IN_PROGRESS and queued on tx_queue. Any other tx type frees the skb. */ +static void lan8814_txtstamp(struct mii_timestamper *mii_ts, + struct sk_buff *skb, int type) +{ + struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); + + switch (ptp_priv->hwts_tx_type) { + case HWTSTAMP_TX_ONESTEP_SYNC: + if (is_sync(skb, type)) { + kfree_skb(skb); + return; + } + fallthrough; + case HWTSTAMP_TX_ON: + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_queue_tail(&ptp_priv->tx_queue, skb); + break; + case HWTSTAMP_TX_OFF: + default: + kfree_skb(skb); + break; + } +} + /* Extracts the PTP sequence id of a received skb into *sig; the Ethernet header is pushed back temporarily so the frame can be classified. NOTE(review): ptp_header is dereferenced without a NULL check although is_sync() treats a NULL ptp_parse_header() result as possible - confirm */ +static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) +{ + struct ptp_header *ptp_header; + u32 type; + + skb_push(skb, ETH_HLEN); + type = ptp_classify_raw(skb); + ptp_header = ptp_parse_header(skb, type); + skb_pull_inline(skb, ETH_HLEN); + + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); +} + +static bool lan8814_match_rx_ts(struct kszphy_ptp_priv *ptp_priv, + struct sk_buff *skb) +{ + struct skb_shared_hwtstamps *shhwtstamps; + struct lan8814_ptp_rx_ts *rx_ts, *tmp; + unsigned long flags; + bool ret = false; + u16 skb_sig; + + lan8814_get_sig_rx(skb, &skb_sig); + + /* Iterate over all RX timestamps and match it with the received skbs */ + spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); + list_for_each_entry_safe(rx_ts, tmp, &ptp_priv->rx_ts_list, list) { + /* Check if we found the signature we were looking for. 
*/ + if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) + continue; + + shhwtstamps = skb_hwtstamps(skb); + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + shhwtstamps->hwtstamp = ktime_set(rx_ts->seconds, + rx_ts->nsec); + netif_rx_ni(skb); + + list_del(&rx_ts->list); + kfree(rx_ts); + + ret = true; + break; + } + spin_unlock_irqrestore(&ptp_priv->rx_ts_lock, flags); + + return ret; +} + +static bool lan8814_rxtstamp(struct mii_timestamper *mii_ts, struct sk_buff *skb, int type) +{ + struct kszphy_ptp_priv *ptp_priv = + container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); + + if (ptp_priv->rx_filter == HWTSTAMP_FILTER_NONE || + type == PTP_CLASS_NONE) + return false; + + if ((type & ptp_priv->version) == 0 || (type & ptp_priv->layer) == 0) + return false; + + /* If we failed to match then add it to the queue for when the timestamp + * will come + */ + if (!lan8814_match_rx_ts(ptp_priv, skb)) + skb_queue_tail(&ptp_priv->rx_queue, skb); + + return true; +} + +static void lan8814_ptp_clock_set(struct phy_device *phydev, + u32 seconds, u32 nano_seconds) +{ + u32 sec_low, sec_high, nsec_low, nsec_high; + + sec_low = seconds & 0xffff; + sec_high = (seconds >> 16) & 0xffff; + nsec_low = nano_seconds & 0xffff; + nsec_high = (nano_seconds >> 16) & 0x3fff; + + lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_LO, sec_low); + lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_MID, sec_high); + lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_LO, nsec_low); + lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_HI, nsec_high); + + lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_LOAD_); +} + +static void lan8814_ptp_clock_get(struct phy_device *phydev, + u32 *seconds, u32 *nano_seconds) +{ + lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_READ_); + + *seconds = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_MID); + *seconds = (*seconds << 16) | + lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_LO); + + *nano_seconds = 
lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_HI); + *nano_seconds = ((*nano_seconds & 0x3fff) << 16) | + lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_LO); +} + +static int lan8814_ptpci_gettime64(struct ptp_clock_info *ptpci, + struct timespec64 *ts) +{ + struct lan8814_shared_priv *shared = container_of(ptpci, struct lan8814_shared_priv, + ptp_clock_info); + struct phy_device *phydev = shared->phydev; + u32 nano_seconds; + u32 seconds; + + mutex_lock(&shared->shared_lock); + lan8814_ptp_clock_get(phydev, &seconds, &nano_seconds); + mutex_unlock(&shared->shared_lock); + ts->tv_sec = seconds; + ts->tv_nsec = nano_seconds; + return 0; } -static int lan8814_config_init(struct phy_device *phydev) +static int lan8814_ptpci_settime64(struct ptp_clock_info *ptpci, + const struct timespec64 *ts) { - int val; + struct lan8814_shared_priv *shared = container_of(ptpci, struct lan8814_shared_priv, + ptp_clock_info); + struct phy_device *phydev = shared->phydev; - /* Reset the PHY */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET); - val |= LAN8814_QSGMII_SOFT_RESET_BIT; - lanphy_write_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET, val); + mutex_lock(&shared->shared_lock); + lan8814_ptp_clock_set(phydev, ts->tv_sec, ts->tv_nsec); + mutex_unlock(&shared->shared_lock); - /* Disable ANEG with QSGMII PCS Host side */ - val = lanphy_read_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG); - val &= ~LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA; - lanphy_write_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, val); + return 0; +} - /* MDI-X setting for swap A,B transmit */ - val = lanphy_read_page_reg(phydev, 2, LAN8814_ALIGN_SWAP); - val &= ~LAN8814_ALIGN_TX_A_B_SWAP_MASK; - val |= LAN8814_ALIGN_TX_A_B_SWAP; - lanphy_write_page_reg(phydev, 2, LAN8814_ALIGN_SWAP, val); +static void lan8814_ptp_clock_step(struct phy_device *phydev, + s64 time_step_ns) +{ + u32 nano_seconds_step; + u64 abs_time_step_ns; + u32 unsigned_seconds; + u32 nano_seconds; + u32 
remainder; + s32 seconds; + + if (time_step_ns > 15000000000LL) { + /* convert to clock set */ + lan8814_ptp_clock_get(phydev, &unsigned_seconds, &nano_seconds); + unsigned_seconds += div_u64_rem(time_step_ns, 1000000000LL, + &remainder); + nano_seconds += remainder; + if (nano_seconds >= 1000000000) { + unsigned_seconds++; + nano_seconds -= 1000000000; + } + lan8814_ptp_clock_set(phydev, unsigned_seconds, nano_seconds); + return; + } else if (time_step_ns < -15000000000LL) { + /* convert to clock set */ + time_step_ns = -time_step_ns; + + lan8814_ptp_clock_get(phydev, &unsigned_seconds, &nano_seconds); + unsigned_seconds -= div_u64_rem(time_step_ns, 1000000000LL, + &remainder); + nano_seconds_step = remainder; + if (nano_seconds < nano_seconds_step) { + unsigned_seconds--; + nano_seconds += 1000000000; + } + nano_seconds -= nano_seconds_step; + lan8814_ptp_clock_set(phydev, unsigned_seconds, + nano_seconds); + return; + } + + /* do clock step */ + if (time_step_ns >= 0) { + abs_time_step_ns = (u64)time_step_ns; + seconds = (s32)div_u64_rem(abs_time_step_ns, 1000000000, + &remainder); + nano_seconds = remainder; + } else { + abs_time_step_ns = (u64)(-time_step_ns); + seconds = -((s32)div_u64_rem(abs_time_step_ns, 1000000000, + &remainder)); + nano_seconds = remainder; + if (nano_seconds > 0) { + /* subtracting nano seconds is not allowed + * convert to subtracting from seconds, + * and adding to nanoseconds + */ + seconds--; + nano_seconds = (1000000000 - nano_seconds); + } + } + + if (nano_seconds > 0) { + /* add 8 ns to cover the likely normal increment */ + nano_seconds += 8; + } + + if (nano_seconds >= 1000000000) { + /* carry into seconds */ + seconds++; + nano_seconds -= 1000000000; + } + + while (seconds) { + if (seconds > 0) { + u32 adjustment_value = (u32)seconds; + u16 adjustment_value_lo, adjustment_value_hi; + + if (adjustment_value > 0xF) + adjustment_value = 0xF; + + adjustment_value_lo = adjustment_value & 0xffff; + adjustment_value_hi = 
(adjustment_value >> 16) & 0x3fff; + + lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO, + adjustment_value_lo); + lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI, + PTP_LTC_STEP_ADJ_DIR_ | + adjustment_value_hi); + seconds -= ((s32)adjustment_value); + } else { + u32 adjustment_value = (u32)(-seconds); + u16 adjustment_value_lo, adjustment_value_hi; + + if (adjustment_value > 0xF) + adjustment_value = 0xF; + + adjustment_value_lo = adjustment_value & 0xffff; + adjustment_value_hi = (adjustment_value >> 16) & 0x3fff; + + lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO, + adjustment_value_lo); + lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI, + adjustment_value_hi); + seconds += ((s32)adjustment_value); + } + lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_LTC_STEP_SEC_); + } + if (nano_seconds) { + u16 nano_seconds_lo; + u16 nano_seconds_hi; + + nano_seconds_lo = nano_seconds & 0xffff; + nano_seconds_hi = (nano_seconds >> 16) & 0x3fff; + + lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO, + nano_seconds_lo); + lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI, + PTP_LTC_STEP_ADJ_DIR_ | + nano_seconds_hi); + lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_LTC_STEP_NSEC_); + } +} + +static int lan8814_ptpci_adjtime(struct ptp_clock_info *ptpci, s64 delta) +{ + struct lan8814_shared_priv *shared = container_of(ptpci, struct lan8814_shared_priv, + ptp_clock_info); + struct phy_device *phydev = shared->phydev; + + mutex_lock(&shared->shared_lock); + lan8814_ptp_clock_step(phydev, delta); + mutex_unlock(&shared->shared_lock); + + return 0; +} + +static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) +{ + struct lan8814_shared_priv *shared = container_of(ptpci, struct lan8814_shared_priv, + ptp_clock_info); + struct phy_device *phydev = shared->phydev; + u16 kszphy_rate_adj_lo, kszphy_rate_adj_hi; + bool positive = true; + u32 kszphy_rate_adj; + + if (scaled_ppm < 0) { + scaled_ppm = 
-scaled_ppm; + positive = false; + } + + kszphy_rate_adj = LAN8814_1PPM_FORMAT * (scaled_ppm >> 16); + kszphy_rate_adj += (LAN8814_1PPM_FORMAT * (0xffff & scaled_ppm)) >> 16; + + kszphy_rate_adj_lo = kszphy_rate_adj & 0xffff; + kszphy_rate_adj_hi = (kszphy_rate_adj >> 16) & 0x3fff; + + if (positive) + kszphy_rate_adj_hi |= PTP_CLOCK_RATE_ADJ_DIR_; + + mutex_lock(&shared->shared_lock); + lanphy_write_page_reg(phydev, 4, PTP_CLOCK_RATE_ADJ_HI, kszphy_rate_adj_hi); + lanphy_write_page_reg(phydev, 4, PTP_CLOCK_RATE_ADJ_LO, kszphy_rate_adj_lo); + mutex_unlock(&shared->shared_lock); return 0; } +static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) +{ + struct ptp_header *ptp_header; + u32 type; + + type = ptp_classify_raw(skb); + ptp_header = ptp_parse_header(skb, type); + + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); +} + +static void lan8814_dequeue_tx_skb(struct kszphy_ptp_priv *ptp_priv) +{ + struct phy_device *phydev = ptp_priv->phydev; + struct skb_shared_hwtstamps shhwtstamps; + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + u32 seconds, nsec; + bool ret = false; + u16 skb_sig; + u16 seq_id; + + lan8814_ptp_tx_ts_get(phydev, &seconds, &nsec, &seq_id); + + spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); + skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { + lan8814_get_sig_tx(skb, &skb_sig); + + if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) + continue; + + __skb_unlink(skb, &ptp_priv->tx_queue); + ret = true; + break; + } + spin_unlock_irqrestore(&ptp_priv->tx_queue.lock, flags); + + if (ret) { + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ktime_set(seconds, nsec); + skb_complete_tx_timestamp(skb, &shhwtstamps); + } +} + +static void lan8814_get_tx_ts(struct kszphy_ptp_priv *ptp_priv) +{ + struct phy_device *phydev = ptp_priv->phydev; + u32 reg; + + do { + lan8814_dequeue_tx_skb(ptp_priv); + + /* If other timestamps are available in the FIFO, + * process them. 
+ */ + reg = lanphy_read_page_reg(phydev, 5, PTP_CAP_INFO); + } while (PTP_CAP_INFO_TX_TS_CNT_GET_(reg) > 0); +} + +static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, + struct lan8814_ptp_rx_ts *rx_ts) +{ + struct skb_shared_hwtstamps *shhwtstamps; + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + bool ret = false; + u16 skb_sig; + + spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); + skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { + lan8814_get_sig_rx(skb, &skb_sig); + + if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) + continue; + + __skb_unlink(skb, &ptp_priv->rx_queue); + + ret = true; + break; + } + spin_unlock_irqrestore(&ptp_priv->rx_queue.lock, flags); + + if (ret) { + shhwtstamps = skb_hwtstamps(skb); + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + shhwtstamps->hwtstamp = ktime_set(rx_ts->seconds, rx_ts->nsec); + netif_rx_ni(skb); + } + + return ret; +} + +static void lan8814_get_rx_ts(struct kszphy_ptp_priv *ptp_priv) +{ + struct phy_device *phydev = ptp_priv->phydev; + struct lan8814_ptp_rx_ts *rx_ts; + unsigned long flags; + u32 reg; + + do { + rx_ts = kzalloc(sizeof(*rx_ts), GFP_KERNEL); + if (!rx_ts) + return; + + lan8814_ptp_rx_ts_get(phydev, &rx_ts->seconds, &rx_ts->nsec, + &rx_ts->seq_id); + + /* If we failed to match the skb add it to the queue for when + * the frame will come + */ + if (!lan8814_match_skb(ptp_priv, rx_ts)) { + spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); + list_add(&rx_ts->list, &ptp_priv->rx_ts_list); + spin_unlock_irqrestore(&ptp_priv->rx_ts_lock, flags); + } else { + kfree(rx_ts); + } + + /* If other timestamps are available in the FIFO, + * process them. 
+ */ + reg = lanphy_read_page_reg(phydev, 5, PTP_CAP_INFO); + } while (PTP_CAP_INFO_RX_TS_CNT_GET_(reg) > 0); +} + +static void lan8814_handle_ptp_interrupt(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv; + u16 status; + + status = lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); + if (status & PTP_TSU_INT_STS_PTP_TX_TS_EN_) + lan8814_get_tx_ts(ptp_priv); + + if (status & PTP_TSU_INT_STS_PTP_RX_TS_EN_) + lan8814_get_rx_ts(ptp_priv); + + if (status & PTP_TSU_INT_STS_PTP_TX_TS_OVRFL_INT_) { + lan8814_flush_fifo(phydev, true); + skb_queue_purge(&ptp_priv->tx_queue); + } + + if (status & PTP_TSU_INT_STS_PTP_RX_TS_OVRFL_INT_) { + lan8814_flush_fifo(phydev, false); + skb_queue_purge(&ptp_priv->rx_queue); + } +} + static int lan8804_config_init(struct phy_device *phydev) { int val; @@ -1666,17 +2482,31 @@ static int lan8804_config_init(struct phy_device *phydev) static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) { + u16 tsu_irq_status; int irq_status; irq_status = phy_read(phydev, LAN8814_INTS); - if (irq_status < 0) - return IRQ_NONE; + if (irq_status > 0 && (irq_status & LAN8814_INT_LINK)) + phy_trigger_machine(phydev); - if (!(irq_status & LAN8814_INT_LINK)) + if (irq_status < 0) { + phy_error(phydev); return IRQ_NONE; + } - phy_trigger_machine(phydev); + while (1) { + tsu_irq_status = lanphy_read_page_reg(phydev, 4, + LAN8814_INTR_STS_REG); + if (tsu_irq_status > 0 && + (tsu_irq_status & (LAN8814_INTR_STS_REG_1588_TSU0_ | + LAN8814_INTR_STS_REG_1588_TSU1_ | + LAN8814_INTR_STS_REG_1588_TSU2_ | + LAN8814_INTR_STS_REG_1588_TSU3_))) + lan8814_handle_ptp_interrupt(phydev); + else + break; + } return IRQ_HANDLED; } @@ -1716,6 +2546,223 @@ static int lan8814_config_intr(struct phy_device *phydev) return err; } +static void lan8814_ptp_init(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv; + u32 temp; 
+ + lanphy_write_page_reg(phydev, 5, TSU_HARD_RESET, TSU_HARD_RESET_); + + temp = lanphy_read_page_reg(phydev, 5, PTP_TX_MOD); + temp |= PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_; + lanphy_write_page_reg(phydev, 5, PTP_TX_MOD, temp); + + temp = lanphy_read_page_reg(phydev, 5, PTP_RX_MOD); + temp |= PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_; + lanphy_write_page_reg(phydev, 5, PTP_RX_MOD, temp); + + lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_CONFIG, 0); + lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_CONFIG, 0); + + /* Removing default registers configs related to L2 and IP */ + lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_L2_ADDR_EN, 0); + lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_L2_ADDR_EN, 0); + lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0); + lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0); + + skb_queue_head_init(&ptp_priv->tx_queue); + skb_queue_head_init(&ptp_priv->rx_queue); + INIT_LIST_HEAD(&ptp_priv->rx_ts_list); + spin_lock_init(&ptp_priv->rx_ts_lock); + + ptp_priv->phydev = phydev; + + ptp_priv->mii_ts.rxtstamp = lan8814_rxtstamp; + ptp_priv->mii_ts.txtstamp = lan8814_txtstamp; + ptp_priv->mii_ts.hwtstamp = lan8814_hwtstamp; + ptp_priv->mii_ts.ts_info = lan8814_ts_info; + + phydev->mii_ts = &ptp_priv->mii_ts; +} + +static int lan8814_ptp_probe_once(struct phy_device *phydev) +{ + struct lan8814_shared_priv *shared = phydev->shared->priv; + + /* Initialise shared lock for clock*/ + mutex_init(&shared->shared_lock); + + shared->ptp_clock_info.owner = THIS_MODULE; + snprintf(shared->ptp_clock_info.name, 30, "%s", phydev->drv->name); + shared->ptp_clock_info.max_adj = 31249999; + shared->ptp_clock_info.n_alarm = 0; + shared->ptp_clock_info.n_ext_ts = 0; + shared->ptp_clock_info.n_pins = 0; + shared->ptp_clock_info.pps = 0; + shared->ptp_clock_info.pin_config = NULL; + shared->ptp_clock_info.adjfine = lan8814_ptpci_adjfine; + shared->ptp_clock_info.adjtime = lan8814_ptpci_adjtime; + shared->ptp_clock_info.gettime64 = 
lan8814_ptpci_gettime64; + shared->ptp_clock_info.settime64 = lan8814_ptpci_settime64; + shared->ptp_clock_info.getcrosststamp = NULL; + + shared->ptp_clock = ptp_clock_register(&shared->ptp_clock_info, + &phydev->mdio.dev); + if (IS_ERR_OR_NULL(shared->ptp_clock)) { + phydev_err(phydev, "ptp_clock_register failed %lu\n", + PTR_ERR(shared->ptp_clock)); + return -EINVAL; + } + + phydev_dbg(phydev, "successfully registered ptp clock\n"); + + shared->phydev = phydev; + + /* The EP.4 is shared between all the PHYs in the package and also it + * can be accessed by any of the PHYs + */ + lanphy_write_page_reg(phydev, 4, LTC_HARD_RESET, LTC_HARD_RESET_); + lanphy_write_page_reg(phydev, 4, PTP_OPERATING_MODE, + PTP_OPERATING_MODE_STANDALONE_); + + return 0; +} + +static int lan8814_read_status(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + struct kszphy_latencies *latencies = &priv->latencies; + int err; + int regval; + + err = genphy_read_status(phydev); + if (err) + return err; + + switch (phydev->speed) { + case SPEED_1000: + lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_1000, + latencies->rx_1000); + lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_1000, + latencies->tx_1000); + break; + case SPEED_100: + lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_100, + latencies->rx_100); + lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_100, + latencies->tx_100); + break; + case SPEED_10: + lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_10, + latencies->rx_10); + lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_10, + latencies->tx_10); + break; + default: + break; + } + + /* Make sure the PHY is not broken. Read idle error count, + * and reset the PHY if it is maxed out. 
+ */ + regval = phy_read(phydev, MII_STAT1000); + if ((regval & 0xFF) == 0xFF) { + phy_init_hw(phydev); + phydev->link = 0; + if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) + phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); + } + + return 0; +} + +static int lan8814_config_init(struct phy_device *phydev) +{ + int val; + + /* Reset the PHY */ + val = lanphy_read_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET); + val |= LAN8814_QSGMII_SOFT_RESET_BIT; + lanphy_write_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET, val); + + /* Disable ANEG with QSGMII PCS Host side */ + val = lanphy_read_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG); + val &= ~LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA; + lanphy_write_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, val); + + /* MDI-X setting for swap A,B transmit */ + val = lanphy_read_page_reg(phydev, 2, LAN8814_ALIGN_SWAP); + val &= ~LAN8814_ALIGN_TX_A_B_SWAP_MASK; + val |= LAN8814_ALIGN_TX_A_B_SWAP; + lanphy_write_page_reg(phydev, 2, LAN8814_ALIGN_SWAP, val); + + return 0; +} + +static void lan8814_parse_latency(struct phy_device *phydev) +{ + const struct device_node *np = phydev->mdio.dev.of_node; + struct kszphy_priv *priv = phydev->priv; + struct kszphy_latencies *latency = &priv->latencies; + u32 val; + + if (!of_property_read_u32(np, "lan8814,latency_rx_10", &val)) + latency->rx_10 = val; + if (!of_property_read_u32(np, "lan8814,latency_tx_10", &val)) + latency->tx_10 = val; + if (!of_property_read_u32(np, "lan8814,latency_rx_100", &val)) + latency->rx_100 = val; + if (!of_property_read_u32(np, "lan8814,latency_tx_100", &val)) + latency->tx_100 = val; + if (!of_property_read_u32(np, "lan8814,latency_rx_1000", &val)) + latency->rx_1000 = val; + if (!of_property_read_u32(np, "lan8814,latency_tx_1000", &val)) + latency->tx_1000 = val; +} + +static int lan8814_probe(struct phy_device *phydev) +{ + const struct device_node *np = phydev->mdio.dev.of_node; + struct kszphy_priv *priv; 
+ u16 addr; + int err; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->led_mode = -1; + + priv->latencies = lan8814_latencies; + + phydev->priv = priv; + + if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) || + !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING) || + of_property_read_bool(np, "lan8814,ignore-ts")) + return 0; + + /* Strap-in value for PHY address, below register read gives starting + * phy address value + */ + addr = lanphy_read_page_reg(phydev, 4, 0) & 0x1F; + devm_phy_package_join(&phydev->mdio.dev, phydev, + addr, sizeof(struct lan8814_shared_priv)); + + if (phy_package_init_once(phydev)) { + err = lan8814_ptp_probe_once(phydev); + if (err) + return err; + } + + lan8814_parse_latency(phydev); + lan8814_ptp_init(phydev); + + return 0; +} + static struct phy_driver ksphy_driver[] = { { .phy_id = PHY_ID_KS8737, @@ -1890,10 +2937,9 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Microchip INDY Gigabit Quad PHY", .config_init = lan8814_config_init, - .driver_data = &ksz9021_type, - .probe = kszphy_probe, + .probe = lan8814_probe, .soft_reset = genphy_soft_reset, - .read_status = ksz9031_read_status, + .read_status = lan8814_read_status, .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index bc50224d43dd..8292f7305805 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -8,11 +8,17 @@ #include <linux/phy.h> #include <linux/ethtool.h> #include <linux/ethtool_netlink.h> +#include <linux/bitfield.h> + +#define PHY_ID_LAN87XX 0x0007c150 +#define PHY_ID_LAN937X 0x0007c180 /* External Register Control Register */ #define LAN87XX_EXT_REG_CTL (0x14) #define LAN87XX_EXT_REG_CTL_RD_CTL (0x1000) #define LAN87XX_EXT_REG_CTL_WR_CTL (0x0800) +#define LAN87XX_REG_BANK_SEL_MASK GENMASK(10, 8) +#define 
LAN87XX_REG_ADDR_MASK GENMASK(7, 0) /* External Register Read Data Register */ #define LAN87XX_EXT_REG_RD_DATA (0x15) @@ -37,6 +43,7 @@ #define PHYACC_ATTR_MODE_READ 0 #define PHYACC_ATTR_MODE_WRITE 1 #define PHYACC_ATTR_MODE_MODIFY 2 +#define PHYACC_ATTR_MODE_POLL 3 #define PHYACC_ATTR_BANK_SMI 0 #define PHYACC_ATTR_BANK_MISC 1 @@ -50,8 +57,33 @@ #define LAN87XX_CABLE_TEST_OPEN 1 #define LAN87XX_CABLE_TEST_SAME_SHORT 2 +/* T1 Registers */ +#define T1_AFE_PORT_CFG1_REG 0x0B +#define T1_POWER_DOWN_CONTROL_REG 0x1A +#define T1_SLV_FD_MULT_CFG_REG 0x18 +#define T1_CDR_CFG_PRE_LOCK_REG 0x05 +#define T1_CDR_CFG_POST_LOCK_REG 0x06 +#define T1_LCK_STG2_MUFACT_CFG_REG 0x1A +#define T1_LCK_STG3_MUFACT_CFG_REG 0x1B +#define T1_POST_LCK_MUFACT_CFG_REG 0x1C +#define T1_TX_RX_FIFO_CFG_REG 0x02 +#define T1_TX_LPF_FIR_CFG_REG 0x55 +#define T1_SQI_CONFIG_REG 0x2E +#define T1_MDIO_CONTROL2_REG 0x10 +#define T1_INTERRUPT_SOURCE_REG 0x18 +#define T1_INTERRUPT2_SOURCE_REG 0x08 +#define T1_EQ_FD_STG1_FRZ_CFG 0x69 +#define T1_EQ_FD_STG2_FRZ_CFG 0x6A +#define T1_EQ_FD_STG3_FRZ_CFG 0x6B +#define T1_EQ_FD_STG4_FRZ_CFG 0x6C +#define T1_EQ_WT_FD_LCK_FRZ_CFG 0x6D +#define T1_PST_EQ_LCK_STG1_FRZ_CFG 0x6E + +#define T1_MODE_STAT_REG 0x11 +#define T1_LINK_UP_MSK BIT(0) + #define DRIVER_AUTHOR "Nisar Sayed <nisar.sayed@microchip.com>" -#define DRIVER_DESC "Microchip LAN87XX T1 PHY driver" +#define DRIVER_DESC "Microchip LAN87XX/LAN937x T1 PHY driver" struct access_ereg_val { u8 mode; @@ -61,6 +93,37 @@ struct access_ereg_val { u16 mask; }; +static int lan937x_dsp_workaround(struct phy_device *phydev, u16 ereg, u8 bank) +{ + u8 prev_bank; + int rc = 0; + u16 val; + + mutex_lock(&phydev->lock); + /* Read previous selected bank */ + rc = phy_read(phydev, LAN87XX_EXT_REG_CTL); + if (rc < 0) + goto out_unlock; + + /* store the prev_bank */ + prev_bank = FIELD_GET(LAN87XX_REG_BANK_SEL_MASK, rc); + + if (bank != prev_bank && bank == PHYACC_ATTR_BANK_DSP) { + val = ereg & ~LAN87XX_REG_ADDR_MASK; + + val 
&= ~LAN87XX_EXT_REG_CTL_WR_CTL; + val |= LAN87XX_EXT_REG_CTL_RD_CTL; + + /* access twice for DSP bank change,dummy access */ + rc = phy_write(phydev, LAN87XX_EXT_REG_CTL, val); + } + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int access_ereg(struct phy_device *phydev, u8 mode, u8 bank, u8 offset, u16 val) { @@ -89,6 +152,13 @@ static int access_ereg(struct phy_device *phydev, u8 mode, u8 bank, ereg |= (bank << 8) | offset; + /* DSP bank access workaround for lan937x */ + if (phydev->phy_id == PHY_ID_LAN937X) { + rc = lan937x_dsp_workaround(phydev, ereg, bank); + if (rc < 0) + return rc; + } + rc = phy_write(phydev, LAN87XX_EXT_REG_CTL, ereg); if (rc < 0) return rc; @@ -117,6 +187,15 @@ static int access_ereg_modify_changed(struct phy_device *phydev, return rc; } +static int access_smi_poll_timeout(struct phy_device *phydev, + u8 offset, u16 mask, u16 clr) +{ + int val; + + return phy_read_poll_timeout(phydev, offset, val, (val & mask) == clr, + 150, 30000, true); +} + static int lan87xx_config_rgmii_delay(struct phy_device *phydev) { int rc; @@ -157,68 +236,159 @@ static int lan87xx_config_rgmii_delay(struct phy_device *phydev) static int lan87xx_phy_init(struct phy_device *phydev) { static const struct access_ereg_val init[] = { - /* TX Amplitude = 5 */ - {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_AFE, 0x0B, - 0x000A, 0x001E}, - /* Clear SMI interrupts */ - {PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_SMI, 0x18, - 0, 0}, - /* Clear MISC interrupts */ - {PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_MISC, 0x08, - 0, 0}, - /* Turn on TC10 Ring Oscillator (ROSC) */ - {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_MISC, 0x20, - 0x0020, 0x0020}, - /* WUR Detect Length to 1.2uS, LPC Detect Length to 1.09uS */ - {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_PCS, 0x20, - 0x283C, 0}, - /* Wake_In Debounce Length to 39uS, Wake_Out Length to 79uS */ - {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_MISC, 0x21, - 0x274F, 0}, - /* Enable Auto Wake Forward to Wake_Out, ROSC 
on, Sleep, - * and Wake_In to wake PHY - */ - {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_MISC, 0x20, - 0x80A7, 0}, - /* Enable WUP Auto Fwd, Enable Wake on MDI, Wakeup Debouncer - * to 128 uS - */ - {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_MISC, 0x24, - 0xF110, 0}, - /* Enable HW Init */ - {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_SMI, 0x1A, - 0x0100, 0x0100}, + /* TXPD/TXAMP6 Configs */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_AFE, + T1_AFE_PORT_CFG1_REG, 0x002D, 0 }, + /* HW_Init Hi and Force_ED */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_SMI, + T1_POWER_DOWN_CONTROL_REG, 0x0308, 0 }, + /* Equalizer Full Duplex Freeze - T1 Slave */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_EQ_FD_STG1_FRZ_CFG, 0x0002, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_EQ_FD_STG2_FRZ_CFG, 0x0002, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_EQ_FD_STG3_FRZ_CFG, 0x0002, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_EQ_FD_STG4_FRZ_CFG, 0x0002, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_EQ_WT_FD_LCK_FRZ_CFG, 0x0002, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_PST_EQ_LCK_STG1_FRZ_CFG, 0x0002, 0 }, + /* Slave Full Duplex Multi Configs */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_SLV_FD_MULT_CFG_REG, 0x0D53, 0 }, + /* CDR Pre and Post Lock Configs */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_CDR_CFG_PRE_LOCK_REG, 0x0AB2, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_CDR_CFG_POST_LOCK_REG, 0x0AB3, 0 }, + /* Lock Stage 2-3 Multi Factor Config */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_LCK_STG2_MUFACT_CFG_REG, 0x0AEA, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_LCK_STG3_MUFACT_CFG_REG, 0x0AEB, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_POST_LCK_MUFACT_CFG_REG, 0x0AEB, 0 }, + /* Pointer delay */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_RX_FIFO_CFG_REG, 0x1C00, 0 }, + /* Tx iir edits */ + { 
PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1000, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1861, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1061, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1922, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1122, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1983, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1183, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1944, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1144, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x18c5, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x10c5, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1846, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1046, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1807, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1007, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1808, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1008, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1809, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1009, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x180A, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x100A, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x180B, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + 
T1_TX_LPF_FIR_CFG_REG, 0x100B, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x180C, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x100C, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x180D, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x100D, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x180E, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x100E, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x180F, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x100F, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1810, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1010, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1811, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1011, 0 }, + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_TX_LPF_FIR_CFG_REG, 0x1000, 0 }, + /* SQI enable */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, + T1_SQI_CONFIG_REG, 0x9572, 0 }, + /* Flag LPS and WUR as idle errors */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_SMI, + T1_MDIO_CONTROL2_REG, 0x0014, 0 }, + /* HW_Init toggle, undo force ED, TXPD off */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_SMI, + T1_POWER_DOWN_CONTROL_REG, 0x0200, 0 }, + /* Reset PCS to trigger hardware initialization */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_SMI, + T1_MDIO_CONTROL2_REG, 0x0094, 0 }, + /* Poll till Hardware is initialized */ + { PHYACC_ATTR_MODE_POLL, PHYACC_ATTR_BANK_SMI, + T1_MDIO_CONTROL2_REG, 0x0080, 0 }, + /* Tx AMP - 0x06 */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_AFE, + T1_AFE_PORT_CFG1_REG, 0x000C, 0 }, + /* Read INTERRUPT_SOURCE Register */ + { PHYACC_ATTR_MODE_READ, 
PHYACC_ATTR_BANK_SMI, + T1_INTERRUPT_SOURCE_REG, 0, 0 }, + /* Read INTERRUPT_SOURCE Register */ + { PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_MISC, + T1_INTERRUPT2_SOURCE_REG, 0, 0 }, + /* HW_Init Hi */ + { PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_SMI, + T1_POWER_DOWN_CONTROL_REG, 0x0300, 0 }, }; int rc, i; - /* Start manual initialization procedures in Managed Mode */ - rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_SMI, - 0x1a, 0x0000, 0x0100); - if (rc < 0) - return rc; - - /* Soft Reset the SMI block */ - rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_SMI, - 0x00, 0x8000, 0x8000); - if (rc < 0) - return rc; - - /* Check to see if the self-clearing bit is cleared */ - usleep_range(1000, 2000); - rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, - PHYACC_ATTR_BANK_SMI, 0x00, 0); + /* phy Soft reset */ + rc = genphy_soft_reset(phydev); if (rc < 0) return rc; - if ((rc & 0x8000) != 0) - return -ETIMEDOUT; /* PHY Initialization */ for (i = 0; i < ARRAY_SIZE(init); i++) { - if (init[i].mode == PHYACC_ATTR_MODE_MODIFY) { - rc = access_ereg_modify_changed(phydev, init[i].bank, - init[i].offset, - init[i].val, - init[i].mask); + if (init[i].mode == PHYACC_ATTR_MODE_POLL && + init[i].bank == PHYACC_ATTR_BANK_SMI) { + rc = access_smi_poll_timeout(phydev, + init[i].offset, + init[i].val, + init[i].mask); } else { rc = access_ereg(phydev, init[i].mode, init[i].bank, init[i].offset, init[i].val); @@ -504,22 +674,114 @@ static int lan87xx_cable_test_get_status(struct phy_device *phydev, return 0; } +static int lan87xx_read_master_slave(struct phy_device *phydev) +{ + int rc = 0; + + phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN; + phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN; + + rc = phy_read(phydev, MII_CTRL1000); + if (rc < 0) + return rc; + + if (rc & CTL1000_AS_MASTER) + phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE; + else + phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE; + + rc = phy_read(phydev, MII_STAT1000); + if 
(rc < 0) + return rc; + + if (rc & LPA_1000MSRES) + phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER; + else + phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE; + + return rc; +} + +static int lan87xx_read_status(struct phy_device *phydev) +{ + int rc = 0; + + rc = phy_read(phydev, T1_MODE_STAT_REG); + if (rc < 0) + return rc; + + if (rc & T1_LINK_UP_MSK) + phydev->link = 1; + else + phydev->link = 0; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + rc = lan87xx_read_master_slave(phydev); + if (rc < 0) + return rc; + + rc = genphy_read_status_fixed(phydev); + if (rc < 0) + return rc; + + return rc; +} + +static int lan87xx_config_aneg(struct phy_device *phydev) +{ + u16 ctl = 0; + int rc; + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_FORCE: + ctl |= CTL1000_AS_MASTER; + break; + case MASTER_SLAVE_CFG_SLAVE_FORCE: + break; + case MASTER_SLAVE_CFG_UNKNOWN: + case MASTER_SLAVE_CFG_UNSUPPORTED: + return 0; + default: + phydev_warn(phydev, "Unsupported Master/Slave mode\n"); + return -EOPNOTSUPP; + } + + rc = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); + if (rc == 1) + rc = genphy_soft_reset(phydev); + + return rc; +} + static struct phy_driver microchip_t1_phy_driver[] = { { - .phy_id = 0x0007c150, - .phy_id_mask = 0xfffffff0, + PHY_ID_MATCH_MODEL(PHY_ID_LAN87XX), .name = "Microchip LAN87xx T1", .flags = PHY_POLL_CABLE_TEST, - .features = PHY_BASIC_T1_FEATURES, - .config_init = lan87xx_config_init, - .config_intr = lan87xx_phy_config_intr, .handle_interrupt = lan87xx_handle_interrupt, - .suspend = genphy_suspend, .resume = genphy_resume, + .config_aneg = lan87xx_config_aneg, + .read_status = lan87xx_read_status, + .cable_test_start = lan87xx_cable_test_start, + .cable_test_get_status = lan87xx_cable_test_get_status, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_LAN937X), + .name = "Microchip LAN937x T1", + .features = PHY_BASIC_T1_FEATURES, + 
.config_init = lan87xx_config_init, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = lan87xx_config_aneg, + .read_status = lan87xx_read_status, .cable_test_start = lan87xx_cable_test_start, .cable_test_get_status = lan87xx_cable_test_get_status, } @@ -528,7 +790,8 @@ static struct phy_driver microchip_t1_phy_driver[] = { module_phy_driver(microchip_t1_phy_driver); static struct mdio_device_id __maybe_unused microchip_t1_tbl[] = { - { 0x0007c150, 0xfffffff0 }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN87XX) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN937X) }, { } }; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 5b53a3e23c89..06943889d747 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -74,6 +74,7 @@ struct phylink { struct work_struct resolve; bool mac_link_dropped; + bool using_mac_select_pcs; struct sfp_bus *sfp_bus; bool sfp_may_have_phy; @@ -416,7 +417,7 @@ static int phylink_validate_mac_and_pcs(struct phylink *pl, int ret; /* Get the PCS for this interface mode */ - if (pl->mac_ops->mac_select_pcs) { + if (pl->using_mac_select_pcs) { pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface); if (IS_ERR(pcs)) return PTR_ERR(pcs); @@ -791,7 +792,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, phylink_dbg(pl, "major config %s\n", phy_modes(state->interface)); - if (pl->mac_ops->mac_select_pcs) { + if (pl->using_mac_select_pcs) { pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface); if (IS_ERR(pcs)) { phylink_err(pl, @@ -814,8 +815,18 @@ static void phylink_major_config(struct phylink *pl, bool restart, /* If we have a new PCS, switch to the new PCS after preparing the MAC * for the change. 
*/ - if (pcs) - phylink_set_pcs(pl, pcs); + if (pcs) { + pl->pcs = pcs; + pl->pcs_ops = pcs->ops; + + if (!pl->phylink_disable_state && + pl->cfg_link_an_mode == MLO_AN_INBAND) { + if (pcs->poll) + mod_timer(&pl->link_poll, jiffies + HZ); + else + del_timer(&pl->link_poll); + } + } phylink_mac_config(pl, state); @@ -1171,9 +1182,8 @@ static int phylink_register_sfp(struct phylink *pl, bus = sfp_bus_find_fwnode(fwnode); if (IS_ERR(bus)) { - ret = PTR_ERR(bus); - phylink_err(pl, "unable to attach SFP bus: %d\n", ret); - return ret; + phylink_err(pl, "unable to attach SFP bus: %pe\n", bus); + return PTR_ERR(bus); } pl->sfp_bus = bus; @@ -1205,11 +1215,17 @@ struct phylink *phylink_create(struct phylink_config *config, phy_interface_t iface, const struct phylink_mac_ops *mac_ops) { + bool using_mac_select_pcs = false; struct phylink *pl; int ret; - /* Validate the supplied configuration */ if (mac_ops->mac_select_pcs && + mac_ops->mac_select_pcs(config, PHY_INTERFACE_MODE_NA) != + ERR_PTR(-EOPNOTSUPP)) + using_mac_select_pcs = true; + + /* Validate the supplied configuration */ + if (using_mac_select_pcs && phy_interface_empty(config->supported_interfaces)) { dev_err(config->dev, "phylink: error: empty supported_interfaces but mac_select_pcs() method present\n"); @@ -1233,6 +1249,7 @@ struct phylink *phylink_create(struct phylink_config *config, return ERR_PTR(-EINVAL); } + pl->using_mac_select_pcs = using_mac_select_pcs; pl->phy_state.interface = iface; pl->link_interface = iface; if (iface == PHY_INTERFACE_MODE_MOCA) @@ -1279,36 +1296,6 @@ struct phylink *phylink_create(struct phylink_config *config, EXPORT_SYMBOL_GPL(phylink_create); /** - * phylink_set_pcs() - set the current PCS for phylink to use - * @pl: a pointer to a &struct phylink returned from phylink_create() - * @pcs: a pointer to the &struct phylink_pcs - * - * Bind the MAC PCS to phylink. This may be called after phylink_create(). 
- * If it is desired to dynamically change the PCS, then the preferred method - * is to use mac_select_pcs(), but it may also be called in mac_prepare() - * or mac_config(). - * - * Please note that there are behavioural changes with the mac_config() - * callback if a PCS is present (denoting a newer setup) so removing a PCS - * is not supported, and if a PCS is going to be used, it must be registered - * by calling phylink_set_pcs() at the latest in the first mac_config() call. - */ -void phylink_set_pcs(struct phylink *pl, struct phylink_pcs *pcs) -{ - pl->pcs = pcs; - pl->pcs_ops = pcs->ops; - - if (!pl->phylink_disable_state && - pl->cfg_link_an_mode == MLO_AN_INBAND) { - if (pl->config->pcs_poll || pcs->poll) - mod_timer(&pl->link_poll, jiffies + HZ); - else - del_timer(&pl->link_poll); - } -} -EXPORT_SYMBOL_GPL(phylink_set_pcs); - -/** * phylink_destroy() - cleanup and destroy the phylink instance * @pl: a pointer to a &struct phylink returned from phylink_create() * @@ -1392,11 +1379,11 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, ret = phylink_validate(pl, supported, &config); if (ret) { - phylink_warn(pl, "validation of %s with support %*pb and advertisement %*pb failed: %d\n", + phylink_warn(pl, "validation of %s with support %*pb and advertisement %*pb failed: %pe\n", phy_modes(config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, phy->supported, __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising, - ret); + ERR_PTR(ret)); return ret; } @@ -1673,7 +1660,6 @@ void phylink_start(struct phylink *pl) poll |= pl->config->poll_fixed_state; break; case MLO_AN_INBAND: - poll |= pl->config->pcs_poll; if (pl->pcs) poll |= pl->pcs->poll; break; @@ -2596,8 +2582,9 @@ static int phylink_sfp_config(struct phylink *pl, u8 mode, /* Ignore errors if we're expecting a PHY to attach later */ ret = phylink_validate(pl, support, &config); if (ret) { - phylink_err(pl, "validation with support %*pb failed: %d\n", - __ETHTOOL_LINK_MODE_MASK_NBITS, 
support, ret); + phylink_err(pl, "validation with support %*pb failed: %pe\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, support, + ERR_PTR(ret)); return ret; } @@ -2613,10 +2600,12 @@ static int phylink_sfp_config(struct phylink *pl, u8 mode, linkmode_copy(support1, support); ret = phylink_validate(pl, support1, &config); if (ret) { - phylink_err(pl, "validation of %s/%s with support %*pb failed: %d\n", + phylink_err(pl, + "validation of %s/%s with support %*pb failed: %pe\n", phylink_an_mode_str(mode), phy_modes(config.interface), - __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret); + __ETHTOOL_LINK_MODE_MASK_NBITS, support, + ERR_PTR(ret)); return ret; } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4720b24ca51b..4dfb79807823 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -471,8 +471,8 @@ static unsigned int sfp_soft_get_state(struct sfp *sfp) state |= SFP_F_TX_FAULT; } else { dev_err_ratelimited(sfp->dev, - "failed to read SFP soft status: %d\n", - ret); + "failed to read SFP soft status: %pe\n", + ERR_PTR(ret)); /* Preserve the current state */ state = sfp->state; } @@ -1311,7 +1311,8 @@ static void sfp_hwmon_probe(struct work_struct *work) mod_delayed_work(system_wq, &sfp->hwmon_probe, T_PROBE_RETRY_SLOW); } else { - dev_warn(sfp->dev, "hwmon probe failed: %d\n", err); + dev_warn(sfp->dev, "hwmon probe failed: %pe\n", + ERR_PTR(err)); } return; } @@ -1516,14 +1517,15 @@ static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) if (phy == ERR_PTR(-ENODEV)) return PTR_ERR(phy); if (IS_ERR(phy)) { - dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy)); + dev_err(sfp->dev, "mdiobus scan returned %pe\n", phy); return PTR_ERR(phy); } err = phy_device_register(phy); if (err) { phy_device_free(phy); - dev_err(sfp->dev, "phy_device_register failed: %d\n", err); + dev_err(sfp->dev, "phy_device_register failed: %pe\n", + ERR_PTR(err)); return err; } @@ -1531,7 +1533,7 @@ static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) if 
(err) { phy_device_remove(phy); phy_device_free(phy); - dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err); + dev_err(sfp->dev, "sfp_add_phy failed: %pe\n", ERR_PTR(err)); return err; } @@ -1708,7 +1710,7 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); if (err != sizeof(val)) { - dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err); + dev_err(sfp->dev, "Failed to read EEPROM: %pe\n", ERR_PTR(err)); return -EAGAIN; } @@ -1726,7 +1728,8 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); if (err != sizeof(val)) { - dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err); + dev_err(sfp->dev, "Failed to write EEPROM: %pe\n", + ERR_PTR(err)); return -EAGAIN; } @@ -1778,7 +1781,9 @@ static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id) id->base.connector = SFF8024_CONNECTOR_LC; err = sfp_write(sfp, false, SFP_PHYS_ID, &id->base, 3); if (err != 3) { - dev_err(sfp->dev, "Failed to rewrite module EEPROM: %d\n", err); + dev_err(sfp->dev, + "Failed to rewrite module EEPROM: %pe\n", + ERR_PTR(err)); return err; } @@ -1789,7 +1794,9 @@ static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id) check = sfp_check(&id->base, sizeof(id->base) - 1); err = sfp_write(sfp, false, SFP_CC_BASE, &check, 1); if (err != 1) { - dev_err(sfp->dev, "Failed to update base structure checksum in fiber module EEPROM: %d\n", err); + dev_err(sfp->dev, + "Failed to update base structure checksum in fiber module EEPROM: %pe\n", + ERR_PTR(err)); return err; } } @@ -1814,12 +1821,13 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); if (ret < 0) { if (report) - dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); + dev_err(sfp->dev, "failed to read EEPROM: %pe\n", + ERR_PTR(ret)); return -EAGAIN; } if (ret != sizeof(id.base)) { - 
dev_err(sfp->dev, "EEPROM short read: %d\n", ret); + dev_err(sfp->dev, "EEPROM short read: %pe\n", ERR_PTR(ret)); return -EAGAIN; } @@ -1839,13 +1847,15 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); if (ret < 0) { if (report) - dev_err(sfp->dev, "failed to read EEPROM: %d\n", - ret); + dev_err(sfp->dev, + "failed to read EEPROM: %pe\n", + ERR_PTR(ret)); return -EAGAIN; } if (ret != sizeof(id.base)) { - dev_err(sfp->dev, "EEPROM short read: %d\n", ret); + dev_err(sfp->dev, "EEPROM short read: %pe\n", + ERR_PTR(ret)); return -EAGAIN; } } @@ -1887,12 +1897,13 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) ret = sfp_read(sfp, false, SFP_CC_BASE + 1, &id.ext, sizeof(id.ext)); if (ret < 0) { if (report) - dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); + dev_err(sfp->dev, "failed to read EEPROM: %pe\n", + ERR_PTR(ret)); return -EAGAIN; } if (ret != sizeof(id.ext)) { - dev_err(sfp->dev, "EEPROM short read: %d\n", ret); + dev_err(sfp->dev, "EEPROM short read: %pe\n", ERR_PTR(ret)); return -EAGAIN; } @@ -2046,7 +2057,8 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) err = sfp_hwmon_insert(sfp); if (err) - dev_warn(sfp->dev, "hwmon probe failed: %d\n", err); + dev_warn(sfp->dev, "hwmon probe failed: %pe\n", + ERR_PTR(err)); sfp_sm_mod_next(sfp, SFP_MOD_WAITDEV, 0); fallthrough; diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 8b5445a724ce..ff37f8ba6758 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -517,7 +517,7 @@ static int ks8995_probe(struct spi_device *spi) return 0; } -static int ks8995_remove(struct spi_device *spi) +static void ks8995_remove(struct spi_device *spi) { struct ks8995_switch *ks = spi_get_drvdata(spi); @@ -526,8 +526,6 @@ static int ks8995_remove(struct spi_device *spi) /* assert reset */ if (ks->pdata && gpio_is_valid(ks->pdata->reset_gpio)) 
gpiod_set_value(gpio_to_desc(ks->pdata->reset_gpio), 1); - - return 0; } /* ------------------------------------------------------------------------ */ diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 8e3a28ba6b28..ba2ef5437e16 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1198,7 +1198,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, struct xdp_buff *xdp; int i; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fed85447701a..2a0d8a5d7aec 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2388,9 +2388,10 @@ static int tun_xdp_one(struct tun_struct *tun, struct virtio_net_hdr *gso = &hdr->gso; struct bpf_prog *xdp_prog; struct sk_buff *skb = NULL; + struct sk_buff_head *queue; u32 rxhash = 0, act; int buflen = hdr->buflen; - int err = 0; + int ret = 0; bool skb_xdp = false; struct page *page; @@ -2405,13 +2406,13 @@ static int tun_xdp_one(struct tun_struct *tun, xdp_set_data_meta_invalid(xdp); act = bpf_prog_run_xdp(xdp_prog, xdp); - err = tun_xdp_act(tun, xdp_prog, xdp, act); - if (err < 0) { + ret = tun_xdp_act(tun, xdp_prog, xdp, act); + if (ret < 0) { put_page(virt_to_head_page(xdp->data)); - return err; + return ret; } - switch (err) { + switch (ret) { case XDP_REDIRECT: *flush = true; fallthrough; @@ -2435,7 +2436,7 @@ static int tun_xdp_one(struct tun_struct *tun, build: skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { - err = -ENOMEM; + ret = -ENOMEM; goto out; } @@ -2445,7 +2446,7 @@ build: if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); - err = -EINVAL; + ret = -EINVAL; goto out; } @@ -2455,16 +2456,27 @@ build: skb_record_rx_queue(skb, tfile->queue_index); if (skb_xdp) { - err = 
do_xdp_generic(xdp_prog, skb); - if (err != XDP_PASS) + ret = do_xdp_generic(xdp_prog, skb); + if (ret != XDP_PASS) { + ret = 0; goto out; + } } if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 && !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); - netif_receive_skb(skb); + if (tfile->napi_enabled) { + queue = &tfile->sk.sk_write_queue; + spin_lock(&queue->lock); + __skb_queue_tail(queue, skb); + spin_unlock(&queue->lock); + ret = 1; + } else { + netif_receive_skb(skb); + ret = 0; + } /* No need to disable preemption here since this function is * always called with bh disabled @@ -2475,7 +2487,7 @@ build: tun_flow_update(tun, rxhash, tfile); out: - return err; + return ret; } static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) @@ -2489,10 +2501,11 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (!tun) return -EBADFD; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { struct tun_page tpage; int n = ctl->num; - int flush = 0; + int flush = 0, queued = 0; memset(&tpage, 0, sizeof(tpage)); @@ -2501,12 +2514,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) for (i = 0; i < n; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; - tun_xdp_one(tun, tfile, xdp, &flush, &tpage); + ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage); + if (ret > 0) + queued += ret; } if (flush) xdp_do_flush(); + if (tfile->napi_enabled && queued > 0) + napi_schedule(&tfile->napi); + rcu_read_unlock(); local_bh_enable(); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 524805285019..632fa6c1d5e3 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -491,7 +491,8 @@ void asix_set_multicast(struct net_device *net) asix_write_cmd_async(dev, AX_CMD_WRITE_RX_CTL, rx_ctl, 0, 0, NULL); } -int asix_mdio_read(struct net_device *netdev, int 
phy_id, int loc) +static int __asix_mdio_read(struct net_device *netdev, int phy_id, int loc, + bool in_pm) { struct usbnet *dev = netdev_priv(netdev); __le16 res; @@ -499,18 +500,18 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) mutex_lock(&dev->phy_mutex); - ret = asix_check_host_enable(dev, 0); + ret = asix_check_host_enable(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, (__u16)loc, 2, - &res, 0); + &res, in_pm); if (ret < 0) goto out; - ret = asix_set_hw_mii(dev, 0); + ret = asix_set_hw_mii(dev, in_pm); out: mutex_unlock(&dev->phy_mutex); @@ -520,8 +521,13 @@ out: return ret < 0 ? ret : le16_to_cpu(res); } +int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) +{ + return __asix_mdio_read(netdev, phy_id, loc, false); +} + static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc, - int val) + int val, bool in_pm) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); @@ -532,16 +538,16 @@ static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc, mutex_lock(&dev->phy_mutex); - ret = asix_check_host_enable(dev, 0); + ret = asix_check_host_enable(dev, in_pm); if (ret == -ENODEV) goto out; ret = asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, - &res, 0); + &res, in_pm); if (ret < 0) goto out; - ret = asix_set_hw_mii(dev, 0); + ret = asix_set_hw_mii(dev, in_pm); out: mutex_unlock(&dev->phy_mutex); @@ -550,7 +556,7 @@ out: void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { - __asix_mdio_write(netdev, phy_id, loc, val); + __asix_mdio_write(netdev, phy_id, loc, val, false); } /* MDIO read and write wrappers for phylib */ @@ -558,67 +564,25 @@ int asix_mdio_bus_read(struct mii_bus *bus, int phy_id, int regnum) { struct usbnet *priv = bus->priv; - return asix_mdio_read(priv->net, phy_id, regnum); + return 
__asix_mdio_read(priv->net, phy_id, regnum, false); } int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum, u16 val) { struct usbnet *priv = bus->priv; - return __asix_mdio_write(priv->net, phy_id, regnum, val); + return __asix_mdio_write(priv->net, phy_id, regnum, val, false); } int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) { - struct usbnet *dev = netdev_priv(netdev); - __le16 res; - int ret; - - mutex_lock(&dev->phy_mutex); - - ret = asix_check_host_enable(dev, 1); - if (ret == -ENODEV || ret == -ETIMEDOUT) { - mutex_unlock(&dev->phy_mutex); - return ret; - } - - ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res, 1); - if (ret < 0) { - mutex_unlock(&dev->phy_mutex); - return ret; - } - asix_set_hw_mii(dev, 1); - mutex_unlock(&dev->phy_mutex); - - netdev_dbg(dev->net, "asix_mdio_read_nopm() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", - phy_id, loc, le16_to_cpu(res)); - - return le16_to_cpu(res); + return __asix_mdio_read(netdev, phy_id, loc, true); } void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) { - struct usbnet *dev = netdev_priv(netdev); - __le16 res = cpu_to_le16(val); - int ret; - - netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", - phy_id, loc, val); - - mutex_lock(&dev->phy_mutex); - - ret = asix_check_host_enable(dev, 1); - if (ret == -ENODEV) { - mutex_unlock(&dev->phy_mutex); - return; - } - - asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, - (__u16)loc, 2, &res, 1); - asix_set_hw_mii(dev, 1); - mutex_unlock(&dev->phy_mutex); + __asix_mdio_write(netdev, phy_id, loc, val, true); } void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c index 13a9a83b8538..46af78caf457 100644 --- a/drivers/net/usb/gl620a.c +++ b/drivers/net/usb/gl620a.c @@ -56,7 +56,7 @@ struct gl_packet { __le32 packet_length; - char packet_data [1]; + char 
packet_data[]; }; struct gl_header { diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index b658510cc9a4..5a53e63d33a6 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -413,7 +413,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN) + if (len > ETH_FRAME_LEN || len > skb->len) return 0; /* the last packet of current skb */ diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile new file mode 100644 index 000000000000..d4c255499b72 --- /dev/null +++ b/drivers/net/vxlan/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the vxlan driver +# + +obj-$(CONFIG_VXLAN) += vxlan.o + +vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan/vxlan_core.c index d0dc90d3dac2..4ab09dd5a32a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -34,10 +34,10 @@ #include <net/ip6_checksum.h> #endif +#include "vxlan_private.h" + #define VXLAN_VERSION "0.1" -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<<PORT_HASH_BITS) #define FDB_AGE_DEFAULT 300 /* 5 min */ #define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */ @@ -53,41 +53,15 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -static unsigned int vxlan_net_id; -static struct rtnl_link_ops vxlan_link_ops; +unsigned int vxlan_net_id; -static const u8 all_zeros_mac[ETH_ALEN + 2]; +const u8 all_zeros_mac[ETH_ALEN + 2]; +static struct rtnl_link_ops vxlan_link_ops; static int vxlan_sock_add(struct vxlan_dev *vxlan); static void vxlan_vs_del_dev(struct vxlan_dev *vxlan); -/* per-network namespace private data for this module */ -struct vxlan_net { - struct list_head vxlan_list; - struct hlist_head sock_list[PORT_HASH_SIZE]; - spinlock_t sock_lock; - struct notifier_block 
nexthop_notifier_block; -}; - -/* Forwarding table entry */ -struct vxlan_fdb { - struct hlist_node hlist; /* linked list of entries */ - struct rcu_head rcu; - unsigned long updated; /* jiffies */ - unsigned long used; - struct list_head remotes; - u8 eth_addr[ETH_ALEN]; - u16 state; /* see ndm_state */ - __be32 vni; - u16 flags; /* see ndm_flags and below */ - struct list_head nh_list; - struct nexthop __rcu *nh; - struct vxlan_dev __rcu *vdev; -}; - -#define NTF_VXLAN_ADDED_BY_USER 0x100 - /* salt for hash table */ static u32 vxlan_salt __read_mostly; @@ -98,17 +72,6 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) } #if IS_ENABLED(CONFIG_IPV6) -static inline -bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) -{ - if (a->sa.sa_family != b->sa.sa_family) - return false; - if (a->sa.sa_family == AF_INET6) - return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr); - else - return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; -} - static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) { if (nla_len(nla) >= sizeof(struct in6_addr)) { @@ -135,12 +98,6 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, #else /* !CONFIG_IPV6 */ -static inline -bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) -{ - return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; -} - static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) { if (nla_len(nla) >= sizeof(struct in6_addr)) { @@ -161,37 +118,6 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, } #endif -/* Virtual Network hash table head */ -static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni) -{ - return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)]; -} - -/* Socket hash table head */ -static inline struct hlist_head *vs_head(struct net *net, __be16 port) -{ - struct vxlan_net *vn = net_generic(net, vxlan_net_id); - - return &vn->sock_list[hash_32(ntohs(port), 
PORT_HASH_BITS)]; -} - -/* First remote destination for a forwarding entry. - * Guaranteed to be non-NULL because remotes are never deleted. - */ -static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb) -{ - if (rcu_access_pointer(fdb->nh)) - return NULL; - return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list); -} - -static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) -{ - if (rcu_access_pointer(fdb->nh)) - return NULL; - return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); -} - /* Find VXLAN socket based on network namespace, address family, UDP port, * enabled unshareable flags and socket device binding (see l3mdev with * non-default VRF). @@ -213,18 +139,29 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, return NULL; } -static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex, - __be32 vni) +static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, + int ifindex, __be32 vni, + struct vxlan_vni_node **vninode) { + struct vxlan_vni_node *vnode; struct vxlan_dev_node *node; /* For flow based devices, map all packets to VNI 0 */ - if (vs->flags & VXLAN_F_COLLECT_METADATA) + if (vs->flags & VXLAN_F_COLLECT_METADATA && + !(vs->flags & VXLAN_F_VNIFILTER)) vni = 0; hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) { - if (node->vxlan->default_dst.remote_vni != vni) + if (!node->vxlan) continue; + vnode = NULL; + if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) { + vnode = vxlan_vnifilter_lookup(node->vxlan, vni); + if (!vnode) + continue; + } else if (node->vxlan->default_dst.remote_vni != vni) { + continue; + } if (IS_ENABLED(CONFIG_IPV6)) { const struct vxlan_config *cfg = &node->vxlan->cfg; @@ -234,6 +171,8 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex, continue; } + if (vninode) + *vninode = vnode; return node->vxlan; } @@ -251,7 +190,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, int 
ifindex, if (!vs) return NULL; - return vxlan_vs_find_vni(vs, ifindex, vni); + return vxlan_vs_find_vni(vs, ifindex, vni, NULL); } /* Fill in neighbour message in skbuff. */ @@ -493,7 +432,7 @@ static u32 eth_hash(const unsigned char *addr) return hash_64(value, FDB_HASH_BITS); } -static u32 eth_vni_hash(const unsigned char *addr, __be32 vni) +u32 eth_vni_hash(const unsigned char *addr, __be32 vni) { /* use 1 byte of OUI and 3 bytes of NIC */ u32 key = get_unaligned((u32 *)(addr + 2)); @@ -501,7 +440,7 @@ static u32 eth_vni_hash(const unsigned char *addr, __be32 vni) return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1); } -static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni) +u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni) { if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) return eth_vni_hash(mac, vni); @@ -920,12 +859,12 @@ err_inval: return err; } -static int vxlan_fdb_create(struct vxlan_dev *vxlan, - const u8 *mac, union vxlan_addr *ip, - __u16 state, __be16 port, __be32 src_vni, - __be32 vni, __u32 ifindex, __u16 ndm_flags, - u32 nhid, struct vxlan_fdb **fdb, - struct netlink_ext_ack *extack) +int vxlan_fdb_create(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __be16 port, __be32 src_vni, + __be32 vni, __u32 ifindex, __u16 ndm_flags, + u32 nhid, struct vxlan_fdb **fdb, + struct netlink_ext_ack *extack) { struct vxlan_rdst *rd = NULL; struct vxlan_fdb *f; @@ -1150,13 +1089,13 @@ err_notify: } /* Add new entry to forwarding table -- assumes lock held */ -static int vxlan_fdb_update(struct vxlan_dev *vxlan, - const u8 *mac, union vxlan_addr *ip, - __u16 state, __u16 flags, - __be16 port, __be32 src_vni, __be32 vni, - __u32 ifindex, __u16 ndm_flags, u32 nhid, - bool swdev_notify, - struct netlink_ext_ack *extack) +int vxlan_fdb_update(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __u16 flags, + __be16 port, __be32 src_vni, __be32 vni, + __u32 
ifindex, __u16 ndm_flags, u32 nhid, + bool swdev_notify, + struct netlink_ext_ack *extack) { struct vxlan_fdb *f; @@ -1307,10 +1246,10 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, - const unsigned char *addr, union vxlan_addr ip, - __be16 port, __be32 src_vni, __be32 vni, - u32 ifindex, bool swdev_notify) +int __vxlan_fdb_delete(struct vxlan_dev *vxlan, + const unsigned char *addr, union vxlan_addr ip, + __be16 port, __be32 src_vni, __be32 vni, + u32 ifindex, bool swdev_notify) { struct vxlan_rdst *rd = NULL; struct vxlan_fdb *f; @@ -1519,56 +1458,6 @@ static bool vxlan_snoop(struct net_device *dev, return false; } -/* See if multicast group is already in use by other ID */ -static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) -{ - struct vxlan_dev *vxlan; - struct vxlan_sock *sock4; -#if IS_ENABLED(CONFIG_IPV6) - struct vxlan_sock *sock6; -#endif - unsigned short family = dev->default_dst.remote_ip.sa.sa_family; - - sock4 = rtnl_dereference(dev->vn4_sock); - - /* The vxlan_sock is only used by dev, leaving group has - * no effect on other vxlan devices. 
- */ - if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1) - return false; -#if IS_ENABLED(CONFIG_IPV6) - sock6 = rtnl_dereference(dev->vn6_sock); - if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1) - return false; -#endif - - list_for_each_entry(vxlan, &vn->vxlan_list, next) { - if (!netif_running(vxlan->dev) || vxlan == dev) - continue; - - if (family == AF_INET && - rtnl_dereference(vxlan->vn4_sock) != sock4) - continue; -#if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && - rtnl_dereference(vxlan->vn6_sock) != sock6) - continue; -#endif - - if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip, - &dev->default_dst.remote_ip)) - continue; - - if (vxlan->default_dst.remote_ifindex != - dev->default_dst.remote_ifindex) - continue; - - return true; - } - - return false; -} - static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) { struct vxlan_net *vn; @@ -1602,7 +1491,10 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan) RCU_INIT_POINTER(vxlan->vn4_sock, NULL); synchronize_net(); - vxlan_vs_del_dev(vxlan); + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + vxlan_vs_del_vnigrp(vxlan); + else + vxlan_vs_del_dev(vxlan); if (__vxlan_sock_release_prep(sock4)) { udp_tunnel_sock_release(sock4->sock); @@ -1617,76 +1509,6 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan) #endif } -/* Update multicast group membership when first VNI on - * multicast address is brought up - */ -static int vxlan_igmp_join(struct vxlan_dev *vxlan) -{ - struct sock *sk; - union vxlan_addr *ip = &vxlan->default_dst.remote_ip; - int ifindex = vxlan->default_dst.remote_ifindex; - int ret = -EINVAL; - - if (ip->sa.sa_family == AF_INET) { - struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); - struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, - .imr_ifindex = ifindex, - }; - - sk = sock4->sock->sk; - lock_sock(sk); - ret = ip_mc_join_group(sk, &mreq); - release_sock(sk); -#if IS_ENABLED(CONFIG_IPV6) - } 
else { - struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); - - sk = sock6->sock->sk; - lock_sock(sk); - ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, - &ip->sin6.sin6_addr); - release_sock(sk); -#endif - } - - return ret; -} - -/* Inverse of vxlan_igmp_join when last VNI is brought down */ -static int vxlan_igmp_leave(struct vxlan_dev *vxlan) -{ - struct sock *sk; - union vxlan_addr *ip = &vxlan->default_dst.remote_ip; - int ifindex = vxlan->default_dst.remote_ifindex; - int ret = -EINVAL; - - if (ip->sa.sa_family == AF_INET) { - struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); - struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, - .imr_ifindex = ifindex, - }; - - sk = sock4->sock->sk; - lock_sock(sk); - ret = ip_mc_leave_group(sk, &mreq); - release_sock(sk); -#if IS_ENABLED(CONFIG_IPV6) - } else { - struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); - - sk = sock6->sock->sk; - lock_sock(sk); - ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, - &ip->sin6.sin6_addr); - release_sock(sk); -#endif - } - - return ret; -} - static bool vxlan_remcsum(struct vxlanhdr *unparsed, struct sk_buff *skb, u32 vxflags) { @@ -1828,6 +1650,7 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { + struct vxlan_vni_node *vninode = NULL; struct vxlan_dev *vxlan; struct vxlan_sock *vs; struct vxlanhdr unparsed; @@ -1860,7 +1683,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); - vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni); + vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode); if (!vxlan) goto drop; @@ -1930,6 +1753,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { ++vxlan->dev->stats.rx_frame_errors; ++vxlan->dev->stats.rx_errors; + vxlan_vnifilter_count(vxlan, 
vni, vninode, + VXLAN_VNI_STATS_RX_ERRORS, 0); goto drop; } @@ -1938,10 +1763,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (unlikely(!(vxlan->dev->flags & IFF_UP))) { rcu_read_unlock(); atomic_long_inc(&vxlan->dev->rx_dropped); + vxlan_vnifilter_count(vxlan, vni, vninode, + VXLAN_VNI_STATS_RX_DROPS, 0); goto drop; } dev_sw_netstats_rx_add(vxlan->dev, skb->len); + vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX, skb->len); gro_cells_receive(&vxlan->gro_cells, skb); rcu_read_unlock(); @@ -1975,7 +1803,7 @@ static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb) return -ENOENT; vni = vxlan_vni(hdr->vx_vni); - vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni); + vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, NULL); if (!vxlan) return -ENOENT; @@ -2049,8 +1877,12 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) reply->ip_summed = CHECKSUM_UNNECESSARY; reply->pkt_type = PACKET_HOST; - if (netif_rx_ni(reply) == NET_RX_DROP) + if (netif_rx_ni(reply) == NET_RX_DROP) { dev->stats.rx_dropped++; + vxlan_vnifilter_count(vxlan, vni, NULL, + VXLAN_VNI_STATS_RX_DROPS, 0); + } + } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) { union vxlan_addr ipa = { .sin.sin_addr.s_addr = tip, @@ -2204,9 +2036,11 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) if (reply == NULL) goto out; - if (netif_rx_ni(reply) == NET_RX_DROP) + if (netif_rx_ni(reply) == NET_RX_DROP) { dev->stats.rx_dropped++; - + vxlan_vnifilter_count(vxlan, vni, NULL, + VXLAN_VNI_STATS_RX_DROPS, 0); + } } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) { union vxlan_addr ipa = { .sin6.sin6_addr = msg->target, @@ -2540,15 +2374,20 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, tx_stats->tx_packets++; tx_stats->tx_bytes += len; u64_stats_update_end(&tx_stats->syncp); + vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len); if (__netif_rx(skb) == 
NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); + vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX, + len); } else { drop: dev->stats.rx_dropped++; + vxlan_vnifilter_count(dst_vxlan, vni, NULL, + VXLAN_VNI_STATS_RX_DROPS, 0); } rcu_read_unlock(); } @@ -2578,6 +2417,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, vxlan->cfg.flags); if (!dst_vxlan) { dev->stats.tx_errors++; + vxlan_vnifilter_count(vxlan, vni, NULL, + VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb(skb); return -ENOENT; @@ -2601,15 +2442,19 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, union vxlan_addr remote_ip, local_ip; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; + unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; - __be32 vni, label; __u8 tos, ttl; int ifindex; int err; u32 flags = vxlan->cfg.flags; bool udp_sum = false; bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); + __be32 vni = 0; +#if IS_ENABLED(CONFIG_IPV6) + __be32 label; +#endif info = skb_tunnel_info(skb); @@ -2647,7 +2492,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); else udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); +#if IS_ENABLED(CONFIG_IPV6) label = vxlan->cfg.label; +#endif } else { if (!info) { WARN_ONCE(1, "%s: Missing encapsulation instructions\n", @@ -2674,7 +2521,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ttl = info->key.ttl; tos = info->key.tos; +#if IS_ENABLED(CONFIG_IPV6) label = info->key.label; +#endif udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); } src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, @@ -2821,12 +2670,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, label, src_port, dst_port, !udp_sum); #endif } + 
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len); out_unlock: rcu_read_unlock(); return; drop: dev->stats.tx_dropped++; + vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); return; @@ -2838,6 +2689,7 @@ tx_error: dev->stats.tx_carrier_errors++; dst_release(ndst); dev->stats.tx_errors++; + vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb(skb); } @@ -2870,6 +2722,8 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, drop: dev->stats.tx_dropped++; + vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, + VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); } @@ -2944,6 +2798,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxlan_fdb_miss(vxlan, eth->h_dest); dev->stats.tx_dropped++; + vxlan_vnifilter_count(vxlan, vni, NULL, + VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb(skb); return NETDEV_TX_OK; } @@ -3044,6 +2900,9 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + vxlan_vnigroup_init(vxlan); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; @@ -3073,6 +2932,9 @@ static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + vxlan_vnigroup_uninit(vxlan); + gro_cells_destroy(&vxlan->gro_cells); vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); @@ -3090,14 +2952,10 @@ static int vxlan_open(struct net_device *dev) if (ret < 0) return ret; - if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { - ret = vxlan_igmp_join(vxlan); - if (ret == -EADDRINUSE) - ret = 0; - if (ret) { - vxlan_sock_release(vxlan); - return ret; - } + ret = vxlan_multicast_join(vxlan); + if (ret) { + vxlan_sock_release(vxlan); + return ret; } if (vxlan->cfg.age_interval) @@ -3134,12 +2992,9 @@ static void vxlan_flush(struct vxlan_dev *vxlan, 
bool do_all) static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); int ret = 0; - if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && - !vxlan_group_used(vn, vxlan)) - ret = vxlan_igmp_leave(vxlan); + vxlan_multicast_leave(vxlan); del_timer_sync(&vxlan->age_timer); @@ -3369,6 +3224,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG }, [IFLA_VXLAN_DF] = { .type = NLA_U8 }, + [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -3554,6 +3410,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA; struct vxlan_sock *vs = NULL; struct vxlan_dev_node *node; int l3mdev_index = 0; @@ -3589,7 +3446,12 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) rcu_assign_pointer(vxlan->vn4_sock, vs); node = &vxlan->hlist4; } - vxlan_vs_add_dev(vs, vxlan, node); + + if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + vxlan_vs_add_vnigrp(vxlan, vs, ipv6); + else + vxlan_vs_add_dev(vs, vxlan, node); + return 0; } @@ -3616,13 +3478,42 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan) return ret; } +int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan, + struct vxlan_config *conf, __be32 vni) +{ + struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); + struct vxlan_dev *tmp; + + list_for_each_entry(tmp, &vn->vxlan_list, next) { + if (tmp == vxlan) + continue; + if (tmp->cfg.flags & VXLAN_F_VNIFILTER) { + if (!vxlan_vnifilter_lookup(tmp, vni)) + continue; + } else if (tmp->cfg.vni != vni) { + continue; + } + if (tmp->cfg.dst_port != 
conf->dst_port) + continue; + if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) != + (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6))) + continue; + + if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) && + tmp->cfg.remote_ifindex != conf->remote_ifindex) + continue; + + return -EEXIST; + } + + return 0; +} + static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, struct net_device **lower, struct vxlan_dev *old, struct netlink_ext_ack *extack) { - struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); - struct vxlan_dev *tmp; bool use_ipv6 = false; if (conf->flags & VXLAN_F_GPE) { @@ -3755,22 +3646,7 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, if (!conf->age_interval) conf->age_interval = FDB_AGE_DEFAULT; - list_for_each_entry(tmp, &vn->vxlan_list, next) { - if (tmp == old) - continue; - - if (tmp->cfg.vni != conf->vni) - continue; - if (tmp->cfg.dst_port != conf->dst_port) - continue; - if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) != - (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6))) - continue; - - if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) && - tmp->cfg.remote_ifindex != conf->remote_ifindex) - continue; - + if (vxlan_vni_in_use(src_net, old, conf, conf->vni)) { NL_SET_ERR_MSG(extack, "A VXLAN device with the specified VNI already exists"); return -EEXIST; @@ -4226,6 +4102,21 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_VXLAN_DF]) conf->df = nla_get_u8(data[IFLA_VXLAN_DF]); + if (data[IFLA_VXLAN_VNIFILTER]) { + err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER, + VXLAN_F_VNIFILTER, changelink, false, + extack); + if (err) + return err; + + if ((conf->flags & VXLAN_F_VNIFILTER) && + !(conf->flags & VXLAN_F_COLLECT_METADATA)) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER], + "vxlan vnifilter only valid in collect metadata mode"); + return -EINVAL; + } + } + return 0; } @@ -4301,6 +4192,19 @@ static int 
vxlan_changelink(struct net_device *dev, struct nlattr *tb[], dst->remote_ifindex, true); spin_unlock_bh(&vxlan->hash_lock[hash_index]); + + /* If vni filtering device, also update fdb entries of + * all vnis that were using default remote ip + */ + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { + err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip, + &conf.remote_ip, extack); + if (err) { + netdev_adjacent_change_abort(dst->remote_dev, + lowerdev, dev); + return err; + } + } } if (conf.age_interval != vxlan->cfg.age_interval) @@ -4446,6 +4350,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) goto nla_put_failure; + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER && + nla_put_u8(skb, IFLA_VXLAN_VNIFILTER, + !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))) + goto nla_put_failure; + return 0; nla_put_failure: @@ -4805,6 +4714,8 @@ static int __init vxlan_init_module(void) if (rc) goto out4; + vxlan_vnifilter_init(); + return 0; out4: unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); @@ -4819,6 +4730,7 @@ late_initcall(vxlan_init_module); static void __exit vxlan_cleanup_module(void) { + vxlan_vnifilter_uninit(); rtnl_link_unregister(&vxlan_link_ops); unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); unregister_netdevice_notifier(&vxlan_notifier_block); diff --git a/drivers/net/vxlan/vxlan_multicast.c b/drivers/net/vxlan/vxlan_multicast.c new file mode 100644 index 000000000000..a7f2d67dc61b --- /dev/null +++ b/drivers/net/vxlan/vxlan_multicast.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Vxlan multicast group handling + * + */ +#include <linux/kernel.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <linux/igmp.h> +#include <net/vxlan.h> + +#include "vxlan_private.h" + +/* Update multicast group membership when first VNI on + * multicast address is brought up + */ +int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr 
*rip, + int rifindex) +{ + union vxlan_addr *ip = (rip ? : &vxlan->default_dst.remote_ip); + int ifindex = (rifindex ? : vxlan->default_dst.remote_ifindex); + int ret = -EINVAL; + struct sock *sk; + + if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, + .imr_ifindex = ifindex, + }; + + sk = sock4->sock->sk; + lock_sock(sk); + ret = ip_mc_join_group(sk, &mreq); + release_sock(sk); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; + lock_sock(sk); + ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, + &ip->sin6.sin6_addr); + release_sock(sk); +#endif + } + + return ret; +} + +int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, + int rifindex) +{ + union vxlan_addr *ip = (rip ? : &vxlan->default_dst.remote_ip); + int ifindex = (rifindex ? : vxlan->default_dst.remote_ifindex); + int ret = -EINVAL; + struct sock *sk; + + if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, + .imr_ifindex = ifindex, + }; + + sk = sock4->sock->sk; + lock_sock(sk); + ret = ip_mc_leave_group(sk, &mreq); + release_sock(sk); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; + lock_sock(sk); + ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, + &ip->sin6.sin6_addr); + release_sock(sk); +#endif + } + + return ret; +} + +static bool vxlan_group_used_match(union vxlan_addr *ip, int ifindex, + union vxlan_addr *rip, int rifindex) +{ + if (!vxlan_addr_multicast(rip)) + return false; + + if (!vxlan_addr_equal(rip, ip)) + return false; + + if (rifindex != ifindex) + return false; + + return true; +} + +static bool vxlan_group_used_by_vnifilter(struct vxlan_dev *vxlan, + union vxlan_addr 
*ip, int ifindex) +{ + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + struct vxlan_vni_node *v, *tmp; + + if (vxlan_group_used_match(ip, ifindex, + &vxlan->default_dst.remote_ip, + vxlan->default_dst.remote_ifindex)) + return true; + + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + if (!vxlan_addr_multicast(&v->remote_ip)) + continue; + + if (vxlan_group_used_match(ip, ifindex, + &v->remote_ip, + vxlan->default_dst.remote_ifindex)) + return true; + } + + return false; +} + +/* See if multicast group is already in use by other ID */ +bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev, + __be32 vni, union vxlan_addr *rip, int rifindex) +{ + union vxlan_addr *ip = (rip ? : &dev->default_dst.remote_ip); + int ifindex = (rifindex ? : dev->default_dst.remote_ifindex); + struct vxlan_dev *vxlan; + struct vxlan_sock *sock4; +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_sock *sock6; +#endif + unsigned short family = dev->default_dst.remote_ip.sa.sa_family; + + sock4 = rtnl_dereference(dev->vn4_sock); + + /* The vxlan_sock is only used by dev, leaving group has + * no effect on other vxlan devices. 
+ */ + if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1) + return false; + +#if IS_ENABLED(CONFIG_IPV6) + sock6 = rtnl_dereference(dev->vn6_sock); + if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1) + return false; +#endif + + list_for_each_entry(vxlan, &vn->vxlan_list, next) { + if (!netif_running(vxlan->dev) || vxlan == dev) + continue; + + if (family == AF_INET && + rtnl_dereference(vxlan->vn4_sock) != sock4) + continue; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && + rtnl_dereference(vxlan->vn6_sock) != sock6) + continue; +#endif + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { + if (!vxlan_group_used_by_vnifilter(vxlan, ip, ifindex)) + continue; + } else { + if (!vxlan_group_used_match(ip, ifindex, + &vxlan->default_dst.remote_ip, + vxlan->default_dst.remote_ifindex)) + continue; + } + + return true; + } + + return false; +} + +static int vxlan_multicast_join_vnigrp(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + struct vxlan_vni_node *v, *tmp, *vgood = NULL; + int ret = 0; + + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + if (!vxlan_addr_multicast(&v->remote_ip)) + continue; + /* skip if address is same as default address */ + if (vxlan_addr_equal(&v->remote_ip, + &vxlan->default_dst.remote_ip)) + continue; + ret = vxlan_igmp_join(vxlan, &v->remote_ip, 0); + if (ret == -EADDRINUSE) + ret = 0; + if (ret) + goto out; + vgood = v; + } +out: + if (ret) { + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + if (!vxlan_addr_multicast(&v->remote_ip)) + continue; + if (vxlan_addr_equal(&v->remote_ip, + &vxlan->default_dst.remote_ip)) + continue; + vxlan_igmp_leave(vxlan, &v->remote_ip, 0); + if (v == vgood) + break; + } + } + + return ret; +} + +static int vxlan_multicast_leave_vnigrp(struct vxlan_dev *vxlan) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + struct 
vxlan_vni_node *v, *tmp; + int last_err = 0, ret; + + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + if (vxlan_addr_multicast(&v->remote_ip) && + !vxlan_group_used(vn, vxlan, v->vni, &v->remote_ip, + 0)) { + ret = vxlan_igmp_leave(vxlan, &v->remote_ip, 0); + if (ret) + last_err = ret; + } + } + + return last_err; +} + +int vxlan_multicast_join(struct vxlan_dev *vxlan) +{ + int ret = 0; + + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { + ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip, + vxlan->default_dst.remote_ifindex); + if (ret == -EADDRINUSE) + ret = 0; + if (ret) + return ret; + } + + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + return vxlan_multicast_join_vnigrp(vxlan); + + return 0; +} + +int vxlan_multicast_leave(struct vxlan_dev *vxlan) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + int ret = 0; + + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && + !vxlan_group_used(vn, vxlan, 0, NULL, 0)) { + ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip, + vxlan->default_dst.remote_ifindex); + if (ret) + return ret; + } + + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + return vxlan_multicast_leave_vnigrp(vxlan); + + return 0; +} diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h new file mode 100644 index 000000000000..599c3b4fdd5e --- /dev/null +++ b/drivers/net/vxlan/vxlan_private.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Vxlan private header file + * + */ + +#ifndef _VXLAN_PRIVATE_H +#define _VXLAN_PRIVATE_H + +#include <linux/rhashtable.h> + +extern unsigned int vxlan_net_id; +extern const u8 all_zeros_mac[ETH_ALEN + 2]; +extern const struct rhashtable_params vxlan_vni_rht_params; + +#define PORT_HASH_BITS 8 +#define PORT_HASH_SIZE (1 << PORT_HASH_BITS) + +/* per-network namespace private data for this module */ +struct vxlan_net { + struct list_head vxlan_list; + struct hlist_head sock_list[PORT_HASH_SIZE]; + spinlock_t 
sock_lock; + struct notifier_block nexthop_notifier_block; +}; + +/* Forwarding table entry */ +struct vxlan_fdb { + struct hlist_node hlist; /* linked list of entries */ + struct rcu_head rcu; + unsigned long updated; /* jiffies */ + unsigned long used; + struct list_head remotes; + u8 eth_addr[ETH_ALEN]; + u16 state; /* see ndm_state */ + __be32 vni; + u16 flags; /* see ndm_flags and below */ + struct list_head nh_list; + struct nexthop __rcu *nh; + struct vxlan_dev __rcu *vdev; +}; + +#define NTF_VXLAN_ADDED_BY_USER 0x100 + +/* Virtual Network hash table head */ +static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni) +{ + return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)]; +} + +/* Socket hash table head */ +static inline struct hlist_head *vs_head(struct net *net, __be16 port) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + + return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; +} + +/* First remote destination for a forwarding entry. + * Guaranteed to be non-NULL because remotes are never deleted. 
+ */ +static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb) +{ + if (rcu_access_pointer(fdb->nh)) + return NULL; + return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list); +} + +static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) +{ + if (rcu_access_pointer(fdb->nh)) + return NULL; + return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline +bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) +{ + if (a->sa.sa_family != b->sa.sa_family) + return false; + if (a->sa.sa_family == AF_INET6) + return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr); + else + return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; +} + +#else /* !CONFIG_IPV6 */ + +static inline +bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) +{ + return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; +} + +#endif + +static inline struct vxlan_vni_node * +vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni) +{ + struct vxlan_vni_group *vg; + + vg = rcu_dereference_rtnl(vxlan->vnigrp); + if (!vg) + return NULL; + + return rhashtable_lookup_fast(&vg->vni_hash, &vni, + vxlan_vni_rht_params); +} + +/* vxlan_core.c */ +int vxlan_fdb_create(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __be16 port, __be32 src_vni, + __be32 vni, __u32 ifindex, __u16 ndm_flags, + u32 nhid, struct vxlan_fdb **fdb, + struct netlink_ext_ack *extack); +int __vxlan_fdb_delete(struct vxlan_dev *vxlan, + const unsigned char *addr, union vxlan_addr ip, + __be16 port, __be32 src_vni, __be32 vni, + u32 ifindex, bool swdev_notify); +u32 eth_vni_hash(const unsigned char *addr, __be32 vni); +u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni); +int vxlan_fdb_update(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __u16 flags, + __be16 port, __be32 src_vni, __be32 vni, + __u32 ifindex, 
__u16 ndm_flags, u32 nhid, + bool swdev_notify, struct netlink_ext_ack *extack); +int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan, + struct vxlan_config *conf, __be32 vni); + +/* vxlan_vnifilter.c */ +int vxlan_vnigroup_init(struct vxlan_dev *vxlan); +void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan); + +void vxlan_vnifilter_init(void); +void vxlan_vnifilter_uninit(void); +void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni, + struct vxlan_vni_node *vninode, + int type, unsigned int len); + +void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, + struct vxlan_sock *vs, + bool ipv6); +void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan); +int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, + union vxlan_addr *old_remote_ip, + union vxlan_addr *new_remote_ip, + struct netlink_ext_ack *extack); + + +/* vxlan_multicast.c */ +int vxlan_multicast_join(struct vxlan_dev *vxlan); +int vxlan_multicast_leave(struct vxlan_dev *vxlan); +bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev, + __be32 vni, union vxlan_addr *rip, int rifindex); +int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, + int rifindex); +int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, + int rifindex); +#endif diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c new file mode 100644 index 000000000000..9f28d0b6a6b2 --- /dev/null +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -0,0 +1,999 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Vxlan vni filter for collect metadata mode + * + * Authors: Roopa Prabhu <roopa@nvidia.com> + * + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include <linux/rhashtable.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <net/vxlan.h> + +#include "vxlan_private.h" + +static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct 
vxlan_vni_node *vnode = ptr; + __be32 vni = *(__be32 *)arg->key; + + return vnode->vni != vni; +} + +const struct rhashtable_params vxlan_vni_rht_params = { + .head_offset = offsetof(struct vxlan_vni_node, vnode), + .key_offset = offsetof(struct vxlan_vni_node, vni), + .key_len = sizeof(__be32), + .nelem_hint = 3, + .max_size = VXLAN_N_VID, + .obj_cmpfn = vxlan_vni_cmp, + .automatic_shrinking = true, +}; + +static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan, + struct vxlan_vni_node *v, + bool del) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_dev_node *node; + struct vxlan_sock *vs; + + spin_lock(&vn->sock_lock); + if (del) { + if (!hlist_unhashed(&v->hlist4.hlist)) + hlist_del_init_rcu(&v->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + if (!hlist_unhashed(&v->hlist6.hlist)) + hlist_del_init_rcu(&v->hlist6.hlist); +#endif + goto out; + } + +#if IS_ENABLED(CONFIG_IPV6) + vs = rtnl_dereference(vxlan->vn6_sock); + if (vs && v) { + node = &v->hlist6; + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); + } +#endif + vs = rtnl_dereference(vxlan->vn4_sock); + if (vs && v) { + node = &v->hlist4; + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); + } +out: + spin_unlock(&vn->sock_lock); +} + +void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, + struct vxlan_sock *vs, + bool ipv6) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + struct vxlan_vni_node *v, *tmp; + struct vxlan_dev_node *node; + + if (!vg) + return; + + spin_lock(&vn->sock_lock); + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6) + node = &v->hlist6; + else +#endif + node = &v->hlist4; + node->vxlan = vxlan; + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); + } + spin_unlock(&vn->sock_lock); +} + +void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + 
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_vni_node *v, *tmp; + + if (!vg) + return; + + spin_lock(&vn->sock_lock); + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + hlist_del_init_rcu(&v->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&v->hlist6.hlist); +#endif + } + spin_unlock(&vn->sock_lock); +} + +static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode, + struct vxlan_vni_stats *dest) +{ + int i; + + memset(dest, 0, sizeof(*dest)); + for_each_possible_cpu(i) { + struct vxlan_vni_stats_pcpu *pstats; + struct vxlan_vni_stats temp; + unsigned int start; + + pstats = per_cpu_ptr(vninode->stats, i); + do { + start = u64_stats_fetch_begin_irq(&pstats->syncp); + memcpy(&temp, &pstats->stats, sizeof(temp)); + } while (u64_stats_fetch_retry_irq(&pstats->syncp, start)); + + dest->rx_packets += temp.rx_packets; + dest->rx_bytes += temp.rx_bytes; + dest->rx_drops += temp.rx_drops; + dest->rx_errors += temp.rx_errors; + dest->tx_packets += temp.tx_packets; + dest->tx_bytes += temp.tx_bytes; + dest->tx_drops += temp.tx_drops; + dest->tx_errors += temp.tx_errors; + } +} + +static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode, + int type, unsigned int len) +{ + struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats); + + u64_stats_update_begin(&pstats->syncp); + switch (type) { + case VXLAN_VNI_STATS_RX: + pstats->stats.rx_bytes += len; + pstats->stats.rx_packets++; + break; + case VXLAN_VNI_STATS_RX_DROPS: + pstats->stats.rx_drops++; + break; + case VXLAN_VNI_STATS_RX_ERRORS: + pstats->stats.rx_errors++; + break; + case VXLAN_VNI_STATS_TX: + pstats->stats.tx_bytes += len; + pstats->stats.tx_packets++; + break; + case VXLAN_VNI_STATS_TX_DROPS: + pstats->stats.tx_drops++; + break; + case VXLAN_VNI_STATS_TX_ERRORS: + pstats->stats.tx_errors++; + break; + } + u64_stats_update_end(&pstats->syncp); +} + +void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni, + 
struct vxlan_vni_node *vninode, + int type, unsigned int len) +{ + struct vxlan_vni_node *vnode; + + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + return; + + if (vninode) { + vnode = vninode; + } else { + vnode = vxlan_vnifilter_lookup(vxlan, vni); + if (!vnode) + return; + } + + vxlan_vnifilter_stats_add(vnode, type, len); +} + +static u32 vnirange(struct vxlan_vni_node *vbegin, + struct vxlan_vni_node *vend) +{ + return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni)); +} + +static size_t vxlan_vnifilter_entry_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct tunnel_msg)) + + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */ + + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */ + + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */ + + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */ +} + +static int __vnifilter_entry_fill_stats(struct sk_buff *skb, + const struct vxlan_vni_node *vbegin) +{ + struct vxlan_vni_stats vstats; + struct nlattr *vstats_attr; + + vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS); + if (!vstats_attr) + goto out_stats_err; + + vxlan_vnifilter_stats_get(vbegin, &vstats); + if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES, + vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS, + vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS, + vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS, + vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES, + vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS, + vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS, + vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS, + 
vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD)) + goto out_stats_err; + + nla_nest_end(skb, vstats_attr); + + return 0; + +out_stats_err: + nla_nest_cancel(skb, vstats_attr); + return -EMSGSIZE; +} + +static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb, + struct vxlan_vni_node *vbegin, + struct vxlan_vni_node *vend, + bool fill_stats) +{ + struct nlattr *ventry; + u32 vs = be32_to_cpu(vbegin->vni); + u32 ve = 0; + + if (vbegin != vend) + ve = be32_to_cpu(vend->vni); + + ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY); + if (!ventry) + return false; + + if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs)) + goto out_err; + + if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve)) + goto out_err; + + if (!vxlan_addr_any(&vbegin->remote_ip)) { + if (vbegin->remote_ip.sa.sa_family == AF_INET) { + if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP, + vbegin->remote_ip.sin.sin_addr.s_addr)) + goto out_err; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6, + &vbegin->remote_ip.sin6.sin6_addr)) + goto out_err; +#endif + } + } + + if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin)) + goto out_err; + + nla_nest_end(skb, ventry); + + return true; + +out_err: + nla_nest_cancel(skb, ventry); + + return false; +} + +static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan, + struct vxlan_vni_node *vninode, int cmd) +{ + struct tunnel_msg *tmsg; + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct net *net = dev_net(vxlan->dev); + int err = -ENOBUFS; + + skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out_err; + + err = -EMSGSIZE; + nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0); + if (!nlh) + goto out_err; + tmsg = nlmsg_data(nlh); + memset(tmsg, 0, sizeof(*tmsg)); + tmsg->family = AF_BRIDGE; + tmsg->ifindex = vxlan->dev->ifindex; + + if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false)) + goto out_err; + + nlmsg_end(skb, nlh); + rtnl_notify(skb, 
net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL); + + return; + +out_err: + rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err); + + kfree_skb(skb); +} + +static int vxlan_vnifilter_dump_dev(const struct net_device *dev, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL; + struct vxlan_dev *vxlan = netdev_priv(dev); + struct tunnel_msg *new_tmsg, *tmsg; + int idx = 0, s_idx = cb->args[1]; + struct vxlan_vni_group *vg; + struct nlmsghdr *nlh; + bool dump_stats; + int err = 0; + + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + return -EINVAL; + + /* RCU needed because of the vni locking rules (rcu || rtnl) */ + vg = rcu_dereference(vxlan->vnigrp); + if (!vg || !vg->num_vnis) + return 0; + + tmsg = nlmsg_data(cb->nlh); + dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS); + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + new_tmsg = nlmsg_data(nlh); + memset(new_tmsg, 0, sizeof(*new_tmsg)); + new_tmsg->family = PF_BRIDGE; + new_tmsg->ifindex = dev->ifindex; + + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + if (idx < s_idx) { + idx++; + continue; + } + if (!vbegin) { + vbegin = v; + vend = v; + continue; + } + if (!dump_stats && vnirange(vend, v) == 1 && + vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) { + goto update_end; + } else { + if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, + dump_stats)) { + err = -EMSGSIZE; + break; + } + idx += vnirange(vbegin, vend) + 1; + vbegin = v; + } +update_end: + vend = v; + } + + if (!err && vbegin) { + if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats)) + err = -EMSGSIZE; + } + + cb->args[1] = err ? 
idx : 0; + + nlmsg_end(skb, nlh); + + return err; +} + +static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx = 0, err = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + struct tunnel_msg *tmsg; + struct net_device *dev; + + tmsg = nlmsg_data(cb->nlh); + + if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { + NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header"); + return -EINVAL; + } + + rcu_read_lock(); + if (tmsg->ifindex) { + dev = dev_get_by_index_rcu(net, tmsg->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + err = vxlan_vnifilter_dump_dev(dev, skb, cb); + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) + goto out_err; + } else { + for_each_netdev_rcu(net, dev) { + if (!netif_is_vxlan(dev)) + continue; + if (idx < s_idx) + goto skip; + err = vxlan_vnifilter_dump_dev(dev, skb, cb); + if (err == -EMSGSIZE) + break; +skip: + idx++; + } + } + cb->args[0] = idx; + rcu_read_unlock(); + + return skb->len; + +out_err: + rcu_read_unlock(); + + return err; +} + +static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = { + [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 }, + [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 }, + [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY, + .len = sizeof_field(struct iphdr, daddr) }, + [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, +}; + +static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = { + [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED }, +}; + +static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, + union vxlan_addr *old_remote_ip, + union vxlan_addr *remote_ip, + struct netlink_ext_ack *extack) +{ + struct vxlan_rdst *dst = &vxlan->default_dst; + u32 hash_index; + int err = 0; + + hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); + 
spin_lock_bh(&vxlan->hash_lock[hash_index]); + if (remote_ip && !vxlan_addr_any(remote_ip)) { + err = vxlan_fdb_update(vxlan, all_zeros_mac, + remote_ip, + NUD_REACHABLE | NUD_PERMANENT, + NLM_F_APPEND | NLM_F_CREATE, + vxlan->cfg.dst_port, + vni, + vni, + dst->remote_ifindex, + NTF_SELF, 0, true, extack); + if (err) { + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + return err; + } + } + + if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) { + __vxlan_fdb_delete(vxlan, all_zeros_mac, + *old_remote_ip, + vxlan->cfg.dst_port, + vni, vni, + dst->remote_ifindex, + true); + } + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + + return err; +} + +static int vxlan_vni_update_group(struct vxlan_dev *vxlan, + struct vxlan_vni_node *vninode, + union vxlan_addr *group, + bool create, bool *changed, + struct netlink_ext_ack *extack) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_rdst *dst = &vxlan->default_dst; + union vxlan_addr *newrip = NULL, *oldrip = NULL; + union vxlan_addr old_remote_ip; + int ret = 0; + + memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip)); + + /* if per vni remote ip is not present use vxlan dev + * default dst remote ip for fdb entry + */ + if (group && !vxlan_addr_any(group)) { + newrip = group; + } else { + if (!vxlan_addr_any(&dst->remote_ip)) + newrip = &dst->remote_ip; + } + + /* if old rip exists, and no newrip, + * explicitly delete old rip + */ + if (!newrip && !vxlan_addr_any(&old_remote_ip)) + oldrip = &old_remote_ip; + + if (!newrip && !oldrip) + return 0; + + if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip)) + return 0; + + ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni, + oldrip, newrip, + extack); + if (ret) + goto out; + + if (group) + memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip)); + + if (vxlan->dev->flags & IFF_UP) { + if (vxlan_addr_multicast(&old_remote_ip) && + !vxlan_group_used(vn, vxlan, vninode->vni, + &old_remote_ip, + 
vxlan->default_dst.remote_ifindex)) { + ret = vxlan_igmp_leave(vxlan, &old_remote_ip, + 0); + if (ret) + goto out; + } + + if (vxlan_addr_multicast(&vninode->remote_ip)) { + ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0); + if (ret == -EADDRINUSE) + ret = 0; + if (ret) + goto out; + } + } + + *changed = true; + + return 0; +out: + return ret; +} + +int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, + union vxlan_addr *old_remote_ip, + union vxlan_addr *new_remote_ip, + struct netlink_ext_ack *extack) +{ + struct list_head *headp, *hpos; + struct vxlan_vni_group *vg; + struct vxlan_vni_node *vent; + int ret; + + vg = rtnl_dereference(vxlan->vnigrp); + + headp = &vg->vni_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct vxlan_vni_node, vlist); + if (vxlan_addr_any(&vent->remote_ip)) { + ret = vxlan_update_default_fdb_entry(vxlan, vent->vni, + old_remote_ip, + new_remote_ip, + extack); + if (ret) + return ret; + } + } + + return 0; +} + +static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, + struct vxlan_vni_node *vninode) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_rdst *dst = &vxlan->default_dst; + + /* if per vni remote_ip not present, delete the + * default dst remote_ip previously added for this vni + */ + if (!vxlan_addr_any(&vninode->remote_ip) || + !vxlan_addr_any(&dst->remote_ip)) + __vxlan_fdb_delete(vxlan, all_zeros_mac, + (vxlan_addr_any(&vninode->remote_ip) ? 
+ dst->remote_ip : vninode->remote_ip), + vxlan->cfg.dst_port, + vninode->vni, vninode->vni, + dst->remote_ifindex, + true); + + if (vxlan->dev->flags & IFF_UP) { + if (vxlan_addr_multicast(&vninode->remote_ip) && + !vxlan_group_used(vn, vxlan, vninode->vni, + &vninode->remote_ip, + dst->remote_ifindex)) { + vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0); + } + } +} + +static int vxlan_vni_update(struct vxlan_dev *vxlan, + struct vxlan_vni_group *vg, + __be32 vni, union vxlan_addr *group, + bool *changed, + struct netlink_ext_ack *extack) +{ + struct vxlan_vni_node *vninode; + int ret; + + vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni, + vxlan_vni_rht_params); + if (!vninode) + return 0; + + ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed, + extack); + if (ret) + return ret; + + if (changed) + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); + + return 0; +} + +static void __vxlan_vni_add_list(struct vxlan_vni_group *vg, + struct vxlan_vni_node *v) +{ + struct list_head *headp, *hpos; + struct vxlan_vni_node *vent; + + headp = &vg->vni_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct vxlan_vni_node, vlist); + if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni)) + continue; + else + break; + } + list_add_rcu(&v->vlist, hpos); + vg->num_vnis++; +} + +static void __vxlan_vni_del_list(struct vxlan_vni_group *vg, + struct vxlan_vni_node *v) +{ + list_del_rcu(&v->vlist); + vg->num_vnis--; +} + +static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan, + __be32 vni) +{ + struct vxlan_vni_node *vninode; + + vninode = kzalloc(sizeof(*vninode), GFP_ATOMIC); + if (!vninode) + return NULL; + vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu); + if (!vninode->stats) { + kfree(vninode); + return NULL; + } + vninode->vni = vni; + vninode->hlist4.vxlan = vxlan; +#if IS_ENABLED(CONFIG_IPV6) + vninode->hlist6.vxlan = vxlan; +#endif + + return vninode; +} + +static int vxlan_vni_add(struct 
vxlan_dev *vxlan, + struct vxlan_vni_group *vg, + u32 vni, union vxlan_addr *group, + struct netlink_ext_ack *extack) +{ + struct vxlan_vni_node *vninode; + __be32 v = cpu_to_be32(vni); + bool changed = false; + int err = 0; + + if (vxlan_vnifilter_lookup(vxlan, v)) + return vxlan_vni_update(vxlan, vg, v, group, &changed, extack); + + err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v); + if (err) { + NL_SET_ERR_MSG(extack, "VNI in use"); + return err; + } + + vninode = vxlan_vni_alloc(vxlan, v); + if (!vninode) + return -ENOMEM; + + err = rhashtable_lookup_insert_fast(&vg->vni_hash, + &vninode->vnode, + vxlan_vni_rht_params); + if (err) { + kfree(vninode); + return err; + } + + __vxlan_vni_add_list(vg, vninode); + + if (vxlan->dev->flags & IFF_UP) + vxlan_vs_add_del_vninode(vxlan, vninode, false); + + err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, + extack); + + if (changed) + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); + + return err; +} + +static void vxlan_vni_node_rcu_free(struct rcu_head *rcu) +{ + struct vxlan_vni_node *v; + + v = container_of(rcu, struct vxlan_vni_node, rcu); + free_percpu(v->stats); + kfree(v); +} + +static int vxlan_vni_del(struct vxlan_dev *vxlan, + struct vxlan_vni_group *vg, + u32 vni, struct netlink_ext_ack *extack) +{ + struct vxlan_vni_node *vninode; + __be32 v = cpu_to_be32(vni); + int err = 0; + + vg = rtnl_dereference(vxlan->vnigrp); + + vninode = rhashtable_lookup_fast(&vg->vni_hash, &v, + vxlan_vni_rht_params); + if (!vninode) { + err = -ENOENT; + goto out; + } + + vxlan_vni_delete_group(vxlan, vninode); + + err = rhashtable_remove_fast(&vg->vni_hash, + &vninode->vnode, + vxlan_vni_rht_params); + if (err) + goto out; + + __vxlan_vni_del_list(vg, vninode); + + vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL); + + if (vxlan->dev->flags & IFF_UP) + vxlan_vs_add_del_vninode(vxlan, vninode, true); + + call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free); + + return 0; +out: + return err; +} + 
+static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni, + __u32 end_vni, union vxlan_addr *group, + int cmd, struct netlink_ext_ack *extack) +{ + struct vxlan_vni_group *vg; + int v, err = 0; + + vg = rtnl_dereference(vxlan->vnigrp); + + for (v = start_vni; v <= end_vni; v++) { + switch (cmd) { + case RTM_NEWTUNNEL: + err = vxlan_vni_add(vxlan, vg, v, group, extack); + break; + case RTM_DELTUNNEL: + err = vxlan_vni_del(vxlan, vg, v, extack); + break; + default: + err = -EOPNOTSUPP; + break; + } + if (err) + goto out; + } + + return 0; +out: + return err; +} + +static int vxlan_process_vni_filter(struct vxlan_dev *vxlan, + struct nlattr *nlvnifilter, + int cmd, struct netlink_ext_ack *extack) +{ + struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1]; + u32 vni_start = 0, vni_end = 0; + union vxlan_addr group; + int err; + + err = nla_parse_nested(vattrs, + VXLAN_VNIFILTER_ENTRY_MAX, + nlvnifilter, vni_filter_entry_policy, + extack); + if (err) + return err; + + if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) { + vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]); + vni_end = vni_start; + } + + if (vattrs[VXLAN_VNIFILTER_ENTRY_END]) + vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]); + + if (!vni_start && !vni_end) { + NL_SET_ERR_MSG_ATTR(extack, nlvnifilter, + "vni start nor end found in vni entry"); + return -EINVAL; + } + + if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) { + group.sin.sin_addr.s_addr = + nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]); + group.sa.sa_family = AF_INET; + } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) { + group.sin6.sin6_addr = + nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]); + group.sa.sa_family = AF_INET6; + } else { + memset(&group, 0, sizeof(group)); + } + + if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) { + NL_SET_ERR_MSG(extack, + "Local interface required for multicast remote group"); + + return -EINVAL; + } + + err = vxlan_vni_add_del(vxlan, vni_start, vni_end, 
&group, cmd, + extack); + if (err) + return err; + + return 0; +} + +void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_node *v, *tmp; + struct vxlan_vni_group *vg; + + vg = rtnl_dereference(vxlan->vnigrp); + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + rhashtable_remove_fast(&vg->vni_hash, &v->vnode, + vxlan_vni_rht_params); + hlist_del_init_rcu(&v->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&v->hlist6.hlist); +#endif + __vxlan_vni_del_list(vg, v); + vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL); + call_rcu(&v->rcu, vxlan_vni_node_rcu_free); + } + rhashtable_destroy(&vg->vni_hash); + kfree(vg); +} + +int vxlan_vnigroup_init(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_group *vg; + int ret; + + vg = kzalloc(sizeof(*vg), GFP_KERNEL); + if (!vg) + return -ENOMEM; + ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params); + if (ret) { + kfree(vg); + return ret; + } + INIT_LIST_HEAD(&vg->vni_list); + rcu_assign_pointer(vxlan->vnigrp, vg); + + return 0; +} + +static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tunnel_msg *tmsg; + struct vxlan_dev *vxlan; + struct net_device *dev; + struct nlattr *attr; + int err, vnis = 0; + int rem; + + /* this should validate the header and check for remaining bytes */ + err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX, + vni_filter_policy, extack); + if (err < 0) + return err; + + tmsg = nlmsg_data(nlh); + dev = __dev_get_by_index(net, tmsg->ifindex); + if (!dev) + return -ENODEV; + + if (!netif_is_vxlan(dev)) { + NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device"); + return -EINVAL; + } + + vxlan = netdev_priv(dev); + + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + return -EOPNOTSUPP; + + nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) { + switch (nla_type(attr)) { + case VXLAN_VNIFILTER_ENTRY: + err = 
vxlan_process_vni_filter(vxlan, attr, + nlh->nlmsg_type, extack); + break; + default: + continue; + } + vnis++; + if (err) + break; + } + + if (!vnis) { + NL_SET_ERR_MSG_MOD(extack, "No vnis found to process"); + err = -EINVAL; + } + + return err; +} + +void vxlan_vnifilter_init(void) +{ + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, + vxlan_vnifilter_dump, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, + vxlan_vnifilter_process, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, + vxlan_vnifilter_process, NULL, 0); +} + +void vxlan_vnifilter_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL); + rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL); + rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL); +} diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 6a142dc85c37..76c6b4f89890 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -57,6 +57,7 @@ #include <asm/io.h> #include <asm/dma.h> #include <linux/uaccess.h> +#include <linux/jiffies.h> //#include <asm/spinlock.h> #define DRIVER_MAJOR_VERSION 1 @@ -1968,7 +1969,7 @@ static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue) printk("%s: Xmitter busy|\n", dev->name); sc->extra_stats.tx_tbusy_calls++; - if (jiffies - dev_trans_start(dev) < TX_TIMEOUT) + if (time_is_before_jiffies(dev_trans_start(dev) + TX_TIMEOUT)) goto bug_out; /* diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index 8e3b1c717c10..6063552cea9b 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -194,10 +194,9 @@ static int slic_ds26522_init_configure(struct spi_device *spi) return 0; } -static int slic_ds26522_remove(struct spi_device *spi) +static void slic_ds26522_remove(struct spi_device *spi) { pr_info("DS26522 module uninstalled\n"); - return 0; } static int slic_ds26522_probe(struct spi_device *spi) diff --git a/drivers/net/wireless/intel/Makefile 
b/drivers/net/wireless/intel/Makefile index 1364b0014488..208e73a16051 100644 --- a/drivers/net/wireless/intel/Makefile +++ b/drivers/net/wireless/intel/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_IPW2200) += ipw2x00/ obj-$(CONFIG_IWLEGACY) += iwlegacy/ obj-$(CONFIG_IWLWIFI) += iwlwifi/ +obj-$(CONFIG_IWLMEI) += iwlwifi/ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index dd58c8f9aa11..04addf964d83 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -553,8 +553,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = { .has_he = true, .he_cap_elem = { .mac_cap_info[0] = - IEEE80211_HE_MAC_CAP0_HTC_HE | - IEEE80211_HE_MAC_CAP0_TWT_REQ, + IEEE80211_HE_MAC_CAP0_HTC_HE, .mac_cap_info[1] = IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 63432c24eb59..445c94adb076 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -5,6 +5,7 @@ * Copyright (C) 2016-2017 Intel Deutschland GmbH */ #include <linux/vmalloc.h> +#include <linux/err.h> #include <linux/ieee80211.h> #include <linux/netdevice.h> @@ -1857,7 +1858,6 @@ void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw, void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm) { struct dentry *bcast_dir __maybe_unused; - char buf[100]; spin_lock_init(&mvm->drv_stats_lock); @@ -1939,6 +1939,11 @@ void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm) * Create a symlink with mac80211. It will be removed when mac80211 * exists (before the opmode exists which removes the target.) 
*/ - snprintf(buf, 100, "../../%pd2", mvm->debugfs_dir->d_parent); - debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, buf); + if (!IS_ERR(mvm->debugfs_dir)) { + char buf[100]; + + snprintf(buf, 100, "../../%pd2", mvm->debugfs_dir->d_parent); + debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, + buf); + } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 4ac599f6ad22..709a3df57b10 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -226,7 +226,6 @@ static const u8 he_if_types_ext_capa_sta[] = { [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT, [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF, - [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT, }; static const struct wiphy_iftype_ext_capab he_iftypes_ext_capa[] = { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c index 78450366312b..080a1587caa5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c @@ -71,12 +71,13 @@ static int iwl_mvm_vendor_host_get_ownership(struct wiphy *wiphy, { struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy); struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + int ret; mutex_lock(&mvm->mutex); - iwl_mvm_mei_get_ownership(mvm); + ret = iwl_mvm_mei_get_ownership(mvm); mutex_unlock(&mvm->mutex); - return 0; + return ret; } static const struct wiphy_vendor_command iwl_mvm_vendor_commands[] = { diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c index ab0fe8565851..f99b7ba69fc3 100644 --- a/drivers/net/wireless/intersil/p54/p54spi.c +++ b/drivers/net/wireless/intersil/p54/p54spi.c @@ -669,7 +669,7 @@ err_free: return ret; } -static int p54spi_remove(struct spi_device *spi) +static void p54spi_remove(struct spi_device *spi) { 
struct p54s_priv *priv = spi_get_drvdata(spi); @@ -684,8 +684,6 @@ static int p54spi_remove(struct spi_device *spi) mutex_destroy(&priv->mutex); p54_free_common(priv->hw); - - return 0; } diff --git a/drivers/net/wireless/marvell/libertas/if_spi.c b/drivers/net/wireless/marvell/libertas/if_spi.c index cd9f8ecf171f..ff1c7ec8c450 100644 --- a/drivers/net/wireless/marvell/libertas/if_spi.c +++ b/drivers/net/wireless/marvell/libertas/if_spi.c @@ -1195,7 +1195,7 @@ out: return err; } -static int libertas_spi_remove(struct spi_device *spi) +static void libertas_spi_remove(struct spi_device *spi) { struct if_spi_card *card = spi_get_drvdata(spi); struct lbs_private *priv = card->priv; @@ -1212,8 +1212,6 @@ static int libertas_spi_remove(struct spi_device *spi) if (card->pdata->teardown) card->pdata->teardown(spi); free_if_spi_card(card); - - return 0; } static int if_spi_suspend(struct device *dev) diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index 217477f34c21..18420e954402 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -240,7 +240,7 @@ free: return ret; } -static int wilc_bus_remove(struct spi_device *spi) +static void wilc_bus_remove(struct spi_device *spi) { struct wilc *wilc = spi_get_drvdata(spi); struct wilc_spi *spi_priv = wilc->bus_data; @@ -248,8 +248,6 @@ static int wilc_bus_remove(struct spi_device *spi) clk_disable_unprepare(wilc->rtc_clk); wilc_netdev_cleanup(wilc); kfree(spi_priv); - - return 0; } static const struct of_device_id wilc_of_match[] = { diff --git a/drivers/net/wireless/st/cw1200/cw1200_spi.c b/drivers/net/wireless/st/cw1200/cw1200_spi.c index 271ed2ce2d7f..fe0d220da44d 100644 --- a/drivers/net/wireless/st/cw1200/cw1200_spi.c +++ b/drivers/net/wireless/st/cw1200/cw1200_spi.c @@ -423,7 +423,7 @@ static int cw1200_spi_probe(struct spi_device *func) } /* Disconnect Function to be called by SPI stack when device is 
disconnected */ -static int cw1200_spi_disconnect(struct spi_device *func) +static void cw1200_spi_disconnect(struct spi_device *func) { struct hwbus_priv *self = spi_get_drvdata(func); @@ -435,8 +435,6 @@ static int cw1200_spi_disconnect(struct spi_device *func) } } cw1200_spi_off(dev_get_platdata(&func->dev)); - - return 0; } static int __maybe_unused cw1200_spi_suspend(struct device *dev) diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index 5b894bd6237e..9df38726e8b0 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -327,14 +327,12 @@ out_free: return ret; } -static int wl1251_spi_remove(struct spi_device *spi) +static void wl1251_spi_remove(struct spi_device *spi) { struct wl1251 *wl = spi_get_drvdata(spi); wl1251_free_hw(wl); regulator_disable(wl->vio); - - return 0; } static struct spi_driver wl1251_spi_driver = { diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 354a7e1c3315..7eae1ec2eb2b 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -546,13 +546,11 @@ out_dev_put: return ret; } -static int wl1271_remove(struct spi_device *spi) +static void wl1271_remove(struct spi_device *spi) { struct wl12xx_spi_glue *glue = spi_get_drvdata(spi); platform_device_unregister(glue->core); - - return 0; } static struct spi_driver wl1271_spi_driver = { diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index d24b7a7993aa..990360d75cb6 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -256,6 +256,7 @@ static void backend_disconnect(struct backend_info *be) unsigned int queue_index; xen_unregister_watchers(vif); + xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ @@ -675,7 +676,6 @@ static void hotplug_status_changed(struct xenbus_watch *watch, /* 
Not interested in this watch anymore. */ unregister_hotplug_status_watch(be); - xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); } kfree(str); } @@ -824,15 +824,11 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, - NULL, hotplug_status_changed, - "%s/%s", dev->nodename, - "hotplug-status"); - if (err) - goto err; + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, + hotplug_status_changed, + "%s/%s", dev->nodename, "hotplug-status"); + if (!err) be->have_hotplug_status_watch = 1; - } netif_tx_wake_all_queues(be->vif->dev); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8b18246ad999..7748f07e2cf1 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -842,6 +842,28 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_destroy_queues(struct netfront_info *info) +{ + unsigned int i; + + for (i = 0; i < info->netdev->real_num_tx_queues; i++) { + struct netfront_queue *queue = &info->queues[i]; + + if (netif_running(info->netdev)) + napi_disable(&queue->napi); + netif_napi_del(&queue->napi); + } + + kfree(info->queues); + info->queues = NULL; +} + +static void xennet_uninit(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + xennet_destroy_queues(np); +} + static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) { unsigned long flags; @@ -1611,6 +1633,7 @@ static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp) } static const struct net_device_ops xennet_netdev_ops = { + .ndo_uninit = xennet_uninit, .ndo_open = xennet_open, .ndo_stop = xennet_close, .ndo_start_xmit = xennet_start_xmit, @@ -2103,22 +2126,6 @@ error: return err; } -static void xennet_destroy_queues(struct netfront_info *info) -{ - unsigned int i; - - for (i = 
0; i < info->netdev->real_num_tx_queues; i++) { - struct netfront_queue *queue = &info->queues[i]; - - if (netif_running(info->netdev)) - napi_disable(&queue->napi); - netif_napi_del(&queue->napi); - } - - kfree(info->queues); - info->queues = NULL; -} - static int xennet_create_page_pool(struct netfront_queue *queue) diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 5b833a9a83f8..a38e2fcdfd39 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -174,12 +174,11 @@ static int nfcmrvl_spi_probe(struct spi_device *spi) return 0; } -static int nfcmrvl_spi_remove(struct spi_device *spi) +static void nfcmrvl_spi_remove(struct spi_device *spi) { struct nfcmrvl_spi_drv_data *drv_data = spi_get_drvdata(spi); nfcmrvl_nci_unregister_dev(drv_data->priv); - return 0; } static const struct of_device_id of_nfcmrvl_spi_match[] __maybe_unused = { diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 4e723992e74c..169eacc0a32a 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -263,13 +263,11 @@ static int st_nci_spi_probe(struct spi_device *dev) return r; } -static int st_nci_spi_remove(struct spi_device *dev) +static void st_nci_spi_remove(struct spi_device *dev) { struct st_nci_spi_phy *phy = spi_get_drvdata(dev); ndlc_remove(phy->ndlc); - - return 0; } static struct spi_device_id st_nci_spi_id_table[] = { diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index b23f47936473..ed704bb77226 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1198,7 +1198,7 @@ err_disable_regulator: return ret; } -static int st95hf_remove(struct spi_device *nfc_spi_dev) +static void st95hf_remove(struct spi_device *nfc_spi_dev) { int result = 0; unsigned char reset_cmd = ST95HF_COMMAND_RESET; @@ -1236,8 +1236,6 @@ static int st95hf_remove(struct spi_device *nfc_spi_dev) /* disable regulator */ if (stcontext->st95hf_supply) regulator_disable(stcontext->st95hf_supply); - - return 0; 
} /* Register as SPI protocol driver */ diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 29ca9c328df2..21d68664fe08 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -2144,7 +2144,7 @@ err_destroy_lock: return ret; } -static int trf7970a_remove(struct spi_device *spi) +static void trf7970a_remove(struct spi_device *spi) { struct trf7970a *trf = spi_get_drvdata(spi); @@ -2160,8 +2160,6 @@ static int trf7970a_remove(struct spi_device *spi) regulator_disable(trf->regulator); mutex_destroy(&trf->lock); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c index fede05151f69..4081fc538ff4 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c @@ -168,6 +168,18 @@ static enum ntb_topo gen4_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) return NTB_TOPO_NONE; } +static enum ntb_topo spr_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) +{ + switch (ppd & SPR_PPD_TOPO_MASK) { + case SPR_PPD_TOPO_B2B_USD: + return NTB_TOPO_B2B_USD; + case SPR_PPD_TOPO_B2B_DSD: + return NTB_TOPO_B2B_DSD; + } + + return NTB_TOPO_NONE; +} + int gen4_init_dev(struct intel_ntb_dev *ndev) { struct pci_dev *pdev = ndev->ntb.pdev; @@ -183,7 +195,10 @@ int gen4_init_dev(struct intel_ntb_dev *ndev) } ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET); - ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); + if (pdev_is_ICX(pdev)) + ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); + else if (pdev_is_SPR(pdev)) + ndev->ntb.topo = spr_ppd_topo(ndev, ppd1); dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1, ntb_topo_string(ndev->ntb.topo)); if (ndev->ntb.topo == NTB_TOPO_NONE) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h index 3fcd3fdce9ed..f91323eaf5ce 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h @@ -49,10 +49,14 @@ #define GEN4_PPD_CLEAR_TRN 0x0001 #define GEN4_PPD_LINKTRN 0x0008 #define 
GEN4_PPD_CONN_MASK 0x0300 +#define SPR_PPD_CONN_MASK 0x0700 #define GEN4_PPD_CONN_B2B 0x0200 #define GEN4_PPD_DEV_MASK 0x1000 #define GEN4_PPD_DEV_DSD 0x1000 #define GEN4_PPD_DEV_USD 0x0000 +#define SPR_PPD_DEV_MASK 0x4000 +#define SPR_PPD_DEV_DSD 0x4000 +#define SPR_PPD_DEV_USD 0x0000 #define GEN4_LINK_CTRL_LINK_DISABLE 0x0010 #define GEN4_SLOTSTS 0xb05a @@ -62,6 +66,10 @@ #define GEN4_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_USD) #define GEN4_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_DSD) +#define SPR_PPD_TOPO_MASK (SPR_PPD_CONN_MASK | SPR_PPD_DEV_MASK) +#define SPR_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_USD) +#define SPR_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_DSD) + #define GEN4_DB_COUNT 32 #define GEN4_DB_LINK 32 #define GEN4_DB_LINK_BIT BIT_ULL(GEN4_DB_LINK) @@ -112,4 +120,12 @@ static inline int pdev_is_ICX(struct pci_dev *pdev) return 0; } +static inline int pdev_is_SPR(struct pci_dev *pdev) +{ + if (pdev_is_gen4(pdev) && + pdev->revision > PCI_DEVICE_REVISION_ICX_MAX) + return 1; + return 0; +} + #endif diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index dd683cb58d09..6295e55ef85e 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -33,7 +33,6 @@ int ntb_msi_init(struct ntb_dev *ntb, { phys_addr_t mw_phys_addr; resource_size_t mw_size; - size_t struct_size; int peer_widx; int peers; int ret; @@ -43,9 +42,8 @@ int ntb_msi_init(struct ntb_dev *ntb, if (peers <= 0) return -EINVAL; - struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers; - - ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); + ntb->msi = devm_kzalloc(&ntb->dev, struct_size(ntb->msi, peer_mws, peers), + GFP_KERNEL); if (!ntb->msi) return -ENOMEM; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 79005ea1a33e..fd4720d37cc0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1723,7 +1723,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return 0; } 
-static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1739,7 +1739,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - return 0; + return; + if (ctrl->ops->flags & NVME_F_FABRICS) { /* * The NVMe over Fabrics specification only supports metadata as @@ -1747,7 +1748,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) - return -EINVAL; + return; ns->features |= NVME_NS_EXT_LBAS; @@ -1774,8 +1775,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) else ns->features |= NVME_NS_METADATA_SUPPORTED; } - - return 0; } static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, @@ -1916,9 +1915,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); - ret = nvme_configure_metadata(ns, id); - if (ret) - goto out_unfreeze; + nvme_configure_metadata(ns, id); nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(ns->disk, ns, id); @@ -1934,7 +1931,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); if (ret && !nvme_first_scan(ns->disk)) - goto out; + return ret; } if (nvme_ns_head_multipath(ns->head)) { @@ -1949,16 +1946,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return 0; out_unfreeze: - blk_mq_unfreeze_queue(ns->disk->queue); -out: /* * If probing fails due an unsupported feature, hide the block device, * but still allow other access. 
*/ if (ret == -ENODEV) { ns->disk->flags |= GENHD_FL_HIDDEN; + set_bit(NVME_NS_READY, &ns->flags); ret = 0; } + blk_mq_unfreeze_queue(ns->disk->queue); return ret; } @@ -4574,7 +4571,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; - blk_set_queue_dying(ns->queue); + blk_mark_disk_dead(ns->disk); nvme_start_ns_queue(ns); set_capacity_and_notify(ns->disk, 0); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f8bf6606eb2f..ff775235534c 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - blk_set_queue_dying(head->disk->queue); + blk_mark_disk_dead(head->disk); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 891a36d02e7c..65e00c64a588 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -44,6 +44,8 @@ struct nvme_tcp_request { u32 data_len; u32 pdu_len; u32 pdu_sent; + u32 h2cdata_left; + u32 h2cdata_offset; u16 ttag; __le16 status; struct list_head entry; @@ -95,6 +97,7 @@ struct nvme_tcp_queue { struct nvme_tcp_request *request; int queue_size; + u32 maxh2cdata; size_t cmnd_capsule_len; struct nvme_tcp_ctrl *ctrl; unsigned long flags; @@ -572,23 +575,26 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, return ret; } -static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, - struct nvme_tcp_r2t_pdu *pdu) +static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_data_pdu *data = req->pdu; struct nvme_tcp_queue *queue = req->queue; struct request *rq = blk_mq_rq_from_pdu(req); + u32 h2cdata_sent = req->pdu_len; u8 hdgst = nvme_tcp_hdgst_len(queue); u8 ddgst = nvme_tcp_ddgst_len(queue); req->state = NVME_TCP_SEND_H2C_PDU; 
req->offset = 0; - req->pdu_len = le32_to_cpu(pdu->r2t_length); + req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata); req->pdu_sent = 0; + req->h2cdata_left -= req->pdu_len; + req->h2cdata_offset += h2cdata_sent; memset(data, 0, sizeof(*data)); data->hdr.type = nvme_tcp_h2c_data; - data->hdr.flags = NVME_TCP_F_DATA_LAST; + if (!req->h2cdata_left) + data->hdr.flags = NVME_TCP_F_DATA_LAST; if (queue->hdr_digest) data->hdr.flags |= NVME_TCP_F_HDGST; if (queue->data_digest) @@ -597,9 +603,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, data->hdr.pdo = data->hdr.hlen + hdgst; data->hdr.plen = cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); - data->ttag = pdu->ttag; + data->ttag = req->ttag; data->command_id = nvme_cid(rq); - data->data_offset = pdu->r2t_offset; + data->data_offset = cpu_to_le32(req->h2cdata_offset); data->data_length = cpu_to_le32(req->pdu_len); } @@ -609,6 +615,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, struct nvme_tcp_request *req; struct request *rq; u32 r2t_length = le32_to_cpu(pdu->r2t_length); + u32 r2t_offset = le32_to_cpu(pdu->r2t_offset); rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); if (!rq) { @@ -633,14 +640,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, return -EPROTO; } - if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) { + if (unlikely(r2t_offset < req->data_sent)) { dev_err(queue->ctrl->ctrl.device, "req %d unexpected r2t offset %u (expected %zu)\n", - rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent); + rq->tag, r2t_offset, req->data_sent); return -EPROTO; } - nvme_tcp_setup_h2c_data_pdu(req, pdu); + req->pdu_len = 0; + req->h2cdata_left = r2t_length; + req->h2cdata_offset = r2t_offset; + req->ttag = pdu->ttag; + + nvme_tcp_setup_h2c_data_pdu(req); nvme_tcp_queue_request(req, false, true); return 0; @@ -928,6 +940,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = 
req->queue; int req_data_len = req->data_len; + u32 h2cdata_left = req->h2cdata_left; while (true) { struct page *page = nvme_tcp_req_cur_page(req); @@ -972,7 +985,10 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) req->state = NVME_TCP_SEND_DDGST; req->offset = 0; } else { - nvme_tcp_done_send_req(queue); + if (h2cdata_left) + nvme_tcp_setup_h2c_data_pdu(req); + else + nvme_tcp_done_send_req(queue); } return 1; } @@ -1030,9 +1046,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); - ret = kernel_sendpage(queue->sock, virt_to_page(pdu), - offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); + if (!req->h2cdata_left) + ret = kernel_sendpage(queue->sock, virt_to_page(pdu), + offset_in_page(pdu) + req->offset, len, + MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); + else + ret = sock_no_sendpage(queue->sock, virt_to_page(pdu), + offset_in_page(pdu) + req->offset, len, + MSG_DONTWAIT | MSG_MORE); if (unlikely(ret <= 0)) return ret; @@ -1052,6 +1073,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; size_t offset = req->offset; + u32 h2cdata_left = req->h2cdata_left; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { @@ -1069,7 +1091,10 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) return ret; if (offset + ret == NVME_TCP_DIGEST_LENGTH) { - nvme_tcp_done_send_req(queue); + if (h2cdata_left) + nvme_tcp_setup_h2c_data_pdu(req); + else + nvme_tcp_done_send_req(queue); return 1; } @@ -1261,6 +1286,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) struct msghdr msg = {}; struct kvec iov; bool ctrl_hdgst, ctrl_ddgst; + u32 maxh2cdata; int ret; icreq = kzalloc(sizeof(*icreq), GFP_KERNEL); @@ -1344,6 +1370,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) 
goto free_icresp; } + maxh2cdata = le32_to_cpu(icresp->maxdata); + if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) { + pr_err("queue %d: invalid maxh2cdata returned %u\n", + nvme_tcp_queue_id(queue), maxh2cdata); + goto free_icresp; + } + queue->maxh2cdata = maxh2cdata; + ret = 0; free_icresp: kfree(icresp); @@ -2329,6 +2363,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, req->data_sent = 0; req->pdu_len = 0; req->pdu_sent = 0; + req->h2cdata_left = 0; req->data_len = blk_rq_nr_phys_segments(rq) ? blk_rq_payload_bytes(rq) : 0; req->curr_bio = rq->bio; diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 23a38dcf0fc4..9fd1602b539d 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -771,7 +771,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) if (config->wp_gpio) nvmem->wp_gpio = config->wp_gpio; - else + else if (!config->ignore_wp) nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp", GPIOD_OUT_HIGH); if (IS_ERR(nvmem->wp_gpio)) { diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ad85ff6474ff..ec315b060cd5 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -648,8 +648,8 @@ void __init early_init_fdt_scan_reserved_mem(void) } fdt_scan_reserved_mem(); - fdt_init_reserved_mem(); fdt_reserve_elfcorehdr(); + fdt_init_reserved_mem(); } /** diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 70992103c07d..2c2fb161b572 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -513,24 +513,24 @@ static void __init of_unittest_parse_phandle_with_args(void) memset(&args, 0, sizeof(args)); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); rc = of_parse_phandle_with_args(np, "phandle-list-bad-args", "#phandle-cells", 1, &args); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: 
/testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); rc = of_count_phandle_with_args(np, "phandle-list-bad-args", "#phandle-cells"); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); } @@ -670,12 +670,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void) memset(&args, 0, sizeof(args)); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1"); + "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1"); rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-args", "phandle", 1, &args); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1"); + "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); } @@ -1257,12 +1257,12 @@ static void __init of_unittest_platform_populate(void) unittest(pdev, "device 2 creation failed\n"); EXPECT_BEGIN(KERN_INFO, - "platform testcase-data:testcase-device2: IRQ index 0 not found"); + "platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found"); irq = platform_get_irq(pdev, 0); EXPECT_END(KERN_INFO, - "platform testcase-data:testcase-device2: IRQ index 0 not found"); + "platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found"); unittest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 71258ea3d35f..f8e82c5e2d87 100644 --- 
a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1329,7 +1329,8 @@ static int mvebu_pcie_probe(struct platform_device *pdev) * indirectly via kernel emulated PCI bridge driver. */ mvebu_pcie_setup_hw(port); - mvebu_pcie_set_local_dev_nr(port, 0); + mvebu_pcie_set_local_dev_nr(port, 1); + mvebu_pcie_set_local_bus_nr(port, 0); } pcie->nports = i; diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index cc166c683638..eb05cceab964 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -99,11 +99,13 @@ struct vmd_irq { * @srcu: SRCU struct for local synchronization. * @count: number of child IRQs assigned to this vector; used to track * sharing. + * @virq: The underlying VMD Linux interrupt number */ struct vmd_irq_list { struct list_head irq_list; struct srcu_struct srcu; unsigned int count; + unsigned int virq; }; struct vmd_dev { @@ -253,7 +255,6 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, struct msi_desc *desc = arg->desc; struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); - unsigned int index, vector; if (!vmdirq) return -ENOMEM; @@ -261,10 +262,8 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, INIT_LIST_HEAD(&vmdirq->node); vmdirq->irq = vmd_next_irq(vmd, desc); vmdirq->virq = virq; - index = index_from_irqs(vmd, vmdirq->irq); - vector = pci_irq_vector(vmd->dev, index); - irq_domain_set_info(domain, virq, vector, info->chip, vmdirq, + irq_domain_set_info(domain, virq, vmdirq->irq->virq, info->chip, vmdirq, handle_untracked_irq, vmd, NULL); return 0; } @@ -685,7 +684,8 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd) return err; INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), + vmd->irqs[i].virq = pci_irq_vector(dev, i); + err = devm_request_irq(&dev->dev, vmd->irqs[i].virq, 
vmd_irq, IRQF_NO_THREAD, vmd->name, &vmd->irqs[i]); if (err) @@ -969,7 +969,7 @@ static int vmd_suspend(struct device *dev) int i; for (i = 0; i < vmd->msix_count; i++) - devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]); + devm_free_irq(dev, vmd->irqs[i].virq, &vmd->irqs[i]); return 0; } @@ -981,7 +981,7 @@ static int vmd_resume(struct device *dev) int err, i; for (i = 0; i < vmd->msix_count; i++) { - err = devm_request_irq(dev, pci_irq_vector(pdev, i), + err = devm_request_irq(dev, vmd->irqs[i].virq, vmd_irq, IRQF_NO_THREAD, vmd->name, &vmd->irqs[i]); if (err) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d2dd6a6cda60..65f7f6b0576c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5344,11 +5344,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags); */ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) { - if ((pdev->device == 0x7312 && pdev->revision != 0x00) || - (pdev->device == 0x7340 && pdev->revision != 0xc5) || - (pdev->device == 0x7341 && pdev->revision != 0x00)) - return; - if (pdev->device == 0x15d8) { if (pdev->revision == 0xcf && pdev->subsystem_vendor == 0xea50 && @@ -5370,10 +5365,19 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats); /* AMD Iceland dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats); /* AMD Navi10 dGPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7318, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7319, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731a, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731b, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731e, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 
0x731f, quirk_amd_harvest_no_ats); /* AMD Navi14 dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats); /* AMD Raven platform iGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats); #endif /* CONFIG_PCI_ATS */ diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c index 0bcd19597e4a..3ddaeffc0415 100644 --- a/drivers/pinctrl/intel/pinctrl-tigerlake.c +++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c @@ -749,7 +749,6 @@ static const struct acpi_device_id tgl_pinctrl_acpi_match[] = { { "INT34C5", (kernel_ulong_t)&tgllp_soc_data }, { "INT34C6", (kernel_ulong_t)&tglh_soc_data }, { "INTC1055", (kernel_ulong_t)&tgllp_soc_data }, - { "INTC1057", (kernel_ulong_t)&tgllp_soc_data }, { } }; MODULE_DEVICE_TABLE(acpi, tgl_pinctrl_acpi_match); diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c index 49e32684dbb2..ecab6bf63dc6 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -482,7 +482,7 @@ static int k210_pinconf_get_drive(unsigned int max_strength_ua) { int i; - for (i = K210_PC_DRIVE_MAX; i; i--) { + for (i = K210_PC_DRIVE_MAX; i >= 0; i--) { if (k210_pinconf_drive_strength[i] <= max_strength_ua) return i; } @@ -527,7 +527,7 @@ static int k210_pinconf_set_param(struct pinctrl_dev *pctldev, case PIN_CONFIG_BIAS_PULL_UP: if (!arg) return -EINVAL; - val |= K210_PC_PD; + val |= K210_PC_PU; break; case PIN_CONFIG_DRIVE_STRENGTH: arg *= 1000; diff --git a/drivers/pinctrl/pinctrl-starfive.c b/drivers/pinctrl/pinctrl-starfive.c index 0b912152a405..266da41a6162 100644 --- a/drivers/pinctrl/pinctrl-starfive.c +++ b/drivers/pinctrl/pinctrl-starfive.c @@ -1164,6 +1164,7 @@ static int 
starfive_irq_set_type(struct irq_data *d, unsigned int trigger) } static struct irq_chip starfive_irq_chip = { + .name = "StarFive GPIO", .irq_ack = starfive_irq_ack, .irq_mask = starfive_irq_mask, .irq_mask_ack = starfive_irq_mask_ack, @@ -1308,7 +1309,6 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.ngpio = NR_GPIOS; starfive_irq_chip.parent_device = dev; - starfive_irq_chip.name = sfp->gc.label; sfp->gc.irq.chip = &starfive_irq_chip; sfp->gc.irq.parent_handler = starfive_gpio_irq_handler; diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c index fc5aa1525d13..d49a4efe46c8 100644 --- a/drivers/platform/chrome/cros_ec.c +++ b/drivers/platform/chrome/cros_ec.c @@ -302,13 +302,11 @@ EXPORT_SYMBOL(cros_ec_register); * * Return: 0 on success or negative error code. */ -int cros_ec_unregister(struct cros_ec_device *ec_dev) +void cros_ec_unregister(struct cros_ec_device *ec_dev) { if (ec_dev->pd) platform_device_unregister(ec_dev->pd); platform_device_unregister(ec_dev->ec); - - return 0; } EXPORT_SYMBOL(cros_ec_unregister); diff --git a/drivers/platform/chrome/cros_ec.h b/drivers/platform/chrome/cros_ec.h index 78363dcfdf23..bbca0096868a 100644 --- a/drivers/platform/chrome/cros_ec.h +++ b/drivers/platform/chrome/cros_ec.h @@ -11,7 +11,7 @@ #include <linux/interrupt.h> int cros_ec_register(struct cros_ec_device *ec_dev); -int cros_ec_unregister(struct cros_ec_device *ec_dev); +void cros_ec_unregister(struct cros_ec_device *ec_dev); int cros_ec_suspend(struct cros_ec_device *ec_dev); int cros_ec_resume(struct cros_ec_device *ec_dev); diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c index 30c8938c27d5..22feb0fd4ce7 100644 --- a/drivers/platform/chrome/cros_ec_i2c.c +++ b/drivers/platform/chrome/cros_ec_i2c.c @@ -313,7 +313,9 @@ static int cros_ec_i2c_remove(struct i2c_client *client) { struct cros_ec_device *ec_dev = i2c_get_clientdata(client); - return 
cros_ec_unregister(ec_dev); + cros_ec_unregister(ec_dev); + + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index d6306d2a096f..7651417b4a25 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -439,7 +439,9 @@ static int cros_ec_lpc_remove(struct platform_device *pdev) acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY, cros_ec_lpc_acpi_notify); - return cros_ec_unregister(ec_dev); + cros_ec_unregister(ec_dev); + + return 0; } static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = { diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index 14c4046fa04d..8493af0f680e 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -786,11 +786,11 @@ static int cros_ec_spi_probe(struct spi_device *spi) return 0; } -static int cros_ec_spi_remove(struct spi_device *spi) +static void cros_ec_spi_remove(struct spi_device *spi) { struct cros_ec_device *ec_dev = spi_get_drvdata(spi); - return cros_ec_unregister(ec_dev); + cros_ec_unregister(ec_dev); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c index 0d46706afd2d..4823bd2819f6 100644 --- a/drivers/platform/olpc/olpc-xo175-ec.c +++ b/drivers/platform/olpc/olpc-xo175-ec.c @@ -648,7 +648,7 @@ static struct olpc_ec_driver olpc_xo175_ec_driver = { .ec_cmd = olpc_xo175_ec_cmd, }; -static int olpc_xo175_ec_remove(struct spi_device *spi) +static void olpc_xo175_ec_remove(struct spi_device *spi) { if (pm_power_off == olpc_xo175_ec_power_off) pm_power_off = NULL; @@ -657,8 +657,6 @@ static int olpc_xo175_ec_remove(struct spi_device *spi) platform_device_unregister(olpc_ec); olpc_ec = NULL; - - return 0; } static int olpc_xo175_ec_probe(struct spi_device *spi) diff --git a/drivers/platform/surface/surface3_power.c 
b/drivers/platform/surface/surface3_power.c index abac3eec565e..444ec81ba02d 100644 --- a/drivers/platform/surface/surface3_power.c +++ b/drivers/platform/surface/surface3_power.c @@ -232,14 +232,21 @@ static int mshw0011_bix(struct mshw0011_data *cdata, struct bix *bix) } bix->last_full_charg_capacity = ret; - /* get serial number */ + /* + * Get serial number, on some devices (with unofficial replacement + * battery?) reading any of the serial number range addresses gets + * nacked in this case just leave the serial number empty. + */ ret = i2c_smbus_read_i2c_block_data(client, MSHW0011_BAT0_REG_SERIAL_NO, sizeof(buf), buf); - if (ret != sizeof(buf)) { + if (ret == -EREMOTEIO) { + /* no serial number available */ + } else if (ret != sizeof(buf)) { dev_err(&client->dev, "Error reading serial no: %d\n", ret); return ret; + } else { + snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf); } - snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf); /* get cycle count */ ret = i2c_smbus_read_word_data(client, MSHW0011_BAT0_REG_CYCLE_CNT); diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 4c72ba68b315..b1103f85a85a 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/platform_device.h> +#include <linux/pm_qos.h> #include <linux/rtc.h> #include <linux/suspend.h> #include <linux/seq_file.h> @@ -85,6 +86,9 @@ #define PMC_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 +/* QoS request for letting CPUs in idle states, but not the deepest */ +#define AMD_PMC_MAX_IDLE_STATE_LATENCY 3 + #define SOC_SUBSYSTEM_IP_MAX 12 #define DELAY_MIN_US 2000 #define DELAY_MAX_US 3000 @@ -131,6 +135,7 @@ struct amd_pmc_dev { struct device *dev; struct pci_dev *rdev; struct mutex lock; /* generic mutex lock */ + struct pm_qos_request amd_pmc_pm_qos_req; #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry 
*dbgfs_dir; #endif /* CONFIG_DEBUG_FS */ @@ -521,6 +526,14 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg) rc = rtc_alarm_irq_enable(rtc_device, 0); dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration); + /* + * Prevent CPUs from getting into deep idle states while sending OS_HINT + * which is otherwise generally safe to send when at least one of the CPUs + * is not in deep idle states. + */ + cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, AMD_PMC_MAX_IDLE_STATE_LATENCY); + wake_up_all_idle_cpus(); + return rc; } @@ -538,24 +551,31 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) /* Activate CZN specific RTC functionality */ if (pdev->cpu_id == AMD_CPU_ID_CZN) { rc = amd_pmc_verify_czn_rtc(pdev, &arg); - if (rc < 0) - return rc; + if (rc) + goto fail; } /* Dump the IdleMask before we send hint to SMU */ amd_pmc_idlemask_read(pdev, dev, NULL); msg = amd_pmc_get_os_hint(pdev); rc = amd_pmc_send_cmd(pdev, arg, NULL, msg, 0); - if (rc) + if (rc) { dev_err(pdev->dev, "suspend failed\n"); + goto fail; + } if (enable_stb) rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF); - if (rc) { + if (rc) { dev_err(pdev->dev, "error writing to STB\n"); - return rc; + goto fail; } + return 0; +fail: + if (pdev->cpu_id == AMD_CPU_ID_CZN) + cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, + PM_QOS_DEFAULT_VALUE); return rc; } @@ -579,12 +599,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) /* Write data incremented by 1 to distinguish in stb_read */ if (enable_stb) rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1); - if (rc) { + if (rc) dev_err(pdev->dev, "error writing to STB\n"); - return rc; - } - return 0; + /* Restore the QoS request back to defaults if it was set */ + if (pdev->cpu_id == AMD_CPU_ID_CZN) + cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, + PM_QOS_DEFAULT_VALUE); + + return rc; } static const struct dev_pm_ops amd_pmc_pm_ops = { @@ -722,6 +745,7 @@ static 
int amd_pmc_probe(struct platform_device *pdev) amd_pmc_get_smu_version(dev); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); + cpu_latency_qos_add_request(&dev->amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE); return 0; err_pci_dev_put: diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index a3b83b22a3b1..2104a2621e50 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2223,7 +2223,7 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, err = fan_curve_get_factory_default(asus, fan_dev); if (err) { - if (err == -ENODEV) + if (err == -ENODEV || err == -ENODATA) return 0; return err; } diff --git a/drivers/platform/x86/intel/int3472/tps68470_board_data.c b/drivers/platform/x86/intel/int3472/tps68470_board_data.c index f93d437fd192..525f09a3b5ff 100644 --- a/drivers/platform/x86/intel/int3472/tps68470_board_data.c +++ b/drivers/platform/x86/intel/int3472/tps68470_board_data.c @@ -100,7 +100,8 @@ static struct gpiod_lookup_table surface_go_tps68470_gpios = { .dev_id = "i2c-INT347A:00", .table = { GPIO_LOOKUP("tps68470-gpio", 9, "reset", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("tps68470-gpio", 7, "powerdown", GPIO_ACTIVE_LOW) + GPIO_LOOKUP("tps68470-gpio", 7, "powerdown", GPIO_ACTIVE_LOW), + { } } }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index bd045486b933..3424b080db77 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8703,6 +8703,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */ + TPACPI_Q_LNV3('N', '3', '7', TPACPI_FAN_2CTL), /* T15g (2nd gen) */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ }; diff 
--git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c index b274942dc46a..01ad84fd147c 100644 --- a/drivers/power/supply/bq256xx_charger.c +++ b/drivers/power/supply/bq256xx_charger.c @@ -1523,6 +1523,9 @@ static int bq256xx_hw_init(struct bq256xx_device *bq) BQ256XX_WDT_BIT_SHIFT); ret = power_supply_get_battery_info(bq->charger, &bat_info); + if (ret == -ENOMEM) + return ret; + if (ret) { dev_warn(bq->dev, "battery info missing, default values will be applied\n"); diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c index 0c87ad0dbf71..728e2a6cc9c3 100644 --- a/drivers/power/supply/cw2015_battery.c +++ b/drivers/power/supply/cw2015_battery.c @@ -689,7 +689,7 @@ static int cw_bat_probe(struct i2c_client *client) if (ret) { /* Allocate an empty battery */ cw_bat->battery = devm_kzalloc(&client->dev, - sizeof(cw_bat->battery), + sizeof(*cw_bat->battery), GFP_KERNEL); if (!cw_bat->battery) return -ENOMEM; diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 0f1b5a7d2a89..608d1a0eb141 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -52,6 +52,8 @@ struct ocp_reg { u32 servo_offset_i; u32 servo_drift_p; u32 servo_drift_i; + u32 status_offset; + u32 status_drift; }; #define OCP_CTRL_ENABLE BIT(0) @@ -88,9 +90,10 @@ struct tod_reg { #define TOD_CTRL_GNSS_MASK ((1U << 4) - 1) #define TOD_CTRL_GNSS_SHIFT 24 -#define TOD_STATUS_UTC_MASK 0xff -#define TOD_STATUS_UTC_VALID BIT(8) -#define TOD_STATUS_LEAP_VALID BIT(16) +#define TOD_STATUS_UTC_MASK 0xff +#define TOD_STATUS_UTC_VALID BIT(8) +#define TOD_STATUS_LEAP_ANNOUNCE BIT(12) +#define TOD_STATUS_LEAP_VALID BIT(16) struct ts_reg { u32 enable; @@ -607,7 +610,7 @@ ptp_ocp_settime(struct ptp_clock_info *ptp_info, const struct timespec64 *ts) } static void -__ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u64 adj_val) +__ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u32 adj_val) { u32 select, ctrl; @@ -615,7 +618,7 @@ 
__ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u64 adj_val) iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select); iowrite32(adj_val, &bp->reg->offset_ns); - iowrite32(adj_val & 0x7f, &bp->reg->offset_window_ns); + iowrite32(NSEC_PER_SEC, &bp->reg->offset_window_ns); ctrl = OCP_CTRL_ADJUST_OFFSET | OCP_CTRL_ENABLE; iowrite32(ctrl, &bp->reg->ctrl); @@ -624,6 +627,22 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u64 adj_val) iowrite32(select >> 16, &bp->reg->select); } +static void +ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns) +{ + struct timespec64 ts; + unsigned long flags; + int err; + + spin_lock_irqsave(&bp->lock, flags); + err = __ptp_ocp_gettime_locked(bp, &ts, NULL); + if (likely(!err)) { + timespec64_add_ns(&ts, delta_ns); + __ptp_ocp_settime_locked(bp, &ts); + } + spin_unlock_irqrestore(&bp->lock, flags); +} + static int ptp_ocp_adjtime(struct ptp_clock_info *ptp_info, s64 delta_ns) { @@ -631,6 +650,11 @@ ptp_ocp_adjtime(struct ptp_clock_info *ptp_info, s64 delta_ns) unsigned long flags; u32 adj_ns, sign; + if (delta_ns > NSEC_PER_SEC || -delta_ns > NSEC_PER_SEC) { + ptp_ocp_adjtime_coarse(bp, delta_ns); + return 0; + } + sign = delta_ns < 0 ? BIT(31) : 0; adj_ns = sign ? 
-delta_ns : delta_ns; @@ -739,11 +763,30 @@ __ptp_ocp_clear_drift_locked(struct ptp_ocp *bp) } static void +ptp_ocp_utc_distribute(struct ptp_ocp *bp, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&bp->lock, flags); + + bp->utc_tai_offset = val; + + if (bp->irig_out) + iowrite32(val, &bp->irig_out->adj_sec); + if (bp->dcf_out) + iowrite32(val, &bp->dcf_out->adj_sec); + if (bp->nmea_out) + iowrite32(val, &bp->nmea_out->adj_sec); + + spin_unlock_irqrestore(&bp->lock, flags); +} + +static void ptp_ocp_watchdog(struct timer_list *t) { struct ptp_ocp *bp = from_timer(bp, t, watchdog); unsigned long flags; - u32 status; + u32 status, utc_offset; status = ioread32(&bp->pps_to_clk->status); @@ -760,6 +803,17 @@ ptp_ocp_watchdog(struct timer_list *t) bp->gnss_lost = 0; } + /* if GNSS provides correct data we can rely on + * it to get leap second information + */ + if (bp->tod) { + status = ioread32(&bp->tod->utc_status); + utc_offset = status & TOD_STATUS_UTC_MASK; + if (status & TOD_STATUS_UTC_VALID && + utc_offset != bp->utc_tai_offset) + ptp_ocp_utc_distribute(bp, utc_offset); + } + mod_timer(&bp->watchdog, jiffies + HZ); } @@ -829,25 +883,6 @@ ptp_ocp_init_clock(struct ptp_ocp *bp) } static void -ptp_ocp_utc_distribute(struct ptp_ocp *bp, u32 val) -{ - unsigned long flags; - - spin_lock_irqsave(&bp->lock, flags); - - bp->utc_tai_offset = val; - - if (bp->irig_out) - iowrite32(val, &bp->irig_out->adj_sec); - if (bp->dcf_out) - iowrite32(val, &bp->dcf_out->adj_sec); - if (bp->nmea_out) - iowrite32(val, &bp->nmea_out->adj_sec); - - spin_unlock_irqrestore(&bp->lock, flags); -} - -static void ptp_ocp_tod_init(struct ptp_ocp *bp) { u32 ctrl, reg; @@ -862,45 +897,26 @@ ptp_ocp_tod_init(struct ptp_ocp *bp) ptp_ocp_utc_distribute(bp, reg & TOD_STATUS_UTC_MASK); } -static void -ptp_ocp_tod_info(struct ptp_ocp *bp) +static const char * +ptp_ocp_tod_proto_name(const int idx) { static const char * const proto_name[] = { "NMEA", "NMEA_ZDA", "NMEA_RMC", "NMEA_none", "UBX", 
"UBX_UTC", "UBX_LS", "UBX_none" }; + return proto_name[idx]; +} + +static const char * +ptp_ocp_tod_gnss_name(int idx) +{ static const char * const gnss_name[] = { "ALL", "COMBINED", "GPS", "GLONASS", "GALILEO", "BEIDOU", + "Unknown" }; - u32 version, ctrl, reg; - int idx; - - version = ioread32(&bp->tod->version); - dev_info(&bp->pdev->dev, "TOD Version %d.%d.%d\n", - version >> 24, (version >> 16) & 0xff, version & 0xffff); - - ctrl = ioread32(&bp->tod->ctrl); - idx = ctrl & TOD_CTRL_PROTOCOL ? 4 : 0; - idx += (ctrl >> 16) & 3; - dev_info(&bp->pdev->dev, "control: %x\n", ctrl); - dev_info(&bp->pdev->dev, "TOD Protocol %s %s\n", proto_name[idx], - ctrl & TOD_CTRL_ENABLE ? "enabled" : ""); - - idx = (ctrl >> TOD_CTRL_GNSS_SHIFT) & TOD_CTRL_GNSS_MASK; - if (idx < ARRAY_SIZE(gnss_name)) - dev_info(&bp->pdev->dev, "GNSS %s\n", gnss_name[idx]); - - reg = ioread32(&bp->tod->status); - dev_info(&bp->pdev->dev, "status: %x\n", reg); - - reg = ioread32(&bp->tod->adj_sec); - dev_info(&bp->pdev->dev, "correction: %d\n", reg); - - reg = ioread32(&bp->tod->utc_status); - dev_info(&bp->pdev->dev, "utc_status: %x\n", reg); - dev_info(&bp->pdev->dev, "utc_offset: %d valid:%d leap_valid:%d\n", - reg & TOD_STATUS_UTC_MASK, reg & TOD_STATUS_UTC_VALID ? 1 : 0, - reg & TOD_STATUS_LEAP_VALID ? 1 : 0); + if (idx > ARRAY_SIZE(gnss_name)) + idx = ARRAY_SIZE(gnss_name) - 1; + return gnss_name[idx]; } static int @@ -1953,6 +1969,76 @@ available_clock_sources_show(struct device *dev, } static DEVICE_ATTR_RO(available_clock_sources); +static ssize_t +clock_status_drift_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + u32 val; + int res; + + val = ioread32(&bp->reg->status_drift); + res = (val & ~INT_MAX) ? 
-1 : 1; + res *= (val & INT_MAX); + return sysfs_emit(buf, "%d\n", res); +} +static DEVICE_ATTR_RO(clock_status_drift); + +static ssize_t +clock_status_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + u32 val; + int res; + + val = ioread32(&bp->reg->status_offset); + res = (val & ~INT_MAX) ? -1 : 1; + res *= (val & INT_MAX); + return sysfs_emit(buf, "%d\n", res); +} +static DEVICE_ATTR_RO(clock_status_offset); + +static ssize_t +tod_correction_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + u32 val; + int res; + + val = ioread32(&bp->tod->adj_sec); + res = (val & ~INT_MAX) ? -1 : 1; + res *= (val & INT_MAX); + return sysfs_emit(buf, "%d\n", res); +} + +static ssize_t +tod_correction_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + unsigned long flags; + int err, res; + u32 val = 0; + + err = kstrtos32(buf, 0, &res); + if (err) + return err; + if (res < 0) { + res *= -1; + val |= BIT(31); + } + val |= res; + + spin_lock_irqsave(&bp->lock, flags); + iowrite32(val, &bp->tod->adj_sec); + spin_unlock_irqrestore(&bp->lock, flags); + + return count; +} +static DEVICE_ATTR_RW(tod_correction); + static struct attribute *timecard_attrs[] = { &dev_attr_serialnum.attr, &dev_attr_gnss_sync.attr, @@ -1964,9 +2050,12 @@ static struct attribute *timecard_attrs[] = { &dev_attr_sma4.attr, &dev_attr_available_sma_inputs.attr, &dev_attr_available_sma_outputs.attr, + &dev_attr_clock_status_drift.attr, + &dev_attr_clock_status_offset.attr, &dev_attr_irig_b_mode.attr, &dev_attr_utc_tai_offset.attr, &dev_attr_ts_window_adjust.attr, + &dev_attr_tod_correction.attr, NULL, }; ATTRIBUTE_GROUPS(timecard); @@ -2179,6 +2268,57 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) } DEFINE_SHOW_ATTRIBUTE(ptp_ocp_summary); +static int 
+ptp_ocp_tod_status_show(struct seq_file *s, void *data) +{ + struct device *dev = s->private; + struct ptp_ocp *bp; + u32 val; + int idx; + + bp = dev_get_drvdata(dev); + + val = ioread32(&bp->tod->ctrl); + if (!(val & TOD_CTRL_ENABLE)) { + seq_printf(s, "TOD Slave disabled\n"); + return 0; + } + seq_printf(s, "TOD Slave enabled, Control Register 0x%08X\n", val); + + idx = val & TOD_CTRL_PROTOCOL ? 4 : 0; + idx += (val >> 16) & 3; + seq_printf(s, "Protocol %s\n", ptp_ocp_tod_proto_name(idx)); + + idx = (val >> TOD_CTRL_GNSS_SHIFT) & TOD_CTRL_GNSS_MASK; + seq_printf(s, "GNSS %s\n", ptp_ocp_tod_gnss_name(idx)); + + val = ioread32(&bp->tod->version); + seq_printf(s, "TOD Version %d.%d.%d\n", + val >> 24, (val >> 16) & 0xff, val & 0xffff); + + val = ioread32(&bp->tod->status); + seq_printf(s, "Status register: 0x%08X\n", val); + + val = ioread32(&bp->tod->adj_sec); + idx = (val & ~INT_MAX) ? -1 : 1; + idx *= (val & INT_MAX); + seq_printf(s, "Correction seconds: %d\n", idx); + + val = ioread32(&bp->tod->utc_status); + seq_printf(s, "UTC status register: 0x%08X\n", val); + seq_printf(s, "UTC offset: %d valid:%d\n", + val & TOD_STATUS_UTC_MASK, val & TOD_STATUS_UTC_VALID ? 1 : 0); + seq_printf(s, "Leap second info valid:%d, Leap second announce %d\n", + val & TOD_STATUS_LEAP_VALID ? 1 : 0, + val & TOD_STATUS_LEAP_ANNOUNCE ? 
1 : 0); + + val = ioread32(&bp->tod->leap); + seq_printf(s, "Time to next leap second (in sec): %d\n", (s32) val); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ptp_ocp_tod_status); + static struct dentry *ptp_ocp_debugfs_root; static void @@ -2190,6 +2330,9 @@ ptp_ocp_debugfs_add_device(struct ptp_ocp *bp) bp->debug_root = d; debugfs_create_file("summary", 0444, bp->debug_root, &bp->dev, &ptp_ocp_summary_fops); + if (bp->tod) + debugfs_create_file("tod_status", 0444, bp->debug_root, + &bp->dev, &ptp_ocp_tod_status_fops); } static void @@ -2368,8 +2511,6 @@ ptp_ocp_info(struct ptp_ocp *bp) u32 reg; ptp_ocp_phc_info(bp); - if (bp->tod) - ptp_ocp_tod_info(bp); if (bp->image) { u32 ver = ioread32(&bp->image->version); diff --git a/drivers/regulator/da9121-regulator.c b/drivers/regulator/da9121-regulator.c index 6f21223a488e..eb9df485bd8a 100644 --- a/drivers/regulator/da9121-regulator.c +++ b/drivers/regulator/da9121-regulator.c @@ -87,16 +87,16 @@ static struct da9121_range da9121_3A_1phase_current = { }; static struct da9121_range da914x_40A_4phase_current = { - .val_min = 14000000, - .val_max = 80000000, - .val_stp = 2000000, + .val_min = 26000000, + .val_max = 78000000, + .val_stp = 4000000, .reg_min = 1, .reg_max = 14, }; static struct da9121_range da914x_20A_2phase_current = { - .val_min = 7000000, - .val_max = 40000000, + .val_min = 13000000, + .val_max = 39000000, .val_stp = 2000000, .reg_min = 1, .reg_max = 14, @@ -561,7 +561,7 @@ static const struct regulator_desc da9217_reg = { }; #define DA914X_MIN_MV 500 -#define DA914X_MAX_MV 1000 +#define DA914X_MAX_MV 1300 #define DA914X_STEP_MV 10 #define DA914X_MIN_SEL (DA914X_MIN_MV / DA914X_STEP_MV) #define DA914X_N_VOLTAGES (((DA914X_MAX_MV - DA914X_MIN_MV) / DA914X_STEP_MV) \ @@ -585,10 +585,6 @@ static const struct regulator_desc da9141_reg = { .vsel_mask = DA9121_MASK_BUCK_BUCKx_5_CHx_A_VOUT, .enable_reg = DA9121_REG_BUCK_BUCK1_0, .enable_mask = DA9121_MASK_BUCK_BUCKx_0_CHx_EN, - /* Default value of 
BUCK_BUCK1_0.CH1_SRC_DVC_UP */ - .ramp_delay = 20000, - /* tBUCK_EN */ - .enable_time = 20, }; static const struct regulator_desc da9142_reg = { diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index 2f83adef966e..6d66ab5a8b17 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -185,10 +185,9 @@ static int ds1302_probe(struct spi_device *spi) return 0; } -static int ds1302_remove(struct spi_device *spi) +static void ds1302_remove(struct spi_device *spi) { spi_set_drvdata(spi, NULL); - return 0; } #ifdef CONFIG_OF diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index 9ef107b99b65..ed9360486953 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -720,7 +720,7 @@ static int ds1305_probe(struct spi_device *spi) return 0; } -static int ds1305_remove(struct spi_device *spi) +static void ds1305_remove(struct spi_device *spi) { struct ds1305 *ds1305 = spi_get_drvdata(spi); @@ -730,8 +730,6 @@ static int ds1305_remove(struct spi_device *spi) devm_free_irq(&spi->dev, spi->irq, ds1305); cancel_work_sync(&ds1305->work); } - - return 0; } static struct spi_driver ds1305_driver = { diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index f14ed6c96437..ed5a6ba89a3e 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -434,11 +434,9 @@ static int ds1343_probe(struct spi_device *spi) return 0; } -static int ds1343_remove(struct spi_device *spi) +static void ds1343_remove(struct spi_device *spi) { dev_pm_clear_wake_irq(&spi->dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index a61d38a1b4ed..bab9b34926c6 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -223,7 +223,7 @@ lcs_setup_read_ccws(struct lcs_card *card) * we do not need to do set_normalized_cda. 
*/ card->read.ccws[cnt].cda = - (__u32) __pa(card->read.iob[cnt].data); + (__u32)virt_to_phys(card->read.iob[cnt].data); ((struct lcs_header *) card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET; card->read.iob[cnt].callback = lcs_get_frames_cb; @@ -236,7 +236,7 @@ lcs_setup_read_ccws(struct lcs_card *card) /* Last ccw is a tic (transfer in channel). */ card->read.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; card->read.ccws[LCS_NUM_BUFFS].cda = - (__u32) __pa(card->read.ccws); + (__u32)virt_to_phys(card->read.ccws); /* Setg initial state of the read channel. */ card->read.state = LCS_CH_STATE_INIT; @@ -278,12 +278,12 @@ lcs_setup_write_ccws(struct lcs_card *card) * we do not need to do set_normalized_cda. */ card->write.ccws[cnt].cda = - (__u32) __pa(card->write.iob[cnt].data); + (__u32)virt_to_phys(card->write.iob[cnt].data); } /* Last ccw is a tic (transfer in channel). */ card->write.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; card->write.ccws[LCS_NUM_BUFFS].cda = - (__u32) __pa(card->write.ccws); + (__u32)virt_to_phys(card->write.ccws); /* Set initial state of the write channel. 
*/ card->read.state = LCS_CH_STATE_INIT; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 29f0111f8e11..d99c5b773e22 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -426,7 +426,7 @@ static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len, ccw->cmd_code = cmd_code; ccw->flags = flags | CCW_FLAG_SLI; ccw->count = len; - ccw->cda = (__u32) __pa(data); + ccw->cda = (__u32)virt_to_phys(data); } static int __qeth_issue_next_read(struct qeth_card *card) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index a1e0a106c132..98cabe09c040 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -592,6 +592,7 @@ struct lpfc_vport { #define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */ #define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */ #define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/ +#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index bac78fbce8d6..fa8415259cb8 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1315,6 +1315,9 @@ lpfc_issue_lip(struct Scsi_Host *shost) pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK; pmboxq->u.mb.mbxOwner = OWN_HOST; + if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME)) + vport->fc_flag &= ~FC_PT2PT_NO_NVME; + mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2); if ((mbxstatus == MBX_SUCCESS) && diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index db5ccae1b63d..f936833c9909 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1072,7 +1072,8 @@ stop_rr_fcf_flogi: /* FLOGI failed, 
so there is no fabric */ spin_lock_irq(shost->host_lock); - vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP | + FC_PT2PT_NO_NVME); spin_unlock_irq(shost->host_lock); /* If private loop, then allow max outstanding els to be @@ -4607,6 +4608,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Added for Vendor specifc support * Just keep retrying for these Rsn / Exp codes */ + if ((vport->fc_flag & FC_PT2PT) && + cmd == ELS_CMD_NVMEPRLI) { + switch (stat.un.b.lsRjtRsnCode) { + case LSRJT_UNABLE_TPC: + case LSRJT_INVALID_CMD: + case LSRJT_LOGICAL_ERR: + case LSRJT_CMD_UNSUPPORTED: + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "0168 NVME PRLI LS_RJT " + "reason %x port doesn't " + "support NVME, disabling NVME\n", + stat.un.b.lsRjtRsnCode); + retry = 0; + vport->fc_flag |= FC_PT2PT_NO_NVME; + goto out_retry; + } + } switch (stat.un.b.lsRjtRsnCode) { case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 7d717a4ac14d..fdf5e777bf11 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1961,8 +1961,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, * is configured try it. 
*/ ndlp->nlp_fc4_type |= NLP_FC4_FCP; - if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) && + (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH || + vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ lpfc_nvme_update_localport(vport); diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 5916ed7662d5..4eb89aa4a39d 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -771,11 +771,10 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->list_tmf_work = NULL; } } + spin_unlock_bh(&qedi_conn->tmf_work_lock); - if (!found) { - spin_unlock_bh(&qedi_conn->tmf_work_lock); + if (!found) goto check_cleanup_reqs; - } QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", @@ -806,7 +805,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->state = CLEANUP_RECV; unlock: spin_unlock_bh(&conn->session->back_lock); - spin_unlock_bh(&qedi_conn->tmf_work_lock); wake_up_interruptible(&qedi_conn->wait_queue); return; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 50b12d60dc1b..9349557b8a01 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2681,7 +2681,7 @@ static int ufshcd_map_queues(struct Scsi_Host *shost) break; case HCTX_TYPE_READ: map->nr_queues = 0; - break; + continue; default: WARN_ON_ONCE(true); } diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c index 072473a16f4d..5ed2fc1c53a0 100644 --- a/drivers/soc/fsl/guts.c +++ b/drivers/soc/fsl/guts.c @@ -28,7 +28,6 @@ struct fsl_soc_die_attr { static struct guts *guts; static struct soc_device_attribute soc_dev_attr; static struct soc_device *soc_dev; -static struct device_node *root; /* SoC die attribute definition for QorIQ platform */ @@ -138,7 
+137,7 @@ static u32 fsl_guts_get_svr(void) static int fsl_guts_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; + struct device_node *root, *np = pdev->dev.of_node; struct device *dev = &pdev->dev; const struct fsl_soc_die_attr *soc_die; const char *machine; @@ -159,8 +158,14 @@ static int fsl_guts_probe(struct platform_device *pdev) root = of_find_node_by_path("/"); if (of_property_read_string(root, "model", &machine)) of_property_read_string_index(root, "compatible", 0, &machine); - if (machine) - soc_dev_attr.machine = machine; + if (machine) { + soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL); + if (!soc_dev_attr.machine) { + of_node_put(root); + return -ENOMEM; + } + } + of_node_put(root); svr = fsl_guts_get_svr(); soc_die = fsl_soc_die_match(svr, fsl_soc_die); @@ -195,7 +200,6 @@ static int fsl_guts_probe(struct platform_device *pdev) static int fsl_guts_remove(struct platform_device *dev) { soc_device_unregister(soc_dev); - of_node_put(root); return 0; } diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index 4d38c80f8be8..b3c226eb5292 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -147,7 +147,7 @@ EXPORT_SYMBOL(qe_issue_cmd); * memory mapped space. * The BRG clock is the QE clock divided by 2. * It was set up long ago during the initial boot phase and is - * is given to us. + * given to us. * Baud rate clocks are zero-based in the driver code (as that maps * to port numbers). Documentation uses 1-based numbering. 
*/ @@ -421,7 +421,7 @@ static void qe_upload_microcode(const void *base, for (i = 0; i < be32_to_cpu(ucode->count); i++) iowrite32be(be32_to_cpu(code[i]), &qe_immr->iram.idata); - + /* Set I-RAM Ready Register */ iowrite32be(QE_IRAM_READY, &qe_immr->iram.iready); } diff --git a/drivers/soc/fsl/qe/qe_io.c b/drivers/soc/fsl/qe/qe_io.c index e277c827bdf3..a5e2d0e5ab51 100644 --- a/drivers/soc/fsl/qe/qe_io.c +++ b/drivers/soc/fsl/qe/qe_io.c @@ -35,6 +35,8 @@ int par_io_init(struct device_node *np) if (ret) return ret; par_io = ioremap(res.start, resource_size(&res)); + if (!par_io) + return -ENOMEM; if (!of_property_read_u32(np, "num-ports", &num_ports)) num_par_io_ports = num_ports; diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c index 3e59d479d001..3cb123016b3e 100644 --- a/drivers/soc/imx/gpcv2.c +++ b/drivers/soc/imx/gpcv2.c @@ -382,7 +382,8 @@ static int imx_pgc_power_down(struct generic_pm_domain *genpd) return 0; out_clk_disable: - clk_bulk_disable_unprepare(domain->num_clks, domain->clks); + if (!domain->keep_clocks) + clk_bulk_disable_unprepare(domain->num_clks, domain->clks); return ret; } diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c index 670cc82d17dc..ca75b14931ec 100644 --- a/drivers/soc/mediatek/mtk-scpsys.c +++ b/drivers/soc/mediatek/mtk-scpsys.c @@ -411,17 +411,12 @@ out: return ret; } -static int init_clks(struct platform_device *pdev, struct clk **clk) +static void init_clks(struct platform_device *pdev, struct clk **clk) { int i; - for (i = CLK_NONE + 1; i < CLK_MAX; i++) { + for (i = CLK_NONE + 1; i < CLK_MAX; i++) clk[i] = devm_clk_get(&pdev->dev, clk_names[i]); - if (IS_ERR(clk[i])) - return PTR_ERR(clk[i]); - } - - return 0; } static struct scp *init_scp(struct platform_device *pdev, @@ -431,7 +426,7 @@ static struct scp *init_scp(struct platform_device *pdev, { struct genpd_onecell_data *pd_data; struct resource *res; - int i, j, ret; + int i, j; struct scp *scp; struct clk *clk[CLK_MAX]; @@ 
-486,9 +481,7 @@ static struct scp *init_scp(struct platform_device *pdev, pd_data->num_domains = num; - ret = init_clks(pdev, clk); - if (ret) - return ERR_PTR(ret); + init_clks(pdev, clk); for (i = 0; i < num; i++) { struct scp_domain *scpd = &scp->domains[i]; diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 37f4443ce9a0..e9d83d65873b 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -854,15 +854,13 @@ static int spi_mem_probe(struct spi_device *spi) return memdrv->probe(mem); } -static int spi_mem_remove(struct spi_device *spi) +static void spi_mem_remove(struct spi_device *spi) { struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver); struct spi_mem *mem = spi_get_drvdata(spi); if (memdrv->remove) - return memdrv->remove(mem); - - return 0; + memdrv->remove(mem); } static void spi_mem_shutdown(struct spi_device *spi) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 553b6b9d0222..c6a1bb09be05 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -585,6 +585,12 @@ static int rockchip_spi_slave_abort(struct spi_controller *ctlr) { struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); + if (atomic_read(&rs->state) & RXDMA) + dmaengine_terminate_sync(ctlr->dma_rx); + if (atomic_read(&rs->state) & TXDMA) + dmaengine_terminate_sync(ctlr->dma_tx); + atomic_set(&rs->state, 0); + spi_enable_chip(rs, false); rs->slave_abort = true; spi_finalize_current_transfer(ctlr); @@ -654,7 +660,7 @@ static int rockchip_spi_probe(struct platform_device *pdev) struct spi_controller *ctlr; struct resource *mem; struct device_node *np = pdev->dev.of_node; - u32 rsd_nsecs; + u32 rsd_nsecs, num_cs; bool slave_mode; slave_mode = of_property_read_bool(np, "spi-slave"); @@ -764,8 +770,9 @@ static int rockchip_spi_probe(struct platform_device *pdev) * rk spi0 has two native cs, spi1..5 one cs only * if num-cs is missing in the dts, default to 1 */ - if (of_property_read_u16(np, "num-cs", 
&ctlr->num_chipselect)) - ctlr->num_chipselect = 1; + if (of_property_read_u32(np, "num-cs", &num_cs)) + num_cs = 1; + ctlr->num_chipselect = num_cs; ctlr->use_gpio_descriptors = true; } ctlr->dev.of_node = pdev->dev.of_node; diff --git a/drivers/spi/spi-slave-system-control.c b/drivers/spi/spi-slave-system-control.c index 169f3d595f60..d37cfe995a63 100644 --- a/drivers/spi/spi-slave-system-control.c +++ b/drivers/spi/spi-slave-system-control.c @@ -132,13 +132,12 @@ static int spi_slave_system_control_probe(struct spi_device *spi) return 0; } -static int spi_slave_system_control_remove(struct spi_device *spi) +static void spi_slave_system_control_remove(struct spi_device *spi) { struct spi_slave_system_control_priv *priv = spi_get_drvdata(spi); spi_slave_abort(spi); wait_for_completion(&priv->finished); - return 0; } static struct spi_driver spi_slave_system_control_driver = { diff --git a/drivers/spi/spi-slave-time.c b/drivers/spi/spi-slave-time.c index f2e07a392d68..f56c1afb8534 100644 --- a/drivers/spi/spi-slave-time.c +++ b/drivers/spi/spi-slave-time.c @@ -106,13 +106,12 @@ static int spi_slave_time_probe(struct spi_device *spi) return 0; } -static int spi_slave_time_remove(struct spi_device *spi) +static void spi_slave_time_remove(struct spi_device *spi) { struct spi_slave_time_priv *priv = spi_get_drvdata(spi); spi_slave_abort(spi); wait_for_completion(&priv->finished); - return 0; } static struct spi_driver spi_slave_time_driver = { diff --git a/drivers/spi/spi-tle62x0.c b/drivers/spi/spi-tle62x0.c index f8ad0709d015..a565352f6381 100644 --- a/drivers/spi/spi-tle62x0.c +++ b/drivers/spi/spi-tle62x0.c @@ -288,7 +288,7 @@ static int tle62x0_probe(struct spi_device *spi) return ret; } -static int tle62x0_remove(struct spi_device *spi) +static void tle62x0_remove(struct spi_device *spi) { struct tle62x0_state *st = spi_get_drvdata(spi); int ptr; @@ -298,7 +298,6 @@ static int tle62x0_remove(struct spi_device *spi) device_remove_file(&spi->dev, 
&dev_attr_status_show); kfree(st); - return 0; } static struct spi_driver tle62x0_driver = { diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index cfa222c9bd5e..78f31b61a2aa 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -570,6 +570,9 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, if (op->dummy.nbytes) { tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + memset(tmpbuf, 0xff, op->dummy.nbytes); reinit_completion(&xqspi->data_completion); xqspi->txbuf = tmpbuf; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4599b121d744..ead9a132dcb9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -404,15 +404,8 @@ static void spi_remove(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); - if (sdrv->remove) { - int ret; - - ret = sdrv->remove(to_spi_device(dev)); - if (ret) - dev_warn(dev, - "Failed to unbind driver (%pe), ignoring\n", - ERR_PTR(ret)); - } + if (sdrv->remove) + sdrv->remove(to_spi_device(dev)); dev_pm_domain_detach(dev, true); } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index a5cceca8b82b..9468f74308bd 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -803,7 +803,7 @@ static int spidev_probe(struct spi_device *spi) return status; } -static int spidev_remove(struct spi_device *spi) +static void spidev_remove(struct spi_device *spi) { struct spidev_data *spidev = spi_get_drvdata(spi); @@ -820,8 +820,6 @@ static int spidev_remove(struct spi_device *spi) if (spidev->users == 0) kfree(spidev); mutex_unlock(&device_list_lock); - - return 0; } static struct spi_driver spidev_spi_driver = { diff --git a/drivers/staging/fbtft/fb_st7789v.c b/drivers/staging/fbtft/fb_st7789v.c index abe9395a0aef..861a154144e6 100644 --- a/drivers/staging/fbtft/fb_st7789v.c +++ b/drivers/staging/fbtft/fb_st7789v.c @@ -144,6 +144,8 @@ static int init_display(struct fbtft_par *par) { int rc; + par->fbtftops.reset(par); + rc 
= init_tearing_effect_line(par); if (rc) return rc; diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 55677efc0138..b68f5f9b7c78 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -272,21 +272,39 @@ void fbtft_write_reg8_bus9(struct fbtft_par *par, int len, ...); void fbtft_write_reg16_bus8(struct fbtft_par *par, int len, ...); void fbtft_write_reg16_bus16(struct fbtft_par *par, int len, ...); +#define FBTFT_DT_TABLE(_compatible) \ +static const struct of_device_id dt_ids[] = { \ + { .compatible = _compatible }, \ + {}, \ +}; \ +MODULE_DEVICE_TABLE(of, dt_ids); + +#define FBTFT_SPI_DRIVER(_name, _compatible, _display, _spi_ids) \ + \ +static int fbtft_driver_probe_spi(struct spi_device *spi) \ +{ \ + return fbtft_probe_common(_display, spi, NULL); \ +} \ + \ +static void fbtft_driver_remove_spi(struct spi_device *spi) \ +{ \ + struct fb_info *info = spi_get_drvdata(spi); \ + \ + fbtft_remove_common(&spi->dev, info); \ +} \ + \ +static struct spi_driver fbtft_driver_spi_driver = { \ + .driver = { \ + .name = _name, \ + .of_match_table = dt_ids, \ + }, \ + .id_table = _spi_ids, \ + .probe = fbtft_driver_probe_spi, \ + .remove = fbtft_driver_remove_spi, \ +}; + #define FBTFT_REGISTER_DRIVER(_name, _compatible, _display) \ \ -static int fbtft_driver_probe_spi(struct spi_device *spi) \ -{ \ - return fbtft_probe_common(_display, spi, NULL); \ -} \ - \ -static int fbtft_driver_remove_spi(struct spi_device *spi) \ -{ \ - struct fb_info *info = spi_get_drvdata(spi); \ - \ - fbtft_remove_common(&spi->dev, info); \ - return 0; \ -} \ - \ static int fbtft_driver_probe_pdev(struct platform_device *pdev) \ { \ return fbtft_probe_common(_display, NULL, pdev); \ @@ -300,22 +318,9 @@ static int fbtft_driver_remove_pdev(struct platform_device *pdev) \ return 0; \ } \ \ -static const struct of_device_id dt_ids[] = { \ - { .compatible = _compatible }, \ - {}, \ -}; \ - \ -MODULE_DEVICE_TABLE(of, dt_ids); \ 
+FBTFT_DT_TABLE(_compatible) \ \ - \ -static struct spi_driver fbtft_driver_spi_driver = { \ - .driver = { \ - .name = _name, \ - .of_match_table = dt_ids, \ - }, \ - .probe = fbtft_driver_probe_spi, \ - .remove = fbtft_driver_remove_spi, \ -}; \ +FBTFT_SPI_DRIVER(_name, _compatible, _display, NULL) \ \ static struct platform_driver fbtft_driver_platform_driver = { \ .driver = { \ @@ -351,42 +356,15 @@ module_exit(fbtft_driver_module_exit); #define FBTFT_REGISTER_SPI_DRIVER(_name, _comp_vend, _comp_dev, _display) \ \ -static int fbtft_driver_probe_spi(struct spi_device *spi) \ -{ \ - return fbtft_probe_common(_display, spi, NULL); \ -} \ - \ -static int fbtft_driver_remove_spi(struct spi_device *spi) \ -{ \ - struct fb_info *info = spi_get_drvdata(spi); \ - \ - fbtft_remove_common(&spi->dev, info); \ - return 0; \ -} \ - \ -static const struct of_device_id dt_ids[] = { \ - { .compatible = _comp_vend "," _comp_dev }, \ - {}, \ -}; \ - \ -MODULE_DEVICE_TABLE(of, dt_ids); \ +FBTFT_DT_TABLE(_comp_vend "," _comp_dev) \ \ static const struct spi_device_id spi_ids[] = { \ { .name = _comp_dev }, \ {}, \ }; \ - \ MODULE_DEVICE_TABLE(spi, spi_ids); \ \ -static struct spi_driver fbtft_driver_spi_driver = { \ - .driver = { \ - .name = _name, \ - .of_match_table = dt_ids, \ - }, \ - .id_table = spi_ids, \ - .probe = fbtft_driver_probe_spi, \ - .remove = fbtft_driver_remove_spi, \ -}; \ +FBTFT_SPI_DRIVER(_name, _comp_vend "," _comp_dev, _display, spi_ids) \ \ module_spi_driver(fbtft_driver_spi_driver); diff --git a/drivers/staging/pi433/pi433_if.c b/drivers/staging/pi433/pi433_if.c index 68c09fa016ed..1d31c35875e3 100644 --- a/drivers/staging/pi433/pi433_if.c +++ b/drivers/staging/pi433/pi433_if.c @@ -1264,7 +1264,7 @@ RX_failed: return retval; } -static int pi433_remove(struct spi_device *spi) +static void pi433_remove(struct spi_device *spi) { struct pi433_device *device = spi_get_drvdata(spi); @@ -1284,8 +1284,6 @@ static int pi433_remove(struct spi_device *spi) 
kfree(device->rx_buffer); kfree(device); - - return 0; } static const struct of_device_id pi433_dt_ids[] = { diff --git a/drivers/staging/wfx/bus_spi.c b/drivers/staging/wfx/bus_spi.c index 55ffcd7c42e2..fa0ff66a457d 100644 --- a/drivers/staging/wfx/bus_spi.c +++ b/drivers/staging/wfx/bus_spi.c @@ -232,12 +232,11 @@ static int wfx_spi_probe(struct spi_device *func) return wfx_probe(bus->core); } -static int wfx_spi_remove(struct spi_device *func) +static void wfx_spi_remove(struct spi_device *func) { struct wfx_spi_priv *bus = spi_get_drvdata(func); wfx_release(bus->core); - return 0; } /* For dynamic driver binding, kernel does not use OF to match driver. It only diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index f2bf6c61197f..f744ab15bf2c 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -869,8 +869,10 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev) optee_supp_init(&optee->supp); ffa_dev_set_drvdata(ffa_dev, optee); ctx = teedev_open(optee->teedev); - if (IS_ERR(ctx)) + if (IS_ERR(ctx)) { + rc = PTR_ERR(ctx); goto err_rhashtable_free; + } optee->ctx = ctx; rc = optee_notif_init(optee, OPTEE_DEFAULT_MAX_NOTIF_VALUE); if (rc) diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c index 1a55339c7072..c517d310249f 100644 --- a/drivers/tee/optee/smc_abi.c +++ b/drivers/tee/optee/smc_abi.c @@ -1417,8 +1417,10 @@ static int optee_probe(struct platform_device *pdev) platform_set_drvdata(pdev, optee); ctx = teedev_open(optee->teedev); - if (IS_ERR(ctx)) + if (IS_ERR(ctx)) { + rc = PTR_ERR(ctx); goto err_supp_uninit; + } optee->ctx = ctx; rc = optee_notif_init(optee, max_notif_value); if (rc) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 72acb1f61849..4f478812cb51 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -404,6 
+404,10 @@ static void int3400_notify(acpi_handle handle, thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event); thermal_prop[4] = NULL; kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop); + kfree(thermal_prop[0]); + kfree(thermal_prop[1]); + kfree(thermal_prop[2]); + kfree(thermal_prop[3]); } static int int3400_thermal_get_temp(struct thermal_zone_device *thermal, diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 0b1808e3a912..fa92f727fdf8 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -439,7 +439,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) modembits |= MDM_RTR; if (dlci->modem_tx & TIOCM_RI) modembits |= MDM_IC; - if (dlci->modem_tx & TIOCM_CD) + if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) modembits |= MDM_DV; return modembits; } @@ -448,7 +448,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) * gsm_print_packet - display a frame for debug * @hdr: header to print before decode * @addr: address EA from the frame - * @cr: C/R bit from the frame + * @cr: C/R bit seen as initiator * @control: control including PF bit * @data: following data bytes * @dlen: length of data @@ -548,7 +548,7 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) * gsm_send - send a control frame * @gsm: our GSM mux * @addr: address for control frame - * @cr: command/response bit + * @cr: command/response bit seen as initiator * @control: control byte including PF bit * * Format up and transmit a control frame. These do not go via the @@ -563,11 +563,15 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) int len; u8 cbuf[10]; u8 ibuf[3]; + int ocr; + + /* toggle C/R coding if not initiator */ + ocr = cr ^ (gsm->initiator ? 
0 : 1); switch (gsm->encoding) { case 0: cbuf[0] = GSM0_SOF; - cbuf[1] = (addr << 2) | (cr << 1) | EA; + cbuf[1] = (addr << 2) | (ocr << 1) | EA; cbuf[2] = control; cbuf[3] = EA; /* Length of data = 0 */ cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3); @@ -577,7 +581,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) case 1: case 2: /* Control frame + packing (but not frame stuffing) in mode 1 */ - ibuf[0] = (addr << 2) | (cr << 1) | EA; + ibuf[0] = (addr << 2) | (ocr << 1) | EA; ibuf[1] = control; ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2); /* Stuffing may double the size worst case */ @@ -611,7 +615,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) static inline void gsm_response(struct gsm_mux *gsm, int addr, int control) { - gsm_send(gsm, addr, 1, control); + gsm_send(gsm, addr, 0, control); } /** @@ -1017,25 +1021,25 @@ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data, * @tty: virtual tty bound to the DLCI * @dlci: DLCI to affect * @modem: modem bits (full EA) - * @clen: command length + * @slen: number of signal octets * * Used when a modem control message or line state inline in adaption * layer 2 is processed. Sort out the local modem state and throttles */ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci, - u32 modem, int clen) + u32 modem, int slen) { int mlines = 0; u8 brk = 0; int fc; - /* The modem status command can either contain one octet (v.24 signals) - or two octets (v.24 signals + break signals). The length field will - either be 2 or 3 respectively. This is specified in section - 5.4.6.3.7 of the 27.010 mux spec. */ + /* The modem status command can either contain one octet (V.24 signals) + * or two octets (V.24 signals + break signals). This is specified in + * section 5.4.6.3.7 of the 07.10 mux spec. 
+ */ - if (clen == 2) + if (slen == 1) modem = modem & 0x7f; else { brk = modem & 0x7f; @@ -1092,6 +1096,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) unsigned int brk = 0; struct gsm_dlci *dlci; int len = clen; + int slen; const u8 *dp = data; struct tty_struct *tty; @@ -1111,6 +1116,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) return; dlci = gsm->dlci[addr]; + slen = len; while (gsm_read_ea(&modem, *dp++) == 0) { len--; if (len == 0) @@ -1127,7 +1133,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) modem |= (brk & 0x7f); } tty = tty_port_tty_get(&dlci->port); - gsm_process_modem(tty, dlci, modem, clen); + gsm_process_modem(tty, dlci, modem, slen); if (tty) { tty_wakeup(tty); tty_kref_put(tty); @@ -1451,6 +1457,9 @@ static void gsm_dlci_close(struct gsm_dlci *dlci) if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); kfifo_reset(&dlci->fifo); + /* Ensure that gsmtty_open() can return. 
*/ + tty_port_set_initialized(&dlci->port, 0); + wake_up_interruptible(&dlci->port.open_wait); } else dlci->gsm->dead = true; /* Unregister gsmtty driver,report gsmtty dev remove uevent for user */ @@ -1514,7 +1523,7 @@ static void gsm_dlci_t1(struct timer_list *t) dlci->mode = DLCI_MODE_ADM; gsm_dlci_open(dlci); } else { - gsm_dlci_close(dlci); + gsm_dlci_begin_close(dlci); /* prevent half open link */ } break; @@ -1593,6 +1602,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) struct tty_struct *tty; unsigned int modem = 0; int len = clen; + int slen = 0; if (debug & 16) pr_debug("%d bytes for tty\n", len); @@ -1605,12 +1615,14 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) case 2: /* Asynchronous serial with line state in each frame */ while (gsm_read_ea(&modem, *data++) == 0) { len--; + slen++; if (len == 0) return; } + slen++; tty = tty_port_tty_get(port); if (tty) { - gsm_process_modem(tty, dlci, modem, clen); + gsm_process_modem(tty, dlci, modem, slen); tty_kref_put(tty); } fallthrough; @@ -1748,7 +1760,12 @@ static void gsm_dlci_release(struct gsm_dlci *dlci) gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); - tty_hangup(tty); + /* We cannot use tty_hangup() because in tty_kref_put() the tty + * driver assumes that the hangup queue is free and reuses it to + * queue release_one_tty() -> NULL pointer panic in + * process_one_work(). + */ + tty_vhangup(tty); tty_port_tty_set(&dlci->port, NULL); tty_kref_put(tty); @@ -1800,10 +1817,10 @@ static void gsm_queue(struct gsm_mux *gsm) goto invalid; cr = gsm->address & 1; /* C/R bit */ + cr ^= gsm->initiator ? 
0 : 1; /* Flip so 1 always means command */ gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len); - cr ^= 1 - gsm->initiator; /* Flip so 1 always means command */ dlci = gsm->dlci[address]; switch (gsm->control) { @@ -3234,9 +3251,9 @@ static void gsmtty_throttle(struct tty_struct *tty) if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) - dlci->modem_tx &= ~TIOCM_DTR; + dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; - /* Send an MSC with DTR cleared */ + /* Send an MSC with RTS cleared */ gsmtty_modem_update(dlci, 0); } @@ -3246,9 +3263,9 @@ static void gsmtty_unthrottle(struct tty_struct *tty) if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) - dlci->modem_tx |= TIOCM_DTR; + dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; - /* Send an MSC with DTR set */ + /* Send an MSC with RTS set */ gsmtty_modem_update(dlci, 0); } diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index 3c92d4e01488..516cff362434 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -805,7 +805,7 @@ static int max3100_probe(struct spi_device *spi) return 0; } -static int max3100_remove(struct spi_device *spi) +static void max3100_remove(struct spi_device *spi) { struct max3100_port *s = spi_get_drvdata(spi); int i; @@ -828,13 +828,12 @@ static int max3100_remove(struct spi_device *spi) for (i = 0; i < MAX_MAX3100; i++) if (max3100s[i]) { mutex_unlock(&max3100s_lock); - return 0; + return; } pr_debug("removing max3100 driver\n"); uart_unregister_driver(&max3100_uart_driver); mutex_unlock(&max3100s_lock); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index dde0824b2fa5..3112b4a05448 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1487,10 +1487,9 @@ static int max310x_spi_probe(struct spi_device *spi) return max310x_probe(&spi->dev, devtype, regmap, spi->irq); } -static int max310x_spi_remove(struct 
spi_device *spi) +static void max310x_spi_remove(struct spi_device *spi) { max310x_remove(&spi->dev); - return 0; } static const struct spi_device_id max310x_id_table[] = { diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 64e7e6c8145f..3a6c68e19c80 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -734,12 +734,15 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) static void sc16is7xx_tx_proc(struct kthread_work *ws) { struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); + struct sc16is7xx_port *s = dev_get_drvdata(port->dev); if ((port->rs485.flags & SER_RS485_ENABLED) && (port->rs485.delay_rts_before_send > 0)) msleep(port->rs485.delay_rts_before_send); + mutex_lock(&s->efr_lock); sc16is7xx_handle_tx(port); + mutex_unlock(&s->efr_lock); } static void sc16is7xx_reconf_rs485(struct uart_port *port) @@ -1440,11 +1443,9 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq); } -static int sc16is7xx_spi_remove(struct spi_device *spi) +static void sc16is7xx_spi_remove(struct spi_device *spi) { sc16is7xx_remove(&spi->dev); - - return 0; } static const struct spi_device_id sc16is7xx_spi_id_table[] = { diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 8a63da3ab39d..88c337bf564f 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1418,6 +1418,7 @@ void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg); void dwc2_hsotg_disconnect(struct dwc2_hsotg *dwc2); int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode); #define dwc2_is_device_connected(hsotg) (hsotg->connected) +#define dwc2_is_device_enabled(hsotg) (hsotg->enabled) int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup); int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg); @@ -1454,6 +1455,7 @@ static inline int 
dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode) { return 0; } #define dwc2_is_device_connected(hsotg) (0) +#define dwc2_is_device_enabled(hsotg) (0) static inline int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg) { return 0; } static inline int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c index 1b39c4776369..d8d6493bc457 100644 --- a/drivers/usb/dwc2/drd.c +++ b/drivers/usb/dwc2/drd.c @@ -130,8 +130,10 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role) already = dwc2_ovr_avalid(hsotg, true); } else if (role == USB_ROLE_DEVICE) { already = dwc2_ovr_bvalid(hsotg, true); - /* This clear DCTL.SFTDISCON bit */ - dwc2_hsotg_core_connect(hsotg); + if (dwc2_is_device_enabled(hsotg)) { + /* This clear DCTL.SFTDISCON bit */ + dwc2_hsotg_core_connect(hsotg); + } } else { if (dwc2_is_device_mode(hsotg)) { if (!dwc2_ovr_bvalid(hsotg, false)) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 7ff8fc8f79a9..06d0e88ec8af 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -43,6 +43,7 @@ #define PCI_DEVICE_ID_INTEL_ADLP 0x51ee #define PCI_DEVICE_ID_INTEL_ADLM 0x54ee #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 +#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -85,8 +86,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = { static struct gpiod_lookup_table platform_bytcr_gpios = { .dev_id = "0000:00:16.0", .table = { - GPIO_LOOKUP("INT33FC:00", 54, "reset", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("INT33FC:02", 14, "cs", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:00", 54, "cs", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:02", 14, "reset", GPIO_ACTIVE_HIGH), {} }, }; @@ -119,6 +120,13 @@ static const struct property_entry dwc3_pci_intel_properties[] = { {} }; +static const struct property_entry dwc3_pci_intel_byt_properties[] = { + 
PROPERTY_ENTRY_STRING("dr_mode", "peripheral"), + PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"), + PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), + {} +}; + static const struct property_entry dwc3_pci_mrfld_properties[] = { PROPERTY_ENTRY_STRING("dr_mode", "otg"), PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"), @@ -161,6 +169,10 @@ static const struct software_node dwc3_pci_intel_swnode = { .properties = dwc3_pci_intel_properties, }; +static const struct software_node dwc3_pci_intel_byt_swnode = { + .properties = dwc3_pci_intel_byt_properties, +}; + static const struct software_node dwc3_pci_intel_mrfld_swnode = { .properties = dwc3_pci_mrfld_properties, }; @@ -344,7 +356,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { (kernel_ulong_t) &dwc3_pci_intel_swnode, }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT), - (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + (kernel_ulong_t) &dwc3_pci_intel_byt_swnode, }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), (kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, }, @@ -409,6 +421,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 183b90923f51..a0c883f19a41 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4160,9 +4160,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt) unsigned long flags; irqreturn_t ret = IRQ_NONE; + local_bh_disable(); spin_lock_irqsave(&dwc->lock, flags); ret = dwc3_process_event_buf(evt); spin_unlock_irqrestore(&dwc->lock, flags); + local_bh_enable(); return ret; } diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 
b7ccf1803656..00b3f6b3bb31 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -922,6 +922,7 @@ struct rndis_params *rndis_register(void (*resp_avail)(void *v), void *v) params->resp_avail = resp_avail; params->v = v; INIT_LIST_HEAD(&params->resp_queue); + spin_lock_init(&params->resp_lock); pr_debug("%s: configNr = %d\n", __func__, i); return params; @@ -1015,12 +1016,14 @@ void rndis_free_response(struct rndis_params *params, u8 *buf) { rndis_resp_t *r, *n; + spin_lock(&params->resp_lock); list_for_each_entry_safe(r, n, &params->resp_queue, list) { if (r->buf == buf) { list_del(&r->list); kfree(r); } } + spin_unlock(&params->resp_lock); } EXPORT_SYMBOL_GPL(rndis_free_response); @@ -1030,14 +1033,17 @@ u8 *rndis_get_next_response(struct rndis_params *params, u32 *length) if (!length) return NULL; + spin_lock(&params->resp_lock); list_for_each_entry_safe(r, n, &params->resp_queue, list) { if (!r->send) { r->send = 1; *length = r->length; + spin_unlock(&params->resp_lock); return r->buf; } } + spin_unlock(&params->resp_lock); return NULL; } EXPORT_SYMBOL_GPL(rndis_get_next_response); @@ -1054,7 +1060,9 @@ static rndis_resp_t *rndis_add_response(struct rndis_params *params, u32 length) r->length = length; r->send = 0; + spin_lock(&params->resp_lock); list_add_tail(&r->list, &params->resp_queue); + spin_unlock(&params->resp_lock); return r; } diff --git a/drivers/usb/gadget/function/rndis.h b/drivers/usb/gadget/function/rndis.h index f6167f7fea82..6206b8b7490f 100644 --- a/drivers/usb/gadget/function/rndis.h +++ b/drivers/usb/gadget/function/rndis.h @@ -174,6 +174,7 @@ typedef struct rndis_params { void (*resp_avail)(void *v); void *v; struct list_head resp_queue; + spinlock_t resp_lock; } rndis_params; /* RNDIS Message parser and other useless functions */ diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c index d2a2b20cc1ad..7d9bd16190c0 100644 --- a/drivers/usb/gadget/udc/max3420_udc.c +++ b/drivers/usb/gadget/udc/max3420_udc.c @@
-1292,7 +1292,7 @@ del_gadget: return err; } -static int max3420_remove(struct spi_device *spi) +static void max3420_remove(struct spi_device *spi) { struct max3420_udc *udc = spi_get_drvdata(spi); unsigned long flags; @@ -1304,8 +1304,6 @@ static int max3420_remove(struct spi_device *spi) kthread_stop(udc->thread_task); spin_unlock_irqrestore(&udc->lock, flags); - - return 0; } static const struct of_device_id max3420_udc_of_match[] = { diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 6ce886fb7bfe..2907fad04e2c 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -1615,6 +1615,8 @@ static void xudc_getstatus(struct xusb_udc *udc) break; case USB_RECIP_ENDPOINT: epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (epnum >= XUSB_MAX_ENDPOINTS) + goto stall; target_ep = &udc->ep[epnum]; epcfgreg = udc->read_fn(udc->addr + target_ep->offset); halt = epcfgreg & XUSB_EP_CFG_STALL_MASK; @@ -1682,6 +1684,10 @@ static void xudc_set_clear_feature(struct xusb_udc *udc) case USB_RECIP_ENDPOINT: if (!udc->setup.wValue) { endpoint = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (endpoint >= XUSB_MAX_ENDPOINTS) { + xudc_ep0_stall(udc); + return; + } target_ep = &udc->ep[endpoint]; outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK; outinbit = outinbit >> 7; diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index 30de85a707fe..99a5523a79fb 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -1926,7 +1926,7 @@ error: return retval; } -static int +static void max3421_remove(struct spi_device *spi) { struct max3421_hcd *max3421_hcd; @@ -1947,7 +1947,6 @@ max3421_remove(struct spi_device *spi) free_irq(spi->irq, hcd); usb_put_hcd(hcd); - return 0; } static const struct of_device_id max3421_of_match_table[] = { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index dc357cabb265..2d378543bc3a 100644 --- 
a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1091,6 +1091,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) int retval = 0; bool comp_timer_running = false; bool pending_portevent = false; + bool reinit_xhc = false; if (!hcd->state) return 0; @@ -1107,10 +1108,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); spin_lock_irq(&xhci->lock); - if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend) - hibernated = true; - if (!hibernated) { + if (hibernated || xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend) + reinit_xhc = true; + + if (!reinit_xhc) { /* * Some controllers might lose power during suspend, so wait * for controller not ready bit to clear, just as in xHC init. @@ -1143,12 +1145,17 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } - temp = readl(&xhci->op_regs->status); } - /* If restore operation fails, re-initialize the HC during resume */ - if ((temp & STS_SRE) || hibernated) { + temp = readl(&xhci->op_regs->status); + /* re-initialize the HC on Restore Error, or Host Controller Error */ + if (temp & (STS_SRE | STS_HCE)) { + reinit_xhc = true; + xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); + } + + if (reinit_xhc) { if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !(xhci_all_ports_seen_u0(xhci))) { del_timer_sync(&xhci->comp_mode_recovery_timer); @@ -1604,9 +1611,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct urb_priv *urb_priv; int num_tds; - if (!urb || xhci_check_args(hcd, urb->dev, urb->ep, - true, true, __func__) <= 0) + if (!urb) return -EINVAL; + ret = xhci_check_args(hcd, urb->dev, urb->ep, + true, true, __func__); + if (ret <= 0) + return ret ? 
ret : -EINVAL; slot_id = urb->dev->slot_id; ep_index = xhci_get_endpoint_index(&urb->ep->desc); @@ -3323,7 +3333,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, return -EINVAL; ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, true, __func__); if (ret <= 0) - return -EINVAL; + return ret ? ret : -EINVAL; if (usb_ss_max_streams(&ep->ss_ep_comp) == 0) { xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion" " descriptor for ep 0x%x does not support streams\n", diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 58cba8ee0277..2798fca71261 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -81,7 +81,6 @@ #define CH341_QUIRK_SIMULATE_BREAK BIT(1) static const struct usb_device_id id_table[] = { - { USB_DEVICE(0x1a86, 0x5512) }, { USB_DEVICE(0x1a86, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 962e9943fc20..e7755d9cfc61 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -198,6 +198,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5821E 0x81d7 #define DELL_PRODUCT_5821E_ESIM 0x81e0 +#define DELL_PRODUCT_5829E_ESIM 0x81e4 +#define DELL_PRODUCT_5829E 0x81e6 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da @@ -1063,6 +1065,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E), + .driver_info = RSVD(0) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), + .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, @@ -1273,10 
+1279,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701a, 0xff), /* Telit LE910R1 (RNDIS) */ + .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */ + .driver_info = NCTRL(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */ .driver_info = NCTRL(0) | ZLP }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9201), /* Telit LE910R1 flashing device */ + .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 6d27a5b5e3ca..7ffcda94d323 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -761,12 +761,12 @@ static int tps6598x_probe(struct i2c_client *client) ret = tps6598x_read32(tps, TPS_REG_STATUS, &status); if (ret < 0) - return ret; + goto err_clear_mask; trace_tps6598x_status(status); ret = tps6598x_read32(tps, TPS_REG_SYSTEM_CONF, &conf); if (ret < 0) - return ret; + goto err_clear_mask; /* * This fwnode has a "compatible" property, but is never populated as a @@ -855,7 +855,8 @@ err_role_put: usb_role_switch_put(tps->role_sw); err_fwnode_put: fwnode_handle_put(fwnode); - +err_clear_mask: + tps6598x_write64(tps, TPS_REG_INT_MASK1, 0); return ret; } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 28ef323882fb..792ab5f23647 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -473,6 +473,7 @@ static void vhost_tx_batch(struct vhost_net *net, goto signal_used; 
msghdr->msg_control = &ctl; + msghdr->msg_controllen = sizeof(ctl); err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index d6ca1c7ad513..37f0b4274113 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -629,16 +629,18 @@ err: return ret; } -static int vhost_vsock_stop(struct vhost_vsock *vsock) +static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner) { size_t i; - int ret; + int ret = 0; mutex_lock(&vsock->dev.mutex); - ret = vhost_dev_check_owner(&vsock->dev); - if (ret) - goto err; + if (check_owner) { + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { struct vhost_virtqueue *vq = &vsock->vqs[i]; @@ -753,7 +755,12 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); - vhost_vsock_stop(vsock); + /* Don't check the owner, because we are in the release path, so we + * need to stop the vsock device in any case. + * vhost_vsock_stop() can not fail in this case, so we don't need to + * check the return code. 
+ */ + vhost_vsock_stop(vsock, false); vhost_vsock_flush(vsock); vhost_dev_stop(&vsock->dev); @@ -868,7 +875,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (start) return vhost_vsock_start(vsock); else - return vhost_vsock_stop(vsock); + return vhost_vsock_stop(vsock, true); case VHOST_GET_FEATURES: features = VHOST_VSOCK_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index 8a4361e95a11..522dd81110b8 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -506,12 +506,11 @@ static int ams369fg06_probe(struct spi_device *spi) return 0; } -static int ams369fg06_remove(struct spi_device *spi) +static void ams369fg06_remove(struct spi_device *spi) { struct ams369fg06 *lcd = spi_get_drvdata(spi); ams369fg06_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index 33f5d80495e6..0a57033ae31d 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -542,7 +542,7 @@ static int corgi_lcd_probe(struct spi_device *spi) return 0; } -static int corgi_lcd_remove(struct spi_device *spi) +static void corgi_lcd_remove(struct spi_device *spi) { struct corgi_lcd *lcd = spi_get_drvdata(spi); @@ -550,7 +550,6 @@ static int corgi_lcd_remove(struct spi_device *spi) lcd->bl_dev->props.brightness = 0; backlight_update_status(lcd->bl_dev); corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN); - return 0; } static struct spi_driver corgi_lcd_driver = { diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c index 328aba9cddad..e7b6bd827986 100644 --- a/drivers/video/backlight/ili922x.c +++ b/drivers/video/backlight/ili922x.c @@ -526,10 +526,9 @@ static int ili922x_probe(struct spi_device *spi) return 0; } -static int ili922x_remove(struct spi_device 
*spi) +static void ili922x_remove(struct spi_device *spi) { ili922x_poweroff(spi); - return 0; } static struct spi_driver ili922x_driver = { diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index 46f97d1c3d21..cc763cf15f53 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -223,12 +223,11 @@ static int l4f00242t03_probe(struct spi_device *spi) return 0; } -static int l4f00242t03_remove(struct spi_device *spi) +static void l4f00242t03_remove(struct spi_device *spi) { struct l4f00242t03_priv *priv = spi_get_drvdata(spi); l4f00242t03_lcd_power_set(priv->ld, FB_BLANK_POWERDOWN); - return 0; } static void l4f00242t03_shutdown(struct spi_device *spi) diff --git a/drivers/video/backlight/lms501kf03.c b/drivers/video/backlight/lms501kf03.c index f949b66dce1b..5c46df8022bf 100644 --- a/drivers/video/backlight/lms501kf03.c +++ b/drivers/video/backlight/lms501kf03.c @@ -364,12 +364,11 @@ static int lms501kf03_probe(struct spi_device *spi) return 0; } -static int lms501kf03_remove(struct spi_device *spi) +static void lms501kf03_remove(struct spi_device *spi) { struct lms501kf03 *lcd = spi_get_drvdata(spi); lms501kf03_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c index 5cbf621e48bd..b6d373af6e3f 100644 --- a/drivers/video/backlight/ltv350qv.c +++ b/drivers/video/backlight/ltv350qv.c @@ -255,12 +255,11 @@ static int ltv350qv_probe(struct spi_device *spi) return 0; } -static int ltv350qv_remove(struct spi_device *spi) +static void ltv350qv_remove(struct spi_device *spi) { struct ltv350qv *lcd = spi_get_drvdata(spi); ltv350qv_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c index 0de044dcafd5..fc6fbaf85594 100644 --- a/drivers/video/backlight/tdo24m.c +++ 
b/drivers/video/backlight/tdo24m.c @@ -397,12 +397,11 @@ static int tdo24m_probe(struct spi_device *spi) return 0; } -static int tdo24m_remove(struct spi_device *spi) +static void tdo24m_remove(struct spi_device *spi) { struct tdo24m *lcd = spi_get_drvdata(spi); tdo24m_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 38765544345b..23d6c6bf0f54 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -232,15 +232,13 @@ err_register: return ret; } -static int tosa_lcd_remove(struct spi_device *spi) +static void tosa_lcd_remove(struct spi_device *spi) { struct tosa_lcd_data *data = spi_get_drvdata(spi); i2c_unregister_device(data->i2c); tosa_lcd_tg_off(data); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c index 3567b45f9ba9..bfc1913e8b55 100644 --- a/drivers/video/backlight/vgg2432a4.c +++ b/drivers/video/backlight/vgg2432a4.c @@ -233,11 +233,9 @@ static int vgg2432a4_probe(struct spi_device *spi) return 0; } -static int vgg2432a4_remove(struct spi_device *spi) +static void vgg2432a4_remove(struct spi_device *spi) { ili9320_remove(spi_get_drvdata(spi)); - - return 0; } static void vgg2432a4_shutdown(struct spi_device *spi) diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c index a75ae0c9b14c..03cff39d392d 100644 --- a/drivers/video/fbdev/omap/lcd_mipid.c +++ b/drivers/video/fbdev/omap/lcd_mipid.c @@ -570,14 +570,12 @@ static int mipid_spi_probe(struct spi_device *spi) return 0; } -static int mipid_spi_remove(struct spi_device *spi) +static void mipid_spi_remove(struct spi_device *spi) { struct mipid_device *md = dev_get_drvdata(&spi->dev); mipid_disable(&md->panel); kfree(md); - - return 0; } static struct spi_driver mipid_spi_driver = { diff --git 
a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c index 1bec7a4422e8..aab67721263d 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c @@ -316,7 +316,7 @@ err_gpio: return r; } -static int lb035q02_panel_spi_remove(struct spi_device *spi) +static void lb035q02_panel_spi_remove(struct spi_device *spi) { struct panel_drv_data *ddata = spi_get_drvdata(spi); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -328,8 +328,6 @@ static int lb035q02_panel_spi_remove(struct spi_device *spi) lb035q02_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id lb035q02_of_match[] = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c index dff9ebbadfc0..be9910ff6e62 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c @@ -327,7 +327,7 @@ err_gpio: return r; } -static int nec_8048_remove(struct spi_device *spi) +static void nec_8048_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -341,8 +341,6 @@ static int nec_8048_remove(struct spi_device *spi) nec_8048_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c index 8d8b5ff7d43c..a909b5385ca5 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c @@ -857,7 +857,7 @@ err_gpio: return r; } -static int acx565akm_remove(struct spi_device *spi) +static void 
acx565akm_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -874,8 +874,6 @@ static int acx565akm_remove(struct spi_device *spi) acx565akm_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id acx565akm_of_match[] = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c index 595ebd8bd5dc..3c0f887d3092 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c @@ -425,7 +425,7 @@ err_reg: return r; } -static int td028ttec1_panel_remove(struct spi_device *spi) +static void td028ttec1_panel_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -439,8 +439,6 @@ static int td028ttec1_panel_remove(struct spi_device *spi) td028ttec1_panel_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id td028ttec1_of_match[] = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c index afac1d9445aa..58bbba7c037f 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c @@ -564,7 +564,7 @@ err_regulator: return r; } -static int tpo_td043_remove(struct spi_device *spi) +static void tpo_td043_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -580,8 +580,6 @@ static int tpo_td043_remove(struct spi_device *spi) omap_dss_put_device(in); sysfs_remove_group(&spi->dev.kobj, &tpo_td043_attr_group); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c 
index 9e11e6f13e83..d61543fbd652 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1135,14 +1135,25 @@ out_free_interp: * is then page aligned. */ load_bias = ELF_PAGESTART(load_bias - vaddr); - } - /* - * Calculate the entire size of the ELF mapping (total_size). - * (Note that load_addr_set is set to true later once the - * initial mapping is performed.) - */ - if (!load_addr_set) { + /* + * Calculate the entire size of the ELF mapping + * (total_size), used for the initial mapping, + * due to load_addr_set which is set to true later + * once the initial mapping is performed. + * + * Note that this is only sensible when the LOAD + * segments are contiguous (or overlapping). If + * used for LOADs that are far apart, this would + * cause the holes between LOADs to be mapped, + * running the risk of having the mapping fail, + * as it would be larger than the ELF file itself. + * + * As a result, only ET_DYN does this, since + * some ET_EXEC (e.g. ia64) may have large virtual + * memory holes between LOADs. 
+ * + */ total_size = total_mapping_size(elf_phdata, elf_ex->e_phnum); if (!total_size) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8992e0096163..947f04789389 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3291,7 +3291,7 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, int __init btrfs_auto_defrag_init(void); void __cold btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct btrfs_inode *inode); + struct btrfs_inode *inode, u32 extent_thresh); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 5a36add21305..c28ceddefae4 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -261,6 +261,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; em->mod_start = merge->mod_start; em->generation = max(em->generation, merge->generation); + set_bit(EXTENT_FLAG_MERGED, &em->flags); rb_erase_cached(&merge->rb_node, &tree->map); RB_CLEAR_NODE(&merge->rb_node); @@ -278,6 +279,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) RB_CLEAR_NODE(&merge->rb_node); em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; em->generation = max(em->generation, merge->generation); + set_bit(EXTENT_FLAG_MERGED, &em->flags); free_extent_map(merge); } } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 8e217337dff9..d2fa32ffe304 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -25,6 +25,8 @@ enum { EXTENT_FLAG_FILLING, /* filesystem extent mapping type */ EXTENT_FLAG_FS_MAPPING, + /* This em is merged from two or more physically adjacent ems */ + EXTENT_FLAG_MERGED, }; struct extent_map { @@ -40,6 +42,12 @@ struct extent_map 
{ u64 ram_bytes; u64 block_start; u64 block_len; + + /* + * Generation of the extent map, for merged em it's the highest + * generation of all merged ems. + * For non-merged extents, it's from btrfs_file_extent_item::generation. + */ u64 generation; unsigned long flags; /* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 11204dbbe053..a0179cc62913 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -50,11 +50,14 @@ struct inode_defrag { /* root objectid */ u64 root; - /* last offset we were able to defrag */ - u64 last_offset; - - /* if we've wrapped around back to zero once already */ - int cycled; + /* + * The extent size threshold for autodefrag. + * + * This value is different for compressed/non-compressed extents, + * thus needs to be passed from higher layer. + * (aka, inode_should_defrag()) + */ + u32 extent_thresh; }; static int __compare_inode_defrag(struct inode_defrag *defrag1, @@ -107,8 +110,8 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, */ if (defrag->transid < entry->transid) entry->transid = defrag->transid; - if (defrag->last_offset > entry->last_offset) - entry->last_offset = defrag->last_offset; + entry->extent_thresh = min(defrag->extent_thresh, + entry->extent_thresh); return -EEXIST; } } @@ -134,7 +137,7 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) * enabled */ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct btrfs_inode *inode) + struct btrfs_inode *inode, u32 extent_thresh) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; @@ -160,6 +163,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, defrag->ino = btrfs_ino(inode); defrag->transid = transid; defrag->root = root->root_key.objectid; + defrag->extent_thresh = extent_thresh; spin_lock(&fs_info->defrag_inodes_lock); if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) { @@ -179,34 +183,6 @@ int 
btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, } /* - * Requeue the defrag object. If there is a defrag object that points to - * the same inode in the tree, we will merge them together (by - * __btrfs_add_inode_defrag()) and free the one that we want to requeue. - */ -static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, - struct inode_defrag *defrag) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - int ret; - - if (!__need_auto_defrag(fs_info)) - goto out; - - /* - * Here we don't check the IN_DEFRAG flag, because we need merge - * them together. - */ - spin_lock(&fs_info->defrag_inodes_lock); - ret = __btrfs_add_inode_defrag(inode, defrag); - spin_unlock(&fs_info->defrag_inodes_lock); - if (ret) - goto out; - return; -out: - kmem_cache_free(btrfs_inode_defrag_cachep, defrag); -} - -/* * pick the defragable inode that we want, if it doesn't exist, we will get * the next one. */ @@ -278,8 +254,14 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, struct btrfs_root *inode_root; struct inode *inode; struct btrfs_ioctl_defrag_range_args range; - int num_defrag; - int ret; + int ret = 0; + u64 cur = 0; + +again: + if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)) + goto cleanup; + if (!__need_auto_defrag(fs_info)) + goto cleanup; /* get the inode */ inode_root = btrfs_get_fs_root(fs_info, defrag->root, true); @@ -295,39 +277,30 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, goto cleanup; } + if (cur >= i_size_read(inode)) { + iput(inode); + goto cleanup; + } + /* do a chunk of defrag */ clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); memset(&range, 0, sizeof(range)); range.len = (u64)-1; - range.start = defrag->last_offset; + range.start = cur; + range.extent_thresh = defrag->extent_thresh; sb_start_write(fs_info->sb); - num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, + ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid, 
BTRFS_DEFRAG_BATCH); sb_end_write(fs_info->sb); - /* - * if we filled the whole defrag batch, there - * must be more work to do. Queue this defrag - * again - */ - if (num_defrag == BTRFS_DEFRAG_BATCH) { - defrag->last_offset = range.start; - btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); - } else if (defrag->last_offset && !defrag->cycled) { - /* - * we didn't fill our defrag batch, but - * we didn't start at zero. Make sure we loop - * around to the start of the file. - */ - defrag->last_offset = 0; - defrag->cycled = 1; - btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); - } else { - kmem_cache_free(btrfs_inode_defrag_cachep, defrag); - } - iput(inode); - return 0; + + if (ret < 0) + goto cleanup; + + cur = max(cur + fs_info->sectorsize, range.start); + goto again; + cleanup: kmem_cache_free(btrfs_inode_defrag_cachep, defrag); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b2403b6127f..76e530f76e3c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -560,12 +560,12 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, } static inline void inode_should_defrag(struct btrfs_inode *inode, - u64 start, u64 end, u64 num_bytes, u64 small_write) + u64 start, u64 end, u64 num_bytes, u32 small_write) { /* If this is a small write inside eof, kick off a defrag */ if (num_bytes < small_write && (start > 0 || end + 1 < inode->disk_i_size)) - btrfs_add_inode_defrag(NULL, inode); + btrfs_add_inode_defrag(NULL, inode, small_write); } /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 927771d1853f..8d47ec5fc4f4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1012,8 +1012,155 @@ out: return ret; } +/* + * Defrag specific helper to get an extent map. + * + * Differences between this and btrfs_get_extent() are: + * + * - No extent_map will be added to inode->extent_tree + * To reduce memory usage in the long run. 
+ * + * - Extra optimization to skip file extents older than @newer_than + * By using btrfs_search_forward() we can skip entire file ranges that + * have extents created in past transactions, because btrfs_search_forward() + * will not visit leaves and nodes with a generation smaller than given + * minimal generation threshold (@newer_than). + * + * Return valid em if we find a file extent matching the requirement. + * Return NULL if we can not find a file extent matching the requirement. + * + * Return ERR_PTR() for error. + */ +static struct extent_map *defrag_get_extent(struct btrfs_inode *inode, + u64 start, u64 newer_than) +{ + struct btrfs_root *root = inode->root; + struct btrfs_file_extent_item *fi; + struct btrfs_path path = { 0 }; + struct extent_map *em; + struct btrfs_key key; + u64 ino = btrfs_ino(inode); + int ret; + + em = alloc_extent_map(); + if (!em) { + ret = -ENOMEM; + goto err; + } + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = start; + + if (newer_than) { + ret = btrfs_search_forward(root, &key, &path, newer_than); + if (ret < 0) + goto err; + /* Can't find anything newer */ + if (ret > 0) + goto not_found; + } else { + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto err; + } + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + /* + * If btrfs_search_slot() makes path to point beyond nritems, + * we should not have an empty leaf, as this inode must at + * least have its INODE_ITEM. 
+ */ + ASSERT(btrfs_header_nritems(path.nodes[0])); + path.slots[0] = btrfs_header_nritems(path.nodes[0]) - 1; + } + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + /* Perfect match, no need to go one slot back */ + if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY && + key.offset == start) + goto iterate; + + /* We didn't find a perfect match, needs to go one slot back */ + if (path.slots[0] > 0) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY) + path.slots[0]--; + } + +iterate: + /* Iterate through the path to find a file extent covering @start */ + while (true) { + u64 extent_end; + + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) + goto next; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + + /* + * We may go one slot back to INODE_REF/XATTR item, then + * need to go forward until we reach an EXTENT_DATA. + * But we should still has the correct ino as key.objectid. + */ + if (WARN_ON(key.objectid < ino) || key.type < BTRFS_EXTENT_DATA_KEY) + goto next; + + /* It's beyond our target range, definitely not extent found */ + if (key.objectid > ino || key.type > BTRFS_EXTENT_DATA_KEY) + goto not_found; + + /* + * | |<- File extent ->| + * \- start + * + * This means there is a hole between start and key.offset. + */ + if (key.offset > start) { + em->start = start; + em->orig_start = start; + em->block_start = EXTENT_MAP_HOLE; + em->len = key.offset - start; + break; + } + + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_file_extent_item); + extent_end = btrfs_file_extent_end(&path); + + /* + * |<- file extent ->| | + * \- start + * + * We haven't reached start, search next slot. 
+ */ + if (extent_end <= start) + goto next; + + /* Now this extent covers @start, convert it to em */ + btrfs_extent_item_to_extent_map(inode, &path, fi, false, em); + break; +next: + ret = btrfs_next_item(root, &path); + if (ret < 0) + goto err; + if (ret > 0) + goto not_found; + } + btrfs_release_path(&path); + return em; + +not_found: + btrfs_release_path(&path); + free_extent_map(em); + return NULL; + +err: + btrfs_release_path(&path); + free_extent_map(em); + return ERR_PTR(ret); +} + static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, - bool locked) + u64 newer_than, bool locked) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -1028,6 +1175,20 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, em = lookup_extent_mapping(em_tree, start, sectorsize); read_unlock(&em_tree->lock); + /* + * We can get a merged extent, in that case, we need to re-search + * tree to get the original em for defrag. + * + * If @newer_than is 0 or em::generation < newer_than, we can trust + * this em, as either we don't care about the generation, or the + * merged extent map will be rejected anyway. 
+ */ + if (em && test_bit(EXTENT_FLAG_MERGED, &em->flags) && + newer_than && em->generation >= newer_than) { + free_extent_map(em); + em = NULL; + } + if (!em) { struct extent_state *cached = NULL; u64 end = start + sectorsize - 1; @@ -1035,7 +1196,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, /* get the big lock and read metadata off disk */ if (!locked) lock_extent_bits(io_tree, start, end, &cached); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, sectorsize); + em = defrag_get_extent(BTRFS_I(inode), start, newer_than); if (!locked) unlock_extent_cached(io_tree, start, end, &cached); @@ -1046,23 +1207,42 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, return em; } +static u32 get_extent_max_capacity(const struct extent_map *em) +{ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + return BTRFS_MAX_COMPRESSED; + return BTRFS_MAX_EXTENT_SIZE; +} + static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, bool locked) { struct extent_map *next; - bool ret = true; + bool ret = false; /* this is the last extent */ if (em->start + em->len >= i_size_read(inode)) return false; - next = defrag_lookup_extent(inode, em->start + em->len, locked); + /* + * We want to check if the next extent can be merged with the current + * one, which can be an extent created in a past generation, so we pass + * a minimum generation of 0 to defrag_lookup_extent(). 
+ */ + next = defrag_lookup_extent(inode, em->start + em->len, 0, locked); + /* No more em or hole */ if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) - ret = false; - else if ((em->block_start + em->block_len == next->block_start) && - (em->block_len > SZ_128K && next->block_len > SZ_128K)) - ret = false; - + goto out; + if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags)) + goto out; + /* + * If the next extent is at its max capacity, defragging current extent + * makes no sense, as the total number of extents won't change. + */ + if (next->len >= get_extent_max_capacity(em)) + goto out; + ret = true; +out: free_extent_map(next); return ret; } @@ -1186,8 +1366,10 @@ struct defrag_target_range { static int defrag_collect_targets(struct btrfs_inode *inode, u64 start, u64 len, u32 extent_thresh, u64 newer_than, bool do_compress, - bool locked, struct list_head *target_list) + bool locked, struct list_head *target_list, + u64 *last_scanned_ret) { + bool last_is_target = false; u64 cur = start; int ret = 0; @@ -1197,7 +1379,9 @@ static int defrag_collect_targets(struct btrfs_inode *inode, bool next_mergeable = true; u64 range_len; - em = defrag_lookup_extent(&inode->vfs_inode, cur, locked); + last_is_target = false; + em = defrag_lookup_extent(&inode->vfs_inode, cur, + newer_than, locked); if (!em) break; @@ -1254,6 +1438,13 @@ static int defrag_collect_targets(struct btrfs_inode *inode, if (range_len >= extent_thresh) goto next; + /* + * Skip extents already at its max capacity, this is mostly for + * compressed extents, which max cap is only 128K. 
+ */ + if (em->len >= get_extent_max_capacity(em)) + goto next; + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, locked); if (!next_mergeable) { @@ -1272,6 +1463,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, } add: + last_is_target = true; range_len = min(extent_map_end(em), start + len) - cur; /* * This one is a good target, check if it can be merged into @@ -1315,6 +1507,17 @@ next: kfree(entry); } } + if (!ret && last_scanned_ret) { + /* + * If the last extent is not a target, the caller can skip to + * the end of that extent. + * Otherwise, we can only go the end of the specified range. + */ + if (!last_is_target) + *last_scanned_ret = max(cur, *last_scanned_ret); + else + *last_scanned_ret = max(start + len, *last_scanned_ret); + } return ret; } @@ -1373,7 +1576,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode, } static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, - u32 extent_thresh, u64 newer_than, bool do_compress) + u32 extent_thresh, u64 newer_than, bool do_compress, + u64 *last_scanned_ret) { struct extent_state *cached_state = NULL; struct defrag_target_range *entry; @@ -1419,7 +1623,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, */ ret = defrag_collect_targets(inode, start, len, extent_thresh, newer_than, do_compress, true, - &target_list); + &target_list, last_scanned_ret); if (ret < 0) goto unlock_extent; @@ -1454,7 +1658,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode, u64 start, u32 len, u32 extent_thresh, u64 newer_than, bool do_compress, unsigned long *sectors_defragged, - unsigned long max_sectors) + unsigned long max_sectors, + u64 *last_scanned_ret) { const u32 sectorsize = inode->root->fs_info->sectorsize; struct defrag_target_range *entry; @@ -1465,7 +1670,7 @@ static int defrag_one_cluster(struct btrfs_inode *inode, BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); ret = defrag_collect_targets(inode, start, len, 
extent_thresh, newer_than, do_compress, false, - &target_list); + &target_list, NULL); if (ret < 0) goto out; @@ -1482,6 +1687,15 @@ static int defrag_one_cluster(struct btrfs_inode *inode, range_len = min_t(u32, range_len, (max_sectors - *sectors_defragged) * sectorsize); + /* + * If defrag_one_range() has updated last_scanned_ret, + * our range may already be invalid (e.g. hole punched). + * Skip if our range is before last_scanned_ret, as there is + * no need to defrag the range anymore. + */ + if (entry->start + range_len <= *last_scanned_ret) + continue; + if (ra) page_cache_sync_readahead(inode->vfs_inode.i_mapping, ra, NULL, entry->start >> PAGE_SHIFT, @@ -1494,7 +1708,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode, * accounting. */ ret = defrag_one_range(inode, entry->start, range_len, - extent_thresh, newer_than, do_compress); + extent_thresh, newer_than, do_compress, + last_scanned_ret); if (ret < 0) break; *sectors_defragged += range_len >> @@ -1505,6 +1720,8 @@ out: list_del_init(&entry->list); kfree(entry); } + if (ret >= 0) + *last_scanned_ret = max(*last_scanned_ret, start + len); return ret; } @@ -1590,6 +1807,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, while (cur < last_byte) { const unsigned long prev_sectors_defragged = sectors_defragged; + u64 last_scanned = cur; u64 cluster_end; /* The cluster size 256K should always be page aligned */ @@ -1619,8 +1837,8 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, BTRFS_I(inode)->defrag_compress = compress_type; ret = defrag_one_cluster(BTRFS_I(inode), ra, cur, cluster_end + 1 - cur, extent_thresh, - newer_than, do_compress, - &sectors_defragged, max_to_defrag); + newer_than, do_compress, &sectors_defragged, + max_to_defrag, &last_scanned); if (sectors_defragged > prev_sectors_defragged) balance_dirty_pages_ratelimited(inode->i_mapping); @@ -1628,7 +1846,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
btrfs_inode_unlock(inode, 0); if (ret < 0) break; - cur = cluster_end + 1; + cur = max(cluster_end + 1, last_scanned); if (ret > 0) { ret = 0; break; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 0fb90cbe7669..e6e28a9c7987 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -380,6 +380,17 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) kunmap(cur_page); cur_in += LZO_LEN; + if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) { + /* + * seg_len shouldn't be larger than we have allocated + * for workspace->cbuf + */ + btrfs_err(fs_info, "unexpectedly large lzo segment len %u", + seg_len); + ret = -EIO; + goto out; + } + /* Copy the compressed segment payload into workspace */ copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index ee3aab3dd4ac..bf861fef2f0c 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -949,6 +949,9 @@ static void populate_new_aces(char *nacl_base, pnntace = (struct cifs_ace *) (nacl_base + nsize); nsize += setup_special_mode_ACE(pnntace, nmode); num_aces++; + pnntace = (struct cifs_ace *) (nacl_base + nsize); + nsize += setup_authusers_ACE(pnntace); + num_aces++; goto set_size; } @@ -1297,7 +1300,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, if (uid_valid(uid)) { /* chown */ uid_t id; - nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid), + nowner_sid_ptr = kzalloc(sizeof(struct cifs_sid), GFP_KERNEL); if (!nowner_sid_ptr) { rc = -ENOMEM; @@ -1326,7 +1329,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, } if (gid_valid(gid)) { /* chgrp */ gid_t id; - ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid), + ngroup_sid_ptr = kzalloc(sizeof(struct cifs_sid), GFP_KERNEL); if (!ngroup_sid_ptr) { rc = -ENOMEM; @@ -1613,7 +1616,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = secdesclen; if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ if 
(mode_from_sid) - nsecdesclen += sizeof(struct cifs_ace); + nsecdesclen += 2 * sizeof(struct cifs_ace); else /* cifsacl */ nsecdesclen += 5 * sizeof(struct cifs_ace); } else { /* chown */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 199edac0cb59..082c21478686 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -919,6 +919,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); + return root; out: if (cifs_sb) { kfree(cifs_sb->prepath); diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 7ec35f3f0a5f..a92e9eec521f 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -149,7 +149,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_u32("echo_interval", Opt_echo_interval), fsparam_u32("max_credits", Opt_max_credits), fsparam_u32("handletimeout", Opt_handletimeout), - fsparam_u32("snapshot", Opt_snapshot), + fsparam_u64("snapshot", Opt_snapshot), fsparam_u32("max_channels", Opt_max_channels), /* Mount options which take string value */ @@ -1078,7 +1078,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->echo_interval = result.uint_32; break; case Opt_snapshot: - ctx->snapshot_time = result.uint_32; + ctx->snapshot_time = result.uint_64; break; case Opt_max_credits: if (result.uint_32 < 20 || result.uint_32 > 60000) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5723d50340e5..32f478c7a66d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -127,11 +127,6 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) struct cifs_server_iface *ifaces = NULL; size_t iface_count; - if (ses->server->dialect < SMB30_PROT_ID) { - cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n"); - return 0; - } - spin_lock(&ses->chan_lock); new_chan_count = old_chan_count = ses->chan_count; @@ -145,6 +140,12 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) return 0; } + if 
(ses->server->dialect < SMB30_PROT_ID) { + spin_unlock(&ses->chan_lock); + cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n"); + return 0; + } + if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { ses->chan_max = 1; spin_unlock(&ses->chan_lock); diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 7d8b72d67c80..9d486fbbfbbd 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -175,11 +175,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler, switch (handler->flags) { case XATTR_CIFS_NTSD_FULL: aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_GROUP | CIFS_ACL_DACL | CIFS_ACL_SACL); break; case XATTR_CIFS_NTSD: aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_GROUP | CIFS_ACL_DACL); break; case XATTR_CIFS_ACL: diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index d3cd2a94d1e8..d1f9d2632202 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -34,6 +34,14 @@ */ DEFINE_SPINLOCK(configfs_dirent_lock); +/* + * All of link_obj/unlink_obj/link_group/unlink_group require that + * subsys->su_mutex is held. + * But parent configfs_subsystem is NULL when config_item is root. + * Use this mutex when config_item is root. 
+ */ +static DEFINE_MUTEX(configfs_subsystem_mutex); + static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { @@ -1859,7 +1867,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) group->cg_item.ci_name = group->cg_item.ci_namebuf; sd = root->d_fsdata; + mutex_lock(&configfs_subsystem_mutex); link_group(to_config_group(sd->s_element), group); + mutex_unlock(&configfs_subsystem_mutex); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); @@ -1884,7 +1894,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) inode_unlock(d_inode(root)); if (err) { + mutex_lock(&configfs_subsystem_mutex); unlink_group(group); + mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } put_fragment(frag); @@ -1931,7 +1943,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) dput(dentry); + mutex_lock(&configfs_subsystem_mutex); unlink_group(group); + mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index b8272fb95fd6..5aa2cf2c2f80 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -325,7 +325,7 @@ struct erofs_inode { unsigned char z_algorithmtype[2]; unsigned char z_logical_clusterbits; unsigned long z_tailextent_headlcn; - unsigned int z_idataoff; + erofs_off_t z_idataoff; unsigned short z_idata_size; }; #endif /* CONFIG_EROFS_FS_ZIP */ diff --git a/fs/io_uring.c b/fs/io_uring.c index 77b9c7e4793b..4715980e9015 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4567,6 +4567,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) } else { list_add_tail(&buf->list, &(*head)->list); } + cond_resched(); } return i ? 
i : -ENOMEM; @@ -7693,7 +7694,7 @@ static int io_run_task_work_sig(void) /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - signed long *timeout) + ktime_t timeout) { int ret; @@ -7705,8 +7706,9 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (test_bit(0, &ctx->check_cq_overflow)) return 1; - *timeout = schedule_timeout(*timeout); - return !*timeout ? -ETIME : 1; + if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + return -ETIME; + return 1; } /* @@ -7719,7 +7721,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, { struct io_wait_queue iowq; struct io_rings *rings = ctx->rings; - signed long timeout = MAX_SCHEDULE_TIMEOUT; + ktime_t timeout = KTIME_MAX; int ret; do { @@ -7735,7 +7737,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (get_timespec64(&ts, uts)) return -EFAULT; - timeout = timespec64_to_jiffies(&ts); + timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); } if (sig) { @@ -7767,7 +7769,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); + ret = io_cqring_wait_schedule(ctx, &iowq, timeout); finish_wait(&ctx->cq_wait, &iowq.wq); cond_resched(); } while (ret > 0); @@ -7924,7 +7926,15 @@ static __cold int io_rsrc_ref_quiesce(struct io_rsrc_data *data, ret = wait_for_completion_interruptible(&data->done); if (!ret) { mutex_lock(&ctx->uring_lock); - break; + if (atomic_read(&data->refs) > 0) { + /* + * it has been revived by another thread while + * we were unlocked + */ + mutex_unlock(&ctx->uring_lock); + } else { + break; + } } atomic_inc(&data->refs); diff --git a/fs/namespace.c b/fs/namespace.c index 40b994a29e90..de6fae84f1a1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -469,6 +469,24 @@ void mnt_drop_write_file(struct file 
*file) } EXPORT_SYMBOL(mnt_drop_write_file); +/** + * mnt_hold_writers - prevent write access to the given mount + * @mnt: mnt to prevent write access to + * + * Prevents write access to @mnt if there are no active writers for @mnt. + * This function needs to be called and return successfully before changing + * properties of @mnt that need to remain stable for callers with write access + * to @mnt. + * + * After this function has been called successfully, callers must pair it with + * a call to mnt_unhold_writers() in order to stop preventing write access to + * @mnt. + * + * Context: This function expects lock_mount_hash() to be held serializing + * setting MNT_WRITE_HOLD. + * Return: On success 0 is returned. + * On error, -EBUSY is returned. + */ static inline int mnt_hold_writers(struct mount *mnt) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -500,6 +518,18 @@ static inline int mnt_hold_writers(struct mount *mnt) return 0; } +/** + * mnt_unhold_writers - stop preventing write access to the given mount + * @mnt: mnt to stop preventing write access to + * + * Stop preventing write access to @mnt allowing callers to gain write access + * to @mnt again. + * + * This function can only be called after a successful call to + * mnt_hold_writers(). + * + * Context: This function expects lock_mount_hash() to be held. 
+ */ static inline void mnt_unhold_writers(struct mount *mnt) { /* diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7bc7cf6b26f0..75cb1cbe4cde 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2010,14 +2010,14 @@ no_open: if (!res) { inode = d_inode(dentry); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && - !S_ISDIR(inode->i_mode)) + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) res = ERR_PTR(-ENOTDIR); else if (inode && S_ISREG(inode->i_mode)) res = ERR_PTR(-EOPENSTALE); } else if (!IS_ERR(res)) { inode = d_inode(res); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && - !S_ISDIR(inode->i_mode)) { + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { dput(res); res = ERR_PTR(-ENOTDIR); } else if (inode && S_ISREG(inode->i_mode)) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a918c3a834b6..d96baa4450e3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -853,12 +853,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, } /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; - } + if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); /* * We may force a getattr if the user cares about atime. 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f5020828ab65..0e0db6c27619 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1229,8 +1229,7 @@ nfs4_update_changeattr_locked(struct inode *inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | - NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR | - NFS_INO_REVAL_PAGECACHE; + NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR; nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); } nfsi->attrtimeo_timestamp = jiffies; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index bafc02bf8220..de7252715b12 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -264,7 +264,6 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) if (!gid_valid(gid)) return -EINVAL; opts->gid = gid; - set_gid(tracefs_mount->mnt_root, gid); break; case Opt_mode: if (match_octal(&args[0], &option)) @@ -291,7 +290,9 @@ static int tracefs_apply_options(struct super_block *sb) inode->i_mode |= opts->mode; inode->i_uid = opts->uid; - inode->i_gid = opts->gid; + + /* Set all the group ids to the mount option */ + set_gid(sb->s_root, opts->gid); return 0; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4c0dee78b2f8..d84714e4e46a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1753,6 +1753,11 @@ xfs_remount_ro( }; int error; + /* Flush all the dirty data to disk. */ + error = sync_filesystem(mp->m_super); + if (error) + return error; + /* * Cancel background eofb scanning so it cannot race with the final * log force+buftarg wait and deadlock the remount. 
@@ -1831,8 +1836,6 @@ xfs_fs_reconfigure( if (error) return error; - sync_filesystem(mp->m_super); - /* inode32 -> inode64 */ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { mp->m_features &= ~XFS_FEAT_SMALL_INUMS; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..16b47035e4b0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); -extern void blk_set_queue_dying(struct request_queue *); + +void blk_mark_disk_dead(struct gendisk *disk); #ifdef CONFIG_BLOCK /* diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index a81652d1c6f3..7ae21c0f7f23 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -113,7 +113,7 @@ struct can_tdc_const { }; #ifdef CONFIG_CAN_CALC_BITTIMING -int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, @@ -121,7 +121,7 @@ void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, u32 *ctrlmode, u32 ctrlmode_supported); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int -can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc) { netdev_err(dev, "bit-timing calculation not available\n"); @@ -136,7 +136,7 @@ can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, } #endif /* CONFIG_CAN_CALC_BITTIMING */ -int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct 
can_bittiming_const *btc, const u32 *bitrate_const, const unsigned int bitrate_const_cnt); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ab29e61b078..3522a272b74d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -382,6 +382,9 @@ struct cpufreq_driver { int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); + /* Will be called after the driver is fully initialized */ + void (*ready)(struct cpufreq_policy *policy); + struct freq_attr **attr; /* platform specific boost support code */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index dca2b1355bb1..6150d11a607e 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -62,6 +62,14 @@ #define DMA_ATTR_PRIVILEGED (1UL << 9) /* + * This is a hint to the DMA-mapping subsystem that the device is expected + * to overwrite the entire mapped size, thus the caller does not require any + * of the previous buffer contents to be preserved. This allows + * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. + */ +#define DMA_ATTR_OVERWRITE (1UL << 10) + +/* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. 
It is specific to a * given device and there may be a translation between the CPU physical address diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 939a1beaddf7..3ed117e299ec 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -32,31 +32,29 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); -struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, - u16 tpid, u16 tci); +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); +void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); -int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci); -void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, + int *vbid); -u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, + int vbid); -u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); +u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num); -u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp); +u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -bool vid_is_dsa_8021q_rxvlan(u16 vid); - -bool vid_is_dsa_8021q_txvlan(u16 vid); - bool vid_is_dsa_8021q(u16 vid); #endif /* _NET_DSA_8021Q_H */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e0853f48b75e..4af58459a1e7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -71,18 +71,22 @@ enum { * struct 
kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. * @tcp_data_split: Scatter packet headers and data to separate buffers + * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; + u32 cqe_size; }; /** * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len + * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), + ETHTOOL_RING_USE_CQE_SIZE = BIT(1), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/linux/filter.h b/include/linux/filter.h index 1cb1af917617..9bf26307247f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -572,7 +572,8 @@ struct bpf_prog { has_callchain_buf:1, /* callchain buffer allocated? */ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ - call_get_func_ip:1; /* Do we call get_func_ip() */ + call_get_func_ip:1, /* Do we call get_func_ip() */ + delivery_time_access:1; /* Accessed __sk_buff->delivery_time_type */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 509e18c7e740..3aae023a9353 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -58,6 +58,7 @@ struct br_ip_list { #define BR_MRP_LOST_CONT BIT(18) #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) +#define BR_PORT_LOCKED BIT(21) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 38bbc537d4e4..408539d5ea5f 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -9,6 
+9,22 @@ enum hsr_version { PRP_V1, }; +/* HSR Tag. + * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, + * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, + * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, + * encapsulated protocol } instead. + * + * Field names as defined in the IEC:2010 standard for HSR. + */ +struct hsr_tag { + __be16 path_and_LSDU_size; + __be16 sequence_nr; + __be16 encap_proto; +} __packed; + +#define HSR_HLEN 6 + #if IS_ENABLED(CONFIG_HSR) extern bool is_hsr_master(struct net_device *dev); extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 674aeead6260..ead323243e7b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -150,6 +150,7 @@ struct in_ifaddr { __be32 ifa_broadcast; unsigned char ifa_scope; unsigned char ifa_prefixlen; + unsigned char ifa_proto; __u32 ifa_flags; char ifa_label[IFNAMSIZ]; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7bfb67363434..cb15308b5cb0 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,8 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) complete(&cq->free); } +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 78655d8d13a7..d3b1a6a1f8d2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -863,20 +863,10 @@ struct mlx5_hca_vport_context { bool grh_required; }; -static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset) -{ - return buf->frags->buf + offset; -} - #define 
STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) -{ - return pci_get_drvdata(pdev); -} - extern struct dentry *mlx5_debugfs_root; static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) @@ -965,6 +955,7 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; + void *out; /* pointer to the cmd output buffer */ }; void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, @@ -973,7 +964,9 @@ void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work); - +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out); +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); @@ -991,7 +984,6 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); @@ -1039,6 +1031,9 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 
reg_id, int arg, + int write, bool verbose); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b1aad14689e3..e3bfed68b08a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -224,6 +224,7 @@ struct mlx5_flow_act { u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; + struct mlx5_flow_group *fg; }; #define MLX5_DECLARE_FLOW_ACT(name) \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 27145c4d6820..ea65131835ab 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9687,7 +9687,8 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x6b]; + u8 reserved_at_0[0x6a]; + u8 reset_state[0x1]; u8 ptpcyc2realtime_modify[0x1]; u8 reserved_at_6c[0x2]; u8 pci_status_and_power[0x1]; @@ -10369,6 +10370,14 @@ struct mlx5_ifc_mcda_reg_bits { }; enum { + MLX5_MFRL_REG_RESET_STATE_IDLE = 0, + MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, + MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2, + MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3, + MLX5_MFRL_REG_RESET_STATE_NACK = 4, +}; + +enum { MLX5_MFRL_REG_RESET_TYPE_FULL_CHIP = BIT(0), MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE = BIT(1), }; @@ -10386,7 +10395,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x8]; + u8 reserved_at_28[0x4]; + u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8]; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93fc680b658f..19a27ac361ef 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1950,6 +1950,7 @@ enum netdev_ml_priv_type { * @watchdog_dev_tracker: refcount tracker used by watchdog. 
* @dev_registered_tracker: tracker for reference held while * registered + * @offload_xstats_l3: L3 HW stats for this netdevice. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2236,7 +2237,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; #endif - +#if IS_ENABLED(CONFIG_NET_DROP_MONITOR) + struct dm_hw_stat_delta __rcu *dm_private; +#endif struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; @@ -2285,6 +2288,7 @@ struct net_device { netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; + struct rtnl_hw_stats64 *offload_xstats_l3; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2725,6 +2729,10 @@ enum netdev_cmd { NETDEV_CVLAN_FILTER_DROP_INFO, NETDEV_SVLAN_FILTER_PUSH_INFO, NETDEV_SVLAN_FILTER_DROP_INFO, + NETDEV_OFFLOAD_XSTATS_ENABLE, + NETDEV_OFFLOAD_XSTATS_DISABLE, + NETDEV_OFFLOAD_XSTATS_REPORT_USED, + NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); @@ -2775,6 +2783,42 @@ struct netdev_notifier_pre_changeaddr_info { const unsigned char *dev_addr; }; +enum netdev_offload_xstats_type { + NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1, +}; + +struct netdev_notifier_offload_xstats_info { + struct netdev_notifier_info info; /* must be first */ + enum netdev_offload_xstats_type type; + + union { + /* NETDEV_OFFLOAD_XSTATS_REPORT_DELTA */ + struct netdev_notifier_offload_xstats_rd *report_delta; + /* NETDEV_OFFLOAD_XSTATS_REPORT_USED */ + struct netdev_notifier_offload_xstats_ru *report_used; + }; +}; + +int netdev_offload_xstats_enable(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct netlink_ext_ack *extack); +int netdev_offload_xstats_disable(struct net_device *dev, + enum netdev_offload_xstats_type type); +bool netdev_offload_xstats_enabled(const struct net_device *dev, + enum 
netdev_offload_xstats_type type); +int netdev_offload_xstats_get(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_hw_stats64 *stats, bool *used, + struct netlink_ext_ack *extack); +void +netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *rd, + const struct rtnl_hw_stats64 *stats); +void +netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *ru); +void netdev_offload_xstats_push_delta(struct net_device *dev, + enum netdev_offload_xstats_type type, + const struct rtnl_hw_stats64 *stats); + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index b4dd96e4dc8d..e6487a691136 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -101,7 +101,11 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, nf_hook_state_init(&state, NF_NETDEV_EGRESS, NFPROTO_NETDEV, dev, NULL, NULL, dev_net(dev), NULL); + + /* nf assumes rcu_read_lock, not just read_lock_bh */ + rcu_read_lock(); ret = nf_hook_slow(skb, &state, e, 0); + rcu_read_unlock(); if (ret == 1) { return skb; diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 959e0bd9a913..75470159a194 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -12,6 +12,7 @@ #define NVME_TCP_DISC_PORT 8009 #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 +#define NVME_TCP_MIN_MAXH2CDATA 4096 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 98efb7b5660d..c9a3ac9efeaa 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -70,7 +70,8 @@ struct nvmem_keepout { * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. * @priv: User context passed to read/write callbacks. 
- * @wp-gpio: Write protect pin + * @wp-gpio: Write protect pin + * @ignore_wp: Write Protect pin is managed by the provider. * * Note: A default "nvmem<id>" name will be assigned to the device if * no name is specified in its configuration. In such case "<id>" is @@ -92,6 +93,7 @@ struct nvmem_config { enum nvmem_type type; bool read_only; bool root_only; + bool ignore_wp; struct device_node *of_node; bool no_of_node; nvmem_reg_read_t reg_read; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index aad54c666407..c7e6f2043c7d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2561,6 +2561,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_FUNGIBLE 0x1dad + #define PCI_VENDOR_ID_HXT 0x1dbf #define PCI_VENDOR_ID_TEKRAM 0x1de1 diff --git a/include/linux/phylink.h b/include/linux/phylink.h index cca149f78d35..223781622b33 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -86,7 +86,6 @@ enum phylink_op_type { * @type: operation type of PHYLINK instance * @legacy_pre_march2020: driver has not been updated for March 2020 updates * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") - * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. 
* @ovr_an_inband: if true, override PCS to MLO_AN_INBAND @@ -100,7 +99,6 @@ struct phylink_config { struct device *dev; enum phylink_op_type type; bool legacy_pre_march2020; - bool pcs_poll; bool poll_fixed_state; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, @@ -534,7 +532,6 @@ void phylink_generic_validate(struct phylink_config *config, struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); -void phylink_set_pcs(struct phylink *, struct phylink_pcs *pcs); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c35f3962dc4f..373003ace639 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -308,6 +308,11 @@ static inline bool rfkill_blocked(struct rfkill *rfkill) return false; } +static inline bool rfkill_soft_blocked(struct rfkill *rfkill) +{ + return false; +} + static inline enum rfkill_type rfkill_find_type(const char *name) { return RFKILL_TYPE_ALL; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bb9cb84114c1..7f970b16da3a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -134,4 +134,7 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); + +extern void rtnl_offload_xstats_notify(struct net_device *dev); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index b9198a1b3a84..e84e54d1b490 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct 
*p, - struct kernel_clone_args *kargs); +extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a5adbf6b51e8..2be263184d1e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -346,6 +346,72 @@ enum skb_drop_reason { * udp packet drop out of * udp_memory_allocated. */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one + * expected, corresponding + * to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not + * expecting one, corresponding + * to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and it's wrong, + * corresponding to + * LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket + * backlog (see + * LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ + SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data received has already + * been received before (a spurious retrans + * may have happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, + * the seq of the first byte exceeds + * the right edge of the receive + * window + */ + SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in + * the ofo queue, corresponding to + * LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by + * BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh + * entry + */ + SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ + 
SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh + * entry is full + */ + SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ + SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ + SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet + * outputting (failed to enqueue to + * current qdisc) + */ + SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to + * the per CPU backlog queue. This + * can be caused by backlog queue + * full (see netdev_max_backlog in + * net.rst) or RPS flow limit + */ + SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ + SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ + SKB_DROP_REASON_PTYPE_ABSENT, /* no packet_type found to handle + * the skb. For an Ethernet packet, + * this means that L3 protocol is + * not supported + */ SKB_DROP_REASON_MAX, }; @@ -747,6 +813,10 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required + * @mono_delivery_time: When set, skb->tstamp has the + * delivery_time in mono clock base (i.e. EDT). Otherwise, the + * skb->tstamp has the (rcv) timestamp at ingress and + * delivery_time at egress. 
* @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking @@ -889,8 +959,12 @@ struct sk_buff { __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ __u8 csum_complete_sw:1; __u8 csum_level:2; - __u8 csum_not_inet:1; __u8 dst_pending_confirm:1; + __u8 mono_delivery_time:1; +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; + __u8 tc_at_ingress:1; +#endif #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -902,10 +976,6 @@ struct sk_buff { __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; #endif -#ifdef CONFIG_NET_CLS_ACT - __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; -#endif __u8 redirected:1; #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; @@ -917,6 +987,7 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; + __u8 csum_not_inet:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -994,10 +1065,16 @@ struct sk_buff { /* if you move pkt_vlan_present around you also must adapt these constants */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_VLAN_PRESENT_BIT 7 +#define TC_AT_INGRESS_MASK (1 << 0) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else #define PKT_VLAN_PRESENT_BIT 0 +#define TC_AT_INGRESS_MASK (1 << 7) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif #define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define TC_AT_INGRESS_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define SKB_MONO_DELIVERY_TIME_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) #ifdef __KERNEL__ /* @@ -1142,10 +1219,16 @@ static inline void kfree_skb(struct sk_buff *skb) } void skb_release_head_state(struct sk_buff *skb); -void kfree_skb_list(struct sk_buff *segs); +void kfree_skb_list_reason(struct sk_buff *segs, + enum skb_drop_reason reason); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); +static inline void kfree_skb_list(struct 
sk_buff *segs) +{ + kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED); +} + #ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); #else @@ -1502,6 +1585,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1512,6 +1600,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; +} #endif /* Internal */ @@ -1751,19 +1844,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. 
+ */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); - if (skb_cloned(skb)) { - unsigned int save = skb->truesize; - int res; - - res = pskb_expand_head(skb, 0, 0, pri); - skb->truesize = save; - return res; - } + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); return 0; } @@ -3918,6 +4011,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); + skb->mono_delivery_time = 0; } static inline ktime_t net_timedelta(ktime_t t) @@ -3925,6 +4019,56 @@ static inline ktime_t net_timedelta(ktime_t t) return ktime_sub(ktime_get_real(), t); } +static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, + bool mono) +{ + skb->tstamp = kt; + skb->mono_delivery_time = kt && mono; +} + +DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); + +/* It is used in the ingress path to clear the delivery_time. + * If needed, set the skb->tstamp to the (rcv) timestamp. 
+ */ +static inline void skb_clear_delivery_time(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) { + skb->mono_delivery_time = 0; + if (static_branch_unlikely(&netstamp_needed_key)) + skb->tstamp = ktime_get_real(); + else + skb->tstamp = 0; + } +} + +static inline void skb_clear_tstamp(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return; + + skb->tstamp = 0; +} + +static inline ktime_t skb_tstamp(const struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return 0; + + return skb->tstamp; +} + +static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) +{ + if (!skb->mono_delivery_time && skb->tstamp) + return skb->tstamp; + + if (static_branch_unlikely(&netstamp_needed_key) || cond) + return ktime_get_real(); + + return 0; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; @@ -4781,7 +4925,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) - skb->tstamp = 0; + skb_clear_tstamp(skb); #endif } diff --git a/include/linux/slab.h b/include/linux/slab.h index 37bde99b74af..5b6193fd8bd9 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -660,8 +660,7 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. 
*/ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) - __alloc_size(1); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b804..c84e61b99c7b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -280,7 +280,7 @@ struct spi_message; struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); - int (*remove)(struct spi_device *spi); + void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70c069aef02c..dcea51fb60e2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -699,6 +699,8 @@ event_triggers_post_call(struct trace_event_file *file, bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); +bool __trace_trigger_soft_disabled(struct trace_event_file *file); + /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test @@ -708,20 +710,20 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); * triggers that require testing the fields, it will return true, * otherwise false. 
*/ -static inline bool +static __always_inline bool trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; - if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { - if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL, NULL); - if (eflags & EVENT_FILE_FL_SOFT_DISABLED) - return true; - if (eflags & EVENT_FILE_FL_PID_FILTER) - return trace_event_ignore_this_pid(file); - } - return false; + if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE | + EVENT_FILE_FL_SOFT_DISABLED | + EVENT_FILE_FL_PID_FILTER)))) + return false; + + if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND)) + return false; + + return __trace_trigger_soft_disabled(file); } #ifdef CONFIG_BPF_EVENTS diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 59940e230b78..f7506f08e505 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -64,6 +64,8 @@ struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; + u8 ifa_proto; + const struct in6_addr *peer_pfx; u32 rt_priority; diff --git a/include/net/arp.h b/include/net/arp.h index 031374ac2f22..d7ef4ec71dfe 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -65,6 +65,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 4b3d0b16c185..a647e5fabdbd 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -506,8 +506,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); if (IS_ERR(tmp)) { - kfree_skb(skb); - return tmp; + return 
skb; } len -= tmp->len; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f5caff1ddb29..d5377740e99c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1504,6 +1504,14 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) +/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: + * + * C24: Mandatory if the LE Controller supports Connection State and either + * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported + */ +#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \ + ext_adv_capable(dev)) + /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 diff --git a/include/net/bond_options.h b/include/net/bond_options.h index dd75c071f67e..61b49063791c 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -66,19 +66,24 @@ enum { BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, BOND_OPT_MISSED_MAX, + BOND_OPT_NS_TARGETS, BOND_OPT_LAST }; /* This structure is used for storing option values and for passing option * values when changing an option. The logic when used as an arg is as follows: - * - if string != NULL -> parse it, if the opt is RAW type then return it, else - * return the parse result - * - if string == NULL -> parse value + * - if value != ULLONG_MAX -> parse value + * - if string != NULL -> parse string + * - if the opt is RAW data and length less than maxlen, + * copy the data to extra storage */ + +#define BOND_OPT_EXTRA_MAXLEN 16 struct bond_opt_value { char *string; u64 value; u32 flags; + char extra[BOND_OPT_EXTRA_MAXLEN]; }; struct bonding; @@ -118,18 +123,26 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val); * When value is ULLONG_MAX then string will be used. 
*/ static inline void __bond_opt_init(struct bond_opt_value *optval, - char *string, u64 value) + char *string, u64 value, + void *extra, size_t extra_len) { memset(optval, 0, sizeof(*optval)); optval->value = ULLONG_MAX; - if (value == ULLONG_MAX) - optval->string = string; - else + if (value != ULLONG_MAX) optval->value = value; + else if (string) + optval->string = string; + else if (extra_len <= BOND_OPT_EXTRA_MAXLEN) + memcpy(optval->extra, extra, extra_len); } -#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value) -#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX) +#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0) +#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0) +#define bond_opt_initextra(optval, extra, extra_len) \ + __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len) void bond_option_arp_ip_targets_clear(struct bonding *bond); +#if IS_ENABLED(CONFIG_IPV6) +void bond_option_ns_ip6_targets_clear(struct bonding *bond); +#endif #endif /* _NET_BOND_OPTIONS_H */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 7dead855a72d..d0dfe727e0b1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -29,8 +29,11 @@ #include <net/bond_3ad.h> #include <net/bond_alb.h> #include <net/bond_options.h> +#include <net/ipv6.h> +#include <net/addrconf.h> #define BOND_MAX_ARP_TARGETS 16 +#define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS #define BOND_DEFAULT_MIIMON 100 @@ -146,6 +149,7 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; + struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -499,6 +503,13 @@ static inline int bond_is_ip_target_ok(__be32 addr) return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } +static inline int bond_is_ip6_target_ok(struct 
in6_addr *addr) +{ + return !ipv6_addr_any(addr) && + !ipv6_addr_loopback(addr) && + !ipv6_addr_is_multicast(addr); +} + /* Get the oldest arp which we've received on this slave for bond's * arp_targets. */ @@ -628,7 +639,7 @@ struct bond_net { struct class_attribute class_attr_bonding_masters; }; -int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); +int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); @@ -735,6 +746,19 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) return -1; } +static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) +{ + int i; + + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) + if (ipv6_addr_equal(&targets[i], ip)) + return i; + else if (ipv6_addr_any(&targets[i])) + break; + + return -1; +} + /* exported from bond_main.c */ extern unsigned int bond_net_id; diff --git a/include/net/checksum.h b/include/net/checksum.h index 5218041e5c8f..79c67f14c448 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -22,7 +22,7 @@ #include <asm/checksum.h> #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER -static inline +static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { @@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, #endif #ifndef HAVE_CSUM_COPY_USER -static __inline__ __wsum csum_and_copy_to_user +static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY -static inline __wsum +static __always_inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, 
int len) { memcpy(dst, src, len); @@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) #endif #ifndef HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; @@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) } #endif -static inline __wsum csum_sub(__wsum csum, __wsum addend) +static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } -static inline __sum16 csum16_add(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; @@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) return (__force __sum16)(res + (res < (__force u16)addend)); } -static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } -static inline __wsum csum_shift(__wsum sum, int offset) +static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ if (offset & 1) @@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset) return sum; } -static inline __wsum +static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { return csum_add(csum, csum_shift(csum2, offset)); } -static inline __wsum +static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } -static inline __wsum +static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } -static inline __wsum csum_unfold(__sum16 n) +static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } -static inline __wsum 
csum_partial_ext(const void *buff, int len, __wsum sum) +static __always_inline +__wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) -static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } -static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) +static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); @@ -136,11 +137,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) * m : old value of a 16bit field * m' : new value of a 16bit field */ -static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) +static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) +{ + *csum = csum_add(csum_sub(*csum, old), new); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); @@ -150,16 +156,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); -static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, - __be16 from, __be16 to, - bool pseudohdr) +static __always_inline +void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, + __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } -static inline __wsum remcsum_adjust(void *ptr, __wsum csum, - int start, int offset) +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, + int start, int offset) { 
__sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; @@ -175,12 +181,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } -static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } -static inline __wsum wsum_negate(__wsum val) +static __always_inline __wsum wsum_negate(__wsum val) { return (__force __wsum)-((__force u32)val); } diff --git a/include/net/dsa.h b/include/net/dsa.h index bc6eef6af810..71cc363dbbd4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,6 +116,14 @@ struct dsa_netdevice_ops { #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) +struct dsa_lag { + struct net_device *dev; + unsigned int id; + struct mutex fdb_lock; + struct list_head fdbs; + refcount_t refcount; +}; + struct dsa_switch_tree { struct list_head list; @@ -134,7 +142,7 @@ struct dsa_switch_tree { /* Maps offloaded LAG netdevs to a zero-based linear ID for * drivers that need it. 
*/ - struct net_device **lags; + struct dsa_lag **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -163,32 +171,36 @@ struct dsa_switch_tree { unsigned int last_switch; }; +/* LAG IDs are one-based, the dst->lags array is zero-based */ #define dsa_lags_foreach_id(_id, _dst) \ - for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ - if ((_dst)->lags[(_id)]) + for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id) - 1]) #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ - if ((_dp)->lag_dev == (_lag)) + if (dsa_port_offloads_lag((_dp), (_lag))) #define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) -static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, - unsigned int id) +static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst, + unsigned int id) { - return dst->lags[id]; + /* DSA LAG IDs are one-based, dst->lags is zero-based */ + return dst->lags[id - 1]; } static inline int dsa_lag_id(struct dsa_switch_tree *dst, - struct net_device *lag) + struct net_device *lag_dev) { unsigned int id; dsa_lags_foreach_id(id, dst) { - if (dsa_lag_dev(dst, id) == lag) - return id; + struct dsa_lag *lag = dsa_lag_by_id(dst, id); + + if (lag->dev == lag_dev) + return lag->id; } return -ENODEV; @@ -291,7 +303,7 @@ struct dsa_port { struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; - struct net_device *lag_dev; + struct dsa_lag *lag; struct net_device *hsr_dev; struct list_head list; @@ -329,11 +341,28 @@ struct dsa_link { struct list_head list; }; +enum dsa_db_type { + DSA_DB_PORT, + DSA_DB_LAG, + DSA_DB_BRIDGE, +}; + +struct dsa_db { + enum dsa_db_type type; + + union { + const struct dsa_port *dp; + struct dsa_lag lag; + struct dsa_bridge bridge; + }; +}; + struct dsa_mac_addr { unsigned char addr[ETH_ALEN]; 
u16 vid; refcount_t refcount; struct list_head list; + struct dsa_db db; }; struct dsa_vlan { @@ -391,17 +420,19 @@ struct dsa_switch { */ u32 vlan_filtering:1; - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - u32 pcs_poll:1; - /* For switches that only have the MRU configurable. To ensure the * configured MTU is not exceeded, normalization of MRU on all bridged * interfaces is needed. */ u32 mtu_enforcement_ingress:1; + /* Drivers that isolate the FDBs of multiple bridges must set this + * to true to receive the bridge as an argument in .port_fdb_{add,del} + * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be + * passed as zero. + */ + u32 fdb_isolation:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -646,14 +677,30 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) return dp->vlan_filtering; } +static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp) +{ + return dp->lag ? dp->lag->id : 0; +} + +static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp) +{ + return dp->lag ? 
dp->lag->dev : NULL; +} + +static inline bool dsa_port_offloads_lag(struct dsa_port *dp, + const struct dsa_lag *lag) +{ + return dsa_port_lag_dev_get(dp) == lag->dev; +} + static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { if (!dp->bridge) return NULL; - if (dp->lag_dev) - return dp->lag_dev; + if (dp->lag) + return dp->lag->dev; else if (dp->hsr_dev) return dp->hsr_dev; @@ -890,7 +937,8 @@ struct dsa_switch_ops { int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload); + bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, @@ -918,19 +966,29 @@ struct dsa_switch_ops { * Forwarding database */ int (*port_fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); + int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db); + int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db); /* * Multicast database */ int (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int (*port_mdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); /* * RXNFC */ @@ -964,17 +1022,18 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct 
dsa_switch *ds, int tree_index, int sw_index, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, + struct netlink_ext_ack *extack); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag, + int port, struct dsa_lag lag, struct netdev_lag_upper_info *info); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag); + int port, struct dsa_lag lag); /* * PTP functionality @@ -1046,10 +1105,10 @@ struct dsa_switch_ops { */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, - struct net_device *lag, + struct dsa_lag lag, struct netdev_lag_upper_info *info); int (*port_lag_leave)(struct dsa_switch *ds, int port, - struct net_device *lag); + struct dsa_lag lag); /* * HSR integration @@ -1160,6 +1219,13 @@ struct dsa_switch_driver { struct net_device *dsa_dev_to_net_device(struct device *dev); +typedef int dsa_fdb_walk_cb_t(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); + +int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); +int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); + /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 5b8c54eb7a6b..92267d23083e 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -148,6 +148,8 @@ enum flow_action_id { FLOW_ACTION_MPLS_MANGLE, FLOW_ACTION_GATE, FLOW_ACTION_PPPOE_PUSH, + FLOW_ACTION_JUMP, + FLOW_ACTION_PIPE, NUM_FLOW_ACTIONS, }; @@ -235,9 +237,16 @@ struct flow_action_entry { struct { /* FLOW_ACTION_POLICE */ u32 burst; u64 
rate_bytes_ps; + u64 peakrate_bytes_ps; + u32 avrate; + u16 overhead; u64 burst_pkt; u64 rate_pkt_ps; u32 mtu; + struct { + enum flow_action_id act_id; + u32 extval; + } exceed, notexceed; } police; struct { /* FLOW_ACTION_CT */ int action; @@ -302,6 +311,12 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) return action->num_entries == 1; } +static inline bool flow_action_is_last_entry(const struct flow_action *action, + const struct flow_action_entry *entry) +{ + return entry == &action->entries[action->num_entries - 1]; +} + #define flow_action_for_each(__i, __act, __actions) \ for (__i = 0, __act = &(__actions)->entries[0]; \ __i < (__actions)->num_entries; \ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f026cf08a8e8..4cfdef6ca4f6 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -71,6 +71,8 @@ struct inet6_ifaddr { bool tokenized; + u8 ifa_proto; + struct rcu_head rcu; struct in6_addr peer_addr; }; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 63540be0fc34..911ad930867d 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -70,6 +70,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far + * @mono_delivery_time: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -90,6 +91,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; + u8 mono_delivery_time; __u8 flags; u16 max_size; struct fqdir *fqdir; diff --git a/include/net/mctp.h b/include/net/mctp.h index e80a4baf8379..d37268fe6825 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -40,11 +40,21 @@ struct mctp_hdr { #define MCTP_INITIAL_DEFAULT_NET 1 -static inline bool mctp_address_ok(mctp_eid_t eid) +static inline bool mctp_address_unicast(mctp_eid_t eid) { return eid 
>= 8 && eid < 255; } +static inline bool mctp_address_broadcast(mctp_eid_t eid) +{ + return eid == 255; +} + +static inline bool mctp_address_null(mctp_eid_t eid) +{ + return eid == 0; +} + static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) { return match == eid || match == MCTP_ADDR_ANY; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 53cb8de0e589..da7eec8669ec 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -447,10 +447,15 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); +struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *saddr, u64 nonce); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce); +void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, + const struct in6_addr *saddr); + void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, @@ -475,9 +480,9 @@ int igmp6_late_init(void); void igmp6_cleanup(void); void igmp6_late_cleanup(void); -int igmp6_event_query(struct sk_buff *skb); +void igmp6_event_query(struct sk_buff *skb); -int igmp6_event_report(struct sk_buff *skb); +void igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index a3647fadf1cc..bd59e950f4d6 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -96,6 +96,7 @@ enum flow_offload_xmit_type { FLOW_OFFLOAD_XMIT_NEIGH, FLOW_OFFLOAD_XMIT_XFRM, FLOW_OFFLOAD_XMIT_DIRECT, + FLOW_OFFLOAD_XMIT_TC, }; #define NF_FLOW_TABLE_ENCAP_MAX 2 @@ -127,7 +128,7 @@ struct flow_offload_tuple { struct { } __hash; u8 dir:2, - xmit_type:2, + xmit_type:3, encap_num:2, in_vlan_ingress:2; u16 mtu; @@ -142,6 +143,9 @@ struct 
flow_offload_tuple { u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; } out; + struct { + u32 iifidx; + } tc; }; }; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9eed51e920e8..980daa6e1e3a 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -void nf_queue_entry_get_refs(struct nf_queue_entry *entry); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); static inline void init_hashrandom(u32 *jhash_initval) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index eaf55da9a205..c4c0861deac1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -905,9 +905,9 @@ struct nft_expr_ops { int (*offload)(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr); + bool (*offload_action)(const struct nft_expr *expr); void (*offload_stats)(struct nft_expr *expr, const struct flow_stats *stats); - u32 offload_flags; const struct nft_expr_type *type; void *data; }; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index f9d95ff82df8..797147843958 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,8 +67,6 @@ struct nft_flow_rule { struct flow_rule *rule; }; -#define NFT_OFFLOAD_F_ACTION (1 << 0) - void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 47b166684fd8..e5389eeaf8bd 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -14,5 +14,9 @@ struct netns_smc { struct smc_stats_rsn *fback_rsn; bool limit_smc_hs; /* constraint 
on handshake */ +#ifdef CONFIG_SYSCTL + struct ctl_table_header *smc_hdr; +#endif + unsigned int sysctl_autocorking_size; }; #endif diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 97c3c19872ff..ea5fb70e5101 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -84,6 +84,48 @@ struct page_pool_params { void *init_arg; }; +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; + +struct page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. */ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. 
+ */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); +#endif + struct page_pool { struct page_pool_params p; @@ -96,6 +138,11 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif u32 xdp_mem_id; /* @@ -126,6 +173,10 @@ struct page_pool { */ struct ptr_ring ring; +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif atomic_t pages_state_release_cnt; /* A page_pool is strictly tied to a single RX-queue being diff --git a/include/net/sock.h b/include/net/sock.h index d6c13f0fba40..c4b91fc19b9c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -509,7 +509,7 @@ struct sock { #endif u16 sk_tsflags; u8 sk_shutdown; - u32 sk_tskey; + atomic_t sk_tskey; atomic_t sk_zckey; u8 sk_clockid; @@ -2681,7 +2681,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = sk->sk_tskey++; + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index c32e1c8f79ec..3e424d40fae3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -313,10 +313,7 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct 
switchdev_notifier_fdb_info *fdb_info)); + const struct switchdev_notifier_fdb_info *fdb_info)); int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -443,10 +440,7 @@ switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) + const struct switchdev_notifier_fdb_info *fdb_info)) { return 0; } diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 72649512dcdd..283bde711a42 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -159,4 +159,34 @@ static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) return params->tcfp_mtu; } +static inline u64 tcf_police_peakrate_bytes_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->peak.rate_bytes_ps; +} + +static inline u32 tcf_police_tcfp_ewma_rate(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->tcfp_ewma_rate; +} + +static inline u16 tcf_police_rate_overhead(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->rate.overhead; +} + #endif /* __NET_TC_POLICE_H */ diff --git a/include/net/tcp.h 
b/include/net/tcp.h index eff2487d972d..d486d7b6112d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1367,7 +1367,8 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __skb_checksum_complete(skb); } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); #ifdef CONFIG_INET void __sk_defer_free_flush(struct sock *sk); @@ -1673,6 +1674,11 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; return __tcp_md5_do_lookup(sk, l3index, addr, family); } +bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + enum skb_drop_reason *reason, + const void *saddr, const void *daddr, + int family, int dif, int sdif); + #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else @@ -1682,6 +1688,14 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, { return NULL; } +static inline bool tcp_inbound_md5_hash(const struct sock *sk, + const struct sk_buff *skb, + enum skb_drop_reason *reason, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + return false; +} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -1817,11 +1831,6 @@ static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) return skb_rb_last(&sk->tcp_rtx_queue); } -static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); -} - static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { return skb_peek_tail(&sk->sk_write_queue); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 5a934bebe630..bca5b01af247 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -227,11 +227,56 @@ struct vxlan_config { enum ifla_vxlan_df df; }; +enum { + VXLAN_VNI_STATS_RX, + VXLAN_VNI_STATS_RX_DROPS, + VXLAN_VNI_STATS_RX_ERRORS, + VXLAN_VNI_STATS_TX, + VXLAN_VNI_STATS_TX_DROPS, + VXLAN_VNI_STATS_TX_ERRORS, +}; + +struct vxlan_vni_stats { + u64 
rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + u64 tx_errors; +}; + +struct vxlan_vni_stats_pcpu { + struct vxlan_vni_stats stats; + struct u64_stats_sync syncp; +}; + struct vxlan_dev_node { struct hlist_node hlist; struct vxlan_dev *vxlan; }; +struct vxlan_vni_node { + struct rhash_head vnode; + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif + struct list_head vlist; + __be32 vni; + union vxlan_addr remote_ip; /* default remote ip for this vni */ + struct vxlan_vni_stats_pcpu __percpu *stats; + + struct rcu_head rcu; +}; + +struct vxlan_vni_group { + struct rhashtable vni_hash; + struct list_head vni_list; + u32 num_vnis; +}; + /* Pseudo network device */ struct vxlan_dev { struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ @@ -254,6 +299,8 @@ struct vxlan_dev { struct vxlan_config cfg; + struct vxlan_vni_group __rcu *vnigrp; + struct hlist_head fdb_head[FDB_HASH_SIZE]; }; @@ -274,6 +321,7 @@ struct vxlan_dev { #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 +#define VXLAN_F_VNIFILTER 0x20000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -283,7 +331,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) /* Flags that can be set together with VXLAN_F_GPE. 
*/ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ @@ -292,7 +341,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fdb41e8bb626..76aa6f11a540 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1568,7 +1568,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); @@ -1681,14 +1680,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, u32 if_id); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); diff --git a/include/soc/fsl/dpaa2-fd.h b/include/soc/fsl/dpaa2-fd.h index 90ae8d191f1a..bae490cac0aa 100644 --- a/include/soc/fsl/dpaa2-fd.h +++ 
b/include/soc/fsl/dpaa2-fd.h @@ -7,7 +7,8 @@ #ifndef __FSL_DPAA2_FD_H #define __FSL_DPAA2_FD_H -#include <linux/kernel.h> +#include <linux/byteorder/generic.h> +#include <linux/types.h> /** * DOC: DPAA2 FD - Frame Descriptor APIs for DPAA2 diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h index 7614fee532f1..edd601f53f5d 100644 --- a/include/soc/fsl/qe/immap_qe.h +++ b/include/soc/fsl/qe/immap_qe.h @@ -13,7 +13,8 @@ #define _ASM_POWERPC_IMMAP_QE_H #ifdef __KERNEL__ -#include <linux/kernel.h> +#include <linux/types.h> + #include <asm/io.h> #define QE_IMMAP_SIZE (1024 * 1024) /* 1MB from 1MB+IMMR */ diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h index b6febe225071..43ea830cfe1f 100644 --- a/include/soc/fsl/qe/qe_tdm.h +++ b/include/soc/fsl/qe/qe_tdm.h @@ -10,8 +10,8 @@ #ifndef _QE_TDM_H_ #define _QE_TDM_H_ -#include <linux/kernel.h> #include <linux/list.h> +#include <linux/types.h> #include <soc/fsl/qe/immap_qe.h> #include <soc/fsl/qe/qe.h> @@ -19,6 +19,8 @@ #include <soc/fsl/qe/ucc.h> #include <soc/fsl/qe/ucc_fast.h> +struct device_node; + /* SI RAM entries */ #define SIR_LAST 0x0001 #define SIR_BYTE 0x0002 diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index 9696a5b9b5d1..ad60b87a3c69 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -10,7 +10,7 @@ #ifndef __UCC_FAST_H__ #define __UCC_FAST_H__ -#include <linux/kernel.h> +#include <linux/types.h> #include <soc/fsl/qe/immap_qe.h> #include <soc/fsl/qe/qe.h> diff --git a/include/soc/fsl/qe/ucc_slow.h b/include/soc/fsl/qe/ucc_slow.h index 11a216e4e919..7548ce8a202d 100644 --- a/include/soc/fsl/qe/ucc_slow.h +++ b/include/soc/fsl/qe/ucc_slow.h @@ -11,7 +11,7 @@ #ifndef __UCC_SLOW_H__ #define __UCC_SLOW_H__ -#include <linux/kernel.h> +#include <linux/types.h> #include <soc/fsl/qe/immap_qe.h> #include <soc/fsl/qe/qe.h> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 
78f56502bc09..ee3c59639d70 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -635,6 +635,13 @@ enum macaccess_entry_type { #define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0) #define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1) +struct ocelot_lag_fdb { + unsigned char addr[ETH_ALEN]; + u16 vid; + struct net_device *bond; + struct list_head list; +}; + struct ocelot_port { struct ocelot *ocelot; @@ -661,6 +668,7 @@ struct ocelot_port { u16 mrp_ring_id; struct net_device *bridge; + int bridge_num; u8 stp_state; int speed; @@ -690,6 +698,7 @@ struct ocelot { struct list_head vlans; struct list_head traps; + struct list_head lag_fdbs; /* Switches like VSC9959 have flooding per traffic class */ int num_flooding_pgids; @@ -705,6 +714,8 @@ struct ocelot { enum ocelot_tag_prefix npi_inj_prefix; enum ocelot_tag_prefix npi_xtr_prefix; + unsigned long bridges; + struct list_head multicast; struct list_head pgids; @@ -838,6 +849,9 @@ void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); void ocelot_deinit_port(struct ocelot *ocelot, int port); +void ocelot_port_set_dsa_8021q_cpu(struct ocelot *ocelot, int port); +void ocelot_port_unset_dsa_8021q_cpu(struct ocelot *ocelot, int port); + /* DSA callbacks */ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data); void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data); @@ -855,17 +869,24 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); -void ocelot_port_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge); +int ocelot_port_bridge_join(struct ocelot *ocelot, int port, + struct net_device *bridge, int bridge_num, + struct netlink_ext_ack *extack); void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge); int 
ocelot_mact_flush(struct ocelot *ocelot, int port); int ocelot_fdb_dump(struct ocelot *ocelot, int port, dsa_fdb_dump_cb_t *cb, void *data); -int ocelot_fdb_add(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid); -int ocelot_fdb_del(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid); +int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge); +int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge); +int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid, + const struct net_device *bridge); +int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid, + const struct net_device *bridge); int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged, struct netlink_ext_ack *extack); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, @@ -889,9 +910,11 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, int ocelot_cls_flower_stats(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress); int ocelot_port_mdb_add(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge); int ocelot_port_mdb_del(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge); int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, struct netdev_lag_upper_info *info); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index cfcfd26399f7..c0769d943f8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -27,6 +27,30 @@ EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ 
EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ + EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \ + EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ + TCP_MD5UNEXPECTED) \ + EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ + EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ + EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ + EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ + EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ + EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ + EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ + EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ + EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ + BPF_CGROUP_EGRESS) \ + EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ + EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ + EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ + EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ + EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ + EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ + EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ + EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ + EM(SKB_DROP_REASON_XDP, XDP) \ + EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ + EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index afe3d0d7f5f2..4eebea830613 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5086,6 +5086,37 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. + * + * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * Description + * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also + * change the __sk_buff->delivery_time_type to *dtime_type*. + * + * When setting a delivery time (non zero *dtime*) to + * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* + * is supported. 
It is the only delivery_time_type that will be + * kept after bpf_redirect_*(). + * + * If there is no need to change the __sk_buff->delivery_time_type, + * the delivery time can be directly written to __sk_buff->tstamp + * instead. + * + * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE + * can be used to clear any delivery time stored in + * __sk_buff->tstamp. + * + * Only IPv4 and IPv6 skb->protocol are supported. + * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. + * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5280,6 +5311,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ + FN(skb_set_delivery_time), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5469,6 +5501,12 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_DELIVERY_TIME_NONE, + BPF_SKB_DELIVERY_TIME_UNSPEC, + BPF_SKB_DELIVERY_TIME_MONO, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -5509,7 +5547,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u32 :32; /* Padding, future use. */ + __u8 delivery_time_type; + __u32 :24; /* Padding, future use. 
*/ __u64 hwtstamp; }; diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 417d4280d7b5..979850221b8d 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -337,6 +337,7 @@ enum { ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index dfcf3ce0097f..1c392dd95a5e 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -33,8 +33,9 @@ enum { IFA_CACHEINFO, IFA_MULTICAST, IFA_FLAGS, - IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ __IFA_MAX, }; @@ -69,4 +70,10 @@ struct ifa_cacheinfo { #define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) #endif +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + #endif diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index c0c2f3ed5729..1d0bccc3fa54 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -86,8 +86,10 @@ * over Ethernet */ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. 
*/ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6218f93f5c1a..ddca20357e7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -245,6 +245,21 @@ struct rtnl_link_stats64 { __u64 rx_nohandler; }; +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. + */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; +}; + /* The struct should be in sync with struct ifmap */ struct rtnl_link_ifmap { __u64 mem_start; @@ -537,6 +552,7 @@ enum { IFLA_BRPORT_MRP_IN_OPEN, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -712,7 +728,55 @@ enum ipvlan_mode { #define IPVLAN_F_PRIVATE 0x01 #define IPVLAN_F_VEPA 0x02 +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 flags; + __u16 reserved2; + __u32 ifindex; +}; + /* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + 
VXLAN_VNIFILTER_ENTRY_STATS, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, @@ -744,6 +808,7 @@ enum { IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -860,6 +925,7 @@ enum { IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, __IFLA_BOND_MAX, }; @@ -1156,6 +1222,17 @@ enum { #define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. + */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + /* These are embedded into IFLA_STATS_LINK_XSTATS: * [IFLA_STATS_LINK_XSTATS] * -> [LINK_XSTATS_TYPE_xxx] @@ -1173,10 +1250,21 @@ enum { enum { IFLA_OFFLOAD_XSTATS_UNSPEC, IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. 
A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ __IFLA_OFFLOAD_XSTATS_MAX }; #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index a1fd6173e2db..1d90c21a6251 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -134,6 +134,7 @@ struct mrt6msg { #define MRT6MSG_NOCACHE 1 #define MRT6MSG_WRONGMIF 2 #define MRT6MSG_WHOLEPKT 3 /* used for use level encap */ +#define MRT6MSG_WRMIFWHOLE 4 /* For PIM Register and assert processing */ __u8 im6_mbz; /* must be zero */ __u8 im6_msgtype; /* what type of message */ __u16 im6_mif; /* mif rec'd on */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 150bcff49b1c..9d1710f20505 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,6 +351,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -430,6 +431,11 @@ struct 
ovs_key_ipv6 { __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ }; +/* separate structure to support backward compatibility with older user space */ +struct ovs_key_ipv6_exthdrs { + __u16 hdrs; +}; + struct ovs_key_tcp { __be16 tcp_src; __be16 tcp_dst; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 93d934cc4613..51530aade46e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -146,6 +146,8 @@ enum { #define RTM_NEWSTATS RTM_NEWSTATS RTM_GETSTATS = 94, #define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS RTM_NEWCACHEREPORT = 96, #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT @@ -185,6 +187,13 @@ enum { RTM_GETNEXTHOPBUCKET, #define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -756,6 +765,10 @@ enum rtnetlink_groups { #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN RTNLGRP_MCTP_IFADDR, #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 4e29d7851890..65e13a099b1a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -511,6 +511,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. + * + * So leave it here to make sure that it won't be reused by mistake. 
+ */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 0e877dbcfeea..afc6c0e9c966 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -546,6 +546,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; + struct cgroup_file_ctx *ctx; BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); @@ -553,8 +554,9 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, * Release agent gets called with all capabilities, * require capabilities to set release agent. */ - if ((of->file->f_cred->user_ns != &init_user_ns) || - !capable(CAP_SYS_ADMIN)) + ctx = of->priv; + if ((ctx->ns->user_ns != &init_user_ns) || + !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) return -EPERM; cgrp = cgroup_kn_lock_live(of->kn, false); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 9d05c3ca2d5e..a557eea7166f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6166,6 +6166,20 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) if (ret) goto err; + /* + * Spawning a task directly into a cgroup works by passing a file + * descriptor to the target cgroup directory. This can even be an O_PATH + * file descriptor. But it can never be a cgroup.procs file descriptor. + * This was done on purpose so spawning into a cgroup could be + * conceptualized as an atomic + * + * fd = openat(dfd_cgroup, "cgroup.procs", ...); + * write(fd, <child-pid>, ...); + * + * sequence, i.e. it's a shorthand for the caller opening and writing + * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us + * to always use the caller's credentials. 
+ */ ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, !(kargs->flags & CLONE_THREAD), current->nsproxy->cgroup_ns); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4c7254e8f49a..5de18448016c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2289,6 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); + cpus_read_lock(); percpu_down_write(&cpuset_rwsem); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); @@ -2342,6 +2343,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); percpu_up_write(&cpuset_rwsem); + cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ @@ -3522,8 +3524,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) return cs; } -/** - * cpuset_node_allowed - Can we allocate on a memory node? +/* + * __cpuset_node_allowed - Can we allocate on a memory node? * @node: is this an allowed node? * @gfp_mask: memory allocation flags * @@ -3694,8 +3696,8 @@ void cpuset_print_current_mems_allowed(void) int cpuset_memory_pressure_enabled __read_mostly; -/** - * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. +/* + * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * * Keep a running average of the rate of synchronous (direct) * page reclaim efforts initiated by tasks in each cpuset. @@ -3710,7 +3712,7 @@ int cpuset_memory_pressure_enabled __read_mostly; * "memory_pressure". Value displayed is an integer * representing the recent rate of entry into the synchronous * (direct) page reclaim by any task attached to the cpuset. 
- **/ + */ void __cpuset_memory_pressure_bump(void) { diff --git a/kernel/cred.c b/kernel/cred.c index 473d17c431f3..933155c96922 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -665,21 +665,16 @@ EXPORT_SYMBOL(cred_fscmp); int set_cred_ucounts(struct cred *new) { - struct task_struct *task = current; - const struct cred *old = task->real_cred; struct ucounts *new_ucounts, *old_ucounts = new->ucounts; - if (new->user == old->user && new->user_ns == old->user_ns) - return 0; - /* * This optimization is needed because alloc_ucounts() uses locks * for table lookups. */ - if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) + if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid)) return 0; - if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid))) + if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) return -EAGAIN; new->ucounts = new_ucounts; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f1e7ea160b43..bfc56cb21705 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -628,7 +628,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(mem->start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || + dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } diff --git a/kernel/fork.c b/kernel/fork.c index d75a528f7b21..a024bf6254df 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2021,18 +2021,18 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + retval = copy_creds(p, clone_flags); + if (retval < 0) + goto bad_fork_free; + retval = -EAGAIN; if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { 
if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) - goto bad_fork_free; + goto bad_fork_cleanup_count; } current->flags &= ~PF_NPROC_EXCEEDED; - retval = copy_creds(p, clone_flags); - if (retval < 0) - goto bad_fork_free; - /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there @@ -2267,6 +2267,17 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_put_pidfd; /* + * Now that the cgroups are pinned, re-clone the parent cgroup and put + * the new task on the correct runqueue. All this *before* the task + * becomes visible. + * + * This isn't part of ->can_fork() because while the re-cloning is + * cgroup specific, it unconditionally needs to place the task on a + * runqueue. + */ + sched_cgroup_fork(p, args); + + /* * From this point on we must avoid any synchronous user-space * communication until we take the tasklist-lock. In particular, we do * not want user-space to be able to predict the process start-time by @@ -2323,10 +2334,6 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } - /* past the last point of failure */ - if (pidfile) - fd_install(pidfd, pidfile); - init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -2375,8 +2382,11 @@ static __latent_entropy struct task_struct *copy_process( syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + if (pidfile) + fd_install(pidfd, pidfile); + proc_fork_connector(p); - sched_post_fork(p, args); + sched_post_fork(p); cgroup_post_fork(p, args); perf_event_fork(p); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4a882f83aeb9..f8a0212189ca 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3462,7 +3462,7 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) u16 chain_hlock = chain_hlocks[chain->base + i]; 
unsigned int class_idx = chain_hlock_class_idx(chain_hlock); - return lock_classes + class_idx - 1; + return lock_classes + class_idx; } /* @@ -3530,7 +3530,7 @@ static void print_chain_keys_chain(struct lock_chain *chain) hlock_id = chain_hlocks[chain->base + i]; chain_key = print_chain_key_iteration(hlock_id, chain_key); - print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1); + print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id)); printk("\n"); } } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fcf0c180617c..9745613d531c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1214,9 +1214,8 @@ int tg_nop(struct task_group *tg, void *data) } #endif -static void set_load_weight(struct task_struct *p) +static void set_load_weight(struct task_struct *p, bool update_load) { - bool update_load = !(READ_ONCE(p->__state) & TASK_NEW); int prio = p->static_prio - MAX_RT_PRIO; struct load_weight *load = &p->se.load; @@ -4407,7 +4406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->static_prio = NICE_TO_PRIO(0); p->prio = p->normal_prio = p->static_prio; - set_load_weight(p); + set_load_weight(p, false); /* * We don't need the reset flag anymore after the fork. It has @@ -4425,6 +4424,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) init_entity_runnable_average(&p->se); + #ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -4440,18 +4440,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) return 0; } -void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) +void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) { unsigned long flags; -#ifdef CONFIG_CGROUP_SCHED - struct task_group *tg; -#endif + /* + * Because we're not yet on the pid-hash, p->pi_lock isn't strictly + * required yet, but lockdep gets upset if rules are violated. 
+ */ raw_spin_lock_irqsave(&p->pi_lock, flags); #ifdef CONFIG_CGROUP_SCHED - tg = container_of(kargs->cset->subsys[cpu_cgrp_id], - struct task_group, css); - p->sched_task_group = autogroup_task_group(p, tg); + if (1) { + struct task_group *tg; + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], + struct task_group, css); + tg = autogroup_task_group(p, tg); + p->sched_task_group = tg; + } #endif rseq_migrate(p); /* @@ -4462,7 +4467,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) if (p->sched_class->task_fork) p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); +} +void sched_post_fork(struct task_struct *p) +{ uclamp_post_fork(p); } @@ -6922,7 +6930,7 @@ void set_user_nice(struct task_struct *p, long nice) put_prev_task(rq, p); p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p); + set_load_weight(p, true); old_prio = p->prio; p->prio = effective_prio(p); @@ -7213,7 +7221,7 @@ static void __setscheduler_params(struct task_struct *p, */ p->rt_priority = attr->sched_priority; p->normal_prio = normal_prio(p); - set_load_weight(p); + set_load_weight(p, true); } /* @@ -9446,7 +9454,7 @@ void __init sched_init(void) #endif } - set_load_weight(&init_task); + set_load_weight(&init_task, false); /* * The boot idle thread does lazy MMU switching as well: diff --git a/kernel/sys.c b/kernel/sys.c index ecc4cf019242..97dc9e5d6bf9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -472,6 +472,16 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + free_uid(new->user); + new->user = new_user; + return 0; +} + +static void flag_nproc_exceeded(struct cred *new) +{ + if (new->ucounts == current_ucounts()) + return; + /* * We don't fail in case of NPROC limit excess here because too many * poorly written programs don't check set*uid() return code, assuming @@ -480,15 +490,10 @@ static int set_user(struct cred *new) * failure to the execve() stage. 
*/ if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && - new_user != INIT_USER && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) + new->user != INIT_USER) current->flags |= PF_NPROC_EXCEEDED; else current->flags &= ~PF_NPROC_EXCEEDED; - - free_uid(new->user); - new->user = new_user; - return 0; } /* @@ -563,6 +568,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid) if (retval < 0) goto error; + flag_nproc_exceeded(new); return commit_creds(new); error: @@ -625,6 +631,7 @@ long __sys_setuid(uid_t uid) if (retval < 0) goto error; + flag_nproc_exceeded(new); return commit_creds(new); error: @@ -704,6 +711,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (retval < 0) goto error; + flag_nproc_exceeded(new); return commit_creds(new); error: diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f9feb197b2da..a4b462b6f944 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7191,7 +7191,6 @@ static int __init ftrace_nodyn_init(void) core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } -static inline void ftrace_startup_enable(int command) { } static inline void ftrace_startup_all(int command) { } # define ftrace_startup_sysctl() do { } while (0) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7c2578efde26..3050892d1812 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1474,10 +1474,12 @@ static int __init set_buf_size(char *str) if (!str) return 0; buf_size = memparse(str, &str); - /* nr_entries can not be zero */ - if (buf_size == 0) - return 0; - trace_buf_size = buf_size; + /* + * nr_entries can not be zero and the startup + * tests require some buffer space. Therefore + * ensure we have at least 4096 bytes of buffer. 
+ */ + trace_buf_size = max(4096UL, buf_size); return 1; } __setup("trace_buf_size=", set_buf_size); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d038ddbf1bea..c5b09c31e077 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -136,7 +136,6 @@ struct kprobe_trace_entry_head { struct eprobe_trace_entry_head { struct trace_entry ent; - unsigned int type; }; struct kretprobe_trace_entry_head { diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 191db32dec46..541aa13581b9 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -242,7 +242,6 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i) static int eprobe_event_define_fields(struct trace_event_call *event_call) { - int ret; struct eprobe_trace_entry_head field; struct trace_probe *tp; @@ -250,8 +249,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call) if (WARN_ON_ONCE(!tp)) return -ENOENT; - DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } @@ -270,7 +267,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags, struct trace_event_call *pevent; struct trace_event *probed_event; struct trace_seq *s = &iter->seq; + struct trace_eprobe *ep; struct trace_probe *tp; + unsigned int type; field = (struct eprobe_trace_entry_head *)iter->ent; tp = trace_probe_primary_from_call( @@ -278,15 +277,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags, if (WARN_ON_ONCE(!tp)) goto out; + ep = container_of(tp, struct trace_eprobe, tp); + type = ep->event->event.type; + trace_seq_printf(s, "%s: (", trace_probe_name(tp)); - probed_event = ftrace_find_event(field->type); + probed_event = ftrace_find_event(type); if (probed_event) { pevent = container_of(probed_event, struct trace_event_call, event); trace_seq_printf(s, "%s.%s", pevent->class->system, trace_event_name(pevent)); } else { - trace_seq_printf(s, "%u", 
field->type); + trace_seq_printf(s, "%u", type); } trace_seq_putc(s, ')'); @@ -498,10 +500,6 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) return; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); - if (edata->ep->event) - entry->type = edata->ep->event->event.type; - else - entry->type = 0; store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d00fee705f9c..7eb9d04f1c2e 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -84,6 +84,20 @@ event_triggers_call(struct trace_event_file *file, } EXPORT_SYMBOL_GPL(event_triggers_call); +bool __trace_trigger_soft_disabled(struct trace_event_file *file) +{ + unsigned long eflags = file->flags; + + if (eflags & EVENT_FILE_FL_TRIGGER_MODE) + event_triggers_call(file, NULL, NULL, NULL); + if (eflags & EVENT_FILE_FL_SOFT_DISABLED) + return true; + if (eflags & EVENT_FILE_FL_PID_FILTER) + return trace_event_ignore_this_pid(file); + return false; +} +EXPORT_SYMBOL_GPL(__trace_trigger_soft_disabled); + /** * event_triggers_post_call - Call 'post_triggers' for a trace event * @file: The trace_event_file associated with the event @@ -1295,6 +1309,16 @@ traceon_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { + struct trace_event_file *file = data->private_data; + + if (file) { + if (tracer_tracing_is_on(file->tr)) + return; + + tracer_tracing_on(file->tr); + return; + } + if (tracing_is_on()) return; @@ -1306,8 +1330,15 @@ traceon_count_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { - if (tracing_is_on()) - return; + struct trace_event_file *file = data->private_data; + + if (file) { + if (tracer_tracing_is_on(file->tr)) + return; + } else { + if (tracing_is_on()) + return; + } if 
(!data->count) return; @@ -1315,7 +1346,10 @@ traceon_count_trigger(struct event_trigger_data *data, if (data->count != -1) (data->count)--; - tracing_on(); + if (file) + tracer_tracing_on(file->tr); + else + tracing_on(); } static void @@ -1323,6 +1357,16 @@ traceoff_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { + struct trace_event_file *file = data->private_data; + + if (file) { + if (!tracer_tracing_is_on(file->tr)) + return; + + tracer_tracing_off(file->tr); + return; + } + if (!tracing_is_on()) return; @@ -1334,8 +1378,15 @@ traceoff_count_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { - if (!tracing_is_on()) - return; + struct trace_event_file *file = data->private_data; + + if (file) { + if (!tracer_tracing_is_on(file->tr)) + return; + } else { + if (!tracing_is_on()) + return; + } if (!data->count) return; @@ -1343,7 +1394,10 @@ traceoff_count_trigger(struct event_trigger_data *data, if (data->count != -1) (data->count)--; - tracing_off(); + if (file) + tracer_tracing_off(file->tr); + else + tracing_off(); } static int @@ -1540,7 +1594,12 @@ stacktrace_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { - trace_dump_stack(STACK_SKIP); + struct trace_event_file *file = data->private_data; + + if (file) + __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP); + else + trace_dump_stack(STACK_SKIP); } static void diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 870a08da5b48..cfddb30e65ab 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1437,6 +1437,37 @@ static struct cpumask osnoise_cpumask; static struct cpumask save_cpumask; /* + * osnoise_sleep - sleep until the next period + */ +static void osnoise_sleep(void) +{ + u64 interval; + ktime_t wake_time; + + mutex_lock(&interface_lock); + interval = 
osnoise_data.sample_period - osnoise_data.sample_runtime; + mutex_unlock(&interface_lock); + + /* + * differently from hwlat_detector, the osnoise tracer can run + * without a pause because preemption is on. + */ + if (!interval) { + /* Let synchronize_rcu_tasks() make progress */ + cond_resched_tasks_rcu_qs(); + return; + } + + wake_time = ktime_add_us(ktime_get(), interval); + __set_current_state(TASK_INTERRUPTIBLE); + + while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) { + if (kthread_should_stop()) + break; + } +} + +/* * osnoise_main - The osnoise detection kernel thread * * Calls run_osnoise() function to measure the osnoise for the configured runtime, @@ -1444,30 +1475,10 @@ static struct cpumask save_cpumask; */ static int osnoise_main(void *data) { - u64 interval; while (!kthread_should_stop()) { - run_osnoise(); - - mutex_lock(&interface_lock); - interval = osnoise_data.sample_period - osnoise_data.sample_runtime; - mutex_unlock(&interface_lock); - - do_div(interval, USEC_PER_MSEC); - - /* - * differently from hwlat_detector, the osnoise tracer can run - * without a pause because preemption is on. 
- */ - if (interval < 1) { - /* Let synchronize_rcu_tasks() make progress */ - cond_resched_tasks_rcu_qs(); - continue; - } - - if (msleep_interruptible(interval)) - break; + osnoise_sleep(); } return 0; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 73d90179b51b..80863c6508e5 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, switch (ptype) { case PROBE_PRINT_NORMAL: fmt = "(%lx)"; - arg = "REC->" FIELD_STRING_IP; + arg = ", REC->" FIELD_STRING_IP; break; case PROBE_PRINT_RETURN: fmt = "(%lx <- %lx)"; - arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; + arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; break; case PROBE_PRINT_EVENT: - fmt = "(%u)"; - arg = "REC->" FIELD_STRING_TYPE; + fmt = ""; + arg = ""; break; default: WARN_ON_ONCE(1); @@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, parg->type->fmt); } - pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg); for (i = 0; i < tp->nr_args; i++) { parg = tp->args + i; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 99e7a5df025e..92cc149af0fd 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -38,7 +38,6 @@ #define FIELD_STRING_IP "__probe_ip" #define FIELD_STRING_RETIP "__probe_ret_ip" #define FIELD_STRING_FUNC "__probe_func" -#define FIELD_STRING_TYPE "__probe_type" #undef DEFINE_FIELD #define DEFINE_FIELD(type, item, name, is_signed) \ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index afd937a46496..abcadbe933bb 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -784,9 +784,7 @@ static struct fgraph_ops fgraph_ops __initdata = { .retfunc = &trace_graph_return, }; -#if defined(CONFIG_DYNAMIC_FTRACE) && \ - 
defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) -#define TEST_DIRECT_TRAMP +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS noinline __noclone static void trace_direct_tramp(void) { } #endif @@ -849,7 +847,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, goto out; } -#ifdef TEST_DIRECT_TRAMP +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS tracing_reset_online_cpus(&tr->array_buffer); set_graph_array(tr); diff --git a/kernel/ucount.c b/kernel/ucount.c index 65b597431c86..06ea04d44685 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -350,7 +350,8 @@ bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsign if (rlimit > LONG_MAX) max = LONG_MAX; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - if (get_ucounts_value(iter, type) > max) + long val = get_ucounts_value(iter, type); + if (val < 0 || val > max) return true; max = READ_ONCE(iter->ns->ucount_max[type]); } diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6b2e3ca7ee99..5481ba44a8d6 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -58,6 +58,18 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) cred->user_ns = user_ns; } +static unsigned long enforced_nproc_rlimit(void) +{ + unsigned long limit = RLIM_INFINITY; + + /* Is RLIMIT_NPROC currently enforced? 
*/ + if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) || + (current_user_ns() != &init_user_ns)) + limit = rlimit(RLIMIT_NPROC); + + return limit; +} + /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the @@ -122,7 +134,7 @@ int create_user_ns(struct cred *new) for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) { ns->ucount_max[i] = INT_MAX; } - set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)); + set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit()); set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE)); set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING)); set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK)); diff --git a/lib/Kconfig b/lib/Kconfig index c80fde816a7e..9b5a692ce00c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -45,7 +45,6 @@ config BITREVERSE config HAVE_ARCH_BITREVERSE bool default n - depends on BITREVERSE help This option enables the use of hardware bit-reversal instructions on architectures which support such operations. 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b0e0acdf96c1..6dd5330f7a99 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -414,6 +414,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; buf->ops = &page_cache_pipe_buf_ops; + buf->flags = 0; get_page(page); buf->page = page; buf->offset = offset; @@ -577,6 +578,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size, break; buf->ops = &default_pipe_buf_ops; + buf->flags = 0; buf->page = page; buf->offset = 0; buf->len = min_t(ssize_t, left, PAGE_SIZE); diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 26a5c9007653..3b413f8c8a71 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -869,11 +869,14 @@ static void kmem_cache_invalid_free(struct kunit *test) kmem_cache_destroy(cache); } +static void empty_cache_ctor(void *object) { } + static void kmem_cache_double_destroy(struct kunit *test) { struct kmem_cache *cache; - cache = kmem_cache_create("test_cache", 200, 0, 0, NULL); + /* Provide a constructor to prevent cache merging. 
*/ + cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); kmem_cache_destroy(cache); KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache)); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 61895cc01d09..f294db835f4b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4159,10 +4159,10 @@ static int __init hugepages_setup(char *s) pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n"); return 0; } + if (tmp >= nr_online_nodes) + goto invalid; node = tmp; p += count + 1; - if (node < 0 || node >= nr_online_nodes) - goto invalid; /* Parse hugepages */ if (sscanf(p, "%lu%n", &tmp, &count) != 1) goto invalid; @@ -4851,14 +4851,13 @@ again: } static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pte_t *src_pte) + unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte) { struct hstate *h = hstate_vma(vma); struct mm_struct *mm = vma->vm_mm; - pte_t *dst_pte, pte; spinlock_t *src_ptl, *dst_ptl; + pte_t pte; - dst_pte = huge_pte_offset(mm, new_addr, huge_page_size(h)); dst_ptl = huge_pte_lock(h, mm, dst_pte); src_ptl = huge_pte_lockptr(h, mm, src_pte); @@ -4917,7 +4916,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, if (!dst_pte) break; - move_huge_pte(vma, old_addr, new_addr, src_pte); + move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte); } flush_tlb_range(vma, old_end - len, old_end); mmu_notifier_invalidate_range_end(&range); diff --git a/mm/memblock.c b/mm/memblock.c index 1018e50566f3..b12a364f2766 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -366,14 +366,20 @@ void __init memblock_discard(void) addr = __pa(memblock.reserved.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.reserved.max); - memblock_free_late(addr, size); + if (memblock_reserved_in_slab) + kfree(memblock.reserved.regions); + else + memblock_free_late(addr, size); } if (memblock.memory.regions != memblock_memory_init_regions) { 
addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); - memblock_free_late(addr, size); + if (memblock_memory_in_slab) + kfree(memblock.memory.regions); + else + memblock_free_late(addr, size); } memblock_memory = NULL; diff --git a/mm/mmap.c b/mm/mmap.c index 1e8fdb0b51ed..d445c1b9d606 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3186,6 +3186,7 @@ void exit_mmap(struct mm_struct *mm) vma = remove_vma(vma); cond_resched(); } + mm->mmap = NULL; mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index d1902828a18a..e5d23e75572a 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -638,12 +638,7 @@ void vlan_dev_free_egress_priority(const struct net_device *dev) static void vlan_dev_uninit(struct net_device *dev) { - struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - vlan_dev_free_egress_priority(dev); - - /* Get rid of the vlan's reference to real_dev */ - dev_put_track(vlan->real_dev, &vlan->dev_tracker); } static netdev_features_t vlan_dev_fix_features(struct net_device *dev, @@ -856,6 +851,9 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; + + /* Get rid of the vlan's reference to real_dev */ + dev_put_track(vlan->real_dev, &vlan->dev_tracker); } void vlan_setup(struct net_device *dev) diff --git a/net/Kconfig b/net/Kconfig index 8a1f9d0287de..6b78f695caa6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -434,6 +434,19 @@ config NET_DEVLINK config PAGE_POOL bool +config PAGE_POOL_STATS + default n + bool "Page pool stats" + depends on PAGE_POOL + help + Enable page pool statistics to track page allocation and recycling + in page pools. This option incurs additional CPU cost in allocation + and recycle paths and additional memory cost to store the statistics. 
+ These statistics are only available if this option is enabled and if + the driver using the page pool supports exporting this data. + + If unsure, say N. + config FAILOVER tristate "Generic failover module" help diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f94f538fa382..7f6a7c96ac92 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -13,13 +13,13 @@ #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 71999e13f729..b6db999abf75 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -10,13 +10,13 @@ #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/minmax.h> #include <linux/netdevice.h> diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 1d750f3cb2e4..033639df96d8 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -9,12 +9,12 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 
2ed9496fc41f..337e20b6586d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -10,6 +10,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/crc16.h> #include <linux/errno.h> #include <linux/etherdevice.h> diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 2f008e329007..fefb51a5f606 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -11,6 +11,7 @@ #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> @@ -20,7 +21,6 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/netlink.h> diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index b7466136e292..d26124bc27e1 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 8a2b78f9c4b2..83fb51b6e299 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -9,11 +9,11 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/limits.h> #include <linux/list.h> @@ -149,25 +149,28 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) struct net *net = 
dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; + int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_softif_is_valid(net_dev)) return true; - /* no more parents..stop recursion */ - if (dev_get_iflink(net_dev) == 0 || - dev_get_iflink(net_dev) == net_dev->ifindex) + iflink = dev_get_iflink(net_dev); + if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); + /* iflink to itself, most likely physical device */ + if (net == parent_net && iflink == net_dev->ifindex) + return false; + /* recurse over the parent device */ - parent_dev = __dev_get_by_index((struct net *)parent_net, - dev_get_iflink(net_dev)); - /* if we got a NULL parent_dev there is something broken.. */ + parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); if (!parent_dev) { - pr_err("Cannot find parent device\n"); + pr_warn("Cannot find parent device. Skipping batadv-on-batadv check for %s\n", + net_dev->name); return false; } @@ -214,14 +217,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; - int ifindex; + int iflink; ASSERT_RTNL(); if (!netdev) return NULL; - if (netdev->ifindex == dev_get_iflink(netdev)) { + iflink = dev_get_iflink(netdev); + if (iflink == 0) { dev_hold(netdev); return netdev; } @@ -231,9 +235,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) goto out; net = dev_net(hard_iface->soft_iface); - ifindex = dev_get_iflink(netdev); real_net = batadv_getlink_net(netdev, net); - real_netdev = dev_get_by_index(real_net, ifindex); + + /* iflink to itself, most likely physical device */ + if (net == real_net && netdev->ifindex == iflink) { + real_netdev = netdev; + dev_hold(real_netdev); + goto out; + } + + real_netdev = dev_get_by_index(real_net, iflink); out: batadv_hardif_put(hard_iface); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c 
index 5207cd8d6ad8..e8a449915566 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/crc32c.h> #include <linux/device.h> #include <linux/errno.h> @@ -132,7 +133,6 @@ static void __exit batadv_exit(void) rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); - flush_workqueue(batadv_event_workqueue); destroy_workqueue(batadv_event_workqueue); batadv_event_workqueue = NULL; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 494d1ebecac2..f3be82999f1f 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2022.0" +#define BATADV_SOURCE_VERSION "2022.1" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 9f311fddfaf9..b238455913df 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -11,6 +11,7 @@ #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 974d726fabb9..5f4aeeb60dc4 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -11,6 +11,7 @@ #include <linux/bitops.h> #include <linux/byteorder/generic.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> @@ -19,7 +20,6 @@ #include <linux/init.h> #include <linux/jhash.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/batman-adv/originator.c 
b/net/batman-adv/originator.c index aadc653ca1d8..34903df4fe93 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -8,11 +8,11 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 477d85a3b558..0379b126865d 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -10,13 +10,13 @@ #include <linux/atomic.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_ether.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2dbbe6c19609..0f5c0679b55a 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -11,6 +11,7 @@ #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/cpumask.h> #include <linux/errno.h> #include <linux/etherdevice.h> @@ -19,7 +20,6 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 93730d30af54..7f3dd3c393e0 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -12,13 +12,13 @@ #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/err.h> #include <linux/etherdevice.h> #include 
<linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/kthread.h> #include <linux/limits.h> diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4b7ad6684bc4..8478034d3abf 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -13,6 +13,7 @@ #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/crc32c.h> #include <linux/errno.h> #include <linux/etherdevice.h> @@ -21,7 +22,6 @@ #include <linux/init.h> #include <linux/jhash.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 0cb58eb04093..7ec2e2343884 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -7,10 +7,10 @@ #include "main.h" #include <linux/byteorder/generic.h> +#include <linux/container_of.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5bde0ec41177..b4782a6c1025 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2739,6 +2739,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_dev_unlock(hdev); ida_simple_remove(&hci_index_ida, hdev->id); + kfree_skb(hdev->sent_cmd); kfree(hdev); } EXPORT_SYMBOL(hci_release_dev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6e71aa6b6fea..8fa57ef0bbc9 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1844,6 +1844,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) struct bdaddr_list *b, *t; u8 num_entries = 0; bool pend_conn, pend_report; + u8 filter_policy; int err; /* 
Pause advertising if resolving list can be used as controllers are @@ -1930,6 +1931,8 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) err = -EINVAL; done: + filter_policy = err ? 0x00 : 0x01; + /* Enable address resolution when LL Privacy is enabled. */ err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01); if (err) @@ -1940,7 +1943,7 @@ done: hci_resume_advertising_sync(hdev); /* Select filter policy to use accept list */ - return err ? 0x00 : 0x01; + return filter_policy; } /* Returns true if an le connection is in the scanning state */ @@ -3265,10 +3268,10 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev) if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) events[0] |= 0x40; /* LE Data Length Change */ - /* If the controller supports LL Privacy feature, enable - * the corresponding event. + /* If the controller supports LL Privacy feature or LE Extended Adv, + * enable the corresponding event. */ - if (hdev->le_features[0] & HCI_LE_LL_PRIVACY) + if (use_enhanced_conn_complete(hdev)) events[1] |= 0x02; /* LE Enhanced Connection Complete */ /* If the controller supports Extended Scanner Filter @@ -4109,9 +4112,9 @@ int hci_dev_close_sync(struct hci_dev *hdev) hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); hci_conn_hash_flush(hdev); - hci_dev_unlock(hdev); - + /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */ smp_unregister(hdev); + hci_dev_unlock(hdev); hci_sock_dev_event(hdev, HCI_DEV_DOWN); @@ -5188,7 +5191,7 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN, plen, data, HCI_EV_LE_ENHANCED_CONN_COMPLETE, - HCI_CMD_TIMEOUT, NULL); + conn->conn_timeout, NULL); } int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) @@ -5273,9 +5276,18 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); + /* BLUETOOTH CORE 
SPECIFICATION Version 5.3 | Vol 4, Part E page 2261: + * + * If this event is unmasked and the HCI_LE_Connection_Complete event + * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is + * sent when a new connection has been created. + */ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN, - sizeof(cp), &cp, HCI_EV_LE_CONN_COMPLETE, - HCI_CMD_TIMEOUT, NULL); + sizeof(cp), &cp, + use_enhanced_conn_complete(hdev) ? + HCI_EV_LE_ENHANCED_CONN_COMPLETE : + HCI_EV_LE_CONN_COMPLETE, + conn->conn_timeout, NULL); done: /* Re-enable advertising after the connection attempt is finished. */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 5dd684e0b259..71aff6e65e7c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1220,7 +1220,13 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; + struct mgmt_mode *cp; + + /* Make sure cmd still outstanding. 
*/ + if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev)) + return; + + cp = cmd->param; bt_dev_dbg(hdev, "err %d", err); @@ -1244,7 +1250,7 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err) mgmt_status(err)); } - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); } static int set_powered_sync(struct hci_dev *hdev, void *data) @@ -1283,7 +1289,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_POWERED, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1292,6 +1298,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); + if (err < 0) + mgmt_pending_remove(cmd); + failed: hci_dev_unlock(hdev); return err; @@ -1385,6 +1394,10 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "err %d", err); + /* Make sure cmd still outstanding. 
*/ + if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev)) + return; + hci_dev_lock(hdev); if (err) { @@ -1404,7 +1417,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, new_settings(hdev, cmd->sk); done: - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); hci_dev_unlock(hdev); } @@ -1513,7 +1526,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1540,6 +1553,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd, mgmt_set_discoverable_complete); + if (err < 0) + mgmt_pending_remove(cmd); + failed: hci_dev_unlock(hdev); return err; @@ -1552,6 +1568,10 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "err %d", err); + /* Make sure cmd still outstanding. 
*/ + if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) + return; + hci_dev_lock(hdev); if (err) { @@ -1564,7 +1584,9 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, new_settings(hdev, cmd->sk); done: - mgmt_pending_free(cmd); + if (cmd) + mgmt_pending_remove(cmd); + hci_dev_unlock(hdev); } @@ -1636,7 +1658,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1656,6 +1678,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd, mgmt_set_connectable_complete); + if (err < 0) + mgmt_pending_remove(cmd); + failed: hci_dev_unlock(hdev); return err; @@ -1776,6 +1801,10 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) u8 enable = cp->val; bool changed; + /* Make sure cmd still outstanding. 
*/ + if (cmd != pending_find(MGMT_OP_SET_SSP, hdev)) + return; + if (err) { u8 mgmt_err = mgmt_status(err); @@ -3323,6 +3352,9 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); + if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev)) + return; + if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, status); @@ -3495,6 +3527,9 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err) struct sk_buff *skb = cmd->skb; u8 status = mgmt_status(err); + if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev)) + return; + if (!status) { if (!skb) status = MGMT_STATUS_FAILED; @@ -3761,13 +3796,6 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_WIDEBAND_SPEECH, - MGMT_STATUS_BUSY); - goto unlock; - } - if (hdev_is_powered(hdev) && !!cp->val != hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) { @@ -5038,12 +5066,6 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, - MGMT_STATUS_BUSY); - goto unlock; - } - cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0); if (!cmd) err = -ENOMEM; @@ -5263,11 +5285,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; + if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) && + cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) && + cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) + return; + bt_dev_dbg(hdev, "err %d", err); mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), cmd->param, 1); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); hci_discovery_set_state(hdev, err ? 
DISCOVERY_STOPPED: DISCOVERY_FINDING); @@ -5329,7 +5356,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, else hdev->discovery.limited = false; - cmd = mgmt_pending_new(sk, op, hdev, data, len); + cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -5338,7 +5365,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd, start_discovery_complete); if (err < 0) { - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); goto failed; } @@ -5432,7 +5459,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - cmd = mgmt_pending_new(sk, MGMT_OP_START_SERVICE_DISCOVERY, + cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; @@ -5465,7 +5492,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd, start_discovery_complete); if (err < 0) { - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); goto failed; } @@ -5497,11 +5524,14 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; + if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev)) + return; + bt_dev_dbg(hdev, "err %d", err); mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), cmd->param, 1); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); if (!err) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); @@ -5537,7 +5567,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - cmd = mgmt_pending_new(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; @@ -5546,7 +5576,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, err = hci_cmd_sync_queue(hdev, 
stop_discovery_sync, cmd, stop_discovery_complete); if (err < 0) { - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); goto unlock; } @@ -7476,6 +7506,9 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data, u8 status = mgmt_status(err); u16 eir_len; + if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev)) + return; + if (!status) { if (!skb) status = MGMT_STATUS_FAILED; @@ -7971,11 +8004,7 @@ static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags) static bool adv_busy(struct hci_dev *hdev) { - return (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || - pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || - pending_find(MGMT_OP_SET_LE, hdev) || - pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev) || - pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev)); + return pending_find(MGMT_OP_SET_LE, hdev); } static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance, @@ -8565,9 +8594,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || - pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || - pending_find(MGMT_OP_SET_LE, hdev)) { + if (pending_find(MGMT_OP_SET_LE, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_BUSY); goto unlock; diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index edee60bbc7b4..37eef2ce55ae 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -77,11 +77,12 @@ int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag, { struct hci_dev *hdev; struct mgmt_hdr *hdr; - int len = skb->len; + int len; if (!skb) return -EINVAL; + len = skb->len; hdev = bt_cb(skb)->mgmt.hdev; /* Time stamp */ diff --git a/net/bridge/br.c b/net/bridge/br.c index 1fac72cc617f..b1dea3febeea 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -342,23 +342,26 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) 
clear_bit(opt, &br->options); } -static void __net_exit br_net_exit(struct net *net) +static void __net_exit br_net_exit_batch(struct list_head *net_list) { struct net_device *dev; + struct net *net; LIST_HEAD(list); rtnl_lock(); - for_each_netdev(net, dev) - if (netif_is_bridge_master(dev)) - br_dev_delete(dev, &list); + + list_for_each_entry(net, net_list, exit_list) + for_each_netdev(net, dev) + if (netif_is_bridge_master(dev)) + br_dev_delete(dev, &list); unregister_netdevice_many(&list); - rtnl_unlock(); + rtnl_unlock(); } static struct pernet_operations br_net_ops = { - .exit = br_net_exit, + .exit_batch = br_net_exit_batch, }; static const struct stp_proto br_stp_proto = { diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 3db1def4437b..e5e48c6e35d7 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -84,7 +84,7 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, skb->ip_summed = CHECKSUM_UNNECESSARY; skb->pkt_type = PACKET_HOST; - netif_rx_ni(skb); + netif_rx(skb); } } @@ -364,7 +364,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, reply->ip_summed = CHECKSUM_UNNECESSARY; reply->pkt_type = PACKET_HOST; - netif_rx_ni(reply); + netif_rx(reply); } } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ec646656dbf1..02bb620d3b8d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->tstamp = 0; + skb_clear_tstamp(skb); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index b50382f957c1..e0c13fcc50ed 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -81,6 +81,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!p || 
p->state == BR_STATE_DISABLED) goto drop; + br = p->br; brmctx = &p->br->multicast_ctx; pmctx = &p->multicast_ctx; state = p->state; @@ -88,10 +89,18 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb &state, &vlan)) goto out; + if (p->flags & BR_PORT_LOCKED) { + struct net_bridge_fdb_entry *fdb_src = + br_fdb_find_rcu(br, eth_hdr(skb)->h_source, vid); + + if (!fdb_src || READ_ONCE(fdb_src->dst) != p || + test_bit(BR_FDB_LOCAL, &fdb_src->flags)) + goto drop; + } + nbp_switchdev_frame_mark(p, skb); /* insert into forwarding database after filtering to avoid spoofing */ - br = p->br; if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2ff83d84230d..7d4432ca9a20 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -184,6 +184,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */ + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */ + + nla_total_size(1) /* IFLA_BRPORT_LOCKED */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ @@ -269,7 +270,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, BR_MRP_LOST_CONT)) || nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN, !!(p->flags & BR_MRP_LOST_IN_CONT)) || - nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED))) + nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) || + nla_put_u8(skb, IFLA_BRPORT_LOCKED, !!(p->flags & BR_PORT_LOCKED))) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -827,6 +829,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, 
[IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 }, + [IFLA_BRPORT_LOCKED] = { .type = NLA_U8 }, [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 }, }; @@ -893,6 +896,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS); br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED); + br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED); changed_mask = old_flags ^ p->flags; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 59fcabd08ef1..6f6a70121a5e 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -72,7 +72,7 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, /* Flags that can be offloaded to hardware */ #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \ - BR_MCAST_FLOOD | BR_BCAST_FLOOD) + BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED) int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long flags, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index fdbed3158555..ebfb2a5c59e4 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -32,6 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + bool mono_delivery_time = skb->mono_delivery_time; unsigned int hlen, ll_rs, mtu; ktime_t tstamp = skb->tstamp; struct ip_frag_state state; @@ -81,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, if (iter.frag) ip_fraglist_prepare(skb, &iter); - skb->tstamp = tstamp; + skb_set_delivery_time(skb, tstamp, mono_delivery_time); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -112,7 +113,7 @@ slow_path: goto blackhole; } - skb2->tstamp = 
tstamp; + skb_set_delivery_time(skb2, tstamp, mono_delivery_time); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 414dc5671c45..4d63ef13a1fd 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -99,7 +99,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) else skb->ip_summed = CHECKSUM_NONE; - netif_rx_any_context(skb); + netif_rx(skb); /* Update statistics. */ priv->netdev->stats.rx_packets++; diff --git a/net/can/gw.c b/net/can/gw.c index 24221352e059..1ea4cc527db3 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -577,6 +577,13 @@ static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj) gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); } +static void cgw_job_free_rcu(struct rcu_head *rcu_head) +{ + struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + + kmem_cache_free(cgw_cache, gwj); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -596,8 +603,7 @@ static int cgw_notifier(struct notifier_block *nb, if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); - synchronize_rcu(); - kmem_cache_free(cgw_cache, gwj); + call_rcu(&gwj->rcu, cgw_job_free_rcu); } } } @@ -1155,8 +1161,7 @@ static void cgw_remove_all_jobs(struct net *net) hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); - synchronize_rcu(); - kmem_cache_free(cgw_cache, gwj); + call_rcu(&gwj->rcu, cgw_job_free_rcu); } } @@ -1224,8 +1229,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); - synchronize_rcu(); - kmem_cache_free(cgw_cache, gwj); + call_rcu(&gwj->rcu, cgw_job_free_rcu); err = 0; break; } diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index a271688780a2..307ee1174a6e 100644 --- a/net/can/j1939/transport.c +++ 
b/net/can/j1939/transport.c @@ -2006,7 +2006,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv, /* set the end-packet for broadcast */ session->pkt.last = session->pkt.total; - skcb->tskey = session->sk->sk_tskey++; + skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1; session->tskey = skcb->tskey; return session; diff --git a/net/core/dev.c b/net/core/dev.c index 05fa867f1878..ba69ddf85af6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1622,7 +1622,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) - N(PRE_CHANGEADDR) + N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE) + N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA) } #undef N return "UNKNOWN_NETDEV_EVENT"; @@ -1939,6 +1940,32 @@ static int call_netdevice_notifiers_info(unsigned long val, return raw_notifier_call_chain(&netdev_chain, val, info); } +/** + * call_netdevice_notifiers_info_robust - call per-netns notifier blocks + * for and rollback on error + * @val_up: value passed unmodified to notifier function + * @val_down: value passed unmodified to the notifier function when + * recovering from an error on @val_up + * @info: notifier information data + * + * Call all per-netns network notifier blocks, but not notifier blocks on + * the global notifier chain. Parameters and return value are as for + * raw_notifier_call_chain_robust(). 
+ */ + +static int +call_netdevice_notifiers_info_robust(unsigned long val_up, + unsigned long val_down, + struct netdev_notifier_info *info) +{ + struct net *net = dev_net(info->dev); + + ASSERT_RTNL(); + + return raw_notifier_call_chain_robust(&net->netdev_chain, + val_up, val_down, info); +} + static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack) @@ -2020,7 +2047,8 @@ void net_dec_egress_queue(void) EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif -static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); +DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); +EXPORT_SYMBOL(netstamp_needed_key); #ifdef CONFIG_JUMP_LABEL static atomic_t netstamp_needed_deferred; static atomic_t netstamp_wanted; @@ -2081,14 +2109,15 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; + skb->mono_delivery_time = 0; if (static_branch_unlikely(&netstamp_needed_key)) - __net_timestamp(skb); + skb->tstamp = ktime_get_real(); } #define net_timestamp_check(COND, SKB) \ if (static_branch_unlikely(&netstamp_needed_key)) { \ if ((COND) && !(SKB)->tstamp) \ - __net_timestamp(SKB); \ + (SKB)->tstamp = ktime_get_real(); \ } \ bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) @@ -3730,7 +3759,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, no_lock_out: if (unlikely(to_free)) - kfree_skb_list(to_free); + kfree_skb_list_reason(to_free, + SKB_DROP_REASON_QDISC_DROP); return rc; } @@ -3785,7 +3815,7 @@ no_lock_out: } spin_unlock(root_lock); if (unlikely(to_free)) - kfree_skb_list(to_free); + kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP); if (unlikely(contended)) spin_unlock(&q->busylock); return rc; @@ -3831,7 +3861,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(skb)); skb_dst_force(skb); - netif_rx_ni(skb); + netif_rx(skb); 
return 0; } EXPORT_SYMBOL(dev_loopback_xmit); @@ -3860,7 +3890,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) case TC_ACT_SHOT: mini_qdisc_qstats_cpu_drop(miniq); *ret = NET_XMIT_DROP; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: @@ -4540,10 +4570,12 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) static int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail) { + enum skb_drop_reason reason; struct softnet_data *sd; unsigned long flags; unsigned int qlen; + reason = SKB_DROP_REASON_NOT_SPECIFIED; sd = &per_cpu(softnet_data, cpu); rps_lock_irqsave(sd, &flags); @@ -4566,13 +4598,14 @@ enqueue: napi_schedule_rps(sd); goto enqueue; } + reason = SKB_DROP_REASON_CPU_BACKLOG; drop: sd->dropped++; rps_unlock_irq_restore(sd, &flags); atomic_long_inc(&skb->dev->rx_dropped); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; } @@ -4792,7 +4825,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) } return XDP_PASS; out_redir: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_XDP); return XDP_DROP; } EXPORT_SYMBOL_GPL(do_xdp_generic); @@ -4860,7 +4893,9 @@ EXPORT_SYMBOL(__netif_rx); * congestion control or by the protocol layers. * The network buffer is passed via the backlog NAPI device. Modern NIC * driver should use NAPI and GRO. - * This function can used from any context. + * This function can used from interrupt and from process context. The + * caller from process context must not disable interrupts before invoking + * this function. 
* * return values: * NET_RX_SUCCESS (no congestion) @@ -4869,13 +4904,16 @@ EXPORT_SYMBOL(__netif_rx); */ int netif_rx(struct sk_buff *skb) { + bool need_bh_off = !(hardirq_count() | softirq_count()); int ret; - local_bh_disable(); + if (need_bh_off) + local_bh_disable(); trace_netif_rx_entry(skb); ret = netif_rx_internal(skb); trace_netif_rx_exit(ret); - local_bh_enable(); + if (need_bh_off) + local_bh_enable(); return ret; } EXPORT_SYMBOL(netif_rx); @@ -5003,7 +5041,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, break; case TC_ACT_SHOT: mini_qdisc_qstats_cpu_drop(miniq); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: @@ -5320,11 +5358,13 @@ check_vlan_id: *ppt_prev = pt_prev; } else { drop: - if (!deliver_exact) + if (!deliver_exact) { atomic_long_inc(&skb->dev->rx_dropped); - else + kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT); + } else { atomic_long_inc(&skb->dev->rx_nohandler); - kfree_skb(skb); + kfree_skb(skb); + } /* Jamal, now you will not able to escape explaining * me how you were going to use this. 
:-) */ @@ -7723,6 +7763,242 @@ void netdev_bonding_info_change(struct net_device *dev, } EXPORT_SYMBOL(netdev_bonding_info_change); +static int netdev_offload_xstats_enable_l3(struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct netdev_notifier_offload_xstats_info info = { + .info.dev = dev, + .info.extack = extack, + .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3, + }; + int err; + int rc; + + dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3), + GFP_KERNEL); + if (!dev->offload_xstats_l3) + return -ENOMEM; + + rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE, + NETDEV_OFFLOAD_XSTATS_DISABLE, + &info.info); + err = notifier_to_errno(rc); + if (err) + goto free_stats; + + return 0; + +free_stats: + kfree(dev->offload_xstats_l3); + dev->offload_xstats_l3 = NULL; + return err; +} + +int netdev_offload_xstats_enable(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct netlink_ext_ack *extack) +{ + ASSERT_RTNL(); + + if (netdev_offload_xstats_enabled(dev, type)) + return -EALREADY; + + switch (type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + return netdev_offload_xstats_enable_l3(dev, extack); + } + + WARN_ON(1); + return -EINVAL; +} +EXPORT_SYMBOL(netdev_offload_xstats_enable); + +static void netdev_offload_xstats_disable_l3(struct net_device *dev) +{ + struct netdev_notifier_offload_xstats_info info = { + .info.dev = dev, + .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3, + }; + + call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE, + &info.info); + kfree(dev->offload_xstats_l3); + dev->offload_xstats_l3 = NULL; +} + +int netdev_offload_xstats_disable(struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + ASSERT_RTNL(); + + if (!netdev_offload_xstats_enabled(dev, type)) + return -EALREADY; + + switch (type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + netdev_offload_xstats_disable_l3(dev); + return 0; + } + + WARN_ON(1); + return -EINVAL; +} +EXPORT_SYMBOL(netdev_offload_xstats_disable); + 
+static void netdev_offload_xstats_disable_all(struct net_device *dev) +{ + netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3); +} + +static struct rtnl_hw_stats64 * +netdev_offload_xstats_get_ptr(const struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + switch (type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + return dev->offload_xstats_l3; + } + + WARN_ON(1); + return NULL; +} + +bool netdev_offload_xstats_enabled(const struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + ASSERT_RTNL(); + + return netdev_offload_xstats_get_ptr(dev, type); +} +EXPORT_SYMBOL(netdev_offload_xstats_enabled); + +struct netdev_notifier_offload_xstats_ru { + bool used; +}; + +struct netdev_notifier_offload_xstats_rd { + struct rtnl_hw_stats64 stats; + bool used; +}; + +static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest, + const struct rtnl_hw_stats64 *src) +{ + dest->rx_packets += src->rx_packets; + dest->tx_packets += src->tx_packets; + dest->rx_bytes += src->rx_bytes; + dest->tx_bytes += src->tx_bytes; + dest->rx_errors += src->rx_errors; + dest->tx_errors += src->tx_errors; + dest->rx_dropped += src->rx_dropped; + dest->tx_dropped += src->tx_dropped; + dest->multicast += src->multicast; +} + +static int netdev_offload_xstats_get_used(struct net_device *dev, + enum netdev_offload_xstats_type type, + bool *p_used, + struct netlink_ext_ack *extack) +{ + struct netdev_notifier_offload_xstats_ru report_used = {}; + struct netdev_notifier_offload_xstats_info info = { + .info.dev = dev, + .info.extack = extack, + .type = type, + .report_used = &report_used, + }; + int rc; + + WARN_ON(!netdev_offload_xstats_enabled(dev, type)); + rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED, + &info.info); + *p_used = report_used.used; + return notifier_to_errno(rc); +} + +static int netdev_offload_xstats_get_stats(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_hw_stats64 *p_stats, + bool *p_used, 
+ struct netlink_ext_ack *extack) +{ + struct netdev_notifier_offload_xstats_rd report_delta = {}; + struct netdev_notifier_offload_xstats_info info = { + .info.dev = dev, + .info.extack = extack, + .type = type, + .report_delta = &report_delta, + }; + struct rtnl_hw_stats64 *stats; + int rc; + + stats = netdev_offload_xstats_get_ptr(dev, type); + if (WARN_ON(!stats)) + return -EINVAL; + + rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, + &info.info); + + /* Cache whatever we got, even if there was an error, otherwise the + * successful stats retrievals would get lost. + */ + netdev_hw_stats64_add(stats, &report_delta.stats); + + if (p_stats) + *p_stats = *stats; + *p_used = report_delta.used; + + return notifier_to_errno(rc); +} + +int netdev_offload_xstats_get(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_hw_stats64 *p_stats, bool *p_used, + struct netlink_ext_ack *extack) +{ + ASSERT_RTNL(); + + if (p_stats) + return netdev_offload_xstats_get_stats(dev, type, p_stats, + p_used, extack); + else + return netdev_offload_xstats_get_used(dev, type, p_used, + extack); +} +EXPORT_SYMBOL(netdev_offload_xstats_get); + +void +netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta, + const struct rtnl_hw_stats64 *stats) +{ + report_delta->used = true; + netdev_hw_stats64_add(&report_delta->stats, stats); +} +EXPORT_SYMBOL(netdev_offload_xstats_report_delta); + +void +netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used) +{ + report_used->used = true; +} +EXPORT_SYMBOL(netdev_offload_xstats_report_used); + +void netdev_offload_xstats_push_delta(struct net_device *dev, + enum netdev_offload_xstats_type type, + const struct rtnl_hw_stats64 *p_stats) +{ + struct rtnl_hw_stats64 *stats; + + ASSERT_RTNL(); + + stats = netdev_offload_xstats_get_ptr(dev, type); + if (WARN_ON(!stats)) + return; + + netdev_hw_stats64_add(stats, p_stats); +} 
+EXPORT_SYMBOL(netdev_offload_xstats_push_delta); + /** * netdev_get_xmit_slave - Get the xmit slave of master device * @dev: device @@ -9828,9 +10104,6 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) struct net_device *dev; int wait = 0; - list_for_each_entry(dev, list, todo_list) - linkwatch_forget_dev(dev); - rebroadcast_time = warning_time = jiffies; list_for_each_entry(dev, list, todo_list) @@ -9951,6 +10224,7 @@ void netdev_run_todo(void) } dev->reg_state = NETREG_UNREGISTERED; + linkwatch_forget_dev(dev); } while (!list_empty(&list)) { @@ -10414,6 +10688,8 @@ void unregister_netdevice_many(struct list_head *head) dev_xdp_uninstall(dev); + netdev_offload_xstats_disable_all(dev); + /* Notify protocols, that we are about to destroy * this device. They should clean all the things. */ @@ -10679,11 +10955,11 @@ static int dev_cpu_dead(unsigned int oldcpu) /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx_ni(skb); + netif_rx(skb); input_queue_head_incr(oldsd); } while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx_ni(skb); + netif_rx(skb); input_queue_head_incr(oldsd); } @@ -10886,36 +11162,6 @@ static void __net_exit default_device_exit_net(struct net *net) } } -static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) -{ - /* Return (with the rtnl_lock held) when there are no network - * devices unregistering in any network namespace in net_list. 
- */ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - bool unregistering; - struct net *net; - - ASSERT_RTNL(); - add_wait_queue(&netdev_unregistering_wq, &wait); - for (;;) { - unregistering = false; - - list_for_each_entry(net, net_list, exit_list) { - if (atomic_read(&net->dev_unreg_count) > 0) { - unregistering = true; - break; - } - } - if (!unregistering) - break; - __rtnl_unlock(); - - wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); - rtnl_lock(); - } - remove_wait_queue(&netdev_unregistering_wq, &wait); -} - static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network @@ -10932,18 +11178,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) default_device_exit_net(net); cond_resched(); } - /* To prevent network device cleanup code from dereferencing - * loopback devices or network devices that have been freed - * wait here for all pending unregistrations to complete, - * before unregistring the loopback device and allowing the - * network namespace be freed. - * - * The netdev todo list containing all network devices - * unregistrations that happen in default_device_exit_batch - * will run in the rtnl_unlock() at the end of - * default_device_exit_batch. - */ - rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d7d177f75d43..b89e3e95bffc 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static const char * const drop_reasons[] = { /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. - * It also guards the global 'hw_stats_list' list. 
*/ static DEFINE_MUTEX(net_dm_mutex); @@ -100,11 +99,9 @@ struct per_cpu_dm_data { }; struct dm_hw_stat_delta { - struct net_device *dev; unsigned long last_rx; - struct list_head list; - struct rcu_head rcu; unsigned long last_drop_val; + struct rcu_head rcu; }; static struct genl_family net_drop_monitor_family; @@ -115,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data); static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; -static LIST_HEAD(hw_stats_list); static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY; static u32 net_dm_trunc_len; @@ -287,33 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, int work, int budget) { - struct dm_hw_stat_delta *new_stat; - + struct net_device *dev = napi->dev; + struct dm_hw_stat_delta *stat; /* * Don't check napi structures with no associated device */ - if (!napi->dev) + if (!dev) return; rcu_read_lock(); - list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { - struct net_device *dev; - + stat = rcu_dereference(dev->dm_private); + if (stat) { /* * only add a note to our monitor buffer if: - * 1) this is the dev we received on - * 2) its after the last_rx delta - * 3) our rx_dropped count has gone up + * 1) its after the last_rx delta + * 2) our rx_dropped count has gone up */ - /* Paired with WRITE_ONCE() in dropmon_net_event() */ - dev = READ_ONCE(new_stat->dev); - if ((dev == napi->dev) && - (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && - (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { + if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) && + (dev->stats.rx_dropped != stat->last_drop_val)) { trace_drop_common(NULL, NULL); - new_stat->last_drop_val = napi->dev->stats.rx_dropped; - new_stat->last_rx = jiffies; - break; + stat->last_drop_val = dev->stats.rx_dropped; + stat->last_rx = jiffies; 
} } rcu_read_unlock(); @@ -1198,7 +1188,6 @@ err_module_put: static void net_dm_trace_off_set(void) { - struct dm_hw_stat_delta *new_stat, *temp; const struct net_dm_alert_ops *ops; int cpu; @@ -1222,13 +1211,6 @@ static void net_dm_trace_off_set(void) consume_skb(skb); } - list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { - if (new_stat->dev == NULL) { - list_del_rcu(&new_stat->list); - kfree_rcu(new_stat, rcu); - } - } - module_put(THIS_MODULE); } @@ -1589,41 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct dm_hw_stat_delta *new_stat = NULL; - struct dm_hw_stat_delta *tmp; + struct dm_hw_stat_delta *stat; switch (event) { case NETDEV_REGISTER: - new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); + if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private))) + break; + stat = kzalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) + break; - if (!new_stat) - goto out; + stat->last_rx = jiffies; + rcu_assign_pointer(dev->dm_private, stat); - new_stat->dev = dev; - new_stat->last_rx = jiffies; - mutex_lock(&net_dm_mutex); - list_add_rcu(&new_stat->list, &hw_stats_list); - mutex_unlock(&net_dm_mutex); break; case NETDEV_UNREGISTER: - mutex_lock(&net_dm_mutex); - list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { - if (new_stat->dev == dev) { - - /* Paired with READ_ONCE() in trace_napi_poll_hit() */ - WRITE_ONCE(new_stat->dev, NULL); - - if (trace_state == TRACE_OFF) { - list_del_rcu(&new_stat->list); - kfree_rcu(new_stat, rcu); - break; - } - } + stat = rtnl_dereference(dev->dm_private); + if (stat) { + rcu_assign_pointer(dev->dm_private, NULL); + kfree_rcu(stat, rcu); } - mutex_unlock(&net_dm_mutex); break; } -out: return NOTIFY_DONE; } diff --git a/net/core/filter.c b/net/core/filter.c index 65869fd510e8..88767f7da150 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2107,7 +2107,7 @@ static inline int 
__bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) } skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); @@ -2176,7 +2176,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, } skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); @@ -2274,7 +2274,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, } skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); @@ -7388,6 +7388,43 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_skb_set_delivery_time, struct sk_buff *, skb, + u64, dtime, u32, dtime_type) +{ + /* skb_clear_delivery_time() is done for inet protocol */ + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + switch (dtime_type) { + case BPF_SKB_DELIVERY_TIME_MONO: + if (!dtime) + return -EINVAL; + skb->tstamp = dtime; + skb->mono_delivery_time = 1; + break; + case BPF_SKB_DELIVERY_TIME_NONE: + if (dtime) + return -EINVAL; + skb->tstamp = 0; + skb->mono_delivery_time = 0; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct bpf_func_proto bpf_skb_set_delivery_time_proto = { + .func = bpf_skb_set_delivery_time, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -7749,6 +7786,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_gen_syncookie_proto; case BPF_FUNC_sk_assign: return &bpf_sk_assign_proto; + case BPF_FUNC_skb_set_delivery_time: + return &bpf_skb_set_delivery_time_proto; #endif 
default: return bpf_sk_base_func_proto(func_id); @@ -8088,7 +8127,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type return false; info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; break; - case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1: + case offsetof(struct __sk_buff, delivery_time_type): + return false; + case offsetofend(struct __sk_buff, delivery_time_type) ... offsetof(struct __sk_buff, hwtstamp) - 1: /* Explicitly prohibit access to padding in __sk_buff. */ return false; default: @@ -8443,6 +8484,15 @@ static bool tc_cls_act_is_valid_access(int off, int size, break; case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; + case offsetof(struct __sk_buff, delivery_time_type): + /* The convert_ctx_access() on reading and writing + * __sk_buff->tstamp depends on whether the bpf prog + * has used __sk_buff->delivery_time_type or not. + * Thus, we need to set prog->delivery_time_access + * earlier during is_valid_access() here. + */ + ((struct bpf_prog *)prog)->delivery_time_access = 1; + return size == sizeof(__u8); } return bpf_skb_is_valid_access(off, size, type, prog, info); @@ -8838,6 +8888,45 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static struct bpf_insn *bpf_convert_dtime_type_read(const struct bpf_insn *si, + struct bpf_insn *insn) +{ + __u8 value_reg = si->dst_reg; + __u8 skb_reg = si->src_reg; + __u8 tmp_reg = BPF_REG_AX; + + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, + SKB_MONO_DELIVERY_TIME_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, + SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2); + /* value_reg = BPF_SKB_DELIVERY_TIME_MONO */ + *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_MONO); + *insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 
10 : 5); + + *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, skb_reg, + offsetof(struct sk_buff, tstamp)); + *insn++ = BPF_JMP_IMM(BPF_JNE, tmp_reg, 0, 2); + /* value_reg = BPF_SKB_DELIVERY_TIME_NONE */ + *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_NONE); + *insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 6 : 1); + +#ifdef CONFIG_NET_CLS_ACT + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK); + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2); + /* At ingress, value_reg = 0 */ + *insn++ = BPF_MOV32_IMM(value_reg, 0); + *insn++ = BPF_JMP_A(1); +#endif + + /* value_reg = BPF_SKB_DELIVERYT_TIME_UNSPEC */ + *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_UNSPEC); + + /* 15 insns with CONFIG_NET_CLS_ACT */ + return insn; +} + static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, struct bpf_insn *insn) { @@ -8859,6 +8948,71 @@ static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, return insn; } +static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, + const struct bpf_insn *si, + struct bpf_insn *insn) +{ + __u8 value_reg = si->dst_reg; + __u8 skb_reg = si->src_reg; + +#ifdef CONFIG_NET_CLS_ACT + if (!prog->delivery_time_access) { + __u8 tmp_reg = BPF_REG_AX; + + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK); + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 5); + /* @ingress, read __sk_buff->tstamp as the (rcv) timestamp, + * so check the skb->mono_delivery_time. + */ + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, + SKB_MONO_DELIVERY_TIME_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, + SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2); + /* skb->mono_delivery_time is set, read 0 as the (rcv) timestamp. 
*/ + *insn++ = BPF_MOV64_IMM(value_reg, 0); + *insn++ = BPF_JMP_A(1); + } +#endif + + *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg, + offsetof(struct sk_buff, tstamp)); + return insn; +} + +static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, + const struct bpf_insn *si, + struct bpf_insn *insn) +{ + __u8 value_reg = si->src_reg; + __u8 skb_reg = si->dst_reg; + +#ifdef CONFIG_NET_CLS_ACT + if (!prog->delivery_time_access) { + __u8 tmp_reg = BPF_REG_AX; + + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK); + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 3); + /* Writing __sk_buff->tstamp at ingress as the (rcv) timestamp. + * Clear the skb->mono_delivery_time. + */ + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, + SKB_MONO_DELIVERY_TIME_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, + ~SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, + SKB_MONO_DELIVERY_TIME_OFFSET); + } +#endif + + /* skb->tstamp = tstamp */ + *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg, + offsetof(struct sk_buff, tstamp)); + return insn; +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -9167,17 +9321,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_DW, - si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, - tstamp, 8, - target_size)); + insn = bpf_convert_tstamp_write(prog, si, insn); else - *insn++ = BPF_LDX_MEM(BPF_DW, - si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, - tstamp, 8, - target_size)); + insn = bpf_convert_tstamp_read(prog, si, insn); + break; + + case offsetof(struct __sk_buff, delivery_time_type): + insn = bpf_convert_dtime_type_read(si, insn); break; case offsetof(struct __sk_buff, gso_segs): diff --git 
a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 15833e1d6ea1..34441a32e3be 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -22,6 +22,7 @@ #include <linux/ppp_defs.h> #include <linux/stddef.h> #include <linux/if_ether.h> +#include <linux/if_hsr.h> #include <linux/mpls.h> #include <linux/tcp.h> #include <linux/ptp_classify.h> @@ -1282,6 +1283,22 @@ proto_again: break; } + case htons(ETH_P_HSR): { + struct hsr_tag *hdr, _hdr; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, + &_hdr); + if (!hdr) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + proto = hdr->encap_proto; + nhoff += HSR_HLEN; + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; + } + default: fdret = FLOW_DISSECT_RET_OUT_BAD; break; diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 6eb2e5ec2c50..8462f926ab45 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -89,8 +89,23 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) } EXPORT_SYMBOL(gro_cells_init); +struct percpu_free_defer { + struct rcu_head rcu; + void __percpu *ptr; +}; + +static void percpu_free_defer_callback(struct rcu_head *head) +{ + struct percpu_free_defer *defer; + + defer = container_of(head, struct percpu_free_defer, rcu); + free_percpu(defer->ptr); + kfree(defer); +} + void gro_cells_destroy(struct gro_cells *gcells) { + struct percpu_free_defer *defer; int i; if (!gcells->cells) @@ -102,12 +117,23 @@ void gro_cells_destroy(struct gro_cells *gcells) __netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } - /* This barrier is needed because netpoll could access dev->napi_list - * under rcu protection. + /* We need to observe an rcu grace period before freeing ->cells, + * because netpoll could access dev->napi_list under rcu protection. + * Try hard using call_rcu() instead of synchronize_rcu(), + * because we might be called from cleanup_net(), and we + * definitely do not want to block this critical task. 
*/ - synchronize_net(); - - free_percpu(gcells->cells); + defer = kmalloc(sizeof(*defer), GFP_KERNEL | __GFP_NOWARN); + if (likely(defer)) { + defer->ptr = gcells->cells; + call_rcu(&defer->rcu, percpu_free_defer_callback); + } else { + /* We do not hold RTNL at this point, synchronize_net() + * would not be able to expedite this sync. + */ + synchronize_rcu_expedited(); + free_percpu(gcells->cells); + } gcells->cells = NULL; } EXPORT_SYMBOL(gro_cells_destroy); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ec0bf737b076..f64ebd050f6c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1171,7 +1171,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, neigh->updated = jiffies; write_unlock_bh(&neigh->lock); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED); return 1; } } else if (neigh->nud_state & NUD_STALE) { @@ -1193,7 +1193,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, if (!buff) break; neigh->arp_queue_len_bytes -= buff->truesize; - kfree_skb(buff); + kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } skb_dst_force(skb); @@ -1215,7 +1215,7 @@ out_dead: if (neigh->nud_state & NUD_STALE) goto out_unlock_bh; write_unlock_bh(&neigh->lock); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD); trace_neigh_event_send_dead(neigh, 1); return 1; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 53ea262ecafd..fbddf966206b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -213,7 +213,7 @@ static ssize_t speed_show(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev)) { + if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e25d359d84d9..1943c0f0307d 100644 --- 
a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -26,6 +26,45 @@ #define BIAS_MAX LONG_MAX +#ifdef CONFIG_PAGE_POOL_STATS +/* alloc_stat_inc is intended to be used in softirq context */ +#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) +/* recycle_stat_inc is safe to use when preemption is possible. */ +#define recycle_stat_inc(pool, __stat) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ + this_cpu_inc(s->__stat); \ + } while (0) + +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats) +{ + int cpu = 0; + + if (!stats) + return false; + + memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats)); + + for_each_possible_cpu(cpu) { + const struct page_pool_recycle_stats *pcpu = + per_cpu_ptr(pool->recycle_stats, cpu); + + stats->recycle_stats.cached += pcpu->cached; + stats->recycle_stats.cache_full += pcpu->cache_full; + stats->recycle_stats.ring += pcpu->ring; + stats->recycle_stats.ring_full += pcpu->ring_full; + stats->recycle_stats.released_refcnt += pcpu->released_refcnt; + } + + return true; +} +EXPORT_SYMBOL(page_pool_get_stats); +#else +#define alloc_stat_inc(pool, __stat) +#define recycle_stat_inc(pool, __stat) +#endif + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -73,6 +112,12 @@ static int page_pool_init(struct page_pool *pool, pool->p.flags & PP_FLAG_PAGE_FRAG) return -EINVAL; +#ifdef CONFIG_PAGE_POOL_STATS + pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); + if (!pool->recycle_stats) + return -ENOMEM; +#endif + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; @@ -117,8 +162,10 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) + if (__ptr_ring_empty(r)) { + alloc_stat_inc(pool, empty); return NULL; + } /* Softirq 
guarantee CPU and thus NUMA node is stable. This, * assumes CPU refilling driver RX-ring will also run RX-NAPI. @@ -145,14 +192,17 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) * This limit stress on page buddy alloactor. */ page_pool_return_page(pool, page); + alloc_stat_inc(pool, waive); page = NULL; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ - if (likely(pool->alloc.count > 0)) + if (likely(pool->alloc.count > 0)) { page = pool->alloc.cache[--pool->alloc.count]; + alloc_stat_inc(pool, refill); + } return page; } @@ -166,6 +216,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) if (likely(pool->alloc.count)) { /* Fast-path */ page = pool->alloc.cache[--pool->alloc.count]; + alloc_stat_inc(pool, fast); } else { page = page_pool_refill_alloc_cache(pool); } @@ -239,6 +290,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } + alloc_stat_inc(pool, slow_high_order); page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ @@ -293,10 +345,12 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, } /* Return last page */ - if (likely(pool->alloc.count > 0)) + if (likely(pool->alloc.count > 0)) { page = pool->alloc.cache[--pool->alloc.count]; - else + alloc_stat_inc(pool, slow); + } else { page = NULL; + } /* When page just alloc'ed is should/must have refcnt 1. */ return page; @@ -394,7 +448,12 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) else ret = ptr_ring_produce_bh(&pool->ring, page); - return (ret == 0) ? 
true : false; + if (!ret) { + recycle_stat_inc(pool, ring); + return true; + } + + return false; } /* Only allow direct recycling in special circumstances, into the @@ -405,11 +464,14 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) static bool page_pool_recycle_in_cache(struct page *page, struct page_pool *pool) { - if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) + if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { + recycle_stat_inc(pool, cache_full); return false; + } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ pool->alloc.cache[pool->alloc.count++] = page; + recycle_stat_inc(pool, cached); return true; } @@ -459,6 +521,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * doing refcnt based recycle tricks, meaning another process * will be invoking put_page. */ + recycle_stat_inc(pool, released_refcnt); /* Do not replace this with page_pool_return_page() */ page_pool_release_page(pool, page); put_page(page); @@ -472,6 +535,7 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); if (page && !page_pool_recycle_in_ring(pool, page)) { /* Cache full, fallback to free pages */ + recycle_stat_inc(pool, ring_full); page_pool_return_page(pool, page); } } @@ -620,6 +684,9 @@ static void page_pool_free(struct page_pool *pool) if (pool->p.flags & PP_FLAG_DMA_MAP) put_device(pool->p.dev); +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->recycle_stats); +#endif kfree(pool); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 20a9e1686453..a66b6761b88b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5048,82 +5048,256 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) (!idxattr || idxattr == attrid); } -#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1) -static int rtnl_get_offload_stats_attr_size(int attr_id) +static bool 
+rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id) { - switch (attr_id) { - case IFLA_OFFLOAD_XSTATS_CPU_HIT: - return sizeof(struct rtnl_link_stats64); - } + return dev->netdev_ops && + dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats && + dev->netdev_ops->ndo_has_offload_stats(dev, attr_id); +} - return 0; +static unsigned int +rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id) +{ + return rtnl_offload_xstats_have_ndo(dev, attr_id) ? + sizeof(struct rtnl_link_stats64) : 0; } -static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev, - int *prividx) +static int +rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id, + struct sk_buff *skb) { + unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id); struct nlattr *attr = NULL; - int attr_id, size; void *attr_data; int err; - if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && - dev->netdev_ops->ndo_get_offload_stats)) + if (!size) return -ENODATA; - for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; - attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { - if (attr_id < *prividx) - continue; + attr = nla_reserve_64bit(skb, attr_id, size, + IFLA_OFFLOAD_XSTATS_UNSPEC); + if (!attr) + return -EMSGSIZE; - size = rtnl_get_offload_stats_attr_size(attr_id); - if (!size) - continue; + attr_data = nla_data(attr); + memset(attr_data, 0, size); - if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id)) - continue; + err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data); + if (err) + return err; + + return 0; +} + +static unsigned int +rtnl_offload_xstats_get_size_stats(const struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + bool enabled = netdev_offload_xstats_enabled(dev, type); + + return enabled ? 
sizeof(struct rtnl_hw_stats64) : 0; +} + +struct rtnl_offload_xstats_request_used { + bool request; + bool used; +}; + +static int +rtnl_offload_xstats_get_stats(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_offload_xstats_request_used *ru, + struct rtnl_hw_stats64 *stats, + struct netlink_ext_ack *extack) +{ + bool request; + bool used; + int err; + + request = netdev_offload_xstats_enabled(dev, type); + if (!request) { + used = false; + goto out; + } + + err = netdev_offload_xstats_get(dev, type, stats, &used, extack); + if (err) + return err; - attr = nla_reserve_64bit(skb, attr_id, size, +out: + if (ru) { + ru->request = request; + ru->used = used; + } + return 0; +} + +static int +rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id, + struct rtnl_offload_xstats_request_used *ru) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr_id); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request)) + goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int +rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + struct rtnl_offload_xstats_request_used ru_l3; + struct nlattr *nest; + int err; + + err = rtnl_offload_xstats_get_stats(dev, t_l3, &ru_l3, NULL, extack); + if (err) + return err; + + nest = nla_nest_start(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO); + if (!nest) + return -EMSGSIZE; + + if (rtnl_offload_xstats_fill_hw_s_info_one(skb, + IFLA_OFFLOAD_XSTATS_L3_STATS, + &ru_l3)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int 
rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, + int *prividx, u32 off_filter_mask, + struct netlink_ext_ack *extack) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO; + int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS; + int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT; + bool have_data = false; + int err; + + if (*prividx <= attr_id_cpu_hit && + (off_filter_mask & + IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) { + err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb); + if (!err) { + have_data = true; + } else if (err != -ENODATA) { + *prividx = attr_id_cpu_hit; + return err; + } + } + + if (*prividx <= attr_id_hw_s_info && + (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) { + *prividx = attr_id_hw_s_info; + + err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack); + if (err) + return err; + + have_data = true; + *prividx = 0; + } + + if (*prividx <= attr_id_l3_stats && + (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) { + unsigned int size_l3; + struct nlattr *attr; + + *prividx = attr_id_l3_stats; + + size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3); + attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) - goto nla_put_failure; + return -EMSGSIZE; - attr_data = nla_data(attr); - memset(attr_data, 0, size); - err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, - attr_data); + err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL, + nla_data(attr), extack); if (err) - goto get_offload_stats_failure; + return err; + + have_data = true; + *prividx = 0; } - if (!attr) + if (!have_data) return -ENODATA; *prividx = 0; return 0; +} -nla_put_failure: - err = -EMSGSIZE; -get_offload_stats_failure: - *prividx = attr_id; - return err; +static unsigned int +rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + bool 
enabled = netdev_offload_xstats_enabled(dev, type); + + return nla_total_size(0) + + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */ + nla_total_size(sizeof(u8)) + + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */ + (enabled ? nla_total_size(sizeof(u8)) : 0) + + 0; } -static int rtnl_get_offload_stats_size(const struct net_device *dev) +static unsigned int +rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev) { + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + + return nla_total_size(0) + + /* IFLA_OFFLOAD_XSTATS_L3_STATS */ + rtnl_offload_xstats_get_size_hw_s_info_one(dev, t_l3) + + 0; +} + +static int rtnl_offload_xstats_get_size(const struct net_device *dev, + u32 off_filter_mask) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT; int nla_size = 0; - int attr_id; int size; - if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && - dev->netdev_ops->ndo_get_offload_stats)) - return 0; + if (off_filter_mask & + IFLA_STATS_FILTER_BIT(attr_id_cpu_hit)) { + size = rtnl_offload_xstats_get_size_ndo(dev, attr_id_cpu_hit); + nla_size += nla_total_size_64bit(size); + } - for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; - attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { - if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id)) - continue; - size = rtnl_get_offload_stats_attr_size(attr_id); + if (off_filter_mask & + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO)) + nla_size += rtnl_offload_xstats_get_size_hw_s_info(dev); + + if (off_filter_mask & + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) { + size = rtnl_offload_xstats_get_size_stats(dev, t_l3); nla_size += nla_total_size_64bit(size); } @@ -5133,11 +5307,21 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev) return nla_size; } +struct rtnl_stats_dump_filters { + /* mask[0] filters outer attributes. 
Then individual nests have their + * filtering mask at the index of the nested attribute. + */ + u32 mask[IFLA_STATS_MAX + 1]; +}; + static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags, unsigned int filter_mask, - int *idxattr, int *prividx) + unsigned int flags, + const struct rtnl_stats_dump_filters *filters, + int *idxattr, int *prividx, + struct netlink_ext_ack *extack) { + unsigned int filter_mask = filters->mask[0]; struct if_stats_msg *ifsm; struct nlmsghdr *nlh; struct nlattr *attr; @@ -5163,8 +5347,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, sizeof(struct rtnl_link_stats64), IFLA_STATS_UNSPEC); - if (!attr) + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } sp = nla_data(attr); dev_get_stats(dev, sp); @@ -5177,8 +5363,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, *idxattr = IFLA_STATS_LINK_XSTATS; attr = nla_nest_start_noflag(skb, IFLA_STATS_LINK_XSTATS); - if (!attr) + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); @@ -5200,8 +5388,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; attr = nla_nest_start_noflag(skb, IFLA_STATS_LINK_XSTATS_SLAVE); - if (!attr) + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); @@ -5213,13 +5403,19 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, *idxattr)) { + u32 off_filter_mask; + + off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; attr = nla_nest_start_noflag(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); - if 
(!attr) + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } - err = rtnl_get_offload_stats(skb, dev, prividx); + err = rtnl_offload_xstats_fill(skb, dev, prividx, + off_filter_mask, extack); if (err == -ENODATA) nla_nest_cancel(skb, attr); else @@ -5235,8 +5431,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, *idxattr = IFLA_STATS_AF_SPEC; attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC); - if (!attr) + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } rcu_read_lock(); list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { @@ -5280,13 +5478,14 @@ nla_put_failure: else nlmsg_end(skb, nlh); - return -EMSGSIZE; + return err; } static size_t if_nlmsg_stats_size(const struct net_device *dev, - u32 filter_mask) + const struct rtnl_stats_dump_filters *filters) { size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg)); + unsigned int filter_mask = filters->mask[0]; if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); @@ -5322,8 +5521,12 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, } } - if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) - size += rtnl_get_offload_stats_size(dev); + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) { + u32 off_filter_mask; + + off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; + size += rtnl_offload_xstats_get_size(dev, off_filter_mask); + } if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) { struct rtnl_af_ops *af_ops; @@ -5347,6 +5550,79 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, return size; } +#define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1) + +static const struct nla_policy +rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = { + [IFLA_STATS_LINK_OFFLOAD_XSTATS] = + NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID), +}; + +static const struct nla_policy 
+rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = { + [IFLA_STATS_GET_FILTERS] = + NLA_POLICY_NESTED(rtnl_stats_get_policy_filters), +}; + +static const struct nla_policy +ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = { + [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1), +}; + +static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters, + struct rtnl_stats_dump_filters *filters, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_STATS_MAX + 1]; + int err; + int at; + + err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters, + rtnl_stats_get_policy_filters, extack); + if (err < 0) + return err; + + for (at = 1; at <= IFLA_STATS_MAX; at++) { + if (tb[at]) { + if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) { + NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask"); + return -EINVAL; + } + filters->mask[at] = nla_get_u32(tb[at]); + } + } + + return 0; +} + +static int rtnl_stats_get_parse(const struct nlmsghdr *nlh, + u32 filter_mask, + struct rtnl_stats_dump_filters *filters, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; + int err; + int i; + + filters->mask[0] = filter_mask; + for (i = 1; i < ARRAY_SIZE(filters->mask); i++) + filters->mask[i] = -1U; + + err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb, + IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack); + if (err < 0) + return err; + + if (tb[IFLA_STATS_GET_FILTERS]) { + err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS], + filters, extack); + if (err) + return err; + } + + return 0; +} + static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, bool is_dump, struct netlink_ext_ack *extack) { @@ -5369,10 +5645,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); return -EINVAL; } - if (nlmsg_attrlen(nlh, sizeof(*ifsm))) { - NL_SET_ERR_MSG(extack, "Invalid 
attributes after stats header"); - return -EINVAL; - } if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); return -EINVAL; @@ -5384,12 +5656,12 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; int idxattr = 0, prividx = 0; struct if_stats_msg *ifsm; struct sk_buff *nskb; - u32 filter_mask; int err; err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), @@ -5406,19 +5678,22 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, if (!dev) return -ENODEV; - filter_mask = ifsm->filter_mask; - if (!filter_mask) { + if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get"); return -EINVAL; } - nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL); + err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack); + if (err) + return err; + + nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL); if (!nskb) return -ENOBUFS; err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - 0, filter_mask, &idxattr, &prividx); + 0, &filters, &idxattr, &prividx, extack); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ WARN_ON(err == -EMSGSIZE); @@ -5434,12 +5709,12 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; int h, s_h, err, s_idx, s_idxattr, s_prividx; + struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct if_stats_msg *ifsm; struct hlist_head *head; struct net_device *dev; - u32 filter_mask = 0; int idx = 0; s_h = cb->args[0]; @@ -5454,12 +5729,16 @@ static int 
rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) return err; ifsm = nlmsg_data(cb->nlh); - filter_mask = ifsm->filter_mask; - if (!filter_mask) { + if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); return -EINVAL; } + err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters, + extack); + if (err) + return err; + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -5469,8 +5748,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, - flags, filter_mask, - &s_idxattr, &s_prividx); + flags, &filters, + &s_idxattr, &s_prividx, + extack); /* If we ran out of room on the first message, * we're in trouble */ @@ -5494,6 +5774,107 @@ out: return skb->len; } +void rtnl_offload_xstats_notify(struct net_device *dev) +{ + struct rtnl_stats_dump_filters response_filters = {}; + struct net *net = dev_net(dev); + int idxattr = 0, prividx = 0; + struct sk_buff *skb; + int err = -ENOBUFS; + + ASSERT_RTNL(); + + response_filters.mask[0] |= + IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); + response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); + + skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters), + GFP_KERNEL); + if (!skb) + goto errout; + + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0, + &response_filters, &idxattr, &prividx, NULL); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL); + return; + +errout: + rtnl_set_sk_err(net, RTNLGRP_STATS, err); +} +EXPORT_SYMBOL(rtnl_offload_xstats_notify); + +static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + struct 
rtnl_stats_dump_filters response_filters = {}; + struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct net_device *dev = NULL; + struct if_stats_msg *ifsm; + bool notify = false; + int err; + + err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), + false, extack); + if (err) + return err; + + ifsm = nlmsg_data(nlh); + if (ifsm->family != AF_UNSPEC) { + NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC"); + return -EINVAL; + } + + if (ifsm->ifindex > 0) + dev = __dev_get_by_index(net, ifsm->ifindex); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + if (ifsm->filter_mask) { + NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set"); + return -EINVAL; + } + + err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX, + ifla_stats_set_policy, extack); + if (err < 0) + return err; + + if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) { + u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]); + + if (req) + err = netdev_offload_xstats_enable(dev, t_l3, extack); + else + err = netdev_offload_xstats_disable(dev, t_l3); + + if (!err) + notify = true; + else if (err != -EALREADY) + return err; + + response_filters.mask[0] |= + IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); + response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); + } + + if (notify) + rtnl_offload_xstats_notify(dev); + + return 0; +} + /* Process one rtnetlink message. 
*/ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -5719,4 +6100,5 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, 0); + rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9d0388bed0c1..10bde7c6db44 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -201,7 +201,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; + skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; @@ -777,16 +777,17 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) } EXPORT_SYMBOL(kfree_skb_reason); -void kfree_skb_list(struct sk_buff *segs) +void kfree_skb_list_reason(struct sk_buff *segs, + enum skb_drop_reason reason) { while (segs) { struct sk_buff *next = segs->next; - kfree_skb(segs); + kfree_skb_reason(segs, reason); segs = next; } } -EXPORT_SYMBOL(kfree_skb_list); +EXPORT_SYMBOL(kfree_skb_list_reason); /* Dump skb information and contents. 
* @@ -1736,11 +1737,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->head = data; skb->head_frag = 0; skb->data += off; + + skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; off = nhead; -#else - skb->end = skb->head + size; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); @@ -1788,6 +1788,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + unsigned int saved_end_offset, saved_truesize; + struct skb_shared_info *shinfo; + int res; + + saved_end_offset = skb_end_offset(skb); + saved_truesize = skb->truesize; + + res = pskb_expand_head(skb, 0, 0, pri); + if (res) + return res; + + skb->truesize = saved_truesize; + + if (likely(skb_end_offset(skb) == saved_end_offset)) + return 0; + + shinfo = skb_shinfo(skb); + + /* We are about to change back skb->end, + * we need to move skb_shinfo() to its new location. + */ + memmove(skb->head + saved_end_offset, + shinfo, + offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); + + skb_set_end_offset(skb, saved_end_offset); + + return 0; +} + /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate @@ -2276,7 +2308,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. 
*/ if (clone) { @@ -3876,6 +3908,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, list_skb = list_skb->next; err = 0; + delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { @@ -3900,7 +3933,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, tail = nskb; delta_len += nskb->len; - delta_truesize += nskb->truesize; skb_push(nskb, -skb_network_offset(nskb) + offset); @@ -4730,7 +4762,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk_is_tcp(sk)) - serr->ee.ee_data -= sk->sk_tskey; + serr->ee.ee_data -= atomic_read(&sk->sk_tskey); } err = sock_queue_err_skb(sk, skb); @@ -4820,7 +4852,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (hwtstamps) *skb_hwtstamps(skb) = *hwtstamps; else - skb->tstamp = ktime_get_real(); + __net_timestamp(skb); __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } @@ -5350,7 +5382,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) ipvs_reset(skb); skb->mark = 0; - skb->tstamp = 0; + skb_clear_tstamp(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); @@ -6044,11 +6076,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb->head = data; skb->data = data; skb->head_frag = 0; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; @@ -6105,7 +6133,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. 
*/ if (clone) { @@ -6186,11 +6214,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb->head = data; skb->head_frag = 0; skb->data = data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 8eb671c827f9..929a2b096b04 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1153,7 +1153,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; - int len = skb->len; + int len = orig_len; /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ skb = skb_clone(skb, GFP_ATOMIC); diff --git a/net/core/sock.c b/net/core/sock.c index d76218ab4999..784c92eaded8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -879,9 +879,9 @@ int sock_set_timestamping(struct sock *sk, int optname, if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return -EINVAL; - sk->sk_tskey = tcp_sk(sk)->snd_una; + atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una); } else { - sk->sk_tskey = 0; + atomic_set(&sk->sk_tskey, 0); } } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index dbeb8ecbcd98..7123fe7feeac 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -103,8 +103,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (orig_sock_table) { static_branch_dec(&rps_needed); static_branch_dec(&rfs_needed); - synchronize_rcu(); - vfree(orig_sock_table); + kvfree_rcu(orig_sock_table); } } } @@ -142,8 +141,7 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, lockdep_is_held(&flow_limit_update_mutex)); if (cur && !cpumask_test_cpu(i, mask)) { RCU_INIT_POINTER(sd->flow_limit, NULL); - synchronize_rcu(); - kfree(cur); + kfree_rcu(cur); } else if (!cur && 
cpumask_test_cpu(i, mask)) { cur = kzalloc_node(len, GFP_KERNEL, cpu_to_node(i)); diff --git a/net/core/utils.c b/net/core/utils.c index 1f31a39236d5..938495bc1d34 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -476,9 +476,9 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { - *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); + csum_replace_by_diff(sum, diff); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_add(diff, ~skb->csum); + skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index b441ab330fd3..dc4fb699b56c 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) } EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); +static void dcbnl_flush_dev(struct net_device *dev) +{ + struct dcb_app_type *itr, *tmp; + + spin_lock_bh(&dcb_lock); + + list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { + if (itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + } + } + + spin_unlock_bh(&dcb_lock); +} + +static int dcbnl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_UNREGISTER: + if (!dev->dcbnl_ops) + return NOTIFY_DONE; + + dcbnl_flush_dev(dev); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block dcbnl_nb __read_mostly = { + .notifier_call = dcbnl_netdevice_event, +}; + static int __init dcbnl_init(void) { + int err; + + err = register_netdevice_notifier(&dcbnl_nb); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); diff --git a/net/decnet/dn_nsp_out.c 
b/net/decnet/dn_nsp_out.c index eadc89583168..b05639bdfc8f 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -52,6 +52,7 @@ #include <linux/init.h> #include <linux/poll.h> #include <linux/if_packet.h> +#include <linux/jiffies.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> @@ -351,7 +352,7 @@ void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, * Slow start: If we have been idle for more than * one RTT, then reset window to min size. */ - if ((jiffies - scp->stamp) > t) + if (time_is_before_jiffies(scp->stamp + t)) scp->snd_window = NSP_MIN_WINDOW; if (oth) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index c43f7446a75d..06d5de28a43e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -467,6 +467,46 @@ struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) } EXPORT_SYMBOL_GPL(dsa_port_from_netdev); +int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_mac_addr *a; + int err; + + mutex_lock(&dp->addr_lists_lock); + + list_for_each_entry(a, &dp->fdbs, list) { + err = cb(ds, port, a->addr, a->vid, a->db); + if (err) + break; + } + + mutex_unlock(&dp->addr_lists_lock); + + return err; +} +EXPORT_SYMBOL_GPL(dsa_port_walk_fdbs); + +int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_mac_addr *a; + int err; + + mutex_lock(&dp->addr_lists_lock); + + list_for_each_entry(a, &dp->mdbs, list) { + err = cb(ds, port, a->addr, a->vid, a->db); + if (err) + break; + } + + mutex_unlock(&dp->addr_lists_lock); + + return err; +} +EXPORT_SYMBOL_GPL(dsa_port_walk_mdbs); + static int __init dsa_init_module(void) { int rc; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 408b79a28cd4..d5f21a770689 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -72,27 +72,24 @@ int dsa_broadcast(unsigned long e, void *v) } /** - * dsa_lag_map() - Map LAG netdev to a 
linear LAG ID + * dsa_lag_map() - Map LAG structure to a linear LAG array * @dst: Tree in which to record the mapping. - * @lag: Netdev that is to be mapped to an ID. + * @lag: LAG structure that is to be mapped to the tree's array. * - * dsa_lag_id/dsa_lag_dev can then be used to translate between the + * dsa_lag_id/dsa_lag_by_id can then be used to translate between the * two spaces. The size of the mapping space is determined by the * driver by setting ds->num_lag_ids. It is perfectly legal to leave * it unset if it is not needed, in which case these functions become * no-ops. */ -void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag) +void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag) { unsigned int id; - if (dsa_lag_id(dst, lag) >= 0) - /* Already mapped */ - return; - - for (id = 0; id < dst->lags_len; id++) { - if (!dsa_lag_dev(dst, id)) { - dst->lags[id] = lag; + for (id = 1; id <= dst->lags_len; id++) { + if (!dsa_lag_by_id(dst, id)) { + dst->lags[id - 1] = lag; + lag->id = id; return; } } @@ -108,28 +105,36 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag) /** * dsa_lag_unmap() - Remove a LAG ID mapping * @dst: Tree in which the mapping is recorded. - * @lag: Netdev that was mapped. + * @lag: LAG structure that was mapped. * * As there may be multiple users of the mapping, it is only removed * if there are no other references to it. 
*/ -void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag) { - struct dsa_port *dp; unsigned int id; - dsa_lag_foreach_port(dp, dst, lag) - /* There are remaining users of this mapping */ - return; - dsa_lags_foreach_id(id, dst) { - if (dsa_lag_dev(dst, id) == lag) { - dst->lags[id] = NULL; + if (dsa_lag_by_id(dst, id) == lag) { + dst->lags[id - 1] = NULL; + lag->id = 0; break; } } } +struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, + const struct net_device *lag_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_lag_dev_get(dp) == lag_dev) + return dp->lag; + + return NULL; +} + struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, const struct net_device *br) { @@ -1289,7 +1294,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, info.tag_ops = tag_ops; err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); if (err) - return err; + goto out_unwind_tagger; err = dsa_tree_bind_tag_proto(dst, tag_ops); if (err) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index a37f0883676a..c3c7491ace72 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -25,6 +25,8 @@ enum { DSA_NOTIFIER_FDB_DEL, DSA_NOTIFIER_HOST_FDB_ADD, DSA_NOTIFIER_HOST_FDB_DEL, + DSA_NOTIFIER_LAG_FDB_ADD, + DSA_NOTIFIER_LAG_FDB_DEL, DSA_NOTIFIER_LAG_CHANGE, DSA_NOTIFIER_LAG_JOIN, DSA_NOTIFIER_LAG_LEAVE, @@ -57,6 +59,7 @@ struct dsa_notifier_bridge_info { int sw_index; int port; bool tx_fwd_offload; + struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_FDB_* */ @@ -65,6 +68,15 @@ struct dsa_notifier_fdb_info { int port; const unsigned char *addr; u16 vid; + struct dsa_db db; +}; + +/* DSA_NOTIFIER_LAG_FDB_* */ +struct dsa_notifier_lag_fdb_info { + struct dsa_lag *lag; + const unsigned char *addr; + u16 vid; + struct dsa_db db; }; /* DSA_NOTIFIER_MDB_* */ @@ -72,11 +84,12 @@ struct dsa_notifier_mdb_info { const struct 
switchdev_obj_port_mdb *mdb; int sw_index; int port; + struct dsa_db db; }; /* DSA_NOTIFIER_LAG_* */ struct dsa_notifier_lag_info { - struct net_device *lag; + struct dsa_lag lag; int sw_index; int port; @@ -119,9 +132,8 @@ struct dsa_notifier_master_state_info { }; struct dsa_switchdev_event_work { - struct dsa_switch *ds; - int port; struct net_device *dev; + struct net_device *orig_dev; struct work_struct work; unsigned long event; /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and @@ -132,6 +144,21 @@ struct dsa_switchdev_event_work { bool host_addr; }; +enum dsa_standalone_event { + DSA_UC_ADD, + DSA_UC_DEL, + DSA_MC_ADD, + DSA_MC_DEL, +}; + +struct dsa_standalone_event_work { + struct work_struct work; + struct net_device *dev; + enum dsa_standalone_event event; + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + struct dsa_slave_priv { /* Copy of CPU port xmit for faster access in slave transmit hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, @@ -211,19 +238,31 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); -int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid); +int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid); +int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid); +int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); int dsa_port_mdb_add(const struct dsa_port 
*dp, const struct switchdev_obj_port_mdb *mdb); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); -int dsa_port_host_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_host_mdb_del(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); +int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); int dsa_port_pre_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); @@ -486,9 +525,25 @@ static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb) int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); +static inline bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_fdb_add && ds->ops->port_fdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} + +static inline bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_mdb_add && ds->ops->port_mdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} + /* dsa2.c */ -void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag); -void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag); +void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); +struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, + const struct net_device *lag_dev); int 
dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); int dsa_broadcast(unsigned long e, void *v); int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, @@ -508,10 +563,6 @@ struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, const struct net_device *br); /* tag_8021q.c */ -int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info); -int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info); int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info); int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, diff --git a/net/dsa/master.c b/net/dsa/master.c index 6ac393cc6ea7..991c2930d631 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -260,11 +260,16 @@ static void dsa_netdev_ops_set(struct net_device *dev, dev->dsa_ptr->netdev_ops = ops; } +/* Keep the master always promiscuous if the tagging protocol requires that + * (garbles MAC DA) or if it doesn't support unicast filtering, case in which + * it would revert to promiscuous mode as soon as we call dev_uc_add() on it + * anyway. 
+ */ static void dsa_master_set_promiscuity(struct net_device *dev, int inc) { const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops; - if (!ops->promisc_on_master) + if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master) return; ASSERT_RTNL(); diff --git a/net/dsa/port.c b/net/dsa/port.c index 056c6f51ac20..58291df14cdb 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -176,7 +176,7 @@ static int dsa_port_inherit_brport_flags(struct dsa_port *dp, struct netlink_ext_ack *extack) { const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | - BR_BCAST_FLOOD; + BR_BCAST_FLOOD | BR_PORT_LOCKED; struct net_device *brport_dev = dsa_port_to_bridge_port(dp); int flag, err; @@ -200,7 +200,7 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp) { const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | - BR_BCAST_FLOOD; + BR_BCAST_FLOOD | BR_PORT_LOCKED; int flag, err; for_each_set_bit(flag, &mask, 32) { @@ -328,6 +328,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, .tree_index = dp->ds->dst->index, .sw_index = dp->ds->index, .port = dp->index, + .extack = extack, }; struct net_device *dev = dp->slave; struct net_device *brport_dev; @@ -395,10 +396,17 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) .tree_index = dp->ds->dst->index, .sw_index = dp->ds->index, .port = dp->index, - .bridge = *dp->bridge, }; int err; + /* If the port could not be offloaded to begin with, then + * there is nothing to do. + */ + if (!dp->bridge) + return; + + info.bridge = *dp->bridge; + /* Here the port is already unbridged. Reflect the current configuration * so that drivers can program their chips accordingly. */ @@ -422,7 +430,7 @@ int dsa_port_lag_change(struct dsa_port *dp, }; bool tx_enabled; - if (!dp->lag_dev) + if (!dp->lag) return 0; /* On statically configured aggregates (e.g. 
loadbalance @@ -440,27 +448,70 @@ int dsa_port_lag_change(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info); } -int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag, +static int dsa_port_lag_create(struct dsa_port *dp, + struct net_device *lag_dev) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_lag *lag; + + lag = dsa_tree_lag_find(ds->dst, lag_dev); + if (lag) { + refcount_inc(&lag->refcount); + dp->lag = lag; + return 0; + } + + lag = kzalloc(sizeof(*lag), GFP_KERNEL); + if (!lag) + return -ENOMEM; + + refcount_set(&lag->refcount, 1); + mutex_init(&lag->fdb_lock); + INIT_LIST_HEAD(&lag->fdbs); + lag->dev = lag_dev; + dsa_lag_map(ds->dst, lag); + dp->lag = lag; + + return 0; +} + +static void dsa_port_lag_destroy(struct dsa_port *dp) +{ + struct dsa_lag *lag = dp->lag; + + dp->lag = NULL; + dp->lag_tx_enabled = false; + + if (!refcount_dec_and_test(&lag->refcount)) + return; + + WARN_ON(!list_empty(&lag->fdbs)); + dsa_lag_unmap(dp->ds->dst, lag); + kfree(lag); +} + +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct netdev_lag_upper_info *uinfo, struct netlink_ext_ack *extack) { struct dsa_notifier_lag_info info = { .sw_index = dp->ds->index, .port = dp->index, - .lag = lag, .info = uinfo, }; struct net_device *bridge_dev; int err; - dsa_lag_map(dp->ds->dst, lag); - dp->lag_dev = lag; + err = dsa_port_lag_create(dp, lag_dev); + if (err) + goto err_lag_create; + info.lag = *dp->lag; err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info); if (err) goto err_lag_join; - bridge_dev = netdev_master_upper_dev_get(lag); + bridge_dev = netdev_master_upper_dev_get(lag_dev); if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) return 0; @@ -473,12 +524,12 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag, err_bridge_join: dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); err_lag_join: - dp->lag_dev = NULL; - dsa_lag_unmap(dp->ds->dst, lag); + dsa_port_lag_destroy(dp); 
+err_lag_create: return err; } -void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag) +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev) { struct net_device *br = dsa_port_bridge_dev_get(dp); @@ -486,17 +537,16 @@ void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag) dsa_port_pre_bridge_leave(dp, br); } -void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev) { struct net_device *br = dsa_port_bridge_dev_get(dp); struct dsa_notifier_lag_info info = { .sw_index = dp->ds->index, .port = dp->index, - .lag = lag, }; int err; - if (!dp->lag_dev) + if (!dp->lag) return; /* Port might have been part of a LAG that in turn was @@ -505,16 +555,15 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) if (br) dsa_port_bridge_leave(dp, br); - dp->lag_tx_enabled = false; - dp->lag_dev = NULL; + info.lag = *dp->lag; + + dsa_port_lag_destroy(dp); err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); if (err) dev_err(dp->ds->dev, "port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n", dp->index, ERR_PTR(err)); - - dsa_lag_unmap(dp->ds->dst, lag); } /* Must be called under rcu_read_lock() */ @@ -750,8 +799,19 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, .port = dp->index, .addr = addr, .vid = vid, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + /* Refcounting takes bridge.num as a key, and should be global for all + * bridges in the absence of FDB isolation, and per bridge otherwise. + * Force the bridge.num to zero here in the absence of FDB isolation. 
+ */ + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info); } @@ -763,48 +823,154 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, .port = dp->index, .addr = addr, .vid = vid, - + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); } -int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid) +static int dsa_port_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct dsa_notifier_fdb_info info = { .sw_index = dp->ds->index, .port = dp->index, .addr = addr, .vid = vid, + .db = db, }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info); +} + +int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, + }; + + return dsa_port_host_fdb_add(dp, addr, vid, db); +} + +int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; int err; - err = dev_uc_add(cpu_dp->master, addr); - if (err) - return err; + /* Avoid a call to __dev_set_promiscuity() on the master, which + * requires rtnl_lock(), since we can't guarantee that is held here, + * and we can't take it either. 
+ */ + if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(cpu_dp->master, addr); + if (err) + return err; + } - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info); + return dsa_port_host_fdb_add(dp, addr, vid, db); } -int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid) +static int dsa_port_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct dsa_notifier_fdb_info info = { .sw_index = dp->ds->index, .port = dp->index, .addr = addr, .vid = vid, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info); +} + +int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, }; + + return dsa_port_host_fdb_del(dp, addr, vid, db); +} + +int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; int err; - err = dev_uc_del(cpu_dp->master, addr); - if (err) - return err; + if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(cpu_dp->master, addr); + if (err) + return err; + } - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info); + return dsa_port_host_fdb_del(dp, addr, vid, db); +} + +int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid) +{ + struct dsa_notifier_lag_fdb_info info = { + .lag = dp->lag, + .addr = addr, + .vid = vid, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_ADD, &info); +} + +int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid) +{ + struct dsa_notifier_lag_fdb_info info = { + .lag = dp->lag, + .addr = addr, 
+ .vid = vid, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_DEL, &info); } int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) @@ -825,8 +991,15 @@ int dsa_port_mdb_add(const struct dsa_port *dp, .sw_index = dp->ds->index, .port = dp->index, .mdb = mdb, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); } @@ -837,45 +1010,106 @@ int dsa_port_mdb_del(const struct dsa_port *dp, .sw_index = dp->ds->index, .port = dp->index, .mdb = mdb, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info); } -int dsa_port_host_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_port_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct dsa_notifier_mdb_info info = { .sw_index = dp->ds->index, .port = dp->index, .mdb = mdb, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info); +} + +int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, }; + + return dsa_port_host_mdb_add(dp, mdb, db); +} + +int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; int err; err = dev_mc_add(cpu_dp->master, mdb->addr); if (err) return err; - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info); + return 
dsa_port_host_mdb_add(dp, mdb, db); } -int dsa_port_host_mdb_del(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_port_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct dsa_notifier_mdb_info info = { .sw_index = dp->ds->index, .port = dp->index, .mdb = mdb, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info); +} + +int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, }; + + return dsa_port_host_mdb_del(dp, mdb, db); +} + +int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; int err; err = dev_mc_del(cpu_dp->master, mdb->addr); if (err) return err; - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info); + return dsa_port_host_mdb_del(dp, mdb, db); } int dsa_port_vlan_add(struct dsa_port *dp, @@ -1058,8 +1292,8 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP); struct dsa_switch *ds = dp->ds; - struct phylink_pcs *pcs = NULL; if (ds->ops->phylink_mac_select_pcs) pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface); @@ -1251,7 +1485,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp) dp->pl_config.dev = ds->dev; dp->pl_config.type = PHYLINK_DEV; - dp->pl_config.pcs_poll = ds->pcs_poll; err = dsa_port_phylink_create(dp); if (err) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f61e6b72ffbb..42436ac6993b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -23,6 +23,114 @@ #include "dsa_priv.h" +static 
void dsa_slave_standalone_event_work(struct work_struct *work) +{ + struct dsa_standalone_event_work *standalone_work = + container_of(work, struct dsa_standalone_event_work, work); + const unsigned char *addr = standalone_work->addr; + struct net_device *dev = standalone_work->dev; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_mdb mdb; + struct dsa_switch *ds = dp->ds; + u16 vid = standalone_work->vid; + int err; + + switch (standalone_work->event) { + case DSA_UC_ADD: + err = dsa_port_standalone_host_fdb_add(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to fdb: %d\n", + dp->index, addr, vid, err); + break; + } + break; + + case DSA_UC_DEL: + err = dsa_port_standalone_host_fdb_del(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from fdb: %d\n", + dp->index, addr, vid, err); + } + + break; + case DSA_MC_ADD: + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = dsa_port_standalone_host_mdb_add(dp, &mdb); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to mdb: %d\n", + dp->index, addr, vid, err); + break; + } + break; + case DSA_MC_DEL: + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = dsa_port_standalone_host_mdb_del(dp, &mdb); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from mdb: %d\n", + dp->index, addr, vid, err); + } + + break; + } + + kfree(standalone_work); +} + +static int dsa_slave_schedule_standalone_work(struct net_device *dev, + enum dsa_standalone_event event, + const unsigned char *addr, + u16 vid) +{ + struct dsa_standalone_event_work *standalone_work; + + standalone_work = kzalloc(sizeof(*standalone_work), GFP_ATOMIC); + if (!standalone_work) + return -ENOMEM; + + INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work); + standalone_work->event = event; + standalone_work->dev = dev; + + ether_addr_copy(standalone_work->addr, addr); + standalone_work->vid = vid; + + 
dsa_schedule_work(&standalone_work->work); + + return 0; +} + +static int dsa_slave_sync_uc(struct net_device *dev, + const unsigned char *addr) +{ + return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); +} + +static int dsa_slave_unsync_uc(struct net_device *dev, + const unsigned char *addr) +{ + return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); +} + +static int dsa_slave_sync_mc(struct net_device *dev, + const unsigned char *addr) +{ + return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); +} + +static int dsa_slave_unsync_mc(struct net_device *dev, + const unsigned char *addr) +{ + return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); +} + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -67,6 +175,7 @@ static int dsa_slave_open(struct net_device *dev) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int err; err = dev_open(master, NULL); @@ -75,38 +184,30 @@ static int dsa_slave_open(struct net_device *dev) goto out; } - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { - err = dev_uc_add(master, dev->dev_addr); - if (err < 0) + if (dsa_switch_supports_uc_filtering(ds)) { + err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0); + if (err) goto out; } - if (dev->flags & IFF_ALLMULTI) { - err = dev_set_allmulti(master, 1); - if (err < 0) - goto del_unicast; - } - if (dev->flags & IFF_PROMISC) { - err = dev_set_promiscuity(master, 1); + if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { + err = dev_uc_add(master, dev->dev_addr); if (err < 0) - goto clear_allmulti; + goto del_host_addr; } err = dsa_port_enable_rt(dp, dev->phydev); if (err) - goto clear_promisc; + goto del_unicast; return 0; -clear_promisc: - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(master, -1); -clear_allmulti: - if 
(dev->flags & IFF_ALLMULTI) - dev_set_allmulti(master, -1); del_unicast: if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); +del_host_addr: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); out: return err; } @@ -115,68 +216,122 @@ static int dsa_slave_close(struct net_device *dev) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; dsa_port_disable_rt(dp); - dev_mc_unsync(master, dev); - dev_uc_unsync(master, dev); - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(master, -1); - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(master, -1); - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + return 0; } +/* Keep flooding enabled towards this port's CPU port as long as it serves at + * least one port in the tree that requires it. 
+ */ +static void dsa_port_manage_cpu_flood(struct dsa_port *dp) +{ + struct switchdev_brport_flags flags = { + .mask = BR_FLOOD | BR_MCAST_FLOOD, + }; + struct dsa_switch_tree *dst = dp->ds->dst; + struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_port *other_dp; + int err; + + list_for_each_entry(other_dp, &dst->ports, list) { + if (!dsa_port_is_user(other_dp)) + continue; + + if (other_dp->cpu_dp != cpu_dp) + continue; + + if (other_dp->slave->flags & IFF_ALLMULTI) + flags.val |= BR_MCAST_FLOOD; + if (other_dp->slave->flags & IFF_PROMISC) + flags.val |= BR_FLOOD; + } + + err = dsa_port_pre_bridge_flags(dp, flags, NULL); + if (err) + return; + + dsa_port_bridge_flags(cpu_dp, flags, NULL); +} + static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - if (dev->flags & IFF_UP) { - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, - dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, - dev->flags & IFF_PROMISC ? 1 : -1); - } + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 
1 : -1); + + if (dsa_switch_supports_uc_filtering(ds) && + dsa_switch_supports_mc_filtering(ds)) + dsa_port_manage_cpu_flood(dp); } static void dsa_slave_set_rx_mode(struct net_device *dev) { struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; dev_mc_sync(master, dev); dev_uc_sync(master, dev); + if (dsa_switch_supports_mc_filtering(ds)) + __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc); + if (dsa_switch_supports_uc_filtering(ds)) + __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc); } static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; struct sockaddr *addr = a; int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (!(dev->flags & IFF_UP)) - goto out; + if (dsa_switch_supports_uc_filtering(ds)) { + err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0); + if (err) + return err; + } if (!ether_addr_equal(addr->sa_data, master->dev_addr)) { err = dev_uc_add(master, addr->sa_data); if (err < 0) - return err; + goto del_unicast; } if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); -out: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + eth_hw_addr_set(dev, addr->sa_data); return 0; + +del_unicast: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0); + + return err; } struct dsa_slave_dump_ctx { @@ -426,7 +581,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) return -EOPNOTSUPP; - err = dsa_port_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: if 
(dsa_port_offloads_bridge_port(dp, obj->orig_dev)) @@ -507,7 +662,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) return -EOPNOTSUPP; - err = dsa_port_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) @@ -1948,6 +2103,8 @@ int dsa_slave_create(struct dsa_port *port) else eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; + if (dsa_switch_supports_uc_filtering(ds)) + slave_dev->priv_flags |= IFF_UNICAST_FLT; slave_dev->netdev_ops = &dsa_slave_netdev_ops; if (ds->ops->port_max_mtu) slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); @@ -2134,7 +2291,7 @@ dsa_slave_lag_changeupper(struct net_device *dev, continue; dp = dsa_slave_to_port(lower); - if (!dp->lag_dev) + if (!dp->lag) /* Software LAG */ continue; @@ -2163,7 +2320,7 @@ dsa_slave_lag_prechangeupper(struct net_device *dev, continue; dp = dsa_slave_to_port(lower); - if (!dp->lag_dev) + if (!dp->lag) /* Software LAG */ continue; @@ -2373,43 +2530,40 @@ static void dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) { struct switchdev_notifier_fdb_info info = {}; - struct dsa_switch *ds = switchdev_work->ds; - struct dsa_port *dp; - - if (!dsa_is_user_port(ds, switchdev_work->port)) - return; info.addr = switchdev_work->addr; info.vid = switchdev_work->vid; info.offloaded = true; - dp = dsa_to_port(ds, switchdev_work->port); call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, - dp->slave, &info.info, NULL); + switchdev_work->orig_dev, &info.info, NULL); } static void dsa_slave_switchdev_event_work(struct work_struct *work) { struct dsa_switchdev_event_work *switchdev_work = container_of(work, struct dsa_switchdev_event_work, work); - struct dsa_switch *ds = switchdev_work->ds; + const unsigned char *addr = 
switchdev_work->addr; + struct net_device *dev = switchdev_work->dev; + u16 vid = switchdev_work->vid; + struct dsa_switch *ds; struct dsa_port *dp; int err; - dp = dsa_to_port(ds, switchdev_work->port); + dp = dsa_slave_to_port(dev); + ds = dp->ds; switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: if (switchdev_work->host_addr) - err = dsa_port_host_fdb_add(dp, switchdev_work->addr, - switchdev_work->vid); + err = dsa_port_bridge_host_fdb_add(dp, addr, vid); + else if (dp->lag) + err = dsa_port_lag_fdb_add(dp, addr, vid); else - err = dsa_port_fdb_add(dp, switchdev_work->addr, - switchdev_work->vid); + err = dsa_port_fdb_add(dp, addr, vid); if (err) { dev_err(ds->dev, "port %d failed to add %pM vid %d to fdb: %d\n", - dp->index, switchdev_work->addr, - switchdev_work->vid, err); + dp->index, addr, vid, err); break; } dsa_fdb_offload_notify(switchdev_work); @@ -2417,16 +2571,15 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) case SWITCHDEV_FDB_DEL_TO_DEVICE: if (switchdev_work->host_addr) - err = dsa_port_host_fdb_del(dp, switchdev_work->addr, - switchdev_work->vid); + err = dsa_port_bridge_host_fdb_del(dp, addr, vid); + else if (dp->lag) + err = dsa_port_lag_fdb_del(dp, addr, vid); else - err = dsa_port_fdb_del(dp, switchdev_work->addr, - switchdev_work->vid); + err = dsa_port_fdb_del(dp, addr, vid); if (err) { dev_err(ds->dev, "port %d failed to delete %pM vid %d from fdb: %d\n", - dp->index, switchdev_work->addr, - switchdev_work->vid, err); + dp->index, addr, vid, err); } break; @@ -2464,19 +2617,17 @@ static int dsa_slave_fdb_event(struct net_device *dev, if (ctx && ctx != dp) return 0; - if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) - return -EOPNOTSUPP; - - if (dsa_slave_dev_check(orig_dev) && - switchdev_fdb_is_dynamically_learned(fdb_info)) - return 0; + if (switchdev_fdb_is_dynamically_learned(fdb_info)) { + if (dsa_port_offloads_bridge_port(dp, orig_dev)) + return 0; - /* FDB entries learned by the 
software bridge should be installed as - * host addresses only if the driver requests assisted learning. - */ - if (switchdev_fdb_is_dynamically_learned(fdb_info) && - !ds->assisted_learning_on_cpu_port) - return 0; + /* FDB entries learned by the software bridge or by foreign + * bridge ports should be installed as host addresses only if + * the driver requests assisted learning. + */ + if (!ds->assisted_learning_on_cpu_port) + return 0; + } /* Also treat FDB entries on foreign interfaces bridged with us as host * addresses. @@ -2484,6 +2635,18 @@ static int dsa_slave_fdb_event(struct net_device *dev, if (dsa_foreign_dev_check(dev, orig_dev)) host_addr = true; + /* Check early that we're not doing work in vain. + * Host addresses on LAG ports still require regular FDB ops, + * since the CPU port isn't in a LAG. + */ + if (dp->lag && !host_addr) { + if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del) + return -EOPNOTSUPP; + } else { + if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) + return -EOPNOTSUPP; + } + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); if (!switchdev_work) return -ENOMEM; @@ -2494,10 +2657,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, host_addr ? 
" as host address" : ""); INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); - switchdev_work->ds = ds; - switchdev_work->port = dp->index; switchdev_work->event = event; switchdev_work->dev = dev; + switchdev_work->orig_dev = orig_dev; ether_addr_copy(switchdev_work->addr, fdb_info->addr); switchdev_work->vid = fdb_info->vid; @@ -2526,8 +2688,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, err = switchdev_handle_fdb_event_to_device(dev, event, ptr, dsa_slave_dev_check, dsa_foreign_dev_check, - dsa_slave_fdb_event, - NULL); + dsa_slave_fdb_event); return notifier_from_errno(err); default: return NOTIFY_DONE; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 0bb3987bd4e6..327d66bf7b47 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -96,7 +96,8 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, return -EOPNOTSUPP; err = ds->ops->port_bridge_join(ds, info->port, info->bridge, - &info->tx_fwd_offload); + &info->tx_fwd_offload, + info->extack); if (err) return err; } @@ -105,12 +106,13 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, ds->ops->crosschip_bridge_join) { err = ds->ops->crosschip_bridge_join(ds, info->tree_index, info->sw_index, - info->port, info->bridge); + info->port, info->bridge, + info->extack); if (err) return err; } - return dsa_tag_8021q_bridge_join(ds, info); + return 0; } static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds, @@ -186,7 +188,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, return err; } - return dsa_tag_8021q_bridge_leave(ds, info); + return 0; } /* Matches for all upstream-facing ports (the CPU port and all upstream-facing @@ -210,21 +212,41 @@ static bool dsa_port_host_address_match(struct dsa_port *dp, return false; } +static bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b) +{ + if (a->type != b->type) + return false; + + switch (a->type) { + case DSA_DB_PORT: + return a->dp == b->dp; + case DSA_DB_LAG: + return 
a->lag.dev == b->lag.dev; + case DSA_DB_BRIDGE: + return a->bridge.num == b->bridge.num; + default: + WARN_ON(1); + return false; + } +} + static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list, - const unsigned char *addr, - u16 vid) + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct dsa_mac_addr *a; list_for_each_entry(a, addr_list, list) - if (ether_addr_equal(a->addr, addr) && a->vid == vid) + if (ether_addr_equal(a->addr, addr) && a->vid == vid && + dsa_db_equal(&a->db, &db)) return a; return NULL; } static int dsa_port_do_mdb_add(struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; @@ -233,11 +255,11 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp, /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_mdb_add(ds, port, mdb); + return ds->ops->port_mdb_add(ds, port, mdb, db); mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid); + a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db); if (a) { refcount_inc(&a->refcount); goto out; @@ -249,7 +271,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp, goto out; } - err = ds->ops->port_mdb_add(ds, port, mdb); + err = ds->ops->port_mdb_add(ds, port, mdb, db); if (err) { kfree(a); goto out; @@ -257,6 +279,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp, ether_addr_copy(a->addr, mdb->addr); a->vid = mdb->vid; + a->db = db; refcount_set(&a->refcount, 1); list_add_tail(&a->list, &dp->mdbs); @@ -267,7 +290,8 @@ out: } static int dsa_port_do_mdb_del(struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; @@ -276,11 +300,11 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp, /* No need to 
bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_mdb_del(ds, port, mdb); + return ds->ops->port_mdb_del(ds, port, mdb, db); mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid); + a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db); if (!a) { err = -ENOENT; goto out; @@ -289,7 +313,7 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp, if (!refcount_dec_and_test(&a->refcount)) goto out; - err = ds->ops->port_mdb_del(ds, port, mdb); + err = ds->ops->port_mdb_del(ds, port, mdb, db); if (err) { refcount_set(&a->refcount, 1); goto out; @@ -305,7 +329,7 @@ out: } static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid) + u16 vid, struct dsa_db db) { struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; @@ -314,11 +338,11 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_fdb_add(ds, port, addr, vid); + return ds->ops->port_fdb_add(ds, port, addr, vid, db); mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->fdbs, addr, vid); + a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db); if (a) { refcount_inc(&a->refcount); goto out; @@ -330,7 +354,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, goto out; } - err = ds->ops->port_fdb_add(ds, port, addr, vid); + err = ds->ops->port_fdb_add(ds, port, addr, vid, db); if (err) { kfree(a); goto out; @@ -338,6 +362,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, ether_addr_copy(a->addr, addr); a->vid = vid; + a->db = db; refcount_set(&a->refcount, 1); list_add_tail(&a->list, &dp->fdbs); @@ -348,7 +373,7 @@ out: } static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid) + u16 vid, struct dsa_db db) { struct dsa_switch *ds = 
dp->ds; struct dsa_mac_addr *a; @@ -357,11 +382,11 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, /* No need to bother with refcounting for user ports */ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_fdb_del(ds, port, addr, vid); + return ds->ops->port_fdb_del(ds, port, addr, vid, db); mutex_lock(&dp->addr_lists_lock); - a = dsa_mac_addr_find(&dp->fdbs, addr, vid); + a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db); if (!a) { err = -ENOENT; goto out; @@ -370,7 +395,7 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, if (!refcount_dec_and_test(&a->refcount)) goto out; - err = ds->ops->port_fdb_del(ds, port, addr, vid); + err = ds->ops->port_fdb_del(ds, port, addr, vid, db); if (err) { refcount_set(&a->refcount, 1); goto out; @@ -385,6 +410,77 @@ out: return err; } +static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_mac_addr *a; + int err = 0; + + mutex_lock(&lag->fdb_lock); + + a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db); + if (a) { + refcount_inc(&a->refcount); + goto out; + } + + a = kzalloc(sizeof(*a), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out; + } + + err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db); + if (err) { + kfree(a); + goto out; + } + + ether_addr_copy(a->addr, addr); + a->vid = vid; + refcount_set(&a->refcount, 1); + list_add_tail(&a->list, &lag->fdbs); + +out: + mutex_unlock(&lag->fdb_lock); + + return err; +} + +static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_mac_addr *a; + int err = 0; + + mutex_lock(&lag->fdb_lock); + + a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db); + if (!a) { + err = -ENOENT; + goto out; + } + + if (!refcount_dec_and_test(&a->refcount)) + goto out; + + err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db); + if 
(err) { + refcount_set(&a->refcount, 1); + goto out; + } + + list_del(&a->list); + kfree(a); + +out: + mutex_unlock(&lag->fdb_lock); + + return err; +} + static int dsa_switch_host_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { @@ -397,7 +493,8 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->sw_index, info->port)) { - err = dsa_port_do_fdb_add(dp, info->addr, info->vid); + err = dsa_port_do_fdb_add(dp, info->addr, info->vid, + info->db); if (err) break; } @@ -418,7 +515,8 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->sw_index, info->port)) { - err = dsa_port_do_fdb_del(dp, info->addr, info->vid); + err = dsa_port_do_fdb_del(dp, info->addr, info->vid, + info->db); if (err) break; } @@ -436,7 +534,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds, if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return dsa_port_do_fdb_add(dp, info->addr, info->vid); + return dsa_port_do_fdb_add(dp, info->addr, info->vid, info->db); } static int dsa_switch_fdb_del(struct dsa_switch *ds, @@ -448,7 +546,43 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return dsa_port_do_fdb_del(dp, info->addr, info->vid); + return dsa_port_do_fdb_del(dp, info->addr, info->vid, info->db); +} + +static int dsa_switch_lag_fdb_add(struct dsa_switch *ds, + struct dsa_notifier_lag_fdb_info *info) +{ + struct dsa_port *dp; + + if (!ds->ops->lag_fdb_add) + return -EOPNOTSUPP; + + /* Notify switch only if it has a port in this LAG */ + dsa_switch_for_each_port(dp, ds) + if (dsa_port_offloads_lag(dp, info->lag)) + return dsa_switch_do_lag_fdb_add(ds, info->lag, + info->addr, info->vid, + info->db); + + return 0; +} + +static int dsa_switch_lag_fdb_del(struct dsa_switch *ds, + struct dsa_notifier_lag_fdb_info *info) +{ + struct dsa_port *dp; + + 
if (!ds->ops->lag_fdb_del) + return -EOPNOTSUPP; + + /* Notify switch only if it has a port in this LAG */ + dsa_switch_for_each_port(dp, ds) + if (dsa_port_offloads_lag(dp, info->lag)) + return dsa_switch_do_lag_fdb_del(ds, info->lag, + info->addr, info->vid, + info->db); + + return 0; } static int dsa_switch_lag_change(struct dsa_switch *ds, @@ -501,7 +635,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - return dsa_port_do_mdb_add(dp, info->mdb); + return dsa_port_do_mdb_add(dp, info->mdb, info->db); } static int dsa_switch_mdb_del(struct dsa_switch *ds, @@ -513,7 +647,7 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds, if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - return dsa_port_do_mdb_del(dp, info->mdb); + return dsa_port_do_mdb_del(dp, info->mdb, info->db); } static int dsa_switch_host_mdb_add(struct dsa_switch *ds, @@ -528,7 +662,7 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->sw_index, info->port)) { - err = dsa_port_do_mdb_add(dp, info->mdb); + err = dsa_port_do_mdb_add(dp, info->mdb, info->db); if (err) break; } @@ -549,7 +683,7 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->sw_index, info->port)) { - err = dsa_port_do_mdb_del(dp, info->mdb); + err = dsa_port_do_mdb_del(dp, info->mdb, info->db); if (err) break; } @@ -904,6 +1038,12 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_HOST_FDB_DEL: err = dsa_switch_host_fdb_del(ds, info); break; + case DSA_NOTIFIER_LAG_FDB_ADD: + err = dsa_switch_lag_fdb_add(ds, info); + break; + case DSA_NOTIFIER_LAG_FDB_DEL: + err = dsa_switch_lag_fdb_del(ds, info); + break; case DSA_NOTIFIER_LAG_CHANGE: err = dsa_switch_lag_change(ds, info); break; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 114f663332d0..a786569203f0 100644 --- 
a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -16,15 +16,11 @@ * * | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * +-----------+-----+-----------------+-----------+-----------------------+ - * | DIR | VBID| SWITCH_ID | VBID | PORT | + * | RSV | VBID| SWITCH_ID | VBID | PORT | * +-----------+-----+-----------------+-----------+-----------------------+ * - * DIR - VID[11:10]: - * Direction flags. - * * 1 (0b01) for RX VLAN, - * * 2 (0b10) for TX VLAN. - * These values make the special VIDs of 0, 1 and 4095 to be left - * unused by this coding scheme. + * RSV - VID[11:10]: + * Reserved. Must be set to 3 (0b11). * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and 7. @@ -32,18 +28,17 @@ * VBID - { VID[9], VID[5:4] }: * Virtual bridge ID. If between 1 and 7, packet targets the broadcast * domain of a bridge. If transmitted as zero, packet targets a single - * port. Field only valid on transmit, must be ignored on receive. + * port. * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and 15. */ -#define DSA_8021Q_DIR_SHIFT 10 -#define DSA_8021Q_DIR_MASK GENMASK(11, 10) -#define DSA_8021Q_DIR(x) (((x) << DSA_8021Q_DIR_SHIFT) & \ - DSA_8021Q_DIR_MASK) -#define DSA_8021Q_DIR_RX DSA_8021Q_DIR(1) -#define DSA_8021Q_DIR_TX DSA_8021Q_DIR(2) +#define DSA_8021Q_RSV_VAL 3 +#define DSA_8021Q_RSV_SHIFT 10 +#define DSA_8021Q_RSV_MASK GENMASK(11, 10) +#define DSA_8021Q_RSV ((DSA_8021Q_RSV_VAL << DSA_8021Q_RSV_SHIFT) & \ + DSA_8021Q_RSV_MASK) #define DSA_8021Q_SWITCH_ID_SHIFT 6 #define DSA_8021Q_SWITCH_ID_MASK GENMASK(8, 6) @@ -67,34 +62,24 @@ #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ DSA_8021Q_PORT_MASK) -u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num) +u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num) { /* The VBID value of 0 is reserved for precise TX, but it is also * reserved/invalid for the bridge_num, so all is well. 
*/ - return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num); + return DSA_8021Q_RSV | DSA_8021Q_VBID(bridge_num); } -EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid); - -/* Returns the VID to be inserted into the frame from xmit for switch steering - * instructions on egress. Encodes switch ID and port ID. - */ -u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp) -{ - return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) | - DSA_8021Q_PORT(dp->index); -} -EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_vid); /* Returns the VID that will be installed as pvid for this switch port, sent as * tagged egress towards the CPU port and decoded by the rcv function. */ -u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp) +u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp) { - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) | + return DSA_8021Q_RSV | DSA_8021Q_SWITCH_ID(dp->ds->index) | DSA_8021Q_PORT(dp->index); } -EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_standalone_vid); /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) @@ -110,21 +95,20 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); -bool vid_is_dsa_8021q_rxvlan(u16 vid) +/* Returns the decoded VBID from the RX VID. 
*/ +static int dsa_tag_8021q_rx_vbid(u16 vid) { - return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX; -} -EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_rxvlan); + u16 vbid_hi = (vid & DSA_8021Q_VBID_HI_MASK) >> DSA_8021Q_VBID_HI_SHIFT; + u16 vbid_lo = (vid & DSA_8021Q_VBID_LO_MASK) >> DSA_8021Q_VBID_LO_SHIFT; -bool vid_is_dsa_8021q_txvlan(u16 vid) -{ - return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX; + return (vbid_hi << 2) | vbid_lo; } -EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_txvlan); bool vid_is_dsa_8021q(u16 vid) { - return vid_is_dsa_8021q_rxvlan(vid) || vid_is_dsa_8021q_txvlan(vid); + u16 rsv = (vid & DSA_8021Q_RSV_MASK) >> DSA_8021Q_RSV_SHIFT; + + return rsv == DSA_8021Q_RSV_VAL; } EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); @@ -242,12 +226,8 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, u16 flags = 0; if (dsa_port_is_user(dp)) - flags |= BRIDGE_VLAN_INFO_UNTAGGED; - - if (vid_is_dsa_8021q_rxvlan(info->vid) && - dsa_8021q_rx_switch_id(info->vid) == ds->index && - dsa_8021q_rx_source_port(info->vid) == dp->index) - flags |= BRIDGE_VLAN_INFO_PVID; + flags |= BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID; err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid, flags); @@ -279,162 +259,78 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, return 0; } -/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single - * front-panel switch port (here swp0). +/* There are 2 ways of offloading tag_8021q VLANs. * - * Port identification through VLAN (802.1Q) tags has different requirements - * for it to work effectively: - * - On RX (ingress from network): each front-panel port must have a pvid - * that uniquely identifies it, and the egress of this pvid must be tagged - * towards the CPU port, so that software can recover the source port based - * on the VID in the frame. 
But this would only work for standalone ports; - * if bridged, this VLAN setup would break autonomous forwarding and would - * force all switched traffic to pass through the CPU. So we must also make - * the other front-panel ports members of this VID we're adding, albeit - * we're not making it their PVID (they'll still have their own). - * - On TX (ingress from CPU and towards network) we are faced with a problem. - * If we were to tag traffic (from within DSA) with the port's pvid, all - * would be well, assuming the switch ports were standalone. Frames would - * have no choice but to be directed towards the correct front-panel port. - * But because we also want the RX VLAN to not break bridging, then - * inevitably that means that we have to give them a choice (of what - * front-panel port to go out on), and therefore we cannot steer traffic - * based on the RX VID. So what we do is simply install one more VID on the - * front-panel and CPU ports, and profit off of the fact that steering will - * work just by virtue of the fact that there is only one other port that's - * a member of the VID we're tagging the traffic with - the desired one. + * One is to use a hardware TCAM to push the port's standalone VLAN into the + * frame when forwarding it to the CPU, as an egress modification rule on the + * CPU port. This is preferable because it has no side effects for the + * autonomous forwarding path, and accomplishes tag_8021q's primary goal of + * identifying the source port of each packet based on VLAN ID. * - * So at the end, each front-panel port will have one RX VID (also the PVID), - * the RX VID of all other front-panel ports that are in the same bridge, and - * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all - * front-panel ports, and on top of that, is also tagged-input and - * tagged-output (VLAN trunk). + * The other is to commit the tag_8021q VLAN as a PVID to the VLAN table, and + * to configure the port as VLAN-unaware. 
This is less preferable because + * unique source port identification can only be done for standalone ports; + * under a VLAN-unaware bridge, all ports share the same tag_8021q VLAN as + * PVID, and under a VLAN-aware bridge, packets received by software will not + * have tag_8021q VLANs appended, just bridge VLANs. * - * CPU port CPU port - * +-------------+-----+-------------+ +-------------+-----+-------------+ - * | RX VID | | | | TX VID | | | - * | of swp0 | | | | of swp0 | | | - * | +-----+ | | +-----+ | - * | ^ T | | | Tagged | - * | | | | | ingress | - * | +-------+---+---+-------+ | | +-----------+ | - * | | | | | | | | Untagged | - * | | U v U v U v | | v egress | - * | +-----+ +-----+ +-----+ +-----+ | | +-----+ +-----+ +-----+ +-----+ | - * | | | | | | | | | | | | | | | | | | | | - * | |PVID | | | | | | | | | | | | | | | | | | - * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ - * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 + * For tag_8021q implementations of the second type, this method is used to + * replace the standalone tag_8021q VLAN of a port with the tag_8021q VLAN to + * be used for VLAN-unaware bridging. 
*/ -static bool -dsa_port_tag_8021q_bridge_match(struct dsa_port *dp, - struct dsa_notifier_bridge_info *info) +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) { - /* Don't match on self */ - if (dp->ds->dst->index == info->tree_index && - dp->ds->index == info->sw_index && - dp->index == info->port) - return false; - - if (dsa_port_is_user(dp)) - return dsa_port_offloads_bridge(dp, &info->bridge); - - return false; -} - -int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info) -{ - struct dsa_switch *targeted_ds; - struct dsa_port *targeted_dp; - struct dsa_port *dp; - u16 targeted_rx_vid; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 standalone_vid, bridge_vid; int err; - if (!ds->tag_8021q_ctx) - return 0; - - targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); - targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); - - dsa_switch_for_each_port(dp, ds) { - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - - if (!dsa_port_tag_8021q_bridge_match(dp, info)) - continue; + /* Delete the standalone VLAN of the port and replace it with a + * bridging VLAN + */ + standalone_vid = dsa_tag_8021q_standalone_vid(dp); + bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num); - /* Install the RX VID of the targeted port in our VLAN table */ - err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true); - if (err) - return err; + err = dsa_port_tag_8021q_vlan_add(dp, bridge_vid, true); + if (err) + return err; - /* Install our RX VID into the targeted port's VLAN table */ - err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true); - if (err) - return err; - } + dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false); return 0; } +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join); -int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info) +void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, + 
struct dsa_bridge bridge) { - struct dsa_switch *targeted_ds; - struct dsa_port *targeted_dp; - struct dsa_port *dp; - u16 targeted_rx_vid; - - if (!ds->tag_8021q_ctx) - return 0; - - targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); - targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); - - dsa_switch_for_each_port(dp, ds) { - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - - if (!dsa_port_tag_8021q_bridge_match(dp, info)) - continue; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 standalone_vid, bridge_vid; + int err; - /* Remove the RX VID of the targeted port from our VLAN table */ - dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true); + /* Delete the bridging VLAN of the port and replace it with a + * standalone VLAN + */ + standalone_vid = dsa_tag_8021q_standalone_vid(dp); + bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num); - /* Remove our RX VID from the targeted port's VLAN table */ - dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true); + err = dsa_port_tag_8021q_vlan_add(dp, standalone_vid, false); + if (err) { + dev_err(ds->dev, + "Failed to delete tag_8021q standalone VLAN %d from port %d: %pe\n", + standalone_vid, port, ERR_PTR(err)); } - return 0; -} - -int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) -{ - u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num); - - return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid, - true); + dsa_port_tag_8021q_vlan_del(dp, bridge_vid, true); } -EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_leave); -void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) -{ - u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num); - - dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid, true); -} -EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload); - -/* Set up a port's 
tag_8021q RX and TX VLAN for standalone mode operation */ +/* Set up a port's standalone tag_8021q VLAN */ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 vid = dsa_tag_8021q_standalone_vid(dp); struct net_device *master; int err; @@ -446,30 +342,16 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) master = dp->cpu_dp->master; - /* Add this user port's RX VID to the membership list of all others - * (including itself). This is so that bridging will not be hindered. - * L2 forwarding rules still take precedence when there are no VLAN - * restrictions, so there are no concerns about leaking traffic. - */ - err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false); + err = dsa_port_tag_8021q_vlan_add(dp, vid, false); if (err) { dev_err(ds->dev, - "Failed to apply RX VID %d to port %d: %pe\n", - rx_vid, port, ERR_PTR(err)); + "Failed to apply standalone VID %d to port %d: %pe\n", + vid, port, ERR_PTR(err)); return err; } - /* Add @rx_vid to the master's RX filter. */ - vlan_vid_add(master, ctx->proto, rx_vid); - - /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false); - if (err) { - dev_err(ds->dev, - "Failed to apply TX VID %d on port %d: %pe\n", - tx_vid, port, ERR_PTR(err)); - return err; - } + /* Add the VLAN to the master's RX filter. 
*/ + vlan_vid_add(master, ctx->proto, vid); return err; } @@ -478,8 +360,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 vid = dsa_tag_8021q_standalone_vid(dp); struct net_device *master; /* The CPU port is implicitly configured by @@ -490,11 +371,9 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) master = dp->cpu_dp->master; - dsa_port_tag_8021q_vlan_del(dp, rx_vid, false); + dsa_port_tag_8021q_vlan_del(dp, vid, false); - vlan_vid_del(master, ctx->proto, rx_vid); - - dsa_port_tag_8021q_vlan_del(dp, tx_vid, false); + vlan_vid_del(master, ctx->proto, vid); } static int dsa_tag_8021q_setup(struct dsa_switch *ds) @@ -573,7 +452,37 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id) +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, + int vbid) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + + if (WARN_ON(!vbid)) + return NULL; + + dsa_tree_for_each_user_port(dp, dst) { + if (!dp->bridge) + continue; + + if (dp->stp_state != BR_STATE_LEARNING && + dp->stp_state != BR_STATE_FORWARDING) + continue; + + if (dp->cpu_dp != cpu_dp) + continue; + + if (dsa_port_bridge_num_get(dp) == vbid) + return dp->slave; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid); + +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, + int *vbid) { u16 vid, tci; @@ -590,6 +499,10 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id) *source_port = dsa_8021q_rx_source_port(vid); *switch_id = dsa_8021q_rx_switch_id(vid); + + if (vbid) + *vbid = 
dsa_tag_8021q_rx_vbid(vid); + skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } EXPORT_SYMBOL_GPL(dsa_8021q_rcv); diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 8abf39dcac64..c8b4bbd46191 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -246,12 +246,14 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, if (trunk) { struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_lag *lag; /* The exact source port is not available in the tag, * so we inject the frame directly on the upper * team/bond. */ - skb->dev = dsa_lag_dev(cpu_dp->dst, source_port); + lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1); + skb->dev = lag ? lag->dev : NULL; } else { skb->dev = dsa_master_find_slave(dev, source_device, source_port); diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index bd6f1d0e5372..37ccf00404ea 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -62,7 +62,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); struct ethhdr *hdr = eth_hdr(skb); if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest)) @@ -77,7 +77,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, { int src_port, switch_id; - dsa_8021q_rcv(skb, &src_port, &switch_id); + dsa_8021q_rcv(skb, &src_port, &switch_id, NULL); skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); if (!skb->dev) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 72d5e0ef8dcf..83e4136516b0 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -226,7 +226,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, * TX VLAN that targets the bridge's entire broadcast domain, * instead of just the specific port. 
*/ - tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); + tx_vid = dsa_tag_8021q_bridge_vid(bridge_num); return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid); } @@ -267,7 +267,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); if (skb->offload_fwd_mark) return sja1105_imprecise_xmit(skb, netdev); @@ -295,7 +295,7 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - u16 tx_vid = dsa_tag_8021q_tx_vid(dp); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); __be32 *tx_trailer; __be16 *tx_header; int trailer_pos; @@ -509,7 +509,7 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb) * packet. 
*/ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, - int *switch_id, u16 *vid) + int *switch_id, int *vbid, u16 *vid) { struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb); u16 vlan_tci; @@ -519,8 +519,8 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, else vlan_tci = ntohs(hdr->h_vlan_TCI); - if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK)) - return dsa_8021q_rcv(skb, source_port, switch_id); + if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK)) + return dsa_8021q_rcv(skb, source_port, switch_id, vbid); /* Try our best with imprecise RX */ *vid = vlan_tci & VLAN_VID_MASK; @@ -529,7 +529,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev) { - int source_port = -1, switch_id = -1; + int source_port = -1, switch_id = -1, vbid = -1; struct sja1105_meta meta = {0}; struct ethhdr *hdr; bool is_link_local; @@ -542,7 +542,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, if (sja1105_skb_has_tag_8021q(skb)) { /* Normal traffic path. */ - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid); + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); } else if (is_link_local) { /* Management traffic path. 
Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of @@ -561,7 +561,9 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, return NULL; } - if (source_port == -1 || switch_id == -1) + if (vbid >= 1) + skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); + else if (source_port == -1 || switch_id == -1) skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); else skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); @@ -686,7 +688,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, static struct sk_buff *sja1110_rcv(struct sk_buff *skb, struct net_device *netdev) { - int source_port = -1, switch_id = -1; + int source_port = -1, switch_id = -1, vbid = -1; bool host_only = false; u16 vid = 0; @@ -700,9 +702,11 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, /* Packets with in-band control extensions might still have RX VLANs */ if (likely(sja1105_skb_has_tag_8021q(skb))) - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid); + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); - if (source_port == -1 || switch_id == -1) + if (vbid >= 1) + skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); + else if (source_port == -1 || switch_id == -1) skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); else skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 75856db299e9..29d01662a48b 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -363,7 +363,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy 
ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_BUF_LEN + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 18a5035d3bee..9f33c9689b56 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -54,7 +54,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ nla_total_size(sizeof(u32)) + /* _RINGS_TX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ - nla_total_size(sizeof(u8)); /* _RINGS_TCP_DATA_SPLIT */ + nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ + nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */ } static int rings_fill_reply(struct sk_buff *skb, @@ -91,7 +92,9 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) || (kr->tcp_data_split && (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, - kr->tcp_data_split)))) + kr->tcp_data_split))) || + (kr->cqe_size && + (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size)))) return -EMSGSIZE; return 0; @@ -119,6 +122,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), + [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), }; int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) @@ -159,6 +163,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); ethnl_update_u32(&kernel_ringparam.rx_buf_len, tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); + 
ethnl_update_u32(&kernel_ringparam.cqe_size, + tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); ret = 0; if (!mod) goto out_ops; @@ -190,6 +196,15 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) goto out_ops; } + if (kernel_ringparam.cqe_size && + !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_CQE_SIZE], + "setting cqe size not supported"); + goto out_ops; + } + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); if (ret < 0) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index b3c6ffa1894d..584e21788799 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -20,6 +20,13 @@ #include "hsr_framereg.h" #include "hsr_netlink.h" +#ifdef CONFIG_LOCKDEP +int lockdep_hsr_is_held(spinlock_t *lock) +{ + return lockdep_is_held(lock); +} +#endif + u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr) { u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed); @@ -27,11 +34,12 @@ u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr) return reciprocal_scale(hash, hsr->hash_buckets); } -struct hsr_node *hsr_node_get_first(struct hlist_head *head) +struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock) { struct hlist_node *first; - first = rcu_dereference(hlist_first_rcu(head)); + first = rcu_dereference_bh_check(hlist_first_rcu(head), + lockdep_hsr_is_held(lock)); if (first) return hlist_entry(first, struct hsr_node, mac_list); @@ -59,7 +67,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { struct hsr_node *node; - node = hsr_node_get_first(&hsr->self_node_db); + node = hsr_node_get_first(&hsr->self_node_db, &hsr->list_lock); if (!node) { WARN_ONCE(1, "HSR: No self node\n"); return false; @@ -106,7 +114,7 @@ int hsr_create_self_node(struct hsr_priv *hsr, ether_addr_copy(node->macaddress_B, addr_b); spin_lock_bh(&hsr->list_lock); - oldnode = 
hsr_node_get_first(self_node_db); + oldnode = hsr_node_get_first(self_node_db, &hsr->list_lock); if (oldnode) { hlist_replace_rcu(&oldnode->mac_list, &node->mac_list); spin_unlock_bh(&hsr->list_lock); @@ -125,7 +133,7 @@ void hsr_del_self_node(struct hsr_priv *hsr) struct hsr_node *node; spin_lock_bh(&hsr->list_lock); - node = hsr_node_get_first(self_node_db); + node = hsr_node_get_first(self_node_db, &hsr->list_lock); if (node) { hlist_del_rcu(&node->mac_list); kfree_rcu(node, rcu_head); @@ -191,7 +199,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, spin_lock_bh(&hsr->list_lock); hlist_for_each_entry_rcu(node, node_db, mac_list, - lockdep_is_held(&hsr->list_lock)) { + lockdep_hsr_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) @@ -597,7 +605,8 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, hash = hsr_mac_hash(hsr, addr); if (!_pos) { - node = hsr_node_get_first(&hsr->node_db[hash]); + node = hsr_node_get_first(&hsr->node_db[hash], + &hsr->list_lock); if (node) ether_addr_copy(addr, node->macaddress_A); return node; diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index d7cce6b161e3..f3762e9e42b5 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -28,8 +28,14 @@ struct hsr_frame_info { bool is_from_san; }; +#ifdef CONFIG_LOCKDEP +int lockdep_hsr_is_held(spinlock_t *lock); +#else +#define lockdep_hsr_is_held(lock) 1 +#endif + u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr); -struct hsr_node *hsr_node_get_first(struct hlist_head *head); +struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock); void hsr_del_self_node(struct hsr_priv *hsr); void hsr_del_nodes(struct hlist_head *node_db); struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index ca556bda3467..b158ba409f9a 100644 --- a/net/hsr/hsr_main.h +++ 
b/net/hsr/hsr_main.h @@ -45,22 +45,6 @@ /* PRP V1 life redundancy box MAC address */ #define PRP_TLV_REDBOX_MAC 30 -/* HSR Tag. - * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, - * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, - * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, - * encapsulated protocol } instead. - * - * Field names as defined in the IEC:2010 standard for HSR. - */ -struct hsr_tag { - __be16 path_and_LSDU_size; - __be16 sequence_nr; - __be16 encap_proto; -} __packed; - -#define HSR_HLEN 6 - #define HSR_V1_SUP_LSDUSIZE 52 #define HSR_HSIZE_SHIFT 8 diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index be6f06adefe0..a91283d1e5bf 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -130,6 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, goto err; fq->q.stamp = skb->tstamp; + fq->q.mono_delivery_time = skb->mono_delivery_time; if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c465bac1eb0..72fde2888ad2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1376,8 +1376,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, } ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->callbacks.gso_segment)) + if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; + } if (IS_ERR_OR_NULL(segs)) goto out; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4db0325f6e1a..2d0c05ca9c6f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -293,7 +293,7 @@ static int arp_constructor(struct neighbour *neigh) static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) { dst_link_failure(skb); - kfree_skb(skb); + kfree_skb_reason(skb, 
SKB_DROP_REASON_NEIGH_FAILED); } /* Create and send an arp packet. */ @@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -static int arp_invalidate(struct net_device *dev, __be32 ip) +int arp_invalidate(struct net_device *dev, __be32 ip, bool force) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; struct neigh_table *tbl = &arp_tbl; if (neigh) { + if ((neigh->nud_state & NUD_VALID) && !force) { + neigh_release(neigh); + return 0; + } + if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| @@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - return arp_invalidate(dev, ip); + return arp_invalidate(dev, ip, true); } /* diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fba2bffd65f7..53a6b14dc50a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -104,6 +104,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_FLAGS] = { .type = NLA_U32 }, [IFA_RT_PRIORITY] = { .type = NLA_U32 }, [IFA_TARGET_NETNSID] = { .type = NLA_S32 }, + [IFA_PROTO] = { .type = NLA_U8 }, }; struct inet_fill_args { @@ -889,6 +890,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, if (tb[IFA_RT_PRIORITY]) ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); + if (tb[IFA_PROTO]) + ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]); + if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; @@ -1625,6 +1629,7 @@ static size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_BROADCAST */ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + nla_total_size(4) /* IFA_FLAGS */ + + nla_total_size(1) /* IFA_PROTO */ + nla_total_size(4) /* IFA_RT_PRIORITY */ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ } @@ -1699,6 +1704,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_in_addr(skb, IFA_BROADCAST, 
ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + (ifa->ifa_proto && + nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 851f542928a3..e1b1d080e908 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -671,7 +671,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e0730c4d07d6..7408051632ac 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1124,9 +1124,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. 
*/ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1140,6 +1142,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_prefixlen < 31) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 341096807100..63948f6aeca0 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -572,6 +572,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; + head->mono_delivery_time = q->mono_delivery_time; } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 00ec819f949b..92ba3350274b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -79,7 +79,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(opt->optlen)) ip_forward_options(skb); - skb->tstamp = 0; + skb_clear_tstamp(skb); return dst_output(net, sk, skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fad803d2d711..fb153569889e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -349,6 +349,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; + qp->q.mono_delivery_time = skb->mono_delivery_time; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d94f9f7e60c3..95f7bb052784 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c 
@@ -226,6 +226,7 @@ resubmit: static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_clear_delivery_time(skb); __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0c0574eb5f5b..00b4bf26fd93 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -233,7 +233,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; } @@ -317,7 +317,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk case NET_XMIT_CN: return __ip_finish_output(net, sk, skb) ? : ret; default: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } } @@ -337,7 +337,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, case NET_XMIT_SUCCESS: break; default: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } @@ -536,7 +536,7 @@ packet_routed: no_route: rcu_read_unlock(); IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES); return -EHOSTUNREACH; } EXPORT_SYMBOL(__ip_queue_xmit); @@ -761,6 +761,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; struct sk_buff *skb2; + bool mono_delivery_time = skb->mono_delivery_time; struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; @@ -852,7 +853,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, } } - skb->tstamp = tstamp; + skb_set_delivery_time(skb, tstamp, mono_delivery_time); err = output(net, sk, skb); if (!err) @@ -908,7 +909,7 @@ slow_path: /* * Put this fragment into the sending queue. 
*/ - skb2->tstamp = tstamp; + skb_set_delivery_time(skb2, tstamp, mono_delivery_time); err = output(net, sk, skb2); if (err) goto fail; @@ -991,7 +992,7 @@ static int __ip_append_data(struct sock *sk, if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) - tskey = sk->sk_tskey++; + tskey = atomic_inc_return(&sk->sk_tskey) - 1; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1727,6 +1728,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + nskb->mono_delivery_time = !!transmit_time; ip_push_pending_frames(sk, &fl4); } out: diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3a5994b50571..3ee947557b88 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -187,7 +187,6 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif } else { - pr_err("ping: protocol(%x) is not supported\n", ntohs(skb->protocol)); return NULL; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 760e8221d321..33f20134e3f1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1683,11 +1683,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (!copied) copied = used; break; - } else if (used <= len) { - seq += used; - copied += used; - offset += used; } + if (WARN_ON_ONCE(used > len)) + used = len; + seq += used; + copied += used; + offset += used; + /* If recv_actor drops the lock (e.g. 
TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it @@ -4431,6 +4433,76 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke } EXPORT_SYMBOL(tcp_md5_hash_key); +/* Called with rcu_read_lock() */ +bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + enum skb_drop_reason *reason, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + /* + * This gets called for each TCP segment that arrives + * so we want to be efficient. + * We have 3 drop cases: + * o No MD5 hash and one expected. + * o MD5 hash and we're not expecting one. + * o MD5 hash and its wrong. + */ + const __u8 *hash_location = NULL; + struct tcp_md5sig_key *hash_expected; + const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sock *tp = tcp_sk(sk); + int genhash, l3index; + u8 newhash[16]; + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family); + hash_location = tcp_parse_md5sig_option(th); + + /* We've parsed the options - do we have a hash? 
*/ + if (!hash_expected && !hash_location) + return false; + + if (hash_expected && !hash_location) { + *reason = SKB_DROP_REASON_TCP_MD5NOTFOUND; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + return true; + } + + if (!hash_expected && hash_location) { + *reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + return true; + } + + /* check the signature */ + genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected, + NULL, skb); + + if (genhash || memcmp(hash_location, newhash, 16) != 0) { + *reason = SKB_DROP_REASON_TCP_MD5FAILURE; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); + if (family == AF_INET) { + net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n", + saddr, ntohs(th->source), + daddr, ntohs(th->dest), + genhash ? " tcp_v4_calc_md5_hash failed" + : "", l3index); + } else { + net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", + genhash ? "failed" : "mismatch", + saddr, ntohs(th->source), + daddr, ntohs(th->dest), l3index); + } + return true; + } + return false; +} +EXPORT_SYMBOL(tcp_inbound_md5_hash); + #endif void tcp_done(struct sock *sk) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 92e65d56dc2c..2088f93fa37b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4684,10 +4684,16 @@ static bool tcp_ooo_try_coalesce(struct sock *sk, return res; } -static void tcp_drop(struct sock *sk, struct sk_buff *skb) +static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason) { sk_drops_add(sk, skb); - __kfree_skb(skb); + kfree_skb_reason(skb, reason); +} + +static void tcp_drop(struct sock *sk, struct sk_buff *skb) +{ + tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED); } /* This one checks to see if we can put data from the @@ -4773,7 +4779,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, 
skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); sk->sk_data_ready(sk); - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM); return; } @@ -4836,7 +4842,8 @@ coalesce_done: /* All the bits are present. Drop. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, + SKB_DROP_REASON_TCP_OFOMERGE); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4855,7 +4862,8 @@ coalesce_done: TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb1); + tcp_drop_reason(sk, skb1, + SKB_DROP_REASON_TCP_OFOMERGE); goto merge_right; } } else if (tcp_ooo_try_coalesce(sk, skb1, @@ -4883,7 +4891,7 @@ merge_right: tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb1); + tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE); } /* If there is no skb after us, we are the last_skb ! */ if (!skb1) @@ -4982,6 +4990,7 @@ void tcp_data_ready(struct sock *sk) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + enum skb_drop_reason reason; bool fragstolen; int eaten; @@ -5000,6 +5009,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); + reason = SKB_DROP_REASON_NOT_SPECIFIED; tp->rx_opt.dsack = 0; /* Queue data for delivery to the user. 
@@ -5008,6 +5018,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) */ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { if (tcp_receive_window(tp) == 0) { + reason = SKB_DROP_REASON_TCP_ZEROWINDOW; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; } @@ -5017,6 +5028,7 @@ queue_and_out: if (skb_queue_len(&sk->sk_receive_queue) == 0) sk_forced_mem_schedule(sk, skb->truesize); else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { + reason = SKB_DROP_REASON_PROTO_MEM; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); sk->sk_data_ready(sk); goto drop; @@ -5053,6 +5065,7 @@ queue_and_out: if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { tcp_rcv_spurious_retrans(sk, skb); /* A retransmit, 2nd most common case. Force an immediate ack. */ + reason = SKB_DROP_REASON_TCP_OLD_DATA; NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -5060,13 +5073,16 @@ out_of_window: tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_schedule_ack(sk); drop: - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, reason); return; } /* Out of window. F.e. zero window probe. */ - if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) + if (!before(TCP_SKB_CB(skb)->seq, + tp->rcv_nxt + tcp_receive_window(tp))) { + reason = SKB_DROP_REASON_TCP_OVERWINDOW; goto out_of_window; + } if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ @@ -5076,6 +5092,7 @@ drop: * remembering D-SACK for its head made in previous line. 
*/ if (!tcp_receive_window(tp)) { + reason = SKB_DROP_REASON_TCP_ZEROWINDOW; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; } @@ -5781,6 +5798,7 @@ discard: */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct tcphdr *th = (const struct tcphdr *)skb->data; struct tcp_sock *tp = tcp_sk(sk); unsigned int len = skb->len; @@ -5869,6 +5887,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr; return; } else { /* Header too small */ + reason = SKB_DROP_REASON_PKT_TOO_SMALL; TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } @@ -5924,8 +5943,10 @@ slow_path: if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; - if (!th->ack && !th->rst && !th->syn) + if (!th->ack && !th->rst && !th->syn) { + reason = SKB_DROP_REASON_TCP_FLAGS; goto discard; + } /* * Standard slow path. @@ -5951,12 +5972,13 @@ step5: return; csum_error: + reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); discard: - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, reason); } EXPORT_SYMBOL(tcp_rcv_established); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6873f46fc8ba..411357ad9757 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1409,72 +1409,6 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb); #endif -/* Called with rcu_read_lock() */ -static bool tcp_v4_inbound_md5_hash(const struct sock *sk, - const struct sk_buff *skb, - int dif, int sdif) -{ -#ifdef CONFIG_TCP_MD5SIG - /* - * This gets called for each TCP segment that arrives - * so we want to be efficient. - * We have 3 drop cases: - * o No MD5 hash and one expected. - * o MD5 hash and we're not expecting one. - * o MD5 hash and its wrong. 
- */ - const __u8 *hash_location = NULL; - struct tcp_md5sig_key *hash_expected; - const struct iphdr *iph = ip_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); - const union tcp_md5_addr *addr; - unsigned char newhash[16]; - int genhash, l3index; - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - addr = (union tcp_md5_addr *)&iph->saddr; - hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - hash_location = tcp_parse_md5sig_option(th); - - /* We've parsed the options - do we have a hash? */ - if (!hash_expected && !hash_location) - return false; - - if (hash_expected && !hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return true; - } - - if (!hash_expected && hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return true; - } - - /* Okay, so this is hash_expected and hash_location - - * so we need to calculate the checksum. - */ - genhash = tcp_v4_md5_hash_skb(newhash, - hash_expected, - NULL, skb); - - if (genhash || memcmp(hash_location, newhash, 16) != 0) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n", - &iph->saddr, ntohs(th->source), - &iph->daddr, ntohs(th->dest), - genhash ? 
" tcp_v4_calc_md5_hash failed" - : "", l3index); - return true; - } - return false; -#endif - return false; -} - static void tcp_v4_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) @@ -1704,6 +1638,7 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, */ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason reason; struct sock *rsk; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1726,6 +1661,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } + reason = SKB_DROP_REASON_NOT_SPECIFIED; if (tcp_checksum_complete(skb)) goto csum_err; @@ -1753,7 +1689,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) reset: tcp_v4_send_reset(rsk, skb); discard: - kfree_skb(skb); + kfree_skb_reason(skb, reason); /* Be careful here. If this function gets more complicated and * gcc suffers from register pressure on the x86, sk (in %ebx) * might be destroyed here. This current version compiles correctly, @@ -1762,6 +1698,7 @@ discard: return 0; csum_err: + reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); @@ -1807,7 +1744,8 @@ int tcp_v4_early_demux(struct sk_buff *skb) return 0; } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason) { u32 limit, tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; @@ -1833,6 +1771,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); trace_tcp_bad_csum(skb); + *reason = SKB_DROP_REASON_TCP_CSUM; __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); return true; @@ -1921,6 +1860,7 @@ no_coalesce: if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); + *reason = 
SKB_DROP_REASON_SOCKET_BACKLOG; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); return true; } @@ -1971,13 +1911,13 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + enum skb_drop_reason drop_reason; int sdif = inet_sdif(skb); int dif = inet_iif(skb); const struct iphdr *iph; const struct tcphdr *th; bool refcounted; struct sock *sk; - int drop_reason; int ret; drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -2025,7 +1965,9 @@ process: struct sock *nsk; sk = req->rsk_listener; - if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) { + if (unlikely(tcp_inbound_md5_hash(sk, skb, &drop_reason, + &iph->saddr, &iph->daddr, + AF_INET, dif, sdif))) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -2057,6 +1999,8 @@ process: iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen); + } else { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); @@ -2092,10 +2036,13 @@ process: } } - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto discard_and_relse; + } - if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif)) + if (tcp_inbound_md5_hash(sk, skb, &drop_reason, &iph->saddr, + &iph->daddr, AF_INET, dif, sdif)) goto discard_and_relse; nf_reset_ct(skb); @@ -2124,7 +2071,7 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v4_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb)) + if (tcp_add_backlog(sk, skb, &drop_reason)) goto discard_and_relse; } bh_unlock_sock(sk); @@ -2166,6 +2113,7 @@ discard_and_relse: do_time_wait: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; inet_twsk_put(inet_twsk(sk)); goto discard_it; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e76bf1e9251e..2319531267c6 100644 --- 
a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1253,7 +1253,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp = tcp_sk(sk); prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); if (clone_it) { oskb = skb; @@ -1589,7 +1589,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, skb_split(skb, buff, len); - buff->tstamp = skb->tstamp; + skb_set_delivery_time(buff, skb->tstamp, true); tcp_fragment_tstamp(skb, buff); old_factor = tcp_skb_pcount(skb); @@ -2616,7 +2616,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ - skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; + tp->tcp_wstamp_ns = tp->tcp_clock_cache; + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ @@ -3541,11 +3542,12 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) - skb->skb_mstamp_ns = cookie_init_timestamp(req, now); + skb_set_delivery_time(skb, cookie_init_timestamp(req, now), + true); else #endif { - skb->skb_mstamp_ns = now; + skb_set_delivery_time(skb, now, true); if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3594,7 +3596,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts); - skb->skb_mstamp_ns = now; + skb_set_delivery_time(skb, now, true); 
tcp_add_tx_delay(skb, tp); return skb; @@ -3771,7 +3773,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - syn->skb_mstamp_ns = syn_data->skb_mstamp_ns; + skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true); /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index b91003538d87..bc3a043a5d5c 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) list_for_each_entry(node, &info->shared->devices, list) if (node->dev == dev) break; - if (node->dev != dev) + if (list_entry_is_head(node, &info->shared->devices, list)) return; list_del(&node->list); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f307da17f21..b22504176588 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1115,6 +1115,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, ifa->prefix_len = cfg->plen; ifa->rt_priority = cfg->rt_priority; ifa->flags = cfg->ifa_flags; + ifa->ifa_proto = cfg->ifa_proto; /* No need to add the TENTATIVE flag for addresses with NODAD */ if (!(cfg->ifa_flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; @@ -2593,6 +2594,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, .valid_lft = valid_lft, .preferred_lft = prefered_lft, .scope = addr_type & IPV6_ADDR_SCOPE_MASK, + .ifa_proto = IFAPROT_KERNEL_RA }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -3077,7 +3079,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) } static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, - int plen, int scope) + int plen, int scope, u8 proto) { struct inet6_ifaddr *ifp; struct ifa6_config cfg = { @@ -3086,7 +3088,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, 
.ifa_flags = IFA_F_PERMANENT, .valid_lft = INFINITY_LIFE_TIME, .preferred_lft = INFINITY_LIFE_TIME, - .scope = scope + .scope = scope, + .ifa_proto = proto }; ifp = ipv6_add_addr(idev, &cfg, true, NULL); @@ -3131,7 +3134,7 @@ static void add_v4_addrs(struct inet6_dev *idev) } if (addr.s6_addr32[3]) { - add_addr(idev, &addr, plen, scope); + add_addr(idev, &addr, plen, scope, IFAPROT_UNSPEC); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, GFP_KERNEL); return; @@ -3154,7 +3157,8 @@ static void add_v4_addrs(struct inet6_dev *idev) flag |= IFA_HOST; } - add_addr(idev, &addr, plen, flag); + add_addr(idev, &addr, plen, flag, + IFAPROT_UNSPEC); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, GFP_KERNEL); } @@ -3177,7 +3181,7 @@ static void init_loopback(struct net_device *dev) return; } - add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFAPROT_KERNEL_LO); } void addrconf_add_linklocal(struct inet6_dev *idev, @@ -3189,7 +3193,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, .ifa_flags = flags | IFA_F_PERMANENT, .valid_lft = INFINITY_LIFE_TIME, .preferred_lft = INFINITY_LIFE_TIME, - .scope = IFA_LINK + .scope = IFA_LINK, + .ifa_proto = IFAPROT_KERNEL_LL }; struct inet6_ifaddr *ifp; @@ -3725,6 +3730,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; bool keep_addr = false; + bool was_ready; int state, i; ASSERT_RTNL(); @@ -3790,7 +3796,10 @@ restart: addrconf_del_rs_timer(idev); - /* Step 2: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf, repeated down + * detection + */ + was_ready = idev->if_flags & IF_READY; if (!unregister) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3864,7 +3873,7 @@ restart: if (unregister) { ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - } else { + } else if (was_ready) { ipv6_mc_down(idev); } @@ -4627,6 +4636,7 @@ static const 
struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_FLAGS] = { .len = sizeof(u32) }, [IFA_RT_PRIORITY] = { .len = sizeof(u32) }, [IFA_TARGET_NETNSID] = { .type = NLA_S32 }, + [IFA_PROTO] = { .type = NLA_U8 }, }; static int @@ -4752,6 +4762,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, ifp->tstamp = jiffies; ifp->valid_lft = cfg->valid_lft; ifp->prefered_lft = cfg->preferred_lft; + ifp->ifa_proto = cfg->ifa_proto; if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) ifp->rt_priority = cfg->rt_priority; @@ -4845,6 +4856,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFA_RT_PRIORITY]) cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); + if (tb[IFA_PROTO]) + cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]); + cfg.valid_lft = INFINITY_LIFE_TIME; cfg.preferred_lft = INFINITY_LIFE_TIME; @@ -4948,6 +4962,7 @@ static inline int inet6_ifaddr_msgsize(void) + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)) + nla_total_size(4) /* IFA_FLAGS */ + + nla_total_size(1) /* IFA_PROTO */ + nla_total_size(4) /* IFA_RT_PRIORITY */; } @@ -4985,6 +5000,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; + spin_lock_bh(&ifa->lock); if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; @@ -5006,6 +5022,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } + spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || @@ -5025,6 +5042,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; + if (ifa->ifa_proto && + nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) + goto error; + nlmsg_end(skb, nlh); return 0; diff 
--git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8bb2c407b46b..7591160edce1 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -707,7 +707,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index e159eb4328a8..1098131ed90c 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -635,7 +635,8 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_schema *sc, u8 sclen, bool is_input) { - struct __kernel_sock_timeval ts; + struct timespec64 ts; + ktime_t tstamp; u64 raw64; u32 raw32; u16 raw16; @@ -680,10 +681,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { - if (!skb->tstamp) - __net_timestamp(skb); + tstamp = skb_tstamp_cond(skb, true); + ts = ktime_to_timespec64(tstamp); - skb_get_new_timestamp(skb, &ts); *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); } data += sizeof(__be32); @@ -694,13 +694,12 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { - if (!skb->tstamp) - __net_timestamp(skb); + if (!trace->type.bit2) { + tstamp = skb_tstamp_cond(skb, true); + ts = ktime_to_timespec64(tstamp); + } - if (!trace->type.bit2) - skb_get_new_timestamp(skb, &ts); - - *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + *(__be32 *)data = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC)); } data += sizeof(__be32); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index d4b1e2c5aa76..5b5ea35635f9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -459,6 +459,7 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct 
sk_buff *skb) { + skb_clear_delivery_time(skb); rcu_read_lock(); ip6_protocol_deliver_rcu(net, skb, 0, false); rcu_read_unlock(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d37a79a8554e..c4fc03c1ac99 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -114,6 +114,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3286b64ec03d..e69fac576970 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -130,7 +130,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * rcu_read_unlock_bh(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; } @@ -202,7 +202,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? : ret; default: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } } @@ -217,7 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } @@ -440,7 +440,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, } #endif - skb->tstamp = 0; + skb_clear_tstamp(skb); return dst_output(net, sk, skb); } @@ -813,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 
inet6_sk(skb->sk) : NULL; + bool mono_delivery_time = skb->mono_delivery_time; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; @@ -903,7 +904,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb->tstamp = tstamp; + skb_set_delivery_time(skb, tstamp, mono_delivery_time); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -962,7 +963,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - frag->tstamp = tstamp; + skb_set_delivery_time(frag, tstamp, mono_delivery_time); err = output(net, sk, frag); if (err) goto fail; @@ -1406,8 +1407,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (np->frag_size) mtu = np->frag_size; } - if (mtu < IPV6_MIN_MTU) - return -EINVAL; cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; @@ -1464,14 +1463,12 @@ static int __ip6_append_data(struct sock *sk, if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) - tskey = sk->sk_tskey++; + tskey = atomic_inc_return(&sk->sk_tskey) - 1; hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - - sizeof(struct frag_hdr); headersize = sizeof(struct ipv6hdr) + (opt ? 
opt->opt_flen + opt->opt_nflen : 0) + @@ -1479,6 +1476,13 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + if (mtu < fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + goto emsgsize; + + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit * the first fragment */ diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0ebaaec3faf9..a9775c830194 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1040,7 +1040,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, int ret; #ifdef CONFIG_IPV6_PIMSM_V2 - if (assert == MRT6MSG_WHOLEPKT) + if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) +sizeof(*msg)); else @@ -1056,7 +1056,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; #ifdef CONFIG_IPV6_PIMSM_V2 - if (assert == MRT6MSG_WHOLEPKT) { + if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { /* Ugly, but we have no choice with this interface. Duplicate old header, fix length etc. 
And all this only to mangle msg->im6_msgtype and @@ -1068,8 +1068,11 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, skb_reset_transport_header(skb); msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; - msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = mrt->mroute_reg_vif_num; + msg->im6_msgtype = assert; + if (assert == MRT6MSG_WRMIFWHOLE) + msg->im6_mif = mifi; + else + msg->im6_mif = mrt->mroute_reg_vif_num; msg->im6_pad = 0; msg->im6_src = ipv6_hdr(pkt)->saddr; msg->im6_dst = ipv6_hdr(pkt)->daddr; @@ -1650,6 +1653,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, mifi_t mifi; struct net *net = sock_net(sk); struct mr_table *mrt; + bool do_wrmifwhole; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1763,12 +1767,15 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, return -EINVAL; if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; + + do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); v = !!v; rtnl_lock(); ret = 0; if (v != mrt->mroute_do_pim) { mrt->mroute_do_pim = v; mrt->mroute_do_assert = v; + mrt->mroute_do_wrvifwhole = do_wrmifwhole; } rtnl_unlock(); return ret; @@ -2144,6 +2151,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); + if (mrt->mroute_do_wrvifwhole) + ip6mr_cache_report(mrt, skb, true_vifi, + MRT6MSG_WRMIFWHOLE); } goto dont_forward; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a8861db52c18..909f937befd7 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, } /* called with rcu_read_lock() */ -int igmp6_event_query(struct sk_buff *skb) +void igmp6_event_query(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); - if (!idev) - return -EINVAL; - - 
if (idev->dead) { - kfree_skb(skb); - return -ENODEV; - } + if (!idev || idev->dead) + goto out; spin_lock_bh(&idev->mc_query_lock); if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) { __skb_queue_tail(&idev->mc_query_queue, skb); if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0)) in6_dev_hold(idev); + skb = NULL; } spin_unlock_bh(&idev->mc_query_lock); - - return 0; +out: + kfree_skb(skb); } static void __mld_query_work(struct sk_buff *skb) @@ -1542,27 +1538,23 @@ static void mld_query_work(struct work_struct *work) } /* called with rcu_read_lock() */ -int igmp6_event_report(struct sk_buff *skb) +void igmp6_event_report(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); - if (!idev) - return -EINVAL; - - if (idev->dead) { - kfree_skb(skb); - return -ENODEV; - } + if (!idev || idev->dead) + goto out; spin_lock_bh(&idev->mc_report_lock); if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) { __skb_queue_tail(&idev->mc_report_queue, skb); if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0)) in6_dev_hold(idev); + skb = NULL; } spin_unlock_bh(&idev->mc_report_lock); - - return 0; +out: + kfree_skb(skb); } static void __mld_report_work(struct sk_buff *skb) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1c06d0cd02f7..fcb288b0ae13 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -466,9 +466,8 @@ static void ip6_nd_hdr(struct sk_buff *skb, hdr->daddr = *daddr; } -static void ndisc_send_skb(struct sk_buff *skb, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); @@ -515,6 +514,7 @@ static void ndisc_send_skb(struct sk_buff *skb, rcu_read_unlock(); } +EXPORT_SYMBOL(ndisc_send_skb); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, @@ -598,22 +598,16 @@ static void 
ndisc_send_unsol_na(struct net_device *dev) in6_dev_put(idev); } -void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - u64 nonce) +struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *saddr, u64 nonce) { - struct sk_buff *skb; - struct in6_addr addr_buf; int inc_opt = dev->addr_len; - int optlen = 0; + struct sk_buff *skb; struct nd_msg *msg; + int optlen = 0; - if (!saddr) { - if (ipv6_get_lladdr(dev, &addr_buf, - (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))) - return; - saddr = &addr_buf; - } + if (!saddr) + return NULL; if (ipv6_addr_any(saddr)) inc_opt = false; @@ -625,7 +619,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) - return; + return NULL; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { @@ -647,7 +641,28 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, memcpy(opt + 2, &nonce, 6); } - ndisc_send_skb(skb, daddr, saddr); + return skb; +} +EXPORT_SYMBOL(ndisc_ns_create); + +void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce) +{ + struct in6_addr addr_buf; + struct sk_buff *skb; + + if (!saddr) { + if (ipv6_get_lladdr(dev, &addr_buf, + (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) + return; + saddr = &addr_buf; + } + + skb = ndisc_ns_create(dev, solicit, saddr, nonce); + + if (skb) + ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6ab710b5a1a8..1da332450d98 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -121,6 +121,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = 
BR_INPUT_SKB_CB(skb)->frag_max_size; + bool mono_delivery_time = skb->mono_delivery_time; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; @@ -186,7 +187,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb->tstamp = tstamp; + skb_set_delivery_time(skb, tstamp, mono_delivery_time); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -219,7 +220,7 @@ slow_path: goto blackhole; } - skb2->tstamp = tstamp; + skb_set_delivery_time(skb2, tstamp, mono_delivery_time); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5c47be29b9ee..7dd3629dd19e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -264,6 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; + fq->q.mono_delivery_time = skb->mono_delivery_time; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 28e44782c94d..ff866f2a879e 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -194,6 +194,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; + fq->q.mono_delivery_time = skb->mono_delivery_time; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0c648bf07f39..cb2bb7d2e907 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -773,57 +773,6 @@ clear_hash_noput: #endif -static bool tcp_v6_inbound_md5_hash(const struct sock *sk, - const struct sk_buff *skb, - int dif, int sdif) -{ -#ifdef CONFIG_TCP_MD5SIG - const __u8 *hash_location = NULL; - struct tcp_md5sig_key 
*hash_expected; - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); - int genhash, l3index; - u8 newhash[16]; - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); - hash_location = tcp_parse_md5sig_option(th); - - /* We've parsed the options - do we have a hash? */ - if (!hash_expected && !hash_location) - return false; - - if (hash_expected && !hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return true; - } - - if (!hash_expected && hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return true; - } - - /* check the signature */ - genhash = tcp_v6_md5_hash_skb(newhash, - hash_expected, - NULL, skb); - - if (genhash || memcmp(hash_location, newhash, 16) != 0) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", - genhash ? 
"failed" : "mismatch", - &ip6h->saddr, ntohs(th->source), - &ip6h->daddr, ntohs(th->dest), l3index); - return true; - } -#endif - return false; -} - static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) @@ -921,12 +870,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 } #endif - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, - GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (!buff) return; - skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); + skb_reserve(buff, MAX_TCP_HEADER); t1 = skb_push(buff, tot_len); skb_reset_transport_header(buff); @@ -992,7 +940,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 } else { mark = sk->sk_mark; } - buff->tstamp = tcp_transmit_time(sk); + skb_set_delivery_time(buff, tcp_transmit_time(sk), true); } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; @@ -1472,6 +1420,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; + enum skb_drop_reason reason; struct tcp_sock *tp; /* Imagine: socket is IPv6. 
IPv4 packet arrives, @@ -1506,6 +1455,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (np->rxopt.all) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + reason = SKB_DROP_REASON_NOT_SPECIFIED; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst; @@ -1559,9 +1509,10 @@ reset: discard: if (opt_skb) __kfree_skb(opt_skb); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; csum_err: + reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); @@ -1627,6 +1578,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { + enum skb_drop_reason drop_reason; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; @@ -1636,6 +1588,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) int ret; struct net *net = dev_net(skb->dev); + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1649,8 +1602,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; - if (unlikely(th->doff < sizeof(struct tcphdr)/4)) + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; + } if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; @@ -1677,7 +1632,8 @@ process: struct sock *nsk; sk = req->rsk_listener; - if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { + if (tcp_inbound_md5_hash(sk, skb, &drop_reason, &hdr->saddr, + &hdr->daddr, AF_INET6, dif, sdif)) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -1706,6 +1662,8 @@ process: hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen); + } else { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); @@ -1741,14 +1699,19 @@ process: } } - if 
(!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto discard_and_relse; + } - if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) + if (tcp_inbound_md5_hash(sk, skb, &drop_reason, &hdr->saddr, + &hdr->daddr, AF_INET6, dif, sdif)) goto discard_and_relse; - if (tcp_filter(sk, skb)) + if (tcp_filter(sk, skb)) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; + } th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); @@ -1769,7 +1732,7 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v6_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb)) + if (tcp_add_backlog(sk, skb, &drop_reason)) goto discard_and_relse; } bh_unlock_sock(sk); @@ -1779,6 +1742,7 @@ put_and_return: return ret ? -1 : 0; no_tcp_socket: + drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; @@ -1786,6 +1750,7 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: + drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -1795,7 +1760,7 @@ bad_packet: } discard_it: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; discard_and_relse: @@ -1806,6 +1771,7 @@ discard_and_relse: do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; inet_twsk_put(inet_twsk(sk)); goto discard_it; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 8f4d49a7d3e8..eb0295d90039 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -319,7 +319,7 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm) */ static int __iucv_query_maxconn(void *param, unsigned long *max_pathid) { - unsigned long reg1 = (unsigned long)param; + unsigned long reg1 = virt_to_phys(param); int cc; asm volatile ( diff --git a/net/key/af_key.c b/net/key/af_key.c index 
de24a7d474df..9bf52a09b5ff 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2623,7 +2623,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL, net, NULL); + kma ? &k : NULL, net, NULL, 0); out: return err; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 74a878f213d3..1deb3d874a4b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include <linux/ieee80211.h> @@ -626,6 +626,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; } + if (test_sta_flag(sta, WLAN_STA_MFP) && + !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { + ht_dbg(sdata, + "MFP STA not authorized - deny BA session request %pM tid %d\n", + sta->sta.addr, tid); + return -EINVAL; + } + /* * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a * member of an IBSS, and has no other existing Block Ack agreement diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index da35791b8378..95aaf00c876c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -376,7 +376,7 @@ struct ieee80211_mgd_auth_data { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; - bool done; + bool done, waiting; bool peer_confirmed; bool timeout_started; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 20b57ddf149c..950be0f43b53 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -37,6 +37,7 @@ #define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) +#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) 
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) @@ -3016,8 +3017,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, (status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED || (auth_transaction == 1 && (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT || - status_code == WLAN_STATUS_SAE_PK)))) + status_code == WLAN_STATUS_SAE_PK)))) { + /* waiting for userspace now */ + ifmgd->auth_data->waiting = true; + ifmgd->auth_data->timeout = + jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY; + ifmgd->auth_data->timeout_started = true; + run_again(sdata, ifmgd->auth_data->timeout); goto notify_driver; + } sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); @@ -4608,10 +4616,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (ifmgd->auth_data && ifmgd->auth_data->timeout_started && time_after(jiffies, ifmgd->auth_data->timeout)) { - if (ifmgd->auth_data->done) { + if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) { /* - * ok ... we waited for assoc but userspace didn't, - * so let's just kill the auth data + * ok ... we waited for assoc or continuation but + * userspace didn't do it, so kill the auth data */ ieee80211_destroy_auth_data(sdata, false); } else if (ieee80211_auth(sdata)) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d54a4d98c648..beb6b92eb780 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2607,7 +2607,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, * address, so that the authenticator (e.g. hostapd) will see * the frame, but bridge won't forward it anywhere else. Note * that due to earlier filtering, the only other address can - * be the PAE group address. + * be the PAE group address, unless the hardware allowed them + * through in 802.3 offloaded mode. 
*/ if (unlikely(skb->protocol == sdata->control_port_protocol && !ether_addr_equal(ehdr->h_dest, sdata->vif.addr))) @@ -2922,13 +2923,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - ac = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee802_1d_to_ac[skb->priority]; q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; } - skb_set_queue_mapping(skb, q); + skb_set_queue_mapping(skb, ac); if (!--mesh_hdr->ttl) { if (!is_multicast_ether_addr(hdr->addr1)) @@ -4514,12 +4515,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, /* deliver to local stack */ skb->protocol = eth_type_trans(skb, fast_rx->dev); - memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->list) - list_add_tail(&skb->list, rx->list); - else - netif_receive_skb(skb); - + ieee80211_deliver_skb_to_local_stack(skb, rx); } static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, diff --git a/net/mctp/device.c b/net/mctp/device.c index 02ddc0f1bd3e..f49be882e98e 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -25,12 +25,25 @@ struct mctp_dump_cb { size_t a_idx; }; -/* unlocked: caller must hold rcu_read_lock */ +/* unlocked: caller must hold rcu_read_lock. + * Returned mctp_dev has its refcount incremented, or NULL if unset. + */ struct mctp_dev *__mctp_dev_get(const struct net_device *dev) { - return rcu_dereference(dev->mctp_ptr); + struct mctp_dev *mdev = rcu_dereference(dev->mctp_ptr); + + /* RCU guarantees that any mdev is still live. + * Zero refcount implies a pending free, return NULL. + */ + if (mdev) + if (!refcount_inc_not_zero(&mdev->refs)) + return NULL; + return mdev; } +/* Returned mctp_dev does not have refcount incremented. 
The returned pointer + * remains live while rtnl_lock is held, as that prevents mctp_unregister() + */ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->mctp_ptr); @@ -107,7 +120,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) struct ifaddrmsg *hdr; struct mctp_dev *mdev; int ifindex; - int idx, rc; + int idx = 0, rc; hdr = nlmsg_data(cb->nlh); // filter by ifindex if requested @@ -124,6 +137,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) if (mdev) { rc = mctp_dump_dev_addrinfo(mdev, skb, cb); + mctp_dev_put(mdev); // Error indicates full buffer, this // callback will get retried. if (rc < 0) @@ -209,7 +223,7 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!mdev) return -ENODEV; - if (!mctp_address_ok(addr->s_addr)) + if (!mctp_address_unicast(addr->s_addr)) return -EINVAL; /* Prevent duplicates. Under RTNL so don't need to lock for reading */ @@ -298,7 +312,7 @@ void mctp_dev_hold(struct mctp_dev *mdev) void mctp_dev_put(struct mctp_dev *mdev) { - if (refcount_dec_and_test(&mdev->refs)) { + if (mdev && refcount_dec_and_test(&mdev->refs)) { dev_put(mdev->dev); kfree_rcu(mdev, rcu); } @@ -370,6 +384,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev, if (!mdev) return 0; ret = nla_total_size(4); /* IFLA_MCTP_NET */ + mctp_dev_put(mdev); return ret; } @@ -413,10 +428,10 @@ static void mctp_unregister(struct net_device *dev) struct mctp_dev *mdev; mdev = mctp_dev_get_rtnl(dev); - if (mctp_known(dev) != (bool)mdev) { + if (mdev && !mctp_known(dev)) { // Sanity check, should match what was set in mctp_register - netdev_warn(dev, "%s: mdev pointer %d but type (%d) match is %d", - __func__, (bool)mdev, mctp_known(dev), dev->type); + netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d", + __func__, dev->type); return; } if (!mdev) @@ -440,7 +455,7 @@ static int mctp_register(struct net_device *dev) 
if (mdev) { if (!mctp_known(dev)) - netdev_warn(dev, "%s: mctp_dev set for unknown type %d", + netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d", __func__, dev->type); return 0; } diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index 6ad3e33bd4d4..ffa0f9e0983f 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -143,7 +143,7 @@ static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh, } eid = nla_get_u8(tb[NDA_DST]); - if (!mctp_address_ok(eid)) { + if (!mctp_address_unicast(eid)) { NL_SET_ERR_MSG(extack, "Invalid neighbour EID"); return -EINVAL; } diff --git a/net/mctp/route.c b/net/mctp/route.c index 0c4c56e1bd6e..d5e7db83fe9d 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -456,7 +456,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * the reassembly/response key */ if (!rc && flags & MCTP_HDR_FLAG_EOM) { - msk = container_of(key->sk, struct mctp_sock, sk); sock_queue_rcv_skb(key->sk, key->reasm_head); key->reasm_head = NULL; __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED); @@ -836,9 +835,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb = mctp_cb(skb); - struct mctp_route tmp_rt; + struct mctp_route tmp_rt = {0}; struct mctp_sk_key *key; - struct net_device *dev; struct mctp_hdr *hdr; unsigned long flags; unsigned int mtu; @@ -851,12 +849,12 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, if (rt) { ext_rt = false; - dev = NULL; - if (WARN_ON(!rt->dev)) goto out_release; } else if (cb->ifindex) { + struct net_device *dev; + ext_rt = true; rt = &tmp_rt; @@ -866,7 +864,6 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rcu_read_unlock(); return rc; } - rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -947,10 +944,9 @@ out_release: if (!ext_rt) mctp_route_release(rt); - dev_put(dev); + mctp_dev_put(tmp_rt.dev); return rc; - } /* route management */ @@ 
-962,7 +958,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *ert; - if (!mctp_address_ok(daddr_start)) + if (!mctp_address_unicast(daddr_start)) return -EINVAL; if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) @@ -1092,6 +1088,17 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) goto err_drop; + /* source must be valid unicast or null; drop reserved ranges and + * broadcast + */ + if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src))) + goto err_drop; + + /* dest address: as above, but allow broadcast */ + if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) || + mctp_address_broadcast(mh->dest))) + goto err_drop; + /* MCTP drivers must populate halen/haddr */ if (dev->type == ARPHRD_MCTP) { cb = mctp_cb(skb); @@ -1113,11 +1120,13 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, rt->output(rt, skb); mctp_route_release(rt); + mctp_dev_put(mdev); return NET_RX_SUCCESS; err_drop: kfree_skb(skb); + mctp_dev_put(mdev); return NET_RX_DROP; } diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 7b7918702592..e03ba66bbe18 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -54,7 +54,6 @@ struct mctp_test_dev *mctp_test_create_dev(void) rcu_read_lock(); dev->mdev = __mctp_dev_get(ndev); - mctp_dev_hold(dev->mdev); rcu_read_unlock(); return dev; diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 3240b72271a7..7558802a1435 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -35,12 +35,14 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), + SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), 
SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX), SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX), SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX), SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), + SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index ecd3d8b117e0..2966fcb6548b 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -28,12 +28,14 @@ enum linux_mptcp_mib_field { MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */ + MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */ MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */ MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */ MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */ MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ + MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8755b81896de..01809eef29b4 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -213,6 +213,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; + } else { + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } 
spin_unlock_bh(&pm->lock); @@ -253,8 +255,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, mptcp_event_addr_removed(msk, rm_list->ids[i]); spin_lock_bh(&pm->lock); - mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); - pm->rm_list_rx = *rm_list; + if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED)) + pm->rm_list_rx = *rm_list; + else + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP); spin_unlock_bh(&pm->lock); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9aeee30e50ba..75a0a27547e6 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -546,6 +546,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.add_addr_signaled < add_addr_signal_max) { local = select_signal_address(pernet, msk); + /* due to racing events on both ends we can reach here while + * previous add address is still running: if we invoke now + * mptcp_pm_announce_addr(), that will fail and the + * corresponding id will be marked as used. + * Instead let the PM machinery reschedule us when the + * current address announce will be completed. 
+ */ + if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) + return; + if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) { __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); @@ -650,6 +660,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; + bool reset_port = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); @@ -659,15 +670,19 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); - if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) + remote = msk->pm.remote; + if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) goto add_addr_echo; + /* pick id 0 port, if none is provided the remote address */ + if (!remote.port) { + reset_port = true; + remote.port = sk->sk_dport; + } + /* connect to the specified remote address, using whatever * local address the routing configuration will pick. 
*/ - remote = msk->pm.remote; - if (!remote.port) - remote.port = sk->sk_dport; nr = fill_local_addresses_vec(msk, addrs); msk->pm.add_addr_accepted++; @@ -680,8 +695,12 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) __mptcp_subflow_connect(sk, &addrs[i], &remote); spin_lock_bh(&msk->pm.lock); + /* be sure to echo exactly the received address */ + if (reset_port) + remote.port = 0; + add_addr_echo: - mptcp_pm_announce_addr(msk, &msk->pm.remote, true); + mptcp_pm_announce_addr(msk, &remote, true); mptcp_pm_nl_addr_send_ack(msk); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f60f01b14fac..1c72f25f083e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -466,9 +466,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) static void mptcp_set_datafin_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + u32 retransmits; - mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX, - TCP_RTO_MIN << icsk->icsk_retransmits); + retransmits = min_t(u32, icsk->icsk_retransmits, + ilog2(TCP_RTO_MAX / TCP_RTO_MIN)); + + mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits; } static void __mptcp_set_timeout(struct sock *sk, long tout) @@ -3294,6 +3297,17 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) return 0; delta = msk->write_seq - v; + if (__mptcp_check_fallback(msk) && msk->first) { + struct tcp_sock *tp = tcp_sk(msk->first); + + /* the first subflow is disconnected after close - see + * __mptcp_close_ssk(). tcp_disconnect() moves the write_seq + * so ignore that status, too. 
+ */ + if (!((1 << msk->first->sk_state) & + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))) + delta += READ_ONCE(tp->write_seq) - tp->snd_una; + } if (delta > INT_MAX) delta = INT_MAX; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index d1c9dfbb11fa..9a4feb922cf6 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -428,14 +428,15 @@ static int __nf_register_net_hook(struct net *net, int pf, p = nf_entry_dereference(*pp); new_hooks = nf_hook_entries_grow(p, reg); - if (!IS_ERR(new_hooks)) + if (!IS_ERR(new_hooks)) { + hooks_validate(new_hooks); rcu_assign_pointer(*pp, new_hooks); + } mutex_unlock(&nf_hook_mutex); if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); - hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index d2e5a8f644b8..029171379884 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -610,7 +610,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, nf_reset_ct(skb); skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); } return ret; } @@ -652,7 +652,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -674,7 +674,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index a579e59ee5c5..7873bd1389c3 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,7 +19,7 @@ 
static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) skb_push(skb, skb->mac_len); skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); dev_queue_xmit(skb); } diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 889cf88d3dba..f1d387129f02 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -376,7 +376,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, nf_flow_nat_ip(flow, skb, thoff, dir, iph); ip_decrease_ttl(iph); - skb->tstamp = 0; + skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); @@ -611,7 +611,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, nf_flow_nat_ipv6(flow, skb, dir, ip6h); ip6h->hop_limit--; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b561e0a44a45..fc4265acd9c4 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -110,7 +110,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match, nf_flow_rule_lwt_match(match, tun_info); } - key->meta.ingress_ifindex = tuple->iifidx; + if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC) + key->meta.ingress_ifindex = tuple->tc.iifidx; + else + key->meta.ingress_ifindex = tuple->iifidx; + mask->meta.ingress_ifindex = 0xffffffff; if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) && diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 6d12afabfe8a..63d1516816b1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void) } EXPORT_SYMBOL(nf_unregister_queue_handler); +static void nf_queue_sock_put(struct sock *sk) +{ +#ifdef CONFIG_INET + sock_gen_put(sk); +#else + sock_put(sk); 
+#endif +} + static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(state->in); dev_put(state->out); if (state->sk) - sock_put(state->sk); + nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_put(entry->physin); @@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) } /* Bump dev refs so they don't vanish while packet is out */ -void nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; + if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) + return false; + dev_hold(state->in); dev_hold(state->out); - if (state->sk) - sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_hold(entry->physin); dev_hold(entry->physout); #endif + return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, break; } + if (skb_sk_is_prefetched(skb)) { + struct sock *sk = skb->sk; + + if (!sk_is_refcounted(sk)) { + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + return -ENOTCONN; + + /* drop refcount on skb_orphan */ + skb->destructor = sock_edemux; + } + } + entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) return -ENOMEM; @@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, __nf_queue_entry_init_physdevs(entry); - nf_queue_entry_get_refs(entry); + if (!nf_queue_entry_get_refs(entry)) { + kfree(entry); + return -ENOTCONN; + } switch (entry->state.pf) { case AF_INET: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5fa16990da95..c86748b3873b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4502,7 +4502,7 @@ static void 
nft_set_catchall_destroy(const struct nft_ctx *ctx, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); nft_set_elem_destroy(set, catchall->elem, true); - kfree_rcu(catchall); + kfree_rcu(catchall, rcu); } } @@ -5669,7 +5669,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem->priv) { list_del_rcu(&catchall->list); - kfree_rcu(catchall); + kfree_rcu(catchall, rcu); break; } } @@ -6551,12 +6551,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, { struct nft_object *newobj; struct nft_trans *trans; - int err; + int err = -ENOMEM; + + if (!try_module_get(type->owner)) + return -ENOENT; trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) - return -ENOMEM; + goto err_trans; newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { @@ -6573,6 +6576,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, err_free_trans: kfree(trans); +err_trans: + module_put(type->owner); return err; } @@ -8185,7 +8190,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) if (obj->ops->update) obj->ops->update(obj, newobj); - kfree(newobj); + nft_obj_destroy(&trans->ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) @@ -8976,7 +8981,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - kfree(nft_trans_obj_newobj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; @@ -9636,10 +9641,13 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain); static void __nft_release_hook(struct net *net, struct nft_table *table) { + struct nft_flowtable *flowtable; struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) nf_tables_unregister_hook(net, table, chain); + 
list_for_each_entry(flowtable, &table->flowtables, list) + nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); } static void __nft_release_hooks(struct net *net) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9656c1646222..2d36952b1392 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { - if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) + if (expr->ops->offload_action && + expr->ops->offload_action(expr)) num_actions++; expr = nft_expr_next(expr); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ae9c0756bba5..d97eb280cb2e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -460,6 +460,7 @@ __build_packet_message(struct nfnl_log_net *log, sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; const unsigned char *hwhdrp; + ktime_t tstamp; nlh = nfnl_msg_put(inst->skb, 0, 0, nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), @@ -588,9 +589,10 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; } - if (hooknum <= NF_INET_FORWARD && skb->tstamp) { + tstamp = skb_tstamp_cond(skb, false); + if (hooknum <= NF_INET_FORWARD && tstamp) { struct nfulnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + struct timespec64 kts = ktime_to_timespec64(tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8c15978d9258..a364f8e5e698 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -392,6 +392,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, bool csum_verify; char *secdata = NULL; u32 seclen = 0; + ktime_t tstamp; size = 
nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -407,7 +408,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ + nla_total_size(sizeof(u_int32_t)); /* cap_len */ - if (entskb->tstamp) + tstamp = skb_tstamp_cond(entskb, false); + if (tstamp) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); size += nfqnl_get_bridge_size(entry); @@ -582,9 +584,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_bridge(entry, skb) < 0) goto nla_put_failure; - if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) { + if (entry->state.hook <= NF_INET_FORWARD && tstamp) { struct nfqnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); + struct timespec64 kts = ktime_to_timespec64(tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); @@ -715,9 +717,15 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) - nf_queue_entry_get_refs(entry); - return entry; + + if (!entry) + return NULL; + + if (nf_queue_entry_get_refs(entry)) + return entry; + + kfree(entry); + return NULL; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index bbf3fcba3df4..5b5c607fbf83 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); } +static bool nft_dup_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, @@ -75,6 +80,7 @@ static const struct 
nft_expr_ops nft_dup_netdev_ops = { .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, .offload = nft_dup_netdev_offload, + .offload_action = nft_dup_netdev_offload_action, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index fa9301ca6033..08e7a289738e 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -79,6 +79,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); } +static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + struct nft_fwd_neigh { u8 sreg_dev; u8 sreg_addr; @@ -140,7 +145,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, return; skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); neigh_xmit(neigh_table, dev, addr, skb); out: regs->verdict.code = verdict; @@ -222,6 +227,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, .offload = nft_fwd_netdev_offload, + .offload_action = nft_fwd_netdev_offload_action, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 90c64d27ae53..d0f67d325bdf 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -213,6 +213,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static bool nft_immediate_offload_action(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return true; + + return false; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -224,7 +234,7 @@ static const struct nft_expr_ops nft_imm_ops = { .dump = nft_immediate_dump, .validate = nft_immediate_validate, .offload = 
nft_immediate_offload, - .offload_flags = NFT_OFFLOAD_F_ACTION, + .offload_action = nft_immediate_offload_action, }; struct nft_expr_type nft_imm_type __read_mostly = { diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index c4f308460dd1..a726b623963d 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -340,11 +340,20 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb, return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); } +static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + struct nft_limit_priv_pkts *priv = nft_obj_data(obj); + + nft_limit_destroy(ctx, &priv->limit); +} + static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_pkts_ops = { .type = &nft_limit_obj_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)), .init = nft_limit_obj_pkts_init, + .destroy = nft_limit_obj_pkts_destroy, .eval = nft_limit_obj_pkts_eval, .dump = nft_limit_obj_pkts_dump, }; @@ -378,11 +387,20 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb, return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } +static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + struct nft_limit_priv *priv = nft_obj_data(obj); + + nft_limit_destroy(ctx, priv); +} + static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_bytes_ops = { .type = &nft_limit_obj_type, .size = sizeof(struct nft_limit_priv), .init = nft_limit_obj_bytes_init, + .destroy = nft_limit_obj_bytes_destroy, .eval = nft_limit_obj_bytes_eval, .dump = nft_limit_obj_bytes_dump, }; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 662e5eb1cc39..7013f55f05d1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par) { if (par->family == NFPROTO_IPV4) nf_defrag_ipv4_disable(par->net); 
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) nf_defrag_ipv6_disable(par->net); +#endif } static struct xt_match socket_mt_reg[] __read_mostly = { diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index d49d4bf2e37c..c1d9be636933 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -6,7 +6,6 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ LLCP_CONNECTING, - LLCP_DISCONNECTING, LLCP_CLOSED, LLCP_BOUND, LLCP_LISTEN, diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 5ad5157aa9c5..3364caabef8b 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -383,7 +383,7 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, pr_debug("WKS %d\n", ssap); /* This is a WKS, let's check if it's free */ - if (local->local_wks & BIT(ssap)) { + if (test_bit(ssap, &local->local_wks)) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; @@ -737,13 +737,6 @@ static void nfc_llcp_tx_work(struct work_struct *work) print_hex_dump_debug("LLCP Tx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); - if (ptype == LLCP_PDU_DISC && sk != NULL && - sk->sk_state == LLCP_DISCONNECTING) { - nfc_llcp_sock_unlink(&local->sockets, sk); - sock_orphan(sk); - sock_put(sk); - } - if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 0b93a17b9f11..4ca35791c93b 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -108,21 +108,13 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->service_name_len, GFP_KERNEL); if (!llcp_sock->service_name) { - nfc_llcp_local_put(llcp_sock->local); - llcp_sock->local = NULL; - llcp_sock->dev = NULL; ret = -ENOMEM; - goto put_dev; + goto sock_llcp_put_local; } llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock); if (llcp_sock->ssap == LLCP_SAP_MAX) { - nfc_llcp_local_put(llcp_sock->local); - llcp_sock->local = NULL; - kfree(llcp_sock->service_name); - 
llcp_sock->service_name = NULL; - llcp_sock->dev = NULL; ret = -EADDRINUSE; - goto put_dev; + goto free_service_name; } llcp_sock->reserved_ssap = llcp_sock->ssap; @@ -132,6 +124,19 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap); sk->sk_state = LLCP_BOUND; + nfc_put_device(dev); + release_sock(sk); + + return 0; + +free_service_name: + kfree(llcp_sock->service_name); + llcp_sock->service_name = NULL; + +sock_llcp_put_local: + nfc_llcp_local_put(llcp_sock->local); + llcp_sock->local = NULL; + llcp_sock->dev = NULL; put_dev: nfc_put_device(dev); @@ -626,23 +631,16 @@ static int llcp_sock_release(struct socket *sock) } } - if (llcp_sock->reserved_ssap < LLCP_SAP_MAX) - nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap); - - release_sock(sk); - - /* Keep this sock alive and therefore do not remove it from the sockets - * list until the DISC PDU has been actually sent. Otherwise we would - * reply with DM PDUs before sending the DISC one. 
- */ - if (sk->sk_state == LLCP_DISCONNECTING) - return err; - if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); else nfc_llcp_sock_unlink(&local->sockets, sk); + if (llcp_sock->reserved_ssap < LLCP_SAP_MAX) + nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap); + + release_sock(sk); + out: sock_orphan(sk); sock_put(sk); @@ -712,10 +710,8 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->local = nfc_llcp_local_get(local); llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { - nfc_llcp_local_put(llcp_sock->local); - llcp_sock->local = NULL; ret = -ENOMEM; - goto put_dev; + goto sock_llcp_put_local; } llcp_sock->reserved_ssap = llcp_sock->ssap; @@ -760,8 +756,11 @@ sock_unlink: sock_llcp_release: nfc_llcp_put_ssap(local, llcp_sock->ssap); + +sock_llcp_put_local: nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; + llcp_sock->dev = NULL; put_dev: nfc_put_device(dev); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 076774034bb9..780d9e2246f3 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -423,12 +423,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) +static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask) { + u8 old_ipv6_tclass = ipv6_get_dsfield(nh); + + ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12), + (__force __wsum)(ipv6_tclass << 12)); + + ipv6_change_dsfield(nh, ~mask, ipv6_tclass); +} + +static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask) +{ + u32 ofl; + + ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]; + fl = OVS_MASKED(ofl, fl, mask); + /* Bits 21-24 are always 
unmasked, so this retains their values. */ - OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + nh->flow_lbl[0] = (u8)(fl >> 16); + nh->flow_lbl[1] = (u8)(fl >> 8); + nh->flow_lbl[2] = (u8)fl; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl)); +} + +static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask) +{ + new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8), + (__force __wsum)(new_ttl << 8)); + nh->hop_limit = new_ttl; } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, @@ -546,18 +577,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv6_tclass) { - ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass); flow_key->ip.tos = ipv6_get_dsfield(nh); } if (mask->ipv6_label) { - set_ipv6_fl(nh, ntohl(key->ipv6_label), + set_ipv6_fl(skb, nh, ntohl(key->ipv6_label), ntohl(mask->ipv6_label)); flow_key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, - mask->ipv6_hlimit); + set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index f6cd24fd530c..372bf54a0ca9 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -241,6 +241,144 @@ static bool icmphdr_ok(struct sk_buff *skb) sizeof(struct icmphdr)); } +/** + * get_ipv6_ext_hdrs() - Parses packet and sets IPv6 extension header flags. 
+ * + * @skb: buffer where extension header data starts in packet + * @nh: ipv6 header + * @ext_hdrs: flags are stored here + * + * OFPIEH12_UNREP is set if more than one of a given IPv6 extension header + * is unexpectedly encountered. (Two destination options headers may be + * expected and would not cause this bit to be set.) + * + * OFPIEH12_UNSEQ is set if IPv6 extension headers were not in the order + * preferred (but not required) by RFC 2460: + * + * When more than one extension header is used in the same packet, it is + * recommended that those headers appear in the following order: + * IPv6 header + * Hop-by-Hop Options header + * Destination Options header + * Routing header + * Fragment header + * Authentication header + * Encapsulating Security Payload header + * Destination Options header + * upper-layer header + */ +static void get_ipv6_ext_hdrs(struct sk_buff *skb, struct ipv6hdr *nh, + u16 *ext_hdrs) +{ + u8 next_type = nh->nexthdr; + unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); + int dest_options_header_count = 0; + + *ext_hdrs = 0; + + while (ipv6_ext_hdr(next_type)) { + struct ipv6_opt_hdr _hdr, *hp; + + switch (next_type) { + case IPPROTO_NONE: + *ext_hdrs |= OFPIEH12_NONEXT; + /* stop parsing */ + return; + + case IPPROTO_ESP: + if (*ext_hdrs & OFPIEH12_ESP) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & ~(OFPIEH12_HOP | OFPIEH12_DEST | + OFPIEH12_ROUTER | OFPIEH12_FRAG | + OFPIEH12_AUTH | OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_ESP; + break; + + case IPPROTO_AH: + if (*ext_hdrs & OFPIEH12_AUTH) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & + ~(OFPIEH12_HOP | OFPIEH12_DEST | OFPIEH12_ROUTER | + OFPIEH12_FRAG | OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_AUTH; + break; + + case IPPROTO_DSTOPTS: + if (dest_options_header_count == 0) { + if (*ext_hdrs & +
~(OFPIEH12_HOP | OFPIEH12_UNREP)) + *ext_hdrs |= OFPIEH12_UNSEQ; + *ext_hdrs |= OFPIEH12_DEST; + } else if (dest_options_header_count == 1) { + if (*ext_hdrs & + ~(OFPIEH12_HOP | OFPIEH12_DEST | + OFPIEH12_ROUTER | OFPIEH12_FRAG | + OFPIEH12_AUTH | OFPIEH12_ESP | + OFPIEH12_UNREP)) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + } else { + *ext_hdrs |= OFPIEH12_UNREP; + } + dest_options_header_count++; + break; + + case IPPROTO_FRAGMENT: + if (*ext_hdrs & OFPIEH12_FRAG) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & ~(OFPIEH12_HOP | + OFPIEH12_DEST | + OFPIEH12_ROUTER | + OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_FRAG; + break; + + case IPPROTO_ROUTING: + if (*ext_hdrs & OFPIEH12_ROUTER) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & ~(OFPIEH12_HOP | + OFPIEH12_DEST | + OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_ROUTER; + break; + + case IPPROTO_HOPOPTS: + if (*ext_hdrs & OFPIEH12_HOP) + *ext_hdrs |= OFPIEH12_UNREP; + /* OFPIEH12_HOP is set to 1 if a hop-by-hop IPv6 + * extension header is present as the first + * extension header in the packet. 
+ */ + if (*ext_hdrs == 0) + *ext_hdrs |= OFPIEH12_HOP; + else + *ext_hdrs |= OFPIEH12_UNSEQ; + break; + + default: + return; + } + + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (!hp) + break; + next_type = hp->nexthdr; + start += ipv6_optlen(hp); + } +} + static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) { unsigned short frag_off; @@ -256,6 +394,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) nh = ipv6_hdr(skb); + get_ipv6_ext_hdrs(skb, nh, &key->ipv6.exthdrs); + key->ip.proto = NEXTHDR_NONE; key->ip.tos = ipv6_get_dsfield(nh); key->ip.ttl = nh->hop_limit; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 758a8c77f736..073ab73ffeaa 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -32,6 +32,19 @@ enum sw_flow_mac_proto { #define SW_FLOW_KEY_INVALID 0x80 #define MPLS_LABEL_DEPTH 3 +/* Bit definitions for IPv6 Extension Header pseudo-field. */ +enum ofp12_ipv6exthdr_flags { + OFPIEH12_NONEXT = 1 << 0, /* "No next header" encountered. */ + OFPIEH12_ESP = 1 << 1, /* Encrypted Sec Payload header present. */ + OFPIEH12_AUTH = 1 << 2, /* Authentication header present. */ + OFPIEH12_DEST = 1 << 3, /* 1 or 2 dest headers present. */ + OFPIEH12_FRAG = 1 << 4, /* Fragment header present. */ + OFPIEH12_ROUTER = 1 << 5, /* Router header present. */ + OFPIEH12_HOP = 1 << 6, /* Hop-by-hop header present. */ + OFPIEH12_UNREP = 1 << 7, /* Unexpected repeats encountered. */ + OFPIEH12_UNSEQ = 1 << 8 /* Unexpected sequencing encountered. */ +}; + /* Store options at the end of the array if they are less than the * maximum size. This allows us to get the benefits of variable length * matching for small options. @@ -121,6 +134,7 @@ struct sw_flow_key { struct in6_addr dst; /* IPv6 destination address. */ } addr; __be32 label; /* IPv6 flow label. 
*/ + u16 exthdrs; /* IPv6 extension header flags */ union { struct { struct in6_addr src; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index fd1f809e9bc1..8b4124820f7d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -346,7 +346,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 30); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -369,7 +369,8 @@ size_t ovs_key_attr_size(void) + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ - + nla_total_size(28); /* OVS_KEY_ATTR_ND */ + + nla_total_size(28) /* OVS_KEY_ATTR_ND */ + + nla_total_size(2); /* OVS_KEY_ATTR_IPV6_EXTHDRS */ } static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { @@ -437,6 +438,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, .next = ovs_nsh_key_attr_lens, }, + [OVS_KEY_ATTR_IPV6_EXTHDRS] = { + .len = sizeof(struct ovs_key_ipv6_exthdrs) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -1597,6 +1600,17 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, attrs &= ~(1 << OVS_KEY_ATTR_IPV6); } + if (attrs & (1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS)) { + const struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key; + + ipv6_exthdrs_key = nla_data(a[OVS_KEY_ATTR_IPV6_EXTHDRS]); + + SW_FLOW_KEY_PUT(match, ipv6.exthdrs, + ipv6_exthdrs_key->hdrs, is_mask); + + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS); + } + if (attrs & (1 << OVS_KEY_ATTR_ARP)) { const struct ovs_key_arp *arp_key; @@ -2099,6 +2113,7 @@ static int __ovs_nla_put_key(const struct 
sw_flow_key *swkey, ipv4_key->ipv4_frag = output->ip.frag; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { struct ovs_key_ipv6 *ipv6_key; + struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key; nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); if (!nla) @@ -2113,6 +2128,13 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_EXTHDRS, + sizeof(*ipv6_exthdrs_key)); + if (!nla) + goto nla_put_failure; + ipv6_exthdrs_key = nla_data(nla); + ipv6_exthdrs_key->hdrs = output->ipv6.exthdrs; } else if (swkey->eth.type == htons(ETH_P_NSH)) { if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) goto nla_put_failure; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index cf2ce5812489..82a74f998966 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -507,7 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); vport->ops->send(skb); return; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ab87f22cc7ec..1b93ce1a5600 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -460,7 +460,7 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, return TP_STATUS_TS_RAW_HARDWARE; if ((flags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec64_cond(skb->tstamp, ts)) + ktime_to_timespec64_cond(skb_tstamp(skb), ts)) return TP_STATUS_TS_SOFTWARE; return 0; @@ -2199,6 +2199,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; sock_skb_set_dropcount(sk, skb); + skb_clear_delivery_time(skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); @@ -2377,6 +2378,7 @@ static int 
tpacket_rcv(struct sk_buff *skb, struct net_device *dev, po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; + skb_clear_delivery_time(copy_skb); __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } spin_unlock(&sk->sk_receive_queue.lock); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2811348f3acc..4f51094da9da 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,7 +274,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action, err = tc_setup_action(&fl_action->action, actions); if (err) { NL_SET_ERR_MSG_MOD(extack, - "Failed to setup tc actions for offload\n"); + "Failed to setup tc actions for offload"); goto fl_err; } @@ -1446,6 +1446,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, continue; if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || skip_hw != tc_act_skip_hw(act->tcfa_flags)) { + NL_SET_ERR_MSG(extack, + "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index a77d8908e737..fea2d78b9ddc 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -53,6 +53,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } + if (unlikely(!skb->tstamp && skb->mono_delivery_time)) + skb->mono_delivery_time = 0; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 7108e71ce4db..89e46f66e3d9 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -355,6 +355,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params) } } +static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, + struct nf_conn_act_ct_ext *act_ct_ext, u8 dir) +{ + entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC; + entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; +} + static void 
tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp) @@ -379,10 +386,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, act_ct_ext = nf_conn_act_ct_ext_find(ct); if (act_ct_ext) { - entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = - act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL]; - entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = - act_ct_ext->ifindex[IP_CT_DIR_REPLY]; + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); } err = flow_offload_add(&ct_ft->nf_ft, entry); @@ -527,11 +532,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct nf_conn *ct; u8 dir; - /* Previously seen or loopback */ - ct = nf_ct_get(skb, &ctinfo); - if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) - return false; - switch (family) { case NFPROTO_IPV4: if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 899fe025df77..f4d917705263 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -419,20 +419,66 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static int tcf_police_act_to_flow_act(int tc_act, u32 *extval) +{ + int act_id = -EOPNOTSUPP; + + if (!TC_ACT_EXT_OPCODE(tc_act)) { + if (tc_act == TC_ACT_OK) + act_id = FLOW_ACTION_ACCEPT; + else if (tc_act == TC_ACT_SHOT) + act_id = FLOW_ACTION_DROP; + else if (tc_act == TC_ACT_PIPE) + act_id = FLOW_ACTION_PIPE; + } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) { + act_id = FLOW_ACTION_GOTO; + *extval = tc_act & TC_ACT_EXT_VAL_MASK; + } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) { + act_id = FLOW_ACTION_JUMP; + *extval = tc_act & TC_ACT_EXT_VAL_MASK; + } + + return act_id; +} + static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind) { if (bind) { 
struct flow_action_entry *entry = entry_data; + struct tcf_police *police = to_police(act); + struct tcf_police_params *p; + int act_id; + + p = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.peakrate_bytes_ps = tcf_police_peakrate_bytes_ps(act); + entry->police.avrate = tcf_police_tcfp_ewma_rate(act); + entry->police.overhead = tcf_police_rate_overhead(act); entry->police.burst_pkt = tcf_police_burst_pkt(act); entry->police.rate_pkt_ps = tcf_police_rate_pkt_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); + + act_id = tcf_police_act_to_flow_act(police->tcf_action, + &entry->police.exceed.extval); + if (act_id < 0) + return act_id; + + entry->police.exceed.act_id = act_id; + + act_id = tcf_police_act_to_flow_act(p->tcfp_result, + &entry->police.notexceed.extval); + if (act_id < 0) + return act_id; + + entry->police.notexceed.act_id = act_id; + *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index df19a847829e..c85b85a192bf 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -102,6 +102,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); } + if (unlikely(!skb->tstamp && skb->mono_delivery_time)) + skb->mono_delivery_time = 0; if (prog->exts_integrated) { res->class = 0; diff --git a/net/smc/Makefile b/net/smc/Makefile index 196fb6f01b14..640af9a39f9c 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += 
smc_tracepoint.o smc_sysctl.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index b40ae4d28cbf..e508e4f3a073 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -51,6 +51,7 @@ #include "smc_close.h" #include "smc_stats.h" #include "smc_tracepoint.h" +#include "smc_sysctl.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -192,12 +193,27 @@ void smc_unhash_sk(struct sock *sk) } EXPORT_SYMBOL_GPL(smc_unhash_sk); +/* This will be called before user really release sock_lock. So do the + * work which we didn't do because of user hold the sock_lock in the + * BH context + */ +static void smc_release_cb(struct sock *sk) +{ + struct smc_sock *smc = smc_sk(sk); + + if (smc->conn.tx_in_release_sock) { + smc_tx_pending(&smc->conn); + smc->conn.tx_in_release_sock = false; + } +} + struct proto smc_proto = { .name = "SMC", .owner = THIS_MODULE, .keepalive = smc_set_keepalive, .hash = smc_hash_sk, .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, .obj_size = sizeof(struct smc_sock), .h.smc_hash = &smc_v4_hashinfo, .slab_flags = SLAB_TYPESAFE_BY_RCU, @@ -210,6 +226,7 @@ struct proto smc_proto6 = { .keepalive = smc_set_keepalive, .hash = smc_hash_sk, .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, .obj_size = sizeof(struct smc_sock), .h.smc_hash = &smc_v6_hashinfo, .slab_flags = SLAB_TYPESAFE_BY_RCU, @@ -268,7 +285,7 @@ static int smc_release(struct socket *sock) { struct sock *sk = sock->sk; struct smc_sock *smc; - int rc = 0; + int old_state, rc = 0; if (!sk) goto out; @@ -276,8 +293,10 @@ static int smc_release(struct socket *sock) sock_hold(sk); /* sock_put below */ smc = smc_sk(sk); + old_state = sk->sk_state; + /* cleanup for a dangling non-blocking connect */ - if (smc->connect_nonblock && sk->sk_state == SMC_INIT) + if (smc->connect_nonblock && old_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); if (cancel_work_sync(&smc->connect_work)) @@ -291,6 +310,10 @@ static int smc_release(struct 
socket *sock) else lock_sock(sk); + if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE && + !smc->use_fallback) + smc_close_active_abort(smc); + rc = __smc_release(smc); /* detach socket */ @@ -2715,10 +2738,14 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); switch (optname) { case SMC_LIMIT_HS: - if (optlen < sizeof(int)) - return -EINVAL; - if (copy_from_sockptr(&val, optval, sizeof(int))) - return -EFAULT; + if (optlen < sizeof(int)) { + rc = -EINVAL; + break; + } + if (copy_from_sockptr(&val, optval, sizeof(int))) { + rc = -EFAULT; + break; + } smc->limit_smc_hs = !!val; rc = 0; @@ -2791,8 +2818,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_state != SMC_CLOSED) { if (val) { SMC_STAT_INC(smc, ndly_cnt); - mod_delayed_work(smc->conn.lgr->tx_wq, - &smc->conn.tx_work, 0); + smc_tx_pending(&smc->conn); + cancel_delayed_work(&smc->conn.tx_work); } } break; @@ -3266,12 +3293,22 @@ static int __init smc_init(void) rc = tcp_register_ulp(&smc_ulp_ops); if (rc) { pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); - goto out_sock; + goto out_ib; + } + + rc = smc_sysctl_init(); + if (rc) { + pr_err("%s: sysctl_init fails with %d\n", __func__, rc); + goto out_ulp; } static_branch_enable(&tcp_have_smc); return 0; +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); +out_ib: + smc_ib_unregister_client(); out_sock: sock_unregister(PF_SMC); out_proto6: @@ -3299,6 +3336,7 @@ out_pernet_subsys: static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + smc_sysctl_exit(); tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); diff --git a/net/smc/smc.h b/net/smc/smc.h index a096d8af21a0..ea0620529ebe 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -29,6 +29,7 @@ #define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM * devices */ +#define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; extern struct proto 
smc_proto6; @@ -192,6 +193,7 @@ struct smc_connection { * - dec on polled tx cqe */ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ + atomic_t tx_pushing; /* nr_threads trying tx push */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ @@ -211,6 +213,10 @@ struct smc_connection { * data still pending */ char urg_rx_byte; /* urgent byte */ + bool tx_in_release_sock; + /* flush pending tx data in + * sock release_cb() + */ atomic_t bytes_to_rcv; /* arrived data, * not yet received */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 9d5a97168969..5c731f27996e 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -48,9 +48,19 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } - if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && - unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) - wake_up(&conn->cdc_pend_tx_wq); + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr)) { + /* If user owns the sock_lock, mark the connection need sending. 
+ * User context will later try to send when it release sock_lock + * in smc_release_cb() + */ + if (sock_owned_by_user(&smc->sk)) + conn->tx_in_release_sock = true; + else + smc_tx_pending(conn); + + if (unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + } WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); smc_tx_sndbuf_nonfull(smc); @@ -350,8 +360,12 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ if ((diff_cons && smc_tx_prepared_sends(conn)) || conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || - conn->local_rx_ctrl.prod_flags.urg_data_pending) - smc_tx_sndbuf_nonempty(conn); + conn->local_rx_ctrl.prod_flags.urg_data_pending) { + if (!sock_owned_by_user(&smc->sk)) + smc_tx_pending(conn); + else + conn->tx_in_release_sock = true; + } if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 29525d03b253..f40f6ed0fbdb 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1161,8 +1161,8 @@ void smc_conn_free(struct smc_connection *conn) cancel_work_sync(&conn->abort_work); } if (!list_empty(&lgr->list)) { - smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + smc_lgr_unregister_conn(conn); } if (!lgr->conns_num) @@ -1864,7 +1864,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) (ini->smcd_version == SMC_V2 || lgr->vlan_id == ini->vlan_id) && (role == SMC_CLNT || ini->is_smcd || - lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { + (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && + !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { /* link group found */ ini->first_contact_local = 0; conn->lgr = lgr; @@ -1988,7 +1989,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, */ static inline int smc_rmb_wnd_update_limit(int rmbe_size) { - return min_t(int, rmbe_size / 10, 
SOCK_MIN_SNDBUF / 2); + return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } /* map an rmb buf to a link */ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index ff61b7b95875..7984f8883472 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -113,7 +113,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) pnettable = &sn->pnettable; /* remove table entry */ - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || @@ -131,7 +131,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) rc = 0; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); /* if this is not the initial namespace, stop here */ if (net != &init_net) @@ -192,7 +192,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { @@ -206,7 +206,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) break; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -224,7 +224,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); @@ -237,7 +237,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) break; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -370,7 +370,7 @@ static int smc_pnet_add_eth(struct smc_pnettable 
*pnettable, struct net *net, strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); rc = -EEXIST; new_netdev = true; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_ETH && !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { @@ -385,9 +385,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, GFP_ATOMIC); } list_add_tail(&new_pe->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); } else { - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); kfree(new_pe); goto out_put; } @@ -448,7 +448,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, new_pe->ib_port = ib_port; new_ibdev = true; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { @@ -458,9 +458,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, } if (new_ibdev) { list_add_tail(&new_pe->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); } else { - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); kfree(new_pe); } return (new_ibdev) ? 
0 : -EEXIST; @@ -605,7 +605,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, pnettable = &sn->pnettable; /* dump pnettable entries */ - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; @@ -620,7 +620,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return idx; } @@ -864,7 +864,7 @@ int smc_pnet_net_init(struct net *net) struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; INIT_LIST_HEAD(&pnettable->pnetlist); - rwlock_init(&pnettable->lock); + mutex_init(&pnettable->lock); INIT_LIST_HEAD(&pnetids_ndev->list); rwlock_init(&pnetids_ndev->lock); @@ -947,7 +947,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { /* get pnetid of netdev device */ @@ -956,7 +956,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -1159,7 +1159,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && @@ -1169,7 +1169,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -1188,7 +1188,7 @@ int 
smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { @@ -1197,7 +1197,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 14039272f7e4..80a88eea4949 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -29,7 +29,7 @@ struct smc_link_group; * @pnetlist: List of PNETIDs */ struct smc_pnettable { - rwlock_t lock; + struct mutex lock; struct list_head pnetlist; }; diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c new file mode 100644 index 000000000000..3b59876aaac9 --- /dev/null +++ b/net/smc/smc_sysctl.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * smc_sysctl.c: sysctl interface to SMC subsystem. + * + * Copyright (c) 2022, Alibaba Inc. 
+ * + * Author: Tony Lu <tonylu@linux.alibaba.com> + * + */ + +#include <linux/init.h> +#include <linux/sysctl.h> +#include <net/net_namespace.h> + +#include "smc.h" +#include "smc_sysctl.h" + +static struct ctl_table smc_table[] = { + { + .procname = "autocorking_size", + .data = &init_net.smc.sysctl_autocorking_size, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __net_init int smc_sysctl_init_net(struct net *net) +{ + struct ctl_table *table; + + table = smc_table; + if (!net_eq(net, &init_net)) { + int i; + + table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++) + table[i].data += (void *)net - (void *)&init_net; + } + + net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table); + if (!net->smc.smc_hdr) + goto err_reg; + + net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; + + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static __net_exit void smc_sysctl_exit_net(struct net *net) +{ + struct ctl_table *table; + + /* Non-init namespaces registered a kmemdup()ed copy of smc_table in + * smc_sysctl_init_net(); fetch it before the header is torn down and + * free it afterwards so it is not leaked on namespace exit. + */ + table = net->smc.smc_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->smc.smc_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +/* Not __net_initdata: smc_sysctl_exit() below still references these ops + * after init sections have been discarded. + */ +static struct pernet_operations smc_sysctl_ops = { + .init = smc_sysctl_init_net, + .exit = smc_sysctl_exit_net, +}; + +int __init smc_sysctl_init(void) +{ + return register_pernet_subsys(&smc_sysctl_ops); +} + +void smc_sysctl_exit(void) +{ + unregister_pernet_subsys(&smc_sysctl_ops); +} diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h new file mode 100644 index 000000000000..49553ac236b6 --- /dev/null +++ b/net/smc/smc_sysctl.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * smc_sysctl.h: sysctl interface to SMC subsystem. + * + * Copyright (c) 2022, Alibaba Inc. 
+ * + * Author: Tony Lu <tonylu@linux.alibaba.com> + * + */ + +#ifndef _SMC_SYSCTL_H +#define _SMC_SYSCTL_H + +#ifdef CONFIG_SYSCTL + +int smc_sysctl_init(void); +void smc_sysctl_exit(void); + +#else + +static inline int smc_sysctl_init(void) +{ + return 0; +} + +static inline void smc_sysctl_exit(void) { } + +#endif /* CONFIG_SYSCTL */ + +#endif /* _SMC_SYSCTL_H */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 5df3940d4543..98ca9229fe87 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -131,6 +131,51 @@ static bool smc_tx_is_corked(struct smc_sock *smc) return (tp->nonagle & TCP_NAGLE_CORK) ? true : false; } +/* If we have pending CDC messages, do not send: + * Because CQE of this CDC message will happen shortly, it gives + * a chance to coalesce future sendmsg() payload into one RDMA Write, + * without need for a timer, and with no latency trade off. + * Algorithm here: + * 1. First message should never cork + * 2. If we have pending Tx CDC messages, wait for the first CDC + * message's completion + * 3. Don't cork too much data in a single RDMA Write to prevent burst + * traffic, total corked message should not exceed sendbuf/2 + */ +static bool smc_should_autocork(struct smc_sock *smc) +{ + struct smc_connection *conn = &smc->conn; + int corking_size; + + corking_size = min_t(unsigned int, conn->sndbuf_desc->len >> 1, + sock_net(&smc->sk)->smc.sysctl_autocorking_size); + + if (atomic_read(&conn->cdc_pend_tx_wr) == 0 || + smc_tx_prepared_sends(conn) > corking_size) + return false; + return true; +} + +static bool smc_tx_should_cork(struct smc_sock *smc, struct msghdr *msg) +{ + struct smc_connection *conn = &smc->conn; + + if (smc_should_autocork(smc)) + return true; + + /* for a corked socket defer the RDMA writes if + * sndbuf_space is still available. The applications + * should know how/when to uncork it. 
+ */ + if ((msg->msg_flags & MSG_MORE || + smc_tx_is_corked(smc) || + msg->msg_flags & MSG_SENDPAGE_NOTLAST) && + atomic_read(&conn->sndbuf_space)) + return true; + + return false; +} + /* sndbuf producer: main API called by socket layer. * called under sock lock. */ @@ -235,15 +280,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) */ if ((msg->msg_flags & MSG_OOB) && !send_remaining) conn->urg_tx_pend = true; - if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc) || - msg->msg_flags & MSG_SENDPAGE_NOTLAST) && - (atomic_read(&conn->sndbuf_space))) - /* for a corked socket defer the RDMA writes if - * sndbuf_space is still available. The applications - * should known how/when to uncork it. - */ - continue; - smc_tx_sndbuf_nonempty(conn); + /* If we need to cork, do nothing and wait for the next + * sendmsg() call or push on tx completion + */ + if (!smc_tx_should_cork(smc, msg)) + smc_tx_sndbuf_nonempty(conn); trace_smc_tx_sendmsg(smc, copylen); } /* while (msg_data_left(msg)) */ @@ -590,13 +631,26 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) return rc; } -int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn) { - int rc; + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + int rc = 0; + + /* No data in the send queue */ + if (unlikely(smc_tx_prepared_sends(conn) <= 0)) + goto out; + + /* Peer don't have RMBE space */ + if (unlikely(atomic_read(&conn->peer_rmbe_space) <= 0)) { + SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk); + goto out; + } if (conn->killed || - conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) - return -EPIPE; /* connection being aborted */ + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { + rc = -EPIPE; /* connection being aborted */ + goto out; + } if (conn->lgr->is_smcd) rc = smcd_tx_sndbuf_nonempty(conn); else @@ -604,10 +658,38 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) if (!rc) { /* 
trigger socket release if connection is closing */ - struct smc_sock *smc = container_of(conn, struct smc_sock, - conn); smc_close_wake_tx_prepared(smc); } + +out: + return rc; +} + +int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + int rc; + + /* This make sure only one can send simultaneously to prevent wasting + * of CPU and CDC slot. + * Record whether someone has tried to push while we are pushing. + */ + if (atomic_inc_return(&conn->tx_pushing) > 1) + return 0; + +again: + atomic_set(&conn->tx_pushing, 1); + smp_wmb(); /* Make sure tx_pushing is 1 before real send */ + rc = __smc_tx_sndbuf_nonempty(conn); + + /* We need to check whether someone else have added some data into + * the send queue and tried to push but failed after the atomic_set() + * when we are pushing. + * If so, we need to push again to prevent those data hang in the send + * queue. + */ + if (unlikely(!atomic_dec_and_test(&conn->tx_pushing))) + goto again; + return rc; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 24be1d03fef9..34d616406d51 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -137,25 +137,28 @@ static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t) { struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet); struct ib_wc wc[SMC_WR_MAX_POLL_CQE]; - int i = 0, rc; - int polled = 0; + int i, rc; again: - polled++; do { memset(&wc, 0, sizeof(wc)); rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); - if (polled == 1) { - ib_req_notify_cq(dev->roce_cq_send, - IB_CQ_NEXT_COMP | - IB_CQ_REPORT_MISSED_EVENTS); - } - if (!rc) - break; for (i = 0; i < rc; i++) smc_wr_tx_process_cqe(&wc[i]); + if (rc < SMC_WR_MAX_POLL_CQE) + /* If < SMC_WR_MAX_POLL_CQE, the CQ should have been + * drained, no need to poll again. --Guangguan Wang + */ + break; } while (rc > 0); - if (polled == 1) + + /* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns + * 0, it is safe to wait for the next event. 
+ * Else we must poll the CQ again to make sure we won't miss any event + */ + if (ib_req_notify_cq(dev->roce_cq_send, + IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS)) goto again; } @@ -478,24 +481,28 @@ static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t) { struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet); struct ib_wc wc[SMC_WR_MAX_POLL_CQE]; - int polled = 0; int rc; again: - polled++; do { memset(&wc, 0, sizeof(wc)); rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc); - if (polled == 1) { - ib_req_notify_cq(dev->roce_cq_recv, - IB_CQ_SOLICITED_MASK - | IB_CQ_REPORT_MISSED_EVENTS); - } - if (!rc) + if (rc > 0) + smc_wr_rx_process_cqes(&wc[0], rc); + if (rc < SMC_WR_MAX_POLL_CQE) + /* If < SMC_WR_MAX_POLL_CQE, the CQ should have been + * drained, no need to poll again. --Guangguan Wang + */ break; - smc_wr_rx_process_cqes(&wc[0], rc); } while (rc > 0); - if (polled == 1) + + /* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns + * 0, it is safe to wait for the next event. 
+ * Else we must poll the CQ again to make sure we won't miss any event + */ + if (ib_req_notify_cq(dev->roce_cq_recv, + IB_CQ_SOLICITED_MASK | + IB_CQ_REPORT_MISSED_EVENTS)) goto again; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6a00c390547b..474f76383033 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -458,63 +458,40 @@ static int __switchdev_handle_fdb_event_to_device(struct net_device *dev, const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) + const struct switchdev_notifier_fdb_info *fdb_info)) { const struct switchdev_notifier_info *info = &fdb_info->info; - struct net_device *br, *lower_dev; + struct net_device *br, *lower_dev, *switchdev; struct list_head *iter; int err = -EOPNOTSUPP; if (check_cb(dev)) return mod_cb(dev, orig_dev, event, info->ctx, fdb_info); - if (netif_is_lag_master(dev)) { - if (!switchdev_lower_dev_find_rcu(dev, check_cb, foreign_dev_check_cb)) - goto maybe_bridged_with_us; - - /* This is a LAG interface that we offload */ - if (!lag_mod_cb) - return -EOPNOTSUPP; - - return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info); - } - /* Recurse through lower interfaces in case the FDB entry is pointing - * towards a bridge device. + * towards a bridge or a LAG device. 
*/ - if (netif_is_bridge_master(dev)) { - if (!switchdev_lower_dev_find_rcu(dev, check_cb, foreign_dev_check_cb)) - return 0; - - /* This is a bridge interface that we offload */ - netdev_for_each_lower_dev(dev, lower_dev, iter) { - /* Do not propagate FDB entries across bridges */ - if (netif_is_bridge_master(lower_dev)) - continue; - - /* Bridge ports might be either us, or LAG interfaces - * that we offload. - */ - if (!check_cb(lower_dev) && - !switchdev_lower_dev_find_rcu(lower_dev, check_cb, - foreign_dev_check_cb)) - continue; - - err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev, - event, fdb_info, check_cb, - foreign_dev_check_cb, - mod_cb, lag_mod_cb); - if (err && err != -EOPNOTSUPP) - return err; - } + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; - return 0; + /* Bridge ports might be either us, or LAG interfaces + * that we offload. + */ + if (!check_cb(lower_dev) && + !switchdev_lower_dev_find_rcu(lower_dev, check_cb, + foreign_dev_check_cb)) + continue; + + err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev, + event, fdb_info, check_cb, + foreign_dev_check_cb, + mod_cb); + if (err && err != -EOPNOTSUPP) + return err; } -maybe_bridged_with_us: /* Event is neither on a bridge nor a LAG. Check whether it is on an * interface that is in a bridge with us. 
*/ @@ -522,12 +499,16 @@ maybe_bridged_with_us: if (!br || !netif_is_bridge_master(br)) return 0; - if (!switchdev_lower_dev_find_rcu(br, check_cb, foreign_dev_check_cb)) + switchdev = switchdev_lower_dev_find_rcu(br, check_cb, foreign_dev_check_cb); + if (!switchdev) return 0; + if (!foreign_dev_check_cb(switchdev, dev)) + return err; + return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info, check_cb, foreign_dev_check_cb, - mod_cb, lag_mod_cb); + mod_cb); } int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event, @@ -537,16 +518,13 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) + const struct switchdev_notifier_fdb_info *fdb_info)) { int err; err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info, check_cb, foreign_dev_check_cb, - mod_cb, lag_mod_cb); + mod_cb); if (err == -EOPNOTSUPP) err = 0; @@ -564,7 +542,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, struct netlink_ext_ack *extack)) { struct switchdev_notifier_info *info = &port_obj_info->info; - struct net_device *br, *lower_dev; + struct net_device *br, *lower_dev, *switchdev; struct netlink_ext_ack *extack; struct list_head *iter; int err = -EOPNOTSUPP; @@ -614,7 +592,11 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, if (!br || !netif_is_bridge_master(br)) return err; - if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + switchdev = switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb); + if (!switchdev) + return err; + + if (!foreign_dev_check_cb(switchdev, dev)) return 
err; return __switchdev_handle_port_obj_add(br, port_obj_info, check_cb, @@ -674,7 +656,7 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj)) { struct switchdev_notifier_info *info = &port_obj_info->info; - struct net_device *br, *lower_dev; + struct net_device *br, *lower_dev, *switchdev; struct list_head *iter; int err = -EOPNOTSUPP; @@ -721,7 +703,11 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, if (!br || !netif_is_bridge_master(br)) return err; - if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + switchdev = switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb); + if (!switchdev) + return err; + + if (!foreign_dev_check_cb(switchdev, dev)) return err; return __switchdev_handle_port_obj_del(br, port_obj_info, check_cb, diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 01396dd1c899..1d8ba233d047 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, list_for_each_entry(p, &sr->all_publ, all_publ) if (p->key == *last_key) break; - if (p->key != *last_key) + if (list_entry_is_head(p, &sr->all_publ, all_publ)) return -EPIPE; } else { p = list_first_entry(&sr->all_publ, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3e63c83e641c..7545321c3440 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3749,7 +3749,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, if (p->key == *last_publ) break; } - if (p->key != *last_publ) { + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 1e9be50469ce..527ae669f6f7 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -33,7 +33,7 @@ $(obj)/shipped-certs.c: 
$(wildcard $(srctree)/$(src)/certs/*.hex) echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ ) > $@ -$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDI) \ +$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509) @$(kecho) " GEN $@" $(Q)(set -e; \ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 578bff9c378b..c01fbcc848e8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13411,6 +13411,9 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { filter[i].filter = nla_memdup(attr, GFP_KERNEL); + if (!filter[i].filter) + goto err; + filter[i].len = nla_len(attr); i++; } @@ -13423,6 +13426,15 @@ static int handle_nan_filter(struct nlattr *attr_filter, } return 0; + +err: + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + kfree(filter[i].filter); + i++; + } + kfree(filter); + return -ENOMEM; } static int nl80211_nan_add_func(struct sk_buff *skb, @@ -17816,7 +17828,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; - if (wdev->iftype == NL80211_IFTYPE_STATION && + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && !WARN_ON(!wdev->current_bss)) cfg80211_update_assoc_bss_entry(wdev, chandef->chan); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 3fa066419d37..39bce5d764de 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + return -EINVAL; + dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { @@ -262,7 +265,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, netdev_tracker_alloc(dev, 
&xso->dev_tracker, GFP_ATOMIC); xso->real_dev = dev; xso->num_exthdrs = 1; - xso->flags = xuo->flags; + /* Don't forward bit that is not implemented */ + xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 57448fc519fc..daec1fb08bf5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -190,7 +190,7 @@ static void xfrmi_dev_uninit(struct net_device *dev) static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) { - skb->tstamp = 0; + skb_clear_tstamp(skb); skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; @@ -673,12 +673,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct net *net = xi->net; struct xfrm_if_parms p = {}; + xfrmi_netlink_parms(data, &p); if (!p.if_id) { NL_SET_ERR_MSG(extack, "if_id must be non zero"); return -EINVAL; } - xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 04d1ce9b510f..882526159d3a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4256,7 +4256,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, } static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type, struct net *net) + u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; @@ -4265,7 +4265,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; priority = 
ret->priority; @@ -4277,7 +4278,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * if ((pol->priority >= priority) && ret) break; - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; break; @@ -4393,7 +4395,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap) + struct xfrm_encap_tmpl *encap, u32 if_id) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -4412,14 +4414,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp, net))) { + if ((x = xfrm_migrate_state_find(mp, net, if_id))) { x_cur[nx_cur] = x; nx_cur++; xc = xfrm_state_migrate(x, mp, encap); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ca6bee18346d..b749935152ba 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1579,9 +1579,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, memcpy(&x->mark, &orig->mark, sizeof(x->mark)); memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); - if (xfrm_init_state(x) < 0) - goto error; - x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; @@ -1606,7 +1603,8 @@ out: return NULL; } -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate 
*m, struct net *net, + u32 if_id) { unsigned int h; struct xfrm_state *x = NULL; @@ -1622,6 +1620,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n continue; if (m->reqid && x->props.reqid != m->reqid) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1637,6 +1637,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (x->props.mode != m->mode || x->id.proto != m->proto) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1663,6 +1665,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, if (!xc) return NULL; + xc->props.family = m->new_family; + + if (xfrm_init_state(xc) < 0) + goto error; + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); @@ -2572,7 +2579,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2603,17 +2610,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(__xfrm_state_mtu); - -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) -{ - mtu = __xfrm_state_mtu(x, mtu); - - if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) - return IPV6_MIN_MTU; - - return mtu; -} +EXPORT_SYMBOL_GPL(xfrm_state_mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8cd6c8129004..a4fb596e87af 100644 --- 
a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2608,6 +2608,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, int n = 0; struct net *net = sock_net(skb->sk); struct xfrm_encap_tmpl *encap = NULL; + u32 if_id = 0; if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2632,7 +2633,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOMEM; } - err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap); + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + + err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id); kfree(encap); diff --git a/security/selinux/ima.c b/security/selinux/ima.c index 727c4e43219d..ff7aea6b3774 100644 --- a/security/selinux/ima.c +++ b/security/selinux/ima.c @@ -77,7 +77,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state) size_t policy_len; int rc = 0; - WARN_ON(!mutex_is_locked(&state->policy_mutex)); + lockdep_assert_held(&state->policy_mutex); state_str = selinux_ima_collect_state(state); if (!state_str) { @@ -117,7 +117,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state) */ void selinux_ima_measure_state(struct selinux_state *state) { - WARN_ON(mutex_is_locked(&state->policy_mutex)); + lockdep_assert_not_held(&state->policy_mutex); mutex_lock(&state->policy_mutex); selinux_ima_measure_state_locked(state); diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 94ea2a8b2bb7..d8ceee9e0d6f 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -76,6 +76,7 @@ static const struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_SETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWCACHEREPORT, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELCHAIN, 
NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, @@ -91,6 +92,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] = { RTM_NEWNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_NEWTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_DELTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_GETTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = @@ -176,7 +180,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ - BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3)); + BUILD_BUG_ON(RTM_MAX != (RTM_NEWTUNNEL + 3)); err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, sizeof(nlmsg_route_perms)); break; diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index d1fcd1d5adae..6fd763d4d15b 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -511,7 +511,8 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) DEFAULT_GFP, 0); if (!sgt) return NULL; - dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->dev.dir); + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, + sg_dma_address(sgt->sgl)); p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); if (p) dmab->private_data = sgt; @@ -540,9 +541,9 @@ static void snd_dma_noncontig_sync(struct snd_dma_buffer *dmab, if (mode == SNDRV_DMA_SYNC_CPU) { if (dmab->dev.dir == DMA_TO_DEVICE) return; + invalidate_kernel_vmap_range(dmab->area, dmab->bytes); dma_sync_sgtable_for_cpu(dmab->dev.dev, dmab->private_data, dmab->dev.dir); - invalidate_kernel_vmap_range(dmab->area, dmab->bytes); } else { if (dmab->dev.dir == DMA_FROM_DEVICE) return; @@ -671,9 +672,13 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { */ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size) { 
- dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->dev.dir); - return dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr, - dmab->dev.dir, DEFAULT_GFP); + void *p; + + p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr, + dmab->dev.dir, DEFAULT_GFP); + if (p) + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr); + return p; } static void snd_dma_noncoherent_free(struct snd_dma_buffer *dmab) diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c index 9f8123893cc8..50eb6c0e6658 100644 --- a/sound/pci/hda/cs35l41_hda_spi.c +++ b/sound/pci/hda/cs35l41_hda_spi.c @@ -28,11 +28,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi) devm_regmap_init_spi(spi, &cs35l41_regmap_spi)); } -static int cs35l41_hda_spi_remove(struct spi_device *spi) +static void cs35l41_hda_spi_remove(struct spi_device *spi) { cs35l41_hda_remove(&spi->dev); - - return 0; } static const struct spi_device_id cs35l41_hda_spi_id[] = { diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4b0338c4c543..572ff0d1fafe 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1615,6 +1615,7 @@ static const struct snd_pci_quirk probe_mask_list[] = { /* forced codec slots */ SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103), SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), + SND_PCI_QUIRK(0x1558, 0x0351, "Schenker Dock 15", 0x105), /* WinFast VP200 H (Teradici) user reported broken communication */ SND_PCI_QUIRK(0x3a21, 0x040d, "WinFast VP200 H", 0x101), {} @@ -1798,8 +1799,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, assign_position_fix(chip, check_position_fix(chip, position_fix[dev])); - check_probe_mask(chip, dev); - if (single_cmd < 0) /* allow fallback to single_cmd at errors */ chip->fallback_to_single_cmd = 1; else /* explicitly set to single_cmd or not */ @@ -1825,6 +1824,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, 
chip->bus.core.needs_damn_long_delay = 1; } + check_probe_mask(chip, dev); + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { dev_err(card->dev, "Error creating device [card]!\n"); @@ -1940,6 +1941,7 @@ static int azx_first_init(struct azx *chip) dma_bits = 32; if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(dma_bits))) dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(&pci->dev, UINT_MAX); /* read number of streams from GCAP register instead of using * hardcoded value diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8315bf7d4c38..3a42457984e9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -138,6 +138,22 @@ struct alc_spec { * COEF access helper functions */ +static void coef_mutex_lock(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + snd_hda_power_up_pm(codec); + mutex_lock(&spec->coef_mutex); +} + +static void coef_mutex_unlock(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + mutex_unlock(&spec->coef_mutex); + snd_hda_power_down_pm(codec); +} + static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx) { @@ -151,12 +167,11 @@ static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx) { - struct alc_spec *spec = codec->spec; unsigned int val; - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); val = __alc_read_coefex_idx(codec, nid, coef_idx); - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); return val; } @@ -173,11 +188,9 @@ static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx, unsigned int coef_val) { - struct alc_spec *spec = codec->spec; - - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); 
__alc_write_coefex_idx(codec, nid, coef_idx, coef_val); - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); } #define alc_write_coef_idx(codec, coef_idx, coef_val) \ @@ -198,11 +211,9 @@ static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx, unsigned int mask, unsigned int bits_set) { - struct alc_spec *spec = codec->spec; - - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set); - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); } #define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \ @@ -235,9 +246,7 @@ struct coef_fw { static void alc_process_coef_fw(struct hda_codec *codec, const struct coef_fw *fw) { - struct alc_spec *spec = codec->spec; - - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); for (; fw->nid; fw++) { if (fw->mask == (unsigned short)-1) __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val); @@ -245,7 +254,7 @@ static void alc_process_coef_fw(struct hda_codec *codec, __alc_update_coefex_idx(codec, fw->nid, fw->idx, fw->mask, fw->val); } - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); } /* @@ -9170,6 +9179,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6), SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), diff --git a/sound/soc/amd/acp/acp-mach.h b/sound/soc/amd/acp/acp-mach.h index fd6299844ebe..c855f50d6b34 100644 --- 
a/sound/soc/amd/acp/acp-mach.h +++ b/sound/soc/amd/acp/acp-mach.h @@ -21,7 +21,6 @@ #include <linux/gpio/consumer.h> #define EN_SPKR_GPIO_GB 0x11F -#define EN_SPKR_GPIO_NK 0x146 #define EN_SPKR_GPIO_NONE -EINVAL enum be_id { diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index 07de46142655..4cc431e54fe1 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -37,7 +37,7 @@ static struct acp_card_drvdata sof_rt5682_max_data = { .hs_codec_id = RT5682, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, - .gpio_spkr_en = EN_SPKR_GPIO_NK, + .gpio_spkr_en = EN_SPKR_GPIO_NONE, }; static struct acp_card_drvdata sof_rt5682s_max_data = { @@ -47,7 +47,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = { .hs_codec_id = RT5682S, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, - .gpio_spkr_en = EN_SPKR_GPIO_NK, + .gpio_spkr_en = EN_SPKR_GPIO_NONE, }; static const struct snd_kcontrol_new acp_controls[] = { diff --git a/sound/soc/codecs/adau1761-spi.c b/sound/soc/codecs/adau1761-spi.c index 655689c9778a..7c9242c2ff94 100644 --- a/sound/soc/codecs/adau1761-spi.c +++ b/sound/soc/codecs/adau1761-spi.c @@ -45,10 +45,9 @@ static int adau1761_spi_probe(struct spi_device *spi) id->driver_data, adau1761_spi_switch_mode); } -static int adau1761_spi_remove(struct spi_device *spi) +static void adau1761_spi_remove(struct spi_device *spi) { adau17x1_remove(&spi->dev); - return 0; } static const struct spi_device_id adau1761_spi_id[] = { diff --git a/sound/soc/codecs/adau1781-spi.c b/sound/soc/codecs/adau1781-spi.c index bb5613574786..1a09633d5a88 100644 --- a/sound/soc/codecs/adau1781-spi.c +++ b/sound/soc/codecs/adau1781-spi.c @@ -45,10 +45,9 @@ static int adau1781_spi_probe(struct spi_device *spi) id->driver_data, adau1781_spi_switch_mode); } -static int adau1781_spi_remove(struct spi_device *spi) +static void adau1781_spi_remove(struct spi_device *spi) { adau17x1_remove(&spi->dev); - return 0; } static const struct 
spi_device_id adau1781_spi_id[] = { diff --git a/sound/soc/codecs/cs35l41-spi.c b/sound/soc/codecs/cs35l41-spi.c index 6dfd5459aa20..169221a5b09f 100644 --- a/sound/soc/codecs/cs35l41-spi.c +++ b/sound/soc/codecs/cs35l41-spi.c @@ -55,13 +55,11 @@ static int cs35l41_spi_probe(struct spi_device *spi) return cs35l41_probe(cs35l41, pdata); } -static int cs35l41_spi_remove(struct spi_device *spi) +static void cs35l41_spi_remove(struct spi_device *spi) { struct cs35l41_private *cs35l41 = spi_get_drvdata(spi); cs35l41_remove(cs35l41); - - return 0; } #ifdef CONFIG_OF diff --git a/sound/soc/codecs/pcm3168a-spi.c b/sound/soc/codecs/pcm3168a-spi.c index ecd379f308e6..b5b08046f545 100644 --- a/sound/soc/codecs/pcm3168a-spi.c +++ b/sound/soc/codecs/pcm3168a-spi.c @@ -26,11 +26,9 @@ static int pcm3168a_spi_probe(struct spi_device *spi) return pcm3168a_probe(&spi->dev, regmap); } -static int pcm3168a_spi_remove(struct spi_device *spi) +static void pcm3168a_spi_remove(struct spi_device *spi) { pcm3168a_remove(&spi->dev); - - return 0; } static const struct spi_device_id pcm3168a_spi_id[] = { diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 7cf559b47e1c..4d29e7196380 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -26,10 +26,9 @@ static int pcm512x_spi_probe(struct spi_device *spi) return pcm512x_probe(&spi->dev, regmap); } -static int pcm512x_spi_remove(struct spi_device *spi) +static void pcm512x_spi_remove(struct spi_device *spi) { pcm512x_remove(&spi->dev); - return 0; } static const struct spi_device_id pcm512x_spi_id[] = { diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c index fb09715bf932..5b12cbf2ba21 100644 --- a/sound/soc/codecs/rt5668.c +++ b/sound/soc/codecs/rt5668.c @@ -1022,11 +1022,13 @@ static void rt5668_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5668_priv, jack_detect_work.work); int val, btn_type; - while (!rt5668->component) - 
usleep_range(10000, 15000); - - while (!rt5668->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5668->component || !rt5668->component->card || + !rt5668->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5668->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5668->calibrate_mutex); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 0a0ec4a021e1..be68d573a490 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1092,11 +1092,13 @@ void rt5682_jack_detect_handler(struct work_struct *work) struct snd_soc_dapm_context *dapm; int val, btn_type; - while (!rt5682->component) - usleep_range(10000, 15000); - - while (!rt5682->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5682->component || !rt5682->component->card || + !rt5682->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5682->jack_detect_work, msecs_to_jiffies(15)); + return; + } dapm = snd_soc_component_get_dapm(rt5682->component); diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c index efa1016831dd..1e662d1be2b3 100644 --- a/sound/soc/codecs/rt5682s.c +++ b/sound/soc/codecs/rt5682s.c @@ -824,11 +824,13 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5682s_priv, jack_detect_work.work); int val, btn_type; - while (!rt5682s->component) - usleep_range(10000, 15000); - - while (!rt5682s->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5682s->component || !rt5682s->component->card || + !rt5682s->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5682s->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5682s->jdet_mutex); mutex_lock(&rt5682s->calibrate_mutex); diff --git 
a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 6549e7fef3e3..c5ea3b115966 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -38,10 +38,12 @@ static void tas2770_reset(struct tas2770_priv *tas2770) gpiod_set_value_cansleep(tas2770->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2770->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2770->component, TAS2770_SW_RST, TAS2770_RST); + usleep_range(1000, 2000); } static int tas2770_set_bias_level(struct snd_soc_component *component, @@ -110,6 +112,7 @@ static int tas2770_codec_resume(struct snd_soc_component *component) if (tas2770->sdz_gpio) { gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); + usleep_range(1000, 2000); } else { ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, TAS2770_PWR_CTRL_MASK, @@ -510,8 +513,10 @@ static int tas2770_codec_probe(struct snd_soc_component *component) tas2770->component = component; - if (tas2770->sdz_gpio) + if (tas2770->sdz_gpio) { gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2770_reset(tas2770); diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c index a8958cd1c692..03cce8d6404f 100644 --- a/sound/soc/codecs/tlv320aic32x4-spi.c +++ b/sound/soc/codecs/tlv320aic32x4-spi.c @@ -46,11 +46,9 @@ static int aic32x4_spi_probe(struct spi_device *spi) return aic32x4_probe(&spi->dev, regmap); } -static int aic32x4_spi_remove(struct spi_device *spi) +static void aic32x4_spi_remove(struct spi_device *spi) { aic32x4_remove(&spi->dev); - - return 0; } static const struct spi_device_id aic32x4_spi_id[] = { diff --git a/sound/soc/codecs/tlv320aic3x-spi.c b/sound/soc/codecs/tlv320aic3x-spi.c index 494e84402232..deed6ec7e081 100644 --- a/sound/soc/codecs/tlv320aic3x-spi.c +++ b/sound/soc/codecs/tlv320aic3x-spi.c @@ -35,11 +35,9 @@ static int aic3x_spi_probe(struct spi_device *spi) return aic3x_probe(&spi->dev, regmap, id->driver_data); } 
-static int aic3x_spi_remove(struct spi_device *spi) +static void aic3x_spi_remove(struct spi_device *spi) { aic3x_remove(&spi->dev); - - return 0; } static const struct spi_device_id aic3x_spi_id[] = { diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index 28b4656c4e14..1bef1c500c8e 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -969,7 +969,7 @@ static int wm0010_spi_probe(struct spi_device *spi) return 0; } -static int wm0010_spi_remove(struct spi_device *spi) +static void wm0010_spi_remove(struct spi_device *spi) { struct wm0010_priv *wm0010 = spi_get_drvdata(spi); @@ -980,8 +980,6 @@ static int wm0010_spi_remove(struct spi_device *spi) if (wm0010->irq) free_irq(wm0010->irq, wm0010); - - return 0; } static struct spi_driver wm0010_spi_driver = { diff --git a/sound/soc/codecs/wm8804-spi.c b/sound/soc/codecs/wm8804-spi.c index 9a8da1511c34..628568724c20 100644 --- a/sound/soc/codecs/wm8804-spi.c +++ b/sound/soc/codecs/wm8804-spi.c @@ -24,10 +24,9 @@ static int wm8804_spi_probe(struct spi_device *spi) return wm8804_probe(&spi->dev, regmap); } -static int wm8804_spi_remove(struct spi_device *spi) +static void wm8804_spi_remove(struct spi_device *spi) { wm8804_remove(&spi->dev); - return 0; } static const struct of_device_id wm8804_of_match[] = { diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index f3672e3d1703..0582585236a2 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1441,7 +1441,8 @@ static int wm_adsp_buffer_parse_coeff(struct cs_dsp_coeff_ctl *cs_ctl) int ret, i; for (i = 0; i < 5; ++i) { - ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1, sizeof(coeff_v1)); + ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1, + min(cs_ctl->len, sizeof(coeff_v1))); if (ret < 0) return ret; diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index 148ddf4cace0..aeca58246fc7 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ 
-952,6 +952,7 @@ static int skl_first_init(struct hdac_bus *bus) /* allow 64bit DMA address if supported by H/W */ if (dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(64))) dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(bus->dev, UINT_MAX); /* initialize streams */ snd_hdac_ext_stream_init_all diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index a59e9d20cb46..4b1773c1fb95 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -524,7 +524,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, return -EINVAL; } - ret = regmap_update_bits(map, reg_irqclr, val_irqclr, val_irqclr); + ret = regmap_write_bits(map, reg_irqclr, val_irqclr, val_irqclr); if (ret) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", ret); return ret; @@ -665,7 +665,7 @@ static irqreturn_t lpass_dma_interrupt_handler( return -EINVAL; } if (interrupts & LPAIF_IRQ_PER(chan)) { - rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val)); + rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val)); if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); @@ -676,7 +676,7 @@ static irqreturn_t lpass_dma_interrupt_handler( } if (interrupts & LPAIF_IRQ_XRUN(chan)) { - rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val)); + rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val)); if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); @@ -688,7 +688,7 @@ static irqreturn_t lpass_dma_interrupt_handler( } if (interrupts & LPAIF_IRQ_ERR(chan)) { - rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val)); + rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val)); if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 9833611b83d1..03ea9591fb16 100644 --- 
a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -308,7 +308,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, unsigned int sign_bit = mc->sign_bit; unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; - int err; + int err, ret; bool type_2r = false; unsigned int val2 = 0; unsigned int val, val_mask; @@ -350,12 +350,18 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg, val_mask, val); if (err < 0) return err; + ret = err; - if (type_2r) + if (type_2r) { err = snd_soc_component_update_bits(component, reg2, val_mask, - val2); + val2); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } + } - return err; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw); @@ -421,6 +427,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int min = mc->min; unsigned int mask = (1U << (fls(min + max) - 1)) - 1; int err = 0; + int ret; unsigned int val, val_mask; if (ucontrol->value.integer.value[0] < 0) @@ -437,6 +444,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg, val_mask, val); if (err < 0) return err; + ret = err; if (snd_soc_volsw_is_stereo(mc)) { unsigned int val2; @@ -447,6 +455,11 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg2, val_mask, val2); + + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } } return err; } @@ -506,7 +519,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; - int ret; + int err, ret; if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; @@ -515,9 +528,10 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = 
snd_soc_component_update_bits(component, reg, val_mask, val); - if (ret < 0) - return ret; + err = snd_soc_component_update_bits(component, reg, val_mask, val); + if (err < 0) + return err; + ret = err; if (snd_soc_volsw_is_stereo(mc)) { if (invert) @@ -527,8 +541,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, rreg, val_mask, + err = snd_soc_component_update_bits(component, rreg, val_mask, val); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } } return ret; @@ -877,6 +895,7 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned long mask = (1UL<<mc->nbits)-1; long max = mc->max; long val = ucontrol->value.integer.value[0]; + int ret = 0; unsigned int i; if (val < mc->min || val > mc->max) @@ -891,9 +910,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, regmask, regval); if (err < 0) return err; + if (err > 0) + ret = err; } - return 0; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_xr_sx); diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index c8fb082209ce..1385695d7745 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -956,6 +956,7 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) dev_dbg(sdev->dev, "DMA mask is 32 bit\n"); dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)); } + dma_set_max_seg_size(&pci->dev, UINT_MAX); /* init streams */ ret = hda_dsp_stream_init(sdev); diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c index 76c0e37a838c..56d2c712e257 100644 --- a/sound/spi/at73c213.c +++ b/sound/spi/at73c213.c @@ -1001,7 +1001,7 @@ out: return retval; } -static int snd_at73c213_remove(struct spi_device *spi) +static void snd_at73c213_remove(struct spi_device *spi) { struct snd_card *card = dev_get_drvdata(&spi->dev); struct snd_at73c213 *chip = card->private_data; @@ -1066,8 +1066,6 @@ out: ssc_free(chip->ssc); 
snd_card_free(card); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 70319c822c10..2d444ec74202 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -47,13 +47,13 @@ struct snd_usb_implicit_fb_match { static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* Generic matching */ IMPLICIT_FB_GENERIC_DEV(0x0499, 0x1509), /* Steinberg UR22 */ - IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2080), /* M-Audio FastTrack Ultra */ - IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2081), /* M-Audio FastTrack Ultra */ IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2030), /* M-Audio Fast Track C400 */ IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2031), /* M-Audio Fast Track C600 */ /* Fixed EP */ /* FIXME: check the availability of generic matching */ + IMPLICIT_FB_FIXED_DEV(0x0763, 0x2080, 0x81, 2), /* M-Audio FastTrack Ultra */ + IMPLICIT_FB_FIXED_DEV(0x0763, 0x2081, 0x81, 2), /* M-Audio FastTrack Ultra */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8010, 0x81, 2), /* Fractal Audio Axe-Fx III */ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0001, 0x81, 2), /* Solid State Logic SSL2 */ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */ diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 630766ba259f..a5641956ef10 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -3678,17 +3678,14 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) - return err; + break; } idx++; } } else { /* master */ - if (cval->cached) { - err = snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); - if (err < 0) - return err; - } + if (cval->cached) + snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); } return 0; diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 3faf0f97edb1..a4a39c3e0f19 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -476,6 
+476,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 1600b17dbb8a..1d3a90d93fe2 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -11,7 +11,7 @@ from drgn.helpers.linux import list_for_each_entry, list_empty from drgn.helpers.linux import for_each_page from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.percpu import per_cpu_ptr -from drgn import container_of, FaultError, Object +from drgn import container_of, FaultError, Object, cast DESC = """ @@ -69,15 +69,15 @@ def oo_objects(s): def count_partial(n, fn): - nr_pages = 0 - for page in list_for_each_entry('struct page', n.partial.address_of_(), - 'lru'): - nr_pages += fn(page) - return nr_pages + nr_objs = 0 + for slab in list_for_each_entry('struct slab', n.partial.address_of_(), + 'slab_list'): + nr_objs += fn(slab) + return nr_objs -def count_free(page): - return page.objects - page.inuse +def count_free(slab): + return slab.objects - slab.inuse def slub_get_slabinfo(s, cfg): @@ -145,14 +145,14 @@ def detect_kernel_config(): return cfg -def for_each_slab_page(prog): +def for_each_slab(prog): PGSlab = 1 << prog.constant('PG_slab') PGHead = 1 << prog.constant('PG_head') for page in for_each_page(prog): try: if page.flags.value_() & PGSlab: - yield page + yield cast('struct slab *', page) except FaultError: pass @@ -190,13 +190,13 @@ def main(): 'list'): obj_cgroups.add(ptr.value_()) - # look over all slab pages, belonging to non-root memcgs - # and look for objects belonging to the given memory cgroup - for page in for_each_slab_page(prog): - objcg_vec_raw = page.memcg_data.value_() + # look 
over all slab folios and look for objects belonging + # to the given memory cgroup + for slab in for_each_slab(prog): + objcg_vec_raw = slab.memcg_data.value_() if objcg_vec_raw == 0: continue - cache = page.slab_cache + cache = slab.slab_cache if not cache: continue addr = cache.value_() diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index afe3d0d7f5f2..4eebea830613 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5086,6 +5086,37 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. + * + * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * Description + * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also + * change the __sk_buff->delivery_time_type to *dtime_type*. + * + * When setting a delivery time (non zero *dtime*) to + * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* + * is supported. It is the only delivery_time_type that will be + * kept after bpf_redirect_*(). + * + * If there is no need to change the __sk_buff->delivery_time_type, + * the delivery time can be directly written to __sk_buff->tstamp + * instead. + * + * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE + * can be used to clear any delivery time stored in + * __sk_buff->tstamp. + * + * Only IPv4 and IPv6 skb->protocol are supported. + * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. 
+ * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5280,6 +5311,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ + FN(skb_set_delivery_time), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5469,6 +5501,12 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_DELIVERY_TIME_NONE, + BPF_SKB_DELIVERY_TIME_UNSPEC, + BPF_SKB_DELIVERY_TIME_MONO, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -5509,7 +5547,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u32 :32; /* Padding, future use. */ + __u8 delivery_time_type; + __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 6218f93f5c1a..e1ba2d51b717 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -860,6 +860,7 @@ enum { IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, __IFLA_BOND_MAX, }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index abae8184e171..fa478ddcd18a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -463,7 +463,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - 
evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) + evsel__do_check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT, allow_user_set)) return -EINVAL; if (PRINT_FIELD(SYM) && diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index 1f147fe6595f..e32ece90e164 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -22,19 +22,6 @@ #include "tests.h" #include "../perf-sys.h" -/* - * PowerPC and S390 do not support creation of instruction breakpoints using the - * perf_event interface. - * - * Just disable the test for these architectures until these issues are - * resolved. - */ -#if defined(__powerpc__) || defined(__s390x__) -#define BP_ACCOUNT_IS_SUPPORTED 0 -#else -#define BP_ACCOUNT_IS_SUPPORTED 1 -#endif - #define NUM_THREADS 5 static struct { @@ -135,7 +122,7 @@ static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __m char sbuf[STRERR_BUFSIZE]; int i, fd, ret = TEST_FAIL; - if (!BP_ACCOUNT_IS_SUPPORTED) { + if (!BP_SIGNAL_IS_SUPPORTED) { pr_debug("Test not supported on this architecture"); return TEST_SKIP; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index f5d260b1df4d..15a4547d608e 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr) if (!files) return -ENOMEM; - data->dir.version = PERF_DIR_VERSION; - data->dir.files = files; - data->dir.nr = nr; - for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; @@ -62,6 +58,9 @@ int perf_data__create_dir(struct perf_data *data, int nr) file->fd = ret; } + data->dir.version = PERF_DIR_VERSION; + data->dir.files = files; + data->dir.nr = nr; return 0; out_err: diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c index 7f234215147d..57f02beef023 100644 --- a/tools/perf/util/evlist-hybrid.c +++ b/tools/perf/util/evlist-hybrid.c @@ -154,8 +154,8 @@ int 
evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) perf_cpu_map__put(matched_cpus); perf_cpu_map__put(unmatched_cpus); } - - ret = (unmatched_count == events_nr) ? -1 : 0; + if (events_nr) + ret = (unmatched_count == events_nr) ? -1 : 0; out: perf_cpu_map__put(cpus); return ret; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 647b0a833628..2b255e28ed26 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -17,6 +17,8 @@ #include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> +#include <linux/time_types.h> +#include <linux/net_tstamp.h> #include <sched.h> #include <stdbool.h> #include <stdio.h> @@ -29,6 +31,11 @@ #include "test_tc_neigh_fib.skel.h" #include "test_tc_neigh.skel.h" #include "test_tc_peer.skel.h" +#include "test_tc_dtime.skel.h" + +#ifndef TCP_TX_DELAY +#define TCP_TX_DELAY 37 +#endif #define NS_SRC "ns_src" #define NS_FWD "ns_fwd" @@ -61,6 +68,7 @@ #define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" #define TIMEOUT_MILLIS 10000 +#define NSEC_PER_SEC 1000000000ULL #define log_err(MSG, ...) 
\ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ @@ -440,6 +448,431 @@ static int set_forwarding(bool enable) return 0; } +static void rcv_tstamp(int fd, const char *expected, size_t s) +{ + struct __kernel_timespec pkt_ts = {}; + char ctl[CMSG_SPACE(sizeof(pkt_ts))]; + struct timespec now_ts; + struct msghdr msg = {}; + __u64 now_ns, pkt_ns; + struct cmsghdr *cmsg; + struct iovec iov; + char data[32]; + int ret; + + iov.iov_base = data; + iov.iov_len = sizeof(data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &ctl; + msg.msg_controllen = sizeof(ctl); + + ret = recvmsg(fd, &msg, 0); + if (!ASSERT_EQ(ret, s, "recvmsg")) + return; + ASSERT_STRNEQ(data, expected, s, "expected rcv data"); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg && cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) + memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); + + pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; + ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp"); + + ret = clock_gettime(CLOCK_REALTIME, &now_ts); + ASSERT_OK(ret, "clock_gettime"); + now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; + + if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp")) + ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC, + "check rcv tstamp"); +} + +static void snd_tstamp(int fd, char *b, size_t s) +{ + struct sock_txtime opt = { .clockid = CLOCK_TAI }; + char ctl[CMSG_SPACE(sizeof(__u64))]; + struct timespec now_ts; + struct msghdr msg = {}; + struct cmsghdr *cmsg; + struct iovec iov; + __u64 now_ns; + int ret; + + ret = clock_gettime(CLOCK_TAI, &now_ts); + ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)"); + now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; + + iov.iov_base = b; + iov.iov_len = s; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &ctl; + msg.msg_controllen = sizeof(ctl); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TXTIME; + cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns)); + *(__u64 
*)CMSG_DATA(cmsg) = now_ns; + + ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt)); + ASSERT_OK(ret, "setsockopt(SO_TXTIME)"); + + ret = sendmsg(fd, &msg, 0); + ASSERT_EQ(ret, s, "sendmsg"); +} + +static void test_inet_dtime(int family, int type, const char *addr, __u16 port) +{ + int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err; + char buf[] = "testing testing"; + struct nstoken *nstoken; + + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) + return; + listen_fd = start_server(family, type, addr, port, 0); + close_netns(nstoken); + + if (!ASSERT_GE(listen_fd, 0, "listen")) + return; + + /* Ensure the kernel puts the (rcv) timestamp for all skb */ + err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + &opt, sizeof(opt)); + if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) + goto done; + + if (type == SOCK_STREAM) { + /* Ensure the kernel set EDT when sending out rst/ack + * from the kernel's ctl_sk. + */ + err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt, + sizeof(opt)); + if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)")) + goto done; + } + + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto done; + client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); + close_netns(nstoken); + + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto done; + + if (type == SOCK_STREAM) { + int n; + + accept_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto done; + + n = write(client_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + rcv_tstamp(accept_fd, buf, sizeof(buf)); + } else { + snd_tstamp(client_fd, buf, sizeof(buf)); + rcv_tstamp(listen_fd, buf, sizeof(buf)); + } + +done: + close(listen_fd); + if (accept_fd != -1) + close(accept_fd); + if (client_fd != -1) + close(client_fd); +} + +static int netns_load_dtime_bpf(struct test_tc_dtime *skel) +{ + struct nstoken *nstoken; + +#define PIN_FNAME(__file) 
"/sys/fs/bpf/" #__file +#define PIN(__prog) ({ \ + int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \ + if (!ASSERT_OK(err, "pin " #__prog)) \ + goto fail; \ + }) + + /* setup ns_src tc progs */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) + return -1; + PIN(egress_host); + PIN(ingress_host); + SYS("tc qdisc add dev veth_src clsact"); + SYS("tc filter add dev veth_src ingress bpf da object-pinned " + PIN_FNAME(ingress_host)); + SYS("tc filter add dev veth_src egress bpf da object-pinned " + PIN_FNAME(egress_host)); + close_netns(nstoken); + + /* setup ns_dst tc progs */ + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) + return -1; + PIN(egress_host); + PIN(ingress_host); + SYS("tc qdisc add dev veth_dst clsact"); + SYS("tc filter add dev veth_dst ingress bpf da object-pinned " + PIN_FNAME(ingress_host)); + SYS("tc filter add dev veth_dst egress bpf da object-pinned " + PIN_FNAME(egress_host)); + close_netns(nstoken); + + /* setup ns_fwd tc progs */ + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) + return -1; + PIN(ingress_fwdns_prio100); + PIN(egress_fwdns_prio100); + PIN(ingress_fwdns_prio101); + PIN(egress_fwdns_prio101); + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned " + PIN_FNAME(ingress_fwdns_prio100)); + SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned " + PIN_FNAME(ingress_fwdns_prio101)); + SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned " + PIN_FNAME(egress_fwdns_prio100)); + SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned " + PIN_FNAME(egress_fwdns_prio101)); + SYS("tc qdisc add dev veth_src_fwd clsact"); + SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned " + PIN_FNAME(ingress_fwdns_prio100)); + SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned 
" + PIN_FNAME(ingress_fwdns_prio101)); + SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned " + PIN_FNAME(egress_fwdns_prio100)); + SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned " + PIN_FNAME(egress_fwdns_prio101)); + close_netns(nstoken); + +#undef PIN + + return 0; + +fail: + close_netns(nstoken); + return -1; +} + +enum { + INGRESS_FWDNS_P100, + INGRESS_FWDNS_P101, + EGRESS_FWDNS_P100, + EGRESS_FWDNS_P101, + INGRESS_ENDHOST, + EGRESS_ENDHOST, + SET_DTIME, + __MAX_CNT, +}; + +const char *cnt_names[] = { + "ingress_fwdns_p100", + "ingress_fwdns_p101", + "egress_fwdns_p100", + "egress_fwdns_p101", + "ingress_endhost", + "egress_endhost", + "set_dtime", +}; + +enum { + TCP_IP6_CLEAR_DTIME, + TCP_IP4, + TCP_IP6, + UDP_IP4, + UDP_IP6, + TCP_IP4_RT_FWD, + TCP_IP6_RT_FWD, + UDP_IP4_RT_FWD, + UDP_IP6_RT_FWD, + UKN_TEST, + __NR_TESTS, +}; + +const char *test_names[] = { + "tcp ip6 clear dtime", + "tcp ip4", + "tcp ip6", + "udp ip4", + "udp ip6", + "tcp ip4 rt fwd", + "tcp ip6 rt fwd", + "udp ip4 rt fwd", + "udp ip6 rt fwd", +}; + +static const char *dtime_cnt_str(int test, int cnt) +{ + static char name[64]; + + snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]); + + return name; +} + +static const char *dtime_err_str(int test, int cnt) +{ + static char name[64]; + + snprintf(name, sizeof(name), "%s %s errs", test_names[test], + cnt_names[cnt]); + + return name; +} + +static void test_tcp_clear_dtime(struct test_tc_dtime *skel) +{ + int i, t = TCP_IP6_CLEAR_DTIME; + __u32 *dtimes = skel->bss->dtimes[t]; + __u32 *errs = skel->bss->errs[t]; + + skel->bss->test = t; + test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0); + + ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, + dtime_cnt_str(t, INGRESS_FWDNS_P100)); + ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, + dtime_cnt_str(t, INGRESS_FWDNS_P101)); + ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0, + dtime_cnt_str(t, EGRESS_FWDNS_P100)); + ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 
0, + dtime_cnt_str(t, EGRESS_FWDNS_P101)); + ASSERT_GT(dtimes[EGRESS_ENDHOST], 0, + dtime_cnt_str(t, EGRESS_ENDHOST)); + ASSERT_GT(dtimes[INGRESS_ENDHOST], 0, + dtime_cnt_str(t, INGRESS_ENDHOST)); + + for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) + ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); +} + +static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) +{ + __u32 *dtimes, *errs; + const char *addr; + int i, t; + + if (family == AF_INET) { + t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD; + addr = IP4_DST; + } else { + t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD; + addr = IP6_DST; + } + + dtimes = skel->bss->dtimes[t]; + errs = skel->bss->errs[t]; + + skel->bss->test = t; + test_inet_dtime(family, SOCK_STREAM, addr, 0); + + /* fwdns_prio100 prog does not read delivery_time_type, so + * kernel puts the (rcv) timestamp in __sk_buff->tstamp + */ + ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, + dtime_cnt_str(t, INGRESS_FWDNS_P100)); + for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++) + ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); + + for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) + ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); +} + +static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) +{ + __u32 *dtimes, *errs; + const char *addr; + int i, t; + + if (family == AF_INET) { + t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD; + addr = IP4_DST; + } else { + t = bpf_fwd ? 
UDP_IP6 : UDP_IP6_RT_FWD; + addr = IP6_DST; + } + + dtimes = skel->bss->dtimes[t]; + errs = skel->bss->errs[t]; + + skel->bss->test = t; + test_inet_dtime(family, SOCK_DGRAM, addr, 0); + + ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, + dtime_cnt_str(t, INGRESS_FWDNS_P100)); + /* non mono delivery time is not forwarded */ + ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, + dtime_cnt_str(t, INGRESS_FWDNS_P100)); + for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) + ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); + + for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) + ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); +} + +static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) +{ + struct test_tc_dtime *skel; + struct nstoken *nstoken; + int err; + + skel = test_tc_dtime__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) + return; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_dtime__load(skel); + if (!ASSERT_OK(err, "test_tc_dtime__load")) + goto done; + + if (netns_load_dtime_bpf(skel)) + goto done; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + goto done; + err = set_forwarding(false); + close_netns(nstoken); + if (!ASSERT_OK(err, "disable forwarding")) + goto done; + + test_tcp_clear_dtime(skel); + + test_tcp_dtime(skel, AF_INET, true); + test_tcp_dtime(skel, AF_INET6, true); + test_udp_dtime(skel, AF_INET, true); + test_udp_dtime(skel, AF_INET6, true); + + /* Test the kernel ip[6]_forward path instead + * of bpf_redirect_neigh(). 
+ */ + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + goto done; + err = set_forwarding(true); + close_netns(nstoken); + if (!ASSERT_OK(err, "enable forwarding")) + goto done; + + test_tcp_dtime(skel, AF_INET, false); + test_tcp_dtime(skel, AF_INET6, false); + test_udp_dtime(skel, AF_INET, false); + test_udp_dtime(skel, AF_INET6, false); + +done: + test_tc_dtime__destroy(skel); +} + static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) { struct nstoken *nstoken = NULL; @@ -787,6 +1220,7 @@ static void *test_tc_redirect_run_tests(void *arg) RUN_TEST(tc_redirect_peer_l3); RUN_TEST(tc_redirect_neigh); RUN_TEST(tc_redirect_neigh_fib); + RUN_TEST(tc_redirect_dtime); return NULL; } diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c new file mode 100644 index 000000000000..9d9e8e17b8a0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2022 Meta + +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <sys/socket.h> + +/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst + * | | + * ns_src | ns_fwd | ns_dst + * + * ns_src and ns_dst: ENDHOST namespace + * ns_fwd: Forwarding namespace + */ + +#define ctx_ptr(field) (void *)(long)(field) + +#define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */ +#define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */ + +#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } +#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } + +#define 
v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1] && \ + a.s6_addr32[2] == b.s6_addr32[2] && \ + a.s6_addr32[3] == b.s6_addr32[3]) + +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; + +#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef +#define INGRESS_FWDNS_MAGIC 0x1b9fbeef +#define EGRESS_FWDNS_MAGIC 0x2b9fbeef + +enum { + INGRESS_FWDNS_P100, + INGRESS_FWDNS_P101, + EGRESS_FWDNS_P100, + EGRESS_FWDNS_P101, + INGRESS_ENDHOST, + EGRESS_ENDHOST, + SET_DTIME, + __MAX_CNT, +}; + +enum { + TCP_IP6_CLEAR_DTIME, + TCP_IP4, + TCP_IP6, + UDP_IP4, + UDP_IP6, + TCP_IP4_RT_FWD, + TCP_IP6_RT_FWD, + UDP_IP4_RT_FWD, + UDP_IP6_RT_FWD, + UKN_TEST, + __NR_TESTS, +}; + +enum { + SRC_NS = 1, + DST_NS, +}; + +__u32 dtimes[__NR_TESTS][__MAX_CNT] = {}; +__u32 errs[__NR_TESTS][__MAX_CNT] = {}; +__u32 test = 0; + +static void inc_dtimes(__u32 idx) +{ + if (test < __NR_TESTS) + dtimes[test][idx]++; + else + dtimes[UKN_TEST][idx]++; +} + +static void inc_errs(__u32 idx) +{ + if (test < __NR_TESTS) + errs[test][idx]++; + else + errs[UKN_TEST][idx]++; +} + +static int skb_proto(int type) +{ + return type & 0xff; +} + +static int skb_ns(int type) +{ + return (type >> 8) & 0xff; +} + +static bool fwdns_clear_dtime(void) +{ + return test == TCP_IP6_CLEAR_DTIME; +} + +static bool bpf_fwd(void) +{ + return test < TCP_IP4_RT_FWD; +} + +/* -1: parse error: TC_ACT_SHOT + * 0: not testing traffic: TC_ACT_OK + * >0: first byte is the inet_proto, second byte has the netns + * of the sender + */ +static int skb_get_type(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u8 inet_proto = 0, ns = 0; + struct ipv6hdr *ip6h; + struct iphdr *iph; + + switch (skb->protocol) { + case __bpf_htons(ETH_P_IP): + iph = data + sizeof(struct ethhdr); + if (iph + 1 > data_end) + return -1; + if (iph->saddr == ip4_src) + ns = SRC_NS; + else if (iph->saddr == ip4_dst) + ns = DST_NS; + inet_proto = iph->protocol; + 
break; + case __bpf_htons(ETH_P_IPV6): + ip6h = data + sizeof(struct ethhdr); + if (ip6h + 1 > data_end) + return -1; + if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src)) + ns = SRC_NS; + else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) + ns = DST_NS; + inet_proto = ip6h->nexthdr; + break; + default: + return 0; + } + + if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) + return 0; + + return (ns << 8 | inet_proto); +} + +/* format: direction@iface@netns + * egress@veth_(src|dst)@ns_(src|dst) + */ +SEC("tc") +int egress_host(struct __sk_buff *skb) +{ + int skb_type; + + skb_type = skb_get_type(skb); + if (skb_type == -1) + return TC_ACT_SHOT; + if (!skb_type) + return TC_ACT_OK; + + if (skb_proto(skb_type) == IPPROTO_TCP) { + if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO && + skb->tstamp) + inc_dtimes(EGRESS_ENDHOST); + else + inc_errs(EGRESS_ENDHOST); + } else { + if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_UNSPEC && + skb->tstamp) + inc_dtimes(EGRESS_ENDHOST); + else + inc_errs(EGRESS_ENDHOST); + } + + skb->tstamp = EGRESS_ENDHOST_MAGIC; + + return TC_ACT_OK; +} + +/* ingress@veth_(src|dst)@ns_(src|dst) */ +SEC("tc") +int ingress_host(struct __sk_buff *skb) +{ + int skb_type; + + skb_type = skb_get_type(skb); + if (skb_type == -1) + return TC_ACT_SHOT; + if (!skb_type) + return TC_ACT_OK; + + if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO && + skb->tstamp == EGRESS_FWDNS_MAGIC) + inc_dtimes(INGRESS_ENDHOST); + else + inc_errs(INGRESS_ENDHOST); + + return TC_ACT_OK; +} + +/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */ +SEC("tc") +int ingress_fwdns_prio100(struct __sk_buff *skb) +{ + int skb_type; + + skb_type = skb_get_type(skb); + if (skb_type == -1) + return TC_ACT_SHOT; + if (!skb_type) + return TC_ACT_OK; + + /* delivery_time is only available to the ingress + * if the tc-bpf checks the skb->delivery_time_type. 
+ */ + if (skb->tstamp == EGRESS_ENDHOST_MAGIC) + inc_errs(INGRESS_FWDNS_P100); + + if (fwdns_clear_dtime()) + skb->tstamp = 0; + + return TC_ACT_UNSPEC; +} + +/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */ +SEC("tc") +int egress_fwdns_prio100(struct __sk_buff *skb) +{ + int skb_type; + + skb_type = skb_get_type(skb); + if (skb_type == -1) + return TC_ACT_SHOT; + if (!skb_type) + return TC_ACT_OK; + + /* delivery_time is always available to egress even + * if the tc-bpf did not use the delivery_time_type. + */ + if (skb->tstamp == INGRESS_FWDNS_MAGIC) + inc_dtimes(EGRESS_FWDNS_P100); + else + inc_errs(EGRESS_FWDNS_P100); + + if (fwdns_clear_dtime()) + skb->tstamp = 0; + + return TC_ACT_UNSPEC; +} + +/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */ +SEC("tc") +int ingress_fwdns_prio101(struct __sk_buff *skb) +{ + __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; + int skb_type; + + skb_type = skb_get_type(skb); + if (skb_type == -1 || !skb_type) + /* Should have handled in prio100 */ + return TC_ACT_SHOT; + + if (skb_proto(skb_type) == IPPROTO_UDP) + expected_dtime = 0; + + if (skb->delivery_time_type) { + if (fwdns_clear_dtime() || + skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO || + skb->tstamp != expected_dtime) + inc_errs(INGRESS_FWDNS_P101); + else + inc_dtimes(INGRESS_FWDNS_P101); + } else { + if (!fwdns_clear_dtime() && expected_dtime) + inc_errs(INGRESS_FWDNS_P101); + } + + if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) { + skb->tstamp = INGRESS_FWDNS_MAGIC; + } else { + if (bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC, + BPF_SKB_DELIVERY_TIME_MONO)) + inc_errs(SET_DTIME); + if (!bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC, + BPF_SKB_DELIVERY_TIME_UNSPEC)) + inc_errs(SET_DTIME); + } + + if (skb_ns(skb_type) == SRC_NS) + return bpf_fwd() ? + bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK; + else + return bpf_fwd() ? 
+ bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK; +} + +/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */ +SEC("tc") +int egress_fwdns_prio101(struct __sk_buff *skb) +{ + int skb_type; + + skb_type = skb_get_type(skb); + if (skb_type == -1 || !skb_type) + /* Should have handled in prio100 */ + return TC_ACT_SHOT; + + if (skb->delivery_time_type) { + if (fwdns_clear_dtime() || + skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO || + skb->tstamp != INGRESS_FWDNS_MAGIC) + inc_errs(EGRESS_FWDNS_P101); + else + inc_dtimes(EGRESS_FWDNS_P101); + } else { + if (!fwdns_clear_dtime()) + inc_errs(EGRESS_FWDNS_P101); + } + + if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) { + skb->tstamp = EGRESS_FWDNS_MAGIC; + } else { + if (bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC, + BPF_SKB_DELIVERY_TIME_MONO)) + inc_errs(SET_DTIME); + if (!bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC, + BPF_SKB_DELIVERY_TIME_UNSPEC)) + inc_errs(SET_DTIME); + } + + return TC_ACT_OK; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index bcb110e830ce..dea33dc93790 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -50,8 +50,8 @@ for current_test in ${TESTS:-$ALL_TESTS}; do else log_test "'$current_test' [$profile] overflow $target" fi + RET_FIN=$(( RET_FIN || RET )) done - RET_FIN=$(( RET_FIN || RET )) done done current_test="" diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 3e3e06ea5703..86e787895f78 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -60,7 +60,8 @@ __tc_police_test() tc_police_rules_create $count $should_fail - 
offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l) + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") ((offload_count == count)) check_err_fail $should_fail $? "tc police offload count" } diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 12c5e27d32c1..2d7fca446c7f 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,8 +3,8 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 +TEST_PROGS := binfmt_script +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index e96e279e0533..25432b8cd5bd 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -19,7 +19,7 @@ fail() { # mesg FILTER=set_ftrace_filter FUNC1="schedule" -FUNC2="do_softirq" +FUNC2="scheduler_tick" ALL_FUNCS="#### all functions enabled ####" diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 9ad38bd360a4..b08d30bf71c5 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -366,6 +366,7 @@ static struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; + int ret; int nr_vcpus = test_args.nr_vcpus; vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); @@ -382,7 +383,11 
@@ static struct kvm_vm *test_vm_create(void) ucall_init(vm, NULL); test_init_timer_irq(vm); - vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + if (ret < 0) { + print_skip("Failed to create vgic-v3"); + exit(KSFT_SKIP); + } /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index e6c7d7f8fbd1..7eca97799917 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -761,6 +761,10 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) gic_fd = vgic_v3_setup(vm, 1, nr_irqs, GICD_BASE_GPA, GICR_BASE_GPA); + if (gic_fd < 0) { + print_skip("Failed to create vgic-v3, skipping"); + exit(KSFT_SKIP); + } vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handlers[args.eoi_split][args.level_sensitive]); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b3a0fca0d780..f5cd0c536d85 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -52,7 +52,9 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, nr_vcpus, nr_vcpus_created); /* Distributor setup */ - gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + if (_kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, + false, &gic_fd) != 0) + return -1; kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs, true); diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 192a2899bae8..94df2692e6e4 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -455,6 +455,7 @@ static void mfd_fail_write(int fd) printf("mmap()+mprotect() 
didn't fail as expected\n"); abort(); } + munmap(p, mfd_def_size); } /* verify PUNCH_HOLE fails */ diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index f31205f04ee0..8c5fea68ae67 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1236,7 +1236,7 @@ static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long } /** - * Validate that an attached mount in our mount namespace can be idmapped. + * Validate that an attached mount in our mount namespace cannot be idmapped. * (The kernel enforces that the mount's mount namespace and the caller's mount * namespace match.) */ @@ -1259,7 +1259,7 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); - ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index e2690cc42da3..2271a8727f62 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,7 +9,7 @@ ret=0 ksft_skip=4 # all tests in this script. 
Can be overridden with -t option -TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle" +TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1954,6 +1954,61 @@ ipv6_mangle_test() route_cleanup } +ip_neigh_get_check() +{ + ip neigh help 2>&1 | grep -q 'ip neigh get' + if [ $? -ne 0 ]; then + echo "iproute2 command does not support neigh get. Skipping test" + return 1 + fi + + return 0 +} + +ipv4_bcast_neigh_test() +{ + local rc + + echo + echo "IPv4 broadcast neighbour tests" + + ip_neigh_get_check || return 1 + + setup + + set -e + run_cmd "$IP neigh add 192.0.2.111 lladdr 00:11:22:33:44:55 nud perm dev dummy0" + run_cmd "$IP neigh add 192.0.2.255 lladdr 00:11:22:33:44:55 nud perm dev dummy0" + + run_cmd "$IP neigh get 192.0.2.111 dev dummy0" + run_cmd "$IP neigh get 192.0.2.255 dev dummy0" + + run_cmd "$IP address add 192.0.2.1/24 broadcast 192.0.2.111 dev dummy0" + + run_cmd "$IP neigh add 203.0.113.111 nud failed dev dummy0" + run_cmd "$IP neigh add 203.0.113.255 nud failed dev dummy0" + + run_cmd "$IP neigh get 203.0.113.111 dev dummy0" + run_cmd "$IP neigh get 203.0.113.255 dev dummy0" + + run_cmd "$IP address add 203.0.113.1/24 broadcast 203.0.113.111 dev dummy0" + set +e + + run_cmd "$IP neigh get 192.0.2.111 dev dummy0" + log_test $? 0 "Resolved neighbour for broadcast address" + + run_cmd "$IP neigh get 192.0.2.255 dev dummy0" + log_test $? 0 "Resolved neighbour for network broadcast address" + + run_cmd "$IP neigh get 203.0.113.111 dev dummy0" + log_test $? 2 "Unresolved neighbour for broadcast address" + + run_cmd "$IP neigh get 203.0.113.255 dev dummy0" + log_test $? 
2 "Unresolved neighbour for network broadcast address" + + cleanup +} + ################################################################################ # usage @@ -2028,6 +2083,7 @@ do ipv4_route_v6_gw) ipv4_route_v6_gw_test;; ipv4_mangle) ipv4_mangle_test;; ipv6_mangle) ipv6_mangle_test;; + ipv4_bcast_neigh) ipv4_bcast_neigh_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 72ee644d47bf..8fa97ae9af9e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT TEST_PROGS = bridge_igmp.sh \ + bridge_locked_port.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh new file mode 100755 index 000000000000..6e98efa6d371 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="locked_port_ipv4 locked_port_ipv6 locked_port_vlan" +NUM_NETIFS=4 +CHECK_TC="no" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + vrf_create "vrf-vlan-h1" + ip link set dev vrf-vlan-h1 up + vlan_create $h1 100 vrf-vlan-h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 + vrf_create "vrf-vlan-h2" + ip link set dev vrf-vlan-h2 up + vlan_create $h2 100 vrf-vlan-h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev 
br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + bridge link set dev $swp1 learning off +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +locked_port_ipv4() +{ + RET=0 + + check_locked_port_support || return 0 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work before locking port" + + bridge link set dev $swp1 locked on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked after locking port, but before adding FDB entry" + + bridge fdb add `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after locking port and adding FDB entry" + + bridge link set dev $swp1 locked off + bridge fdb del `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after unlocking port and removing FDB entry." + + log_test "Locked port ipv4" +} + +locked_port_vlan() +{ + RET=0 + + check_locked_port_support || return 0 + + bridge vlan add vid 100 dev $swp1 + bridge vlan add vid 100 dev $swp2 + + ping_do $h1.100 198.51.100.2 + check_err $? "Ping through vlan did not work before locking port" + + bridge link set dev $swp1 locked on + ping_do $h1.100 198.51.100.2 + check_fail $? "Ping through vlan worked after locking port, but before adding FDB entry" + + bridge fdb add `mac_get $h1` dev $swp1 vlan 100 master static + + ping_do $h1.100 198.51.100.2 + check_err $? "Ping through vlan did not work after locking port and adding FDB entry" + + bridge link set dev $swp1 locked off + bridge fdb del `mac_get $h1` dev $swp1 vlan 100 master static + + ping_do $h1.100 198.51.100.2 + check_err $? 
"Ping through vlan did not work after unlocking port and removing FDB entry" + + bridge vlan del vid 100 dev $swp1 + bridge vlan del vid 100 dev $swp2 + log_test "Locked port vlan" +} + +locked_port_ipv6() +{ + RET=0 + check_locked_port_support || return 0 + + ping6_do $h1 2001:db8:1::2 + check_err $? "Ping6 did not work before locking port" + + bridge link set dev $swp1 locked on + + ping6_do $h1 2001:db8:1::2 + check_fail $? "Ping6 worked after locking port, but before adding FDB entry" + + bridge fdb add `mac_get $h1` dev $swp1 master static + ping6_do $h1 2001:db8:1::2 + check_err $? "Ping6 did not work after locking port and adding FDB entry" + + bridge link set dev $swp1 locked off + bridge fdb del `mac_get $h1` dev $swp1 master static + + ping6_do $h1 2001:db8:1::2 + check_err $? "Ping6 did not work after unlocking port and removing FDB entry" + + log_test "Locked port ipv6" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh new file mode 100755 index 000000000000..1c11c4256d06 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh @@ -0,0 +1,332 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.200 + | | + $h2.200 | +# | 192.0.2.1/28 | | | | 192.0.2.18/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 | +# | | | | | | +# | $h1 + | | + $h2 | +# | | | | | | +# +------------------|-+ +-|--------------------+ +# | | +# +------------------|-------------------------|--------------------+ +# | SW | | | +# | | | | +# | $rp1 + + $rp2 | +# | | | | +# | $rp1.200 + + $rp2.200 | +# | 192.0.2.2/28 192.0.2.17/28 | +# | 2001:db8:1::2/64 2001:db8:2::2/64 | +# | | +# +-----------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + 
test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + respin_enablement + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + reapply_config + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + test_stats_report_rx + test_stats_report_tx + test_destroy_enabled + test_double_enable +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + vlan_destroy $h1 200 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64 + ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 + ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + vlan_destroy $h2 200 + simple_if_fini $h2 +} + +router_rp1_200_create() +{ + ip link add name $rp1.200 up \ + link $rp1 addrgenmode eui64 type vlan id 200 + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 + ip stats set dev $rp1.200 l3_stats on +} + +router_rp1_200_destroy() +{ + ip stats set dev $rp1.200 l3_stats off + ip address del dev $rp1.200 2001:db8:1::2/64 + ip address del dev $rp1.200 192.0.2.2/28 + ip link del dev $rp1.200 +} + +router_create() +{ + ip link set dev $rp1 up + router_rp1_200_create + + ip link set dev $rp2 up + vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64 +} + +router_destroy() +{ + vlan_destroy $rp2 200 + ip link set dev $rp2 down + + router_rp1_200_destroy + ip link set dev $rp1 down +} + 
+setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + rp2mac=$(mac_get $rp2) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.200 192.0.2.18 " IPv4" +} + +ping_ipv6() +{ + ping_test $h1.200 2001:db8:2::1 " IPv6" +} + +get_l3_stat() +{ + local selector=$1; shift + + ip -j stats show dev $rp1.200 group offload subgroup l3_stats | + jq '.[0].stats64.'$selector +} + +send_packets_rx_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_rx_ipv6() +{ + $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv4() +{ + $MZ $h2.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv6() +{ + $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \ + -q -t udp sp=54321,dp=12345 +} + +___test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + a=$(get_l3_stat ${dir}.packets) + send_packets_${dir}_${prot} + "$@" + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + get_l3_stat ${dir}.packets) + check_err $? 
"Traffic not reflected in the counter: $a -> $b" +} + +__test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + RET=0 + ___test_stats "$dir" "$prot" + log_test "Test $dir packets: $prot" +} + +test_stats_rx_ipv4() +{ + __test_stats rx ipv4 +} + +test_stats_tx_ipv4() +{ + __test_stats tx ipv4 +} + +test_stats_rx_ipv6() +{ + __test_stats rx ipv6 +} + +test_stats_tx_ipv6() +{ + __test_stats tx ipv6 +} + +# Make sure everything works well even after stats have been disabled and +# reenabled on the same device without touching the L3 configuration. +respin_enablement() +{ + log_info "Turning stats off and on again" + ip stats set dev $rp1.200 l3_stats off + ip stats set dev $rp1.200 l3_stats on +} + +# For the initial run, l3_stats is enabled on a completely set up netdevice. Now +# do it the other way around: enabling the L3 stats on an L2 netdevice, and only +# then apply the L3 configuration. +reapply_config() +{ + log_info "Reapplying configuration" + + router_rp1_200_destroy + + ip link add name $rp1.200 link $rp1 addrgenmode none type vlan id 200 + ip stats set dev $rp1.200 l3_stats on + ip link set dev $rp1.200 up addrgenmode eui64 + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 +} + +__test_stats_report() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + RET=0 + + a=$(get_l3_stat ${dir}.packets) + send_packets_${dir}_${prot} + ip address flush dev $rp1.200 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + get_l3_stat ${dir}.packets) + check_err $? 
"Traffic not reflected in the counter: $a -> $b" + log_test "Test ${dir} packets: stats pushed on loss of L3" + + ip stats set dev $rp1.200 l3_stats off + ip link del dev $rp1.200 + router_rp1_200_create +} + +test_stats_report_rx() +{ + __test_stats_report rx ipv4 +} + +test_stats_report_tx() +{ + __test_stats_report tx ipv4 +} + +test_destroy_enabled() +{ + RET=0 + + ip link del dev $rp1.200 + router_rp1_200_create + + log_test "Destroy l3_stats-enabled netdev" +} + +test_double_enable() +{ + RET=0 + ___test_stats rx ipv4 \ + ip stats set dev $rp1.200 l3_stats on + log_test "Test stat retention across a spurious enablement" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e7e434a4758b..159afc7f0979 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -126,6 +126,14 @@ check_ethtool_lanes_support() fi } +check_locked_port_support() +{ + if ! bridge -d link show | grep -q " locked"; then + echo "SKIP: iproute2 too old; Locked port feature not supported." 
+ return $ksft_skip + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit $ksft_skip diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 2674ba20d524..ff821025d309 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -71,6 +71,36 @@ chk_msk_remote_key_nr() __chk_nr "grep -c remote_key" $* } +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +wait_connected() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break + sleep 0.1 + done +} trap cleanup EXIT ip netns add $ns @@ -81,15 +111,15 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ 0.0.0.0 >/dev/null & -sleep 0.1 +wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -j -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ 127.0.0.1 >/dev/null & -sleep 0.1 +wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" @@ -101,13 +131,13 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ 0.0.0.0 >/dev/null & -sleep 0.1 +wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -j -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ 127.0.0.1 
>/dev/null & -sleep 0.1 +wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" flush_pids @@ -119,7 +149,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 10 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -sleep 0.1 +wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 5b7a40d73253..621af6895f4d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -782,8 +782,8 @@ run_tests_disconnect() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin" # restore previous status - cout=$old_cout - cout_disconnect="$cout".disconnect + sin=$old_sin + sin_disconnect="$cout".disconnect cin=$old_cin cin_disconnect="$cin".disconnect connect_per_transfer=1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 725924012b41..77b359a49a47 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -799,6 +799,7 @@ chk_join_nr() local ack_nr=$4 local count local dump_stats + local with_cookie printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn" count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` @@ -812,12 +813,20 @@ chk_join_nr() fi echo -n " - synack" + with_cookie=`ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies` count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$syn_ack_nr" ]; then - echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" - ret=1 - dump_stats=1 + # simult connections exceeding the limit with cookie enabled could go up to + # synack validation as the conn limit can be enforced reliably only after + # the subflow creation + if [ "$with_cookie" = 2 ] && [ 
"$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then + echo -n "[ ok ]" + else + echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" + ret=1 + dump_stats=1 + fi else echo -n "[ ok ]" fi @@ -891,11 +900,17 @@ chk_add_nr() local mis_ack_nr=${8:-0} local count local dump_stats + local timeout + + timeout=`ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout` printf "%-39s %s" " " "add" - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + count=`ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}'` [ -z "$count" ] && count=0 - if [ "$count" != "$add_nr" ]; then + + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + if [ "$count" != "$add_nr" ] && [ "$timeout" -gt 1 -o "$count" -lt "$add_nr" ]; then echo "[fail] got $count ADD_ADDR[s] expected $add_nr" ret=1 dump_stats=1 @@ -1100,7 +1115,7 @@ wait_for_tw() local ns=$1 while [ $time -lt $timeout_ms ]; do - local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l) + local cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}') [ "$cnt" = 1 ] && return 1 time=$((time + 100)) @@ -1297,7 +1312,10 @@ signal_address_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 flags signal pm_nl_add_endpoint $ns2 10.0.4.2 flags signal - run_tests $ns1 $ns2 10.0.1.1 + + # the peer could possibly miss some addr notification, allow retransmission + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "signal addresses race test" 3 3 3 # the server will not signal the address terminating diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh new file mode 100755 index 000000000000..704997ffc244 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -0,0 +1,579 @@ 
+#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking the VXLAN vni filtering api and +# datapath. +# It simulates two hypervisors running two VMs each using four network +# six namespaces: two for the HVs, four for the VMs. Each VM is +# connected to a separate bridge. The VM's use overlapping vlans and +# hence the separate bridge domain. Each vxlan device is a collect +# metadata device with vni filtering and hence has the ability to +# terminate configured vni's only. + +# +--------------------------------+ +------------------------------------+ +# | vm-11 netns | | vm-21 netns | +# | | | | +# |+------------+ +-------------+ | |+-------------+ +----------------+ | +# ||veth-11.10 | |veth-11.20 | | ||veth-21.10 | | veth-21.20 | | +# ||10.0.10.11/24 |10.0.20.11/24| | ||10.0.10.21/24| | 10.0.20.21/24 | | +# |+------|-----+ +|------------+ | |+-----------|-+ +---|------------+ | +# | | | | | | | | +# | | | | | +------------+ | +# | +------------+ | | | veth-21 | | +# | | veth-11 | | | | | | +# | | | | | +-----|------+ | +# | +-----|------+ | | | | +# | | | | | | +# +------------|-------------------+ +---------------|--------------------+ +# +------------|-----------------------------------------|-------------------+ +# | +-----|------+ +-----|------+ | +# | |vethhv-11 | |vethhv-21 | | +# | +----|-------+ +-----|------+ | +# | +---|---+ +---|--+ | +# | | br1 | | br2 | | +# | +---|---+ +---|--+ | +# | +---|----+ +---|--+ | +# | | vxlan1| |vxlan2| | +# | +--|-----+ +--|---+ | +# | | | | +# | | +---------------------+ | | +# | | |veth0 | | | +# | +---------|172.16.0.1/24 -----------+ | +# | |2002:fee1::1/64 | | +# | hv-1 netns +--------|------------+ | +# +-----------------------------|--------------------------------------------+ +# | +# +-----------------------------|--------------------------------------------+ +# | hv-2 netns +--------|-------------+ | +# | | veth0 | | +# | +------| 172.16.0.2/24 |---+ | +# | | | 2002:fee1::2/64 | | | +# | 
| | | | | +# | | +----------------------+ | - | +# | | | | +# | +-|-------+ +--------|-+ | +# | | vxlan1 | | vxlan2 | | +# | +----|----+ +---|------+ | +# | +--|--+ +-|---+ | +# | | br1 | | br2 | | +# | +--|--+ +--|--+ | +# | +-----|-------+ +----|-------+ | +# | | vethhv-12 | |vethhv-22 | | +# | +------|------+ +-------|----+ | +# +-----------------|----------------------------|---------------------------+ +# | | +# +-----------------|-----------------+ +--------|---------------------------+ +# | +-------|---+ | | +--|---------+ | +# | | veth-12 | | | |veth-22 | | +# | +-|--------|+ | | +--|--------|+ | +# | | | | | | | | +# |+----------|--+ +---|-----------+ | |+-------|-----+ +|---------------+ | +# ||veth-12.10 | |veth-12.20 | | ||veth-22.10 | |veth-22.20 | | +# ||10.0.10.12/24| |10.0.20.12/24 | | ||10.0.10.22/24| |10.0.20.22/24 | | +# |+-------------+ +---------------+ | |+-------------+ +----------------+ | +# | | | | +# | | | | +# | vm-12 netns | |vm-22 netns | +# +-----------------------------------+ +------------------------------------+ +# +# +# This test tests the new vxlan vnifiltering api + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# all tests in this script. 
Can be overridden with -t option +TESTS=" + vxlan_vnifilter_api + vxlan_vnifilter_datapath + vxlan_vnifilter_datapath_pervni + vxlan_vnifilter_datapath_mgroup + vxlan_vnifilter_datapath_mgroup_pervni + vxlan_vnifilter_metadata_and_traditional_mix +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +check_hv_connectivity() { + ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null + sleep 1 + ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null + + return $? +} + +check_vm_connectivity() { + run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12" + log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" + + run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22" + log_test $? 
0 "VM connectivity over $1 (ipv6 default rdst)" +} + +cleanup() { + ip link del veth-hv-1 2>/dev/null || true + ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true + + for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do + ip netns del $ns 2>/dev/null || true + done +} + +trap cleanup EXIT + +setup-hv-networking() { + hv=$1 + local1=$2 + mask1=$3 + local2=$4 + mask2=$5 + + ip netns add hv-$hv + ip link set veth-hv-$hv netns hv-$hv + ip -netns hv-$hv link set veth-hv-$hv name veth0 + ip -netns hv-$hv addr add $local1/$mask1 dev veth0 + ip -netns hv-$hv addr add $local2/$mask2 dev veth0 + ip -netns hv-$hv link set veth0 up +} + +# Setups a "VM" simulated by a netns an a veth pair +# example: setup-vm <hvid> <vmid> <brid> <VATTRS> <mcast_for_bum> +# VATTRS = comma separated "<vlan>-<v[46]>-<localip>-<remoteip>-<VTYPE>-<vxlandstport>" +# VTYPE = vxlan device type. "default = traditional device, metadata = metadata device +# vnifilter = vnifiltering device, +# vnifilterg = vnifiltering device with per vni group/remote" +# example: +# setup-vm 1 11 1 \ +# 10-v4-172.16.0.1-239.1.1.100-vnifilterg,20-v4-172.16.0.1-239.1.1.100-vnifilterg 1 +# +setup-vm() { + hvid=$1 + vmid=$2 + brid=$3 + vattrs=$4 + mcast=$5 + lastvxlandev="" + + # create bridge + ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip -netns hv-$hvid link set br$brid up + + # create vm namespace and interfaces and connect to hypervisor + # namespace + ip netns add vm-$vmid + hvvethif="vethhv-$vmid" + vmvethif="veth-$vmid" + ip link add $hvvethif type veth peer name $vmvethif + ip link set $hvvethif netns hv-$hvid + ip link set $vmvethif netns vm-$vmid + ip -netns hv-$hvid link set $hvvethif up + ip -netns vm-$vmid link set $vmvethif up + ip -netns hv-$hvid link set $hvvethif master br$brid + + # configure VM vlan/vni filtering on hypervisor + for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ') + do + local 
vid=$(echo $vmap | awk -F'-' '{print ($1)}') + local family=$(echo $vmap | awk -F'-' '{print ($2)}') + local localip=$(echo $vmap | awk -F'-' '{print ($3)}') + local group=$(echo $vmap | awk -F'-' '{print ($4)}') + local vtype=$(echo $vmap | awk -F'-' '{print ($5)}') + local port=$(echo $vmap | awk -F'-' '{print ($6)}') + + ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid + ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid + ip -netns vm-$vmid link set $vmvethif.$vid up + + tid=$vid + vxlandev="vxlan$brid" + vxlandevflags="" + + if [[ -n $vtype && $vtype == "metadata" ]]; then + vxlandevflags="$vxlandevflags external" + elif [[ -n $vtype && $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then + vxlandevflags="$vxlandevflags external vnifilter" + tid=$((vid+brid)) + else + vxlandevflags="$vxlandevflags id $tid" + vxlandev="vxlan$tid" + fi + + if [[ -n $vtype && $vtype != "vnifilterg" ]]; then + if [[ -n "$group" && "$group" != "null" ]]; then + if [ $mcast -eq 1 ]; then + vxlandevflags="$vxlandevflags group $group" + else + vxlandevflags="$vxlandevflags remote $group" + fi + fi + fi + + if [[ -n "$port" && "$port" != "default" ]]; then + vxlandevflags="$vxlandevflags dstport $port" + fi + + # create vxlan device + if [ "$vxlandev" != "$lastvxlandev" ]; then + ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null + ip -netns hv-$hvid link set $vxlandev master br$brid + ip -netns hv-$hvid link set $vxlandev up + lastvxlandev=$vxlandev + fi + + # add vlan + bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif + bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev + + # Add bridge vni filter for tx + if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then + bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on + bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid + fi + + if [[ -n $vtype && 
$vtype == "metadata" ]]; then + bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \ + src_vni $tid vni $tid dst $group self + elif [[ -n $vtype && $vtype == "vnifilter" ]]; then + # Add per vni rx filter with 'bridge vni' api + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid + elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then + # Add per vni group config with 'bridge vni' api + if [ -n "$group" ]; then + if [ "$family" == "v4" ]; then + if [ $mcast -eq 1 ]; then + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + else + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + fi + else + if [ $mcast -eq 1 ]; then + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group + else + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group + fi + fi + fi + fi + done +} + +setup_vnifilter_api() +{ + ip link add veth-host type veth peer name veth-testns + ip netns add testns + ip link set veth-testns netns testns +} + +cleanup_vnifilter_api() +{ + ip link del veth-host 2>/dev/null || true + ip netns del testns 2>/dev/null || true +} + +# tests vxlan filtering api +vxlan_vnifilter_api() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + localip="172.16.0.1" + group="239.1.1.101" + + cleanup_vnifilter_api &>/dev/null + setup_vnifilter_api + + # Duplicate vni test + # create non-vnifiltering traditional vni device + run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" + log_test $? 0 "Create traditional vxlan device" + + # create vni filtering device + run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" + log_test $? 1 "Cannot create vnifilter device without external flag" + + run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + log_test $? 
0 "Creating external vxlan device with vnifilter flag" + + run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100" + log_test $? 0 "Cannot set in-use vni id on vnifiltering device" + + run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200" + log_test $? 0 "Set new vni id on vnifiltering device" + + run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + log_test $? 0 "Create second external vxlan device with vnifilter flag" + + run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200" + log_test $? 255 "Cannot set in-use vni id on vnifiltering device" + + run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + log_test $? 0 "Set new vni id on vnifiltering device" + + # check in bridge vni show + run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + log_test $? 0 "Update vni id on vnifiltering device" + + run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400" + log_test $? 0 "Add new vni id on vnifiltering device" + + # add multicast group per vni + run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group" + log_test $? 0 "Set multicast group on existing vni" + + # add multicast group per vni + run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group" + log_test $? 0 "Set multicast group on existing vni" + + # set vnifilter on an existing external vxlan device + run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter" + log_test $? 2 "Cannot set vnifilter flag on a device" + + # change vxlan vnifilter flag + run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter" + log_test $? 
2 "Cannot unset vnifilter flag on a device" +} + +# Sanity test vnifilter datapath +# vnifilter vnis inherit BUM group from +# vxlan device +vxlan_vnifilter_datapath() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 + setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 + + setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0 + setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0 + + check_vm_connectivity "vnifiltering vxlan" +} + +# Sanity test vnifilter datapath +# with vnifilter per vni configured BUM +# group/remote +vxlan_vnifilter_datapath_pervni() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0 + setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0 + + setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilterg,20-v4-$hv2addr1-$hv1addr1-vnifilterg 0 + setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilterg,20-v6-$hv2addr2-$hv1addr2-vnifilterg 0 + + check_vm_connectivity "vnifiltering vxlan pervni remote" +} + + +vxlan_vnifilter_datapath_mgroup() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + group="239.1.1.100" + group6="ff07::1" + + ip 
link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1 + setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1 + + setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilter,20-v4-$hv2addr1-$group-vnifilter 1 + setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilter,20-v6-$hv2addr2-$group6-vnifilter 1 + + check_vm_connectivity "vnifiltering vxlan mgroup" +} + +vxlan_vnifilter_datapath_mgroup_pervni() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + group="239.1.1.100" + group6="ff07::1" + + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1 + setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1 + + setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilterg,20-v4-$hv2addr1-$group-vnifilterg 1 + setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilterg,20-v6-$hv2addr2-$group6-vnifilterg 1 + + check_vm_connectivity "vnifiltering vxlan pervni mgroup" +} + +vxlan_vnifilter_metadata_and_traditional_mix() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 + setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 + setup-vm 1 31 3 
30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0 + + + setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0 + setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0 + setup-vm 2 32 3 30-v4-$hv2addr1-$hv1addr1-default-4790,40-v6-$hv2addr2-$hv1addr2-default-4790,50-v4-$hv2addr1-$hv1addr1-metadata-4791 0 + + check_vm_connectivity "vnifiltering vxlan pervni remote mix" + + # check VM connectivity over traditional/non-vxlan filtering vxlan devices + run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32" + log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)" + + run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32" + log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)" + + run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32" + log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)" +} + +while getopts :t:pP46hv o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip link help vxlan 2>&1 | grep -q "vnifilter" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing vxlan dev vnifilter setting" + sync + exit $ksft_skip +fi + +bridge vni help 2>&1 | grep -q "Usage: bridge vni" +if [ $? 
-ne 0 ]; then + echo "SKIP: iproute2 bridge lacks vxlan vnifiltering support" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +for t in $TESTS +do + case $t in + none) setup; exit 0;; + *) $t; cleanup;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 8448f74adfec..4cb887b57413 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue +connect_close diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index e4f845dd942b..7e81c9a7fff9 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -9,6 +9,6 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_vrf.sh nft_synproxy.sh LDLIBS = -lmnl -TEST_GEN_FILES = nf-queue +TEST_GEN_FILES = nf-queue connect_close include ../lib.mk diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c new file mode 100644 index 000000000000..1c3b0add54c4 --- /dev/null +++ b/tools/testing/selftests/netfilter/connect_close.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> + +#include <arpa/inet.h> +#include <sys/socket.h> + +#define PORT 12345 +#define RUNTIME 10 + +static struct { + unsigned int timeout; + unsigned int port; +} opts = { + .timeout = RUNTIME, + .port = PORT, +}; + +static void handler(int sig) +{ + _exit(sig == SIGALRM ? 
0 : 1); +} + +static void set_timeout(void) +{ + struct sigaction action = { + .sa_handler = handler, + }; + + sigaction(SIGALRM, &action, NULL); + + alarm(opts.timeout); +} + +static void do_connect(const struct sockaddr_in *dst) +{ + int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s >= 0) + fcntl(s, F_SETFL, O_NONBLOCK); + + connect(s, (struct sockaddr *)dst, sizeof(*dst)); + close(s); +} + +static void do_accept(const struct sockaddr_in *src) +{ + int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s < 0) + return; + + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + + bind(s, (struct sockaddr *)src, sizeof(*src)); + + listen(s, 16); + + c = accept(s, NULL, NULL); + if (c >= 0) + close(c); + + close(s); +} + +static int accept_loop(void) +{ + struct sockaddr_in src = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &src.sin_addr); + + set_timeout(); + + for (;;) + do_accept(&src); + + return 1; +} + +static int connect_loop(void) +{ + struct sockaddr_in dst = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr); + + set_timeout(); + + for (;;) + do_connect(&dst); + + return 1; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "t:p:")) != -1) { + switch (c) { + case 't': + opts.timeout = atoi(optarg); + break; + case 'p': + opts.port = atoi(optarg); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + pid_t p; + + parse_opts(argc, argv); + + p = fork(); + if (p < 0) + return 111; + + if (p > 0) + return accept_loop(); + + return connect_loop(); +} diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 7d27f1f3bc01..e12729753351 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ 
b/tools/testing/selftests/netfilter/nft_queue.sh @@ -113,6 +113,7 @@ table inet $name { chain output { type filter hook output priority $prio; policy accept; tcp dport 12345 queue num 3 + tcp sport 23456 queue num 3 jump nfq } chain post { @@ -296,6 +297,23 @@ test_tcp_localhost() wait 2>/dev/null } +test_tcp_localhost_connectclose() +{ + tmpfile=$(mktemp) || exit 1 + + ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout & + + ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & + local nfqpid=$! + + sleep 1 + rm -f "$tmpfile" + + wait $rpid + [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close" + wait 2>/dev/null +} + test_tcp_localhost_requeue() { ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF @@ -424,6 +442,7 @@ test_queue 20 test_tcp_forward test_tcp_localhost +test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index c0f6a062364d..198ad5f32187 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -133,6 +133,7 @@ static void usage(char *progname) " 0 - none\n" " 1 - external time stamp\n" " 2 - periodic output\n" + " -n val shift the ptp clock time by 'val' nanoseconds\n" " -p val enable output with a period of 'val' nanoseconds\n" " -H val set output phase to 'val' nanoseconds (requires -p)\n" " -w val set output pulse width to 'val' nanoseconds (requires -p)\n" @@ -165,6 +166,7 @@ int main(int argc, char *argv[]) clockid_t clkid; int adjfreq = 0x7fffffff; int adjtime = 0; + int adjns = 0; int capabilities = 0; int extts = 0; int flagtest = 0; @@ -186,7 +188,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 
1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:p:P:sSt:T:w:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:p:P:sSt:T:w:z"))) { switch (c) { case 'c': capabilities = 1; @@ -223,6 +225,9 @@ int main(int argc, char *argv[]) return -1; } break; + case 'n': + adjns = atoi(optarg); + break; case 'p': perout = atoll(optarg); break; @@ -305,11 +310,16 @@ int main(int argc, char *argv[]) } } - if (adjtime) { + if (adjtime || adjns) { memset(&tx, 0, sizeof(tx)); - tx.modes = ADJ_SETOFFSET; + tx.modes = ADJ_SETOFFSET | ADJ_NANO; tx.time.tv_sec = adjtime; - tx.time.tv_usec = 0; + tx.time.tv_usec = adjns; + while (tx.time.tv_usec < 0) { + tx.time.tv_sec -= 1; + tx.time.tv_usec += 1000000000; + } + if (clock_adjtime(clkid, &tx) < 0) { perror("clock_adjtime"); } else { diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index 0ebfe8b0e147..585f7a0c10cb 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall +CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/ LDFLAGS += -lpthread TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c index d91bde511268..eed44322d1a6 100644 --- a/tools/testing/selftests/vm/map_fixed_noreplace.c +++ b/tools/testing/selftests/vm/map_fixed_noreplace.c @@ -17,9 +17,6 @@ #define MAP_FIXED_NOREPLACE 0x100000 #endif -#define BASE_ADDRESS (256ul * 1024 * 1024) - - static void dump_maps(void) { char cmd[32]; @@ -28,18 +25,46 @@ static void dump_maps(void) system(cmd); } +static unsigned long find_base_addr(unsigned long size) +{ + void *addr; + unsigned long flags; + + flags = MAP_PRIVATE | MAP_ANONYMOUS; + addr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + if (addr == MAP_FAILED) { + printf("Error: couldn't 
map the space we need for the test\n"); + return 0; + } + + if (munmap(addr, size) != 0) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + return (unsigned long)addr; +} + int main(void) { + unsigned long base_addr; unsigned long flags, addr, size, page_size; char *p; page_size = sysconf(_SC_PAGE_SIZE); + //let's find a base addr that is free before we start the tests + size = 5 * page_size; + base_addr = find_base_addr(size); + if (!base_addr) { + printf("Error: couldn't map the space we need for the test\n"); + return 1; + } + flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; // Check we can map all the areas we need below errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); @@ -60,7 +85,7 @@ int main(void) printf("unmap() successful\n"); errno = 0; - addr = BASE_ADDRESS + page_size; + addr = base_addr + page_size; size = 3 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -80,7 +105,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -101,7 +126,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS + (2 * page_size); + addr = base_addr + (2 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -121,7 +146,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (3 * page_size); + addr = base_addr + (3 * page_size); size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -141,7 +166,7 @@ int main(void) * +4 | free | */ errno = 0; 
- addr = BASE_ADDRESS; + addr = base_addr; size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -161,7 +186,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -181,7 +206,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (4 * page_size); + addr = base_addr + (4 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -192,7 +217,7 @@ int main(void) return 1; } - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; if (munmap((void *)addr, size) != 0) { dump_maps(); diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 5648f9252e58..e60f1862bad0 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -810,7 +810,7 @@ struct osnoise_tool *osnoise_init_trace_tool(char *tracer) retval = enable_tracer_by_name(trace->trace.inst, tracer); if (retval) { - err_msg("Could not enable osnoiser tracer for tracing\n"); + err_msg("Could not enable %s tracer for tracing\n", tracer); goto out_err; } diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 1f0b7fce55cf..52c053cc1789 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -426,7 +426,7 @@ static void osnoise_hist_usage(char *usage) static const char * const msg[] = { "", " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", - " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\", + " [-c cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", " [--no-index] [--with-zeros]", "", " 
-h/--help: print this menu", @@ -439,7 +439,7 @@ static void osnoise_hist_usage(char *usage) " -D/--debug: print debug info", " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", " -b/--bucket-size N: set the histogram bucket size (default 1)", - " -e/--entries N: set the number of entries of the histogram (default 256)", + " -E/--entries N: set the number of entries of the histogram (default 256)", " --no-header: do not print header", " --no-summary: do not print summary", " --no-index: do not print index", @@ -486,7 +486,7 @@ static struct osnoise_hist_params while (1) { static struct option long_options[] = { {"bucket-size", required_argument, 0, 'b'}, - {"entries", required_argument, 0, 'e'}, + {"entries", required_argument, 0, 'E'}, {"cpus", required_argument, 0, 'c'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, @@ -507,7 +507,7 @@ static struct osnoise_hist_params /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123", + c = getopt_long(argc, argv, "c:b:d:E:Dhp:P:r:s:S:t::0123", long_options, &option_index); /* detect the end of the options. 
*/ @@ -534,7 +534,7 @@ static struct osnoise_hist_params if (!params->duration) osnoise_hist_usage("Invalid -D duration\n"); break; - case 'e': + case 'E': params->entries = get_llong_from_str(optarg); if ((params->entries < 10) || (params->entries > 9999999)) osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index c67dc28ef716..7af769b9c0de 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -573,6 +573,7 @@ out_top: osnoise_free_top(tool->data); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); + free(params); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 436a799f9adf..237e1735afa7 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -429,7 +429,7 @@ static void timerlat_hist_usage(char *usage) char *msg[] = { "", " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", - " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", + " [-c cpu-list] [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", " [--no-index] [--with-zeros]", "", " -h/--help: print this menu", @@ -443,7 +443,7 @@ static void timerlat_hist_usage(char *usage) " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", " -n/--nano: display data in nanoseconds", " -b/--bucket-size N: set the histogram bucket size (default 1)", - " -e/--entries N: set the number of entries of the histogram (default 256)", + " -E/--entries N: set the number of entries of the histogram (default 256)", " --no-irq: ignore IRQ latencies", " --no-thread: ignore thread latencies", " --no-header: do not print header", @@ -494,7 +494,7 @@ static struct timerlat_hist_params {"cpus", required_argument, 0, 
'c'}, {"bucket-size", required_argument, 0, 'b'}, {"debug", no_argument, 0, 'D'}, - {"entries", required_argument, 0, 'e'}, + {"entries", required_argument, 0, 'E'}, {"duration", required_argument, 0, 'd'}, {"help", no_argument, 0, 'h'}, {"irq", required_argument, 0, 'i'}, @@ -516,7 +516,7 @@ static struct timerlat_hist_params /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345", + c = getopt_long(argc, argv, "c:b:d:E:Dhi:np:P:s:t::T:012345", long_options, &option_index); /* detect the end of the options. */ @@ -543,7 +543,7 @@ static struct timerlat_hist_params if (!params->duration) timerlat_hist_usage("Invalid -D duration\n"); break; - case 'e': + case 'E': params->entries = get_llong_from_str(optarg); if ((params->entries < 10) || (params->entries > 9999999)) timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 58d31da8a2f7..0afc016cc54d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5528,9 +5528,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { -#ifdef CONFIG_LOCKDEP - WARN_ON(lockdep_is_held(&kvm_count_lock)); -#endif + lockdep_assert_not_held(&kvm_count_lock); hardware_enable_nolock(NULL); } } |