186 files changed, 7476 insertions(+), 2032 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/dsa/ksz.txt b/Documentation/devicetree/bindings/net/dsa/ksz.txt
index 0f407fb371ce..8d58c2a7de39 100644
--- a/Documentation/devicetree/bindings/net/dsa/ksz.txt
+++ b/Documentation/devicetree/bindings/net/dsa/ksz.txt
@@ -19,58 +19,58 @@ Examples:
 Ethernet switch connected via SPI to the host, CPU port wired to eth0:
- eth0: ethernet@10001000 {
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
+ eth0: ethernet@10001000 {
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
- spi1: spi@f8008000 {
- pinctrl-0 = <&pinctrl_spi_ksz>;
- cs-gpios = <&pioC 25 0>;
- id = <1>;
+ spi1: spi@f8008000 {
+ pinctrl-0 = <&pinctrl_spi_ksz>;
+ cs-gpios = <&pioC 25 0>;
+ id = <1>;
- ksz9477: ksz9477@0 {
- compatible = "microchip,ksz9477";
- reg = <0>;
+ ksz9477: ksz9477@0 {
+ compatible = "microchip,ksz9477";
+ reg = <0>;
- spi-max-frequency = <44000000>;
- spi-cpha;
- spi-cpol;
+ spi-max-frequency = <44000000>;
+ spi-cpha;
+ spi-cpol;
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@0 {
- reg = <0>;
- label = "lan1";
- };
- port@1 {
- reg = <1>;
- label = "lan2";
- };
- port@2 {
- reg = <2>;
- label = "lan3";
- };
- port@3 {
- reg = <3>;
- label = "lan4";
- };
- port@4 {
- reg = <4>;
- label = "lan5";
- };
- port@5 {
- reg = <5>;
- label = "cpu";
- ethernet = <&eth0>;
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
- };
- };
- };
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ label = "lan1";
+ };
+ port@1 {
+ reg = <1>;
+ label = "lan2";
+ };
+ port@2 {
+ reg = <2>;
+ label = "lan3";
+ };
+ port@3 {
+ reg = <3>;
+ label = "lan4";
+ };
+ port@4 {
+ reg = <4>;
+ label = "lan5";
+ };
+ port@5 {
+ reg = <5>;
+ label = "cpu";
+ ethernet = <&eth0>;
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
new file mode 100644
index 000000000000..1db3fbea0831
--- /dev/null
+++ b/Documentation/networking/devlink-health.txt
@@ -0,0 +1,86 @@
+The health mechanism is targeted at real-time alerting, in order to know when
+something bad has happened to a PCI device, and to:
+- Provide alert debug information
+- Self healing
+- If the problem needs vendor support, provide a way to gather all needed
+ debugging information.
+
+The main idea is to unify and centralize driver health reports in the
+generic devlink instance and allow the user to set different
+attributes of the health reporting and recovery procedures.
+
+The devlink health reporter:
+The device driver creates a "health reporter" for each error/health type.
+An error/health type can be known/generic (e.g. PCI error, FW error, Rx/Tx
+error) or unknown (driver specific).
+For each registered health reporter a driver can issue error/health reports
+asynchronously. All health report handling is done by devlink.
+The device driver can provide specific callbacks for each "health reporter",
+e.g.:
+ - Recovery procedures
+ - Diagnostics and object dump procedures
+ - OOB initial parameters
+Different parts of the driver can register different types of health reporters
+with different handlers.
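For a concrete picture of the driver side, below is a minimal sketch of registering a reporter and issuing a report, using the API this series introduces (devlink_health_reporter_create(), devlink_health_reporter_priv() and devlink_health_report()). The struct my_dev, my_dev_reset_tx() and the surrounding helper names are hypothetical driver details, not taken from the patch:

	/* Hypothetical driver usage of the devlink health API; only the
	 * devlink_health_* calls come from this series.
	 */
	static int my_tx_recover(struct devlink_health_reporter *reporter,
				 void *priv_ctx)
	{
		struct my_dev *mdev = devlink_health_reporter_priv(reporter);

		/* Driver-specific recovery; return 0 once the device is
		 * healthy again.
		 */
		return my_dev_reset_tx(mdev);
	}

	static const struct devlink_health_reporter_ops my_tx_reporter_ops = {
		.name = "tx",
		.recover = my_tx_recover,
	};

	static int my_probe_health(struct devlink *devlink, struct my_dev *mdev)
	{
		/* One reporter per error type; auto-recovery enabled with a
		 * 500 msec grace period between recovery attempts.
		 */
		mdev->tx_reporter = devlink_health_reporter_create(devlink,
								   &my_tx_reporter_ops,
								   500, true, mdev);
		return PTR_ERR_OR_ZERO(mdev->tx_reporter);
	}

	static void my_tx_error_detected(struct my_dev *mdev)
	{
		/* devlink logs the report, updates reporter statistics, stores
		 * a dump (if the reporter has a dump callback) and, depending
		 * on configuration, invokes the recover callback.
		 */
		devlink_health_report(mdev->tx_reporter, "TX timeout", NULL);
	}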
+
+Once an error is reported, devlink health will take the following actions:
+ * A log is sent to the kernel trace events buffer
+ * Health status and statistics are updated for the reporter instance
+ * An object dump is taken and saved at the reporter instance (as long as
+ there is no other dump already stored)
+ * An auto-recovery attempt is made, depending on:
+ - the auto-recovery configuration
+ - the grace period vs. the time passed since the last recovery
+
+The user interface:
+The user can access/change each reporter's parameters and driver-specific
+callbacks via devlink, e.g. per error type (per health reporter):
+ - Configure the reporter's generic parameters (e.g. disable/enable auto-recovery)
+ - Invoke the recovery procedure
+ - Run diagnostics
+ - Take an object dump
+
+The devlink health interface (via netlink):
+DEVLINK_CMD_HEALTH_REPORTER_GET
+ Retrieves status and configuration info per DEV and reporter.
+DEVLINK_CMD_HEALTH_REPORTER_SET
+ Allows setting reporter-related configuration.
+DEVLINK_CMD_HEALTH_REPORTER_RECOVER
+ Triggers a reporter's recovery procedure.
+DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
+ Retrieves diagnostics data from a reporter on a device.
+DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
+ Retrieves the last stored dump. Devlink health
+ saves a single dump. If a dump is not already stored by devlink
+ for this reporter, devlink generates a new dump.
+ The dump output is defined by the reporter.
+DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
+ Clears the last saved dump file for the specified reporter.
+
+
+ netlink
+ +--------------------------+
+ | |
+ | + |
+ | | |
+ +--------------------------+
+ |request for ops
+ |(diagnose,
+ mlx5_core devlink |recover,
+ |dump)
++--------+ +--------------------------+
+| | | reporter| |
+| | | +---------v----------+ |
+| | ops execution | | | |
+| <----------------------------------+ | |
+| | | | | |
+| | | + ^------------------+ |
+| | | | request for ops |
+| | | | (recover, dump) |
+| | | | |
+| | | +-+------------------+ |
+| | health report | | health handler | |
+| +-------------------------------> | |
+| | | +--------------------+ |
+| | health reporter create | |
+| +----------------------------> |
++--------+ +--------------------------+
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 25170ad7d25b..1000b821681c 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -236,19 +236,6 @@ description.
Design limitations ================== -DSA is a platform device driver -------------------------------- - -DSA is implemented as a DSA platform device driver which is convenient because -it will register the entire DSA switch tree attached to a master network device -in one-shot, facilitating the device creation and simplifying the device driver -model a bit, this comes however with a number of limitations: - -- building DSA and its switch drivers as modules is currently not working -- the device driver parenting does not necessarily reflect the original - bus/device the switch can be created from -- supporting non-MDIO and non-MMIO (platform) switches is not possible - Limits on the number of devices and ports ----------------------------------------- diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst index fe8f741193be..c5642f430d2e 100644 --- a/Documentation/networking/snmp_counter.rst +++ b/Documentation/networking/snmp_counter.rst @@ -1,16 +1,17 @@ -=========== +============ SNMP counter -=========== +============ This document explains the meaning of SNMP counters. General IPv4 counters -==================== +===================== All layer 4 packets and ICMP packets will change these counters, but these counters won't be changed by layer 2 packets (such as STP) or ARP packets. * IpInReceives + Defined in `RFC1213 ipInReceives`_ .. _RFC1213 ipInReceives: https://tools.ietf.org/html/rfc1213#page-26 @@ -23,6 +24,7 @@ and so on). It indicates the number of aggregated segments after GRO/LRO. * IpInDelivers + Defined in `RFC1213 ipInDelivers`_ .. _RFC1213 ipInDelivers: https://tools.ietf.org/html/rfc1213#page-28 @@ -33,6 +35,7 @@ supported protocols will be delivered, if someone listens on the raw socket, all valid IP packets will be delivered. * IpOutRequests + Defined in `RFC1213 ipOutRequests`_ .. _RFC1213 ipOutRequests: https://tools.ietf.org/html/rfc1213#page-28 @@ -42,6 +45,7 @@ multicast packets, and would always be updated together with IpExtOutOctets. * IpExtInOctets and IpExtOutOctets + They are Linux kernel extensions, no RFC definitions. Please note, RFC1213 indeed defines ifInOctets and ifOutOctets, but they are different things. The ifInOctets and ifOutOctets include the MAC @@ -49,6 +53,7 @@ layer header size but IpExtInOctets and IpExtOutOctets don't, they only include the IP layer header and the IP layer data. * IpExtInNoECTPkts, IpExtInECT1Pkts, IpExtInECT0Pkts, IpExtInCEPkts + They indicate the number of four kinds of ECN IP packets, please refer `Explicit Congestion Notification`_ for more details. @@ -60,6 +65,7 @@ for the same packet, you might find that IpInReceives count 1, but IpExtInNoECTPkts counts 2 or more. * IpInHdrErrors + Defined in `RFC1213 ipInHdrErrors`_. It indicates the packet is dropped due to the IP header error. It might happen in both IP input and IP forward paths. @@ -67,6 +73,7 @@ and IP forward paths. .. _RFC1213 ipInHdrErrors: https://tools.ietf.org/html/rfc1213#page-27 * IpInAddrErrors + Defined in `RFC1213 ipInAddrErrors`_. It will be increased in two scenarios: (1) The IP address is invalid. (2) The destination IP address is not a local address and IP forwarding is not enabled @@ -74,6 +81,7 @@ address is not a local address and IP forwarding is not enabled .. _RFC1213 ipInAddrErrors: https://tools.ietf.org/html/rfc1213#page-27 * IpExtInNoRoutes + This counter means the packet is dropped when the IP stack receives a packet and can't find a route for it from the route table. 
It might happen when IP forwarding is enabled and the destination IP address is @@ -81,6 +89,7 @@ not a local address and there is no route for the destination IP address. * IpInUnknownProtos + Defined in `RFC1213 ipInUnknownProtos`_. It will be increased if the layer 4 protocol is unsupported by kernel. If an application is using raw socket, kernel will always deliver the packet to the raw socket @@ -89,10 +98,12 @@ and this counter won't be increased. .. _RFC1213 ipInUnknownProtos: https://tools.ietf.org/html/rfc1213#page-27 * IpExtInTruncatedPkts + For IPv4 packet, it means the actual data size is smaller than the "Total Length" field in the IPv4 header. * IpInDiscards + Defined in `RFC1213 ipInDiscards`_. It indicates the packet is dropped in the IP receiving path and due to kernel internal reasons (e.g. no enough memory). @@ -100,20 +111,23 @@ enough memory). .. _RFC1213 ipInDiscards: https://tools.ietf.org/html/rfc1213#page-28 * IpOutDiscards + Defined in `RFC1213 ipOutDiscards`_. It indicates the packet is dropped in the IP sending path and due to kernel internal reasons. .. _RFC1213 ipOutDiscards: https://tools.ietf.org/html/rfc1213#page-28 * IpOutNoRoutes + Defined in `RFC1213 ipOutNoRoutes`_. It indicates the packet is dropped in the IP sending path and no route is found for it. .. _RFC1213 ipOutNoRoutes: https://tools.ietf.org/html/rfc1213#page-29 ICMP counters -============ +============= * IcmpInMsgs and IcmpOutMsgs + Defined by `RFC1213 icmpInMsgs`_ and `RFC1213 icmpOutMsgs`_ .. _RFC1213 icmpInMsgs: https://tools.ietf.org/html/rfc1213#page-41 @@ -126,6 +140,7 @@ IcmpOutMsgs would still be updated if the IP header is constructed by a userspace program. * ICMP named types + | These counters include most of common ICMP types, they are: | IcmpInDestUnreachs: `RFC1213 icmpInDestUnreachs`_ | IcmpInTimeExcds: `RFC1213 icmpInTimeExcds`_ @@ -180,6 +195,7 @@ straightforward. The 'In' counter means kernel receives such a packet and the 'Out' counter means kernel sends such a packet. * ICMP numeric types + They are IcmpMsgInType[N] and IcmpMsgOutType[N], the [N] indicates the ICMP type number. These counters track all kinds of ICMP packets. The ICMP type number definition could be found in the `ICMP parameters`_ @@ -192,12 +208,14 @@ IcmpMsgOutType8 would increase 1. And if kernel gets an ICMP Echo Reply packet, IcmpMsgInType0 would increase 1. * IcmpInCsumErrors + This counter indicates the checksum of the ICMP packet is wrong. Kernel verifies the checksum after updating the IcmpInMsgs and before updating IcmpMsgInType[N]. If a packet has bad checksum, the IcmpInMsgs would be updated but none of IcmpMsgInType[N] would be updated. * IcmpInErrors and IcmpOutErrors + Defined by `RFC1213 icmpInErrors`_ and `RFC1213 icmpOutErrors`_ .. _RFC1213 icmpInErrors: https://tools.ietf.org/html/rfc1213#page-41 @@ -209,7 +227,7 @@ and the sending packet path use IcmpOutErrors. When IcmpInCsumErrors is increased, IcmpInErrors would always be increased too. relationship of the ICMP counters -------------------------------- +--------------------------------- The sum of IcmpMsgOutType[N] is always equal to IcmpOutMsgs, as they are updated at the same time. The sum of IcmpMsgInType[N] plus IcmpInErrors should be equal or larger than IcmpInMsgs. When kernel @@ -229,8 +247,9 @@ IcmpInMsgs should be less than the sum of IcmpMsgOutType[N] plus IcmpInErrors. General TCP counters -================== +==================== * TcpInSegs + Defined in `RFC1213 tcpInSegs`_ .. 
_RFC1213 tcpInSegs: https://tools.ietf.org/html/rfc1213#page-48 @@ -247,6 +266,7 @@ isn't aware of GRO. So if two packets are merged by GRO, the TcpInSegs counter would only increase 1. * TcpOutSegs + Defined in `RFC1213 tcpOutSegs`_ .. _RFC1213 tcpOutSegs: https://tools.ietf.org/html/rfc1213#page-48 @@ -258,6 +278,7 @@ GSO, so if a packet would be split to 2 by GSO, TcpOutSegs will increase 2. * TcpActiveOpens + Defined in `RFC1213 tcpActiveOpens`_ .. _RFC1213 tcpActiveOpens: https://tools.ietf.org/html/rfc1213#page-47 @@ -267,6 +288,7 @@ state. Every time TcpActiveOpens increases 1, TcpOutSegs should always increase 1. * TcpPassiveOpens + Defined in `RFC1213 tcpPassiveOpens`_ .. _RFC1213 tcpPassiveOpens: https://tools.ietf.org/html/rfc1213#page-47 @@ -275,6 +297,7 @@ It means the TCP layer receives a SYN, replies a SYN+ACK, come into the SYN-RCVD state. * TcpExtTCPRcvCoalesce + When packets are received by the TCP layer and are not be read by the application, the TCP layer will try to merge them. This counter indicate how many packets are merged in such situation. If GRO is @@ -282,12 +305,14 @@ enabled, lots of packets would be merged by GRO, these packets wouldn't be counted to TcpExtTCPRcvCoalesce. * TcpExtTCPAutoCorking + When sending packets, the TCP layer will try to merge small packets to a bigger one. This counter increase 1 for every packet merged in such situation. Please refer to the LWN article for more details: https://lwn.net/Articles/576263/ * TcpExtTCPOrigDataSent + This counter is explained by `kernel commit f19c29e3e391`_, I pasted the explaination below:: @@ -297,6 +322,7 @@ explaination below:: more useful to track the TCP retransmission rate. * TCPSynRetrans + This counter is explained by `kernel commit f19c29e3e391`_, I pasted the explaination below:: @@ -304,6 +330,7 @@ explaination below:: retransmissions into SYN, fast-retransmits, timeout retransmits, etc. * TCPFastOpenActiveFail + This counter is explained by `kernel commit f19c29e3e391`_, I pasted the explaination below:: @@ -313,6 +340,7 @@ explaination below:: .. _kernel commit f19c29e3e391: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f19c29e3e391a66a273e9afebaf01917245148cd * TcpExtListenOverflows and TcpExtListenDrops + When kernel receives a SYN from a client, and if the TCP accept queue is full, kernel will drop the SYN and add 1 to TcpExtListenOverflows. At the same time kernel will also add 1 to TcpExtListenDrops. When a @@ -336,6 +364,8 @@ time client replies ACK, this socket will get another chance to move to the accept queue. +TCP Fast Open +============= * TcpEstabResets Defined in `RFC1213 tcpEstabResets`_. @@ -389,20 +419,23 @@ will disable the fast path at first, and try to enable it after kernel receives packets. * TcpExtTCPPureAcks and TcpExtTCPHPAcks + If a packet set ACK flag and has no data, it is a pure ACK packet, if kernel handles it in the fast path, TcpExtTCPHPAcks will increase 1, if kernel handles it in the slow path, TcpExtTCPPureAcks will increase 1. * TcpExtTCPHPHits + If a TCP packet has data (which means it is not a pure ACK packet), and this packet is handled in the fast path, TcpExtTCPHPHits will increase 1. TCP abort -======== +========= * TcpExtTCPAbortOnData + It means TCP layer has data in flight, but need to close the connection. So TCP layer sends a RST to the other side, indicate the connection is not closed very graceful. 
An easy way to increase this @@ -421,11 +454,13 @@ when the application closes a connection, kernel will send a RST immediately and increase the TcpExtTCPAbortOnData counter. * TcpExtTCPAbortOnClose + This counter means the application has unread data in the TCP layer when the application wants to close the TCP connection. In such a situation, kernel will send a RST to the other side of the TCP connection. * TcpExtTCPAbortOnMemory + When an application closes a TCP connection, kernel still need to track the connection, let it complete the TCP disconnect process. E.g. an app calls the close method of a socket, kernel sends fin to the other @@ -447,10 +482,12 @@ the tcp_mem. Please refer the tcp_mem section in the `TCP man page`_: * TcpExtTCPAbortOnTimeout + This counter will increase when any of the TCP timers expire. In such situation, kernel won't send RST, just give up the connection. * TcpExtTCPAbortOnLinger + When a TCP connection comes into FIN_WAIT_2 state, instead of waiting for the fin packet from the other side, kernel could send a RST and delete the socket immediately. This is not the default behavior of @@ -458,6 +495,7 @@ Linux kernel TCP stack. By configuring the TCP_LINGER2 socket option, you could let kernel follow this behavior. * TcpExtTCPAbortFailed + The kernel TCP layer will send RST if the `RFC2525 2.17 section`_ is satisfied. If an internal error occurs during this process, TcpExtTCPAbortFailed will be increased. @@ -465,7 +503,7 @@ TcpExtTCPAbortFailed will be increased. .. _RFC2525 2.17 section: https://tools.ietf.org/html/rfc2525#page-50 TCP Hybrid Slow Start -==================== +===================== The Hybrid Slow Start algorithm is an enhancement of the traditional TCP congestion window Slow Start algorithm. It uses two pieces of information to detect whether the max bandwidth of the TCP path is @@ -481,23 +519,27 @@ relate with the Hybrid Slow Start algorithm. .. _Hybrid Slow Start paper: https://pdfs.semanticscholar.org/25e9/ef3f03315782c7f1cbcd31b587857adae7d1.pdf * TcpExtTCPHystartTrainDetect + How many times the ACK train length threshold is detected * TcpExtTCPHystartTrainCwnd + The sum of CWND detected by ACK train length. Dividing this value by TcpExtTCPHystartTrainDetect is the average CWND which detected by the ACK train length. * TcpExtTCPHystartDelayDetect + How many times the packet delay threshold is detected. * TcpExtTCPHystartDelayCwnd + The sum of CWND detected by packet delay. Dividing this value by TcpExtTCPHystartDelayDetect is the average CWND which detected by the packet delay. TCP retransmission and congestion control -====================================== +========================================= The TCP protocol has two retransmission mechanisms: SACK and fast recovery. They are exclusive with each other. When SACK is enabled, the kernel TCP stack would use SACK, or kernel would use fast @@ -516,12 +558,14 @@ https://pdfs.semanticscholar.org/0e9c/968d09ab2e53e24c4dca5b2d67c7f7140f8e.pdf .. _RFC6582: https://tools.ietf.org/html/rfc6582 * TcpExtTCPRenoRecovery and TcpExtTCPSackRecovery + When the congestion control comes into Recovery state, if sack is used, TcpExtTCPSackRecovery increases 1, if sack is not used, TcpExtTCPRenoRecovery increases 1. These two counters mean the TCP stack begins to retransmit the lost packets. * TcpExtTCPSACKReneging + A packet was acknowledged by SACK, but the receiver has dropped this packet, so the sender needs to retransmit this packet. In this situation, the sender adds 1 to TcpExtTCPSACKReneging. 
A receiver @@ -532,6 +576,7 @@ the RTO expires for this packet, then the sender assumes this packet has been dropped by the receiver. * TcpExtTCPRenoReorder + The reorder packet is detected by fast recovery. It would only be used if SACK is disabled. The fast recovery algorithm detects recorder by the duplicate ACK number. E.g., if retransmission is triggered, and @@ -542,6 +587,7 @@ order packet. Thus the sender would find more ACks than its expectation, and the sender knows out of order occurs. * TcpExtTCPTSReorder + The reorder packet is detected when a hole is filled. E.g., assume the sender sends packet 1,2,3,4,5, and the receiving order is 1,2,4,5,3. When the sender receives the ACK of packet 3 (which will @@ -551,6 +597,7 @@ fill the hole), two conditions will let TcpExtTCPTSReorder increase than the retransmission timestamp. * TcpExtTCPSACKReorder + The reorder packet detected by SACK. The SACK has two methods to detect reorder: (1) DSACK is received by the sender. It means the sender sends the same packet more than one times. And the only reason @@ -574,10 +621,12 @@ sender side. .. _RFC2883 : https://tools.ietf.org/html/rfc2883 * TcpExtTCPDSACKOldSent + The TCP stack receives a duplicate packet which has been acked, so it sends a DSACK to the sender. * TcpExtTCPDSACKOfoSent + The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender. @@ -586,6 +635,7 @@ The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received. * TcpExtTCPDSACKOfoRecv + The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received. @@ -640,23 +690,26 @@ A skb should be shifted or merged, but the TCP stack doesn't do it for some reasons. TCP out of order -=============== +================ * TcpExtTCPOFOQueue + The TCP layer receives an out of order packet and has enough memory to queue it. * TcpExtTCPOFODrop + The TCP layer receives an out of order packet but doesn't have enough memory, so drops it. Such packets won't be counted into TcpExtTCPOFOQueue. * TcpExtTCPOFOMerge + The received out of order packet has an overlay with the previous packet. the overlay part will be dropped. All of TcpExtTCPOFOMerge packets will also be counted into TcpExtTCPOFOQueue. TCP PAWS -======= +======== PAWS (Protection Against Wrapped Sequence numbers) is an algorithm which is used to drop old packets. It depends on the TCP timestamps. For detail information, please refer the `timestamp wiki`_ @@ -666,13 +719,15 @@ and the `RFC of PAWS`_. .. _timestamp wiki: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#TCP_timestamps * TcpExtPAWSActive + Packets are dropped by PAWS in Syn-Sent status. * TcpExtPAWSEstab + Packets are dropped by PAWS in any status other than Syn-Sent. TCP ACK skip -=========== +============ In some scenarios, kernel would avoid sending duplicate ACKs too frequently. Please find more details in the tcp_invalid_ratelimit section of the `sysctl document`_. When kernel decides to skip an ACK @@ -684,6 +739,7 @@ it has no data. .. _sysctl document: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt * TcpExtTCPACKSkippedSynRecv + The ACK is skipped in Syn-Recv status. The Syn-Recv status means the TCP stack receives a SYN and replies SYN+ACK. Now the TCP stack is waiting for an ACK. Generally, the TCP stack doesn't need to send ACK @@ -697,6 +753,7 @@ increase TcpExtTCPACKSkippedSynRecv. 
* TcpExtTCPACKSkippedPAWS + The ACK is skipped due to PAWS (Protect Against Wrapped Sequence numbers) check fails. If the PAWS check fails in Syn-Recv, Fin-Wait-2 or Time-Wait statuses, the skipped ACK would be counted to @@ -705,18 +762,22 @@ TcpExtTCPACKSkippedTimeWait. In all other statuses, the skipped ACK would be counted to TcpExtTCPACKSkippedPAWS. * TcpExtTCPACKSkippedSeq + The sequence number is out of window and the timestamp passes the PAWS check and the TCP status is not Syn-Recv, Fin-Wait-2, and Time-Wait. * TcpExtTCPACKSkippedFinWait2 + The ACK is skipped in Fin-Wait-2 status, the reason would be either PAWS check fails or the received sequence number is out of window. * TcpExtTCPACKSkippedTimeWait + Tha ACK is skipped in Time-Wait status, the reason would be either PAWS check failed or the received sequence number is out of window. * TcpExtTCPACKSkippedChallenge + The ACK is skipped if the ACK is a challenge ACK. The RFC 5961 defines 3 kind of challenge ACK, please refer `RFC 5961 section 3.2`_, `RFC 5961 section 4.2`_ and `RFC 5961 section 5.2`_. Besides these @@ -784,10 +845,10 @@ A TLP probe packet is sent. A packet loss is detected and recovered by TLP. examples -======= +======== ping test --------- +--------- Run the ping command against the public dns server 8.8.8.8:: nstatuser@nstat-a:~$ ping 8.8.8.8 -c 1 @@ -831,7 +892,7 @@ and its corresponding Echo Reply packet are constructed by: So the IpExtInOctets and IpExtOutOctets are 20+16+48=84. tcp 3-way handshake ------------------- +------------------- On server side, we run:: nstatuser@nstat-b:~$ nc -lknv 0.0.0.0 9000 @@ -873,7 +934,7 @@ ACK, so client sent 2 packets, received 1 packet, TcpInSegs increased 1, TcpOutSegs increased 2. TCP normal traffic ------------------ +------------------ Run nc on server:: nstatuser@nstat-b:~$ nc -lkv 0.0.0.0 9000 @@ -996,7 +1057,7 @@ and the packet received from client qualified for fast path, so it was counted into 'TcpExtTCPHPHits'. TcpExtTCPAbortOnClose --------------------- +--------------------- On the server side, we run below python script:: import socket @@ -1030,7 +1091,7 @@ If we run tcpdump on the server side, we could find the server sent a RST after we type Ctrl-C. TcpExtTCPAbortOnMemory and TcpExtTCPAbortOnTimeout ------------------------------------------------ +--------------------------------------------------- Below is an example which let the orphan socket count be higher than net.ipv4.tcp_max_orphans. Change tcp_max_orphans to a smaller value on client:: @@ -1152,7 +1213,7 @@ FIN_WAIT_1 state finally. So we wait for a few minutes, we could find TcpExtTCPAbortOnTimeout 10 0.0 TcpExtTCPAbortOnLinger ---------------------- +---------------------- The server side code:: nstatuser@nstat-b:~$ cat server_linger.py @@ -1197,7 +1258,7 @@ After run client_linger.py, check the output of nstat:: TcpExtTCPAbortOnLinger 1 0.0 TcpExtTCPRcvCoalesce -------------------- +-------------------- On the server, we run a program which listen on TCP port 9000, but doesn't read any data:: @@ -1257,7 +1318,7 @@ the receiving queue. So the TCP layer merged the two packets, and we could find the TcpExtTCPRcvCoalesce increased 1. 
TcpExtListenOverflows and TcpExtListenDrops ----------------------------------------- +------------------------------------------- On server, run the nc command, listen on port 9000:: nstatuser@nstat-b:~$ nc -lkv 0.0.0.0 9000 @@ -1305,7 +1366,7 @@ TcpExtListenOverflows and TcpExtListenDrops would be larger, because the SYN of the 4th nc was dropped, the client was retrying. IpInAddrErrors, IpExtInNoRoutes and IpOutNoRoutes ----------------------------------------------- +------------------------------------------------- server A IP address: 192.168.122.250 server B IP address: 192.168.122.251 Prepare on server A, add a route to server B:: @@ -1400,7 +1461,7 @@ a route for the 8.8.8.8 IP address, so server B increased IpOutNoRoutes. TcpExtTCPACKSkippedSynRecv ------------------------- +-------------------------- In this test, we send 3 same SYN packets from client to server. The first SYN will let server create a socket, set it to Syn-Recv status, and reply a SYN/ACK. The second SYN will let server reply the SYN/ACK @@ -1448,7 +1509,7 @@ Check snmp cunter on nstat-b:: As we expected, TcpExtTCPACKSkippedSynRecv is 1. TcpExtTCPACKSkippedPAWS ----------------------- +----------------------- To trigger PAWS, we could send an old SYN. On nstat-b, let nc listen on port 9000:: @@ -1485,7 +1546,7 @@ failed, the nstat-b replied an ACK for the first SYN, skipped the ACK for the second SYN, and updated TcpExtTCPACKSkippedPAWS. TcpExtTCPACKSkippedSeq --------------------- +---------------------- To trigger TcpExtTCPACKSkippedSeq, we send packets which have valid timestamp (to pass PAWS check) but the sequence number is out of window. The linux TCP stack would avoid to skip if the packet has diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 82236a17b5e6..f3244d87512a 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -196,7 +196,7 @@ The switch device will learn/forget source MAC address/VLAN on ingress packets and notify the switch driver of the mac/vlan/port tuples. The switch driver, in turn, will notify the bridge driver using the switchdev notifier call: - err = call_switchdev_notifiers(val, dev, info); + err = call_switchdev_notifiers(val, dev, info, extack); Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when forgetting, and info points to a struct switchdev_notifier_fdb_info. 
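To make the new argument concrete, here is a sketch of a switch driver's learning path after this change. Only the call_switchdev_notifiers() signature and struct switchdev_notifier_fdb_info come from the text above; my_fdb_learned() and its parameters are hypothetical, and the event constant is spelled SWITCHDEV_FDB_ADD_TO_BRIDGE in current kernels (the text above refers to it as SWITCHDEV_FDB_ADD):

	/* Hypothetical driver helper: report a MAC/VLAN learned by the
	 * hardware to the bridge via the switchdev notifier chain.
	 */
	static int my_fdb_learned(struct net_device *dev,
				  const unsigned char *mac, u16 vid,
				  struct netlink_ext_ack *extack)
	{
		struct switchdev_notifier_fdb_info info = {};

		info.addr = mac;
		info.vid = vid;

		/* The generic switchdev_notifier_info is embedded as the
		 * first member, so the notifiee can container_of() its way
		 * back to the full fdb_info.
		 */
		return call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
						dev, &info.info, extack);
	}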
On diff --git a/MAINTAINERS b/MAINTAINERS index 51029a425dbe..8e2c82f4c72f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10575,6 +10575,7 @@ F: Documentation/devicetree/bindings/net/dsa/ F: net/dsa/ F: include/net/dsa.h F: include/linux/dsa/ +F: include/linux/platform_data/dsa.h F: drivers/net/dsa/ NETWORKING [GENERAL] diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 065fb372e355..b1c9b542c021 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -115,4 +115,6 @@ #define SO_TXTIME 61 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 62 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 83a7ec4c16d0..c67f92bfa30e 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -20,7 +20,7 @@ #include <linux/delay.h> #include <linux/clk-provider.h> #include <linux/cpu.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <asm/page.h> #include <asm/setup.h> #include <asm/system_misc.h> diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c index a3c1336d30c9..c65ab7db36ad 100644 --- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c +++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c @@ -16,7 +16,7 @@ #include <linux/mtd/physmap.h> #include <linux/mv643xx_eth.h> #include <linux/ethtool.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/pci.h> diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c index 252efe29bd1a..76b8138d9d79 100644 --- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c @@ -17,7 +17,7 @@ #include <linux/mv643xx_eth.h> #include <linux/ethtool.h> #include <linux/i2c.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/pci.h> diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c index f4f1dbe1d91d..5f388a1ed1e4 100644 --- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c @@ -18,7 +18,7 @@ #include <linux/spi/spi.h> #include <linux/spi/flash.h> #include <linux/ethtool.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/pci.h> diff --git a/arch/arm/mach-orion5x/wnr854t-setup.c b/arch/arm/mach-orion5x/wnr854t-setup.c index d162d4c7f85d..83589a28a491 100644 --- a/arch/arm/mach-orion5x/wnr854t-setup.c +++ b/arch/arm/mach-orion5x/wnr854t-setup.c @@ -15,7 +15,7 @@ #include <linux/mtd/physmap.h> #include <linux/mv643xx_eth.h> #include <linux/ethtool.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/pci.h> diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c index 9250bb2e429c..cea08d4a2597 100644 --- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c +++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c @@ -18,7 +18,7 @@ #include <linux/leds.h> #include <linux/gpio_keys.h> #include <linux/input.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/pci.h> diff --git a/arch/arm/plat-orion/common.c 
b/arch/arm/plat-orion/common.c index a2399fd66e97..a6c81ce00f52 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -18,7 +18,7 @@ #include <linux/clkdev.h> #include <linux/mv643xx_eth.h> #include <linux/mv643xx_i2c.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> #include <linux/platform_data/dma-mv_xor.h> #include <linux/platform_data/usb-ehci-orion.h> #include <plat/common.h> diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index c872c4e6bafb..ba0d245f9576 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -117,4 +117,6 @@ #define SO_TXTIME 61 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 62 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 71370fb3ceef..73e25e35d803 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -126,4 +126,6 @@ #define SO_TXTIME 61 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 62 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 061b9cf2a779..52bed5976cbe 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -107,4 +107,6 @@ #define SO_TXTIME 0x4036 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 0x4037 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 39d901476ee5..49c971587087 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -114,4 +114,6 @@ #define SO_TXTIME 61 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 62 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 7ea35e5601b6..bbdb81594dd4 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -104,6 +104,8 @@ #define SO_TXTIME 0x003f #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 0x0041 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 1de07a7f7680..b434217783d0 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -119,4 +119,6 @@ #define SO_TXTIME 61 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 62 + #endif /* _XTENSA_SOCKET_H */ diff --git a/drivers/isdn/hisax/netjet.c b/drivers/isdn/hisax/netjet.c index e932a152c405..d7b011c8d692 100644 --- a/drivers/isdn/hisax/netjet.c +++ b/drivers/isdn/hisax/netjet.c @@ -332,7 +332,7 @@ static int make_raw_data_56k(struct BCState *bcs) { bitcnt = 0; } val >>= 1; - }; + } fcs = PPP_INITFCS; for (i = 0; i < bcs->tx_skb->len; i++) { val = bcs->tx_skb->data[i]; @@ -415,7 +415,7 @@ static void read_raw(struct BCState *bcs, u_int *buf, int cnt) { else { // it's 56K mask = 0x7f; bits = 7; - }; + } for (i = 0; i < cnt; i++) { val = bcs->channel ? 
((*p >> 8) & 0xff) : (*p & 0xff); p++; @@ -623,7 +623,7 @@ void netjet_fill_dma(struct BCState *bcs) else { // it's 56k if (make_raw_data_56k(bcs)) return; - }; + } if (bcs->cs->debug & L1_DEB_HSCX) debugl1(bcs->cs, "tiger fill_dma2: c%d %4lx", bcs->channel, bcs->Flag); diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c index 298c8dba0321..6b8c3fbe3965 100644 --- a/drivers/isdn/hisax/q931.c +++ b/drivers/isdn/hisax/q931.c @@ -598,7 +598,7 @@ prcalling(char *dest, u_char *p) dp += prbits(dp, *++p, 8, 8); *dp++ = '\n'; l--; - }; + } p++; dp += sprintf(dp, " number digits "); diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h index 8cd2d8277426..b421b86ca7da 100644 --- a/drivers/isdn/hisax/st5481.h +++ b/drivers/isdn/hisax/st5481.h @@ -512,7 +512,7 @@ static inline const char *ST5481_CMD_string(int evt) case ST5481_CMD_AR10: return "AR10"; case ST5481_CMD_ARL: return "ARL"; case ST5481_CMD_PDN: return "PDN"; - }; + } sprintf(s, "0x%x", evt); return s; diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index a4597e96c916..f4253d468ae1 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -72,7 +72,7 @@ isdnloop_bchan_send(isdnloop_card *card, int ch) printk(KERN_WARNING "isdnloop: no rcard, skb dropped\n"); dev_kfree_skb(skb); - }; + } cmd.command = ISDN_STAT_BSENT; cmd.parm.length = len; card->interface.statcallb(&cmd); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 693a67f45bef..27d092cab40e 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1069,10 +1069,10 @@ static int gswip_probe(struct platform_device *pdev) version = gswip_switch_r(priv, GSWIP_VERSION); /* bring up the mdio bus */ - gphy_fw_np = of_find_compatible_node(pdev->dev.of_node, NULL, - "lantiq,gphy-fw"); + gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw"); if (gphy_fw_np) { err = gswip_gphy_fw_list(priv, gphy_fw_np, version); + of_node_put(gphy_fw_np); if (err) { dev_err(dev, "gphy fw probe failed\n"); return err; @@ -1080,13 +1080,12 @@ static int gswip_probe(struct platform_device *pdev) } /* bring up the mdio bus */ - mdio_np = of_find_compatible_node(pdev->dev.of_node, NULL, - "lantiq,xrx200-mdio"); + mdio_np = of_get_compatible_child(dev->of_node, "lantiq,xrx200-mdio"); if (mdio_np) { err = gswip_mdio(priv, mdio_np); if (err) { dev_err(dev, "mdio probe failed\n"); - goto gphy_fw; + goto put_mdio_node; } } @@ -1099,7 +1098,7 @@ static int gswip_probe(struct platform_device *pdev) dev_err(dev, "wrong CPU port defined, HW only supports port: %i", priv->hw_info->cpu_port); err = -EINVAL; - goto mdio_bus; + goto disable_switch; } platform_set_drvdata(pdev, priv); @@ -1109,10 +1108,14 @@ static int gswip_probe(struct platform_device *pdev) (version & GSWIP_VERSION_MOD_MASK) >> GSWIP_VERSION_MOD_SHIFT); return 0; +disable_switch: + gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); + dsa_unregister_switch(priv->ds); mdio_bus: if (mdio_np) mdiobus_unregister(priv->ds->slave_mii_bus); -gphy_fw: +put_mdio_node: + of_node_put(mdio_np); for (i = 0; i < priv->num_gphy_fw; i++) gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); return err; @@ -1123,16 +1126,15 @@ static int gswip_remove(struct platform_device *pdev) struct gswip_priv *priv = platform_get_drvdata(pdev); int i; - if (!priv) - return 0; - /* disable the switch */ gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); dsa_unregister_switch(priv->ds); - if 
(priv->ds->slave_mii_bus) + if (priv->ds->slave_mii_bus) { mdiobus_unregister(priv->ds->slave_mii_bus); + of_node_put(priv->ds->slave_mii_bus->dev.of_node); + } for (i = 0; i < priv->num_gphy_fw; i++) gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h index 46ee2c01f4c5..066765fbef06 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h @@ -449,7 +449,7 @@ static inline void bnx2x_init_fw_wrr(const struct cmng_init_input *input_data, ccd[cos] = (u32)input_data->cos_min_rate[cos] * 100 * (T_FAIR_COEF / (8 * 100 * cosWeightSum)); - if (ccd[cos] < pdata->fair_vars.fair_threshold + if (ccd[cos] < pdata->fair_vars.fair_threshold + MIN_ABOVE_THRESH) { ccd[cos] = pdata->fair_vars.fair_threshold + diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 98d4c5a3ff21..29738dfa878c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -837,49 +837,45 @@ static int bnx2x_ets_e3b0_set_cos_bw(struct bnx2x *bp, switch (cos_entry) { case 0: - nig_reg_adress_crd_weight = - (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 : - NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0; - pbf_reg_adress_crd_weight = (port) ? - PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0; - break; + nig_reg_adress_crd_weight = + (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 : + NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0; + pbf_reg_adress_crd_weight = (port) ? + PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0; + break; case 1: - nig_reg_adress_crd_weight = (port) ? - NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 : - NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1; - pbf_reg_adress_crd_weight = (port) ? - PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0; - break; + nig_reg_adress_crd_weight = (port) ? + NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 : + NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1; + pbf_reg_adress_crd_weight = (port) ? + PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0; + break; case 2: - nig_reg_adress_crd_weight = (port) ? - NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 : - NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2; + nig_reg_adress_crd_weight = (port) ? + NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 : + NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2; - pbf_reg_adress_crd_weight = (port) ? - PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0; - break; + pbf_reg_adress_crd_weight = (port) ? 
+ PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0; + break; case 3: - if (port) + if (port) return -EINVAL; - nig_reg_adress_crd_weight = - NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3; - pbf_reg_adress_crd_weight = - PBF_REG_COS3_WEIGHT_P0; - break; + nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3; + pbf_reg_adress_crd_weight = PBF_REG_COS3_WEIGHT_P0; + break; case 4: - if (port) - return -EINVAL; - nig_reg_adress_crd_weight = - NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4; - pbf_reg_adress_crd_weight = PBF_REG_COS4_WEIGHT_P0; - break; + if (port) + return -EINVAL; + nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4; + pbf_reg_adress_crd_weight = PBF_REG_COS4_WEIGHT_P0; + break; case 5: - if (port) - return -EINVAL; - nig_reg_adress_crd_weight = - NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5; - pbf_reg_adress_crd_weight = PBF_REG_COS5_WEIGHT_P0; - break; + if (port) + return -EINVAL; + nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5; + pbf_reg_adress_crd_weight = PBF_REG_COS5_WEIGHT_P0; + break; } REG_WR(bp, nig_reg_adress_crd_weight, cos_bw_nig); @@ -966,7 +962,7 @@ static int bnx2x_ets_e3b0_sp_pri_to_cos_set(const struct link_params *params, if (pri >= max_num_of_cos) { DP(NETIF_MSG_LINK, "bnx2x_ets_e3b0_sp_pri_to_cos_set invalid " "parameter Illegal strict priority\n"); - return -EINVAL; + return -EINVAL; } if (sp_pri_to_cos[pri] != DCBX_INVALID_COS) { @@ -1845,28 +1841,28 @@ static int bnx2x_emac_enable(struct link_params *params, bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE, EMAC_TX_MODE_RESET); - /* pause enable/disable */ - bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE, - EMAC_RX_MODE_FLOW_EN); + /* pause enable/disable */ + bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE, + EMAC_RX_MODE_FLOW_EN); - bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_TX_MODE, - (EMAC_TX_MODE_EXT_PAUSE_EN | - EMAC_TX_MODE_FLOW_EN)); - if (!(params->feature_config_flags & - FEATURE_CONFIG_PFC_ENABLED)) { - if (vars->flow_ctrl & BNX2X_FLOW_CTRL_RX) - bnx2x_bits_en(bp, emac_base + - EMAC_REG_EMAC_RX_MODE, - EMAC_RX_MODE_FLOW_EN); - - if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) - bnx2x_bits_en(bp, emac_base + - EMAC_REG_EMAC_TX_MODE, - (EMAC_TX_MODE_EXT_PAUSE_EN | - EMAC_TX_MODE_FLOW_EN)); - } else - bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE, - EMAC_TX_MODE_FLOW_EN); + bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_TX_MODE, + (EMAC_TX_MODE_EXT_PAUSE_EN | + EMAC_TX_MODE_FLOW_EN)); + if (!(params->feature_config_flags & + FEATURE_CONFIG_PFC_ENABLED)) { + if (vars->flow_ctrl & BNX2X_FLOW_CTRL_RX) + bnx2x_bits_en(bp, emac_base + + EMAC_REG_EMAC_RX_MODE, + EMAC_RX_MODE_FLOW_EN); + + if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) + bnx2x_bits_en(bp, emac_base + + EMAC_REG_EMAC_TX_MODE, + (EMAC_TX_MODE_EXT_PAUSE_EN | + EMAC_TX_MODE_FLOW_EN)); + } else + bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE, + EMAC_TX_MODE_FLOW_EN); /* KEEP_VLAN_TAG, promiscuous */ val = REG_RD(bp, emac_base + EMAC_REG_EMAC_RX_MODE); @@ -6478,9 +6474,9 @@ int bnx2x_test_link(struct link_params *params, struct link_vars *vars, MDIO_REG_BANK_GP_STATUS, MDIO_GP_STATUS_TOP_AN_STATUS1, &gp_status); - /* Link is up only if both local phy and external phy are up */ - if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS)) - return -ESRCH; + /* Link is up only if both local phy and external phy are up */ + if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS)) + return -ESRCH; } /* In XGXS loopback mode, do not check external PHY */ if (params->loopback_mode == LOOPBACK_XGXS) @@ -7293,8 +7289,8 @@ static 
int bnx2x_8073_xaui_wa(struct bnx2x *bp, struct bnx2x_phy *phy) DP(NETIF_MSG_LINK, "XAUI workaround has completed\n"); return 0; - } - usleep_range(3000, 6000); + } + usleep_range(3000, 6000); } break; } @@ -12675,39 +12671,39 @@ static void bnx2x_init_bmac_loopback(struct link_params *params, struct link_vars *vars) { struct bnx2x *bp = params->bp; - vars->link_up = 1; - vars->line_speed = SPEED_10000; - vars->duplex = DUPLEX_FULL; - vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; - vars->mac_type = MAC_TYPE_BMAC; + vars->link_up = 1; + vars->line_speed = SPEED_10000; + vars->duplex = DUPLEX_FULL; + vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; + vars->mac_type = MAC_TYPE_BMAC; - vars->phy_flags = PHY_XGXS_FLAG; + vars->phy_flags = PHY_XGXS_FLAG; - bnx2x_xgxs_deassert(params); + bnx2x_xgxs_deassert(params); - /* Set bmac loopback */ - bnx2x_bmac_enable(params, vars, 1, 1); + /* Set bmac loopback */ + bnx2x_bmac_enable(params, vars, 1, 1); - REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); + REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0); } static void bnx2x_init_emac_loopback(struct link_params *params, struct link_vars *vars) { struct bnx2x *bp = params->bp; - vars->link_up = 1; - vars->line_speed = SPEED_1000; - vars->duplex = DUPLEX_FULL; - vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; - vars->mac_type = MAC_TYPE_EMAC; + vars->link_up = 1; + vars->line_speed = SPEED_1000; + vars->duplex = DUPLEX_FULL; + vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; + vars->mac_type = MAC_TYPE_EMAC; - vars->phy_flags = PHY_XGXS_FLAG; + vars->phy_flags = PHY_XGXS_FLAG; - bnx2x_xgxs_deassert(params); - /* Set bmac loopback */ - bnx2x_emac_enable(params, vars, 1); - bnx2x_emac_program(params, vars); - REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); + bnx2x_xgxs_deassert(params); + /* Set bmac loopback */ + bnx2x_emac_enable(params, vars, 1); + bnx2x_emac_program(params, vars); + REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0); } static void bnx2x_init_xmac_loopback(struct link_params *params, @@ -13073,12 +13069,12 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars, REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0); } - if (!CHIP_IS_E3(bp)) { - bnx2x_set_bmac_rx(bp, params->chip_id, port, 0); - } else { - bnx2x_set_xmac_rxtx(params, 0); - bnx2x_set_umac_rxtx(params, 0); - } + if (!CHIP_IS_E3(bp)) { + bnx2x_set_bmac_rx(bp, params->chip_id, port, 0); + } else { + bnx2x_set_xmac_rxtx(params, 0); + bnx2x_set_umac_rxtx(params, 0); + } /* Disable emac */ if (!CHIP_IS_E3(bp)) REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 3b5b47e98c73..0cec82450e19 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11298,7 +11298,7 @@ static void bnx2x_link_settings_supported(struct bnx2x *bp, u32 switch_cfg) dev_info.port_hw_config[port].external_phy_config), SHMEM_RD(bp, dev_info.port_hw_config[port].external_phy_config2)); - return; + return; } if (CHIP_IS_E3(bp)) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index a9eaaf3e73a4..7b22a6d8514c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2977,8 +2977,8 @@ static inline void bnx2x_mcast_hdl_pending_del_e2(struct bnx2x *bp, cmd_pos->data.macs_num--; - DP(BNX2X_MSG_SP, "Deleting MAC. 
%d left,cnt is %d\n", - cmd_pos->data.macs_num, cnt); + DP(BNX2X_MSG_SP, "Deleting MAC. %d left,cnt is %d\n", + cmd_pos->data.macs_num, cnt); /* Break if we reached the maximum * number of rules. @@ -3597,8 +3597,8 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp, /* RESTORE command will restore the entire multicast configuration */ case BNX2X_MCAST_CMD_RESTORE: p->mcast_list_len = reg_sz; - DP(BNX2X_MSG_SP, "Command %d, p->mcast_list_len=%d\n", - cmd, p->mcast_list_len); + DP(BNX2X_MSG_SP, "Command %d, p->mcast_list_len=%d\n", + cmd, p->mcast_list_len); break; case BNX2X_MCAST_CMD_ADD: @@ -3735,8 +3735,8 @@ static inline int bnx2x_mcast_handle_restore_cmd_e1( i++; - DP(BNX2X_MSG_SP, "About to configure %pM mcast MAC\n", - cfg_data.mac); + DP(BNX2X_MSG_SP, "About to configure %pM mcast MAC\n", + cfg_data.mac); } *rdata_idx = i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 4852febbfec3..1a407d3c1d67 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -646,7 +646,7 @@ struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end) if (l2t_size < L2T_MIN_HASH_BUCKETS) return NULL; - d = kvzalloc(sizeof(*d) + l2t_size * sizeof(struct l2t_entry), GFP_KERNEL); + d = kvzalloc(struct_size(d, l2tab, l2t_size), GFP_KERNEL); if (!d) return NULL; diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 0a82fcf16d35..c2586f44c29d 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -395,6 +395,7 @@ static void dm9000_set_io(struct board_info *db, int byte_width) case 3: dev_dbg(db->dev, ": 3 byte IO, falling back to 16bit\n"); + /* fall through */ case 2: db->dumpblk = dm9000_dumpblk_16bit; db->outblk = dm9000_outblk_16bit; diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index 2f424e0a8225..d1e78cdd512f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o +fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o # Needed by the tracing framework diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c new file mode 100644 index 000000000000..a027f4a9d0cc --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2015 Freescale Semiconductor Inc. 
+ * Copyright 2018-2019 NXP + */ +#include <linux/module.h> +#include <linux/debugfs.h> +#include "dpaa2-eth.h" +#include "dpaa2-eth-debugfs.h" + +#define DPAA2_ETH_DBG_ROOT "dpaa2-eth" + +static struct dentry *dpaa2_dbg_root; + +static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset) +{ + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; + struct rtnl_link_stats64 *stats; + struct dpaa2_eth_drv_stats *extras; + int i; + + seq_printf(file, "Per-CPU stats for %s\n", priv->net_dev->name); + seq_printf(file, "%s%16s%16s%16s%16s%16s%16s%16s%16s%16s\n", + "CPU", "Rx", "Rx Err", "Rx SG", "Tx", "Tx Err", "Tx conf", + "Tx SG", "Tx realloc", "Enq busy"); + + for_each_online_cpu(i) { + stats = per_cpu_ptr(priv->percpu_stats, i); + extras = per_cpu_ptr(priv->percpu_extras, i); + seq_printf(file, "%3d%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu\n", + i, + stats->rx_packets, + stats->rx_errors, + extras->rx_sg_frames, + stats->tx_packets, + stats->tx_errors, + extras->tx_conf_frames, + extras->tx_sg_frames, + extras->tx_reallocs, + extras->tx_portal_busy); + } + + return 0; +} + +static int dpaa2_dbg_cpu_open(struct inode *inode, struct file *file) +{ + int err; + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; + + err = single_open(file, dpaa2_dbg_cpu_show, priv); + if (err < 0) + netdev_err(priv->net_dev, "single_open() failed\n"); + + return err; +} + +static const struct file_operations dpaa2_dbg_cpu_ops = { + .open = dpaa2_dbg_cpu_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static char *fq_type_to_str(struct dpaa2_eth_fq *fq) +{ + switch (fq->type) { + case DPAA2_RX_FQ: + return "Rx"; + case DPAA2_TX_CONF_FQ: + return "Tx conf"; + default: + return "N/A"; + } +} + +static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset) +{ + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; + struct dpaa2_eth_fq *fq; + u32 fcnt, bcnt; + int i, err; + + seq_printf(file, "FQ stats for %s:\n", priv->net_dev->name); + seq_printf(file, "%s%16s%16s%16s%16s\n", + "VFQID", "CPU", "Type", "Frames", "Pending frames"); + + for (i = 0; i < priv->num_fqs; i++) { + fq = &priv->fq[i]; + err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt); + if (err) + fcnt = 0; + + seq_printf(file, "%5d%16d%16s%16llu%16u\n", + fq->fqid, + fq->target_cpu, + fq_type_to_str(fq), + fq->stats.frames, + fcnt); + } + + return 0; +} + +static int dpaa2_dbg_fqs_open(struct inode *inode, struct file *file) +{ + int err; + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; + + err = single_open(file, dpaa2_dbg_fqs_show, priv); + if (err < 0) + netdev_err(priv->net_dev, "single_open() failed\n"); + + return err; +} + +static const struct file_operations dpaa2_dbg_fq_ops = { + .open = dpaa2_dbg_fqs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset) +{ + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; + struct dpaa2_eth_channel *ch; + int i; + + seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name); + seq_printf(file, "%s%16s%16s%16s%16s\n", + "CHID", "CPU", "Deq busy", "CDANs", "Buf count"); + + for (i = 0; i < priv->num_channels; i++) { + ch = priv->channel[i]; + seq_printf(file, "%4d%16d%16llu%16llu%16d\n", + ch->ch_id, + ch->nctx.desired_cpu, + ch->stats.dequeue_portal_busy, + ch->stats.cdan, + ch->buf_count); + } + + return 0; +} + +static int 
dpaa2_dbg_ch_open(struct inode *inode, struct file *file)
+{
+ int err;
+ struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
+
+ err = single_open(file, dpaa2_dbg_ch_show, priv);
+ if (err < 0)
+ netdev_err(priv->net_dev, "single_open() failed\n");
+
+ return err;
+}
+
+static const struct file_operations dpaa2_dbg_ch_ops = {
+ .open = dpaa2_dbg_ch_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void dpaa2_dbg_add(struct dpaa2_eth_priv *priv)
+{
+ if (!dpaa2_dbg_root)
+ return;
+
+ /* Create a directory for the interface */
+ priv->dbg.dir = debugfs_create_dir(priv->net_dev->name,
+ dpaa2_dbg_root);
+ if (!priv->dbg.dir) {
+ netdev_err(priv->net_dev, "debugfs_create_dir() failed\n");
+ return;
+ }
+
+ /* per-cpu stats file */
+ priv->dbg.cpu_stats = debugfs_create_file("cpu_stats", 0444,
+ priv->dbg.dir, priv,
+ &dpaa2_dbg_cpu_ops);
+ if (!priv->dbg.cpu_stats) {
+ netdev_err(priv->net_dev, "debugfs_create_file() failed\n");
+ goto err_cpu_stats;
+ }
+
+ /* per-fq stats file */
+ priv->dbg.fq_stats = debugfs_create_file("fq_stats", 0444,
+ priv->dbg.dir, priv,
+ &dpaa2_dbg_fq_ops);
+ if (!priv->dbg.fq_stats) {
+ netdev_err(priv->net_dev, "debugfs_create_file() failed\n");
+ goto err_fq_stats;
+ }
+
+ /* per-channel stats file */
+ priv->dbg.ch_stats = debugfs_create_file("ch_stats", 0444,
+ priv->dbg.dir, priv,
+ &dpaa2_dbg_ch_ops);
+ if (!priv->dbg.ch_stats) {
+ netdev_err(priv->net_dev, "debugfs_create_file() failed\n");
+ goto err_ch_stats;
+ }
+
+ return;
+
+err_ch_stats:
+ debugfs_remove(priv->dbg.fq_stats);
+err_fq_stats:
+ debugfs_remove(priv->dbg.cpu_stats);
+err_cpu_stats:
+ debugfs_remove(priv->dbg.dir);
+}
+
+void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv)
+{
+ debugfs_remove(priv->dbg.fq_stats);
+ debugfs_remove(priv->dbg.ch_stats);
+ debugfs_remove(priv->dbg.cpu_stats);
+ debugfs_remove(priv->dbg.dir);
+}
+
+void dpaa2_eth_dbg_init(void)
+{
+ dpaa2_dbg_root = debugfs_create_dir(DPAA2_ETH_DBG_ROOT, NULL);
+ if (!dpaa2_dbg_root) {
+ pr_err("DPAA2-ETH: debugfs create failed\n");
+ return;
+ }
+
+ pr_debug("DPAA2-ETH: debugfs created\n");
+}
+
+void dpaa2_eth_dbg_exit(void)
+{
+ debugfs_remove(dpaa2_dbg_root);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h
new file mode 100644
index 000000000000..4f63de997a26
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2015 Freescale Semiconductor Inc.
+ * Copyright 2018-2019 NXP + */ +#ifndef DPAA2_ETH_DEBUGFS_H +#define DPAA2_ETH_DEBUGFS_H + +#include <linux/dcache.h> + +struct dpaa2_eth_priv; + +struct dpaa2_debugfs { + struct dentry *dir; + struct dentry *fq_stats; + struct dentry *ch_stats; + struct dentry *cpu_stats; +}; + +#ifdef CONFIG_DEBUG_FS +void dpaa2_eth_dbg_init(void); +void dpaa2_eth_dbg_exit(void); +void dpaa2_dbg_add(struct dpaa2_eth_priv *priv); +void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv); +#else +static inline void dpaa2_eth_dbg_init(void) {} +static inline void dpaa2_eth_dbg_exit(void) {} +static inline void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) {} +static inline void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv) {} +#endif /* CONFIG_DEBUG_FS */ + +#endif /* DPAA2_ETH_DEBUGFS_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 1ca9a18139ec..04925c731f0b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1243,34 +1243,36 @@ enable_err: return err; } -/* The DPIO store must be empty when we call this, - * at the end of every NAPI cycle. - */ -static u32 drain_channel(struct dpaa2_eth_channel *ch) +/* Total number of in-flight frames on ingress queues */ +static u32 ingress_fq_count(struct dpaa2_eth_priv *priv) { - u32 drained = 0, total = 0; + struct dpaa2_eth_fq *fq; + u32 fcnt = 0, bcnt = 0, total = 0; + int i, err; - do { - pull_channel(ch); - drained = consume_frames(ch, NULL); - total += drained; - } while (drained); + for (i = 0; i < priv->num_fqs; i++) { + fq = &priv->fq[i]; + err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt); + if (err) { + netdev_warn(priv->net_dev, "query_fq_count failed"); + break; + } + total += fcnt; + } return total; } -static u32 drain_ingress_frames(struct dpaa2_eth_priv *priv) +static void wait_for_fq_empty(struct dpaa2_eth_priv *priv) { - struct dpaa2_eth_channel *ch; - int i; - u32 drained = 0; - - for (i = 0; i < priv->num_channels; i++) { - ch = priv->channel[i]; - drained += drain_channel(ch); - } + int retries = 10; + u32 pending; - return drained; + do { + pending = ingress_fq_count(priv); + if (pending) + msleep(100); + } while (pending && --retries); } static int dpaa2_eth_stop(struct net_device *net_dev) @@ -1278,14 +1280,22 @@ static int dpaa2_eth_stop(struct net_device *net_dev) struct dpaa2_eth_priv *priv = netdev_priv(net_dev); int dpni_enabled = 0; int retries = 10; - u32 drained; netif_tx_stop_all_queues(net_dev); netif_carrier_off(net_dev); - /* Loop while dpni_disable() attempts to drain the egress FQs - * and confirm them back to us. + /* On dpni_disable(), the MC firmware will: + * - stop MAC Rx and wait for all Rx frames to be enqueued to software + * - cut off WRIOP dequeues from egress FQs and wait until transmission + * of all in flight Tx frames is finished (and corresponding Tx conf + * frames are enqueued back to software) + * + * Before calling dpni_disable(), we wait for all Tx frames to arrive + * on WRIOP. After it finishes, wait until all remaining frames on Rx + * and Tx conf queues are consumed on NAPI poll. */ + msleep(500); + do { dpni_disable(priv->mc_io, 0, priv->mc_token); dpni_is_enabled(priv->mc_io, 0, priv->mc_token, &dpni_enabled); @@ -1300,19 +1310,9 @@ static int dpaa2_eth_stop(struct net_device *net_dev) */ } - /* Wait for NAPI to complete on every core and disable it. 
- * In particular, this will also prevent NAPI from being rescheduled if - * a new CDAN is serviced, effectively discarding the CDAN. We therefore - * don't even need to disarm the channels, except perhaps for the case - * of a huge coalescing value. - */ + wait_for_fq_empty(priv); disable_ch_napi(priv); - /* Manually drain the Rx and TxConf queues */ - drained = drain_ingress_frames(priv); - if (drained) - netdev_dbg(net_dev, "Drained %d frames.\n", drained); - /* Empty the buffer pool */ drain_pool(priv); @@ -3083,6 +3083,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_netdev_reg; } +#ifdef CONFIG_DEBUG_FS + dpaa2_dbg_add(priv); +#endif + dev_info(dev, "Probed interface %s\n", net_dev->name); return 0; @@ -3126,6 +3130,9 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) net_dev = dev_get_drvdata(dev); priv = netdev_priv(net_dev); +#ifdef CONFIG_DEBUG_FS + dpaa2_dbg_remove(priv); +#endif unregister_netdev(net_dev); if (priv->do_link_poll) @@ -3170,4 +3177,25 @@ static struct fsl_mc_driver dpaa2_eth_driver = { .match_id_table = dpaa2_eth_match_id_table }; -module_fsl_mc_driver(dpaa2_eth_driver); +static int __init dpaa2_eth_driver_init(void) +{ + int err; + + dpaa2_eth_dbg_init(); + err = fsl_mc_driver_register(&dpaa2_eth_driver); + if (err) { + dpaa2_eth_dbg_exit(); + return err; + } + + return 0; +} + +static void __exit dpaa2_eth_driver_exit(void) +{ + dpaa2_eth_dbg_exit(); + fsl_mc_driver_unregister(&dpaa2_eth_driver); +} + +module_init(dpaa2_eth_driver_init); +module_exit(dpaa2_eth_driver_exit); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 69c965de192b..31fe486ec25f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -16,6 +16,7 @@ #include "dpni-cmd.h" #include "dpaa2-eth-trace.h" +#include "dpaa2-eth-debugfs.h" #define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0) @@ -365,6 +366,9 @@ struct dpaa2_eth_priv { struct dpaa2_eth_cls_rule *cls_rules; u8 rx_cls_enabled; struct bpf_prog *xdp_prog; +#ifdef CONFIG_DEBUG_FS + struct dpaa2_debugfs dbg; +#endif }; #define DPAA2_RXH_SUPPORTED (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \ @@ -405,6 +409,10 @@ static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv, #define dpaa2_eth_fs_count(priv) \ ((priv)->dpni_attrs.fs_entries) +/* We have exactly one {Rx, Tx conf} queue per channel */ +#define dpaa2_eth_queue_count(priv) \ + ((priv)->num_channels) + enum dpaa2_eth_rx_dist { DPAA2_ETH_RX_DIST_HASH, DPAA2_ETH_RX_DIST_CLS @@ -447,12 +455,6 @@ static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv) DPAA2_ETH_RX_HWA_SIZE; } -/* We have exactly one {Rx, Tx conf} queue per channel */ -static int dpaa2_eth_queue_count(struct dpaa2_eth_priv *priv) -{ - return priv->num_channels; -} - int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags); int dpaa2_eth_cls_key_size(void); int dpaa2_eth_cls_fld_off(int prot, int field); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 5d64519b9b1d..6bf346c11b25 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -788,8 +788,9 @@ int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, int ring_num = hns_rcb_get_ring_num(dsaf_dev); rcb_common = - devm_kzalloc(dsaf_dev->dev, sizeof(*rcb_common) + - ring_num * sizeof(struct ring_pair_cb), GFP_KERNEL); + 
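/* struct_size(p, member, n), from <linux/overflow.h>, evaluates to
 * sizeof(*p) plus n times the size of one member element, and it
 * saturates to SIZE_MAX on multiplication overflow, so an oversized
 * ring_num makes the allocation below fail cleanly instead of silently
 * wrapping the way the open-coded sum above could:
 */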
devm_kzalloc(dsaf_dev->dev, + struct_size(rcb_common, ring_pair_cb, ring_num), + GFP_KERNEL); if (!rcb_common) { dev_err(dsaf_dev->dev, "rcb common devm_kzalloc fail!\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 36eab37d8a40..d486748d5883 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -124,6 +124,7 @@ enum hnae3_reset_notify_type { HNAE3_DOWN_CLIENT, HNAE3_INIT_CLIENT, HNAE3_UNINIT_CLIENT, + HNAE3_RESTORE_CLIENT, }; enum hnae3_reset_type { @@ -500,6 +501,7 @@ struct hnae3_tc_info { struct hnae3_knic_private_info { struct net_device *netdev; /* Set by KNIC client when init instance */ u16 rss_size; /* Allocated RSS queues */ + u16 req_rss_size; u16 rx_buf_len; u16 num_desc; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 1bf7a5f116a0..9dd8949381bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3185,6 +3185,9 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) for (i = 0; i < priv->vector_num; i++) { tqp_vector = &priv->tqp_vector[i]; + if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring) + continue; + ret = hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain); if (ret) @@ -3205,7 +3208,6 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED; } - priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED; hns3_clear_ring_group(&tqp_vector->rx_group); hns3_clear_ring_group(&tqp_vector->tx_group); netif_napi_del(&priv->tqp_vector[i].napi); @@ -3238,6 +3240,7 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, { struct hns3_nic_ring_data *ring_data = priv->ring_data; int queue_num = priv->ae_handle->kinfo.num_tqps; + int desc_num = priv->ae_handle->kinfo.num_desc; struct pci_dev *pdev = priv->ae_handle->pdev; struct hns3_enet_ring *ring; @@ -3263,7 +3266,7 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring->dev = priv->dev; ring->desc_dma_addr = 0; ring->buf_size = q->buf_size; - ring->desc_num = q->desc_num; + ring->desc_num = desc_num; ring->next_to_use = 0; ring->next_to_clean = 0; @@ -3725,7 +3728,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; struct net_device *ndev = kinfo->netdev; - bool if_running; int ret; if (tc > HNAE3_MAX_TC) @@ -3734,24 +3736,13 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) if (!ndev) return -ENODEV; - if_running = netif_running(ndev); - - if (if_running) { - (void)hns3_nic_net_stop(ndev); - msleep(100); - } - ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ? 
kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP; if (ret) - goto err_out; + return ret; ret = hns3_nic_set_real_num_queue(ndev); -err_out: - if (if_running) - (void)hns3_nic_net_open(ndev); - return ret; } @@ -4013,41 +4004,18 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) { struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); - bool vlan_filter_enable; int ret; - ret = hns3_init_mac_addr(netdev, false); - if (ret) - return ret; - - ret = hns3_recover_hw_addr(netdev); - if (ret) - return ret; - - ret = hns3_update_promisc_mode(netdev, handle->netdev_flags); - if (ret) - return ret; - - vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true; - hns3_enable_vlan_filter(netdev, vlan_filter_enable); - - /* Hardware table is only clear when pf resets */ - if (!(handle->flags & HNAE3_SUPPORT_VF)) { - ret = hns3_restore_vlan(netdev); - if (ret) - return ret; - } + /* Carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); - ret = hns3_restore_fd_rules(netdev); + ret = hns3_get_ring_config(priv); if (ret) return ret; - /* Carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - ret = hns3_nic_alloc_vector_data(priv); if (ret) - return ret; + goto err_put_ring; hns3_restore_coal(priv); @@ -4068,10 +4036,44 @@ err_uninit_vector: priv->ring_data = NULL; err_dealloc_vector: hns3_nic_dealloc_vector_data(priv); +err_put_ring: + hns3_put_ring_config(priv); + priv->ring_data = NULL; return ret; } +static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle) +{ + struct net_device *netdev = handle->kinfo.netdev; + bool vlan_filter_enable; + int ret; + + ret = hns3_init_mac_addr(netdev, false); + if (ret) + return ret; + + ret = hns3_recover_hw_addr(netdev); + if (ret) + return ret; + + ret = hns3_update_promisc_mode(netdev, handle->netdev_flags); + if (ret) + return ret; + + vlan_filter_enable = netdev->flags & IFF_PROMISC ? 
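/* i.e. !(netdev->flags & IFF_PROMISC): promiscuous mode has to see all
 * traffic, so hardware VLAN filtering is kept on only while promisc is
 * off:
 */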
false : true; + hns3_enable_vlan_filter(netdev, vlan_filter_enable); + + /* Hardware table is only clear when pf resets */ + if (!(handle->flags & HNAE3_SUPPORT_VF)) { + ret = hns3_restore_vlan(netdev); + if (ret) + return ret; + } + + return hns3_restore_fd_rules(netdev); +} + static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) { struct net_device *netdev = handle->kinfo.netdev; @@ -4101,6 +4103,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) if (ret) netdev_err(netdev, "uninit ring error\n"); + hns3_put_ring_config(priv); + priv->ring_data = NULL; + clear_bit(HNS3_NIC_STATE_INITED, &priv->state); return ret; @@ -4124,6 +4129,9 @@ static int hns3_reset_notify(struct hnae3_handle *handle, case HNAE3_UNINIT_CLIENT: ret = hns3_reset_notify_uninit_enet(handle); break; + case HNAE3_RESTORE_CLIENT: + ret = hns3_reset_notify_restore_enet(handle); + break; default: break; } @@ -4131,57 +4139,11 @@ static int hns3_reset_notify(struct hnae3_handle *handle, return ret; } -static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = hns3_get_handle(netdev); - int ret; - - ret = h->ae_algo->ops->set_channels(h, new_tqp_num); - if (ret) - return ret; - - ret = hns3_get_ring_config(priv); - if (ret) - return ret; - - ret = hns3_nic_alloc_vector_data(priv); - if (ret) - goto err_alloc_vector; - - hns3_restore_coal(priv); - - ret = hns3_nic_init_vector_data(priv); - if (ret) - goto err_uninit_vector; - - ret = hns3_init_all_ring(priv); - if (ret) - goto err_put_ring; - - return 0; - -err_put_ring: - hns3_put_ring_config(priv); -err_uninit_vector: - hns3_nic_uninit_vector_data(priv); -err_alloc_vector: - hns3_nic_dealloc_vector_data(priv); - return ret; -} - -static int hns3_adjust_tqps_num(u8 num_tc, u32 new_tqp_num) -{ - return (new_tqp_num / num_tc) * num_tc; -} - int hns3_set_channels(struct net_device *netdev, struct ethtool_channels *ch) { - struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = hns3_get_handle(netdev); struct hnae3_knic_private_info *kinfo = &h->kinfo; - bool if_running = netif_running(netdev); u32 new_tqp_num = ch->combined_count; u16 org_tqp_num; int ret; @@ -4190,39 +4152,28 @@ int hns3_set_channels(struct net_device *netdev, return -EINVAL; if (new_tqp_num > hns3_get_max_available_channels(h) || - new_tqp_num < kinfo->num_tc) { + new_tqp_num < 1) { dev_err(&netdev->dev, - "Change tqps fail, the tqp range is from %d to %d", - kinfo->num_tc, + "Change tqps fail, the tqp range is from 1 to %d", hns3_get_max_available_channels(h)); return -EINVAL; } - new_tqp_num = hns3_adjust_tqps_num(kinfo->num_tc, new_tqp_num); - if (kinfo->num_tqps == new_tqp_num) + if (kinfo->rss_size == new_tqp_num) return 0; - if (if_running) - hns3_nic_net_stop(netdev); - - ret = hns3_nic_uninit_vector_data(priv); - if (ret) { - dev_err(&netdev->dev, - "Unbind vector with tqp fail, nothing is changed"); - goto open_netdev; - } - - hns3_store_coal(priv); - - hns3_nic_dealloc_vector_data(priv); + ret = hns3_reset_notify(h, HNAE3_DOWN_CLIENT); + if (ret) + return ret; - hns3_uninit_all_ring(priv); - hns3_put_ring_config(priv); + ret = hns3_reset_notify(h, HNAE3_UNINIT_CLIENT); + if (ret) + return ret; org_tqp_num = h->kinfo.num_tqps; - ret = hns3_modify_tqp_num(netdev, new_tqp_num); + ret = h->ae_algo->ops->set_channels(h, new_tqp_num); if (ret) { - ret = hns3_modify_tqp_num(netdev, org_tqp_num); + ret = h->ae_algo->ops->set_channels(h, org_tqp_num); if 
(ret) { /* If revert to old tqp failed, fatal error occurred */ dev_err(&netdev->dev, @@ -4232,12 +4183,11 @@ int hns3_set_channels(struct net_device *netdev, dev_info(&netdev->dev, "Change tqp num fail, Revert to old tqp num"); } + ret = hns3_reset_notify(h, HNAE3_INIT_CLIENT); + if (ret) + return ret; -open_netdev: - if (if_running) - hns3_nic_net_open(netdev); - - return ret; + return hns3_reset_notify(h, HNAE3_UP_CLIENT); } static const struct hnae3_client_ops client_ops = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index e55995e93bb0..f59ab7387b1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -412,7 +412,6 @@ struct hns3_enet_ring { unsigned char *va; /* first buffer address for current packet */ u32 flag; /* ring attribute */ - int irq_init_flag; int numa_node; cpumask_t affinity_mask; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index f6323b2501dc..4ec0b9cd15ae 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -222,6 +222,16 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) if (ret) return ret; + if (map_changed) { + ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); + if (ret) + return ret; + + ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); + if (ret) + return ret; + } + hclge_tm_schd_info_update(hdev, num_tc); ret = hclge_ieee_ets_to_tm_info(hdev, ets); @@ -232,6 +242,13 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) ret = hclge_client_setup_tc(hdev); if (ret) return ret; + ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); + if (ret) + return ret; + + ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT); + if (ret) + return ret; } return hclge_tm_dwrr_cfg(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f7637c08bb3a..00d7acb4d45a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1068,14 +1068,14 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id, return ret; } -static int hclge_assign_tqp(struct hclge_vport *vport) +static int hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; int i, alloced; for (i = 0, alloced = 0; i < hdev->num_tqps && - alloced < kinfo->num_tqps; i++) { + alloced < num_tqps; i++) { if (!hdev->htqp[i].alloced) { hdev->htqp[i].q.handle = &vport->nic; hdev->htqp[i].q.tqp_index = alloced; @@ -1085,7 +1085,9 @@ static int hclge_assign_tqp(struct hclge_vport *vport) alloced++; } } - vport->alloc_tqps = kinfo->num_tqps; + vport->alloc_tqps = alloced; + kinfo->rss_size = min_t(u16, hdev->rss_size_max, + vport->alloc_tqps / hdev->tm_info.num_tc); return 0; } @@ -1096,36 +1098,17 @@ static int hclge_knic_setup(struct hclge_vport *vport, struct hnae3_handle *nic = &vport->nic; struct hnae3_knic_private_info *kinfo = &nic->kinfo; struct hclge_dev *hdev = vport->back; - int i, ret; + int ret; kinfo->num_desc = num_desc; kinfo->rx_buf_len = hdev->rx_buf_len; - kinfo->num_tc = min_t(u16, num_tqps, hdev->tm_info.num_tc); - kinfo->rss_size - = min_t(u16, hdev->rss_size_max, num_tqps / kinfo->num_tc); - kinfo->num_tqps 
= kinfo->rss_size * kinfo->num_tc; - for (i = 0; i < HNAE3_MAX_TC; i++) { - if (hdev->hw_tc_map & BIT(i)) { - kinfo->tc_info[i].enable = true; - kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size; - kinfo->tc_info[i].tqp_count = kinfo->rss_size; - kinfo->tc_info[i].tc = i; - } else { - /* Set to default queue if TC is disable */ - kinfo->tc_info[i].enable = false; - kinfo->tc_info[i].tqp_offset = 0; - kinfo->tc_info[i].tqp_count = 1; - kinfo->tc_info[i].tc = 0; - } - } - - kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps, + kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, num_tqps, sizeof(struct hnae3_queue *), GFP_KERNEL); if (!kinfo->tqp) return -ENOMEM; - ret = hclge_assign_tqp(vport); + ret = hclge_assign_tqp(vport, num_tqps); if (ret) dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret); @@ -1140,7 +1123,7 @@ static int hclge_map_tqp_to_vport(struct hclge_dev *hdev, u16 i; kinfo = &nic->kinfo; - for (i = 0; i < kinfo->num_tqps; i++) { + for (i = 0; i < vport->alloc_tqps; i++) { struct hclge_tqp *q = container_of(kinfo->tqp[i], struct hclge_tqp, q); bool is_pf; @@ -2418,8 +2401,8 @@ static void hclge_misc_irq_uninit(struct hclge_dev *hdev) hclge_free_vector(hdev, 0); } -static int hclge_notify_client(struct hclge_dev *hdev, - enum hnae3_reset_notify_type type) +int hclge_notify_client(struct hclge_dev *hdev, + enum hnae3_reset_notify_type type) { struct hnae3_client *client = hdev->nic_client; u16 i; @@ -2883,6 +2866,10 @@ static void hclge_reset(struct hclge_dev *hdev) if (ret) goto err_reset_lock; + ret = hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); + if (ret) + goto err_reset_lock; + hclge_clear_reset_cause(hdev); ret = hclge_reset_prepare_up(hdev); @@ -5258,6 +5245,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle, enum hnae3_loop loop_mode, bool en) { struct hclge_vport *vport = hclge_get_vport(handle); + struct hnae3_knic_private_info *kinfo; struct hclge_dev *hdev = vport->back; int i, ret; @@ -5276,7 +5264,8 @@ static int hclge_set_loopback(struct hnae3_handle *handle, break; } - for (i = 0; i < vport->alloc_tqps; i++) { + kinfo = &vport->nic.kinfo; + for (i = 0; i < kinfo->num_tqps; i++) { ret = hclge_tqp_enable(hdev, i, 0, en); if (ret) return ret; @@ -5288,11 +5277,13 @@ static int hclge_set_loopback(struct hnae3_handle *handle, static void hclge_reset_tqp_stats(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); + struct hnae3_knic_private_info *kinfo; struct hnae3_queue *queue; struct hclge_tqp *tqp; int i; - for (i = 0; i < vport->alloc_tqps; i++) { + kinfo = &vport->nic.kinfo; + for (i = 0; i < kinfo->num_tqps; i++) { queue = handle->kinfo.tqp[i]; tqp = container_of(queue, struct hclge_tqp, q); memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats)); @@ -7523,18 +7514,17 @@ static u32 hclge_get_max_channels(struct hnae3_handle *handle) struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - return min_t(u32, hdev->rss_size_max * kinfo->num_tc, hdev->num_tqps); + return min_t(u32, hdev->rss_size_max, + vport->alloc_tqps / kinfo->num_tc); } static void hclge_get_channels(struct hnae3_handle *handle, struct ethtool_channels *ch) { - struct hclge_vport *vport = hclge_get_vport(handle); - ch->max_combined = hclge_get_max_channels(handle); ch->other_count = 1; ch->max_other = 1; - ch->combined_count = vport->alloc_tqps; + ch->combined_count = handle->kinfo.rss_size; } static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle, @@ -7547,25 +7537,6 @@ static void 
hclge_get_tqps_and_rss_info(struct hnae3_handle *handle, *max_rss_size = hdev->rss_size_max; } -static void hclge_release_tqp(struct hclge_vport *vport) -{ - struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; - struct hclge_dev *hdev = vport->back; - int i; - - for (i = 0; i < kinfo->num_tqps; i++) { - struct hclge_tqp *tqp = - container_of(kinfo->tqp[i], struct hclge_tqp, q); - - tqp->q.handle = NULL; - tqp->q.tqp_index = 0; - tqp->alloced = false; - } - - devm_kfree(&hdev->pdev->dev, kinfo->tqp); - kinfo->tqp = NULL; -} - static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -7580,24 +7551,11 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) u32 *rss_indir; int ret, i; - /* Free old tqps, and reallocate with new tqp number when nic setup */ - hclge_release_tqp(vport); - - ret = hclge_knic_setup(vport, new_tqps_num, kinfo->num_desc); - if (ret) { - dev_err(&hdev->pdev->dev, "setup nic fail, ret =%d\n", ret); - return ret; - } - - ret = hclge_map_tqp_to_vport(hdev, vport); - if (ret) { - dev_err(&hdev->pdev->dev, "map vport tqp fail, ret =%d\n", ret); - return ret; - } + kinfo->req_rss_size = new_tqps_num; - ret = hclge_tm_schd_init(hdev); + ret = hclge_tm_vport_map_update(hdev); if (ret) { - dev_err(&hdev->pdev->dev, "tm schd init fail, ret =%d\n", ret); + dev_err(&hdev->pdev->dev, "tm vport map fail, ret =%d\n", ret); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 6615b85a1c52..279ed2d83cb8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -878,4 +878,6 @@ void hclge_vport_stop(struct hclge_vport *vport); int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu); int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf); u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id); +int hclge_notify_client(struct hclge_dev *hdev, + enum hnae3_reset_notify_type type); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 00458da67503..fb8596a3e5e4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -517,19 +517,32 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; + u16 max_rss_size; u8 i; vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; - kinfo->num_tc = - min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc); - kinfo->rss_size - = min_t(u16, hdev->rss_size_max, - kinfo->num_tqps / kinfo->num_tc); + kinfo->num_tc = min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); + max_rss_size = min_t(u16, hdev->rss_size_max, + vport->alloc_tqps / kinfo->num_tc); + + if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size && + kinfo->req_rss_size <= max_rss_size) { + dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", + kinfo->rss_size, kinfo->req_rss_size); + kinfo->rss_size = kinfo->req_rss_size; + } else if (kinfo->rss_size > max_rss_size || + (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { + dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", + kinfo->rss_size, max_rss_size); + kinfo->rss_size = max_rss_size; + } + + kinfo->num_tqps = kinfo->num_tc * 
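/* one TQP per (TC, RSS queue) pair, using the rss_size picked just
 * above (the user-requested value when it fits, otherwise the clamped
 * maximum):
 */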
kinfo->rss_size; vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id; vport->dwrr = 100; /* 100 percent as init */ vport->alloc_rss_size = kinfo->rss_size; - for (i = 0; i < kinfo->num_tc; i++) { + for (i = 0; i < HNAE3_MAX_TC; i++) { if (hdev->hw_tc_map & BIT(i)) { kinfo->tc_info[i].enable = true; kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size; @@ -1228,10 +1241,23 @@ static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en); } +static int hclge_tm_bp_setup(struct hclge_dev *hdev) +{ + int ret = 0; + int i; + + for (i = 0; i < hdev->tm_info.num_tc; i++) { + ret = hclge_bp_setup_hw(hdev, i); + if (ret) + return ret; + } + + return ret; +} + int hclge_pause_setup_hw(struct hclge_dev *hdev) { int ret; - u8 i; ret = hclge_pause_param_setup_hw(hdev); if (ret) @@ -1250,13 +1276,7 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev) if (ret) dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret); - for (i = 0; i < hdev->tm_info.num_tc; i++) { - ret = hclge_bp_setup_hw(hdev, i); - if (ret) - return ret; - } - - return 0; + return hclge_tm_bp_setup(hdev); } void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) @@ -1327,3 +1347,20 @@ int hclge_tm_schd_init(struct hclge_dev *hdev) return hclge_tm_init_hw(hdev); } + +int hclge_tm_vport_map_update(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + int ret; + + hclge_tm_vport_tc_info_update(vport); + + ret = hclge_vport_q_to_qs_map(hdev, vport); + if (ret) + return ret; + + if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) + return 0; + + return hclge_tm_bp_setup(hdev); +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index b6496a439304..898163c4d400 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -142,6 +142,7 @@ struct hclge_port_shapping_cmd { (HCLGE_TM_SHAP_##string##_LSH)) int hclge_tm_schd_init(struct hclge_dev *hdev); +int hclge_tm_vport_map_update(struct hclge_dev *hdev); int hclge_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_schd_mode_hw(struct hclge_dev *hdev); void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 82103d5fa815..bb9f45200ef5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1264,7 +1264,7 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev) if (ret) return ret; - return 0; + return hclgevf_notify_client(hdev, HNAE3_RESTORE_CLIENT); } static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 6b19607a4caa..3875f39f43bb 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -1008,3 +1008,16 @@ int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq, &hw_ci, sizeof(hw_ci), NULL, NULL, HINIC_MGMT_MSG_SYNC); } + +/** + * hinic_hwdev_set_msix_state- set msix state + * @hwdev: the NIC HW device + * @msix_index: IRQ corresponding index number + * @flag: msix state + * + **/ +void hinic_hwdev_set_msix_state(struct hinic_hwdev *hwdev, u16 msix_index, + enum hinic_msix_state flag) +{ + hinic_set_msix_state(hwdev->hwif, 
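/* HINIC_MSIX_DISABLE sets the per-vector mask bit in the device MSI-X
 * table (BAR2; 16-byte entries, vector-control dword at offset 12) and
 * HINIC_MSIX_ENABLE clears it again. Being a single MMIO write, this is
 * cheap enough for the NAPI hot path, unlike the
 * disable_irq_nosync()/enable_irq() pair it replaces:
 */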
msix_index, flag); +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index d1a7d2522d82..c9e621e19dd0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -240,4 +240,7 @@ int hinic_hwdev_msix_set(struct hinic_hwdev *hwdev, u16 msix_index, int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq, u8 pending_limit, u8 coalesc_timer); +void hinic_hwdev_set_msix_state(struct hinic_hwdev *hwdev, u16 msix_index, + enum hinic_msix_state flag); + #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c index 823a17061a97..9b160f076904 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c @@ -168,6 +168,22 @@ void hinic_db_state_set(struct hinic_hwif *hwif, hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR, attr4); } +void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx, + enum hinic_msix_state flag) +{ + u32 offset = msix_idx * HINIC_PCI_MSIX_ENTRY_SIZE + + HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL; + u32 mask_bits; + + mask_bits = readl(hwif->intr_regs_base + offset); + mask_bits &= ~HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT; + + if (flag) + mask_bits |= HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT; + + writel(mask_bits, hwif->intr_regs_base + offset); +} + /** * hwif_ready - test if the HW is ready for use * @hwif: the HW interface of a pci function device @@ -321,6 +337,13 @@ int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev) return -ENOMEM; } + hwif->intr_regs_base = pci_ioremap_bar(pdev, HINIC_PCI_INTR_REGS_BAR); + if (!hwif->intr_regs_base) { + dev_err(&pdev->dev, "Failed to map interrupt regs\n"); + err = -ENOMEM; + goto err_map_intr_bar; + } + err = hwif_ready(hwif); if (err) { dev_err(&pdev->dev, "HW interface is not ready\n"); @@ -337,7 +360,11 @@ int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev) return 0; err_hwif_ready: + iounmap(hwif->intr_regs_base); + +err_map_intr_bar: iounmap(hwif->cfg_regs_bar); + return err; } @@ -347,5 +374,6 @@ err_hwif_ready: **/ void hinic_free_hwif(struct hinic_hwif *hwif) { + iounmap(hwif->intr_regs_base); iounmap(hwif->cfg_regs_bar); } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 5b4760c0e9f5..22ec7f73e0a6 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -152,6 +152,7 @@ #define HINIC_IS_PPF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PPF) #define HINIC_PCI_CFG_REGS_BAR 0 +#define HINIC_PCI_INTR_REGS_BAR 2 #define HINIC_PCI_DB_BAR 4 #define HINIC_PCIE_ST_DISABLE 0 @@ -164,6 +165,10 @@ #define HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT 0 /* Disabled */ #define HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT 7 /* max */ +#define HINIC_PCI_MSIX_ENTRY_SIZE 16 +#define HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT 1 + enum hinic_pcie_nosnoop { HINIC_PCIE_SNOOP = 0, HINIC_PCIE_NO_SNOOP = 1, @@ -207,6 +212,11 @@ enum hinic_db_state { HINIC_DB_DISABLE = 1, }; +enum hinic_msix_state { + HINIC_MSIX_ENABLE, + HINIC_MSIX_DISABLE, +}; + struct hinic_func_attr { u16 func_idx; u8 pf_idx; @@ -226,6 +236,7 @@ struct hinic_func_attr { struct hinic_hwif { struct pci_dev *pdev; void __iomem *cfg_regs_bar; + void __iomem *intr_regs_base; struct hinic_func_attr attr; }; @@ -251,6 +262,9 @@ int hinic_msix_attr_get(struct hinic_hwif 
*hwif, u16 msix_index, u8 *lli_timer, u8 *lli_credit_limit, u8 *resend_timer); +void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx, + enum hinic_msix_state flag); + int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index); void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 0098b206e7e9..b6d218768ec1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -381,6 +381,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) static int rx_poll(struct napi_struct *napi, int budget) { struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi); + struct hinic_dev *nic_dev = netdev_priv(rxq->netdev); struct hinic_rq *rq = rxq->rq; int pkts; @@ -389,7 +390,10 @@ static int rx_poll(struct napi_struct *napi, int budget) return budget; napi_complete(napi); - enable_irq(rq->irq); + hinic_hwdev_set_msix_state(nic_dev->hwdev, + rq->msix_entry, + HINIC_MSIX_ENABLE); + return pkts; } @@ -414,7 +418,10 @@ static irqreturn_t rx_irq(int irq, void *data) struct hinic_dev *nic_dev; /* Disable the interrupt until napi will be completed */ - disable_irq_nosync(rq->irq); + nic_dev = netdev_priv(rxq->netdev); + hinic_hwdev_set_msix_state(nic_dev->hwdev, + rq->msix_entry, + HINIC_MSIX_DISABLE); nic_dev = netdev_priv(rxq->netdev); hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 11e73e67358d..e17bf33eba0c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -655,7 +655,9 @@ static int free_tx_poll(struct napi_struct *napi, int budget) if (pkts < budget) { napi_complete(napi); - enable_irq(sq->irq); + hinic_hwdev_set_msix_state(nic_dev->hwdev, + sq->msix_entry, + HINIC_MSIX_ENABLE); return pkts; } @@ -682,7 +684,9 @@ static irqreturn_t tx_irq(int irq, void *data) nic_dev = netdev_priv(txq->netdev); /* Disable the interrupt until napi will be completed */ - disable_irq_nosync(txq->sq->irq); + hinic_hwdev_set_msix_state(nic_dev->hwdev, + txq->sq->msix_entry, + HINIC_MSIX_DISABLE); hinic_hwdev_msix_cnt_set(nic_dev->hwdev, txq->sq->msix_entry); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f52e2c46e6a7..0ee641c41be4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11644,7 +11644,8 @@ static int i40e_get_phys_port_id(struct net_device *netdev, static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a385575600f6..55944e089558 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -26,6 +26,7 @@ #include <linux/bitmap.h> #include <linux/log2.h> #include <linux/ip.h> +#include <linux/sctp.h> #include <linux/ipv6.h> #include <linux/if_bridge.h> #include <linux/avf/virtchnl.h> @@ -110,6 +111,9 @@ extern const char ice_drv_ver[]; #define ice_for_each_alloc_rxq(vsi, i) \ for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++) +#define ice_for_each_q_vector(vsi, i) 
\ + for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++) + struct ice_tc_info { u16 qoffset; u16 qcount_tx; @@ -129,6 +133,17 @@ struct ice_res_tracker { u16 list[1]; }; +struct ice_qs_cfg { + struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */ + unsigned long *pf_map; + unsigned long pf_map_size; + unsigned int q_count; + unsigned int scatter_count; + u16 *vsi_map; + u16 vsi_map_offset; + u8 mapping_mode; +}; + struct ice_sw { struct ice_pf *pf; u16 sw_id; /* switch ID for this switch */ @@ -270,6 +285,7 @@ enum ice_pf_flags { ICE_FLAG_RSS_ENA, ICE_FLAG_SRIOV_ENA, ICE_FLAG_SRIOV_CAPABLE, + ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, ICE_PF_FLAGS_NBITS /* must be last */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index fcdcd80b18e7..242c78469181 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -657,8 +657,13 @@ struct ice_aqc_get_topo { /* Update TSE (indirect 0x0403) * Get TSE (indirect 0x0404) + * Add TSE (indirect 0x0401) + * Delete TSE (indirect 0x040F) + * Move TSE (indirect 0x0408) + * Suspend Nodes (indirect 0x0409) + * Resume Nodes (indirect 0x040A) */ -struct ice_aqc_get_cfg_elem { +struct ice_aqc_sched_elem_cmd { __le16 num_elem_req; /* Used by commands */ __le16 num_elem_resp; /* Used by responses */ __le32 reserved; @@ -674,18 +679,6 @@ struct ice_aqc_suspend_resume_elem { __le32 teid[1]; }; -/* Add TSE (indirect 0x0401) - * Delete TSE (indirect 0x040F) - * Move TSE (indirect 0x0408) - */ -struct ice_aqc_add_move_delete_elem { - __le16 num_grps_req; - __le16 num_grps_updated; - __le32 reserved; - __le32 addr_high; - __le32 addr_low; -}; - struct ice_aqc_elem_info_bw { __le16 bw_profile_idx; __le16 bw_alloc; @@ -854,11 +847,46 @@ struct ice_aqc_get_phy_caps { #define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33) #define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34) #define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35) +#define ICE_PHY_TYPE_LOW_50GBASE_CR2 BIT_ULL(36) +#define ICE_PHY_TYPE_LOW_50GBASE_SR2 BIT_ULL(37) +#define ICE_PHY_TYPE_LOW_50GBASE_LR2 BIT_ULL(38) +#define ICE_PHY_TYPE_LOW_50GBASE_KR2 BIT_ULL(39) +#define ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC BIT_ULL(40) +#define ICE_PHY_TYPE_LOW_50G_LAUI2 BIT_ULL(41) +#define ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC BIT_ULL(42) +#define ICE_PHY_TYPE_LOW_50G_AUI2 BIT_ULL(43) +#define ICE_PHY_TYPE_LOW_50GBASE_CP BIT_ULL(44) +#define ICE_PHY_TYPE_LOW_50GBASE_SR BIT_ULL(45) +#define ICE_PHY_TYPE_LOW_50GBASE_FR BIT_ULL(46) +#define ICE_PHY_TYPE_LOW_50GBASE_LR BIT_ULL(47) +#define ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 BIT_ULL(48) +#define ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC BIT_ULL(49) +#define ICE_PHY_TYPE_LOW_50G_AUI1 BIT_ULL(50) +#define ICE_PHY_TYPE_LOW_100GBASE_CR4 BIT_ULL(51) +#define ICE_PHY_TYPE_LOW_100GBASE_SR4 BIT_ULL(52) +#define ICE_PHY_TYPE_LOW_100GBASE_LR4 BIT_ULL(53) +#define ICE_PHY_TYPE_LOW_100GBASE_KR4 BIT_ULL(54) +#define ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC BIT_ULL(55) +#define ICE_PHY_TYPE_LOW_100G_CAUI4 BIT_ULL(56) +#define ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC BIT_ULL(57) +#define ICE_PHY_TYPE_LOW_100G_AUI4 BIT_ULL(58) +#define ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 BIT_ULL(59) +#define ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 BIT_ULL(60) +#define ICE_PHY_TYPE_LOW_100GBASE_CP2 BIT_ULL(61) +#define ICE_PHY_TYPE_LOW_100GBASE_SR2 BIT_ULL(62) +#define ICE_PHY_TYPE_LOW_100GBASE_DR BIT_ULL(63) #define ICE_PHY_TYPE_LOW_MAX_INDEX 63 +/* The second set of defines is for phy_type_high. 
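 * Only bits 0-4 carry defined PHY types so far; ICE_PHY_TYPE_HIGH_MAX_INDEX
 * is used as the loop bound when ice_update_phy_type() scans this half.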
*/ +#define ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 BIT_ULL(0) +#define ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC BIT_ULL(1) +#define ICE_PHY_TYPE_HIGH_100G_CAUI2 BIT_ULL(2) +#define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC BIT_ULL(3) +#define ICE_PHY_TYPE_HIGH_100G_AUI2 BIT_ULL(4) +#define ICE_PHY_TYPE_HIGH_MAX_INDEX 19 struct ice_aqc_get_phy_caps_data { __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ - __le64 reserved; + __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */ u8 caps; #define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0) #define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1) @@ -923,7 +951,7 @@ struct ice_aqc_set_phy_cfg { /* Set PHY config command data structure */ struct ice_aqc_set_phy_cfg_data { __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ - __le64 rsvd0; + __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */ u8 caps; #define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0) #define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1) @@ -1032,10 +1060,12 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_SPEED_20GB BIT(6) #define ICE_AQ_LINK_SPEED_25GB BIT(7) #define ICE_AQ_LINK_SPEED_40GB BIT(8) +#define ICE_AQ_LINK_SPEED_50GB BIT(9) +#define ICE_AQ_LINK_SPEED_100GB BIT(10) #define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15) __le32 reserved3; /* Aligns next field to 8-byte boundary */ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ - __le64 reserved4; + __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */ }; /* Set event mask command (direct 0x0613) */ @@ -1055,6 +1085,16 @@ struct ice_aqc_set_event_mask { u8 reserved1[6]; }; +/* Set Port Identification LED (direct, 0x06E9) */ +struct ice_aqc_set_port_id_led { + u8 lport_num; + u8 lport_num_valid; + u8 ident_mode; +#define ICE_AQC_PORT_IDENT_LED_BLINK BIT(0) +#define ICE_AQC_PORT_IDENT_LED_ORIG 0 + u8 rsvd[13]; +}; + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1341,12 +1381,12 @@ struct ice_aq_desc { struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_set_phy_cfg set_phy; struct ice_aqc_restart_an restart_an; + struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; struct ice_aqc_get_topo get_topo; - struct ice_aqc_get_cfg_elem get_update_elem; + struct ice_aqc_sched_elem_cmd sched_elem_cmd; struct ice_aqc_query_txsched_res query_sched_res; - struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; struct ice_aqc_pf_vf_msg virt; struct ice_aqc_get_set_rss_lut get_set_rss_lut; @@ -1442,6 +1482,7 @@ enum ice_adminq_opc { ice_aqc_opc_restart_an = 0x0605, ice_aqc_opc_get_link_status = 0x0607, ice_aqc_opc_set_event_mask = 0x0613, + ice_aqc_opc_set_port_id_led = 0x06E9, /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 4c1d35da940d..b17ade424423 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -165,8 +165,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, cmd->param0 |= cpu_to_le16(report_mode); status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd); - if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) { pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); + pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high); + } return status; } @@ -183,6 +185,9 @@ static enum 
ice_media_type ice_get_media_type(struct ice_port_info *pi) return ICE_MEDIA_UNKNOWN; hw_link_info = &pi->phy.link_info; + if (hw_link_info->phy_type_low && hw_link_info->phy_type_high) + /* If more than one media type is selected, report unknown */ + return ICE_MEDIA_UNKNOWN; if (hw_link_info->phy_type_low) { switch (hw_link_info->phy_type_low) { @@ -196,6 +201,15 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) case ICE_PHY_TYPE_LOW_25G_AUI_C2C: case ICE_PHY_TYPE_LOW_40GBASE_SR4: case ICE_PHY_TYPE_LOW_40GBASE_LR4: + case ICE_PHY_TYPE_LOW_50GBASE_SR2: + case ICE_PHY_TYPE_LOW_50GBASE_LR2: + case ICE_PHY_TYPE_LOW_50GBASE_SR: + case ICE_PHY_TYPE_LOW_50GBASE_FR: + case ICE_PHY_TYPE_LOW_50GBASE_LR: + case ICE_PHY_TYPE_LOW_100GBASE_SR4: + case ICE_PHY_TYPE_LOW_100GBASE_LR4: + case ICE_PHY_TYPE_LOW_100GBASE_SR2: + case ICE_PHY_TYPE_LOW_100GBASE_DR: return ICE_MEDIA_FIBER; case ICE_PHY_TYPE_LOW_100BASE_TX: case ICE_PHY_TYPE_LOW_1000BASE_T: @@ -209,6 +223,11 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) case ICE_PHY_TYPE_LOW_25GBASE_CR_S: case ICE_PHY_TYPE_LOW_25GBASE_CR1: case ICE_PHY_TYPE_LOW_40GBASE_CR4: + case ICE_PHY_TYPE_LOW_50GBASE_CR2: + case ICE_PHY_TYPE_LOW_50GBASE_CP: + case ICE_PHY_TYPE_LOW_100GBASE_CR4: + case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: + case ICE_PHY_TYPE_LOW_100GBASE_CP2: return ICE_MEDIA_DA; case ICE_PHY_TYPE_LOW_1000BASE_KX: case ICE_PHY_TYPE_LOW_2500BASE_KX: @@ -219,10 +238,18 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) case ICE_PHY_TYPE_LOW_25GBASE_KR1: case ICE_PHY_TYPE_LOW_25GBASE_KR_S: case ICE_PHY_TYPE_LOW_40GBASE_KR4: + case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: + case ICE_PHY_TYPE_LOW_50GBASE_KR2: + case ICE_PHY_TYPE_LOW_100GBASE_KR4: + case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: + return ICE_MEDIA_BACKPLANE; + } + } else { + switch (hw_link_info->phy_type_high) { + case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: return ICE_MEDIA_BACKPLANE; } } - return ICE_MEDIA_UNKNOWN; } @@ -274,6 +301,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, /* update current link status information */ hw_link_info->link_speed = le16_to_cpu(link_data.link_speed); hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low); + hw_link_info->phy_type_high = le64_to_cpu(link_data.phy_type_high); *hw_media_type = ice_get_media_type(pi); hw_link_info->link_info = link_data.link_info; hw_link_info->an_info = link_data.an_info; @@ -750,6 +778,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw) status = ICE_ERR_CFG; goto err_unroll_sched; } + INIT_LIST_HEAD(&hw->agg_list); status = ice_init_fltr_mgmt_struct(hw); if (status) @@ -800,6 +829,7 @@ void ice_deinit_hw(struct ice_hw *hw) ice_cleanup_fltr_mgmt_struct(hw); ice_sched_cleanup_all(hw); + ice_sched_clear_agg(hw); if (hw->port_info) { devm_kfree(ice_hw_to_dev(hw), hw->port_info); @@ -1655,7 +1685,7 @@ enum ice_status ice_get_caps(struct ice_hw *hw) * This function is used to write MAC address to the NVM (0x0108). 
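 * The 6-byte address is programmed big-endian as a 2-byte sah plus a
 * 4-byte sal field (hence the htons()/htonl() below); for example,
 * 00:11:22:33:44:55 becomes sah 0x0011 and sal 0x22334455.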
*/ enum ice_status -ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, +ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, struct ice_sq_cd *cd) { struct ice_aqc_manage_mac_write *cmd; @@ -1667,8 +1697,8 @@ ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, cmd->flags = flags; /* Prep values for flags, sah, sal */ - cmd->sah = htons(*((u16 *)mac_addr)); - cmd->sal = htonl(*((u32 *)(mac_addr + 2))); + cmd->sah = htons(*((const u16 *)mac_addr)); + cmd->sal = htonl(*((const u32 *)(mac_addr + 2))); return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -1705,16 +1735,20 @@ void ice_clear_pxe_mode(struct ice_hw *hw) /** * ice_get_link_speed_based_on_phy_type - returns link speed * @phy_type_low: lower part of phy_type + * @phy_type_high: higher part of phy_type * - * This helper function will convert a phy_type_low to its corresponding link + * This helper function will convert an entry in phy type structure + * [phy_type_low, phy_type_high] to its corresponding link speed. + * Note: In the structure of [phy_type_low, phy_type_high], there should + * be one bit set, as this function will convert one phy type to its * speed. - * Note: In the structure of phy_type_low, there should be one bit set, as - * this function will convert one phy type to its speed. * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned */ -static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low) +static u16 +ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high) { + u16 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN; u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN; switch (phy_type_low) { @@ -1768,41 +1802,110 @@ static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low) case ICE_PHY_TYPE_LOW_40G_XLAUI: speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB; break; + case ICE_PHY_TYPE_LOW_50GBASE_CR2: + case ICE_PHY_TYPE_LOW_50GBASE_SR2: + case ICE_PHY_TYPE_LOW_50GBASE_LR2: + case ICE_PHY_TYPE_LOW_50GBASE_KR2: + case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC: + case ICE_PHY_TYPE_LOW_50G_LAUI2: + case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC: + case ICE_PHY_TYPE_LOW_50G_AUI2: + case ICE_PHY_TYPE_LOW_50GBASE_CP: + case ICE_PHY_TYPE_LOW_50GBASE_SR: + case ICE_PHY_TYPE_LOW_50GBASE_FR: + case ICE_PHY_TYPE_LOW_50GBASE_LR: + case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: + case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC: + case ICE_PHY_TYPE_LOW_50G_AUI1: + speed_phy_type_low = ICE_AQ_LINK_SPEED_50GB; + break; + case ICE_PHY_TYPE_LOW_100GBASE_CR4: + case ICE_PHY_TYPE_LOW_100GBASE_SR4: + case ICE_PHY_TYPE_LOW_100GBASE_LR4: + case ICE_PHY_TYPE_LOW_100GBASE_KR4: + case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC: + case ICE_PHY_TYPE_LOW_100G_CAUI4: + case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC: + case ICE_PHY_TYPE_LOW_100G_AUI4: + case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: + case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: + case ICE_PHY_TYPE_LOW_100GBASE_CP2: + case ICE_PHY_TYPE_LOW_100GBASE_SR2: + case ICE_PHY_TYPE_LOW_100GBASE_DR: + speed_phy_type_low = ICE_AQ_LINK_SPEED_100GB; + break; default: speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN; break; } - return speed_phy_type_low; + switch (phy_type_high) { + case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: + case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC: + case ICE_PHY_TYPE_HIGH_100G_CAUI2: + case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC: + case ICE_PHY_TYPE_HIGH_100G_AUI2: + speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB; + break; + default: + speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN; 
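/* after both switches run, exactly one of the two halves must have
 * resolved to a real speed; the "both known" and "neither known" cases
 * below collapse to ICE_AQ_LINK_SPEED_UNKNOWN:
 */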
+ break; + } + + if (speed_phy_type_low == ICE_AQ_LINK_SPEED_UNKNOWN && + speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN) + return ICE_AQ_LINK_SPEED_UNKNOWN; + else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN && + speed_phy_type_high != ICE_AQ_LINK_SPEED_UNKNOWN) + return ICE_AQ_LINK_SPEED_UNKNOWN; + else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN && + speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN) + return speed_phy_type_low; + else + return speed_phy_type_high; } /** * ice_update_phy_type * @phy_type_low: pointer to the lower part of phy_type + * @phy_type_high: pointer to the higher part of phy_type * @link_speeds_bitmap: targeted link speeds bitmap * * Note: For the link_speeds_bitmap structure, you can check it at * [ice_aqc_get_link_status->link_speed]. Caller can pass in * link_speeds_bitmap include multiple speeds. * - * The value of phy_type_low will present a certain link speed. This helper - * function will turn on bits in the phy_type_low based on the value of + * Each entry in this [phy_type_low, phy_type_high] structure will + * present a certain link speed. This helper function will turn on bits + * in [phy_type_low, phy_type_high] structure based on the value of * link_speeds_bitmap input parameter. */ -void ice_update_phy_type(u64 *phy_type_low, u16 link_speeds_bitmap) +void +ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high, + u16 link_speeds_bitmap) { u16 speed = ICE_AQ_LINK_SPEED_UNKNOWN; + u64 pt_high; u64 pt_low; int index; /* We first check with low part of phy_type */ for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) { pt_low = BIT_ULL(index); - speed = ice_get_link_speed_based_on_phy_type(pt_low); + speed = ice_get_link_speed_based_on_phy_type(pt_low, 0); if (link_speeds_bitmap & speed) *phy_type_low |= BIT_ULL(index); } + + /* We then check with high part of phy_type */ + for (index = 0; index <= ICE_PHY_TYPE_HIGH_MAX_INDEX; index++) { + pt_high = BIT_ULL(index); + speed = ice_get_link_speed_based_on_phy_type(0, pt_high); + + if (link_speeds_bitmap & speed) + *phy_type_high |= BIT_ULL(index); + } } /** @@ -1934,6 +2037,7 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) if (ena_auto_link_update) cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; /* Copy over all the old settings */ + cfg.phy_type_high = pcaps->phy_type_high; cfg.phy_type_low = pcaps->phy_type_low; cfg.low_power_ctrl = pcaps->low_power_ctrl; cfg.eee_cap = pcaps->eee_cap; @@ -2032,6 +2136,34 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, } /** + * ice_aq_set_port_id_led + * @pi: pointer to the port information + * @is_orig_mode: is this LED set to original mode (by the net-list) + * @cd: pointer to command details structure or NULL + * + * Set LED value for the given port (0x06e9) + */ +enum ice_status +ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, + struct ice_sq_cd *cd) +{ + struct ice_aqc_set_port_id_led *cmd; + struct ice_hw *hw = pi->hw; + struct ice_aq_desc desc; + + cmd = &desc.params.set_port_id_led; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_id_led); + + if (is_orig_mode) + cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_ORIG; + else + cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_BLINK; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure * @vsi_id: VSI FW index diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index cf760c24a6aa..d7c7c2ed8823 
100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -28,6 +28,8 @@ ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, enum ice_aq_res_access_type access, u32 timeout); void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); enum ice_status ice_init_nvm(struct ice_hw *hw); +enum ice_status ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, + u16 *data); enum ice_status ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, @@ -70,9 +72,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, struct ice_aqc_get_phy_caps_data *caps, struct ice_sq_cd *cd); void -ice_update_phy_type(u64 *phy_type_low, u16 link_speeds_bitmap); +ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high, + u16 link_speeds_bitmap); enum ice_status -ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, +ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, struct ice_sq_cd *cd); enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); enum ice_status @@ -86,6 +89,10 @@ enum ice_status ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, struct ice_sq_cd *cd); enum ice_status +ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, + struct ice_sq_cd *cd); + +enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, u32 *q_teids, enum ice_disq_rst_src rst_src, u16 vmvf_num, struct ice_sq_cd *cmd_details); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3b6e387f5440..a82f0202652d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -114,6 +114,22 @@ static const u32 ice_regs_dump_list[] = { QRX_ITR(0), }; +struct ice_priv_flag { + char name[ETH_GSTRING_LEN]; + u32 bitno; /* bit position in pf->flags */ +}; + +#define ICE_PRIV_FLAG(_name, _bitno) { \ + .name = _name, \ + .bitno = _bitno, \ +} + +static const struct ice_priv_flag ice_gstrings_priv_flags[] = { + ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), +}; + +#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) + /** * ice_nvm_version_str - format the NVM version strings * @hw: ptr to the hardware info @@ -152,6 +168,7 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); + drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static int ice_get_regs_len(struct net_device __always_unused *netdev) @@ -203,6 +220,55 @@ static void ice_set_msglevel(struct net_device *netdev, u32 data) #endif /* !CONFIG_DYNAMIC_DEBUG */ } +static int ice_get_eeprom_len(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + return (int)(pf->hw.nvm.sr_words * sizeof(u16)); +} + +static int +ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, + u8 *bytes) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + u16 first_word, last_word, nwords; + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + struct device *dev; + int ret = 0; + u16 *buf; + + dev = &pf->pdev->dev; + + eeprom->magic = hw->vendor_id | (hw->device_id << 16); + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + 
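/* the NVM shadow RAM is addressed in 16-bit words while ethtool hands
 * in byte offsets and lengths, so round the start down and the end up
 * to whole words; e.g. offset 3, len 4 spans words 1 through 3:
 */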
eeprom->len - 1) >> 1; + nwords = last_word - first_word + 1; + + buf = devm_kcalloc(dev, nwords, sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + status = ice_read_sr_buf(hw, first_word, &nwords, buf); + if (status) { + dev_err(dev, "ice_read_sr_buf failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + eeprom->len = sizeof(u16) * nwords; + ret = -EIO; + goto out; + } + + memcpy(bytes, (u8 *)buf + (eeprom->offset & 1), eeprom->len); +out: + devm_kfree(dev, buf); + return ret; +} + static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct ice_netdev_priv *np = netdev_priv(netdev); @@ -244,11 +310,99 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } break; + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { + snprintf(p, ETH_GSTRING_LEN, "%s", + ice_gstrings_priv_flags[i].name); + p += ETH_GSTRING_LEN; + } + break; default: break; } } +static int +ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + bool led_active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + led_active = true; + break; + case ETHTOOL_ID_INACTIVE: + led_active = false; + break; + default: + return -EINVAL; + } + + if (ice_aq_set_port_id_led(np->vsi->port_info, !led_active, NULL)) + return -EIO; + + return 0; +} + +/** + * ice_get_priv_flags - report device private flags + * @netdev: network interface device structure + * + * The get string set count and the string set should be matched for each + * flag returned. Add new strings for each flag to the ice_gstrings_priv_flags + * array. + * + * Returns a u32 bitmap of flags. + */ +static u32 ice_get_priv_flags(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u32 i, ret_flags = 0; + + for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { + const struct ice_priv_flag *priv_flag; + + priv_flag = &ice_gstrings_priv_flags[i]; + + if (test_bit(priv_flag->bitno, pf->flags)) + ret_flags |= BIT(i); + } + + return ret_flags; +} + +/** + * ice_set_priv_flags - set private flags + * @netdev: network interface device structure + * @flags: bit flags to be set + */ +static int ice_set_priv_flags(struct net_device *netdev, u32 flags) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u32 i; + + if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE)) + return -EINVAL; + + for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { + const struct ice_priv_flag *priv_flag; + + priv_flag = &ice_gstrings_priv_flags[i]; + + if (flags & BIT(i)) + set_bit(priv_flag->bitno, pf->flags); + else + clear_bit(priv_flag->bitno, pf->flags); + } + + return 0; +} + static int ice_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { @@ -272,6 +426,8 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) * not safe. 
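The byte-to-word windowing in ice_get_eeprom() above is easiest to follow with concrete numbers. Below is a minimal stand-alone sketch of the same arithmetic, using hypothetical request values rather than driver state:

#include <stdio.h>

int main(void)
{
	unsigned int offset = 3, len = 6;	/* byte-based user request */

	/* widen the byte window to whole 16-bit Shadow RAM words */
	unsigned int first_word = offset >> 1;		    /* word 1 */
	unsigned int last_word = (offset + len - 1) >> 1;   /* word 4 */
	unsigned int nwords = last_word - first_word + 1;   /* 4 words */

	/* the user's bytes start (offset & 1) bytes into the word
	 * buffer, which is what the memcpy() in ice_get_eeprom() skips
	 */
	printf("read words %u..%u (%u words), copy from byte %u\n",
	       first_word, last_word, nwords, offset & 1);
	return 0;
}

Rounding out to whole words on both ends is what lets the word-oriented ice_read_sr_buf() service an arbitrary byte-aligned ethtool request.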
*/ return ICE_ALL_STATS_LEN(netdev); + case ETH_SS_PRIV_FLAGS: + return ICE_PRIV_FLAG_ARRAY_SIZE; default: return -EOPNOTSUPP; } @@ -337,16 +493,20 @@ ice_get_ethtool_stats(struct net_device *netdev, * @netdev: network interface device structure * @ks: ethtool link ksettings struct to fill out */ -static void ice_phy_type_to_ethtool(struct net_device *netdev, - struct ethtool_link_ksettings *ks) +static void +ice_phy_type_to_ethtool(struct net_device *netdev, + struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_link_status *hw_link_info; + bool need_add_adv_mode = false; struct ice_vsi *vsi = np->vsi; + u64 phy_types_high; u64 phy_types_low; hw_link_info = &vsi->port_info->phy.link_info; phy_types_low = vsi->port_info->phy.phy_type_low; + phy_types_high = vsi->port_info->phy.phy_type_high; ethtool_link_ksettings_zero_link_mode(ks, supported); ethtool_link_ksettings_zero_link_mode(ks, advertising); @@ -495,6 +655,95 @@ static void ice_phy_type_to_ethtool(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseLR4_Full); } + if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 || + phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC || + phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2 || + phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC || + phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2 || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR || + phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC || + phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseCR2_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 50000baseCR2_Full); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseKR2_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 50000baseKR2_Full); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR2 || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR2 || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_FR || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseSR2_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 50000baseSR2_Full); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 || + phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC || + phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4 || + phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC || + phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2 || + phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC || + phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2 || + phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC || + phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseCR4_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + need_add_adv_mode = true; + } + if (need_add_adv_mode) { + need_add_adv_mode = false; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseCR4_Full); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) { + 
ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseSR4_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + need_add_adv_mode = true; + } + if (need_add_adv_mode) { + need_add_adv_mode = false; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseSR4_Full); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_LR4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseLR4_ER4_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + need_add_adv_mode = true; + } + if (need_add_adv_mode) { + need_add_adv_mode = false; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseLR4_ER4_Full); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 || + phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseKR4_Full); + if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + need_add_adv_mode = true; + } + if (need_add_adv_mode) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseKR4_Full); /* Autoneg PHY types */ if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX || @@ -520,6 +769,24 @@ static void ice_phy_type_to_ethtool(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); } + if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP || + phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) { + ethtool_link_ksettings_add_link_mode(ks, supported, + Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Autoneg); + } + if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 || + phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) { + ethtool_link_ksettings_add_link_mode(ks, supported, + Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Autoneg); + } } #define TEST_SET_BITS_TIMEOUT 50 @@ -531,13 +798,15 @@ static void ice_phy_type_to_ethtool(struct net_device *netdev, * @ks: ethtool ksettings to fill in * @netdev: network interface device structure */ -static void ice_get_settings_link_up(struct ethtool_link_ksettings *ks, - struct net_device *netdev) +static void +ice_get_settings_link_up(struct ethtool_link_ksettings *ks, + struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ethtool_link_ksettings cap_ksettings; struct ice_link_status *link_info; struct ice_vsi *vsi = np->vsi; + bool unrecog_phy_high = false; bool unrecog_phy_low = false; link_info = &vsi->port_info->phy.link_info; @@ -699,14 +968,116 @@ static void ice_get_settings_link_up(struct ethtool_link_ksettings *ks, ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseKR4_Full); break; + case ICE_PHY_TYPE_LOW_50GBASE_CR2: + case ICE_PHY_TYPE_LOW_50GBASE_CP: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseCR2_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 50000baseCR2_Full); + break; + case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC: + case ICE_PHY_TYPE_LOW_50G_LAUI2: + case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC: + case ICE_PHY_TYPE_LOW_50G_AUI2: + case ICE_PHY_TYPE_LOW_50GBASE_SR: + case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC: + case 
ICE_PHY_TYPE_LOW_50G_AUI1: + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseCR2_Full); + break; + case ICE_PHY_TYPE_LOW_50GBASE_KR2: + case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseKR2_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 50000baseKR2_Full); + break; + case ICE_PHY_TYPE_LOW_50GBASE_SR2: + case ICE_PHY_TYPE_LOW_50GBASE_LR2: + case ICE_PHY_TYPE_LOW_50GBASE_FR: + case ICE_PHY_TYPE_LOW_50GBASE_LR: + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseSR2_Full); + break; + case ICE_PHY_TYPE_LOW_100GBASE_CR4: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseCR4_Full); + break; + case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC: + case ICE_PHY_TYPE_LOW_100G_CAUI4: + case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC: + case ICE_PHY_TYPE_LOW_100G_AUI4: + case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseCR4_Full); + break; + case ICE_PHY_TYPE_LOW_100GBASE_CP2: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseCR4_Full); + break; + case ICE_PHY_TYPE_LOW_100GBASE_SR4: + case ICE_PHY_TYPE_LOW_100GBASE_SR2: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseSR4_Full); + break; + case ICE_PHY_TYPE_LOW_100GBASE_LR4: + case ICE_PHY_TYPE_LOW_100GBASE_DR: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseLR4_ER4_Full); + break; + case ICE_PHY_TYPE_LOW_100GBASE_KR4: + case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseKR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseKR4_Full); + break; default: unrecog_phy_low = true; } - if (unrecog_phy_low) { + switch (link_info->phy_type_high) { + case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseKR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100000baseKR4_Full); + break; + case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC: + case ICE_PHY_TYPE_HIGH_100G_CAUI2: + case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC: + case ICE_PHY_TYPE_HIGH_100G_AUI2: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseCR4_Full); + break; + default: + unrecog_phy_high = true; + } + + if (unrecog_phy_low && unrecog_phy_high) { /* if we got here and link is up something bad is afoot */ - netdev_info(netdev, "WARNING: Unrecognized PHY_Low (0x%llx).\n", + netdev_info(netdev, + "WARNING: Unrecognized PHY_Low (0x%llx).\n", (u64)link_info->phy_type_low); + netdev_info(netdev, + "WARNING: Unrecognized PHY_High (0x%llx).\n", + (u64)link_info->phy_type_high); } /* Now that we've worked out everything that could be supported by the @@ -718,6 
+1089,12 @@ static void ice_get_settings_link_up(struct ethtool_link_ksettings *ks, ethtool_intersect_link_masks(ks, &cap_ksettings); switch (link_info->link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + ks->base.speed = SPEED_100000; + break; + case ICE_AQ_LINK_SPEED_50GB: + ks->base.speed = SPEED_50000; + break; case ICE_AQ_LINK_SPEED_40GB: ks->base.speed = SPEED_40000; break; @@ -911,6 +1288,23 @@ ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks) ethtool_link_ksettings_test_link_mode(ks, advertising, 40000baseKR4_Full)) adv_link_speed |= ICE_AQ_LINK_SPEED_40GB; + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 50000baseCR2_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 50000baseKR2_Full)) + adv_link_speed |= ICE_AQ_LINK_SPEED_50GB; + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 50000baseSR2_Full)) + adv_link_speed |= ICE_AQ_LINK_SPEED_50GB; + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 100000baseCR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 100000baseSR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 100000baseLR4_ER4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 100000baseKR4_Full)) + adv_link_speed |= ICE_AQ_LINK_SPEED_100GB; return adv_link_speed; } @@ -981,8 +1375,9 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks, * * Set speed/duplex per media_types advertised/forced */ -static int ice_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *ks) +static int +ice_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *ks) { u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT, lport = 0; struct ice_netdev_priv *np = netdev_priv(netdev); @@ -994,6 +1389,7 @@ static int ice_set_link_ksettings(struct net_device *netdev, struct ice_port_info *p; u8 autoneg_changed = 0; enum ice_status status; + u64 phy_type_high; u64 phy_type_low; int err = 0; bool linkup; @@ -1109,7 +1505,7 @@ static int ice_set_link_ksettings(struct net_device *netdev, adv_link_speed = curr_link_speed; /* Convert the advertise link speeds to their corresponded PHY_TYPE */ - ice_update_phy_type(&phy_type_low, adv_link_speed); + ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed); if (!autoneg_changed && adv_link_speed == curr_link_speed) { netdev_info(netdev, "Nothing changed, exiting without setting anything.\n"); @@ -1128,7 +1524,9 @@ static int ice_set_link_ksettings(struct net_device *netdev, /* set link and auto negotiation so changes take effect */ config.caps |= ICE_AQ_PHY_ENA_LINK; - if (phy_type_low) { + if (phy_type_low || phy_type_high) { + config.phy_type_high = cpu_to_le64(phy_type_high) & + abilities->phy_type_high; config.phy_type_low = cpu_to_le64(phy_type_low) & abilities->phy_type_low; } else { @@ -1667,6 +2065,258 @@ static int ice_set_rxfh(struct net_device *netdev, const u32 *indir, return 0; } +enum ice_container_type { + ICE_RX_CONTAINER, + ICE_TX_CONTAINER, +}; + +/** + * ice_get_rc_coalesce - get ITR values for specific ring container + * @ec: ethtool structure to fill with driver's coalesce settings + * @c_type: container type, RX or TX + * @rc: ring container that the ITR values will come from + * + * Query the device for ice_ring_container specific ITR values. This is + * done per ice_ring_container because each q_vector can have 1 or more rings + * and all of said ring(s) will have the same ITR values. 
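A single u16 itr_setting carries both pieces of coalesce state that ice_get_rc_coalesce() reports: bit 15 (ICE_ITR_DYNAMIC in the ice_txrx.h hunk later in this series) flags adaptive mode, and the low bits hold the interval in microseconds. A minimal sketch of the unpacking, with the define copied from that hunk:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ICE_ITR_DYNAMIC	0x8000	/* high bit of the u16 setting */

int main(void)
{
	uint16_t itr_setting = 50 | ICE_ITR_DYNAMIC;	/* adaptive, 50 us */

	bool use_adaptive = itr_setting & ICE_ITR_DYNAMIC;
	unsigned int usecs = itr_setting & ~ICE_ITR_DYNAMIC;

	/* prints "adaptive=1 usecs=50" */
	printf("adaptive=%d usecs=%u\n", use_adaptive, usecs);
	return 0;
}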
+ * + * Returns 0 on success, negative otherwise. + */ +static int +ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, + struct ice_ring_container *rc) +{ + struct ice_pf *pf = rc->ring->vsi->back; + + switch (c_type) { + case ICE_RX_CONTAINER: + ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); + ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + break; + case ICE_TX_CONTAINER: + ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); + ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + break; + default: + dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type); + return -EINVAL; + } + + return 0; +} + +/** + * __ice_get_coalesce - get ITR/INTRL values for the device + * @netdev: pointer to the netdev associated with this query + * @ec: ethtool structure to fill with driver's coalesce settings + * @q_num: queue number to get the coalesce settings for + */ +static int +__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, + int q_num) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + int tx = -EINVAL, rx = -EINVAL; + struct ice_vsi *vsi = np->vsi; + + if (q_num < 0) { + rx = ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + &vsi->rx_rings[0]->q_vector->rx); + tx = ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + &vsi->tx_rings[0]->q_vector->tx); + + goto update_coalesced_frames; + } + + if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { + rx = ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + &vsi->rx_rings[q_num]->q_vector->rx); + tx = ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + &vsi->tx_rings[q_num]->q_vector->tx); + } else if (q_num < vsi->num_rxq) { + rx = ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + &vsi->rx_rings[q_num]->q_vector->rx); + } else if (q_num < vsi->num_txq) { + tx = ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + &vsi->tx_rings[q_num]->q_vector->tx); + } else { + /* q_num is invalid for both Rx and Tx queues */ + return -EINVAL; + } + +update_coalesced_frames: + /* either q_num is invalid for both Rx and Tx queues or setting coalesce + * failed completely + */ + if (tx && rx) + return -EINVAL; + + if (q_num < vsi->num_txq) + ec->tx_max_coalesced_frames_irq = vsi->work_lmt; + + if (q_num < vsi->num_rxq) + ec->rx_max_coalesced_frames_irq = vsi->work_lmt; + + return 0; +} + +static int +ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) +{ + return __ice_get_coalesce(netdev, ec, -1); +} + +static int ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) +{ + return __ice_get_coalesce(netdev, ec, q_num); +} + +/** + * ice_set_rc_coalesce - set ITR values for specific ring container + * @c_type: container type, RX or TX + * @ec: ethtool structure from user to update ITR settings + * @rc: ring container that the ITR values will come from + * @vsi: VSI associated to the ring container + * + * Set specific ITR values. This is done per ice_ring_container because each + * q_vector can have 1 or more rings and all of said ring(s) will have the same + * ITR values. + * + * Returns 0 on success, negative otherwise. 
+ */ +static int +ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, + struct ice_ring_container *rc, struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + u16 itr_setting; + + if (!rc->ring) + return -EINVAL; + + itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; + + switch (c_type) { + case ICE_RX_CONTAINER: + if (ec->rx_coalesce_usecs != itr_setting && + ec->use_adaptive_rx_coalesce) { + netdev_info(vsi->netdev, + "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n"); + return -EINVAL; + } + + if (ec->rx_coalesce_usecs > ICE_ITR_MAX) { + netdev_info(vsi->netdev, + "Invalid value, rx-usecs range is 0-%d\n", + ICE_ITR_MAX); + return -EINVAL; + } + + if (ec->use_adaptive_rx_coalesce) { + rc->itr_setting |= ICE_ITR_DYNAMIC; + } else { + rc->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + rc->target_itr = ITR_TO_REG(rc->itr_setting); + } + break; + case ICE_TX_CONTAINER: + if (ec->tx_coalesce_usecs != itr_setting && + ec->use_adaptive_tx_coalesce) { + netdev_info(vsi->netdev, + "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n"); + return -EINVAL; + } + + if (ec->tx_coalesce_usecs > ICE_ITR_MAX) { + netdev_info(vsi->netdev, + "Invalid value, tx-usecs range is 0-%d\n", + ICE_ITR_MAX); + return -EINVAL; + } + + if (ec->use_adaptive_tx_coalesce) { + rc->itr_setting |= ICE_ITR_DYNAMIC; + } else { + rc->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); + rc->target_itr = ITR_TO_REG(rc->itr_setting); + } + break; + default: + dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type); + return -EINVAL; + } + + return 0; +} + +static int +__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, + int q_num) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + int rx = -EINVAL, tx = -EINVAL; + struct ice_vsi *vsi = np->vsi; + + if (q_num < 0) { + int i; + + ice_for_each_q_vector(vsi, i) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + &q_vector->rx, vsi) || + ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + &q_vector->tx, vsi)) + return -EINVAL; + } + + goto set_work_lmt; + } + + if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { + rx = ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + &vsi->rx_rings[q_num]->q_vector->rx, + vsi); + tx = ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + &vsi->tx_rings[q_num]->q_vector->tx, + vsi); + } else if (q_num < vsi->num_rxq) { + rx = ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + &vsi->rx_rings[q_num]->q_vector->rx, + vsi); + } else if (q_num < vsi->num_txq) { + tx = ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + &vsi->tx_rings[q_num]->q_vector->tx, + vsi); + } + + /* either q_num is invalid for both Rx and Tx queues or setting coalesce + * failed completely + */ + if (rx && tx) + return -EINVAL; + +set_work_lmt: + if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) + vsi->work_lmt = max(ec->tx_max_coalesced_frames_irq, + ec->rx_max_coalesced_frames_irq); + + return 0; +} + +static int +ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) +{ + return __ice_set_coalesce(netdev, ec, -1); +} + +static int ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) +{ + return __ice_set_coalesce(netdev, ec, q_num); +} + static const struct ethtool_ops ice_ethtool_ops = { .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, @@ -1676,8 +2326,15 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_msglevel = 
ice_get_msglevel, .set_msglevel = ice_set_msglevel, .get_link = ethtool_op_get_link, + .get_eeprom_len = ice_get_eeprom_len, + .get_eeprom = ice_get_eeprom, + .get_coalesce = ice_get_coalesce, + .set_coalesce = ice_set_coalesce, .get_strings = ice_get_strings, + .set_phys_id = ice_set_phys_id, .get_ethtool_stats = ice_get_ethtool_stats, + .get_priv_flags = ice_get_priv_flags, + .set_priv_flags = ice_set_priv_flags, .get_sset_count = ice_get_sset_count, .get_rxnfc = ice_get_rxnfc, .get_ringparam = ice_get_ringparam, @@ -1689,6 +2346,9 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_rxfh_indir_size = ice_get_rxfh_indir_size, .get_rxfh = ice_get_rxfh, .set_rxfh = ice_set_rxfh, + .get_ts_info = ethtool_op_get_ts_info, + .get_per_queue_coalesce = ice_get_per_q_coalesce, + .set_per_queue_coalesce = ice_set_per_q_coalesce, }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 5507928c8fbe..f9a38f2cd470 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -110,6 +110,7 @@ #define GLINT_DYN_CTL_CLEARPBA_M BIT(1) #define GLINT_DYN_CTL_SWINT_TRIG_M BIT(2) #define GLINT_DYN_CTL_ITR_INDX_S 3 +#define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) #define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index bb51dd7defb5..ef4c79b5aa32 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -346,6 +346,7 @@ enum ice_tx_desc_cmd_bits { ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */ ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */ ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */ + ICE_TX_DESC_CMD_L4T_EOFT_SCTP = 0x0200, /* 2 BITS */ ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */ }; @@ -488,5 +489,7 @@ static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) #define ICE_LINK_SPEED_20000MBPS 20000 #define ICE_LINK_SPEED_25000MBPS 25000 #define ICE_LINK_SPEED_40000MBPS 40000 +#define ICE_LINK_SPEED_50000MBPS 50000 +#define ICE_LINK_SPEED_100000MBPS 100000 #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 29b1dcfd4331..27c3760ae5cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -514,110 +514,89 @@ unlock_pf: } /** - * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI - * @vsi: the VSI getting queues + * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment * - * Return 0 on success and a negative value on error + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap */ -static int ice_vsi_get_qs_contig(struct ice_vsi *vsi) +static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) { - struct ice_pf *pf = vsi->back; - int offset, ret = 0; - - mutex_lock(&pf->avail_q_mutex); - /* look for contiguous block of queues for Tx */ - offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS, - 0, vsi->alloc_txq, 0); - if (offset < ICE_MAX_TXQS) { - int i; + int offset, i; - bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq); - for (i = 0; i < vsi->alloc_txq; i++) - vsi->txq_map[i] = i + offset; - } else { - ret = 
-ENOMEM; - vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER; + mutex_lock(qs_cfg->qs_mutex); + offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size, + 0, qs_cfg->q_count, 0); + if (offset >= qs_cfg->pf_map_size) { + mutex_unlock(qs_cfg->qs_mutex); + return -ENOMEM; } - /* look for contiguous block of queues for Rx */ - offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS, - 0, vsi->alloc_rxq, 0); - if (offset < ICE_MAX_RXQS) { - int i; - - bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq); - for (i = 0; i < vsi->alloc_rxq; i++) - vsi->rxq_map[i] = i + offset; - } else { - ret = -ENOMEM; - vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER; - } - mutex_unlock(&pf->avail_q_mutex); + bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count); + for (i = 0; i < qs_cfg->q_count; i++) + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset; + mutex_unlock(qs_cfg->qs_mutex); - return ret; + return 0; } /** - * ice_vsi_get_qs_scatter - Assign a scattered queues to VSI - * @vsi: the VSI getting queues + * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment * - * Return 0 on success and a negative value on error + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap */ -static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi) +static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) { - struct ice_pf *pf = vsi->back; int i, index = 0; - mutex_lock(&pf->avail_q_mutex); - - if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) { - for (i = 0; i < vsi->alloc_txq; i++) { - index = find_next_zero_bit(pf->avail_txqs, - ICE_MAX_TXQS, index); - if (index < ICE_MAX_TXQS) { - set_bit(index, pf->avail_txqs); - vsi->txq_map[i] = index; - } else { - goto err_scatter_tx; - } - } + mutex_lock(qs_cfg->qs_mutex); + for (i = 0; i < qs_cfg->q_count; i++) { + index = find_next_zero_bit(qs_cfg->pf_map, + qs_cfg->pf_map_size, index); + if (index >= qs_cfg->pf_map_size) + goto err_scatter; + set_bit(index, qs_cfg->pf_map); + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index; } + mutex_unlock(qs_cfg->qs_mutex); - if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) { - for (i = 0; i < vsi->alloc_rxq; i++) { - index = find_next_zero_bit(pf->avail_rxqs, - ICE_MAX_RXQS, index); - if (index < ICE_MAX_RXQS) { - set_bit(index, pf->avail_rxqs); - vsi->rxq_map[i] = index; - } else { - goto err_scatter_rx; - } - } - } - - mutex_unlock(&pf->avail_q_mutex); return 0; - -err_scatter_rx: - /* unflag any queues we have grabbed (i is failed position) */ - for (index = 0; index < i; index++) { - clear_bit(vsi->rxq_map[index], pf->avail_rxqs); - vsi->rxq_map[index] = 0; - } - i = vsi->alloc_txq; -err_scatter_tx: - /* i is either position of failed attempt or vsi->alloc_txq */ +err_scatter: for (index = 0; index < i; index++) { - clear_bit(vsi->txq_map[index], pf->avail_txqs); - vsi->txq_map[index] = 0; + clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map); + qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0; } + mutex_unlock(qs_cfg->qs_mutex); - mutex_unlock(&pf->avail_q_mutex); return -ENOMEM; } /** + * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment + * + * This is an internal function for assigning queues from the PF to VSI and + * initially tries to find contiguous space. If it is not successful to find + * contiguous space, then it tries with the scatter approach. 
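A toy model of that two-step policy follows, with a plain bool array standing in for the PF's avail_txqs/avail_rxqs bitmaps; the mutex, the ice_qs_cfg plumbing, and the shrink of q_count to scatter_count on fallback are all omitted:

#include <stdbool.h>
#include <stdio.h>

#define PF_MAP_SIZE 16

static bool pf_map[PF_MAP_SIZE];	/* true = queue already taken */

/* contiguous strategy: find a free run of 'count' queues */
static int get_qs_contig(int count, int *vsi_map)
{
	for (int start = 0; start + count <= PF_MAP_SIZE; start++) {
		int i;

		for (i = 0; i < count; i++)
			if (pf_map[start + i])
				break;
		if (i == count) {
			for (i = 0; i < count; i++) {
				pf_map[start + i] = true;
				vsi_map[i] = start + i;
			}
			return 0;
		}
	}
	return -1;			/* stands in for -ENOMEM */
}

/* scatter strategy: take any free queues, roll back on failure */
static int get_qs_scatter(int count, int *vsi_map)
{
	int got = 0;

	for (int idx = 0; idx < PF_MAP_SIZE && got < count; idx++)
		if (!pf_map[idx]) {
			pf_map[idx] = true;
			vsi_map[got++] = idx;
		}
	if (got == count)
		return 0;
	while (got--)			/* mirrors the err_scatter label */
		pf_map[vsi_map[got]] = false;
	return -1;
}

int main(void)
{
	int vsi_map[4];

	for (int i = 2; i < PF_MAP_SIZE; i += 3)
		pf_map[i] = true;	/* fragment: longest free run is 2 */

	/* fall back to scatter only if the contiguous search fails */
	if (get_qs_contig(4, vsi_map) && get_qs_scatter(4, vsi_map))
		return 1;
	for (int i = 0; i < 4; i++)	/* prints 0, 1, 3, 4 */
		printf("vsi q%d -> pf q%d\n", i, vsi_map[i]);
	return 0;
}

As in the driver, the scatter path rolls back every queue it grabbed when it cannot satisfy the full request, so a failed attempt leaves the PF bitmap untouched.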
+ * + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap + */ +static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg) +{ + int ret = 0; + + ret = __ice_vsi_get_qs_contig(qs_cfg); + if (ret) { + /* contig failed, so try with scatter approach */ + qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER; + qs_cfg->q_count = min_t(u16, qs_cfg->q_count, + qs_cfg->scatter_count); + ret = __ice_vsi_get_qs_sc(qs_cfg); + } + return ret; +} + +/** * ice_vsi_get_qs - Assign queues from PF to VSI * @vsi: the VSI to assign queues to * @@ -625,25 +604,35 @@ err_scatter_tx: */ static int ice_vsi_get_qs(struct ice_vsi *vsi) { + struct ice_pf *pf = vsi->back; + struct ice_qs_cfg tx_qs_cfg = { + .qs_mutex = &pf->avail_q_mutex, + .pf_map = pf->avail_txqs, + .pf_map_size = ICE_MAX_TXQS, + .q_count = vsi->alloc_txq, + .scatter_count = ICE_MAX_SCATTER_TXQS, + .vsi_map = vsi->txq_map, + .vsi_map_offset = 0, + .mapping_mode = vsi->tx_mapping_mode + }; + struct ice_qs_cfg rx_qs_cfg = { + .qs_mutex = &pf->avail_q_mutex, + .pf_map = pf->avail_rxqs, + .pf_map_size = ICE_MAX_RXQS, + .q_count = vsi->alloc_rxq, + .scatter_count = ICE_MAX_SCATTER_RXQS, + .vsi_map = vsi->rxq_map, + .vsi_map_offset = 0, + .mapping_mode = vsi->rx_mapping_mode + }; int ret = 0; vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG; vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG; - /* NOTE: ice_vsi_get_qs_contig() will set the Rx/Tx mapping - * modes individually to scatter if assigning contiguous queues - * to Rx or Tx fails - */ - ret = ice_vsi_get_qs_contig(vsi); - if (ret < 0) { - if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) - vsi->alloc_txq = max_t(u16, vsi->alloc_txq, - ICE_MAX_SCATTER_TXQS); - if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) - vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq, - ICE_MAX_SCATTER_RXQS); - ret = ice_vsi_get_qs_scatter(vsi); - } + ret = __ice_vsi_get_qs(&tx_qs_cfg); + if (!ret) + ret = __ice_vsi_get_qs(&rx_qs_cfg); return ret; } @@ -1614,11 +1603,14 @@ setup_rings: /** * ice_vsi_cfg_txqs - Configure the VSI for Tx * @vsi: the VSI being configured + * @rings: Tx ring array to be configured + * @offset: offset within vsi->txq_map * * Return 0 on success and a negative value on error * Configure the Tx VSI for operation. */ -int ice_vsi_cfg_txqs(struct ice_vsi *vsi) +static int +ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset) { struct ice_aqc_add_tx_qgrp *qg_buf; struct ice_aqc_add_txqs_perq *txq; @@ -1626,7 +1618,7 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi) u8 num_q_grps, q_idx = 0; enum ice_status status; u16 buf_len, i, pf_q; - int err = 0, tc = 0; + int err = 0, tc; buf_len = sizeof(struct ice_aqc_add_tx_qgrp); qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL); @@ -1644,9 +1636,8 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi) for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { struct ice_tlan_ctx tlan_ctx = { 0 }; - pf_q = vsi->txq_map[q_idx]; - ice_setup_tx_ctx(vsi->tx_rings[q_idx], &tlan_ctx, - pf_q); + pf_q = vsi->txq_map[q_idx + offset]; + ice_setup_tx_ctx(rings[q_idx], &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, @@ -1655,7 +1646,7 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi) /* init queue specific tail reg. It is referred as * transmit comm scheduler queue doorbell. 
*/ - vsi->tx_rings[q_idx]->tail = + rings[q_idx]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, num_q_grps, qg_buf, buf_len, @@ -1674,7 +1665,7 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi) */ txq = &qg_buf->txqs[0]; if (pf_q == le16_to_cpu(txq->txq_id)) - vsi->tx_rings[q_idx]->txq_teid = + rings[q_idx]->txq_teid = le32_to_cpu(txq->q_teid); q_idx++; @@ -1686,6 +1677,18 @@ err_cfg_txqs: } /** + * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + * Configure the Tx VSI for operation. + */ +int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) +{ + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0); +} + +/** * ice_intrl_usec_to_reg - convert interrupt rate limit to register value * @intrl: interrupt rate limit in usecs * @gran: interrupt rate limit granularity in usecs @@ -1714,22 +1717,34 @@ static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) static void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector) { - u8 itr_gran = hw->itr_gran; - if (q_vector->num_ring_rx) { struct ice_ring_container *rc = &q_vector->rx; - rc->itr = ITR_TO_REG(ICE_DFLT_RX_ITR, itr_gran); + /* if this value is set then don't overwrite with default */ + if (!rc->itr_setting) + rc->itr_setting = ICE_DFLT_RX_ITR; + + rc->target_itr = ITR_TO_REG(rc->itr_setting); + rc->next_update = jiffies + 1; + rc->current_itr = rc->target_itr; rc->latency_range = ICE_LOW_LATENCY; - wr32(hw, GLINT_ITR(rc->itr_idx, vector), rc->itr); + wr32(hw, GLINT_ITR(rc->itr_idx, vector), + ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); } if (q_vector->num_ring_tx) { struct ice_ring_container *rc = &q_vector->tx; - rc->itr = ITR_TO_REG(ICE_DFLT_TX_ITR, itr_gran); + /* if this value is set then don't overwrite with default */ + if (!rc->itr_setting) + rc->itr_setting = ICE_DFLT_TX_ITR; + + rc->target_itr = ITR_TO_REG(rc->itr_setting); + rc->next_update = jiffies + 1; + rc->current_itr = rc->target_itr; rc->latency_range = ICE_LOW_LATENCY; - wr32(hw, GLINT_ITR(rc->itr_idx, vector), rc->itr); + wr32(hw, GLINT_ITR(rc->itr_idx, vector), + ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); } } @@ -1897,9 +1912,12 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) * @vsi: the VSI being configured * @rst_src: reset source * @rel_vmvf_num: Relative id of VF/VM + * @rings: Tx ring array to be stopped + * @offset: offset within vsi->txq_map */ -int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num) +static int +ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, + u16 rel_vmvf_num, struct ice_ring **rings, int offset) { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; @@ -1927,19 +1945,18 @@ int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, ice_for_each_txq(vsi, i) { u16 v_idx; - if (!vsi->tx_rings || !vsi->tx_rings[i] || - !vsi->tx_rings[i]->q_vector) { + if (!rings || !rings[i] || !rings[i]->q_vector) { err = -EINVAL; goto err_out; } - q_ids[i] = vsi->txq_map[i]; - q_teids[i] = vsi->tx_rings[i]->txq_teid; + q_ids[i] = vsi->txq_map[i + offset]; + q_teids[i] = rings[i]->txq_teid; /* clear cause_ena bit for disabled queues */ - val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); + val = rd32(hw, QINT_TQCTL(rings[i]->reg_idx)); val &= ~QINT_TQCTL_CAUSE_ENA_M; - wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); + wr32(hw, QINT_TQCTL(rings[i]->reg_idx), val); /* software is expected to wait for 
100 ns */ ndelay(100); @@ -1947,7 +1964,7 @@ int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, /* trigger a software interrupt for the vector associated to * the queue to schedule NAPI handler */ - v_idx = vsi->tx_rings[i]->q_vector->v_idx; + v_idx = rings[i]->q_vector->v_idx; wr32(hw, GLINT_DYN_CTL(vsi->hw_base_vector + v_idx), GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); } @@ -1977,6 +1994,19 @@ err_alloc_q_ids: } /** + * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings + * @vsi: the VSI being configured + * @rst_src: reset source + * @rel_vmvf_num: Relative id of VF/VM + */ +int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, + enum ice_disq_rst_src rst_src, u16 rel_vmvf_num) +{ + return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, + 0); +} + +/** * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI * @vsi: VSI to enable or disable VLAN pruning on * @ena: set to true to enable VLAN pruning and false to disable it @@ -2581,6 +2611,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) goto err_vectors; ice_vsi_map_rings_to_vectors(vsi); + /* Do not exit if configuring RSS had an issue, at least + * receive traffic on first queue. Hence no need to capture + * return value + */ + if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) + ice_vsi_cfg_rss_lut_key(vsi); break; case ICE_VSI_VF: ret = ice_vsi_alloc_q_vectors(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 3831b4f0960a..7988a53729a9 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -15,7 +15,7 @@ void ice_update_eth_stats(struct ice_vsi *vsi); int ice_vsi_cfg_rxqs(struct ice_vsi *vsi); -int ice_vsi_cfg_txqs(struct ice_vsi *vsi); +int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); @@ -31,7 +31,8 @@ int ice_vsi_start_rx_rings(struct ice_vsi *vsi); int ice_vsi_stop_rx_rings(struct ice_vsi *vsi); -int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, +int +ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num); int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8725569d11f0..48f033928aa2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1389,7 +1389,6 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; int oicr_idx, err = 0; - u8 itr_gran; u32 val; if (!pf->int_name[0]) @@ -1453,10 +1452,8 @@ skip_req_irq: PFINT_MBX_CTL_CAUSE_ENA_M); wr32(hw, PFINT_MBX_CTL, val); - itr_gran = hw->itr_gran; - wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx), - ITR_TO_REG(ICE_ITR_8K, itr_gran)); + ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); ice_flush(hw); ice_irq_dynamic_ena(hw, NULL, NULL); @@ -1531,6 +1528,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) csumo_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_IPV6_CSUM; vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER | @@ -1998,6 +1996,23 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /** + * ice_verify_itr_gran - verify driver's assumption of ITR granularity + * @pf: pointer to the PF structure + * + * There is no error returned here because the driver will be able to handle a + * different ITR granularity, but interrupt moderation will not be accurate if + * the driver's assumptions are 
not verified. This assumption is made so we can + * use constants in the hot path instead of accessing structure members. + */ +static void ice_verify_itr_gran(struct ice_pf *pf) +{ + if (pf->hw.itr_gran != (ICE_ITR_GRAN_S << 1)) + dev_warn(&pf->pdev->dev, + "%d ITR granularity assumption is invalid, actual ITR granularity is %d. Interrupt moderation will be inaccurate!\n", + (ICE_ITR_GRAN_S << 1), pf->hw.itr_gran); +} + +/** * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines * @pf: pointer to the PF structure * @@ -2163,6 +2178,7 @@ static int ice_probe(struct pci_dev *pdev, mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); ice_verify_cacheline_size(pf); + ice_verify_itr_gran(pf); return 0; @@ -2422,7 +2438,8 @@ static void ice_set_rx_mode(struct net_device *netdev) */ static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, u16 flags) + u16 vid, u16 flags, + struct netlink_ext_ack *extack) { int err; @@ -2546,7 +2563,8 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) if (err) return err; } - err = ice_vsi_cfg_txqs(vsi); + + err = ice_vsi_cfg_lan_txqs(vsi); if (!err) err = ice_vsi_cfg_rxqs(vsi); @@ -2945,12 +2963,91 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) } /** + * ice_force_phys_link_state - Force the physical link state + * @vsi: VSI to force the physical link state to up/down + * @link_up: true/false indicates to set the physical link to up/down + * + * Force the physical link state by getting the current PHY capabilities from + * hardware and setting the PHY config based on the determined capabilities. If + * link changes a link event will be triggered because both the Enable Automatic + * Link Update and LESM Enable bits are set when setting the PHY capabilities. 
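This helper is the mechanism behind the link-down-on-close private flag registered in the ethtool changes above: when the flag is set, ice_down() forces the physical link down and ice_open() brings it back up through this function. Assuming an interface named eth0 (illustrative only), the flag would typically be toggled with ethtool --set-priv-flags eth0 link-down-on-close on and inspected with ethtool --show-priv-flags eth0.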
+ * + * Returns 0 on success, negative on failure + */ +static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) +{ + struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_aqc_set_phy_cfg_data *cfg; + struct ice_port_info *pi; + struct device *dev; + int retcode; + + if (!vsi || !vsi->port_info || !vsi->back) + return -EINVAL; + if (vsi->type != ICE_VSI_PF) + return 0; + + dev = &vsi->back->pdev->dev; + + pi = vsi->port_info; + + pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return -ENOMEM; + + retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + NULL); + if (retcode) { + dev_err(dev, + "Failed to get phy capabilities, VSI %d error %d\n", + vsi->vsi_num, retcode); + retcode = -EIO; + goto out; + } + + /* No change in link */ + if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) && + link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) + goto out; + + cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL); + if (!cfg) { + retcode = -ENOMEM; + goto out; + } + + cfg->phy_type_low = pcaps->phy_type_low; + cfg->phy_type_high = pcaps->phy_type_high; + cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; + cfg->low_power_ctrl = pcaps->low_power_ctrl; + cfg->eee_cap = pcaps->eee_cap; + cfg->eeer_value = pcaps->eeer_value; + cfg->link_fec_opt = pcaps->link_fec_options; + if (link_up) + cfg->caps |= ICE_AQ_PHY_ENA_LINK; + else + cfg->caps &= ~ICE_AQ_PHY_ENA_LINK; + + retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL); + if (retcode) { + dev_err(dev, "Failed to set phy config, VSI %d error %d\n", + vsi->vsi_num, retcode); + retcode = -EIO; + } + + devm_kfree(dev, cfg); +out: + devm_kfree(dev, pcaps); + return retcode; +} + +/** * ice_down - Shutdown the connection * @vsi: The VSI being stopped */ int ice_down(struct ice_vsi *vsi) { - int i, tx_err, rx_err; + int i, tx_err, rx_err, link_err = 0; /* Caller of this function is expected to set the * vsi->state __ICE_DOWN bit @@ -2961,7 +3058,8 @@ int ice_down(struct ice_vsi *vsi) } ice_vsi_dis_irq(vsi); - tx_err = ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0); + + tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); if (tx_err) netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", @@ -2975,13 +3073,21 @@ int ice_down(struct ice_vsi *vsi) ice_napi_disable_all(vsi); + if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { + link_err = ice_force_phys_link_state(vsi, false); + if (link_err) + netdev_err(vsi->netdev, + "Failed to set physical link down, VSI %d error %d\n", + vsi->vsi_num, link_err); + } + ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); - if (tx_err || rx_err) { + if (tx_err || rx_err || link_err) { netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); @@ -3641,7 +3747,8 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) */ static int ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, - u16 __always_unused flags, struct netlink_ext_ack *extack) + u16 __always_unused flags, + struct netlink_ext_ack __always_unused *extack) { struct ice_netdev_priv *np = netdev_priv(dev); struct ice_pf *pf = np->vsi->back; @@ -3814,8 +3921,14 @@ static int ice_open(struct net_device *netdev) netif_carrier_off(netdev); - err = ice_vsi_open(vsi); + err = ice_force_phys_link_state(vsi, true); + if (err) { + netdev_err(netdev, + "Failed to set physical link up, error %d\n", err); + return err; + 
} + err = ice_vsi_open(vsi); if (err) netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 3274c543283c..ce64cecdae9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -125,6 +125,62 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) } /** + * ice_read_sr_buf_aq - Reads Shadow RAM buf via AQ + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @words: (in) number of words to read; (out) number of words actually read + * @data: words read from the Shadow RAM + * + * Reads 16 bit words (data buf) from the SR using the ice_read_sr_aq + * method. Ownership of the NVM is taken before reading the buffer and later + * released. + */ +static enum ice_status +ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) +{ + enum ice_status status; + bool last_cmd = false; + u16 words_read = 0; + u16 i = 0; + + do { + u16 read_size, off_w; + + /* Calculate number of bytes we should read in this step. + * It's not allowed to read more than one page at a time or + * to cross page boundaries. + */ + off_w = offset % ICE_SR_SECTOR_SIZE_IN_WORDS; + read_size = off_w ? + min(*words, + (u16)(ICE_SR_SECTOR_SIZE_IN_WORDS - off_w)) : + min((*words - words_read), ICE_SR_SECTOR_SIZE_IN_WORDS); + + /* Check if this is last command, if so set proper flag */ + if ((words_read + read_size) >= *words) + last_cmd = true; + + status = ice_read_sr_aq(hw, offset, read_size, + data + words_read, last_cmd); + if (status) + goto read_nvm_buf_aq_exit; + + /* Increment counter for words already read and move offset to + * new read location + */ + words_read += read_size; + offset += read_size; + } while (words_read < *words); + + for (i = 0; i < *words; i++) + data[i] = le16_to_cpu(((__le16 *)data)[i]); + +read_nvm_buf_aq_exit: + *words = words_read; + return status; +} + +/** * ice_acquire_nvm - Generic request for acquiring the NVM ownership * @hw: pointer to the HW structure * @access: NVM access type (read or write) @@ -234,3 +290,28 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } + +/** + * ice_read_sr_buf - Reads Shadow RAM buf and acquire lock if necessary + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @words: (in) number of words to read; (out) number of words actually read + * @data: words read from the Shadow RAM + * + * Reads 16 bit words (data buf) from the SR using the ice_read_nvm_buf_aq + * method. The buf read is preceded by the NVM ownership take + * and followed by the release. 
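The sector-boundary chunking in ice_read_sr_buf_aq() above can be traced with a small stand-alone loop (slightly simplified from the driver's min() expressions). The 2048-word sector size below is an assumption, since the ICE_SR_SECTOR_SIZE_IN_WORDS define is not part of this hunk:

#include <stdio.h>

#define ICE_SR_SECTOR_SIZE_IN_WORDS 2048	/* assumed value */

int main(void)
{
	unsigned int offset = 2040, words = 16, words_read = 0;

	while (words_read < words) {
		unsigned int off_w = offset % ICE_SR_SECTOR_SIZE_IN_WORDS;
		unsigned int room = off_w ? ICE_SR_SECTOR_SIZE_IN_WORDS - off_w
					  : ICE_SR_SECTOR_SIZE_IN_WORDS;
		unsigned int left = words - words_read;
		unsigned int read_size = left < room ? left : room;

		/* prints "read 8 words at offset 2040" then
		 * "read 8 words at offset 2048"
		 */
		printf("read %u words at offset %u\n", read_size, offset);
		words_read += read_size;
		offset += read_size;
	}
	return 0;
}

A 16-word read that starts eight words shy of a sector boundary is split into two eight-word commands, the second beginning exactly on the boundary, which is the constraint the in-line comment in ice_read_sr_buf_aq() describes.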
+ */ +enum ice_status +ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) +{ + enum ice_status status; + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (!status) { + status = ice_read_sr_buf_aq(hw, offset, words, data); + ice_release_nvm(hw); + } + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index a1681853df2e..fb38e8be1e2e 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -85,37 +85,59 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid) } /** - * ice_aq_query_sched_elems - query scheduler elements + * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd * @hw: pointer to the hw struct - * @elems_req: number of elements to query + * @cmd_opc: cmd opcode + * @elems_req: number of elements to request * @buf: pointer to buffer * @buf_size: buffer size in bytes - * @elems_ret: returns total number of elements returned + * @elems_resp: returns total number of elements response * @cd: pointer to command details structure or NULL * - * Query scheduling elements (0x0404) + * This function sends a scheduling elements cmd (cmd_opc) */ static enum ice_status -ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, - struct ice_aqc_get_elem *buf, u16 buf_size, - u16 *elems_ret, struct ice_sq_cd *cd) +ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc, + u16 elems_req, void *buf, u16 buf_size, + u16 *elems_resp, struct ice_sq_cd *cd) { - struct ice_aqc_get_cfg_elem *cmd; + struct ice_aqc_sched_elem_cmd *cmd; struct ice_aq_desc desc; enum ice_status status; - cmd = &desc.params.get_update_elem; - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sched_elems); + cmd = &desc.params.sched_elem_cmd; + ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc); cmd->num_elem_req = cpu_to_le16(elems_req); desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); - if (!status && elems_ret) - *elems_ret = le16_to_cpu(cmd->num_elem_resp); + if (!status && elems_resp) + *elems_resp = le16_to_cpu(cmd->num_elem_resp); return status; } /** + * ice_aq_query_sched_elems - query scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to query + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements returned + * @cd: pointer to command details structure or NULL + * + * Query scheduling elements (0x0404) + */ +static enum ice_status +ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_get_elem *buf, u16 buf_size, + u16 *elems_ret, struct ice_sq_cd *cd) +{ + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems, + elems_req, (void *)buf, buf_size, + elems_ret, cd); +} + +/** * ice_sched_query_elem - query element information from hw * @hw: pointer to the hw struct * @node_teid: node teid to be queried @@ -218,20 +240,9 @@ ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req, struct ice_aqc_delete_elem *buf, u16 buf_size, u16 *grps_del, struct ice_sq_cd *cd) { - struct ice_aqc_add_move_delete_elem *cmd; - struct ice_aq_desc desc; - enum ice_status status; - - cmd = &desc.params.add_move_delete_elem; - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems); - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); - cmd->num_grps_req = cpu_to_le16(grps_req); - - status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); - if (!status && grps_del) - 
*grps_del = le16_to_cpu(cmd->num_grps_updated); - - return status; + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems, + grps_req, (void *)buf, buf_size, + grps_del, cd); } /** @@ -442,52 +453,9 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req, struct ice_aqc_add_elem *buf, u16 buf_size, u16 *grps_added, struct ice_sq_cd *cd) { - struct ice_aqc_add_move_delete_elem *cmd; - struct ice_aq_desc desc; - enum ice_status status; - - cmd = &desc.params.add_move_delete_elem; - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems); - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); - - cmd->num_grps_req = cpu_to_le16(grps_req); - status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); - if (!status && grps_added) - *grps_added = le16_to_cpu(cmd->num_grps_updated); - - return status; -} - -/** - * ice_suspend_resume_elems - suspend/resume scheduler elements - * @hw: pointer to the hw struct - * @elems_req: number of elements to suspend - * @buf: pointer to buffer - * @buf_size: buffer size in bytes - * @elems_ret: returns total number of elements suspended - * @cd: pointer to command details structure or NULL - * @cmd_code: command code for suspend or resume - * - * suspend/resume scheduler elements - */ -static enum ice_status -ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req, - struct ice_aqc_suspend_resume_elem *buf, u16 buf_size, - u16 *elems_ret, struct ice_sq_cd *cd, - enum ice_adminq_opc cmd_code) -{ - struct ice_aqc_get_cfg_elem *cmd; - struct ice_aq_desc desc; - enum ice_status status; - - cmd = &desc.params.get_update_elem; - ice_fill_dflt_direct_cmd_desc(&desc, cmd_code); - cmd->num_elem_req = cpu_to_le16(elems_req); - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); - status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); - if (!status && elems_ret) - *elems_ret = le16_to_cpu(cmd->num_elem_resp); - return status; + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems, + grps_req, (void *)buf, buf_size, + grps_added, cd); } /** @@ -506,8 +474,9 @@ ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req, struct ice_aqc_suspend_resume_elem *buf, u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) { - return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, - cd, ice_aqc_opc_suspend_sched_elems); + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems, + elems_req, (void *)buf, buf_size, + elems_ret, cd); } /** @@ -526,8 +495,9 @@ ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req, struct ice_aqc_suspend_resume_elem *buf, u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) { - return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, - cd, ice_aqc_opc_resume_sched_elems); + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems, + elems_req, (void *)buf, buf_size, + elems_ret, cd); } /** @@ -591,23 +561,18 @@ ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids, } /** - * ice_sched_clear_tx_topo - clears the schduler tree nodes - * @pi: port information structure + * ice_sched_clear_agg - clears the agg related information + * @hw: pointer to the hardware structure * - * This function removes all the nodes from HW as well as from SW DB. + * This function removes agg list and free up agg related memory + * previously allocated. 
*/ -static void ice_sched_clear_tx_topo(struct ice_port_info *pi) +void ice_sched_clear_agg(struct ice_hw *hw) { struct ice_sched_agg_info *agg_info; struct ice_sched_agg_info *atmp; - struct ice_hw *hw; - - if (!pi) - return; - - hw = pi->hw; - list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) { + list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) { struct ice_sched_agg_vsi_info *agg_vsi_info; struct ice_sched_agg_vsi_info *vtmp; @@ -616,8 +581,21 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi) list_del(&agg_vsi_info->list_entry); devm_kfree(ice_hw_to_dev(hw), agg_vsi_info); } + list_del(&agg_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), agg_info); } +} +/** + * ice_sched_clear_tx_topo - clears the scheduler tree nodes + * @pi: port information structure + * + * This function removes all the nodes from HW as well as from SW DB. + */ +static void ice_sched_clear_tx_topo(struct ice_port_info *pi) +{ + if (!pi) + return; if (pi->root) { ice_free_sched_node(pi, pi->root); pi->root = NULL; @@ -1035,7 +1013,6 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi) /* initialize the port for handling the scheduler tree */ pi->port_state = ICE_SCHED_PORT_STATE_READY; mutex_init(&pi->sched_lock); - INIT_LIST_HEAD(&pi->agg_list); err_init_port: if (status && pi->root) { @@ -1618,7 +1595,8 @@ ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle) struct ice_sched_agg_info *agg_info; struct ice_sched_agg_info *atmp; - list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) { + list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list, + list_entry) { struct ice_sched_agg_vsi_info *agg_vsi_info; struct ice_sched_agg_vsi_info *vtmp; diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index da5b4c166da8..bee8221ad146 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -28,6 +28,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi); enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); void ice_sched_clear_port(struct ice_port_info *pi); void ice_sched_cleanup_all(struct ice_hw *hw); +void ice_sched_clear_agg(struct ice_hw *hw); + struct ice_sched_node * ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid); enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 533b989a23e1..d2db0d04e117 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -85,6 +85,12 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) case ICE_AQ_LINK_SPEED_40GB: speed = ICE_LINK_SPEED_40000MBPS; break; + case ICE_AQ_LINK_SPEED_50GB: + speed = ICE_LINK_SPEED_50000MBPS; + break; + case ICE_AQ_LINK_SPEED_100GB: + speed = ICE_LINK_SPEED_100000MBPS; + break; default: speed = ICE_LINK_SPEED_UNKNOWN; break; @@ -116,6 +122,9 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) break; case ICE_AQ_LINK_SPEED_40GB: /* fall through */ + case ICE_AQ_LINK_SPEED_50GB: + /* fall through */ + case ICE_AQ_LINK_SPEED_100GB: speed = (u32)VIRTCHNL_LINK_SPEED_40GB; break; default: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 49fc38094185..2357fcac996b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1053,6 +1053,69 @@ static int 
ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) } /** + * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register + * @itr_idx: interrupt throttling index + * @reg_itr: interrupt throttling value adjusted based on ITR granularity + */ +static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr) +{ + return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | + (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) | + (reg_itr << GLINT_DYN_CTL_INTERVAL_S); +} + +/** + * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt + * @vsi: the VSI associated with the q_vector + * @q_vector: q_vector for which ITR is being updated and interrupt enabled + */ +static void +ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_ring_container *rc; + u32 itr_val; + + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. + */ + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + rc = &q_vector->rx; + /* Rx ITR needs to be reduced, this is highest priority */ + itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); + rc->current_itr = rc->target_itr; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + rc = &q_vector->tx; + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority + */ + itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); + rc->current_itr = rc->target_itr; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + rc = &q_vector->rx; + /* Rx ITR needs to be increased, third priority */ + itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); + rc->current_itr = rc->target_itr; + } else { + /* Still have to re-enable the interrupts */ + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + } + + if (!test_bit(__ICE_DOWN, vsi->state)) { + int vector = vsi->hw_base_vector + q_vector->v_idx; + + wr32(hw, GLINT_DYN_CTL(vector), itr_val); + } +} + +/** * ice_napi_poll - NAPI polling Rx/Tx cleanup routine * @napi: napi struct with our devices info in it * @budget: amount of work driver is allowed to do this pass, in packets @@ -1108,7 +1171,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) */ if (likely(napi_complete_done(napi, work_done))) if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector); + ice_update_ena_itr(vsi, q_vector); return min(work_done, budget - 1); } @@ -1402,6 +1465,12 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; break; case IPPROTO_SCTP: + /* enable SCTP checksum offload */ + cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP; + l4_len = sizeof(struct sctphdr) >> 2; + offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; + break; + default: if (first->tx_flags & ICE_TX_FLAGS_TSO) return -1; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 75d0eaf6c9dd..fc358ea81816 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -116,16 +116,17 @@ enum 
ice_rx_dtype { /* indices into GLINT_ITR registers */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 -#define ICE_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define ICE_ITR_8K 125 +#define ICE_ITR_8K 124 #define ICE_ITR_20K 50 -#define ICE_DFLT_TX_ITR ICE_ITR_20K -#define ICE_DFLT_RX_ITR ICE_ITR_20K -/* apply ITR granularity translation to program the register. itr_gran is either - * 2 or 4 usecs so we need to divide by 2 first then shift by that value - */ -#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> \ - ((itr_gran) / 2)) +#define ICE_ITR_MAX 8160 +#define ICE_DFLT_TX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) +#define ICE_DFLT_RX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) +#define ICE_ITR_DYNAMIC 0x8000 /* used as flag for itr_setting */ +#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC)) +#define ITR_TO_REG(setting) ((setting) & ~ICE_ITR_DYNAMIC) +#define ICE_ITR_GRAN_S 1 /* Assume ITR granularity is 2us */ +#define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) #define ICE_DFLT_INTRL 0 @@ -180,13 +181,20 @@ enum ice_latency_range { }; struct ice_ring_container { - /* array of pointers to rings */ + /* head of linked-list of rings */ struct ice_ring *ring; + unsigned long next_update; /* jiffies value of next queue update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_pkts; /* total packets processed this int */ enum ice_latency_range latency_range; - int itr_idx; /* index in the interrupt vector */ - u16 itr; + int itr_idx; /* index in the interrupt vector */ + u16 target_itr; /* value in usecs divided by the hw->itr_gran */ + u16 current_itr; /* value in usecs divided by the hw->itr_gran */ + /* high bit set means dynamic ITR, rest is used to store user + * readable ITR value in usecs and must be converted before programming + * to a register. 
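/* A worked example of the reworked ITR encoding, assuming the 2 us register
 * granularity that ICE_ITR_GRAN_S and ICE_ITR_MASK imply; this helper is a
 * sketch for illustration only:
 */
static u16 ice_example_itr_to_ticks(u16 itr_setting)
{
	/* e.g. ICE_DFLT_RX_ITR == 50 | 0x8000: 50 us, dynamic flag set */
	u16 usecs = ITR_TO_REG(itr_setting);	/* strips ICE_ITR_DYNAMIC */

	/* round up to a multiple of 2 us so it fits the register field */
	usecs = ITR_REG_ALIGN(usecs);

	/* 50 us -> 25 hardware ticks of 2 us each, the kind of value that
	 * ice_update_ena_itr() ultimately feeds into ice_buildreg_itr()
	 */
	return usecs >> ICE_ITR_GRAN_S;
}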
+ */ + u16 itr_setting; }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 0ea428104215..17086d5b5c33 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -90,6 +90,7 @@ enum ice_vsi_type { struct ice_link_status { /* Refer to ice_aq_phy_type for bits definition */ u64 phy_type_low; + u64 phy_type_high; u16 max_frame_size; u16 link_speed; u16 req_speeds; @@ -118,6 +119,7 @@ struct ice_phy_info { struct ice_link_status link_info; struct ice_link_status link_info_old; u64 phy_type_low; + u64 phy_type_high; enum ice_media_type media_type; u8 get_link_info; }; @@ -272,7 +274,6 @@ struct ice_port_info { struct ice_mac_info mac; struct ice_phy_info phy; struct mutex sched_lock; /* protect access to TXSched tree */ - struct list_head agg_list; /* lists all aggregator */ u8 lport; #define ICE_LPORT_MASK 0xff u8 is_vf; @@ -326,6 +327,7 @@ struct ice_hw { u8 max_cgds; u8 sw_entry_point_layer; u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM]; + struct list_head agg_list; /* lists all aggregator */ struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI]; u8 evb_veb; /* true for VEB, false for VEPA */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 05ff4f910649..80b50e67cbef 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -224,13 +224,15 @@ void ice_free_vfs(struct ice_pf *pf) /* Avoid wait time by stopping all VFs at the same time */ for (i = 0; i < pf->num_alloc_vfs; i++) { + struct ice_vsi *vsi; + if (!test_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states)) continue; + vsi = pf->vsi[pf->vf[i].lan_vsi_idx]; /* stop rings without wait time */ - ice_vsi_stop_tx_rings(pf->vsi[pf->vf[i].lan_vsi_idx], - ICE_NO_RESET, i); - ice_vsi_stop_rx_rings(pf->vsi[pf->vf[i].lan_vsi_idx]); + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, i); + ice_vsi_stop_rx_rings(vsi); clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states); } @@ -831,6 +833,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) { struct ice_pf *pf = vf->pf; struct ice_hw *hw = &pf->hw; + struct ice_vsi *vsi; bool rsd = false; u32 reg; int i; @@ -843,17 +846,18 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) ice_trigger_vf_reset(vf, is_vflr); + vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { - ice_vsi_stop_tx_rings(pf->vsi[vf->lan_vsi_idx], ICE_VF_RESET, - vf->vf_id); - ice_vsi_stop_rx_rings(pf->vsi[vf->lan_vsi_idx]); + ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id); + ice_vsi_stop_rx_rings(vsi); clear_bit(ICE_VF_STATE_ENA, vf->vf_states); } else { /* Call Disable LAN Tx queue AQ call even when queues are not * enabled. 
This is needed for successful completiom of VFR */ - ice_dis_vsi_txq(pf->vsi[vf->lan_vsi_idx]->port_info, 0, - NULL, NULL, ICE_VF_RESET, vf->vf_id, NULL); + ice_dis_vsi_txq(vsi->port_info, 0, NULL, NULL, ICE_VF_RESET, + vf->vf_id, NULL); } /* poll VPGEN_VFRSTAT reg to make sure @@ -1614,7 +1618,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - if (ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, vf->vf_id)) { + if (ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id)) { dev_err(&vsi->back->pdev->dev, "Failed to stop tx rings on VSI %d\n", vsi->vsi_num); @@ -1784,7 +1788,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vsi->num_txq = qci->num_queue_pairs; vsi->num_rxq = qci->num_queue_pairs; - if (!ice_vsi_cfg_txqs(vsi) && !ice_vsi_cfg_rxqs(vsi)) + if (!ice_vsi_cfg_lan_txqs(vsi) && !ice_vsi_cfg_rxqs(vsi)) aq_ret = 0; else aq_ret = ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7137e7f9c7f3..dfa357b1a9d6 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2486,7 +2486,8 @@ static int igb_set_features(struct net_device *netdev, static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index daff8183534b..b53087a980ef 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9910,7 +9910,8 @@ static void ixgbe_del_udp_tunnel_port(struct net_device *dev, static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index a5ab6f3403ae..763ee5281177 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2034,10 +2034,9 @@ static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int count) ctxbi->len, PCI_DMA_TODEVICE); - ctxbi->mapping = 0; - ctxbi->len = 0; + ctxbi->mapping = 0; + ctxbi->len = 0; } - } static int diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e65bc3c95630..c19e74e6ac94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -3274,7 +3274,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", link_state, slave, port); return -EINVAL; - }; + } s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->link_state = link_state; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 2df92dbd38e1..4953c852c247 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -820,7 +820,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? 
"HW" : "SW"); break; - }; + } ++eq->cons_index; eqes_found = 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..6bb2a860b15b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o en/monitor_stats.o + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8fa8fdd30b85..27e276c9bf84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -388,10 +388,7 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; - struct mlx5e_txqsq_recover { - struct work_struct recover_work; - u64 last_recover; - } recover; + struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -682,6 +679,13 @@ struct mlx5e_rss_params { u8 hfunc; }; +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -737,6 +741,7 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif + struct devlink_health_reporter *tx_reporter; }; struct mlx5e_profile { @@ -866,6 +871,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); + static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h new file mode 100644 index 000000000000..2335c5b48820 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5E_EN_REPORTER_H +#define __MLX5E_EN_REPORTER_H + +#include <linux/mlx5/driver.h> +#include "en.h" + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); +void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 000000000000..d9675afbb924 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include <net/devlink.h> +#include "reporter.h" +#include "lib/eq.h" + +#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 + +struct mlx5e_tx_err_ctx { + int (*recover)(struct mlx5e_txqsq *sq); + struct mlx5e_txqsq *sq; +}; + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return err; + } + + if (state != MLX5_RQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return -EINVAL; + } + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + return err; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. 
+ */ + + err = mlx5e_sq_to_ready(sq, state); + if (err) + return err; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + mlx5e_activate_txqsq(sq); + + return 0; +} + +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx = {0}; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); + + devlink_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; + u32 eqe_count; + + netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + return 1; + } + + netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + sq->channel->stats->eq_rearm++; + return 0; +} + +void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_err_ctx err_ctx; + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + sprintf(err_str, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + devlink_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +/* state lock cannot be grabbed within this function. + * It can cause a dead lock or a read-after-free. + */ +int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->sq); +} + +static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +{ + int err; + + mutex_lock(&priv->state_lock); + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_tx_err_ctx *err_ctx = context; + + return err_ctx ? 
mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_tx_reporter_recover_all(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer, + u32 sqn, u8 state, u8 stopped) +{ + int err, i; + int nest = 0; + char name[20]; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); + if (err) + goto buffer_error; + nest++; + + sprintf(name, "SQ 0x%x", sqn); + err = devlink_health_buffer_put_object_name(buffer, name); + if (err) + goto buffer_error; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_object_name(buffer, "HW state"); + if (err) + goto buffer_error; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_value_u8(buffer, state); + if (err) + goto buffer_error; + + devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */ + nest--; + + devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */ + nest--; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_object_name(buffer, "stopped"); + if (err) + goto buffer_error; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_value_u8(buffer, stopped); + if (err) + goto buffer_error; + + for (i = 0; i < nest; i++) + devlink_health_buffer_nest_end(buffer); + + return 0; + +buffer_error: + for (i = 0; i < nest; i++) + devlink_health_buffer_nest_cancel(buffer); + return err; +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_health_buffer **buffers_array, + unsigned int buffer_size, + unsigned int num_buffers) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + unsigned int buff = 0; + int i = 0, err = 0; + + if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN) + return -ENOMEM; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mutex_unlock(&priv->state_lock); + return 0; + } + + while (i < priv->channels.num * priv->channels.params.num_tc) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + u8 state; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + break; + + err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff], + sq->sqn, state, + netif_xmit_stopped(sq->txq)); + if (err) { + if (++buff == num_buffers) + break; + } else { + i++; + } + } + + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "TX", + .recover = mlx5e_tx_reporter_recover, + .diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC * + MLX5E_TX_REPORTER_PER_SQ_MAX_LEN, + .diagnose = mlx5e_tx_reporter_diagnose, + .dump_size = 0, + .dump = NULL, +}; + 
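/* With the reporter ops wired up, adding another TX error flow reduces to
 * the same pattern used by the ERR-CQE and timeout paths above: fill an
 * mlx5e_tx_err_ctx and let devlink decide, based on the reporter's grace
 * period and auto-recovery setting, whether the recover callback runs. A
 * hypothetical further error type would only need something like:
 */
static void mlx5e_tx_reporter_example_err(struct mlx5e_txqsq *sq)
{
	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
	struct mlx5e_tx_err_ctx err_ctx = {0};

	err_ctx.sq = sq;
	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; /* or its own */
	sprintf(err_str, "EXAMPLE ERR on SQ: 0x%x", sq->sqn);

	devlink_health_report(sq->channel->priv->tx_reporter, err_str,
			      &err_ctx);
}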
+#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct devlink *devlink = priv_to_devlink(mdev); + + priv->tx_reporter = + devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, + MLX5_REPORTER_TX_GRACEFUL_PERIOD, + true, priv); + return PTR_ERR_OR_ZERO(priv->tx_reporter); +} + +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +{ + devlink_health_reporter_destroy(priv->tx_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8cfd2ec7c0a2..dee0c8f3d4e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,6 +51,7 @@ #include "en/xdp.h" #include "lib/eq.h" #include "en/monitor_stats.h" +#include "en/reporter.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -1160,7 +1161,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static void mlx5e_sq_recover(struct work_struct *work); +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -1182,7 +1183,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) @@ -1270,15 +1271,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, return err; } -struct mlx5e_modify_sq_param { - int curr_state; - int next_state; - bool rl_update; - int rl_index; -}; - -static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p) +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) { void *in; void *sqc; @@ -1376,17 +1370,7 @@ err_free_txqsq: return err; } -static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) -{ - WARN_ONCE(sq->cc != sq->pc, - "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", - sq->sqn, sq->cc, sq->pc); - sq->cc = 0; - sq->dma_fifo_cc = 0; - sq->pc = 0; -} - -static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); @@ -1395,7 +1379,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) netif_tx_start_queue(sq->txq); } -static inline void netif_tx_disable_queue(struct netdev_queue *txq) +void mlx5e_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); netif_tx_stop_queue(txq); @@ -1411,7 +1395,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* prevent netif_tx_wake_queue */ napi_synchronize(&c->napi); - netif_tx_disable_queue(sq->txq); + mlx5e_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. 
*/ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { @@ -1431,6 +1415,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_rate_limit rl = {0}; cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1440,105 +1425,15 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } -static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) -{ - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); - - while (time_before(jiffies, exp_time)) { - if (sq->cc == sq->pc) - return 0; - - msleep(20); - } - - netdev_err(sq->channel->netdev, - "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", - sq->sqn, sq->cc, sq->pc); - - return -ETIMEDOUT; -} - -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) { - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; - int err; + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} - -static void mlx5e_sq_recover(struct work_struct *work) -{ - struct mlx5e_txqsq_recover *recover = - container_of(work, struct mlx5e_txqsq_recover, - recover_work); - struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, - recover); - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; - - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); - if (err) { - netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", - sq->sqn, err); - return; - } - - if (state != MLX5_RQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return; - } - - netif_tx_disable_queue(sq->txq); - - if (mlx5e_wait_for_sq_flush(sq)) + if (!sq->channel->priv->tx_reporter) return; - /* If the interval between two consecutive recovers per SQ is too - * short, don't recover to avoid infinite loop of ERR_CQE -> recover. - * If we reached this state, there is probably a bug that needs to be - * fixed. let's keep the queue close and let tx timeout cleanup. - */ - if (jiffies_to_msecs(jiffies - recover->last_recover) < - MLX5E_SQ_RECOVER_MIN_INTERVAL) { - netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", - sq->sqn); - return; - } - - /* At this point, no new packets will arrive from the stack as TXQ is - * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all - * pending WQEs. SQ can safely reset the SQ. 
- */ - if (mlx5e_sq_to_ready(sq, state)) - return; - - mlx5e_reset_txqsq_cc_pc(sq); - sq->stats->recover++; - recover->last_recover = jiffies; - mlx5e_activate_txqsq(sq); + mlx5e_tx_reporter_err_cqe(sq); } static int mlx5e_open_icosq(struct mlx5e_channel *c, @@ -3207,6 +3102,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; + mlx5e_tx_reporter_destroy(priv); for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } @@ -4178,31 +4074,14 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } -static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, - struct mlx5e_txqsq *sq) -{ - struct mlx5_eq_comp *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->core.eqn, eq->core.cons_index, eq->core.irqn); - - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) - return false; - - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); - sq->channel->stats->eq_rearm++; - return true; -} - static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - struct net_device *dev = priv->netdev; - bool reopen_channels = false; - int i, err; + int i; + + if (!priv->tx_reporter) + return; rtnl_lock(); mutex_lock(&priv->state_lock); @@ -4211,36 +4090,16 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); + struct netdev_queue *dev_queue = + netdev_get_tx_queue(priv->netdev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(dev_queue)) continue; - netdev_err(dev, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - dev_queue->trans_start)); - - /* If we recover a lost interrupt, most likely TX timeout will - * be resolved, skip reopening channels - */ - if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - reopen_channels = true; - } + mlx5e_tx_reporter_timeout(sq); } - if (!reopen_channels) - goto unlock; - - mlx5e_close_locked(dev); - err = mlx5e_open_locked(dev); - if (err) - netdev_err(priv->netdev, - "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", - err); - unlock: mutex_unlock(&priv->state_lock); rtnl_unlock(); @@ -4908,6 +4767,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif + mlx5e_tx_reporter_create(priv); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 598ad7e4d5c9..a8e052a5ce36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -514,7 +514,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); queue_work(cq->channel->priv->wq, - &sq->recover.recover_work); + &sq->recover_work); } stats->cqe_err++; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 9b48dffc9f63..5f8066ab7d40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5666,6 +5666,8 @@ enum mlxsw_reg_ritr_loopback_protocol { 
MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, /* IPinIP IPv6 underlay Unicast */ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, + /* IPinIP generic - used for Spectrum-2 underlay RIF */ + MLXSW_REG_RITR_LOOPBACK_GENERIC, }; /* reg_ritr_loopback_protocol @@ -5706,6 +5708,13 @@ MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); */ MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); +/* reg_ritr_loopback_ipip_underlay_rif + * Underlay ingress router interface. + * Reserved for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16); + /* reg_ritr_loopback_ipip_usip* * Encapsulation Underlay source IP. * Access: RW @@ -5821,11 +5830,12 @@ static inline void mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, - u16 uvr_id, u32 gre_key) + u16 uvr_id, u16 underlay_rif, u32 gre_key) { mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif); mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); } @@ -5833,12 +5843,12 @@ static inline void mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, - u16 uvr_id, u32 usip, u32 gre_key) + u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key) { mlxsw_reg_ritr_loopback_protocol_set(payload, MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, - uvr_id, gre_key); + uvr_id, underlay_rif, gre_key); mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); } @@ -7200,6 +7210,13 @@ MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); */ MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); +/* reg_rtdp_egress_router_interface + * Underlay egress router interface. 
+ * Valid range is from 0 to cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16); + /* IPinIP */ /* reg_rtdp_ipip_irif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 32519c93df17..a88169738b4a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4094,6 +4094,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4110,6 +4111,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a1c32a81b011..1fa5c81b209f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -75,6 +75,11 @@ enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_MAX, }; +struct mlxsw_sp_rif_ops; + +extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[]; +extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[]; + enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_8021Q, MLXSW_SP_FID_TYPE_8021D, @@ -161,6 +166,7 @@ struct mlxsw_sp { const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; + const struct mlxsw_sp_rif_ops **rif_ops_arr; }; static inline struct mlxsw_sp_upper * diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c index 505b87846acc..f5c381dcb015 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -234,8 +234,8 @@ mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks) * is 2^ACL_MAX_BF_LOG */ bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG); - bf = kzalloc(sizeof(*bf) + bf_bank_size * num_erp_banks * - sizeof(*bf->refcnt), GFP_KERNEL); + bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks), + GFP_KERNEL); if (!bf) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 41e607a14846..49933818c6f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -220,7 +220,7 @@ start_again: for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); - if (!rif) + if (!rif || !mlxsw_sp_rif_dev(rif)) continue; err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, counters_enabled); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 00db26c96bf5..6400cd644b7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -145,6 +145,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry 
*ipip_entry) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); char rtdp_pl[MLXSW_REG_RTDP_LEN]; struct ip_tunnel_parm parms; unsigned int type_check; @@ -157,6 +158,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, ikey = mlxsw_sp_ipip_parms4_ikey(parms); mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id); type_check = has_ikey ? MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index fb1c48c698f2..1df164a4b06d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -267,8 +267,8 @@ mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nve_mc_record *mc_record; int err; - mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * - sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); + mc_record = kzalloc(struct_size(mc_record, entries, num_max_entries), + GFP_KERNEL); if (!mc_record) return ERR_PTR(-ENOMEM); @@ -841,11 +841,9 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, nve->config = config; - err = ops->fdb_replay(params->dev, params->vni); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to offload the FDB"); + err = ops->fdb_replay(params->dev, params->vni, extack); + if (err) goto err_fdb_replay; - } return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h index 02937ea95bc3..20d99b41611d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -41,7 +41,8 @@ struct mlxsw_sp_nve_ops { int (*init)(struct mlxsw_sp_nve *nve, const struct mlxsw_sp_nve_config *config); void (*fini)(struct mlxsw_sp_nve *nve); - int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni); + int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni, + struct netlink_ext_ack *extack); void (*fdb_clear_offload)(const struct net_device *nve_dev, __be32 vni); }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index 74e564c4ac19..9ba0b83bd949 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -212,11 +212,13 @@ static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) } static int -mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni) +mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni, + struct netlink_ext_ack *extack) { if (WARN_ON(!netif_is_vxlan(nve_dev))) return -EINVAL; - return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier); + return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier, + extack); } static void diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 98e5ffd71b91..0949404a28e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -80,7 +80,7 @@ struct mlxsw_sp_router { struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; - struct net_device *dev; + struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; 
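/* Both kzalloc() conversions above rely on struct_size() from
 * <linux/overflow.h>, which computes sizeof(*p) + n * sizeof(p->member[0])
 * with overflow checking, so they are equivalent to the open-coded sums
 * they replace. The general pattern, using a made-up struct:
 */
struct example_rec {
	int refcount;
	u64 entries[];	/* flexible array member */
};

static struct example_rec *example_rec_alloc(size_t n)
{
	struct example_rec *rec;

	/* rec is used only inside sizeof() here, so no init is needed */
	rec = kzalloc(struct_size(rec, entries, n), GFP_KERNEL);
	return rec;
}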
unsigned char addr[ETH_ALEN]; int mtu; @@ -120,6 +120,7 @@ struct mlxsw_sp_rif_ipip_lb { struct mlxsw_sp_rif common; struct mlxsw_sp_rif_ipip_lb_config lb_config; u16 ul_vr_id; /* Reserved for Spectrum-2. */ + u16 ul_rif_id; /* Reserved for Spectrum. */ }; struct mlxsw_sp_rif_params_ipip_lb { @@ -440,6 +441,8 @@ struct mlxsw_sp_vr { struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX]; + struct mlxsw_sp_rif *ul_rif; + refcount_t ul_rif_refcnt; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -1437,8 +1440,8 @@ mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, - struct mlxsw_sp_vr *ul_vr, bool enable) +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, + u16 ul_rif_id, bool enable) { struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; struct mlxsw_sp_rif *rif = &lb_rif->common; @@ -1453,7 +1456,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, rif->rif_index, rif->vr_id, rif->dev->mtu); mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, - ul_vr->id, saddr4, lb_cf.okey); + ul_vr_id, ul_rif_id, saddr4, lb_cf.okey); break; case MLXSW_SP_L3_PROTO_IPV6: @@ -1468,14 +1471,13 @@ static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif_ipip_lb *lb_rif; - struct mlxsw_sp_vr *ul_vr; int err = 0; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); if (ipip_entry) { lb_rif = ipip_entry->ol_lb; - ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; - err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id, + lb_rif->ul_rif_id, true); if (err) goto out; lb_rif->common.mtu = ol_dev->mtu; @@ -6224,10 +6226,12 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); - ether_addr_copy(rif->addr, l3_dev->dev_addr); - rif->mtu = l3_dev->mtu; + if (l3_dev) { + ether_addr_copy(rif->addr, l3_dev->dev_addr); + rif->mtu = l3_dev->mtu; + rif->dev = l3_dev; + } rif->vr_id = vr_id; - rif->dev = l3_dev; rif->rif_index = rif_index; return rif; @@ -6251,7 +6255,19 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { - return lb_rif->ul_vr_id; + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); + struct mlxsw_sp_vr *ul_vr; + + ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); + if (WARN_ON(IS_ERR(ul_vr))) + return 0; + + return ul_vr->id; +} + +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_rif_id; } int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) @@ -6284,7 +6300,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, int i, err; type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); - ops = mlxsw_sp->router->rif_ops_arr[type]; + ops = mlxsw_sp->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
: RT_TABLE_MAIN, extack); if (IS_ERR(vr)) @@ -6303,6 +6319,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_rif_alloc; } dev_hold(rif->dev); + mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; @@ -6329,7 +6346,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, } mlxsw_sp_rif_counters_alloc(rif); - mlxsw_sp->router->rifs[rif_index] = rif; return rif; @@ -6341,6 +6357,7 @@ err_configure: if (fid) mlxsw_sp_fid_put(fid); err_fid_get: + mlxsw_sp->router->rifs[rif_index] = NULL; dev_put(rif->dev); kfree(rif); err_rif_alloc: @@ -6361,7 +6378,6 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -6369,6 +6385,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) if (fid) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); + mlxsw_sp->router->rifs[rif->rif_index] = NULL; dev_put(rif->dev); kfree(rif); vr->rif_count--; @@ -6750,7 +6767,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { rif = mlxsw_sp->router->rifs[i]; - if (rif && rif->dev != dev && + if (rif && rif->dev && rif->dev != dev && !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, mlxsw_sp->mac_mask)) { NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix"); @@ -7294,7 +7311,8 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) info.addr = mac; info.vid = vid; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); } static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { @@ -7381,7 +7399,8 @@ static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) info.addr = mac; info.vid = 0; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); } static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { @@ -7422,7 +7441,7 @@ mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, } static int -mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); @@ -7434,11 +7453,12 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); - err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true); if (err) goto err_loopback_op; lb_rif->ul_vr_id = ul_vr->id; + lb_rif->ul_rif_id = 0; ++ul_vr->rif_count; return 0; @@ -7447,32 +7467,185 @@ err_loopback_op: return err; } -static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_vr *ul_vr; ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; - mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false); --ul_vr->rif_count; mlxsw_sp_vr_put(mlxsw_sp, 
ul_vr); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { +static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { .type = MLXSW_SP_RIF_TYPE_IPIP_LB, .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), .setup = mlxsw_sp_rif_ipip_lb_setup, - .configure = mlxsw_sp_rif_ipip_lb_configure, - .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, + .configure = mlxsw_sp1_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure, }; -static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { +const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, - [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops, +}; + +static int +mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU); + mlxsw_reg_ritr_loopback_protocol_set(ritr_pl, + MLXSW_REG_RITR_LOOPBACK_GENERIC); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *ul_rif; + u16 rif_index; + int err; + + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); + return ERR_PTR(err); + } + + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + if (!ul_rif) + return ERR_PTR(-ENOMEM); + + mlxsw_sp->router->rifs[rif_index] = ul_rif; + ul_rif->mlxsw_sp = mlxsw_sp; + err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); + if (err) + goto ul_rif_op_err; + + return ul_rif; + +ul_rif_op_err: + mlxsw_sp->router->rifs[rif_index] = NULL; + kfree(ul_rif); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + + mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); + mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; + kfree(ul_rif); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) + return vr->ul_rif; + + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + if (IS_ERR(vr->ul_rif)) { + err = PTR_ERR(vr->ul_rif); + goto err_ul_rif_create; + } + + vr->rif_count++; + refcount_set(&vr->ul_rif_refcnt, 1); + + return vr->ul_rif; + +err_ul_rif_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + struct mlxsw_sp_vr *vr; + + vr = &mlxsw_sp->router->vrs[ul_rif->vr_id]; + + if (!refcount_dec_and_test(&vr->ul_rif_refcnt)) + return; + + vr->rif_count--; + mlxsw_sp_ul_rif_destroy(ul_rif); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +static int +mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct 
mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + int err; + + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) + return PTR_ERR(ul_rif); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = 0; + lb_rif->ul_rif_id = ul_rif->rif_index; + + return 0; + +err_loopback_op: + mlxsw_sp_ul_rif_put(ul_rif); + return err; +} + +static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + + ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id); + mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false); + mlxsw_sp_ul_rif_put(ul_rif); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp2_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure, +}; + +const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { + [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) @@ -7485,8 +7658,6 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->router->rifs) return -ENOMEM; - mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr; - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 3dbafdeaab2b..cc1de91e8217 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -29,6 +29,7 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c772109b638d..0f4e68d31cc3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2443,7 +2443,7 @@ static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev, ether_addr_copy(info.eth_addr, mac); info.vni = vni; info.offloaded = adding; - call_switchdev_notifiers(type, dev, &info.info); + call_switchdev_notifiers(type, dev, &info.info, NULL); } static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev, @@ -2468,7 +2468,7 @@ mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, info.addr = mac; info.vid = vid; info.offloaded = offloaded; - call_switchdev_notifiers(type, dev, &info.info); + call_switchdev_notifiers(type, dev, &info.info, NULL); } static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, @@ -2819,7 +2819,7 @@ 
mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, return; vxlan_fdb_info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info.info); + &vxlan_fdb_info.info, NULL); mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, vxlan_fdb_info.eth_addr, fdb_info->vid, dev, true); @@ -2832,7 +2832,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, false); vxlan_fdb_info.offloaded = false; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info.info); + &vxlan_fdb_info.info, NULL); break; } } @@ -2977,7 +2977,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, } vxlan_fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info->info); + &vxlan_fdb_info->info, NULL); mlxsw_sp_fid_put(fid); return; } @@ -2998,7 +2998,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, goto err_fdb_tunnel_uc_op; vxlan_fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info->info); + &vxlan_fdb_info->info, NULL); mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, vxlan_fdb_info->eth_addr, vid, dev, true); @@ -3099,23 +3099,34 @@ mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; struct vxlan_config *cfg = &vxlan->cfg; + struct netlink_ext_ack *extack; + extack = switchdev_notifier_info_to_extack(info); vxlan_fdb_info = container_of(info, struct switchdev_notifier_vxlan_fdb_info, info); - if (vxlan_fdb_info->remote_port != cfg->dst_port) - return -EOPNOTSUPP; - if (vxlan_fdb_info->remote_vni != cfg->vni) + if (vxlan_fdb_info->remote_port != cfg->dst_port) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default remote port is not supported"); return -EOPNOTSUPP; - if (vxlan_fdb_info->vni != cfg->vni) + } + if (vxlan_fdb_info->remote_vni != cfg->vni || + vxlan_fdb_info->vni != cfg->vni) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default VNI is not supported"); return -EOPNOTSUPP; - if (vxlan_fdb_info->remote_ifindex) + } + if (vxlan_fdb_info->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Local interface is not supported"); return -EOPNOTSUPP; - if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) + } + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast MAC addresses not supported"); return -EOPNOTSUPP; - if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) + } + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast destination IP is not supported"); return -EOPNOTSUPP; + } switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 215a45374d7b..c6a575eb0ff5 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -721,7 +721,8 @@ static void ocelot_get_stats64(struct net_device *dev, static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, u16 flags) + u16 vid, u16 flags, + struct netlink_ext_ack *extack) { struct ocelot_port *port = netdev_priv(dev); struct ocelot *ocelot = port->ocelot; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5ae3fa82909f..0da7393b2ef3 100644 --- 
a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2553,7 +2553,7 @@ static int vxge_add_isr(struct vxgedev *vdev) vxge_debug_init(VXGE_ERR, "%s: Defaulting to INTA", vdev->ndev->name); - goto INTA_MODE; + goto INTA_MODE; } msix_idx = (vdev->vpaths[0].handle->vpath->vp_id * diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 4c5eaf36d5bb..56b22ea32474 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -203,7 +203,7 @@ nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb) } atomic_inc(&priv->reify_replies); - wake_up_interruptible(&priv->reify_wait_queue); + wake_up(&priv->reify_wait_queue); } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 15f41cfef9f1..4fcaf11ed56e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -97,6 +97,9 @@ #define NFP_FLOWER_WORKQ_MAX_SKBS 30000 +/* Cmesg reply (empirical) timeout*/ +#define NFP_FL_REPLY_TIMEOUT msecs_to_jiffies(40) + #define nfp_flower_cmsg_warn(app, fmt, args...) \ do { \ if (net_ratelimit()) \ diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 5059110a1768..408089133599 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -32,6 +32,71 @@ static enum devlink_eswitch_mode eswitch_mode_get(struct nfp_app *app) return DEVLINK_ESWITCH_MODE_SWITCHDEV; } +static struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_lookup(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_non_repr_priv *entry; + + ASSERT_RTNL(); + + list_for_each_entry(entry, &priv->non_repr_priv, list) + if (entry->netdev == netdev) + return entry; + + return NULL; +} + +void +__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv) +{ + non_repr_priv->ref_count++; +} + +struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_non_repr_priv *entry; + + entry = nfp_flower_non_repr_priv_lookup(app, netdev); + if (entry) + goto inc_ref; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->netdev = netdev; + list_add(&entry->list, &priv->non_repr_priv); + +inc_ref: + __nfp_flower_non_repr_priv_get(entry); + return entry; +} + +void +__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv) +{ + if (--non_repr_priv->ref_count) + return; + + list_del(&non_repr_priv->list); + kfree(non_repr_priv); +} + +void +nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_non_repr_priv *entry; + + entry = nfp_flower_non_repr_priv_lookup(app, netdev); + if (!entry) + return; + + __nfp_flower_non_repr_priv_put(entry); +} + static enum nfp_repr_type nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port) { @@ -107,16 +172,14 @@ static int nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl) { struct nfp_flower_priv *priv = app->priv; - int err; if (!tot_repl) return 0; lockdep_assert_held(&app->pf->lock); - err = 
wait_event_interruptible_timeout(priv->reify_wait_queue, - atomic_read(replies) >= tot_repl, - msecs_to_jiffies(10)); - if (err <= 0) { + if (!wait_event_timeout(priv->reify_wait_queue, + atomic_read(replies) >= tot_repl, + NFP_FL_REPLY_TIMEOUT)) { nfp_warn(app->cpp, "Not all reprs responded to reify\n"); return -EIO; } @@ -223,6 +286,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, nfp_repr = netdev_priv(repr); nfp_repr->app_priv = repr_priv; + repr_priv->nfp_repr = nfp_repr; /* For now we only support 1 PF */ WARN_ON(repr_type == NFP_REPR_TYPE_PF && i); @@ -337,6 +401,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) nfp_repr = netdev_priv(repr); nfp_repr->app_priv = repr_priv; + repr_priv->nfp_repr = nfp_repr; port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr); if (IS_ERR(port)) { @@ -476,8 +541,8 @@ err_clear_nn: static int nfp_flower_init(struct nfp_app *app) { + u64 version, features, ctx_count, num_mems; const struct nfp_pf *pf = app->pf; - u64 version, features, ctx_count; struct nfp_flower_priv *app_priv; int err; @@ -502,6 +567,23 @@ static int nfp_flower_init(struct nfp_app *app) return err; } + num_mems = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_SPLIT", + &err); + if (err) { + nfp_warn(app->cpp, + "FlowerNIC: unsupported host context memory: %d\n", + err); + err = 0; + num_mems = 1; + } + + if (!FIELD_FIT(NFP_FL_STAT_ID_MU_NUM, num_mems) || !num_mems) { + nfp_warn(app->cpp, + "FlowerNIC: invalid host context memory: %llu\n", + num_mems); + return -EINVAL; + } + ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT", &err); if (err) { @@ -522,6 +604,8 @@ static int nfp_flower_init(struct nfp_app *app) if (!app_priv) return -ENOMEM; + app_priv->total_mem_units = num_mems; + app_priv->active_mem_unit = 0; app_priv->stats_ring_size = roundup_pow_of_two(ctx_count); app->priv = app_priv; app_priv->app = app; @@ -533,7 +617,7 @@ static int nfp_flower_init(struct nfp_app *app) init_waitqueue_head(&app_priv->mtu_conf.wait_q); spin_lock_init(&app_priv->mtu_conf.lock); - err = nfp_flower_metadata_init(app, ctx_count); + err = nfp_flower_metadata_init(app, ctx_count, num_mems); if (err) goto err_free_app_priv; @@ -558,6 +642,7 @@ static int nfp_flower_init(struct nfp_app *app) } INIT_LIST_HEAD(&app_priv->indr_block_cb_priv); + INIT_LIST_HEAD(&app_priv->non_repr_priv); return 0; @@ -601,7 +686,7 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev, { struct nfp_flower_priv *app_priv = app->priv; struct nfp_repr *repr = netdev_priv(netdev); - int err, ack; + int err; /* Only need to config FW for physical port MTU change. */ if (repr->port->type != NFP_PORT_PHYS_PORT) @@ -628,11 +713,9 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev, } /* Wait for fw to ack the change. 
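 * The wait below now shares NFP_FL_REPLY_TIMEOUT (40 ms, added in cmsg.h above) with the repr reify wait, replacing the hard-coded 10 ms.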
*/ - ack = wait_event_timeout(app_priv->mtu_conf.wait_q, - nfp_flower_check_ack(app_priv), - msecs_to_jiffies(10)); - - if (!ack) { + if (!wait_event_timeout(app_priv->mtu_conf.wait_q, + nfp_flower_check_ack(app_priv), + NFP_FL_REPLY_TIMEOUT)) { spin_lock_bh(&app_priv->mtu_conf.lock); app_priv->mtu_conf.requested_val = 0; spin_unlock_bh(&app_priv->mtu_conf.lock); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index b858bac47621..c0945a5fd1a4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -20,6 +20,9 @@ struct nfp_fl_pre_lag; struct net_device; struct nfp_app; +#define NFP_FL_STAT_ID_MU_NUM GENMASK(31, 22) +#define NFP_FL_STAT_ID_STAT GENMASK(21, 0) + #define NFP_FL_STATS_ELEM_RS FIELD_SIZEOF(struct nfp_fl_stats_id, \ init_unalloc) #define NFP_FLOWER_MASK_ENTRY_RS 256 @@ -54,6 +57,26 @@ struct nfp_fl_stats_id { }; /** + * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads + * @offloaded_macs: Hashtable of the offloaded MAC addresses + * @ipv4_off_list: List of IPv4 addresses to offload + * @neigh_off_list: List of neighbour offloads + * @ipv4_off_lock: Lock for the IPv4 address list + * @neigh_off_lock: Lock for the neighbour address list + * @mac_off_ids: IDA to manage id assignment for offloaded MACs + * @neigh_nb: Notifier to monitor neighbour state + */ +struct nfp_fl_tunnel_offloads { + struct rhashtable offloaded_macs; + struct list_head ipv4_off_list; + struct list_head neigh_off_list; + struct mutex ipv4_off_lock; + spinlock_t neigh_off_lock; + struct ida mac_off_ids; + struct notifier_block neigh_nb; +}; + +/** * struct nfp_mtu_conf - manage MTU setting * @portnum: NFP port number of repr with requested MTU change * @requested_val: MTU value requested for repr @@ -113,23 +136,16 @@ struct nfp_fl_lag { * processing * @cmsg_skbs_low: List of lower priority skbs for control message * processing - * @nfp_mac_off_list: List of MAC addresses to offload - * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs - * @nfp_ipv4_off_list: List of IPv4 addresses to offload - * @nfp_neigh_off_list: List of neighbour offloads - * @nfp_mac_off_lock: Lock for the MAC address list - * @nfp_mac_index_lock: Lock for the MAC index list - * @nfp_ipv4_off_lock: Lock for the IPv4 address list - * @nfp_neigh_off_lock: Lock for the neighbour address list - * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs - * @nfp_mac_off_count: Number of MACs in address list - * @nfp_tun_neigh_nb: Notifier to monitor neighbour state + * @tun: Tunnel offload data * @reify_replies: atomically stores the number of replies received * from firmware for repr reify * @reify_wait_queue: wait queue for repr reify response counting * @mtu_conf: Configuration of repr MTU value * @nfp_lag: Link aggregation data block * @indr_block_cb_priv: List of priv data passed to indirect block cbs + * @non_repr_priv: List of offloaded non-repr ports and their priv data + * @active_mem_unit: Current active memory unit for flower rules + * @total_mem_units: Total number of available memory units for flower rules */ struct nfp_flower_priv { struct nfp_app *app; @@ -147,30 +163,47 @@ struct nfp_flower_priv { struct work_struct cmsg_work; struct sk_buff_head cmsg_skbs_high; struct sk_buff_head cmsg_skbs_low; - struct list_head nfp_mac_off_list; - struct list_head nfp_mac_index_list; - struct list_head nfp_ipv4_off_list; - struct list_head nfp_neigh_off_list; - struct 
mutex nfp_mac_off_lock; - struct mutex nfp_mac_index_lock; - struct mutex nfp_ipv4_off_lock; - spinlock_t nfp_neigh_off_lock; - struct ida nfp_mac_off_ids; - int nfp_mac_off_count; - struct notifier_block nfp_tun_neigh_nb; + struct nfp_fl_tunnel_offloads tun; atomic_t reify_replies; wait_queue_head_t reify_wait_queue; struct nfp_mtu_conf mtu_conf; struct nfp_fl_lag nfp_lag; struct list_head indr_block_cb_priv; + struct list_head non_repr_priv; + unsigned int active_mem_unit; + unsigned int total_mem_units; }; /** * struct nfp_flower_repr_priv - Flower APP per-repr priv data + * @nfp_repr: Back pointer to nfp_repr * @lag_port_flags: Extended port flags to record lag state of repr + * @mac_offloaded: Flag indicating a MAC address is offloaded for repr + * @offloaded_mac_addr: MAC address that has been offloaded for repr + * @mac_list: List entry of reprs that share the same offloaded MAC */ struct nfp_flower_repr_priv { + struct nfp_repr *nfp_repr; unsigned long lag_port_flags; + bool mac_offloaded; + u8 offloaded_mac_addr[ETH_ALEN]; + struct list_head mac_list; +}; + +/** + * struct nfp_flower_non_repr_priv - Priv data for non-repr offloaded ports + * @list: List entry of offloaded reprs + * @netdev: Pointer to non-repr net_device + * @ref_count: Number of references held for this priv data + * @mac_offloaded: Flag indicating a MAC address is offloaded for device + * @offloaded_mac_addr: MAC address that has been offloaded for dev + */ +struct nfp_flower_non_repr_priv { + struct list_head list; + struct net_device *netdev; + int ref_count; + bool mac_offloaded; + u8 offloaded_mac_addr[ETH_ALEN]; }; struct nfp_fl_key_ls { @@ -217,7 +250,8 @@ struct nfp_fl_stats_frame { __be64 stats_cookie; }; -int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count); +int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, + unsigned int host_ctx_split); void nfp_flower_metadata_cleanup(struct nfp_app *app); int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, @@ -252,7 +286,6 @@ void nfp_tunnel_config_stop(struct nfp_app *app); int nfp_tunnel_mac_event_handler(struct nfp_app *app, struct net_device *netdev, unsigned long event, void *ptr); -void nfp_tunnel_write_macs(struct nfp_app *app); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); @@ -273,4 +306,12 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app, struct net_device *netdev, unsigned long event); +void +__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv); +struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev); +void +__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv); +void +nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index cdf75595f627..c04a0d6b0184 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -403,9 +403,6 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, ext += sizeof(struct nfp_flower_ipv4_udp_tun); msk += sizeof(struct nfp_flower_ipv4_udp_tun); - /* Configure tunnel end point MAC. */ - nfp_tunnel_write_macs(app); - /* Store the tunnel destination in the rule data. 
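 * (The tunnel endpoint MAC is no longer written from here; with this series it is kept in sync by the netdev notifier handling in tunnel_conf.c.)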
* This must be present and be an exact match. */ diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 573a4400a26c..492837b852b6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -4,6 +4,7 @@ #include <linux/hash.h> #include <linux/hashtable.h> #include <linux/jhash.h> +#include <linux/math64.h> #include <linux/vmalloc.h> #include <net/pkt_cls.h> @@ -52,8 +53,17 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) freed_stats_id = priv->stats_ring_size; /* Check for unallocated entries first. */ if (priv->stats_ids.init_unalloc > 0) { - *stats_context_id = priv->stats_ids.init_unalloc - 1; - priv->stats_ids.init_unalloc--; + if (priv->active_mem_unit == priv->total_mem_units) { + priv->stats_ids.init_unalloc--; + priv->active_mem_unit = 0; + } + + *stats_context_id = + FIELD_PREP(NFP_FL_STAT_ID_STAT, + priv->stats_ids.init_unalloc - 1) | + FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, + priv->active_mem_unit); + priv->active_mem_unit++; return 0; } @@ -381,10 +391,11 @@ const struct rhashtable_params nfp_flower_table_params = { .automatic_shrinking = true, }; -int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count) +int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, + unsigned int host_num_mems) { struct nfp_flower_priv *priv = app->priv; - int err; + int err, stats_size; hash_init(priv->mask_table); @@ -417,10 +428,12 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count) if (!priv->stats_ids.free_list.buf) goto err_free_last_used; - priv->stats_ids.init_unalloc = host_ctx_count; + priv->stats_ids.init_unalloc = div_u64(host_ctx_count, host_num_mems); - priv->stats = kvmalloc_array(priv->stats_ring_size, - sizeof(struct nfp_fl_stats), GFP_KERNEL); + stats_size = FIELD_PREP(NFP_FL_STAT_ID_STAT, host_ctx_count) | + FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, host_num_mems - 1); + priv->stats = kvmalloc_array(stats_size, sizeof(struct nfp_fl_stats), + GFP_KERNEL); if (!priv->stats) goto err_free_ring_buf; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 2d9f26a725c2..4d78be4ec4e9 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -98,47 +98,51 @@ struct nfp_ipv4_addr_entry { struct list_head list; }; -/** - * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP - * @reserved: reserved for future use - * @count: number of MAC addresses in the message - * @addresses.index: index of MAC address in the lookup table - * @addresses.addr: interface MAC address - * @addresses: series of MACs to offload - */ -struct nfp_tun_mac_addr { - __be16 reserved; - __be16 count; - struct index_mac_addr { - __be16 index; - u8 addr[ETH_ALEN]; - } addresses[]; -}; +#define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 /** - * struct nfp_tun_mac_offload_entry - list of MACs to offload - * @index: index of MAC address for offloading + * struct nfp_tun_mac_addr_offload - configure MAC address of tunnel EP on NFP + * @flags: MAC address offload options + * @count: number of MAC addresses in the message (should be 1) + * @index: index of MAC address in the lookup table * @addr: interface MAC address - * @list: list pointer */ -struct nfp_tun_mac_offload_entry { +struct nfp_tun_mac_addr_offload { + __be16 flags; + __be16 count; __be16 index; u8 
addr[ETH_ALEN]; - struct list_head list; +}; + +enum nfp_flower_mac_offload_cmd { + NFP_TUNNEL_MAC_OFFLOAD_ADD = 0, + NFP_TUNNEL_MAC_OFFLOAD_DEL = 1, + NFP_TUNNEL_MAC_OFFLOAD_MOD = 2, }; #define NFP_MAX_MAC_INDEX 0xff /** - * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id - * @ifindex: netdev ifindex of the device - * @index: index of netdevs mac on NFP - * @list: list pointer + * struct nfp_tun_offloaded_mac - hashtable entry for an offloaded MAC + * @ht_node: Hashtable entry + * @addr: Offloaded MAC address + * @index: Offloaded index for given MAC address + * @ref_count: Number of devs using this MAC address + * @repr_list: List of reprs sharing this MAC address */ -struct nfp_tun_mac_non_nfp_idx { - int ifindex; - u8 index; - struct list_head list; +struct nfp_tun_offloaded_mac { + struct rhash_head ht_node; + u8 addr[ETH_ALEN]; + u16 index; + int ref_count; + struct list_head repr_list; +}; + +static const struct rhashtable_params offloaded_macs_params = { + .key_offset = offsetof(struct nfp_tun_offloaded_mac, addr), + .head_offset = offsetof(struct nfp_tun_offloaded_mac, ht_node), + .key_len = ETH_ALEN, + .automatic_shrinking = true, }; void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) @@ -205,15 +209,15 @@ static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) struct nfp_ipv4_route_entry *entry; struct list_head *ptr, *storage; - spin_lock_bh(&priv->nfp_neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + spin_lock_bh(&priv->tun.neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->tun.neigh_off_lock); return true; } } - spin_unlock_bh(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->tun.neigh_off_lock); return false; } @@ -223,24 +227,24 @@ static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) struct nfp_ipv4_route_entry *entry; struct list_head *ptr, *storage; - spin_lock_bh(&priv->nfp_neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + spin_lock_bh(&priv->tun.neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->tun.neigh_off_lock); return; } } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { - spin_unlock_bh(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->tun.neigh_off_lock); nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); return; } entry->ipv4_addr = ipv4_addr; - list_add_tail(&entry->list, &priv->nfp_neigh_off_list); - spin_unlock_bh(&priv->nfp_neigh_off_lock); + list_add_tail(&entry->list, &priv->tun.neigh_off_list); + spin_unlock_bh(&priv->tun.neigh_off_lock); } static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) @@ -249,8 +253,8 @@ static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) struct nfp_ipv4_route_entry *entry; struct list_head *ptr, *storage; - spin_lock_bh(&priv->nfp_neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + spin_lock_bh(&priv->tun.neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); if (entry->ipv4_addr == ipv4_addr) { 
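/* Matching cached route found; unlink and free the stale entry. */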
list_del(&entry->list); @@ -258,7 +262,7 @@ static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) break; } } - spin_unlock_bh(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->tun.neigh_off_lock); } static void @@ -326,7 +330,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, if (!nfp_netdev_is_nfp_repr(n->dev)) return NOTIFY_DONE; - app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb); + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; /* Only concerned with changes to routes already added to NFP. */ @@ -401,11 +405,11 @@ static void nfp_tun_write_ipv4_list(struct nfp_app *app) int count; memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); - mutex_lock(&priv->nfp_ipv4_off_lock); + mutex_lock(&priv->tun.ipv4_off_lock); count = 0; - list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { if (count >= NFP_FL_IPV4_ADDRS_MAX) { - mutex_unlock(&priv->nfp_ipv4_off_lock); + mutex_unlock(&priv->tun.ipv4_off_lock); nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); return; } @@ -413,7 +417,7 @@ static void nfp_tun_write_ipv4_list(struct nfp_app *app) payload.ipv4_addr[count++] = entry->ipv4_addr; } payload.count = cpu_to_be32(count); - mutex_unlock(&priv->nfp_ipv4_off_lock); + mutex_unlock(&priv->tun.ipv4_off_lock); nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, sizeof(struct nfp_tun_ipv4_addr), @@ -426,26 +430,26 @@ void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) struct nfp_ipv4_addr_entry *entry; struct list_head *ptr, *storage; - mutex_lock(&priv->nfp_ipv4_off_lock); - list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + mutex_lock(&priv->tun.ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); if (entry->ipv4_addr == ipv4) { entry->ref_count++; - mutex_unlock(&priv->nfp_ipv4_off_lock); + mutex_unlock(&priv->tun.ipv4_off_lock); return; } } entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - mutex_unlock(&priv->nfp_ipv4_off_lock); + mutex_unlock(&priv->tun.ipv4_off_lock); nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); return; } entry->ipv4_addr = ipv4; entry->ref_count = 1; - list_add_tail(&entry->list, &priv->nfp_ipv4_off_list); - mutex_unlock(&priv->nfp_ipv4_off_lock); + list_add_tail(&entry->list, &priv->tun.ipv4_off_list); + mutex_unlock(&priv->tun.ipv4_off_lock); nfp_tun_write_ipv4_list(app); } @@ -456,8 +460,8 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) struct nfp_ipv4_addr_entry *entry; struct list_head *ptr, *storage; - mutex_lock(&priv->nfp_ipv4_off_lock); - list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + mutex_lock(&priv->tun.ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); if (entry->ipv4_addr == ipv4) { entry->ref_count--; @@ -468,191 +472,357 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) break; } } - mutex_unlock(&priv->nfp_ipv4_off_lock); + mutex_unlock(&priv->tun.ipv4_off_lock); nfp_tun_write_ipv4_list(app); } -void nfp_tunnel_write_macs(struct nfp_app *app) +static int +__nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_tun_mac_offload_entry *entry; - struct nfp_tun_mac_addr *payload; - struct list_head *ptr, 
*storage; - int mac_count, err, pay_size; + struct nfp_tun_mac_addr_offload payload; - mutex_lock(&priv->nfp_mac_off_lock); - if (!priv->nfp_mac_off_count) { - mutex_unlock(&priv->nfp_mac_off_lock); - return; - } + memset(&payload, 0, sizeof(payload)); - pay_size = sizeof(struct nfp_tun_mac_addr) + - sizeof(struct index_mac_addr) * priv->nfp_mac_off_count; + if (del) + payload.flags = cpu_to_be16(NFP_TUN_MAC_OFFLOAD_DEL_FLAG); - payload = kzalloc(pay_size, GFP_KERNEL); - if (!payload) { - mutex_unlock(&priv->nfp_mac_off_lock); - return; - } + /* FW supports multiple MACs per cmsg but restrict to single. */ + payload.count = cpu_to_be16(1); + payload.index = cpu_to_be16(idx); + ether_addr_copy(payload.addr, mac); - payload->count = cpu_to_be16(priv->nfp_mac_off_count); + return nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, + sizeof(struct nfp_tun_mac_addr_offload), + &payload, GFP_KERNEL); +} - mac_count = 0; - list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { - entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, - list); - payload->addresses[mac_count].index = entry->index; - ether_addr_copy(payload->addresses[mac_count].addr, - entry->addr); - mac_count++; - } +static bool nfp_tunnel_port_is_phy_repr(int port) +{ + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) + return true; - err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, - pay_size, payload, GFP_KERNEL); + return false; +} - kfree(payload); +static u16 nfp_tunnel_get_mac_idx_from_phy_port_id(int port) +{ + return port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; +} - if (err) { - mutex_unlock(&priv->nfp_mac_off_lock); - /* Write failed so retain list for future retry. */ - return; - } +static u16 nfp_tunnel_get_global_mac_idx_from_ida(int id) +{ + return id << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; +} + +static int nfp_tunnel_get_ida_from_global_mac_idx(u16 nfp_mac_idx) +{ + return nfp_mac_idx >> 8; +} + +static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx) +{ + return (nfp_mac_idx & 0xff) == NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; +} + +static struct nfp_tun_offloaded_mac * +nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, u8 *mac) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->tun.offloaded_macs, mac, + offloaded_macs_params); +} + +static void +nfp_tunnel_offloaded_macs_inc_ref_and_link(struct nfp_tun_offloaded_mac *entry, + struct net_device *netdev, bool mod) +{ + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; - /* If list was successfully offloaded, flush it. */ - list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { - entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, - list); - list_del(&entry->list); - kfree(entry); + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + + /* If modifying MAC, remove repr from old list first. 
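+ * The repr is then re-linked onto the new MAC's repr_list just below, keeping list membership consistent across the change.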
*/ + if (mod) + list_del(&repr_priv->mac_list); + + list_add_tail(&repr_priv->mac_list, &entry->repr_list); } - priv->nfp_mac_off_count = 0; - mutex_unlock(&priv->nfp_mac_off_lock); + entry->ref_count++; } -static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex) +static int +nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, + int port, bool mod) { struct nfp_flower_priv *priv = app->priv; - struct nfp_tun_mac_non_nfp_idx *entry; - struct list_head *ptr, *storage; - int idx; - - mutex_lock(&priv->nfp_mac_index_lock); - list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { - entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); - if (entry->ifindex == ifindex) { - idx = entry->index; - mutex_unlock(&priv->nfp_mac_index_lock); - return idx; - } + int ida_idx = NFP_MAX_MAC_INDEX, err; + struct nfp_tun_offloaded_mac *entry; + u16 nfp_mac_idx = 0; + + entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); + if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod); + return 0; } - idx = ida_simple_get(&priv->nfp_mac_off_ids, 0, - NFP_MAX_MAC_INDEX, GFP_KERNEL); - if (idx < 0) { - mutex_unlock(&priv->nfp_mac_index_lock); - return idx; + /* Assign a global index if non-repr or MAC address is now shared. */ + if (entry || !port) { + ida_idx = ida_simple_get(&priv->tun.mac_off_ids, 0, + NFP_MAX_MAC_INDEX, GFP_KERNEL); + if (ida_idx < 0) + return ida_idx; + + nfp_mac_idx = nfp_tunnel_get_global_mac_idx_from_ida(ida_idx); + } else { + nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port); } - entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - mutex_unlock(&priv->nfp_mac_index_lock); - return -ENOMEM; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto err_free_ida; + } + + ether_addr_copy(entry->addr, netdev->dev_addr); + INIT_LIST_HEAD(&entry->repr_list); + + if (rhashtable_insert_fast(&priv->tun.offloaded_macs, + &entry->ht_node, + offloaded_macs_params)) { + err = -ENOMEM; + goto err_free_entry; + } + } + + err = __nfp_tunnel_offload_mac(app, netdev->dev_addr, + nfp_mac_idx, false); + if (err) { + /* If not shared then free. */ + if (!entry->ref_count) + goto err_remove_hash; + goto err_free_ida; } - entry->ifindex = ifindex; - entry->index = idx; - list_add_tail(&entry->list, &priv->nfp_mac_index_list); - mutex_unlock(&priv->nfp_mac_index_lock); - return idx; + entry->index = nfp_mac_idx; + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod); + + return 0; + +err_remove_hash: + rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, + offloaded_macs_params); +err_free_entry: + kfree(entry); +err_free_ida: + if (ida_idx != NFP_MAX_MAC_INDEX) + ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + + return err; } -static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex) +static int +nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, + u8 *mac, bool mod) { struct nfp_flower_priv *priv = app->priv; - struct nfp_tun_mac_non_nfp_idx *entry; - struct list_head *ptr, *storage; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_tun_offloaded_mac *entry; + struct nfp_repr *repr; + int ida_idx; + + entry = nfp_tunnel_lookup_offloaded_macs(app, mac); + if (!entry) + return 0; + + entry->ref_count--; + /* If del is part of a mod then mac_list is still in use elsewhere. 
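+ * (on a modify, the preceding add call with mod set has already moved the repr onto the new MAC's repr_list)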
*/ + if (nfp_netdev_is_nfp_repr(netdev) && !mod) { + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + list_del(&repr_priv->mac_list); + } - mutex_lock(&priv->nfp_mac_index_lock); - list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { - entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); - if (entry->ifindex == ifindex) { - ida_simple_remove(&priv->nfp_mac_off_ids, - entry->index); - list_del(&entry->list); - kfree(entry); - break; + /* If MAC is now used by 1 repr set the offloaded MAC index to port. */ + if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { + u16 nfp_mac_idx; + int port, err; + + repr_priv = list_first_entry(&entry->repr_list, + struct nfp_flower_repr_priv, + mac_list); + repr = repr_priv->nfp_repr; + port = nfp_repr_get_port_id(repr->netdev); + nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port); + err = __nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false); + if (err) { + nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n", + netdev_name(netdev)); + return 0; } + + ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); + ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + entry->index = nfp_mac_idx; + return 0; } - mutex_unlock(&priv->nfp_mac_index_lock); -} -static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev, - struct nfp_app *app) -{ - struct nfp_flower_priv *priv = app->priv; - struct nfp_tun_mac_offload_entry *entry; - u16 nfp_mac_idx; - int port = 0; + if (entry->ref_count) + return 0; - /* Check if MAC should be offloaded. */ - if (!is_valid_ether_addr(netdev->dev_addr)) - return; + WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, + &entry->ht_node, + offloaded_macs_params)); + /* If MAC has global ID then extract and free the ida entry. 
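+ * Global indexes carry NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT in the low byte and the ida-allocated id in the upper byte; see the helpers above.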
*/ + if (nfp_tunnel_is_mac_idx_global(entry->index)) { + ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); + ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + } - if (nfp_netdev_is_nfp_repr(netdev)) + kfree(entry); + + return __nfp_tunnel_offload_mac(app, mac, 0, true); +} + +static int +nfp_tunnel_offload_mac(struct nfp_app *app, struct net_device *netdev, + enum nfp_flower_mac_offload_cmd cmd) +{ + struct nfp_flower_non_repr_priv *nr_priv = NULL; + bool non_repr = false, *mac_offloaded; + u8 *off_mac = NULL; + int err, port = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + repr = netdev_priv(netdev); + if (repr->app != app) + return 0; + + repr_priv = repr->app_priv; + mac_offloaded = &repr_priv->mac_offloaded; + off_mac = &repr_priv->offloaded_mac_addr[0]; port = nfp_repr_get_port_id(netdev); - else if (!nfp_fl_is_netdev_to_offload(netdev)) - return; + if (!nfp_tunnel_port_is_phy_repr(port)) + return 0; + } else if (nfp_fl_is_netdev_to_offload(netdev)) { + nr_priv = nfp_flower_non_repr_priv_get(app, netdev); + if (!nr_priv) + return -ENOMEM; + + mac_offloaded = &nr_priv->mac_offloaded; + off_mac = &nr_priv->offloaded_mac_addr[0]; + non_repr = true; + } else { + return 0; + } - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n"); - return; + if (!is_valid_ether_addr(netdev->dev_addr)) { + err = -EINVAL; + goto err_put_non_repr_priv; } - if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == - NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) { - nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; - } else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == - NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) { - port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port); - nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT; - } else { - /* Must assign our own unique 8-bit index. */ - int idx = nfp_tun_get_mac_idx(app, netdev->ifindex); + if (cmd == NFP_TUNNEL_MAC_OFFLOAD_MOD && !*mac_offloaded) + cmd = NFP_TUNNEL_MAC_OFFLOAD_ADD; - if (idx < 0) { - nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n"); - kfree(entry); - return; - } - nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; + switch (cmd) { + case NFP_TUNNEL_MAC_OFFLOAD_ADD: + err = nfp_tunnel_add_shared_mac(app, netdev, port, false); + if (err) + goto err_put_non_repr_priv; + + if (non_repr) + __nfp_flower_non_repr_priv_get(nr_priv); + + *mac_offloaded = true; + ether_addr_copy(off_mac, netdev->dev_addr); + break; + case NFP_TUNNEL_MAC_OFFLOAD_DEL: + /* Only attempt delete if add was successful. */ + if (!*mac_offloaded) + break; + + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + *mac_offloaded = false; + + err = nfp_tunnel_del_shared_mac(app, netdev, netdev->dev_addr, + false); + if (err) + goto err_put_non_repr_priv; + + break; + case NFP_TUNNEL_MAC_OFFLOAD_MOD: + /* Ignore if changing to the same address. */ + if (ether_addr_equal(netdev->dev_addr, off_mac)) + break; + + err = nfp_tunnel_add_shared_mac(app, netdev, port, true); + if (err) + goto err_put_non_repr_priv; + + /* Delete the previous MAC address. 
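+ * A failure here is only warned about; the new address is already offloaded at this point.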
*/ + err = nfp_tunnel_del_shared_mac(app, netdev, off_mac, true); + if (err) + nfp_flower_cmsg_warn(app, "Failed to remove offload of replaced MAC addr on %s.\n", + netdev_name(netdev)); + + ether_addr_copy(off_mac, netdev->dev_addr); + break; + default: + err = -EINVAL; + goto err_put_non_repr_priv; } - entry->index = cpu_to_be16(nfp_mac_idx); - ether_addr_copy(entry->addr, netdev->dev_addr); + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + return 0; + +err_put_non_repr_priv: + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); - mutex_lock(&priv->nfp_mac_off_lock); - priv->nfp_mac_off_count++; - list_add_tail(&entry->list, &priv->nfp_mac_off_list); - mutex_unlock(&priv->nfp_mac_off_lock); + return err; } int nfp_tunnel_mac_event_handler(struct nfp_app *app, struct net_device *netdev, unsigned long event, void *ptr) { - if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) { - /* If non-nfp netdev then free its offload index. */ - if (nfp_fl_is_netdev_to_offload(netdev)) - nfp_tun_del_mac_idx(app, netdev->ifindex); - } else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR || - event == NETDEV_REGISTER) { - nfp_tun_add_to_mac_offload_list(netdev, app); - - /* Force a list write to keep NFP up to date. */ - nfp_tunnel_write_macs(app); + int err; + + if (event == NETDEV_DOWN) { + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_DEL); + if (err) + nfp_flower_cmsg_warn(app, "Failed to delete offload MAC on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_UP) { + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_ADD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_CHANGEADDR) { + /* Only offload addr change if netdev is already up. */ + if (!(netdev->flags & IFF_UP)) + return NOTIFY_OK; + + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_MOD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to offload MAC change on %s.\n", + netdev_name(netdev)); } return NOTIFY_OK; } @@ -660,68 +830,62 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app, int nfp_tunnel_config_start(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; + int err; + + /* Initialise rhash for MAC offload tracking. */ + err = rhashtable_init(&priv->tun.offloaded_macs, + &offloaded_macs_params); + if (err) + return err; - /* Initialise priv data for MAC offloading. */ - priv->nfp_mac_off_count = 0; - mutex_init(&priv->nfp_mac_off_lock); - INIT_LIST_HEAD(&priv->nfp_mac_off_list); - mutex_init(&priv->nfp_mac_index_lock); - INIT_LIST_HEAD(&priv->nfp_mac_index_list); - ida_init(&priv->nfp_mac_off_ids); + ida_init(&priv->tun.mac_off_ids); /* Initialise priv data for IPv4 offloading. */ - mutex_init(&priv->nfp_ipv4_off_lock); - INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); + mutex_init(&priv->tun.ipv4_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv4_off_list); /* Initialise priv data for neighbour offloading. 
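 * (a spinlock rather than a mutex, since the route cache can be updated from atomic context; note the spin_lock_bh() and GFP_ATOMIC use above)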
*/ - spin_lock_init(&priv->nfp_neigh_off_lock); - INIT_LIST_HEAD(&priv->nfp_neigh_off_list); - priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + spin_lock_init(&priv->tun.neigh_off_lock); + INIT_LIST_HEAD(&priv->tun.neigh_off_list); + priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + + err = register_netevent_notifier(&priv->tun.neigh_nb); + if (err) { + rhashtable_free_and_destroy(&priv->tun.offloaded_macs, + nfp_check_rhashtable_empty, NULL); + return err; + } - return register_netevent_notifier(&priv->nfp_tun_neigh_nb); + return 0; } void nfp_tunnel_config_stop(struct nfp_app *app) { - struct nfp_tun_mac_offload_entry *mac_entry; struct nfp_flower_priv *priv = app->priv; struct nfp_ipv4_route_entry *route_entry; - struct nfp_tun_mac_non_nfp_idx *mac_idx; struct nfp_ipv4_addr_entry *ip_entry; struct list_head *ptr, *storage; - unregister_netevent_notifier(&priv->nfp_tun_neigh_nb); - - /* Free any memory that may be occupied by MAC list. */ - list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { - mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, - list); - list_del(&mac_entry->list); - kfree(mac_entry); - } - - /* Free any memory that may be occupied by MAC index list. */ - list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { - mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, - list); - list_del(&mac_idx->list); - kfree(mac_idx); - } + unregister_netevent_notifier(&priv->tun.neigh_nb); - ida_destroy(&priv->nfp_mac_off_ids); + ida_destroy(&priv->tun.mac_off_ids); /* Free any memory that may be occupied by ipv4 list. */ - list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); list_del(&ip_entry->list); kfree(ip_entry); } /* Free any memory that may be occupied by the route list. */ - list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); list_del(&route_entry->list); kfree(route_entry); } + + /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ + rhashtable_free_and_destroy(&priv->tun.offloaded_macs, + nfp_check_rhashtable_empty, NULL); } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 16d0479f6891..7a873002e626 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -396,7 +396,8 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *netdev, - const unsigned char *addr, u16 vid, u16 flags) + const unsigned char *addr, u16 vid, u16 flags, + struct netlink_ext_ack *extack) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = 0; diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 7e011c1c1e6e..cfb67b746595 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -454,14 +454,14 @@ static void hardware_init(struct net_device *dev) { struct net_local *lp = netdev_priv(dev); long ioaddr = dev->base_addr; - int i; + int i; /* Turn off the printer multiplexer on the 8012. 
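 * (this driver runs a pocket Ethernet adapter on the parallel port, which the 8012 can share with a printer)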
*/ for (i = 0; i < 8; i++) outb(mux_8012[i], ioaddr + PAR_DATA); write_reg_high(ioaddr, CMR1, CMR1h_RESET); - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) write_reg_byte(ioaddr, PAR0 + i, dev->dev_addr[i]); write_reg_high(ioaddr, CMR2, lp->addr_mode); @@ -471,15 +471,15 @@ static void hardware_init(struct net_device *dev) (read_nibble(ioaddr, CMR2_h) >> 3) & 0x0f); } - write_reg(ioaddr, CMR2, CMR2_IRQOUT); - write_reg_high(ioaddr, CMR1, CMR1h_RxENABLE | CMR1h_TxENABLE); + write_reg(ioaddr, CMR2, CMR2_IRQOUT); + write_reg_high(ioaddr, CMR1, CMR1h_RxENABLE | CMR1h_TxENABLE); /* Enable the interrupt line from the serial port. */ outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL); /* Unmask the interesting interrupts. */ - write_reg(ioaddr, IMR, ISR_RxOK | ISR_TxErr | ISR_TxOK); - write_reg_high(ioaddr, IMR, ISRh_RxErr); + write_reg(ioaddr, IMR, ISR_RxOK | ISR_TxErr | ISR_TxOK); + write_reg_high(ioaddr, IMR, ISRh_RxErr); lp->tx_unit_busy = 0; lp->pac_cnt_in_tx_buf = 0; @@ -610,10 +610,12 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance) write_reg(ioaddr, CMR2, CMR2_NULL); write_reg(ioaddr, IMR, 0); - if (net_debug > 5) printk(KERN_DEBUG "%s: In interrupt ", dev->name); - while (--boguscount > 0) { + if (net_debug > 5) + printk(KERN_DEBUG "%s: In interrupt ", dev->name); + while (--boguscount > 0) { int status = read_nibble(ioaddr, ISR); - if (net_debug > 5) printk("loop status %02x..", status); + if (net_debug > 5) + printk("loop status %02x..", status); if (status & (ISR_RxOK<<3)) { handled = 1; @@ -640,7 +642,8 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance) } while (--boguscount > 0); } else if (status & ((ISR_TxErr + ISR_TxOK)<<3)) { handled = 1; - if (net_debug > 6) printk("handling Tx done.."); + if (net_debug > 6) + printk("handling Tx done.."); /* Clear the Tx interrupt. We should check for too many failures and reinitialize the adapter. */ write_reg(ioaddr, ISR, ISR_TxErr + ISR_TxOK); @@ -680,7 +683,7 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance) break; } else break; - } + } /* This following code fixes a rare (and very difficult to track down) problem where the adapter forgets its ethernet address. */ @@ -694,7 +697,7 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance) } /* Tell the adapter that it can go back to using the output line as IRQ. */ - write_reg(ioaddr, CMR2, CMR2_IRQOUT); + write_reg(ioaddr, CMR2, CMR2_IRQOUT); /* Enable the physical interrupt line, which is sure to be low until.. */ outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL); /* .. we enable the interrupt sources. 
*/ diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index abb94c543aa2..e790a4116f1e 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -639,6 +639,7 @@ struct rtl8169_private { void __iomem *mmio_addr; /* memory map physical address */ struct pci_dev *pci_dev; struct net_device *dev; + struct phy_device *phydev; struct napi_struct napi; u32 msg_enable; u16 mac_version; @@ -679,7 +680,6 @@ struct rtl8169_private { } wk; unsigned supports_gmii:1; - struct mii_bus *mii_bus; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; @@ -745,6 +745,16 @@ static void rtl_unlock_work(struct rtl8169_private *tp) mutex_unlock(&tp->wk.mutex); } +static void rtl_lock_config_regs(struct rtl8169_private *tp) +{ + RTL_W8(tp, Cfg9346, Cfg9346_Lock); +} + +static void rtl_unlock_config_regs(struct rtl8169_private *tp) +{ + RTL_W8(tp, Cfg9346, Cfg9346_Unlock); +} + static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force) { pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL, @@ -1278,11 +1288,6 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0; } -static u16 rtl_get_events(struct rtl8169_private *tp) -{ - return RTL_R16(tp, IntrStatus); -} - static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) { RTL_W16(tp, IntrStatus, bits); @@ -1313,7 +1318,7 @@ static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp) static void rtl_link_chg_patch(struct rtl8169_private *tp) { struct net_device *dev = tp->dev; - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = tp->phydev; if (!netif_running(dev)) return; @@ -1431,7 +1436,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) }; u8 options; - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: @@ -1479,7 +1484,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) break; } - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); device_set_wakeup_enable(tp_to_dev(tp), wolopts); } @@ -3994,24 +3999,24 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) } /* We may have called phy_speed_down before */ - phy_speed_up(dev->phydev); + phy_speed_up(tp->phydev); - genphy_soft_reset(dev->phydev); + genphy_soft_reset(tp->phydev); /* It was reported that several chips end up with 10MBit/Half on a * 1GBit link after resuming from S3. For whatever reason the PHY on * these chips doesn't properly start a renegotiation when soft-reset. * Explicitly requesting a renegotiation fixes this. 
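 * (The PHY is now reached via tp->phydev, cached at MDIO registration time, rather than dev->phydev, which is only valid while attached.)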
*/ - if (dev->phydev->autoneg == AUTONEG_ENABLE) - phy_restart_aneg(dev->phydev); + if (tp->phydev->autoneg == AUTONEG_ENABLE) + phy_restart_aneg(tp->phydev); } static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) { rtl_lock_work(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); RTL_W32(tp, MAC4, addr[4] | addr[5] << 8); RTL_R32(tp, MAC4); @@ -4022,7 +4027,7 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) if (tp->mac_version == RTL_GIGA_MAC_VER_34) rtl_rar_exgmac_set(tp, addr); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); rtl_unlock_work(tp); } @@ -4049,10 +4054,12 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct rtl8169_private *tp = netdev_priv(dev); + if (!netif_running(dev)) return -ENODEV; - return phy_mii_ioctl(dev->phydev, ifr, cmd); + return phy_mii_ioctl(tp->phydev, ifr, cmd); } static void rtl_init_mdio_ops(struct rtl8169_private *tp) @@ -4101,15 +4108,10 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) { - struct phy_device *phydev; - if (!__rtl8169_get_wol(tp)) return false; - /* phydev may not be attached to netdevice */ - phydev = mdiobus_get_phy(tp->mii_bus, 0); - - phy_speed_down(phydev, false); + phy_speed_down(tp->phydev, false); rtl_wol_suspend_quirk(tp); return true; @@ -4178,7 +4180,7 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) break; } - phy_resume(tp->dev->phydev); + phy_resume(tp->phydev); /* give MAC/PHY some time to resume */ msleep(20); } @@ -4234,18 +4236,18 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) { if (tp->jumbo_ops.enable) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); tp->jumbo_ops.enable(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); } } static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) { if (tp->jumbo_ops.disable) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); tp->jumbo_ops.disable(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); } } @@ -4566,13 +4568,13 @@ static void rtl_set_rx_mode(struct net_device *dev) static void rtl_hw_start(struct rtl8169_private *tp) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); tp->hw_start(tp); rtl_set_rx_max_size(tp); rtl_set_rx_tx_desc_registers(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); /* Initially a 10 us delay. Turned it into a PCI commit. 
- FR */ RTL_R8(tp, IntrMask); @@ -4696,18 +4698,10 @@ static void rtl_enable_clock_request(struct rtl8169_private *tp) PCI_EXP_LNKCTL_CLKREQ_EN); } -static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable) +static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp) { - u8 data; - - data = RTL_R8(tp, Config3); - - if (enable) - data |= Rdy_to_L23; - else - data &= ~Rdy_to_L23; - - RTL_W8(tp, Config3, data); + /* work around an issue when PCI reset occurs during L2/L3 state */ + RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Rdy_to_L23); } static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) @@ -5028,7 +5022,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) }; rtl_hw_start_8168f(tp); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1)); @@ -5062,7 +5056,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp) rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) @@ -5168,7 +5162,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); rtl_writephy(tp, 0x1f, 0x0c42); rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff); @@ -5245,7 +5239,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp) @@ -5516,7 +5510,7 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8105e_2(struct rtl8169_private *tp) @@ -5551,7 +5545,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00, ERIAR_EXGMAC); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8106(struct rtl8169_private *tp) @@ -5565,7 +5559,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -6201,8 +6195,6 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) dev->features &= ~NETIF_F_HIGHDMA; } - rtl8169_hw_reset(tp); - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } @@ -6409,7 +6401,7 @@ release_descriptor: static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) { struct rtl8169_private *tp = dev_instance; - u16 status = rtl_get_events(tp); + u16 status = RTL_R16(tp, IntrStatus); u16 irq_mask = RTL_R16(tp, IntrMask); if (status == 0xffff || !(status & irq_mask)) @@ -6420,8 +6412,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) goto out; } - if (status & LinkChg && tp->dev->phydev) - phy_mac_interrupt(tp->dev->phydev); + if (status & LinkChg) + phy_mac_interrupt(tp->phydev); if (unlikely(status & 
RxFIFOOver && tp->mac_version == RTL_GIGA_MAC_VER_11)) { @@ -6512,12 +6504,12 @@ static void r8169_phylink_handler(struct net_device *ndev) } if (net_ratelimit()) - phy_print_status(ndev->phydev); + phy_print_status(tp->phydev); } static int r8169_phy_connect(struct rtl8169_private *tp) { - struct phy_device *phydev = mdiobus_get_phy(tp->mii_bus, 0); + struct phy_device *phydev = tp->phydev; phy_interface_t phy_mode; int ret; @@ -6544,7 +6536,7 @@ static void rtl8169_down(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - phy_stop(dev->phydev); + phy_stop(tp->phydev); napi_disable(&tp->napi); netif_stop_queue(dev); @@ -6586,7 +6578,7 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - phy_disconnect(dev->phydev); + phy_disconnect(tp->phydev); pci_free_irq(pdev, 0, tp); @@ -6637,10 +6629,6 @@ static int rtl_open(struct net_device *dev) if (retval < 0) goto err_free_rx_1; - INIT_WORK(&tp->wk.work, rtl_task); - - smp_mb(); - rtl_request_firmware(tp); retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp, @@ -6667,7 +6655,7 @@ static int rtl_open(struct net_device *dev) if (!rtl8169_init_counter_offsets(tp)) netif_warn(tp, hw, dev, "counter reset/update failed\n"); - phy_start(dev->phydev); + phy_start(tp->phydev); netif_start_queue(dev); rtl_unlock_work(tp); @@ -6756,7 +6744,7 @@ static void rtl8169_net_suspend(struct net_device *dev) if (!netif_running(dev)) return; - phy_stop(dev->phydev); + phy_stop(tp->phydev); netif_device_detach(dev); rtl_lock_work(tp); @@ -6791,14 +6779,13 @@ static void __rtl8169_resume(struct net_device *dev) rtl_pll_power_up(tp); rtl8169_init_phy(dev, tp); - phy_start(tp->dev->phydev); + phy_start(tp->phydev); rtl_lock_work(tp); napi_enable(&tp->napi); set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + rtl_reset_work(tp); rtl_unlock_work(tp); - - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } static int rtl8169_resume(struct device *device) @@ -6935,7 +6922,7 @@ static void rtl_remove_one(struct pci_dev *pdev) netif_napi_del(&tp->napi); unregister_netdev(dev); - mdiobus_unregister(tp->mii_bus); + mdiobus_unregister(tp->phydev->mdio.bus); rtl_release_firmware(tp); @@ -6995,9 +6982,9 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) unsigned int flags; if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); flags = PCI_IRQ_LEGACY; } else { flags = PCI_IRQ_ALL_TYPES; @@ -7042,7 +7029,6 @@ static int r8169_mdio_write_reg(struct mii_bus *mii_bus, int phyaddr, static int r8169_mdio_register(struct rtl8169_private *tp) { struct pci_dev *pdev = tp->pci_dev; - struct phy_device *phydev; struct mii_bus *new_bus; int ret; @@ -7064,16 +7050,14 @@ static int r8169_mdio_register(struct rtl8169_private *tp) if (ret) return ret; - phydev = mdiobus_get_phy(new_bus, 0); - if (!phydev) { + tp->phydev = mdiobus_get_phy(new_bus, 0); + if (!tp->phydev) { mdiobus_unregister(new_bus); return -ENODEV; } /* PHY will be woken up in rtl_open() */ - phy_suspend(phydev); - - tp->mii_bus = new_bus; + phy_suspend(tp->phydev); return 0; } @@ -7171,6 +7155,32 @@ static void rtl_disable_clk(void *data) clk_disable_unprepare(data); } +static int rtl_get_ether_clk(struct rtl8169_private *tp) +{ + struct device *d = tp_to_dev(tp); + struct clk *clk; + int rc; + + clk = devm_clk_get(d, "ether_clk"); + if (IS_ERR(clk)) { + rc = PTR_ERR(clk); + if (rc == 
-ENOENT) + /* clk-core allows NULL (for suspend / resume) */ + rc = 0; + else if (rc != -EPROBE_DEFER) + dev_err(d, "failed to get clk: %d\n", rc); + } else { + tp->clk = clk; + rc = clk_prepare_enable(clk); + if (rc) + dev_err(d, "failed to enable clk: %d\n", rc); + else + rc = devm_add_action_or_reset(d, rtl_disable_clk, clk); + } + + return rc; +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; @@ -7192,30 +7202,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->supports_gmii = cfg->has_gmii; /* Get the *optional* external "ether_clk" used on some boards */ - tp->clk = devm_clk_get(&pdev->dev, "ether_clk"); - if (IS_ERR(tp->clk)) { - rc = PTR_ERR(tp->clk); - if (rc == -ENOENT) { - /* clk-core allows NULL (for suspend / resume) */ - tp->clk = NULL; - } else if (rc == -EPROBE_DEFER) { - return rc; - } else { - dev_err(&pdev->dev, "failed to get clk: %d\n", rc); - return rc; - } - } else { - rc = clk_prepare_enable(tp->clk); - if (rc) { - dev_err(&pdev->dev, "failed to enable clk: %d\n", rc); - return rc; - } - - rc = devm_add_action_or_reset(&pdev->dev, rtl_disable_clk, - tp->clk); - if (rc) - return rc; - } + rc = rtl_get_ether_clk(tp); + if (rc) + return rc; /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); @@ -7300,6 +7289,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->saved_wolopts = __rtl8169_get_wol(tp); mutex_init(&tp->wk.mutex); + INIT_WORK(&tp->wk.work, rtl_task); u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); @@ -7406,7 +7396,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_mdio_unregister: - mdiobus_unregister(tp->mii_bus); + mdiobus_unregister(tp->phydev->mdio.bus); return rc; } diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 6213827e3956..62a205eba9f7 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2725,7 +2725,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port, info.vid = recv_info->vid; info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, - rocker_port->dev, &info.info); + rocker_port->dev, &info.info, NULL); } static void rocker_switchdev_event_work(struct work_struct *work) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 6473cc68c2d5..bea7895930f6 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1833,10 +1833,10 @@ static void ofdpa_port_fdb_learn_work(struct work_struct *work) rtnl_lock(); if (learned && removing) call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, - lw->ofdpa_port->dev, &info.info); + lw->ofdpa_port->dev, &info.info, NULL); else if (learned && !removing) call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, - lw->ofdpa_port->dev, &info.info); + lw->ofdpa_port->dev, &info.info, NULL); rtnl_unlock(); kfree(work); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index b6a50058bb8d..ee42d4a887d7 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -6041,10 +6041,11 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3, 0, 3, "sfc_exp_rom_cfg" }, { NVRAM_PARTITION_TYPE_LICENSE, 
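/*
 * Editorial aside: rtl_get_ether_clk() above collects the optional-
 * clock idiom in one place: -ENOENT means "no such clock, carry on
 * with a NULL clk", -EPROBE_DEFER propagates silently, and anything
 * else is a hard error. A hedged sketch of the same flow; the clock
 * name matches the driver, the function itself is illustrative.
 */
static int example_get_optional_clk(struct device *dev, struct clk **out)
{
        struct clk *clk = devm_clk_get(dev, "ether_clk");

        if (IS_ERR(clk)) {
                int rc = PTR_ERR(clk);

                if (rc == -ENOENT)
                        return 0;       /* optional: clk-core accepts NULL */
                if (rc != -EPROBE_DEFER)
                        dev_err(dev, "failed to get clk: %d\n", rc);
                return rc;
        }

        *out = clk;
        return clk_prepare_enable(clk);
}
/*
 * In the driver the enable is paired with devm_add_action_or_reset(),
 * so clk_disable_unprepare() runs automatically on unbind or on any
 * later probe failure.
 */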
0, 0, "sfc_license" }, { NVRAM_PARTITION_TYPE_PHY_MIN, 0xff, 0, "sfc_phy_fw" }, - /* MUM and SUC firmware share the same partition type */ { NVRAM_PARTITION_TYPE_MUM_FIRMWARE, 0, 0, "sfc_mumfw" }, { NVRAM_PARTITION_TYPE_EXPANSION_UEFI, 0, 0, "sfc_uefi" }, - { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" } + { NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS, 0, 0, "sfc_dynamic_cfg_dflt" }, + { NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0, 0, "sfc_exp_rom_cfg_dflt" }, + { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" }, }; static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, @@ -6071,8 +6072,15 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected); if (rc) return rc; + if (protected && + (type != NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS && + type != NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS)) + /* Hide protected partitions that don't provide defaults. */ + return -ENODEV; + if (protected) - return -ENODEV; /* hide it */ + /* Protected partitions are read only. */ + erase_size = 0; part->nvram_type = type; diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index dfad93fca0a6..295ec1787b9f 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -2074,22 +2074,26 @@ fail: static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN); int rc; MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type); + MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_START_V2_IN_FLAGS, + NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT, + 1); BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_START_OUT_LEN != 0); rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf, sizeof(inbuf), NULL, 0, NULL); + return rc; } static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, loff_t offset, u8 *buffer, size_t length) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_V2_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_READ_OUT_LEN(EFX_MCDI_NVRAM_LEN_MAX)); size_t outlen; @@ -2098,6 +2102,8 @@ static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_TYPE, type); MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_OFFSET, offset); MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_LENGTH, length); + MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_V2_MODE, + MC_CMD_NVRAM_READ_IN_V2_DEFAULT); rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_READ, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); @@ -2147,15 +2153,51 @@ static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN); - int rc; + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN); + size_t outlen; + int rc, rc2; MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type); - - BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_FINISH_OUT_LEN != 0); + /* Always set this flag. 
Old firmware ignores it */ + MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS, + NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT, + 1); rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf), - NULL, 0, NULL); + outbuf, sizeof(outbuf), &outlen); + if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) { + rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE); + if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS) + netif_err(efx, drv, efx->net_dev, + "NVRAM update failed verification with code 0x%x\n", + rc2); + switch (rc2) { + case MC_CMD_NVRAM_VERIFY_RC_SUCCESS: + break; + case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED: + rc = -EIO; + break; + case MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT: + case MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST: + rc = -EINVAL; + break; + case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES: + case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS: + case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH: + rc = -EPERM; + break; + default: + netif_err(efx, drv, efx->net_dev, + "Unknown response to NVRAM_UPDATE_FINISH\n"); + rc = -EIO; + } + } + return rc; } diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c index 4ac30b6e5dab..0d03e0577d85 100644 --- a/drivers/net/ethernet/sfc/mtd.c +++ b/drivers/net/ethernet/sfc/mtd.c @@ -66,6 +66,9 @@ int efx_mtd_add(struct efx_nic *efx, struct efx_mtd_partition *parts, part->mtd.writesize = 1; + if (!(part->mtd.flags & MTD_NO_ERASE)) + part->mtd.flags |= MTD_WRITEABLE; + part->mtd.owner = THIS_MODULE; part->mtd.priv = efx; part->mtd.name = part->name; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index b9221fc1674d..3e7631160384 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2760,7 +2760,7 @@ static void get_gem_mac_nonobp(struct pci_dev *pdev, unsigned char *dev_addr) void __iomem *p = pci_map_rom(pdev, &size); if (p) { - int found; + int found; found = readb(p) == 0x55 && readb(p + 1) == 0xaa && diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 396e1cd10667..fec275e2208d 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -78,7 +78,7 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv, case PHY_INTERFACE_MODE_MII: mode = AM33XX_GMII_SEL_MODE_MII; break; - }; + } mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6); mask |= BIT(slave + 4); @@ -133,7 +133,7 @@ static void cpsw_gmii_sel_dra7xx(struct cpsw_phy_sel_priv *priv, case PHY_INTERFACE_MODE_MII: mode = AM33XX_GMII_SEL_MODE_MII; break; - }; + } switch (slave) { case 0: diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6d067176320f..cf22a79af66b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -963,7 +963,8 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev, static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 66b9cfe692fc..2e53ba3fa2e7 100644 --- a/drivers/net/phy/mdio_bus.c +++ 
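/*
 * Editorial aside: the NVRAM_UPDATE_FINISH_V2 handling above folds the
 * firmware's verify result into three errno classes. A standalone
 * sketch of that folding; the enum values are illustrative
 * placeholders, not the MCDI constants.
 */
#include <errno.h>

enum verify_rc { VRC_OK, VRC_DIGEST_BAD, VRC_FORMAT_BAD, VRC_UNTRUSTED };

static int verify_rc_to_errno(enum verify_rc rc)
{
        switch (rc) {
        case VRC_OK:
                return 0;
        case VRC_DIGEST_BAD:            /* failed an integrity check */
                return -EIO;
        case VRC_FORMAT_BAD:            /* image is malformed */
                return -EINVAL;
        case VRC_UNTRUSTED:             /* signature chain not trusted */
                return -EPERM;
        }
        return -EIO;                    /* unknown result: I/O error */
}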
b/drivers/net/phy/mdio_bus.c @@ -39,8 +39,6 @@ #include <linux/io.h> #include <linux/uaccess.h> -#include <asm/irq.h> - #define CREATE_TRACE_POINTS #include <trace/events/mdio.h> diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b1f959935f50..0d62b548ab39 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -426,9 +426,6 @@ static int ksz9021_config_init(struct phy_device *phydev) return 0; } -#define MII_KSZ9031RN_MMD_CTRL_REG 0x0d -#define MII_KSZ9031RN_MMD_REGDATA_REG 0x0e -#define OP_DATA 1 #define KSZ9031_PS_TO_REG 60 /* Extended registers */ @@ -446,24 +443,6 @@ static int ksz9021_config_init(struct phy_device *phydev) #define MII_KSZ9031RN_EDPD 0x23 #define MII_KSZ9031RN_EDPD_ENABLE BIT(0) -static int ksz9031_extended_write(struct phy_device *phydev, - u8 mode, u32 dev_addr, u32 regnum, u16 val) -{ - phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr); - phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum); - phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, (mode << 14) | dev_addr); - return phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, val); -} - -static int ksz9031_extended_read(struct phy_device *phydev, - u8 mode, u32 dev_addr, u32 regnum) -{ - phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr); - phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum); - phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, (mode << 14) | dev_addr); - return phy_read(phydev, MII_KSZ9031RN_MMD_REGDATA_REG); -} - static int ksz9031_of_load_skew_values(struct phy_device *phydev, const struct device_node *of_node, u16 reg, size_t field_sz, @@ -484,7 +463,7 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev, return 0; if (matches < numfields) - newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg); + newval = phy_read_mmd(phydev, 2, reg); else newval = 0; @@ -498,7 +477,7 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev, << (field_sz * i)); } - return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); + return phy_write_mmd(phydev, 2, reg, newval); } /* Center KSZ9031RNX FLP timing at 16ms. 
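/*
 * Editorial aside: the deleted ksz9031_extended_read()/_write()
 * helpers open-coded the clause-22 indirect MMD access sequence via
 * MII registers 0x0d (MMD control) and 0x0e (MMD data). phylib issues
 * the same sequence inside phy_read_mmd()/phy_write_mmd(), so the
 * conversion below is traffic-equivalent. Sketch of the equivalence
 * (bit 14 selects the data/no-post-increment mode that OP_DATA
 * encoded):
 */
static int mmd_read_open_coded(struct phy_device *phydev, int devad, int reg)
{
        phy_write(phydev, 0x0d, devad);                 /* select MMD device */
        phy_write(phydev, 0x0e, reg);                   /* select register */
        phy_write(phydev, 0x0d, (1 << 14) | devad);     /* switch to data mode */
        return phy_read(phydev, 0x0e);
}

static int mmd_read_phylib(struct phy_device *phydev, int devad, int reg)
{
        return phy_read_mmd(phydev, devad, reg);        /* same bus traffic */
}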
*/ @@ -506,13 +485,13 @@ static int ksz9031_center_flp_timing(struct phy_device *phydev) { int result; - result = ksz9031_extended_write(phydev, OP_DATA, 0, - MII_KSZ9031RN_FLP_BURST_TX_HI, 0x0006); + result = phy_write_mmd(phydev, 0, MII_KSZ9031RN_FLP_BURST_TX_HI, + 0x0006); if (result) return result; - result = ksz9031_extended_write(phydev, OP_DATA, 0, - MII_KSZ9031RN_FLP_BURST_TX_LO, 0x1A80); + result = phy_write_mmd(phydev, 0, MII_KSZ9031RN_FLP_BURST_TX_LO, + 0x1A80); if (result) return result; @@ -524,11 +503,11 @@ static int ksz9031_enable_edpd(struct phy_device *phydev) { int reg; - reg = ksz9031_extended_read(phydev, OP_DATA, 0x1C, MII_KSZ9031RN_EDPD); + reg = phy_read_mmd(phydev, 0x1C, MII_KSZ9031RN_EDPD); if (reg < 0) return reg; - return ksz9031_extended_write(phydev, OP_DATA, 0x1C, MII_KSZ9031RN_EDPD, - reg | MII_KSZ9031RN_EDPD_ENABLE); + return phy_write_mmd(phydev, 0x1C, MII_KSZ9031RN_EDPD, + reg | MII_KSZ9031RN_EDPD_ENABLE); } static int ksz9031_config_init(struct phy_device *phydev) @@ -654,7 +633,7 @@ static int ksz9131_of_load_skew_values(struct phy_device *phydev, return 0; if (matches < numfields) - newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg); + newval = phy_read_mmd(phydev, 2, reg); else newval = 0; @@ -668,7 +647,7 @@ static int ksz9131_of_load_skew_values(struct phy_device *phydev, << (field_sz * i)); } - return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); + return phy_write_mmd(phydev, 2, reg, newval); } static int ksz9131_config_init(struct phy_device *phydev) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 189cd2048c3a..745a705a505a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -13,8 +13,6 @@ * */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> @@ -36,8 +34,6 @@ #include <linux/uaccess.h> #include <linux/atomic.h> -#include <asm/irq.h> - #define PHY_STATE_STR(_state) \ case PHY_##_state: \ return __stringify(_state); \ @@ -51,7 +47,6 @@ static const char *phy_state_to_str(enum phy_state st) PHY_STATE_STR(RUNNING) PHY_STATE_STR(NOLINK) PHY_STATE_STR(FORCING) - PHY_STATE_STR(CHANGELINK) PHY_STATE_STR(HALTED) PHY_STATE_STR(RESUMING) } @@ -809,8 +804,7 @@ int phy_start_interrupts(struct phy_device *phydev) if (request_threaded_irq(phydev->irq, NULL, phy_interrupt, IRQF_ONESHOT | IRQF_SHARED, phydev_name(phydev), phydev) < 0) { - pr_warn("%s: Can't get IRQ %d (PHY)\n", - phydev->mdio.bus->name, phydev->irq); + phydev_warn(phydev, "Can't get IRQ %d\n", phydev->irq); phydev->irq = PHY_POLL; return 0; } @@ -820,23 +814,6 @@ int phy_start_interrupts(struct phy_device *phydev) EXPORT_SYMBOL(phy_start_interrupts); /** - * phy_stop_interrupts - disable interrupts from a PHY device - * @phydev: target phy_device struct - */ -int phy_stop_interrupts(struct phy_device *phydev) -{ - int err = phy_disable_interrupts(phydev); - - if (err) - phy_error(phydev); - - free_irq(phydev->irq, phydev); - - return err; -} -EXPORT_SYMBOL(phy_stop_interrupts); - -/** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct */ @@ -859,6 +836,7 @@ void phy_stop(struct phy_device *phydev) mutex_unlock(&phydev->lock); phy_state_machine(&phydev->state_queue.work); + phy_stop_machine(phydev); /* Cannot call flush_scheduled_work() here as desired because * of rtnl_lock(), but PHY_HALTED shall guarantee irq handler @@ -939,7 +917,6 @@ void phy_state_machine(struct work_struct *work) break; case PHY_NOLINK: case 
PHY_RUNNING: - case PHY_CHANGELINK: case PHY_RESUMING: err = phy_check_link_status(phydev); break; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 46c86725a693..b61db0a5ba3a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -37,8 +37,6 @@ #include <linux/uaccess.h> #include <linux/of.h> -#include <asm/irq.h> - MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); @@ -560,12 +558,31 @@ static const struct device_type mdio_bus_phy_type = { .pm = MDIO_BUS_PHY_PM_OPS, }; +static int phy_request_driver_module(struct phy_device *dev, int phy_id) +{ + int ret; + + ret = request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, + MDIO_ID_ARGS(phy_id)); + /* we only check for failures in executing the usermode binary, + * not whether a PHY driver module exists for the PHY ID + */ + if (IS_ENABLED(CONFIG_MODULES) && ret < 0) { + phydev_err(dev, "error %d loading PHY driver module for ID 0x%08x\n", + ret, phy_id); + return ret; + } + + return 0; +} + struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids) { struct phy_device *dev; struct mdio_device *mdiodev; + int ret = 0; /* We allocate the device, and initialize the default values */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -622,15 +639,21 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, if (!(c45_ids->devices_in_package & (1 << i))) continue; - request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, - MDIO_ID_ARGS(c45_ids->device_ids[i])); + ret = phy_request_driver_module(dev, + c45_ids->device_ids[i]); + if (ret) + break; } } else { - request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, - MDIO_ID_ARGS(phy_id)); + ret = phy_request_driver_module(dev, phy_id); } - device_initialize(&mdiodev->dev); + if (!ret) { + device_initialize(&mdiodev->dev); + } else { + kfree(dev); + dev = ERR_PTR(ret); + } return dev; } @@ -762,15 +785,8 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, /* Grab the bits from PHYIR1, and put them in the upper half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); if (phy_reg < 0) { - /* if there is no device, return without an error so scanning - * the bus works properly - */ - if (phy_reg == -EIO || phy_reg == -ENODEV) { - *phy_id = 0xffffffff; - return 0; - } - - return -EIO; + /* returning -ENODEV doesn't stop bus scanning */ + return (phy_reg == -EIO || phy_reg == -ENODEV) ? 
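/*
 * Editorial aside: phy_request_driver_module() above encodes a subtle
 * request_module() contract: a return of 0 means the usermode helper
 * ran, not that a matching driver module exists. A sketch of the same
 * distinction; the alias format string here is hypothetical.
 */
static int example_load_driver_module(struct device *dev, u32 id)
{
        int ret = request_module("example:%08x", id);

        /* Fatal only if modprobe itself could not be executed. */
        if (IS_ENABLED(CONFIG_MODULES) && ret < 0) {
                dev_err(dev, "error %d loading module for ID 0x%08x\n",
                        ret, id);
                return ret;
        }
        return 0;       /* a built-in or later driver may still bind */
}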
-ENODEV : -EIO; } *phy_id = (phy_reg & 0xffff) << 16; @@ -831,13 +847,13 @@ int phy_device_register(struct phy_device *phydev) /* Run all of the fixups for this PHY */ err = phy_scan_fixups(phydev); if (err) { - pr_err("PHY %d failed to initialize\n", phydev->mdio.addr); + phydev_err(phydev, "failed to initialize\n"); goto out; } err = device_add(&phydev->mdio.dev); if (err) { - pr_err("PHY %d failed to add\n", phydev->mdio.addr); + phydev_err(phydev, "failed to add\n"); goto out; } @@ -995,10 +1011,11 @@ EXPORT_SYMBOL(phy_connect); */ void phy_disconnect(struct phy_device *phydev) { - if (phydev->irq > 0) - phy_stop_interrupts(phydev); + if (phy_is_started(phydev)) + phy_stop(phydev); - phy_stop_machine(phydev); + if (phy_interrupt_is_valid(phydev)) + free_irq(phydev->irq, phydev); phydev->adjust_link = NULL; @@ -1291,6 +1308,36 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, } EXPORT_SYMBOL(phy_attach); +static bool phy_driver_is_genphy_kind(struct phy_device *phydev, + struct device_driver *driver) +{ + struct device *d = &phydev->mdio.dev; + bool ret = false; + + if (!phydev->drv) + return ret; + + get_device(d); + ret = d->driver == driver; + put_device(d); + + return ret; +} + +bool phy_driver_is_genphy(struct phy_device *phydev) +{ + return phy_driver_is_genphy_kind(phydev, + &genphy_driver.mdiodrv.driver); +} +EXPORT_SYMBOL_GPL(phy_driver_is_genphy); + +bool phy_driver_is_genphy_10g(struct phy_device *phydev) +{ + return phy_driver_is_genphy_kind(phydev, + &genphy_10g_driver.mdiodrv.driver); +} +EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g); + /** * phy_detach - detach a PHY device from its network device * @phydev: target phy_device struct @@ -1322,8 +1369,8 @@ void phy_detach(struct phy_device *phydev) * from the generic driver so that there's a chance a * real driver could be loaded */ - if (phydev->mdio.dev.driver == &genphy_10g_driver.mdiodrv.driver || - phydev->mdio.dev.driver == &genphy_driver.mdiodrv.driver) + if (phy_driver_is_genphy(phydev) || + phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); /* @@ -2267,14 +2314,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) new_driver->mdiodrv.driver.remove = phy_remove; new_driver->mdiodrv.driver.owner = owner; - /* The following works around an issue where the PHY driver doesn't bind - * to the device, resulting in the genphy driver being used instead of - * the dedicated driver. The root cause of the issue isn't known yet - * and seems to be in the base driver core. Once this is fixed we may - * remove this workaround. 
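/*
 * Editorial aside: the new phy_driver_is_genphy()/_10g() helpers let
 * callers stop peeking at phydev->mdio.dev.driver directly. Usage
 * sketch mirroring the phy_detach() hunk above:
 */
static void example_release_generic_driver(struct phy_device *phydev)
{
        /*
         * If only a generic driver bound, drop it so a dedicated
         * driver loaded later gets a chance to bind.
         */
        if (phy_driver_is_genphy(phydev) || phy_driver_is_genphy_10g(phydev))
                device_release_driver(&phydev->mdio.dev);
}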
- */ - new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS; - retval = driver_register(&new_driver->mdiodrv.driver); if (retval) { pr_err("%s: Error %d in registering driver\n", diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 7820fced33f6..941cfa8f1c2a 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -535,17 +535,20 @@ sb1000_activate(const int ioaddr[], const char* name) int status; ssleep(1); - if ((status = card_send_command(ioaddr, name, Command0, st))) + status = card_send_command(ioaddr, name, Command0, st); + if (status) return status; - if ((status = card_send_command(ioaddr, name, Command1, st))) + status = card_send_command(ioaddr, name, Command1, st); + if (status) return status; if (st[3] != 0xf1) { - if ((status = sb1000_start_get_set_command(ioaddr, name))) + status = sb1000_start_get_set_command(ioaddr, name); + if (status) return status; return -EIO; } udelay(1000); - return sb1000_start_get_set_command(ioaddr, name); + return sb1000_start_get_set_command(ioaddr, name); } /* get SB1000 firmware version */ diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 80373a9171dd..3f145e4c6c08 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -388,7 +388,6 @@ static void read_bulk_callback(struct urb *urb) unsigned pkt_len, res; struct sk_buff *skb; struct net_device *netdev; - u16 rx_stat; int status = urb->status; int result; unsigned long flags; @@ -424,7 +423,6 @@ static void read_bulk_callback(struct urb *urb) goto goon; res = urb->actual_length; - rx_stat = le16_to_cpu(*(__le16 *)(urb->transfer_buffer + res - 4)); pkt_len = res - 4; skb_put(dev->rx_skb, pkt_len); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8fadd8eaf601..2a0edd4653e3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1035,6 +1035,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, goto frame_err; } + skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5209ee9aac47..ef45c3c925be 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -361,10 +361,11 @@ errout: static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan, const struct vxlan_fdb *fdb, const struct vxlan_rdst *rd, + struct netlink_ext_ack *extack, struct switchdev_notifier_vxlan_fdb_info *fdb_info) { fdb_info->info.dev = vxlan->dev; - fdb_info->info.extack = NULL; + fdb_info->info.extack = extack; fdb_info->remote_ip = rd->remote_ip; fdb_info->remote_port = rd->remote_port; fdb_info->remote_vni = rd->remote_vni; @@ -375,41 +376,50 @@ static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan, fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER; } -static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan, - struct vxlan_fdb *fdb, - struct vxlan_rdst *rd, - bool adding) +static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan, + struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, + bool adding, + struct netlink_ext_ack *extack) { struct switchdev_notifier_vxlan_fdb_info info; enum switchdev_notifier_type notifier_type; + int ret; if (WARN_ON(!rd)) - return; + return 0; notifier_type = adding ? 
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE; - vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, &info); - call_switchdev_notifiers(notifier_type, vxlan->dev, - &info.info); + vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info); + ret = call_switchdev_notifiers(notifier_type, vxlan->dev, + &info.info, extack); + return notifier_to_errno(ret); } -static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, - struct vxlan_rdst *rd, int type, bool swdev_notify) +static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, int type, bool swdev_notify, + struct netlink_ext_ack *extack) { + int err; + if (swdev_notify) { switch (type) { case RTM_NEWNEIGH: - vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, - true); + err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, + true, extack); + if (err) + return err; break; case RTM_DELNEIGH: vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, - false); + false, extack); break; } } __vxlan_fdb_notify(vxlan, fdb, rd, type); + return 0; } static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) @@ -423,7 +433,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) .remote_vni = cpu_to_be32(VXLAN_N_VID), }; - vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true); + vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL); } static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) @@ -435,7 +445,7 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) memcpy(f.eth_addr, eth_addr, ETH_ALEN); - vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true); + vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL); } /* Hash Ethernet address */ @@ -545,7 +555,7 @@ int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, } rdst = first_remote_rcu(f); - vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, fdb_info); + vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info); out: rcu_read_unlock(); @@ -556,19 +566,21 @@ EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc); static int vxlan_fdb_notify_one(struct notifier_block *nb, const struct vxlan_dev *vxlan, const struct vxlan_fdb *f, - const struct vxlan_rdst *rdst) + const struct vxlan_rdst *rdst, + struct netlink_ext_ack *extack) { struct switchdev_notifier_vxlan_fdb_info fdb_info; int rc; - vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, &fdb_info); + vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info); rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, &fdb_info); return notifier_to_errno(rc); } int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, - struct notifier_block *nb) + struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan; struct vxlan_rdst *rdst; @@ -586,7 +598,8 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, if (f->vni == vni) { list_for_each_entry(rdst, &f->remotes, list) { rc = vxlan_fdb_notify_one(nb, vxlan, - f, rdst); + f, rdst, + extack); if (rc) goto out; } @@ -625,7 +638,7 @@ EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload); /* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, union vxlan_addr *ip, __be16 port, __be32 vni, - __u32 ifindex) + __u32 ifindex, struct vxlan_rdst *oldrd) { struct vxlan_rdst *rd; @@ -637,6 +650,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, if (!rd) return 0; + *oldrd = *rd; dst_cache_reset(&rd->dst_cache); rd->remote_ip = *ip; 
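/*
 * Editorial aside: with extack threaded through
 * call_switchdev_notifiers(), a listener can veto an FDB add and hand
 * the user a reason. Hedged sketch of a vetoing notifier; the event
 * choice and the message are illustrative.
 */
static int example_switchdev_event(struct notifier_block *nb,
                                   unsigned long event, void *ptr)
{
        struct switchdev_notifier_info *info = ptr;
        struct netlink_ext_ack *extack;

        extack = switchdev_notifier_info_to_extack(info);

        if (event == SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE) {
                NL_SET_ERR_MSG_MOD(extack, "Cannot offload this FDB entry");
                return notifier_from_errno(-EOPNOTSUPP);
        }
        return NOTIFY_DONE;
}
/*
 * The caller side is visible above: the chain's return value comes
 * back through notifier_to_errno() and is propagated to the user.
 */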
rd->remote_port = port; @@ -826,120 +840,177 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, return 0; } -/* Add new entry to forwarding table -- assumes lock held */ -static int vxlan_fdb_update(struct vxlan_dev *vxlan, - const u8 *mac, union vxlan_addr *ip, - __u16 state, __u16 flags, - __be16 port, __be32 src_vni, __be32 vni, - __u32 ifindex, __u16 ndm_flags, - bool swdev_notify) +static void vxlan_fdb_free(struct rcu_head *head) +{ + struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); + struct vxlan_rdst *rd, *nd; + + list_for_each_entry_safe(rd, nd, &f->remotes, list) { + dst_cache_destroy(&rd->dst_cache); + kfree(rd); + } + kfree(f); +} + +static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, + bool do_notify, bool swdev_notify) +{ + struct vxlan_rdst *rd; + + netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); + + --vxlan->addrcnt; + if (do_notify) + list_for_each_entry(rd, &f->remotes, list) + vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, + swdev_notify, NULL); + + hlist_del_rcu(&f->hlist); + call_rcu(&f->rcu, vxlan_fdb_free); +} + +static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, + union vxlan_addr *ip, + __u16 state, __u16 flags, + __be16 port, __be32 vni, + __u32 ifindex, __u16 ndm_flags, + struct vxlan_fdb *f, + bool swdev_notify, + struct netlink_ext_ack *extack) { __u16 fdb_flags = (ndm_flags & ~NTF_USE); struct vxlan_rdst *rd = NULL; - struct vxlan_fdb *f; + struct vxlan_rdst oldrd; int notify = 0; - int rc; - - f = __vxlan_find_mac(vxlan, mac, src_vni); - if (f) { - if (flags & NLM_F_EXCL) { - netdev_dbg(vxlan->dev, - "lost race to create %pM\n", mac); - return -EEXIST; - } + int rc = 0; + int err; - /* Do not allow an externally learned entry to take over an - * entry added by the user. - */ - if (!(fdb_flags & NTF_EXT_LEARNED) || - !(f->flags & NTF_VXLAN_ADDED_BY_USER)) { - if (f->state != state) { - f->state = state; - f->updated = jiffies; - notify = 1; - } - if (f->flags != fdb_flags) { - f->flags = fdb_flags; - f->updated = jiffies; - notify = 1; - } + /* Do not allow an externally learned entry to take over an entry added + * by the user. 
+ */ + if (!(fdb_flags & NTF_EXT_LEARNED) || + !(f->flags & NTF_VXLAN_ADDED_BY_USER)) { + if (f->state != state) { + f->state = state; + f->updated = jiffies; + notify = 1; } - - if ((flags & NLM_F_REPLACE)) { - /* Only change unicasts */ - if (!(is_multicast_ether_addr(f->eth_addr) || - is_zero_ether_addr(f->eth_addr))) { - notify |= vxlan_fdb_replace(f, ip, port, vni, - ifindex); - } else - return -EOPNOTSUPP; + if (f->flags != fdb_flags) { + f->flags = fdb_flags; + f->updated = jiffies; + notify = 1; } - if ((flags & NLM_F_APPEND) && - (is_multicast_ether_addr(f->eth_addr) || - is_zero_ether_addr(f->eth_addr))) { - rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + } - if (rc < 0) - return rc; + if ((flags & NLM_F_REPLACE)) { + /* Only change unicasts */ + if (!(is_multicast_ether_addr(f->eth_addr) || + is_zero_ether_addr(f->eth_addr))) { + rc = vxlan_fdb_replace(f, ip, port, vni, + ifindex, &oldrd); notify |= rc; - } - - if (ndm_flags & NTF_USE) - f->used = jiffies; - } else { - if (!(flags & NLM_F_CREATE)) - return -ENOENT; - - /* Disallow replace to add a multicast entry */ - if ((flags & NLM_F_REPLACE) && - (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac))) + } else { return -EOPNOTSUPP; + } + } + if ((flags & NLM_F_APPEND) && + (is_multicast_ether_addr(f->eth_addr) || + is_zero_ether_addr(f->eth_addr))) { + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); - netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); - rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni, - vni, ifindex, fdb_flags, &f); if (rc < 0) return rc; - notify = 1; + notify |= rc; } + if (ndm_flags & NTF_USE) + f->used = jiffies; + if (notify) { if (rd == NULL) rd = first_remote_rtnl(f); - vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, swdev_notify); + + err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, + swdev_notify, extack); + if (err) + goto err_notify; } return 0; + +err_notify: + if ((flags & NLM_F_REPLACE) && rc) + *rd = oldrd; + else if ((flags & NLM_F_APPEND) && rc) + list_del_rcu(&rd->list); + return err; } -static void vxlan_fdb_free(struct rcu_head *head) +static int vxlan_fdb_update_create(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __u16 flags, + __be16 port, __be32 src_vni, __be32 vni, + __u32 ifindex, __u16 ndm_flags, + bool swdev_notify, + struct netlink_ext_ack *extack) { - struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); - struct vxlan_rdst *rd, *nd; + __u16 fdb_flags = (ndm_flags & ~NTF_USE); + struct vxlan_fdb *f; + int rc; - list_for_each_entry_safe(rd, nd, &f->remotes, list) { - dst_cache_destroy(&rd->dst_cache); - kfree(rd); - } - kfree(f); + /* Disallow replace to add a multicast entry */ + if ((flags & NLM_F_REPLACE) && + (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac))) + return -EOPNOTSUPP; + + netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); + rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni, + vni, ifindex, fdb_flags, &f); + if (rc < 0) + return rc; + + rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, + swdev_notify, extack); + if (rc) + goto err_notify; + + return 0; + +err_notify: + vxlan_fdb_destroy(vxlan, f, false, false); + return rc; } -static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, - bool do_notify, bool swdev_notify) +/* Add new entry to forwarding table -- assumes lock held */ +static int vxlan_fdb_update(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __u16 flags, + __be16 port, __be32 src_vni, 
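/*
 * Editorial aside: because the notification can now fail, the replace
 * path snapshots the old remote ("oldrd") and restores it when the
 * notifier vetoes; an appended remote is unlinked instead. A reduced
 * standalone sketch of snapshot-and-restore; "struct remote" and
 * notify_peers() are stand-ins.
 */
struct remote { int ip, port; };

int notify_peers(const struct remote *rd);      /* may veto with -errno */

static int replace_remote(struct remote *rd, const struct remote *new)
{
        struct remote old = *rd;        /* snapshot before mutating */
        int err;

        *rd = *new;
        err = notify_peers(rd);
        if (err)
                *rd = old;              /* vetoed: roll the change back */
        return err;
}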
__be32 vni, + __u32 ifindex, __u16 ndm_flags, + bool swdev_notify, + struct netlink_ext_ack *extack) { - struct vxlan_rdst *rd; + struct vxlan_fdb *f; - netdev_dbg(vxlan->dev, - "delete %pM\n", f->eth_addr); + f = __vxlan_find_mac(vxlan, mac, src_vni); + if (f) { + if (flags & NLM_F_EXCL) { + netdev_dbg(vxlan->dev, + "lost race to create %pM\n", mac); + return -EEXIST; + } - --vxlan->addrcnt; - if (do_notify) - list_for_each_entry(rd, &f->remotes, list) - vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, - swdev_notify); + return vxlan_fdb_update_existing(vxlan, ip, state, flags, port, + vni, ifindex, ndm_flags, f, + swdev_notify, extack); + } else { + if (!(flags & NLM_F_CREATE)) + return -ENOENT; - hlist_del_rcu(&f->hlist); - call_rcu(&f->rcu, vxlan_fdb_free); + return vxlan_fdb_update_create(vxlan, mac, ip, state, flags, + port, src_vni, vni, ifindex, + ndm_flags, swdev_notify, extack); + } } static void vxlan_dst_free(struct rcu_head *head) @@ -954,7 +1025,7 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, struct vxlan_rdst *rd, bool swdev_notify) { list_del_rcu(&rd->list); - vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify); + vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL); call_rcu(&rd->rcu, vxlan_dst_free); } @@ -1025,7 +1096,8 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, /* Add static entry (via netlink) */ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 flags) + const unsigned char *addr, u16 vid, u16 flags, + struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); /* struct net *net = dev_net(vxlan->dev); */ @@ -1055,7 +1127,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, port, src_vni, vni, ifindex, ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER, - true); + true, extack); spin_unlock_bh(&vxlan->hash_lock); return err; @@ -1223,7 +1295,7 @@ static bool vxlan_snoop(struct net_device *dev, rdst->remote_ip = *src_ip; f->updated = jiffies; - vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true); + vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL); } else { /* learned new entry */ spin_lock(&vxlan->hash_lock); @@ -1236,7 +1308,7 @@ static bool vxlan_snoop(struct net_device *dev, vxlan->cfg.dst_port, vni, vxlan->default_dst.remote_vni, - ifindex, NTF_SELF, true); + ifindex, NTF_SELF, true, NULL); spin_unlock(&vxlan->hash_lock); } @@ -3478,9 +3550,12 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, goto errout; /* notify default fdb entry */ - if (f) - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, - true); + if (f) { + err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), + RTM_NEWNEIGH, true, extack); + if (err) + goto errout; + } list_add(&vxlan->next, &vn->vxlan_list); return 0; @@ -3727,8 +3802,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *dst = &vxlan->default_dst; - unsigned long old_age_interval; - struct vxlan_rdst old_dst; + struct net_device *lowerdev; struct vxlan_config conf; int err; @@ -3737,46 +3811,43 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], if (err) return err; - old_age_interval = vxlan->cfg.age_interval; - memcpy(&old_dst, dst, sizeof(struct vxlan_rdst)); - - err = vxlan_dev_configure(vxlan->net, dev, &conf, true, 
extack); + err = vxlan_config_validate(vxlan->net, &conf, &lowerdev, + vxlan, extack); if (err) return err; - if (old_age_interval != vxlan->cfg.age_interval) - mod_timer(&vxlan->age_timer, jiffies); - /* handle default dst entry */ - if (!vxlan_addr_equal(&dst->remote_ip, &old_dst.remote_ip)) { + if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) { spin_lock_bh(&vxlan->hash_lock); - if (!vxlan_addr_any(&old_dst.remote_ip)) - __vxlan_fdb_delete(vxlan, all_zeros_mac, - old_dst.remote_ip, - vxlan->cfg.dst_port, - old_dst.remote_vni, - old_dst.remote_vni, - old_dst.remote_ifindex, - true); - - if (!vxlan_addr_any(&dst->remote_ip)) { + if (!vxlan_addr_any(&conf.remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, - &dst->remote_ip, + &conf.remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_APPEND | NLM_F_CREATE, vxlan->cfg.dst_port, - dst->remote_vni, - dst->remote_vni, - dst->remote_ifindex, - NTF_SELF, true); + conf.vni, conf.vni, + conf.remote_ifindex, + NTF_SELF, true, extack); if (err) { spin_unlock_bh(&vxlan->hash_lock); return err; } } + if (!vxlan_addr_any(&dst->remote_ip)) + __vxlan_fdb_delete(vxlan, all_zeros_mac, + dst->remote_ip, + vxlan->cfg.dst_port, + dst->remote_vni, + dst->remote_vni, + dst->remote_ifindex, + true); spin_unlock_bh(&vxlan->hash_lock); } + if (conf.age_interval != vxlan->cfg.age_interval) + mod_timer(&vxlan->age_timer, jiffies); + + vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true); return 0; } @@ -4051,8 +4122,11 @@ vxlan_fdb_external_learn_add(struct net_device *dev, struct switchdev_notifier_vxlan_fdb_info *fdb_info) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct netlink_ext_ack *extack; int err; + extack = switchdev_notifier_info_to_extack(&fdb_info->info); + spin_lock_bh(&vxlan->hash_lock); err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip, NUD_REACHABLE, @@ -4062,7 +4136,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev, fdb_info->remote_vni, fdb_info->remote_ifindex, NTF_USE | NTF_SELF | NTF_EXT_LEARNED, - false); + false, extack); spin_unlock_bh(&vxlan->hash_lock); return err; diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index c0b0f525c87c..d5dc823f781e 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -456,16 +456,16 @@ static int state_check(u32 state, struct dscc4_dev_priv *dpriv, int ret = 0; if (debug > 1) { - if (SOURCE_ID(state) != dpriv->dev_id) { - printk(KERN_DEBUG "%s (%s): Source Id=%d, state=%08x\n", - dev->name, msg, SOURCE_ID(state), state ); + if (SOURCE_ID(state) != dpriv->dev_id) { + printk(KERN_DEBUG "%s (%s): Source Id=%d, state=%08x\n", + dev->name, msg, SOURCE_ID(state), state); ret = -1; - } - if (state & 0x0df80c00) { - printk(KERN_DEBUG "%s (%s): state=%08x (UFO alert)\n", - dev->name, msg, state); + } + if (state & 0x0df80c00) { + printk(KERN_DEBUG "%s (%s): state=%08x (UFO alert)\n", + dev->name, msg, state); ret = -1; - } + } } return ret; } @@ -1760,25 +1760,25 @@ try: } else { /* SccEvt */ if (debug > 1) { //FIXME: verifier la presence de tous les evenements - static struct { - u32 mask; - const char *irq_name; - } evts[] = { - { 0x00008000, "TIN"}, - { 0x00000020, "RSC"}, - { 0x00000010, "PCE"}, - { 0x00000008, "PLLA"}, - { 0, NULL} - }, *evt; - - for (evt = evts; evt->irq_name; evt++) { - if (state & evt->mask) { + static struct { + u32 mask; + const char *irq_name; + } evts[] = { + { 0x00008000, "TIN"}, + { 0x00000020, "RSC"}, + { 0x00000010, "PCE"}, + { 0x00000008, "PLLA"}, + { 0, NULL} + }, *evt; + + for (evt = evts; 
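/*
 * Editorial aside: vxlan_changelink() above now validates the whole
 * new configuration before touching state and installs the new
 * default FDB entry before deleting the old one, so a vetoed
 * notification leaves the previous entry in place. Ordering sketch;
 * all helpers here are hypothetical.
 */
struct cfg;

int validate(const struct cfg *c);
int add_default_entry(const struct cfg *c);     /* fallible / vetoable */
void del_default_entry(const struct cfg *c);    /* cannot fail */
void apply(const struct cfg *c);

static int example_change_default(const struct cfg *old, const struct cfg *new)
{
        int err;

        err = validate(new);            /* no state touched on failure */
        if (err)
                return err;

        err = add_default_entry(new);
        if (err)
                return err;             /* old entry still in place */

        del_default_entry(old);
        apply(new);
        return 0;
}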
evt->irq_name; evt++) { + if (state & evt->mask) { printk(KERN_DEBUG "%s: %s\n", - dev->name, evt->irq_name); - if (!(state &= ~evt->mask)) - goto try; + dev->name, evt->irq_name); + if (!(state &= ~evt->mask)) + goto try; + } } - } } else { if (!(state &= ~0x0000c03c)) goto try; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1377d085ef99..a57b9a853aab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1152,7 +1152,8 @@ struct dev_ifalias { * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid, u16 flags) + * const unsigned char *addr, u16 vid, u16 flags, + * struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, @@ -1376,7 +1377,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4e8add270200..593d1b9c33a8 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -126,6 +126,7 @@ void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack); int netlink_has_listeners(struct sock *sk, unsigned int group); +bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, diff --git a/include/linux/phy.h b/include/linux/phy.h index ef20aeea10cc..1f3873a2ff29 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -306,11 +306,6 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * - irq or timer will set NOLINK if link goes down * - phy_stop moves to HALTED * - * CHANGELINK: PHY experienced a change in link state - * - timer moves to RUNNING if link - * - timer moves to NOLINK if the link is down - * - phy_stop moves to HALTED - * * HALTED: PHY is up, but no polling or interrupts are done. Or * PHY is in an error state. 
* @@ -329,7 +324,6 @@ enum phy_state { PHY_RUNNING, PHY_NOLINK, PHY_FORCING, - PHY_CHANGELINK, PHY_RESUMING }; @@ -959,7 +953,6 @@ int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); -int phy_stop_interrupts(struct phy_device *phydev); int phy_restart_aneg(struct phy_device *phydev); int phy_reset_after_clk_enable(struct phy_device *phydev); @@ -1185,4 +1178,7 @@ module_exit(phy_module_exit) #define module_phy_driver(__phy_drivers) \ phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers)) +bool phy_driver_is_genphy(struct phy_device *phydev); +bool phy_driver_is_genphy_10g(struct phy_device *phydev); + #endif /* __PHY_H */ diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h index 8eaef2f2b691..c3b61ead41f2 100644 --- a/include/linux/platform_data/b53.h +++ b/include/linux/platform_data/b53.h @@ -20,7 +20,7 @@ #define __B53_H #include <linux/kernel.h> -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> struct b53_platform_data { /* Must be first such that dsa_register_switch() can access it */ diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h new file mode 100644 index 000000000000..d4d9bf2060a6 --- /dev/null +++ b/include/linux/platform_data/dsa.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DSA_PDATA_H +#define __DSA_PDATA_H + +struct device; +struct net_device; + +#define DSA_MAX_SWITCHES 4 +#define DSA_MAX_PORTS 12 +#define DSA_RTABLE_NONE -1 + +struct dsa_chip_data { + /* + * How to access the switch configuration registers. + */ + struct device *host_dev; + int sw_addr; + + /* + * Reference to network devices + */ + struct device *netdev[DSA_MAX_PORTS]; + + /* set to size of eeprom if supported by the switch */ + int eeprom_len; + + /* Device tree node pointer for this specific switch chip + * used during switch setup in case additional properties + * and resources needs to be used + */ + struct device_node *of_node; + + /* + * The names of the switch's ports. Use "cpu" to + * designate the switch port that the cpu is connected to, + * "dsa" to indicate that this port is a DSA link to + * another switch, NULL to indicate the port is unused, + * or any other string to indicate this is a physical port. + */ + char *port_names[DSA_MAX_PORTS]; + struct device_node *port_dn[DSA_MAX_PORTS]; + + /* + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. + */ + s8 rtable[DSA_MAX_SWITCHES]; +}; + +struct dsa_platform_data { + /* + * Reference to a Linux network interface that connects + * to the root switch chip of the tree. + */ + struct device *netdev; + struct net_device *of_netdev; + + /* + * Info structs describing each of the switch chips + * connected via this network interface. 
+ */ + int nr_chips; + struct dsa_chip_data *chip; +}; + + +#endif /* __DSA_PDATA_H */ diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h index f63af2955ea0..963730b44aea 100644 --- a/include/linux/platform_data/mv88e6xxx.h +++ b/include/linux/platform_data/mv88e6xxx.h @@ -2,7 +2,7 @@ #ifndef __DSA_MV88E6XXX_H #define __DSA_MV88E6XXX_H -#include <net/dsa.h> +#include <linux/platform_data/dsa.h> struct dsa_mv88e6xxx_pdata { /* Must be first, such that dsa_register_switch() can access this diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 91c536a01b56..5f818fda96bd 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -38,7 +38,6 @@ #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/skbuff.h> -#include <linux/types.h> #include <asm/byteorder.h> #include <linux/io.h> #include <linux/compiler.h> diff --git a/include/net/devlink.h b/include/net/devlink.h index 67f4293bc970..a81a1b7a67d7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -30,6 +30,7 @@ struct devlink { struct list_head param_list; struct list_head region_list; u32 snapshot_id; + struct list_head reporter_list; struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; @@ -423,6 +424,36 @@ struct devlink_region; typedef void devlink_snapshot_data_dest_t(const void *data); +struct devlink_health_buffer; +struct devlink_health_reporter; + +/** + * struct devlink_health_reporter_ops - Reporter operations + * @name: reporter name + * dump_size: dump buffer size allocated by the devlink + * diagnose_size: diagnose buffer size allocated by the devlink + * recover: callback to recover from reported error + * if priv_ctx is NULL, run a full recover + * dump: callback to dump an object + * if priv_ctx is NULL, run a full dump + * diagnose: callback to diagnose the current status + */ + +struct devlink_health_reporter_ops { + char *name; + unsigned int dump_size; + unsigned int diagnose_size; + int (*recover)(struct devlink_health_reporter *reporter, + void *priv_ctx); + int (*dump)(struct devlink_health_reporter *reporter, + struct devlink_health_buffer **buffers_array, + unsigned int buffer_size, unsigned int num_buffers, + void *priv_ctx); + int (*diagnose)(struct devlink_health_reporter *reporter, + struct devlink_health_buffer **buffers_array, + unsigned int buffer_size, unsigned int num_buffers); +}; + struct devlink_ops { int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, @@ -584,6 +615,34 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor); +int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, + int attrtype); +void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer); +void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer); +int devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, + char *name); +int devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, + u8 value); +int devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, + u32 value); +int devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, + u64 value); +int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, + char *name); +int devlink_health_buffer_put_value_data(struct 
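/*
 * Editorial aside: a driver-side sketch against the health API added
 * here. The create() signature (ops, graceful period, auto-recover
 * flag, driver priv) is as defined in this patch; the "example" names,
 * the 2000 ms period, and the ERR_PTR convention on failure are
 * assumptions.
 */
struct example_nic {
        struct devlink_health_reporter *tx_reporter;
};

int example_restart_tx_queues(struct example_nic *nic);  /* hypothetical */

static int example_tx_recover(struct devlink_health_reporter *reporter,
                              void *priv_ctx)
{
        struct example_nic *nic = devlink_health_reporter_priv(reporter);

        /* priv_ctx == NULL means a full recover, per the ops doc above. */
        return example_restart_tx_queues(nic);
}

static const struct devlink_health_reporter_ops example_tx_ops = {
        .name = "tx",
        .recover = example_tx_recover,
};

static int example_health_init(struct devlink *devlink,
                               struct example_nic *nic)
{
        nic->tx_reporter = devlink_health_reporter_create(devlink,
                                                          &example_tx_ops,
                                                          2000, true, nic);
        return PTR_ERR_OR_ZERO(nic->tx_reporter);
}

/* In the datapath, a detected stall feeds devlink health: */
static void example_tx_timeout(struct example_nic *nic)
{
        devlink_health_report(nic->tx_reporter, "TX queue stalled", NULL);
}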
devlink_health_buffer *buffer, + void *data, int len); +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv); +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter); +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx); #else static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, @@ -844,6 +903,91 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, return 0; } +static inline int +devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, + int attrtype) +{ + return 0; +} + +static inline void +devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer) +{ +} + +static inline void +devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer) +{ +} + +static inline int +devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, + char *name) +{ + return 0; +} + +static inline int +devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, + u8 value) +{ + return 0; +} + +static inline int +devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, + u32 value) +{ + return 0; +} + +static inline int +devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, + u64 value) +{ + return 0; +} + +static inline int +devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, + char *name) +{ + return 0; +} + +static inline int +devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, + void *data, int len) +{ + return 0; +} + +static inline struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv) +{ + return NULL; +} + +static inline void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ +} + +static inline void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return NULL; +} + +static inline int +devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + return 0; +} #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index b3eefe8e18fd..7f2a668ef2cc 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -21,6 +21,7 @@ #include <linux/ethtool.h> #include <linux/net_tstamp.h> #include <linux/phy.h> +#include <linux/platform_data/dsa.h> #include <net/devlink.h> #include <net/switchdev.h> @@ -44,66 +45,6 @@ enum dsa_tag_protocol { DSA_TAG_LAST, /* MUST BE LAST */ }; -#define DSA_MAX_SWITCHES 4 -#define DSA_MAX_PORTS 12 - -#define DSA_RTABLE_NONE -1 - -struct dsa_chip_data { - /* - * How to access the switch configuration registers. - */ - struct device *host_dev; - int sw_addr; - - /* - * Reference to network devices - */ - struct device *netdev[DSA_MAX_PORTS]; - - /* set to size of eeprom if supported by the switch */ - int eeprom_len; - - /* Device tree node pointer for this specific switch chip - * used during switch setup in case additional properties - * and resources needs to be used - */ - struct device_node *of_node; - - /* - * The names of the switch's ports. 
Use "cpu" to - * designate the switch port that the cpu is connected to, - * "dsa" to indicate that this port is a DSA link to - * another switch, NULL to indicate the port is unused, - * or any other string to indicate this is a physical port. - */ - char *port_names[DSA_MAX_PORTS]; - struct device_node *port_dn[DSA_MAX_PORTS]; - - /* - * An array of which element [a] indicates which port on this - * switch should be used to send packets to that are destined - * for switch a. Can be NULL if there is only one switch chip. - */ - s8 rtable[DSA_MAX_SWITCHES]; -}; - -struct dsa_platform_data { - /* - * Reference to a Linux network interface that connects - * to the root switch chip of the tree. - */ - struct device *netdev; - struct net_device *of_netdev; - - /* - * Info structs describing each of the switch chips - * connected via this network interface. - */ - int nr_chips; - struct dsa_chip_data *chip; -}; - struct packet_type; struct dsa_switch; @@ -208,6 +149,11 @@ struct dsa_port { * Original copy of the master netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; + + /* + * Original copy of the master netdev net_device_ops + */ + const struct net_device_ops *orig_ndo_ops; }; struct dsa_switch { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9481f2c142e2..7a4957599874 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -580,8 +580,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); -void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, - unsigned int len); +void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index a7fdab5ee6c3..63843ae5dc81 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -195,7 +195,8 @@ int switchdev_port_obj_del(struct net_device *dev, int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, - struct switchdev_notifier_info *info); + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack); int register_switchdev_blocking_notifier(struct notifier_block *nb); int unregister_switchdev_blocking_notifier(struct notifier_block *nb); @@ -267,7 +268,8 @@ static inline int unregister_switchdev_notifier(struct notifier_block *nb) static inline int call_switchdev_notifiers(unsigned long val, struct net_device *dev, - struct switchdev_notifier_info *info) + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) { return NOTIFY_DONE; } diff --git a/include/net/tcp.h b/include/net/tcp.h index e0a65c067662..5c950180d61b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -406,8 +406,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); void tcp_data_ready(struct sock *sk); +#ifdef CONFIG_MMU int tcp_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); +#endif void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); diff --git a/include/net/tls.h b/include/net/tls.h index 
2a6ac8d642af..90bf52db573e 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -145,12 +145,13 @@ struct tls_sw_context_tx { struct tls_sw_context_rx { struct crypto_aead *aead_recv; struct crypto_wait async_wait; - struct strparser strp; + struct sk_buff_head rx_list; /* list of decrypted 'data' records */ void (*saved_data_ready)(struct sock *sk); struct sk_buff *recv_pkt; u8 control; + int async_capable; bool decrypted; atomic_t decrypt_pending; bool async_notify; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 236403eb5ba6..09767819c3d4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -428,7 +428,8 @@ struct switchdev_notifier_vxlan_fdb_info { int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, struct switchdev_notifier_vxlan_fdb_info *fdb_info); int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, - struct notifier_block *nb); + struct notifier_block *nb, + struct netlink_ext_ack *extack); void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni); #else @@ -440,7 +441,8 @@ vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, } static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, - struct notifier_block *nb) + struct notifier_block *nb, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 44acfbca1266..7e39d2fc7c75 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -46,6 +46,65 @@ TRACE_EVENT(devlink_hwmsg, (int) __entry->len, __get_dynamic_array(buf), __entry->len) ); +TRACE_EVENT(devlink_health_report, + TP_PROTO(const struct devlink *devlink, const char *reporter_name, + const char *msg), + + TP_ARGS(devlink, reporter_name, msg), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) + __string(reporter_name, msg) + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(reporter_name, reporter_name); + __assign_str(msg, msg); + ), + + TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s", + __get_str(bus_name), __get_str(dev_name), + __get_str(driver_name), __get_str(reporter_name), + __get_str(msg)) +); + +TRACE_EVENT(devlink_health_recover_aborted, + TP_PROTO(const struct devlink *devlink, const char *reporter_name, + bool health_state, u64 time_since_last_recover), + + TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) + __string(reporter_name, reporter_name) + __field(bool, health_state) + __field(u64, time_since_last_recover) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(reporter_name, reporter_name); + __entry->health_state = health_state; + __entry->time_since_last_recover = time_since_last_recover; + ), + + TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover = %llu recover aborted", + __get_str(bus_name), __get_str(dev_name), + __get_str(driver_name), 
__get_str(reporter_name), + __entry->health_state, + __entry->time_since_last_recover) +); + #endif /* _TRACE_DEVLINK_H */ /* This part must be outside protection */ @@ -64,6 +123,9 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink, { } +static inline void trace_devlink_health(const char *msg) +{ +} #endif /* _TRACE_DEVLINK_H */ #endif diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index a12692e5f7a8..3066ab3853a8 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -110,4 +110,6 @@ #define SO_TXTIME 61 #define SCM_TXTIME SO_TXTIME +#define SO_BINDTOIFINDEX 62 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6e52d3660654..6b26bb2ce4dc 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -89,6 +89,13 @@ enum devlink_command { DEVLINK_CMD_REGION_DEL, DEVLINK_CMD_REGION_READ, + DEVLINK_CMD_HEALTH_REPORTER_GET, + DEVLINK_CMD_HEALTH_REPORTER_SET, + DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -285,6 +292,24 @@ enum devlink_attr { DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME, /* string */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* dynamic */ + + DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ + DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ + DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ + DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, /* u8 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 95d0db2a8350..02ac251be8c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -333,12 +333,19 @@ enum { /* Basic filter */ +struct tc_basic_pcnt { + __u64 rcnt; + __u64 rhit; +}; + enum { TCA_BASIC_UNSPEC, TCA_BASIC_CLASSID, TCA_BASIC_EMATCHES, TCA_BASIC_ACT, TCA_BASIC_POLICE, + TCA_BASIC_PCNT, + TCA_BASIC_PAD, __TCA_BASIC_MAX }; @@ -527,11 +534,17 @@ enum { /* Match-all classifier */ +struct tc_matchall_pcnt { + __u64 rhit; +}; + enum { TCA_MATCHALL_UNSPEC, TCA_MATCHALL_CLASSID, TCA_MATCHALL_ACT, TCA_MATCHALL_FLAGS, + TCA_MATCHALL_PCNT, + TCA_MATCHALL_PAD, __TCA_MATCHALL_MAX, }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e14767500ea..00573cc46c98 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -915,7 +915,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlh_flags) + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack 
*extack) { struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index eabf8bf28a3f..00deef7fc1f3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -573,7 +573,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlh_flags); + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 035ff59d9cbd..4d2b9eb7604a 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -113,7 +113,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, info.added_by_user = added_by_user; info.offloaded = offloaded; notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; - call_switchdev_notifiers(notifier_type, dev, &info.info); + call_switchdev_notifiers(notifier_type, dev, &info.info, NULL); } void diff --git a/net/core/devlink.c b/net/core/devlink.c index abb0da9d7b4b..60248a53c0ad 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3597,6 +3597,1012 @@ out: return 0; } +#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN) +#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2) +#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE) +#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024 + +struct devlink_health_buffer { + void *data; + u64 offset; + u64 bytes_left; + u64 bytes_left_metadata; + u64 max_nested_depth; + u64 curr_nest; +}; + +struct devlink_health_buffer_desc { + int attrtype; + u16 len; + u8 nla_type; + u8 nest_end; + int value[0]; +}; + +static void +devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list, + u64 num_of_buffers) +{ + u64 i; + + for (i = 0; i < num_of_buffers; i++) { + memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE); + buffers_list[i]->offset = 0; + buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE; + buffers_list[i]->bytes_left_metadata = + DEVLINK_HEALTH_BUFFER_DATA_SIZE; + buffers_list[i]->max_nested_depth = 0; + buffers_list[i]->curr_nest = 0; + } +} + +static void +devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list, + u64 size); + +static struct devlink_health_buffer ** +devlink_health_buffers_create(u64 size) +{ + struct devlink_health_buffer **buffers_list; + u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size); + u64 i; + + buffers_list = kcalloc(num_of_buffers, + sizeof(struct devlink_health_buffer *), + GFP_KERNEL); + if (!buffers_list) + return NULL; + + for (i = 0; i < num_of_buffers; i++) { + struct devlink_health_buffer *buffer; + void *data; + + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL); + if (!buffer || !data) { + kfree(buffer); + kfree(data); + goto buffers_cleanup; + } + buffers_list[i] = buffer; + buffer->data = data; + } + devlink_health_buffers_reset(buffers_list, num_of_buffers); + + return buffers_list; + +buffers_cleanup: + 
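
The sizing scheme above is worth making concrete: each buffer carries
DEVLINK_HEALTH_BUFFER_SIZE bytes, split evenly between payload data and
descriptor metadata, so a reporter declaring a dump/diagnose size is given
DIV_ROUND_UP(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE) buffers. A rough
standalone sketch of that arithmetic (GENL_HDRLEN hardcoded to 4 here
purely for illustration):

    #include <stdio.h>

    #define GENL_HDRLEN 4 /* assumed; really NLMSG_ALIGN(sizeof(struct genlmsghdr)) */
    #define BUFFER_SIZE (4096 - GENL_HDRLEN)
    #define BUFFER_DATA_SIZE (BUFFER_SIZE / 2)
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int dump_size = 16 * 1024; /* hypothetical driver dump size */

            /* mirrors DEVLINK_HEALTH_SIZE_TO_BUFFERS(): 16 KiB -> 9 buffers */
            printf("%u\n", DIV_ROUND_UP(dump_size, BUFFER_DATA_SIZE));
            return 0;
    }
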
devlink_health_buffers_destroy(buffers_list, i); + kfree(buffers_list); + return NULL; +} + +static void +devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list, + u64 num_of_buffers) +{ + u64 i; + + for (i = 0; i < num_of_buffers; i++) { + kfree(buffers_list[i]->data); + kfree(buffers_list[i]); + } +} + +void +devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer, + int len) +{ + buffer->offset += len; +} + +/* In order to store a nest, need two descriptors, for start and end */ +#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2) + +int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer, + int len, int metadata_len) +{ + if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE) + return -EINVAL; + + if (buffer->bytes_left < len || + buffer->bytes_left_metadata < metadata_len) + return -ENOMEM; + + return 0; +} + +static struct devlink_health_buffer_desc * +devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer) +{ + return buffer->data + buffer->offset; +} + +int +devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, + int attrtype) +{ + struct devlink_health_buffer_desc *desc; + int err; + + err = devlink_health_buffer_verify_len(buffer, 0, + DEVLINK_HEALTH_BUFFER_NEST_SIZE); + if (err) + return err; + + if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT && + attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR && + attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE && + attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY) + return -EINVAL; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + + desc->attrtype = attrtype; + buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE; + devlink_health_buffer_offset_inc(buffer, sizeof(*desc)); + + buffer->curr_nest++; + buffer->max_nested_depth = max(buffer->max_nested_depth, + buffer->curr_nest); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start); + +enum devlink_health_buffer_nest_end_cancel { + DEVLINK_HEALTH_BUFFER_NEST_END = 1, + DEVLINK_HEALTH_BUFFER_NEST_CANCEL, +}; + +static void +devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer, + enum devlink_health_buffer_nest_end_cancel nest) +{ + struct devlink_health_buffer_desc *desc; + + WARN_ON(!buffer->curr_nest); + buffer->curr_nest--; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc->nest_end = nest; + devlink_health_buffer_offset_inc(buffer, sizeof(*desc)); +} + +void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer) +{ + devlink_health_buffer_nest_end_cancel(buffer, + DEVLINK_HEALTH_BUFFER_NEST_END); +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end); + +void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer) +{ + devlink_health_buffer_nest_end_cancel(buffer, + DEVLINK_HEALTH_BUFFER_NEST_CANCEL); +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel); + +int +devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, + char *name) +{ + struct devlink_health_buffer_desc *desc; + int err; + + err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1, + sizeof(*desc)); + if (err) + return err; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME; + desc->nla_type = NLA_NUL_STRING; + desc->len = strlen(name) + 1; + memcpy(&desc->value, name, desc->len); + devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len); + + buffer->bytes_left_metadata 
-= sizeof(*desc); + buffer->bytes_left -= (strlen(name) + 1); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name); + +static int +devlink_health_buffer_put_value(struct devlink_health_buffer *buffer, + u8 nla_type, void *value, int len) +{ + struct devlink_health_buffer_desc *desc; + int err; + + err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc)); + if (err) + return err; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA; + desc->nla_type = nla_type; + desc->len = len; + memcpy(&desc->value, value, len); + devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len); + + buffer->bytes_left_metadata -= sizeof(*desc); + buffer->bytes_left -= len; + + return 0; +} + +int +devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, + u8 value) +{ + int err; + + err = devlink_health_buffer_put_value(buffer, NLA_U8, &value, + sizeof(value)); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8); + +int +devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, + u32 value) +{ + int err; + + err = devlink_health_buffer_put_value(buffer, NLA_U32, &value, + sizeof(value)); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32); + +int +devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, + u64 value) +{ + int err; + + err = devlink_health_buffer_put_value(buffer, NLA_U64, &value, + sizeof(value)); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64); + +int +devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, + char *name) +{ + int err; + + if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK) + return -EINVAL; + + err = devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name, + strlen(name) + 1); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string); + +int +devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, + void *data, int len) +{ + int err; + + if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK) + return -EINVAL; + + err = devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data); + +static int +devlink_health_buffer_fill_data(struct sk_buff *skb, + struct devlink_health_buffer_desc *desc) +{ + int err = -EINVAL; + + switch (desc->nla_type) { + case NLA_U8: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + *(u8 *)desc->value); + break; + case NLA_U32: + err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + *(u32 *)desc->value); + break; + case NLA_U64: + err = nla_put_u64_64bit(skb, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + *(u64 *)desc->value, DEVLINK_ATTR_PAD); + break; + case NLA_NUL_STRING: + err = nla_put_string(skb, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + (char *)&desc->value); + break; + case NLA_BINARY: + err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + desc->len, (void *)&desc->value); + break; + } + + return err; +} + +static int +devlink_health_buffer_fill_type(struct sk_buff *skb, + struct devlink_health_buffer_desc *desc) +{ + int err = -EINVAL; + + switch (desc->nla_type) { + case NLA_U8: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_U8); + break; + case NLA_U32: + err = 
nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_U32); + break; + case NLA_U64: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_U64); + break; + case NLA_NUL_STRING: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_NUL_STRING); + break; + case NLA_BINARY: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_BINARY); + break; + } + + return err; +} + +static inline struct devlink_health_buffer_desc * +devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc) +{ + return (void *)&desc->value + desc->len; +} + +static int +devlink_health_buffer_prepare_skb(struct sk_buff *skb, + struct devlink_health_buffer *buffer) +{ + struct devlink_health_buffer_desc *last_desc, *desc; + struct nlattr **buffer_nlattr; + int err = -EMSGSIZE; + int i = 0; + + buffer_nlattr = kcalloc(buffer->max_nested_depth, + sizeof(*buffer_nlattr), GFP_KERNEL); + if (!buffer_nlattr) + return -ENOMEM; + + last_desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc = buffer->data; + while (desc != last_desc) { + switch (desc->attrtype) { + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT: + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR: + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE: + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY: + buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype); + if (!buffer_nlattr[i]) + goto out; + i++; + break; + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA: + err = devlink_health_buffer_fill_data(skb, desc); + if (err) + goto out; + err = devlink_health_buffer_fill_type(skb, desc); + if (err) + goto out; + break; + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME: + err = nla_put_string(skb, desc->attrtype, + (char *)&desc->value); + if (err) + goto out; + break; + default: + WARN_ON(!desc->nest_end); + WARN_ON(i <= 0); + if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END) + nla_nest_end(skb, buffer_nlattr[--i]); + else + nla_nest_cancel(skb, buffer_nlattr[--i]); + break; + } + desc = devlink_health_buffer_get_next_desc(desc); + } + + err = 0; + +out: + kfree(buffer_nlattr); + return err; +} + +static int +devlink_health_buffer_snd(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_health_buffer **buffers_array, + u64 num_of_buffers) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + void *hdr; + int err; + u64 i; + + for (i = 0; i < num_of_buffers; i++) { + /* Skip buffer if driver did not fill it up with any data */ + if (!buffers_array[i]->offset) + continue; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + goto nla_put_failure; + + err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]); + if (err) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + err = genlmsg_reply(skb, info); + if (err) + return err; + } + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + err = genlmsg_reply(skb, info); + if (err) + return err; + + return 0; + +nla_put_failure: + err = -EIO; + nlmsg_free(skb); + return err; +} + +struct devlink_health_reporter { + struct list_head list; + struct devlink_health_buffer **dump_buffers_array; + struct mutex dump_lock; /* lock parallel read/write from dump buffers */ + struct 
devlink_health_buffer **diagnose_buffers_array; + struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */ + void *priv; + const struct devlink_health_reporter_ops *ops; + struct devlink *devlink; + u64 graceful_period; + bool auto_recover; + u8 health_state; + u8 dump_avail; + u64 dump_ts; + u64 error_count; + u64 recovery_count; + u64 last_recovery_ts; +}; + +enum devlink_health_reporter_state { + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY, + DEVLINK_HEALTH_REPORTER_STATE_ERROR, +}; + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return reporter->priv; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); + +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + struct devlink_health_reporter *reporter; + + list_for_each_entry(reporter, &devlink->reporter_list, list) + if (!strcmp(reporter->ops->name, reporter_name)) + return reporter; + return NULL; +} + +/** + * devlink_health_reporter_create - create devlink health reporter + * + * @devlink: devlink + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @auto_recover: auto recover when error occurs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&devlink->lock); + if (devlink_health_reporter_find_by_name(devlink, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + if (WARN_ON(ops->dump && !ops->dump_size) || + WARN_ON(ops->diagnose && !ops->diagnose_size) || + WARN_ON(auto_recover && !ops->recover) || + WARN_ON(graceful_period && !ops->recover)) { + reporter = ERR_PTR(-EINVAL); + goto unlock; + } + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) { + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + + if (ops->dump) { + reporter->dump_buffers_array = + devlink_health_buffers_create(ops->dump_size); + if (!reporter->dump_buffers_array) { + kfree(reporter); + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + } + + if (ops->diagnose) { + reporter->diagnose_buffers_array = + devlink_health_buffers_create(ops->diagnose_size); + if (!reporter->diagnose_buffers_array) { + devlink_health_buffers_destroy(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size)); + kfree(reporter); + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + } + + list_add_tail(&reporter->list, &devlink->reporter_list); + mutex_init(&reporter->dump_lock); + mutex_init(&reporter->diagnose_lock); + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = auto_recover; +unlock: + mutex_unlock(&devlink->lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_create); + +/** + * devlink_health_reporter_destroy - destroy devlink health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + mutex_lock(&reporter->devlink->lock); + list_del(&reporter->list); + devlink_health_buffers_destroy(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); + devlink_health_buffers_destroy(reporter->diagnose_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size)); + kfree(reporter); + 
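
As a usage sketch, a driver would define an ops struct and register its
reporter at probe time. All my_drv_* names below are hypothetical and not
part of this patch; the create() signature and ops fields are the ones
used above:

    /* Hypothetical driver-side registration of an "fw" health reporter. */
    static int my_drv_fw_recover(struct devlink_health_reporter *reporter,
                                 void *priv_ctx)
    {
            struct my_drv *drv = devlink_health_reporter_priv(reporter);

            return my_drv_reset_fw(drv); /* assumed driver helper */
    }

    static const struct devlink_health_reporter_ops my_drv_fw_ops = {
            .name = "fw",
            .recover = my_drv_fw_recover,
            /* .dump/.dump_size and .diagnose/.diagnose_size are optional,
             * but the WARN_ON()s in devlink_health_reporter_create()
             * insist that a size accompanies each callback.
             */
    };

    static int my_drv_probe_health(struct my_drv *drv, struct devlink *devlink)
    {
            /* 500 msec grace period, auto-recovery enabled */
            drv->fw_reporter = devlink_health_reporter_create(devlink,
                                                              &my_drv_fw_ops,
                                                              500, true, drv);
            return PTR_ERR_OR_ZERO(drv->fw_reporter);
    }
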
mutex_unlock(&reporter->devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); + +static int +devlink_health_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->recover) + return -EOPNOTSUPP; + + err = reporter->ops->recover(reporter, priv_ctx); + if (err) + return err; + + reporter->recovery_count++; + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; + reporter->last_recovery_ts = jiffies; + + return 0; +} + +static int devlink_health_do_dump(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->dump) + return 0; + + if (reporter->dump_avail) + return 0; + + devlink_health_buffers_reset(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); + err = reporter->ops->dump(reporter, reporter->dump_buffers_array, + DEVLINK_HEALTH_BUFFER_SIZE, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size), + priv_ctx); + if (!err) { + reporter->dump_avail = true; + reporter->dump_ts = jiffies; + } + + return err; +} + +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + struct devlink *devlink = reporter->devlink; + int err = 0; + + /* write a log message of the current error */ + WARN_ON(!msg); + trace_devlink_health_report(devlink, reporter->ops->name, msg); + reporter->error_count++; + + /* abort if the previous error wasn't recovered */ + if (reporter->auto_recover && + (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || + jiffies - reporter->last_recovery_ts < + msecs_to_jiffies(reporter->graceful_period))) { + trace_devlink_health_recover_aborted(devlink, + reporter->ops->name, + reporter->health_state, + jiffies - + reporter->last_recovery_ts); + return -ECANCELED; + } + + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + + mutex_lock(&reporter->dump_lock); + /* store current dump of current error, for later analysis */ + devlink_health_do_dump(reporter, priv_ctx); + mutex_unlock(&reporter->dump_lock); + + if (reporter->auto_recover) + err = devlink_health_reporter_recover(reporter, priv_ctx); + + return err; +} +EXPORT_SYMBOL_GPL(devlink_health_report); + +static struct devlink_health_reporter * +devlink_health_reporter_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + char *reporter_name; + + if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) + return NULL; + + reporter_name = + nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); + return devlink_health_reporter_find_by_name(devlink, reporter_name); +} + +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if 
(nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, + reporter->dump_avail)) + goto reporter_nest_cancel; + if (reporter->dump_avail && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_end(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct sk_buff *msg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + info->snd_portid, info->snd_seq, + 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int +devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_health_reporter *reporter; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(reporter, &devlink->reporter_list, + list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, + reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int +devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->recover && + (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) + return -EINVAL; + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) + reporter->graceful_period = + nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) + reporter->auto_recover = + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + + return 0; +} + +static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + 
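
When the driver then hits the error, a single call hands the event to the
infrastructure, which traces it, snapshots a dump and, policy permitting,
runs recovery, exactly as devlink_health_report() above implements. A
hypothetical call site (my_drv_* names assumed):

    static void my_drv_handle_fw_event(struct my_drv *drv, u32 syndrome)
    {
            struct my_drv_fw_err_ctx ctx = { .syndrome = syndrome }; /* assumed type */

            /* Returns -ECANCELED if a previous error is still unrecovered
             * or the grace period has not elapsed; priv_ctx is forwarded
             * untouched to the reporter's dump/recover callbacks.
             */
            devlink_health_report(drv->fw_reporter,
                                  "FW fatal syndrome reported", &ctx);
    }
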
if (!reporter) + return -EINVAL; + + return devlink_health_reporter_recover(reporter, NULL); +} + +static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + u64 num_of_buffers; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->diagnose) + return -EOPNOTSUPP; + + num_of_buffers = + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size); + + mutex_lock(&reporter->diagnose_lock); + devlink_health_buffers_reset(reporter->diagnose_buffers_array, + num_of_buffers); + + err = reporter->ops->diagnose(reporter, + reporter->diagnose_buffers_array, + DEVLINK_HEALTH_BUFFER_SIZE, + num_of_buffers); + if (err) + goto out; + + err = devlink_health_buffer_snd(info, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + 0, reporter->diagnose_buffers_array, + num_of_buffers); + if (err) + goto out; + + mutex_unlock(&reporter->diagnose_lock); + return 0; + +out: + mutex_unlock(&reporter->diagnose_lock); + return err; +} + +static void +devlink_health_dump_clear(struct devlink_health_reporter *reporter) +{ + reporter->dump_avail = false; + reporter->dump_ts = 0; + devlink_health_buffers_reset(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); +} + +static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + u64 num_of_buffers; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + num_of_buffers = + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size); + + mutex_lock(&reporter->dump_lock); + err = devlink_health_do_dump(reporter, NULL); + if (err) + goto out; + + err = devlink_health_buffer_snd(info, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + 0, reporter->dump_buffers_array, + num_of_buffers); + +out: + mutex_unlock(&reporter->dump_lock); + return err; +} + +static int +devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + mutex_lock(&reporter->dump_lock); + devlink_health_dump_clear(reporter); + mutex_unlock(&reporter->dump_lock); + return 0; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -3622,6 +4628,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -3842,6 +4851,51 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, + .doit = 
devlink_nl_cmd_health_reporter_get_doit, + .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .doit = devlink_nl_cmd_health_reporter_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .doit = devlink_nl_cmd_health_reporter_recover_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .doit = devlink_nl_cmd_health_reporter_diagnose_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .doit = devlink_nl_cmd_health_reporter_dump_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -3882,6 +4936,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->resource_list); INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); + INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); return devlink; } diff --git a/net/core/dst.c b/net/core/dst.c index 81ccf20e2826..a263309df115 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -98,8 +98,12 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, struct dst_entry *dst; if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) + if (ops->gc(ops)) { + printk_ratelimited(KERN_NOTICE "Route cache is full: " + "consider increasing sysctl " + "net.ipv[4|6].route.max_size.\n"); return NULL; + } } dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b02fb19df2cc..17f36317363d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -778,6 +778,41 @@ nla_put_failure: return -EMSGSIZE; } +static int rtnl_net_valid_getid_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETNSA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETNSA_PID: + case NETNSA_FD: + case NETNSA_NSID: + case NETNSA_TARGET_NSID: + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request"); + return -EINVAL; + } + } + + return 0; +} + static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -793,8 +828,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *msg; int err; - err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - 
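
To round out the reporter picture, here is a sketch of what a .diagnose
callback could look like from the driver side, built only from the buffer
helpers exported above (hypothetical reporter; one name/value pair):

    static int my_drv_fw_diagnose(struct devlink_health_reporter *reporter,
                                  struct devlink_health_buffer **buffers,
                                  u64 buffer_size, u64 num_buffers)
    {
            struct my_drv *drv = devlink_health_reporter_priv(reporter);
            struct devlink_health_buffer *buf = buffers[0];
            int err;

            err = devlink_health_buffer_nest_start(buf,
                            DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
            if (err)
                    return err;
            err = devlink_health_buffer_put_object_name(buf, "fw_state");
            if (err)
                    goto cancel;
            err = devlink_health_buffer_nest_start(buf,
                            DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
            if (err)
                    goto cancel;
            err = devlink_health_buffer_put_value_u32(buf,
                            my_drv_fw_state(drv)); /* assumed helper */
            if (err)
                    goto cancel_value;

            devlink_health_buffer_nest_end(buf); /* VALUE */
            devlink_health_buffer_nest_end(buf); /* PAIR */
            return 0;

    cancel_value:
            devlink_health_buffer_nest_cancel(buf);
    cancel:
            devlink_health_buffer_nest_cancel(buf);
            return err;
    }
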
rtnl_net_policy, extack); + err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); if (err < 0) return err; if (tb[NETNSA_PID]) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ea1bed08ede..f5a98082ac7a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3242,6 +3242,53 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } +static int rtnl_valid_getlink_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifinfomsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG(extack, "Invalid header for get link"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); + + ifm = nlmsg_data(nlh); + if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || + ifm->ifi_change) { + NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); + if (err) + return err; + + for (i = 0; i <= IFLA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFLA_IFNAME: + case IFLA_EXT_MASK: + case IFLA_TARGET_NETNSID: + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request"); + return -EINVAL; + } + } + + return 0; +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -3256,7 +3303,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 ext_filter_mask = 0; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = rtnl_valid_getlink_req(skb, nlh, tb, extack); if (err < 0) return err; @@ -3639,7 +3686,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, const struct net_device_ops *ops = br_dev->netdev_ops; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags); + nlh->nlmsg_flags, extack); if (err) goto out; else @@ -3651,7 +3698,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags); + nlh->nlmsg_flags, + extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); @@ -4901,6 +4949,40 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, return size; } +static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, + bool is_dump, struct netlink_ext_ack *extack) +{ + struct if_stats_msg *ifsm; + + if (nlh->nlmsg_len < sizeof(*ifsm)) { + NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); + return -EINVAL; + } + + if (!strict_check) + return 0; + + ifsm = nlmsg_data(nlh); + + /* only requests using strict checks can pass data to influence + * the dump. The legacy exception is filter_mask. 
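
All of these *_valid_*_req() helpers hinge on netlink_strict_get_check(),
which reports whether the requesting socket opted in to strict checking
via the NETLINK_GET_STRICT_CHK socket option; legacy sockets keep the old
permissive parse. A userspace sketch of opting in:

    #include <sys/socket.h>
    #include <linux/netlink.h>

    static int open_strict_rtnl_socket(void)
    {
            int one = 1;
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

            if (fd < 0)
                    return -1;
            /* From here on, GET requests carrying stray header bits or
             * unsupported attributes fail with the extack messages added
             * above instead of being silently ignored.
             */
            if (setsockopt(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
                           &one, sizeof(one)) < 0) {
                    /* older kernel: falls back to legacy parsing */
            }
            return fd;
    }
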
+ */ + if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) { + NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*ifsm))) { + NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); + return -EINVAL; + } + if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { + NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); + return -EINVAL; + } + + return 0; +} + static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4912,8 +4994,10 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, u32 filter_mask; int err; - if (nlmsg_len(nlh) < sizeof(*ifsm)) - return -EINVAL; + err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), + false, extack); + if (err) + return err; ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) @@ -4965,27 +5049,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; - if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) { - NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); - return -EINVAL; - } + err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack); + if (err) + return err; ifsm = nlmsg_data(cb->nlh); - - /* only requests using strict checks can pass data to influence - * the dump. The legacy exception is filter_mask. - */ - if (cb->strict_check) { - if (ifsm->pad1 || ifsm->pad2 || ifsm->ifindex) { - NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); - return -EINVAL; - } - if (nlmsg_attrlen(cb->nlh, sizeof(*ifsm))) { - NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); - return -EINVAL; - } - } - filter_mask = ifsm->filter_mask; if (!filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index d6d5c20d7044..e76ed8df9f13 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -78,11 +78,9 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, { int i = src->sg.start; struct scatterlist *sge = sk_msg_elem(src, i); + struct scatterlist *sgd = NULL; u32 sge_len, sge_off; - if (sk_msg_full(dst)) - return -ENOSPC; - while (off) { if (sge->length > off) break; @@ -94,16 +92,27 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, } while (len) { - if (sk_msg_full(dst)) - return -ENOSPC; - sge_len = sge->length - off; - sge_off = sge->offset + off; if (sge_len > len) sge_len = len; + + if (dst->sg.end) + sgd = sk_msg_elem(dst, dst->sg.end - 1); + + if (sgd && + (sg_page(sge) == sg_page(sgd)) && + (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) { + sgd->length += sge_len; + dst->sg.size += sge_len; + } else if (!sk_msg_full(dst)) { + sge_off = sge->offset + off; + sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); + } else { + return -ENOSPC; + } + off = 0; len -= sge_len; - sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); sk_mem_charge(sk, sge_len); sk_msg_iter_var_next(i); if (i == src->sg.end && len) diff --git a/net/core/sock.c b/net/core/sock.c index 6aa2e7e0b4fb..900e8a9435f5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -520,14 +520,11 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_setbindtodevice(struct sock *sk, char __user *optval, - int optlen) +static int sock_setbindtodevice_locked(struct sock *sk, int ifindex) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES struct net 
*net = sock_net(sk); - char devname[IFNAMSIZ]; - int index; /* Sorry... */ ret = -EPERM; @@ -535,6 +532,32 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, goto out; ret = -EINVAL; + if (ifindex < 0) + goto out; + + sk->sk_bound_dev_if = ifindex; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + sk_dst_reset(sk); + + ret = 0; + +out: +#endif + + return ret; +} + +static int sock_setbindtodevice(struct sock *sk, char __user *optval, + int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + struct net *net = sock_net(sk); + char devname[IFNAMSIZ]; + int index; + + ret = -EINVAL; if (optlen < 0) goto out; @@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, } lock_sock(sk); - sk->sk_bound_dev_if = index; - if (sk->sk_prot->rehash) - sk->sk_prot->rehash(sk); - sk_dst_reset(sk); + ret = sock_setbindtodevice_locked(sk, index); release_sock(sk); - ret = 0; - out: #endif @@ -1055,6 +1073,10 @@ set_rcvbuf: } break; + case SO_BINDTOIFINDEX: + ret = sock_setbindtodevice_locked(sk, val); + break; + default: ret = -ENOPROTOOPT; break; @@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, SOF_TXTIME_REPORT_ERRORS : 0; break; + case SO_BINDTOIFINDEX: + v.val = sk->sk_bound_dev_if; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1726,7 +1752,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); - atomic64_set(&newsk->sk_cookie, 0); if (likely(newsk->sk_net_refcnt)) sock_inuse_add(sock_net(newsk), 1); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index f78fe58eafc8..6cd3737593a6 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -282,7 +282,7 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) goto err_inval; - fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); + fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); err = -ENOBUFS; if (fi == NULL) goto failure; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 026a05774bf7..1f4972dab9f2 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -103,7 +103,8 @@ static inline void dsa_legacy_unregister(void) { } int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); diff --git a/net/dsa/master.c b/net/dsa/master.c index 71bb15f491c8..79e97d2f2d9b 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -126,6 +126,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } +static int dsa_master_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + if (snprintf(name, len, "p%d", cpu_dp->index) >= len) + return -EINVAL; + + return 0; +} + static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -158,6 +169,38 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) cpu_dp->orig_ethtool_ops = NULL; } +static int dsa_master_ndo_setup(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch *ds = cpu_dp->ds; + struct 
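
For the new socket option above: SO_BINDTOIFINDEX behaves like
SO_BINDTODEVICE but takes an interface index rather than a name, and the
bound index can be read back with getsockopt(). A userspace sketch:

    #include <net/if.h>
    #include <sys/socket.h>

    #ifndef SO_BINDTOIFINDEX
    #define SO_BINDTOIFINDEX 62 /* value from this patch */
    #endif

    static int bind_to_iface(int fd, const char *name)
    {
            int ifindex = if_nametoindex(name);

            if (!ifindex)
                    return -1;
            /* equivalent to SO_BINDTODEVICE "name", minus the in-kernel
             * name lookup; an index of 0 removes the binding
             */
            return setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
                              &ifindex, sizeof(ifindex));
    }
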
net_device_ops *ops; + + if (dev->netdev_ops->ndo_get_phys_port_name) + return 0; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + cpu_dp->orig_ndo_ops = dev->netdev_ops; + if (cpu_dp->orig_ndo_ops) + memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops)); + + ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name; + + dev->netdev_ops = ops; + + return 0; +} + +static void dsa_master_ndo_teardown(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + dev->netdev_ops = cpu_dp->orig_ndo_ops; + cpu_dp->orig_ndo_ops = NULL; +} + static ssize_t tagging_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -223,16 +266,27 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) if (ret) return ret; + ret = dsa_master_ndo_setup(dev); + if (ret) + goto out_err_ethtool_teardown; + ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); if (ret) - dsa_master_ethtool_teardown(dev); + goto out_err_ndo_teardown; + + return ret; +out_err_ndo_teardown: + dsa_master_ndo_teardown(dev); +out_err_ethtool_teardown: + dsa_master_ethtool_teardown(dev); return ret; } void dsa_master_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); + dsa_master_ndo_teardown(dev); dsa_master_ethtool_teardown(dev); dsa_master_reset_mtu(dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a3fcc1d01615..91de3a663226 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1009,7 +1009,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -1450,7 +1451,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) } fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, - &fdb_info->info); + &fdb_info->info, NULL); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e258a00b4a3d..cd027639df2f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2063,13 +2063,49 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv4_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv4_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv4_devconf *devconf; struct in_device *in_dev; @@ -2077,9 +2113,8 @@ static int 
inet_netconf_get_devconf(struct sk_buff *in_skb, int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv4_policy, extack); - if (err < 0) + err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); + if (err) goto errout; err = -EINVAL; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ddbf8c9a1abb..fb99002c3d4e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2467,6 +2467,61 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } +static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || + rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || + rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_TABLE: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); + return -EINVAL; + } + } + + return 0; +} + static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2475,18 +2530,14 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; - struct rtmsg *rtm; __be32 src, grp; u32 tableid; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; - rtm = nlmsg_data(nlh); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; tableid = tb[RTA_TABLE] ? 
nla_get_u32(tb[RTA_TABLE]) : 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce92f73cf104..99be68b15da0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2763,6 +2763,75 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, return skb; } +static int inet_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, + "ipv4: Invalid header for route get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || + rtm->rtm_table || rtm->rtm_protocol || + rtm->rtm_scope || rtm->rtm_type) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); + return -EINVAL; + } + + if (rtm->rtm_flags & ~(RTM_F_NOTIFY | + RTM_F_LOOKUP_TABLE | + RTM_F_FIB_MATCH)) { + NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_IIF: + case RTA_OIF: + case RTA_SRC: + case RTA_DST: + case RTA_IP_PROTO: + case RTA_SPORT: + case RTA_DPORT: + case RTA_MARK: + case RTA_UID: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2783,8 +2852,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err; int mark; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, - extack); + err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2079145a3b7c..5f099c9d04e5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2572,14 +2572,16 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->rcv_rtt_last_tsecr = 0; tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; - tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tp->delivered_ce = 0; @@ -2603,6 +2605,23 @@ int tcp_disconnect(struct sock *sk, int flags) tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; tp->reord_seen = 0; + tp->retrans_out = 0; + tp->sacked_out = 0; + tp->tlp_high_seq = 0; + tp->last_oow_ack_time = 0; + /* There's a bubble in the pipe until at least the first ACK. 
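
The extra fields cleared in tcp_disconnect() above matter because a
connected TCP socket can be disconnected and reused from userspace by
calling connect() with AF_UNSPEC, and state such as cwnd, RTO and the RACK
bookkeeping must then look freshly initialized. A sketch of that reuse
pattern:

    #include <sys/socket.h>

    /* Disconnect a connected TCP socket so it can be connected again;
     * this is the path that ends up in tcp_disconnect().
     */
    static int tcp_disconnect_sock(int fd)
    {
            struct sockaddr sa = { .sa_family = AF_UNSPEC };

            return connect(fd, &sa, sizeof(sa));
    }
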
*/ + tp->app_limited = ~0U; + tp->rack.mstamp = 0; + tp->rack.advanced = 0; + tp->rack.reo_wnd_steps = 1; + tp->rack.last_delivered = 0; + tp->rack.reo_wnd_persist = 0; + tp->rack.dsack_seen = 0; + tp->syn_data_acked = 0; + tp->rx_opt.saw_tstamp = 0; + tp->rx_opt.dsack = 0; + tp->rx_opt.num_sacks = 0; + /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 12affb7864d9..182595e2d40f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -479,43 +479,16 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_init_wl(newtp, treq->rcv_isn); - newtp->srtt_us = 0; - newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); - newicsk->icsk_rto = TCP_TIMEOUT_INIT; newicsk->icsk_ack.lrcvtime = tcp_jiffies32; - newtp->packets_out = 0; - newtp->retrans_out = 0; - newtp->sacked_out = 0; - newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = treq->txhash; - newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - newtp->snd_cwnd = TCP_INIT_CWND; - newtp->snd_cwnd_cnt = 0; - - /* There's a bubble in the pipe until at least the first ACK. */ - newtp->app_limited = ~0U; - tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; - newtp->rx_opt.saw_tstamp = 0; - - newtp->rx_opt.dsack = 0; - newtp->rx_opt.num_sacks = 0; - - newtp->urg_data = 0; - if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); @@ -556,13 +529,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; - newtp->syn_data_acked = 0; - newtp->rack.mstamp = 0; - newtp->rack.advanced = 0; - newtp->rack.reo_wnd_steps = 1; - newtp->rack.last_delivered = 0; - newtp->rack.reo_wnd_persist = 0; - newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 730bc44dbad9..6527f61f59ff 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (sk->sk_pacing_status != SK_PACING_NONE) { unsigned long rate = sk->sk_pacing_rate; @@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!skb || !tcp_skb_pcount(skb)); tp = tcp_sk(sk); - + prior_wstamp = tp->tcp_wstamp_ns; + tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (clone_it) { TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; @@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, return -ENOBUFS; } - prior_wstamp = tp->tcp_wstamp_ns; - tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; - inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); @@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) err = 
tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } + /* To avoid taking spuriously low RTT samples based on a timestamp + * for a transmit that never happened, always mark EVER_RETRANS + */ + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, TCP_SKB_CB(skb)->seq, segs, err); if (likely(!err)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; trace_tcp_retransmit_skb(sk, skb); } else if (err != -EBUSY) { NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); @@ -2963,13 +2963,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); - - /* Save stamp of the first retransmit. */ - if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_skb_timestamp(skb); - } + /* Save stamp of the first (attempted) retransmit. */ + if (!tp->retrans_stamp) + tp->retrans_stamp = tcp_skb_timestamp(skb); + if (tp->undo_retrans < 0) tp->undo_retrans = 0; tp->undo_retrans += tcp_skb_pcount(skb); @@ -3750,7 +3749,7 @@ void tcp_send_probe0(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); - unsigned long probe_max; + unsigned long timeout; int err; err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); @@ -3762,26 +3761,18 @@ void tcp_send_probe0(struct sock *sk) return; } + icsk->icsk_probes_out++; if (err <= 0) { if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) icsk->icsk_backoff++; - icsk->icsk_probes_out++; - probe_max = TCP_RTO_MAX; + timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, - * do not backoff and do not remember icsk_probes_out. - * Let local senders to fight for local resources. - * - * Use accumulated backoff yet. + * Let senders fight for local resources conservatively. 
*/ - if (!icsk->icsk_probes_out) - icsk->icsk_probes_out = 1; - probe_max = TCP_RESOURCE_PROBE_INTERVAL; - } - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - tcp_probe0_when(sk, probe_max), - TCP_RTO_MAX, - NULL); + timeout = TCP_RESOURCE_PROBE_INTERVAL; + } + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL); } int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 71a29e9c0620..d7399a89469d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,28 +22,14 @@ #include <linux/gfp.h> #include <net/tcp.h> -static u32 tcp_retransmit_stamp(const struct sock *sk) -{ - u32 start_ts = tcp_sk(sk)->retrans_stamp; - - if (unlikely(!start_ts)) { - struct sk_buff *head = tcp_rtx_queue_head(sk); - - if (!head) - return 0; - start_ts = tcp_skb_timestamp(head); - } - return start_ts; -} - static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 elapsed, start_ts; s32 remaining; - start_ts = tcp_retransmit_stamp(sk); - if (!icsk->icsk_user_timeout || !start_ts) + start_ts = tcp_sk(sk)->retrans_stamp; + if (!icsk->icsk_user_timeout) return icsk->icsk_rto; elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; remaining = icsk->icsk_user_timeout - elapsed; @@ -173,7 +159,20 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } - +static unsigned int tcp_model_timeout(struct sock *sk, + unsigned int boundary, + unsigned int rto_base) +{ + unsigned int linear_backoff_thresh, timeout; + + linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base); + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * rto_base; + else + timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + return jiffies_to_msecs(timeout); +} /** * retransmits_timed_out() - returns true if this connection has timed out * @sk: The current socket @@ -191,26 +190,15 @@ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, unsigned int timeout) { - const unsigned int rto_base = TCP_RTO_MIN; - unsigned int linear_backoff_thresh, start_ts; + unsigned int start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; - start_ts = tcp_retransmit_stamp(sk); - if (!start_ts) - return false; - - if (likely(timeout == 0)) { - linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); + start_ts = tcp_sk(sk)->retrans_stamp; + if (likely(timeout == 0)) + timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN); - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * rto_base; - else - timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; - timeout = jiffies_to_msecs(timeout); - } return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; } @@ -345,7 +333,6 @@ static void tcp_probe_timer(struct sock *sk) struct sk_buff *skb = tcp_send_head(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; - u32 start_ts; if (tp->packets_out || !skb) { icsk->icsk_probes_out = 0; @@ -360,12 +347,13 @@ static void tcp_probe_timer(struct sock *sk) * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). 
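
tcp_model_timeout() above centralizes the backoff arithmetic that retransmits_timed_out() used to open-code: it answers "how much time does the exponential backoff consume by step `boundary`, when the timer starts at rto_base, doubles each firing, and is clamped at TCP_RTO_MAX?". A standalone re-derivation with a worked value (this is an illustrative plain-C model, not the kernel function; with rto_base = 200 ms and a 120 s clamp, boundary = 3 gives (1 + 2 + 4 + 8) * 200 ms = 3 s):

	/* Cumulative backoff time: exponential while the doubled interval
	 * still fits under rto_max_ms, linear in rto_max_ms afterwards. */
	static unsigned int model_timeout_ms(unsigned int boundary,
					     unsigned int rto_base_ms,
					     unsigned int rto_max_ms)
	{
		unsigned int thresh = 0;

		/* thresh == ilog2(rto_max / rto_base): the last step that
		 * can still double without hitting the clamp. */
		while ((rto_base_ms << (thresh + 1)) <= rto_max_ms)
			thresh++;

		if (boundary <= thresh)
			return ((2U << boundary) - 1) * rto_base_ms;
		return ((2U << thresh) - 1) * rto_base_ms +
		       (boundary - thresh) * rto_max_ms;
	}

The (2 << n) - 1 factor is the closed form of the geometric series 1 + 2 + ... + 2^n, which is why tcp_probe_timer() below can reuse the same helper against icsk_probes_out to honor a user timeout without per-skb timestamps.
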
*/ - start_ts = tcp_skb_timestamp(skb); - if (!start_ts) - skb->skb_mstamp_ns = tp->tcp_clock_cache; - else if (icsk->icsk_user_timeout && - (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout) - goto abort; + if (icsk->icsk_user_timeout) { + u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out, + tcp_probe0_base(sk)); + + if (elapsed >= icsk->icsk_user_timeout) + goto abort; + } max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { @@ -395,6 +383,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int max_retries = icsk->icsk_syn_retries ? : sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; @@ -412,6 +401,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk) inet_rtx_syn_ack(sk, req); req->num_timeout++; icsk->icsk_retransmits++; + if (!tp->retrans_stamp) + tp->retrans_stamp = tcp_time_stamp(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } @@ -443,10 +434,8 @@ void tcp_retransmit_timer(struct sock *sk) */ return; } - if (!tp->packets_out) - goto out; - - WARN_ON(tcp_rtx_queue_empty(sk)); + if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk))) + return; tp->tlp_high_seq = 0; @@ -511,14 +500,13 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); + icsk->icsk_retransmits++; if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, - * do not backoff. + * Let senders fight for local resources conservatively. */ - if (!icsk->icsk_retransmits) - icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), + TCP_RESOURCE_PROBE_INTERVAL, TCP_RTO_MAX); goto out; } @@ -539,7 +527,6 @@ void tcp_retransmit_timer(struct sock *sk) * the 120 second clamps though! */ icsk->icsk_backoff++; - icsk->icsk_retransmits++; out_reset_timer: /* If stream is thin, use linear timeouts. 
Since 'icsk_backoff' is diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index be8b5b2157d8..e93cc0379201 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -21,18 +21,9 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->bind_ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(net, cfg->bind_ifindex); - if (!dev) { - err = -ENODEV; - goto error; - } - - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, - dev->name, strlen(dev->name) + 1); - dev_put(dev); - + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + (void *)&cfg->bind_ifindex, + sizeof(cfg->bind_ifindex)); if (err < 0) goto error; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93d5ad2b1a69..57198b3c86da 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet6_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) @@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlattr *tb[NETCONFA_MAX+1]; struct inet6_dev *in6_dev = NULL; struct net_device *dev = NULL; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy, extack); + err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -5179,6 +5214,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFA_TARGET_NETNSID: + case IFA_ADDRESS: + case IFA_LOCAL: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported 
attribute in get address request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -5199,8 +5280,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0d1ee82ee55b..d43d076c98f5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } +static int ip6addrlbl_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrlblmsg *ifal; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + + ifal = nlmsg_data(nlh); + if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFAL_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFAL_ADDRESS: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request"); + return -EINVAL; + } + } + + return 0; +} + static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b1be67ca6768..c465d8a102f2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -524,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } -static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, +static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { struct erspan_base_hdr *ershdr; @@ -611,7 +611,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) return 0; goto out; } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index ad1a9ccd4b44..25430c991cea 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -32,18 +32,9 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; } if (cfg->bind_ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(net, cfg->bind_ifindex); - if (!dev) { - err = -ENODEV; - goto error; - } - - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, - dev->name, strlen(dev->name) + 1); - dev_put(dev); - + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + (void *)&cfg->bind_ifindex, + sizeof(cfg->bind_ifindex)); if (err < 0) 
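
The udp_sock_create4()/udp_sock_create6() conversions above and just below replace SO_BINDTODEVICE, which forced a dev_get_by_index()/dev_put() round trip plus an extra -ENODEV failure path just to turn an index back into a name, with the index-based SO_BINDTOIFINDEX. The same option works from userspace; a sketch, where the interface name is only an example and the fallback define carries the asm-generic value (architectures with their own socket.h may differ):

	#include <net/if.h>
	#include <sys/socket.h>

	#ifndef SO_BINDTOIFINDEX
	#define SO_BINDTOIFINDEX 62
	#endif

	static int bind_to_ifindex(int fd, const char *ifname)
	{
		int ifindex = if_nametoindex(ifname);	/* e.g. "eth0" */

		if (!ifindex)
			return -1;
		/* Bind by index directly; no name lookup in the kernel. */
		return setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
				  &ifindex, sizeof(ifindex));
	}
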
goto error; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 964491cf3672..dc066fdf7e46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4812,6 +4812,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg) arg->cb->nlh->nlmsg_seq, flags); } +static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || + rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || + rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_IIF: + case RTA_OIF: + case RTA_MARK: + case RTA_UID: + case RTA_SPORT: + case RTA_DPORT: + case RTA_IP_PROTO: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4826,8 +4893,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct flowi6 fl6 = {}; bool fibmatch; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7d55d4c04088..2662a23c658e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1209,21 +1209,57 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, }; +static int mpls_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_mpls_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_mpls_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int mpls_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = 
sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX + 1]; - struct netconfmsg *ncm; struct net_device *dev; struct mpls_dev *mdev; struct sk_buff *skb; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_mpls_policy, extack); + err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; @@ -2236,6 +2272,64 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); } +static int mpls_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || + rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table || + rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + if (err) + return err; + + if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) { + NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_DST: + case RTA_NEWDST: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct netlink_ext_ack *extack) { @@ -2255,8 +2349,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, u8 n_labels; int err; - err = nlmsg_parse(in_nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c023d6120f6..8fa35df94c07 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1371,6 +1371,14 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) } EXPORT_SYMBOL_GPL(netlink_has_listeners); +bool netlink_strict_get_check(struct sk_buff *skb) +{ + const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk); + + return nlk->flags & NETLINK_F_STRICT_CHK; +} +EXPORT_SYMBOL_GPL(netlink_strict_get_check); + static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index c038e021a591..43849d752a1e 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -206,8 +206,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) return ERR_PTR(-EINVAL); /* Allocate and set up the meter before locking anything. 
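
The openvswitch meter allocation in the next hunk is a good illustration of struct_size() from <linux/overflow.h>: for a struct with a trailing flexible array it computes sizeof(*p) + n * sizeof(p->member[0]), but saturates to SIZE_MAX on overflow so the allocation fails cleanly instead of being undersized. A sketch on a hypothetical struct (types and field names are illustrative):

	#include <linux/overflow.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	struct meter_example {
		u16 n_bands;
		struct meter_band_example {
			u32 type;
			u64 rate;
		} bands[];			/* flexible array member */
	};

	static struct meter_example *meter_example_alloc(u16 n_bands)
	{
		struct meter_example *m;

		/* struct_size() is evaluated in a sizeof() context, so
		 * using `m` before it is assigned is fine and idiomatic. */
		m = kzalloc(struct_size(m, bands, n_bands), GFP_KERNEL);
		if (m)
			m->n_bands = n_bands;
		return m;
	}
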
*/ - meter = kzalloc(n_bands * sizeof(struct dp_meter_band) + - sizeof(*meter), GFP_KERNEL); + meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); if (!meter) return ERR_PTR(-ENOMEM); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 6a5dce8baf19..4a57fec6f306 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -18,6 +18,7 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/idr.h> +#include <linux/percpu.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> @@ -35,6 +36,7 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; + struct tc_basic_pcnt __percpu *pf; struct rcu_work rwork; }; @@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct basic_filter *f; list_for_each_entry_rcu(f, &head->flist, link) { + __this_cpu_inc(f->pf->rcnt); if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; + __this_cpu_inc(f->pf->rhit); *res = f->res; r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) @@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f) tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); + free_percpu(f->pf); kfree(f); } @@ -208,6 +213,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; fnew->handle = handle; + fnew->pf = alloc_percpu(struct tc_basic_pcnt); + if (!fnew->pf) { + err = -ENOMEM; + goto errout; + } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, extack); @@ -231,6 +241,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + free_percpu(fnew->pf); tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; @@ -265,8 +276,10 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { + struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; struct nlattr *nest; + int cpu; if (f == NULL) return skb->len; @@ -281,6 +294,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; + for_each_possible_cpu(cpu) { + struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu); + + gpf.rcnt += pf->rcnt; + gpf.rhit += pf->rhit; + } + + if (nla_put_64bit(skb, TCA_BASIC_PCNT, + sizeof(struct tc_basic_pcnt), + &gpf, TCA_BASIC_PAD)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 0e408ee9dcec..a1b803fd372e 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/percpu.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> @@ -22,6 +23,7 @@ struct cls_mall_head { u32 handle; u32 flags; unsigned int in_hw_count; + struct tc_matchall_pcnt __percpu *pf; struct rcu_work rwork; }; @@ -34,6 +36,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; *res = head->res; + __this_cpu_inc(head->pf->rhit); return tcf_exts_exec(skb, &head->exts, res); } @@ -46,6 +49,7 @@ static void __mall_destroy(struct cls_mall_head *head) { tcf_exts_destroy(&head->exts); tcf_exts_put_net(&head->exts); + free_percpu(head->pf); kfree(head); } @@ -192,6 +196,11 @@ 
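
The cls_basic counters above (and the cls_matchall ones in the next hunk) follow the standard lockless per-CPU statistics pattern: the RCU-protected fast path bumps a CPU-local counter with __this_cpu_inc(), so concurrent classifiers never contend on a shared cacheline, and only the slow dump path walks all CPUs to fold the values. A condensed sketch of the lifecycle, with a hypothetical counter struct:

	#include <linux/percpu.h>
	#include <linux/types.h>

	struct pcnt_example {
		u64 rcnt;	/* packets examined */
		u64 rhit;	/* packets matched */
	};

	/* slow path: one counter pair per possible CPU */
	static struct pcnt_example __percpu *pcnt_example_alloc(void)
	{
		return alloc_percpu(struct pcnt_example);
	}

	/* fast path, under RCU: no locks, no shared atomics */
	static void pcnt_example_hit(struct pcnt_example __percpu *pf)
	{
		__this_cpu_inc(pf->rcnt);
		__this_cpu_inc(pf->rhit);
	}

	/* dump path: fold the per-CPU values into one total */
	static u64 pcnt_example_total_hits(struct pcnt_example __percpu *pf)
	{
		u64 total = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			total += per_cpu_ptr(pf, cpu)->rhit;
		return total;
	}

The totals are approximate by design, a reader can race with in-flight increments, which is acceptable for statistics and far cheaper than a shared atomic.
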
static int mall_change(struct net *net, struct sk_buff *in_skb, handle = 1; new->handle = handle; new->flags = flags; + new->pf = alloc_percpu(struct tc_matchall_pcnt); + if (!new->pf) { + err = -ENOMEM; + goto err_alloc_percpu; + } err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, extack); @@ -214,6 +223,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, err_replace_hw_filter: err_set_parms: + free_percpu(new->pf); +err_alloc_percpu: tcf_exts_destroy(&new->exts); err_exts_init: kfree(new); @@ -270,8 +281,10 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { + struct tc_matchall_pcnt gpf = {}; struct cls_mall_head *head = fh; struct nlattr *nest; + int cpu; if (!head) return skb->len; @@ -289,6 +302,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags)) goto nla_put_failure; + for_each_possible_cpu(cpu) { + struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu); + + gpf.rhit += pf->rhit; + } + + if (nla_put_64bit(skb, TCA_MATCHALL_PCNT, + sizeof(struct tc_matchall_pcnt), + &gpf, TCA_MATCHALL_PAD)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &head->exts)) goto nla_put_failure; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7e4d1ccf4c87..03e26e8d0ec9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -758,8 +758,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return 0; } -void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, - unsigned int len) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5df9d1138ac9..cd78253de31d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -556,10 +556,11 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * Call all network notifier blocks. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, - struct switchdev_notifier_info *info) + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) { info->dev = dev; - info->extack = NULL; + info->extack = extack; return atomic_notifier_call_chain(&switchdev_notif_chain, val, info); } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); diff --git a/net/tipc/trace.c b/net/tipc/trace.c index 964823841efe..265f6a26aa3d 100644 --- a/net/tipc/trace.c +++ b/net/tipc/trace.c @@ -111,7 +111,7 @@ int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf) break; default: break; - }; + } i += scnprintf(buf + i, sz - i, " | %u", msg_src_droppable(hdr)); i += scnprintf(buf + i, sz - i, " %u", @@ -122,7 +122,7 @@ int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf) default: /* need more? 
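
The call_switchdev_notifiers() change above is small but load-bearing for the rest of this series: callers can now hand a struct netlink_ext_ack down the notifier chain, so a driver that cannot offload an object can veto it with a human-readable reason that reaches the user's netlink request (the mlxsw vxlan_fdb_veto.sh selftest near the end of this series greps for exactly such messages). A hedged sketch of the driver side; the handler shape and names are illustrative, not mlxsw's actual code:

	#include <linux/etherdevice.h>
	#include <linux/netlink.h>
	#include <net/switchdev.h>

	/* Inside a hypothetical switchdev driver's FDB handler: refuse an
	 * entry the hardware cannot program and say why via extack. */
	static int example_fdb_add(const struct switchdev_notifier_fdb_info *fdb_info,
				   struct netlink_ext_ack *extack)
	{
		if (is_multicast_ether_addr(fdb_info->addr)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "FDB: multicast MAC cannot be offloaded");
			return -EOPNOTSUPP;
		}
		return 0;
	}
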
*/ break; - }; + } i += scnprintf(buf + i, sz - i, "\n"); if (!more) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 11cdc8f7db63..86b9527c4826 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -124,6 +124,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) { struct aead_request *aead_req = (struct aead_request *)req; struct scatterlist *sgout = aead_req->dst; + struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_context *tls_ctx; struct scatterlist *sg; @@ -134,12 +135,16 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); ctx = tls_sw_ctx_rx(tls_ctx); - pending = atomic_dec_return(&ctx->decrypt_pending); /* Propagate if there was an err */ if (err) { ctx->async_wait.err = err; tls_err_abort(skb->sk, err); + } else { + struct strp_msg *rxm = strp_msg(skb); + + rxm->offset += tls_ctx->rx.prepend_size; + rxm->full_len -= tls_ctx->rx.overhead_size; } /* After using skb->sk to propagate sk through crypto async callback @@ -147,18 +152,21 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) */ skb->sk = NULL; - /* Release the skb, pages and memory allocated for crypto req */ - kfree_skb(skb); - /* Skip the first S/G entry as it points to AAD */ - for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { - if (!sg) - break; - put_page(sg_page(sg)); + /* Free the destination pages if skb was not decrypted inplace */ + if (sgout != sgin) { + /* Skip the first S/G entry as it points to AAD */ + for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { + if (!sg) + break; + put_page(sg_page(sg)); + } } kfree(aead_req); + pending = atomic_dec_return(&ctx->decrypt_pending); + if (!pending && READ_ONCE(ctx->async_notify)) complete(&ctx->async_wait.completion); } @@ -1020,8 +1028,8 @@ send_end: return copied ? copied : ret; } -int tls_sw_do_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +static int tls_sw_do_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) { long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -1143,16 +1151,6 @@ sendpage_end: return copied ? 
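
The reordering in tls_decrypt_done() above is a classic async-completion rule: atomic_dec_return(&ctx->decrypt_pending) is now the last thing the callback does, because the moment the count can reach zero the task parked in tls_sw_recvmsg() may resume and reuse or tear down the context; all record fixups (rxm->offset/full_len) and the freeing of non-in-place destination pages therefore have to happen first. The generic shape of the pattern, as a sketch with made-up type and field names:

	#include <linux/atomic.h>
	#include <linux/completion.h>

	struct async_ctx_example {
		atomic_t pending;
		bool notify;
		struct completion done;
	};

	static void async_done_example(struct async_ctx_example *ctx)
	{
		/* ... update results, release per-request resources ... */

		/* Must be last: once pending hits zero, the waiter owns
		 * ctx again and this callback must not touch it. */
		if (!atomic_dec_return(&ctx->pending) &&
		    READ_ONCE(ctx->notify))
			complete(&ctx->done);
	}
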
copied : ret; } -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) - return -ENOTSUPP; - - return tls_sw_do_sendpage(sk, page, offset, size, flags); -} - int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1281,7 +1279,7 @@ out: static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct iov_iter *out_iov, struct scatterlist *out_sg, - int *chunk, bool *zc) + int *chunk, bool *zc, bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1381,13 +1379,13 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, fallback_to_reg_recv: sgout = sgin; pages = 0; - *chunk = 0; + *chunk = data_len; *zc = false; } /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, skb, sgin, sgout, iv, - data_len, aead_req, *zc); + data_len, aead_req, async); if (err == -EINPROGRESS) return err; @@ -1400,7 +1398,8 @@ fallback_to_reg_recv: } static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct iov_iter *dest, int *chunk, bool *zc) + struct iov_iter *dest, int *chunk, bool *zc, + bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1413,7 +1412,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; #endif if (!ctx->decrypted) { - err = decrypt_internal(sk, skb, dest, NULL, chunk, zc); + err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); if (err < 0) { if (err == -EINPROGRESS) tls_advance_record_sn(sk, &tls_ctx->rx); @@ -1439,7 +1438,7 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb, bool zc = true; int chunk; - return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc); + return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false); } static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, @@ -1466,6 +1465,77 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, return true; } +/* This function traverses the rx_list in tls receive context to copies the + * decrypted data records into the buffer provided by caller zero copy is not + * true. Further, the records are removed from the rx_list if it is not a peek + * case and the record has been consumed completely. + */ +static int process_rx_list(struct tls_sw_context_rx *ctx, + struct msghdr *msg, + size_t skip, + size_t len, + bool zc, + bool is_peek) +{ + struct sk_buff *skb = skb_peek(&ctx->rx_list); + ssize_t copied = 0; + + while (skip && skb) { + struct strp_msg *rxm = strp_msg(skb); + + if (skip < rxm->full_len) + break; + + skip = skip - rxm->full_len; + skb = skb_peek_next(skb, &ctx->rx_list); + } + + while (len && skb) { + struct sk_buff *next_skb; + struct strp_msg *rxm = strp_msg(skb); + int chunk = min_t(unsigned int, rxm->full_len - skip, len); + + if (!zc || (rxm->full_len - skip) > len) { + int err = skb_copy_datagram_msg(skb, rxm->offset + skip, + msg, chunk); + if (err < 0) + return err; + } + + len = len - chunk; + copied = copied + chunk; + + /* Consume the data from record if it is non-peek case*/ + if (!is_peek) { + rxm->offset = rxm->offset + chunk; + rxm->full_len = rxm->full_len - chunk; + + /* Return if there is unconsumed data in the record */ + if (rxm->full_len - skip) + break; + } + + /* The remaining skip-bytes must lie in 1st record in rx_list. 
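
Restating the slightly garbled comment on process_rx_list() above as a contract (this is a reading of the code, not new semantics):

	/*
	 * process_rx_list() walks ctx->rx_list, i.e. records that are
	 * already decrypted but not yet consumed:
	 *  - the first @skip bytes are bypassed (by construction they can
	 *    only lie in the first queued record);
	 *  - up to @len bytes are copied into @msg, except when @zc says
	 *    the data already reached userspace via zero-copy -- and even
	 *    then, a record larger than the remaining buffer is copied;
	 *  - unless @is_peek, consumed bytes advance rxm->offset/full_len
	 *    and fully drained records are unlinked and freed; MSG_PEEK
	 *    leaves the queue untouched so a later read sees the same data.
	 */
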
+ * So from the 2nd record, 'skip' should be 0. + */ + skip = 0; + + if (msg) + msg->msg_flags |= MSG_EOR; + + next_skb = skb_peek_next(skb, &ctx->rx_list); + + if (!is_peek) { + skb_unlink(skb, &ctx->rx_list); + kfree_skb(skb); + } + + skb = next_skb; + } + + return copied; +} + int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -1476,7 +1546,8 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct sk_psock *psock; - unsigned char control; + unsigned char control = 0; + ssize_t decrypted = 0; struct strp_msg *rxm; struct sk_buff *skb; ssize_t copied = 0; @@ -1484,6 +1555,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err = 0; long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); + bool is_peek = flags & MSG_PEEK; int num_async = 0; flags |= nonblock; @@ -1494,11 +1566,28 @@ int tls_sw_recvmsg(struct sock *sk, psock = sk_psock_get(sk); lock_sock(sk); - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + /* Process pending decrypted records. It must be non-zero-copy */ + err = process_rx_list(ctx, msg, 0, len, false, is_peek); + if (err < 0) { + tls_err_abort(sk, err); + goto end; + } else { + copied = err; + } + + len = len - copied; + if (len) { + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + } else { + goto recv_end; + } + do { - bool zc = false; + bool retain_skb = false; bool async = false; + bool zc = false; + int to_decrypt; int chunk = 0; skb = tls_wait_data(sk, psock, flags, timeo, &err); @@ -1508,7 +1597,7 @@ int tls_sw_recvmsg(struct sock *sk, msg, len, flags); if (ret > 0) { - copied += ret; + decrypted += ret; len -= ret; continue; } @@ -1535,70 +1624,70 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } - if (!ctx->decrypted) { - int to_copy = rxm->full_len - tls_ctx->rx.overhead_size; + to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size; - if (!is_kvec && to_copy <= len && - likely(!(flags & MSG_PEEK))) - zc = true; + if (to_decrypt <= len && !is_kvec && !is_peek) + zc = true; - err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc); - if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); - goto recv_end; - } - - if (err == -EINPROGRESS) { - async = true; - num_async++; - goto pick_next_record; - } - - ctx->decrypted = true; + err = decrypt_skb_update(sk, skb, &msg->msg_iter, + &chunk, &zc, ctx->async_capable); + if (err < 0 && err != -EINPROGRESS) { + tls_err_abort(sk, EBADMSG); + goto recv_end; } - if (!zc) { - chunk = min_t(unsigned int, rxm->full_len, len); + if (err == -EINPROGRESS) { + async = true; + num_async++; + goto pick_next_record; + } else { + if (!zc) { + if (rxm->full_len > len) { + retain_skb = true; + chunk = len; + } else { + chunk = rxm->full_len; + } - err = skb_copy_datagram_msg(skb, rxm->offset, msg, - chunk); - if (err < 0) - goto recv_end; + err = skb_copy_datagram_msg(skb, rxm->offset, + msg, chunk); + if (err < 0) + goto recv_end; + + if (!is_peek) { + rxm->offset = rxm->offset + chunk; + rxm->full_len = rxm->full_len - chunk; + } + } } pick_next_record: - copied += chunk; + if (chunk > len) + chunk = len; + + decrypted += chunk; len -= chunk; - if (likely(!(flags & MSG_PEEK))) { - u8 control = ctx->control; - - /* For async, drop current skb reference */ - if (async) - skb = NULL; - - if (tls_sw_advance_skb(sk, skb, chunk)) { - /* Return full control message to - * userspace 
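
One orientation note for the tls_sw_recvmsg() rework above: the function now keeps two tallies, `copied` for bytes drained from ctx->rx_list at entry (always a plain copy) and `decrypted` for bytes taken off the wire during this call. Records that cannot be consumed immediately, because decryption is still in flight (async), the caller used MSG_PEEK, or the record exceeds the remaining buffer (retain_skb), are parked on rx_list and settled by the process_rx_list() calls at recv_end, after which the two tallies are merged into the return value.
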
before trying to parse - * another message type - */ - msg->msg_flags |= MSG_EOR; - if (control != TLS_RECORD_TYPE_DATA) - goto recv_end; - } else { - break; - } - } else { - /* MSG_PEEK right now cannot look beyond current skb - * from strparser, meaning we cannot advance skb here - * and thus unpause strparser since we'd loose original - * one. + + /* For async or peek case, queue the current skb */ + if (async || is_peek || retain_skb) { + skb_queue_tail(&ctx->rx_list, skb); + skb = NULL; + } + + if (tls_sw_advance_skb(sk, skb, chunk)) { + /* Return full control message to + * userspace before trying to parse + * another message type */ + msg->msg_flags |= MSG_EOR; + if (ctx->control != TLS_RECORD_TYPE_DATA) + goto recv_end; + } else { break; } /* If we have a new message from strparser, continue now. */ - if (copied >= target && !ctx->recv_pkt) + if (decrypted >= target && !ctx->recv_pkt) break; } while (len); @@ -1612,13 +1701,33 @@ recv_end: /* one of async decrypt failed */ tls_err_abort(sk, err); copied = 0; + decrypted = 0; + goto end; } } else { reinit_completion(&ctx->async_wait.completion); } WRITE_ONCE(ctx->async_notify, false); + + /* Drain records from the rx_list & copy if required */ + if (is_peek || is_kvec) + err = process_rx_list(ctx, msg, copied, + decrypted, false, is_peek); + else + err = process_rx_list(ctx, msg, 0, + decrypted, true, is_peek); + if (err < 0) { + tls_err_abort(sk, err); + copied = 0; + goto end; + } + + WARN_ON(decrypted != err); } + copied += decrypted; + +end: release_sock(sk); if (psock) sk_psock_put(sk, psock); @@ -1655,7 +1764,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (!ctx->decrypted) { - err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc); + err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); if (err < 0) { tls_err_abort(sk, EBADMSG); @@ -1842,6 +1951,7 @@ void tls_sw_release_resources_rx(struct sock *sk) if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; + skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); strp_stop(&ctx->strp); write_lock_bh(&sk->sk_callback_lock); @@ -1891,6 +2001,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct crypto_aead **aead; struct strp_callbacks cb; u16 nonce_size, tag_size, iv_size, rec_seq_size; + struct crypto_tfm *tfm; char *iv, *rec_seq; int rc = 0; @@ -1937,6 +2048,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) crypto_init_wait(&sw_ctx_rx->async_wait); crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; + skb_queue_head_init(&sw_ctx_rx->rx_list); aead = &sw_ctx_rx->aead_recv; } @@ -2004,6 +2116,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_aead; if (sw_ctx_rx) { + tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); + sw_ctx_rx->async_capable = + tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + /* Set up strparser */ memset(&cb, 0, sizeof(cb)); cb.rcv_msg = tls_queue; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 43a1dec08825..a60df252d3cc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -505,7 +505,7 @@ out: static int __vsock_bind_stream(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - static u32 port = 0; + static u32 port; struct sockaddr_vm new_addr; if (!port) diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh new file mode 100755 index 000000000000..749ba3cfda1d --- /dev/null +++ 
b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test vetoing of FDB entries that mlxsw can not offload. This exercises several +# different veto vectors to test various rollback scenarios in the vxlan driver. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + fdb_create_veto_test + fdb_replace_veto_test + fdb_append_veto_test + fdb_changelink_veto_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link set dev $swp1 up + ip link set dev $swp1 master br0 + ip link set dev $swp2 up + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link set dev vxlan0 master br0 +} + +cleanup() +{ + pre_cleanup + + ip link set dev vxlan0 nomaster + ip link del dev vxlan0 + + ip link set dev $swp2 down + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev br0 +} + +fdb_create_veto_test() +{ + RET=0 + + bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ + dst 198.51.100.2 2>/dev/null + check_fail $? "multicast MAC not rejected" + + bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ + dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? "multicast MAC rejected without extack" + + log_test "vxlan FDB veto - create" +} + +fdb_replace_veto_test() +{ + RET=0 + + bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \ + dst 198.51.100.2 + check_err $? "valid FDB rejected" + + bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ + dst 198.51.100.2 port 1234 2>/dev/null + check_fail $? "FDB with an explicit port not rejected" + + bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ + dst 198.51.100.2 port 1234 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "FDB with an explicit port rejected without extack" + + log_test "vxlan FDB veto - replace" +} + +fdb_append_veto_test() +{ + RET=0 + + bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \ + dst 198.51.100.2 + check_err $? "valid FDB rejected" + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ + dst 198.51.100.3 port 1234 2>/dev/null + check_fail $? "FDB with an explicit port not rejected" + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ + dst 198.51.100.3 port 1234 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "FDB with an explicit port rejected without extack" + + log_test "vxlan FDB veto - append" +} + +fdb_changelink_veto_test() +{ + RET=0 + + ip link set dev vxlan0 type vxlan \ + group 224.0.0.1 dev lo 2>/dev/null + check_fail $? "FDB with a multicast IP not rejected" + + ip link set dev vxlan0 type vxlan \ + group 224.0.0.1 dev lo 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "FDB with a multicast IP rejected without extack" + + log_test "vxlan FDB veto - changelink" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh new file mode 100755 index 000000000000..abb694397b86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel without key. +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. 
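
A note on the vxlan_fdb_veto.sh cases above: each test asserts two things, that the operation fails, and that the failure carried a driver extack message (check_fail on the exit status, then check_err on the grep). The redirection in `... 2>&1 >/dev/null | grep -q mlxsw_spectrum` is order-sensitive: `2>&1` first points stderr at the pipe, then `>/dev/null` re-points stdout only, so grep sees exactly the extack text that iproute2 prints on stderr. This is the userspace-visible payoff of the extack plumbing added to call_switchdev_notifiers() earlier in the series.
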
+ +ALL_TESTS="gre_flat4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 + sw2_flat_create gre $ol2 $ul2 +} + +gre_flat4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh new file mode 100755 index 000000000000..c4f373337e48 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS="gre_flat4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 key 233 + sw2_flat_create gre $ol2 $ul2 key 233 +} + +gre_flat4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh new file mode 100755 index 000000000000..a811130c0627 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS="gre_flat4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 ikey 111 okey 222 + sw2_flat_create gre $ol2 $ul2 ikey 222 okey 111 +} + +gre_flat4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh new file mode 100755 index 000000000000..05c5b3cf2f78 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. 
+# This test uses hierarchical topology for IP tunneling tests. See +# ipip_lib.sh for more details. + +ALL_TESTS="gre_hier4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create gre $ol1 $ul1 + sw2_hierarchical_create gre $ol2 $ul2 +} + +gre_hier4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre hierarchical" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh new file mode 100755 index 000000000000..9b105dbca32a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ipip_lib.sh for more details. + +ALL_TESTS="gre_hier4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create gre $ol1 $ul1 key 22 + sw2_hierarchical_create gre $ol2 $ul2 key 22 +} + +gre_hier4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre hierarchical with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh new file mode 100755 index 000000000000..e275d25bd83a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ipip_lib.sh for more details. 
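
A caution when reading the hierarchical variants: ipip_hier_gre_key.sh and ipip_hier_gre_keys.sh reuse the "Test IP-in-IP GRE tunnels without key" header comment even though they create keyed tunnels (`key 22` and `ikey 111 okey 222` respectively); the ping_test labels ("with key", "with ikey/okey") reflect what is actually exercised. The structural difference from the flat scripts lives in ipip_lib.sh below: the hierarchical topology binds each tunnel to a dummy device in a separate underlay VRF, so encapsulated packets are routed in a different table than the overlay traffic.
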
+ +ALL_TESTS="gre_hier4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create gre $ol1 $ul1 ikey 111 okey 222 + sw2_hierarchical_create gre $ol2 $ul2 ikey 222 okey 111 +} + +gre_hier4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre hierarchical with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh new file mode 100644 index 000000000000..30f36a57bae6 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh @@ -0,0 +1,349 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Handles creation and destruction of IP-in-IP or GRE tunnels over the given +# topology. Supports both flat and hierarchical models. +# +# Flat Model: +# Overlay and underlay share the same VRF. +# SW1 uses default VRF so tunnel has no bound dev. +# SW2 uses non-default VRF tunnel has a bound dev. +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | | +# | + g1a (gre) | +# | loc=192.0.2.65 | +# | rem=192.0.2.66 --. | +# | tos=inherit | | +# | .------------------' | +# | | | +# | v | +# | + $ul1.111 (vlan) | +# | | 192.0.2.129/28 | +# | \ | +# | \_______ | +# | | | +# |VRF default + $ul1 | +# +------------|------------+ +# | +# +------------|------------+ +# | SW2 + $ul2 | +# | _______| | +# | / | +# | / | +# | + $ul2.111 (vlan) | +# | ^ 192.0.2.130/28 | +# | | | +# | | | +# | '------------------. | +# | + g2a (gre) | | +# | loc=192.0.2.66 | | +# | rem=192.0.2.65 --' | +# | tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | VRF v$ol2 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# +-------------------------+ +# +# Hierarchical model: +# The tunnel is bound to a device in a different VRF +# +# +---------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-------+ +# | +# +-------------------|-------+ +# | SW1 | | +# | +-----------------|-----+ | +# | | $ol1 + | | +# | | 192.0.2.2/28 | | +# | | | | +# | | + g1a (gre) | | +# | | rem=192.0.2.66 | | +# | | tos=inherit | | +# | | loc=192.0.2.65 | | +# | | ^ | | +# | | VRF v$ol1 | | | +# | +-----------|-----------+ | +# | | | +# | +-----------|-----------+ | +# | | VRF v$ul1 | | | +# | | | | | +# | | | | | +# | | v | | +# | | dummy1 + | | +# | | 192.0.2.65 | | +# | | .-------' | | +# | | | | | +# | | v | | +# | | + $ul1.111 (vlan) | | +# | | | 192.0.2.129/28 | | +# | | \ | | +# | | \_____ | | +# | | | | | +# | | + $ul1 | | +# | +----------|------------+ | +# +------------|--------------+ +# | +# +------------|--------------+ +# | SW2 | | +# | +----------|------------+ | +# | | + $ul2 | | +# | | _____| | | +# | | / | | +# | | / | | +# | | | $ul2.111 (vlan) | | +# | | + 192.0.2.130/28 | | +# | | ^ | | +# | | | | | +# | | '-------. 
| | +# | | dummy2 + | | +# | | 192.0.2.66 | | +# | | ^ | | +# | | | | | +# | | | | | +# | | VRF v$ul2 | | | +# | +-----------|-----------+ | +# | | | +# | +-----------|-----------+ | +# | | VRF v$ol2 | | | +# | | | | | +# | | v | | +# | | g2a (gre)+ | | +# | | loc=192.0.2.66 | | +# | | rem=192.0.2.65 | | +# | | tos=inherit | | +# | | | | +# | | $ol2 + | | +# | | 192.0.2.17/28 | | | +# | +-----------------|-----+ | +# +-------------------|-------+ +# | +# +-------------------|-------+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# +---------------------------+ +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 +} + +h1_destroy() +{ + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 +} + +h2_destroy() +{ + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 +} + +sw1_flat_create() +{ + local type=$1; shift + local ol1=$1; shift + local ul1=$1; shift + + ip link set dev $ol1 up + __addr_add_del $ol1 add "192.0.2.2/28" + + ip link set dev $ul1 up + vlan_create $ul1 111 "" 192.0.2.129/28 + + tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit "$@" + ip link set dev g1a up + __addr_add_del g1a add "192.0.2.65/32" + + ip route add 192.0.2.66/32 via 192.0.2.130 + + ip route add 192.0.2.16/28 nexthop dev g1a +} + +sw1_flat_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip route del 192.0.2.16/28 + + ip route del 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + __simple_if_fini $ol1 192.0.2.2/28 +} + +sw2_flat_create() +{ + local type=$1; shift + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 192.0.2.17/28 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + + tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 \ + "$@" + __simple_if_init g2a v$ol2 192.0.2.66/32 + + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a +} + +sw2_flat_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip route del vrf v$ol2 192.0.2.0/28 + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 +} + +sw1_hierarchical_create() +{ + local type=$1; shift + local ol1=$1; shift + local ul1=$1; shift + + simple_if_init $ol1 192.0.2.2/28 + simple_if_init $ul1 + ip link add name dummy1 type dummy + __simple_if_init dummy1 v$ul1 192.0.2.65/32 + + vlan_create $ul1 111 v$ul1 192.0.2.129/28 + tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit dev dummy1 \ + "$@" + ip link set dev g1a master v$ol1 + + ip route add vrf v$ul1 192.0.2.66/32 via 192.0.2.130 + ip route add vrf v$ol1 192.0.2.16/28 nexthop dev g1a +} + +sw1_hierarchical_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip route del vrf v$ol1 192.0.2.16/28 + ip route del vrf v$ul1 192.0.2.66/32 + + tunnel_destroy g1a + vlan_destroy $ul1 111 + + __simple_if_fini dummy1 192.0.2.65/32 + ip link del dev dummy1 + + simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 +} + +sw2_hierarchical_create() +{ + local type=$1; shift + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 192.0.2.17/28 + simple_if_init $ul2 + + 
ip link add name dummy2 type dummy + __simple_if_init dummy2 v$ul2 192.0.2.66/32 + + vlan_create $ul2 111 v$ul2 192.0.2.130/28 + tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev dummy2 \ + "$@" + ip link set dev g2a master v$ol2 + + ip route add vrf v$ul2 192.0.2.65/32 via 192.0.2.129 + ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a +} + +sw2_hierarchical_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip route del vrf v$ol2 192.0.2.0/28 + ip route del vrf v$ul2 192.0.2.65/32 + + tunnel_destroy g2a + vlan_destroy $ul2 111 + + __simple_if_fini dummy2 192.0.2.66/32 + ip link del dev dummy2 + + simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 +} + +topo_mtu_change() +{ + local mtu=$1 + + ip link set mtu $mtu dev $h1 + ip link set mtu $mtu dev $ol1 + ip link set mtu $mtu dev g1a + ip link set mtu $mtu dev $ul1 + ip link set mtu $mtu dev $ul1.111 + ip link set mtu $mtu dev $h2 + ip link set mtu $mtu dev $ol2 + ip link set mtu $mtu dev g2a + ip link set mtu $mtu dev $ul2 + ip link set mtu $mtu dev $ul2.111 +} + +test_mtu_change() +{ + local encap=$1; shift + + RET=0 + + ping_do $h1 192.0.2.18 "-s 1800 -w 3" + check_fail $? "ping $encap should not pass with size 1800" + + RET=0 + + topo_mtu_change 2000 + ping_do $h1 192.0.2.18 "-s 1800 -w 3" + check_err $? + log_test "ping $encap packet size 1800 after MTU change" +} diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index fac68d710f35..ff68ed19c0ef 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -452,10 +452,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1); + EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), -1); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1); + EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), -1); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } @@ -565,10 +567,10 @@ TEST_F(tls, recv_peek_large_buf_mult_recs) len = strlen(test_str_second) + 1; EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); - len = sizeof(buf); + len = strlen(test_str) + 1; memset(buf, 0, len); - EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1); - + EXPECT_NE((len = recv(self->cfd, buf, len, + MSG_PEEK | MSG_WAITALL)), -1); len = strlen(test_str) + 1; EXPECT_EQ(memcmp(test_str, buf, len), 0); } |
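
Two closing notes on the selftests. test_mtu_change() in ipip_lib.sh proves the negative first, a 1800-byte ping must fail at the initial (typically 1500-byte) MTU, then raises every device on the path to 2000 and expects the same ping to pass, so an accidentally oversized default MTU would be caught. And the tls.c changes switch the partial-read and peek tests to MSG_WAITALL because, after the receive-path rework earlier in this series, a plain recv() may legitimately return less than requested once records are queued and drained via rx_list; MSG_WAITALL keeps the byte-for-byte assertions deterministic. A sketch of the flag's effect (standard sockets API, nothing TLS-specific):

	#include <sys/socket.h>
	#include <sys/types.h>

	/* Block until `want` bytes arrive (or EOF/error/signal), instead
	 * of returning whatever single record happens to be ready. */
	static ssize_t recv_exact(int fd, void *buf, size_t want)
	{
		return recv(fd, buf, want, MSG_WAITALL);
	}
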