diff options
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/00-INDEX | 4 | ||||
-rw-r--r-- | Documentation/networking/alias.rst | 49 | ||||
-rw-r--r-- | Documentation/networking/alias.txt | 40 | ||||
-rw-r--r-- | Documentation/networking/bridge.rst (renamed from Documentation/networking/bridge.txt) | 6 | ||||
-rw-r--r-- | Documentation/networking/can_ucan_protocol.rst | 332 | ||||
-rw-r--r-- | Documentation/networking/index.rst | 6 | ||||
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 34 | ||||
-rw-r--r-- | Documentation/networking/net_failover.rst | 111 | ||||
-rw-r--r-- | Documentation/networking/netdev-FAQ.rst | 259 | ||||
-rw-r--r-- | Documentation/networking/netdev-FAQ.txt | 244 | ||||
-rw-r--r-- | Documentation/networking/scaling.txt | 61 | ||||
-rw-r--r-- | Documentation/networking/ti-cpsw.txt | 540 |
12 files changed, 1328 insertions, 358 deletions
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 2b89d91b376f..02a323c43261 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -18,8 +18,6 @@ README.ipw2200 - README for the Intel PRO/Wireless 2915ABG and 2200BG driver. README.sb1000 - info on General Instrument/NextLevel SURFboard1000 cable modem. -alias.txt - - info on using alias network devices. altera_tse.txt - Altera Triple-Speed Ethernet controller. arcnet-hardware.txt @@ -140,8 +138,6 @@ multiqueue.txt - HOWTO for multiqueue network device support. netconsole.txt - The network console module netconsole.ko: configuration and notes. -netdev-FAQ.txt - - FAQ describing how to submit net changes to netdev mailing list. netdev-features.txt - Network interface features API description. netdevices.txt diff --git a/Documentation/networking/alias.rst b/Documentation/networking/alias.rst new file mode 100644 index 000000000000..af7c5ee92014 --- /dev/null +++ b/Documentation/networking/alias.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========== +IP-Aliasing +=========== + +IP-aliases are an obsolete way to manage multiple IP-addresses/masks +per interface. Newer tools such as iproute2 support multiple +address/prefixes per interface, but aliases are still supported +for backwards compatibility. + +An alias is formed by adding a colon and a string when running ifconfig. +This string is usually numeric, but this is not a must. + + +Alias creation +============== + +Alias creation is done by 'magic' interface naming: eg. to create a +200.1.1.1 alias for eth0 ... +:: + + # ifconfig eth0:0 200.1.1.1 etc,etc.... + ~~ -> request alias #0 creation (if not yet exists) for eth0 + +The corresponding route is also set up by this command. Please note: +The route always points to the base interface. + + +Alias deletion +============== + +The alias is removed by shutting the alias down:: + + # ifconfig eth0:0 down + ~~~~~~~~~~ -> will delete alias + + +Alias (re-)configuring +====================== + +Aliases are not real devices, but programs should be able to configure +and refer to them as usual (ifconfig, route, etc). + + +Relationship with main device +============================= + +If the base device is shut down the added aliases will be deleted too. diff --git a/Documentation/networking/alias.txt b/Documentation/networking/alias.txt deleted file mode 100644 index 85046f53fcfc..000000000000 --- a/Documentation/networking/alias.txt +++ /dev/null @@ -1,40 +0,0 @@ - -IP-Aliasing: -============ - -IP-aliases are an obsolete way to manage multiple IP-addresses/masks -per interface. Newer tools such as iproute2 support multiple -address/prefixes per interface, but aliases are still supported -for backwards compatibility. - -An alias is formed by adding a colon and a string when running ifconfig. -This string is usually numeric, but this is not a must. - -o Alias creation. - Alias creation is done by 'magic' interface naming: eg. to create a - 200.1.1.1 alias for eth0 ... - - # ifconfig eth0:0 200.1.1.1 etc,etc.... - ~~ -> request alias #0 creation (if not yet exists) for eth0 - - The corresponding route is also set up by this command. - Please note: The route always points to the base interface. - - -o Alias deletion. - The alias is removed by shutting the alias down: - - # ifconfig eth0:0 down - ~~~~~~~~~~ -> will delete alias - - -o Alias (re-)configuring - - Aliases are not real devices, but programs should be able to configure and - refer to them as usual (ifconfig, route, etc). - - -o Relationship with main device - - If the base device is shut down the added aliases will be deleted - too. diff --git a/Documentation/networking/bridge.txt b/Documentation/networking/bridge.rst index a27cb6214ed7..4aef9cddde2f 100644 --- a/Documentation/networking/bridge.txt +++ b/Documentation/networking/bridge.rst @@ -1,3 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Ethernet Bridging +================= + In order to use the Ethernet bridging functionality, you'll need the userspace tools. diff --git a/Documentation/networking/can_ucan_protocol.rst b/Documentation/networking/can_ucan_protocol.rst new file mode 100644 index 000000000000..4cef88d24fc7 --- /dev/null +++ b/Documentation/networking/can_ucan_protocol.rst @@ -0,0 +1,332 @@ +================= +The UCAN Protocol +================= + +UCAN is the protocol used by the microcontroller-based USB-CAN +adapter that is integrated on System-on-Modules from Theobroma Systems +and that is also available as a standalone USB stick. + +The UCAN protocol has been designed to be hardware-independent. +It is modeled closely after how Linux represents CAN devices +internally. All multi-byte integers are encoded as Little Endian. + +All structures mentioned in this document are defined in +``drivers/net/can/usb/ucan.c``. + +USB Endpoints +============= + +UCAN devices use three USB endpoints: + +CONTROL endpoint + The driver sends device management commands on this endpoint + +IN endpoint + The device sends CAN data frames and CAN error frames + +OUT endpoint + The driver sends CAN data frames on the out endpoint + + +CONTROL Messages +================ + +UCAN devices are configured using vendor requests on the control pipe. + +To support multiple CAN interfaces in a single USB device all +configuration commands target the corresponding interface in the USB +descriptor. + +The driver uses ``ucan_ctrl_command_in/out`` and +``ucan_device_request_in`` to deliver commands to the device. + +Setup Packet +------------ + +================= ===================================================== +``bmRequestType`` Direction | Vendor | (Interface or Device) +``bRequest`` Command Number +``wValue`` Subcommand Number (16 Bit) or 0 if not used +``wIndex`` USB Interface Index (0 for device commands) +``wLength`` * Host to Device - Number of bytes to transmit + * Device to Host - Maximum Number of bytes to + receive. If the device send less. Commom ZLP + semantics are used. +================= ===================================================== + +Error Handling +-------------- + +The device indicates failed control commands by stalling the +pipe. + +Device Commands +--------------- + +UCAN_DEVICE_GET_FW_STRING +~~~~~~~~~~~~~~~~~~~~~~~~~ + +*Dev2Host; optional* + +Request the device firmware string. + + +Interface Commands +------------------ + +UCAN_COMMAND_START +~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Bring the CAN interface up. + +Payload Format + ``ucan_ctl_payload_t.cmd_start`` + +==== ============================ +mode or mask of ``UCAN_MODE_*`` +==== ============================ + +UCAN_COMMAND_STOP +~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Stop the CAN interface + +Payload Format + *empty* + +UCAN_COMMAND_RESET +~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Reset the CAN controller (including error counters) + +Payload Format + *empty* + +UCAN_COMMAND_GET +~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Get Information from the Device + +Subcommands +^^^^^^^^^^^ + +UCAN_COMMAND_GET_INFO + Request the device information structure ``ucan_ctl_payload_t.device_info``. + + See the ``device_info`` field for details, and + ``uapi/linux/can/netlink.h`` for an explanation of the + ``can_bittiming fields``. + + Payload Format + ``ucan_ctl_payload_t.device_info`` + +UCAN_COMMAND_GET_PROTOCOL_VERSION + + Request the device protocol version + ``ucan_ctl_payload_t.protocol_version``. The current protocol version is 3. + + Payload Format + ``ucan_ctl_payload_t.protocol_version`` + +.. note:: Devices that do not implement this command use the old + protocol version 1 + +UCAN_COMMAND_SET_BITTIMING +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Setup bittiming by sending the the structure +``ucan_ctl_payload_t.cmd_set_bittiming`` (see ``struct bittiming`` for +details) + +Payload Format + ``ucan_ctl_payload_t.cmd_set_bittiming``. + +UCAN_SLEEP/WAKE +~~~~~~~~~~~~~~~ + +*Host2Dev; optional* + +Configure sleep and wake modes. Not yet supported by the driver. + +UCAN_FILTER +~~~~~~~~~~~ + +*Host2Dev; optional* + +Setup hardware CAN filters. Not yet supported by the driver. + +Allowed interface commands +-------------------------- + +================== =================== ================== +Legal Device State Command New Device State +================== =================== ================== +stopped SET_BITTIMING stopped +stopped START started +started STOP or RESET stopped +stopped STOP or RESET stopped +started RESTART started +any GET *no change* +================== =================== ================== + +IN Message Format +================= + +A data packet on the USB IN endpoint contains one or more +``ucan_message_in`` values. If multiple messages are batched in a USB +data packet, the ``len`` field can be used to jump to the next +``ucan_message_in`` value (take care to sanity-check the ``len`` value +against the actual data size). + +.. _can_ucan_in_message_len: + +``len`` field +------------- + +Each ``ucan_message_in`` must be aligned to a 4-byte boundary (relative +to the start of the start of the data buffer). That means that there +may be padding bytes between multiple ``ucan_message_in`` values: + +.. code:: + + +----------------------------+ < 0 + | | + | struct ucan_message_in | + | | + +----------------------------+ < len + [padding] + +----------------------------+ < round_up(len, 4) + | | + | struct ucan_message_in | + | | + +----------------------------+ + [...] + +``type`` field +-------------- + +The ``type`` field specifies the type of the message. + +UCAN_IN_RX +~~~~~~~~~~ + +``subtype`` + zero + +Data received from the CAN bus (ID + payload). + +UCAN_IN_TX_COMPLETE +~~~~~~~~~~~~~~~~~~~ + +``subtype`` + zero + +The CAN device has sent a message to the CAN bus. It answers with a +list of of tuples <echo-ids, flags>. + +The echo-id identifies the frame from (echos the id from a previous +UCAN_OUT_TX message). The flag indicates the result of the +transmission. Whereas a set Bit 0 indicates success. All other bits +are reserved and set to zero. + +Flow Control +------------ + +When receiving CAN messages there is no flow control on the USB +buffer. The driver has to handle inbound message quickly enough to +avoid drops. I case the device buffer overflow the condition is +reported by sending corresponding error frames (see +:ref:`can_ucan_error_handling`) + + +OUT Message Format +================== + +A data packet on the USB OUT endpoint contains one or more ``struct +ucan_message_out`` values. If multiple messages are batched into one +data packet, the device uses the ``len`` field to jump to the next +ucan_message_out value. Each ucan_message_out must be aligned to 4 +bytes (relative to the start of the data buffer). The mechanism is +same as described in :ref:`can_ucan_in_message_len`. + +.. code:: + + +----------------------------+ < 0 + | | + | struct ucan_message_out | + | | + +----------------------------+ < len + [padding] + +----------------------------+ < round_up(len, 4) + | | + | struct ucan_message_out | + | | + +----------------------------+ + [...] + +``type`` field +-------------- + +In protocol version 3 only ``UCAN_OUT_TX`` is defined, others are used +only by legacy devices (protocol version 1). + +UCAN_OUT_TX +~~~~~~~~~~~ +``subtype`` + echo id to be replied within a CAN_IN_TX_COMPLETE message + +Transmit a CAN frame. (parameters: ``id``, ``data``) + +Flow Control +------------ + +When the device outbound buffers are full it starts sending *NAKs* on +the *OUT* pipe until more buffers are available. The driver stops the +queue when a certain threshold of out packets are incomplete. + +.. _can_ucan_error_handling: + +CAN Error Handling +================== + +If error reporting is turned on the device encodes errors into CAN +error frames (see ``uapi/linux/can/error.h``) and sends it using the +IN endpoint. The driver updates its error statistics and forwards +it. + +Although UCAN devices can suppress error frames completely, in Linux +the driver is always interested. Hence, the device is always started with +the ``UCAN_MODE_BERR_REPORT`` set. Filtering those messages for the +user space is done by the driver. + +Bus OFF +------- + +- The device does not recover from bus of automatically. +- Bus OFF is indicated by an error frame (see ``uapi/linux/can/error.h``) +- Bus OFF recovery is started by ``UCAN_COMMAND_RESTART`` +- Once Bus OFF recover is completed the device sends an error frame + indicating that it is on ERROR-ACTIVE state. +- During Bus OFF no frames are sent by the device. +- During Bus OFF transmission requests from the host are completed + immediately with the success bit left unset. + +Example Conversation +==================== + +#) Device is connected to USB +#) Host sends command ``UCAN_COMMAND_RESET``, subcmd 0 +#) Host sends command ``UCAN_COMMAND_GET``, subcmd ``UCAN_COMMAND_GET_INFO`` +#) Device sends ``UCAN_IN_DEVICE_INFO`` +#) Host sends command ``UCAN_OUT_SET_BITTIMING`` +#) Host sends command ``UCAN_COMMAND_START``, subcmd 0, mode ``UCAN_MODE_BERR_REPORT`` diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index fec8588a588e..fcd710f2cc7a 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -6,15 +6,21 @@ Contents: .. toctree:: :maxdepth: 2 + netdev-FAQ af_xdp batman-adv can + can_ucan_protocol dpaa2/index e100 e1000 kapi z8530book msg_zerocopy + failover + net_failover + alias + bridge .. only:: subproject diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ce8fbf5aa63c..8313a636dd53 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -81,6 +81,15 @@ fib_multipath_hash_policy - INTEGER 0 - Layer 3 1 - Layer 4 +ip_forward_update_priority - INTEGER + Whether to update SKB priority from "TOS" field in IPv4 header after it + is forwarded. The new SKB priority is mapped from TOS field value + according to an rt_tos2priority table (see e.g. man tc-prio). + Default: 1 (Update priority.) + Possible values: + 0 - Do not update priority. + 1 - Update priority. + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -733,11 +742,11 @@ tcp_limit_output_bytes - INTEGER Controls TCP Small Queue limit per tcp socket. TCP bulk sender tends to increase packets in flight until it gets losses notifications. With SNDBUF autotuning, this can - result in a large amount of packets queued in qdisc/device - on the local machine, hurting latency of other flows, for - typical pfifo_fast qdiscs. - tcp_limit_output_bytes limits the number of bytes on qdisc - or device to reduce artificial RTT/cwnd and reduce bufferbloat. + result in a large amount of packets queued on the local machine + (e.g.: qdiscs, CPU backlog, or device) hurting latency of other + flows, for typical pfifo_fast qdiscs. tcp_limit_output_bytes + limits the number of bytes on qdisc or device to reduce artificial + RTT/cwnd and reduce bufferbloat. Default: 262144 tcp_challenge_ack_limit - INTEGER @@ -1834,6 +1843,16 @@ stable_secret - IPv6 address By default the stable secret is unset. +addr_gen_mode - INTEGER + Defines how link-local and autoconf addresses are generated. + + 0: generate address based on EUI64 (default) + 1: do no generate a link-local address, use EUI64 for addresses generated + from autoconf + 2: generate stable privacy addresses, using the secret from + stable_secret (RFC7217) + 3: generate stable privacy addresses, using a random secret if unset + drop_unicast_in_l2_multicast - BOOLEAN Drop any unicast IPv6 packets that are received in link-layer multicast (or broadcast) frames. @@ -1863,6 +1882,11 @@ ratelimit - INTEGER otherwise the minimal space between responses in milliseconds. Default: 1000 +echo_ignore_all - BOOLEAN + If set non-zero, then the kernel will ignore all ICMP ECHO + requests sent to it over the IPv6 protocol. + Default: 0 + xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst index 70ca2f5800c4..06c97dcb57ca 100644 --- a/Documentation/networking/net_failover.rst +++ b/Documentation/networking/net_failover.rst @@ -36,37 +36,39 @@ feature on the virtio-net interface and assign the same MAC address to both virtio-net and VF interfaces. Here is an example XML snippet that shows such configuration. - - <interface type='network'> - <mac address='52:54:00:00:12:53'/> - <source network='enp66s0f0_br'/> - <target dev='tap01'/> - <model type='virtio'/> - <driver name='vhost' queues='4'/> - <link state='down'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0a' function='0x0'/> - </interface> - <interface type='hostdev' managed='yes'> - <mac address='52:54:00:00:12:53'/> - <source> - <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/> - </source> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/> - </interface> +:: + + <interface type='network'> + <mac address='52:54:00:00:12:53'/> + <source network='enp66s0f0_br'/> + <target dev='tap01'/> + <model type='virtio'/> + <driver name='vhost' queues='4'/> + <link state='down'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0a' function='0x0'/> + </interface> + <interface type='hostdev' managed='yes'> + <mac address='52:54:00:00:12:53'/> + <source> + <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/> + </interface> Booting a VM with the above configuration will result in the following 3 netdevs created in the VM. - -4: ens10: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 - link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff - inet 192.168.12.53/24 brd 192.168.12.255 scope global dynamic ens10 - valid_lft 42482sec preferred_lft 42482sec - inet6 fe80::97d8:db2:8c10:b6d6/64 scope link - valid_lft forever preferred_lft forever -5: ens10nsby: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master ens10 state UP group default qlen 1000 - link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff -7: ens11: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master ens10 state UP group default qlen 1000 - link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff +:: + + 4: ens10: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 + link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff + inet 192.168.12.53/24 brd 192.168.12.255 scope global dynamic ens10 + valid_lft 42482sec preferred_lft 42482sec + inet6 fe80::97d8:db2:8c10:b6d6/64 scope link + valid_lft forever preferred_lft forever + 5: ens10nsby: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master ens10 state UP group default qlen 1000 + link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff + 7: ens11: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master ens10 state UP group default qlen 1000 + link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff ens10 is the 'failover' master netdev, ens10nsby and ens11 are the slave 'standby' and 'primary' netdevs respectively. @@ -80,37 +82,38 @@ the paravirtual datapath when the VF is unplugged. Here is a sample script that shows the steps to initiate live migration on the source hypervisor. +:: -# cat vf_xml -<interface type='hostdev' managed='yes'> - <mac address='52:54:00:00:12:53'/> - <source> - <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/> - </source> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/> -</interface> + # cat vf_xml + <interface type='hostdev' managed='yes'> + <mac address='52:54:00:00:12:53'/> + <source> + <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/> + </interface> -# Source Hypervisor -#!/bin/bash + # Source Hypervisor + #!/bin/bash -DOMAIN=fedora27-tap01 -PF=enp66s0f0 -VF_NUM=5 -TAP_IF=tap01 -VF_XML= + DOMAIN=fedora27-tap01 + PF=enp66s0f0 + VF_NUM=5 + TAP_IF=tap01 + VF_XML= -MAC=52:54:00:00:12:53 -ZERO_MAC=00:00:00:00:00:00 + MAC=52:54:00:00:12:53 + ZERO_MAC=00:00:00:00:00:00 -virsh domif-setlink $DOMAIN $TAP_IF up -bridge fdb del $MAC dev $PF master -virsh detach-device $DOMAIN $VF_XML -ip link set $PF vf $VF_NUM mac $ZERO_MAC + virsh domif-setlink $DOMAIN $TAP_IF up + bridge fdb del $MAC dev $PF master + virsh detach-device $DOMAIN $VF_XML + ip link set $PF vf $VF_NUM mac $ZERO_MAC -virsh migrate --live $DOMAIN qemu+ssh://$REMOTE_HOST/system + virsh migrate --live $DOMAIN qemu+ssh://$REMOTE_HOST/system -# Destination Hypervisor -#!/bin/bash + # Destination Hypervisor + #!/bin/bash -virsh attach-device $DOMAIN $VF_XML -virsh domif-setlink $DOMAIN $TAP_IF down + virsh attach-device $DOMAIN $VF_XML + virsh domif-setlink $DOMAIN $TAP_IF down diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst new file mode 100644 index 000000000000..0ac5fa77f501 --- /dev/null +++ b/Documentation/networking/netdev-FAQ.rst @@ -0,0 +1,259 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _netdev-FAQ: + +========== +netdev FAQ +========== + +Q: What is netdev? +------------------ +A: It is a mailing list for all network-related Linux stuff. This +includes anything found under net/ (i.e. core code like IPv6) and +drivers/net (i.e. hardware specific drivers) in the Linux source tree. + +Note that some subsystems (e.g. wireless drivers) which have a high +volume of traffic have their own specific mailing lists. + +The netdev list is managed (like many other Linux mailing lists) through +VGER (http://vger.kernel.org/) and archives can be found below: + +- http://marc.info/?l=linux-netdev +- http://www.spinics.net/lists/netdev/ + +Aside from subsystems like that mentioned above, all network-related +Linux development (i.e. RFC, review, comments, etc.) takes place on +netdev. + +Q: How do the changes posted to netdev make their way into Linux? +----------------------------------------------------------------- +A: There are always two trees (git repositories) in play. Both are +driven by David Miller, the main network maintainer. There is the +``net`` tree, and the ``net-next`` tree. As you can probably guess from +the names, the ``net`` tree is for fixes to existing code already in the +mainline tree from Linus, and ``net-next`` is where the new code goes +for the future release. You can find the trees here: + +- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git +- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git + +Q: How often do changes from these trees make it to the mainline Linus tree? +---------------------------------------------------------------------------- +A: To understand this, you need to know a bit of background information on +the cadence of Linux development. Each new release starts off with a +two week "merge window" where the main maintainers feed their new stuff +to Linus for merging into the mainline tree. After the two weeks, the +merge window is closed, and it is called/tagged ``-rc1``. No new +features get mainlined after this -- only fixes to the rc1 content are +expected. After roughly a week of collecting fixes to the rc1 content, +rc2 is released. This repeats on a roughly weekly basis until rc7 +(typically; sometimes rc6 if things are quiet, or rc8 if things are in a +state of churn), and a week after the last vX.Y-rcN was done, the +official vX.Y is released. + +Relating that to netdev: At the beginning of the 2-week merge window, +the ``net-next`` tree will be closed - no new changes/features. The +accumulated new content of the past ~10 weeks will be passed onto +mainline/Linus via a pull request for vX.Y -- at the same time, the +``net`` tree will start accumulating fixes for this pulled content +relating to vX.Y + +An announcement indicating when ``net-next`` has been closed is usually +sent to netdev, but knowing the above, you can predict that in advance. + +IMPORTANT: Do not send new ``net-next`` content to netdev during the +period during which ``net-next`` tree is closed. + +Shortly after the two weeks have passed (and vX.Y-rc1 is released), the +tree for ``net-next`` reopens to collect content for the next (vX.Y+1) +release. + +If you aren't subscribed to netdev and/or are simply unsure if +``net-next`` has re-opened yet, simply check the ``net-next`` git +repository link above for any new networking-related commits. You may +also check the following website for the current status: + + http://vger.kernel.org/~davem/net-next.html + +The ``net`` tree continues to collect fixes for the vX.Y content, and is +fed back to Linus at regular (~weekly) intervals. Meaning that the +focus for ``net`` is on stabilization and bug fixes. + +Finally, the vX.Y gets released, and the whole cycle starts over. + +Q: So where are we now in this cycle? + +Load the mainline (Linus) page here: + + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + +and note the top of the "tags" section. If it is rc1, it is early in +the dev cycle. If it was tagged rc7 a week ago, then a release is +probably imminent. + +Q: How do I indicate which tree (net vs. net-next) my patch should be in? +------------------------------------------------------------------------- +A: Firstly, think whether you have a bug fix or new "next-like" content. +Then once decided, assuming that you use git, use the prefix flag, i.e. +:: + + git format-patch --subject-prefix='PATCH net-next' start..finish + +Use ``net`` instead of ``net-next`` (always lower case) in the above for +bug-fix ``net`` content. If you don't use git, then note the only magic +in the above is just the subject text of the outgoing e-mail, and you +can manually change it yourself with whatever MUA you are comfortable +with. + +Q: I sent a patch and I'm wondering what happened to it? +-------------------------------------------------------- +Q: How can I tell whether it got merged? +A: Start by looking at the main patchworks queue for netdev: + + http://patchwork.ozlabs.org/project/netdev/list/ + +The "State" field will tell you exactly where things are at with your +patch. + +Q: The above only says "Under Review". How can I find out more? +---------------------------------------------------------------- +A: Generally speaking, the patches get triaged quickly (in less than +48h). So be patient. Asking the maintainer for status updates on your +patch is a good way to ensure your patch is ignored or pushed to the +bottom of the priority list. + +Q: I submitted multiple versions of the patch series +---------------------------------------------------- +Q: should I directly update patchwork for the previous versions of these +patch series? +A: No, please don't interfere with the patch status on patchwork, leave +it to the maintainer to figure out what is the most recent and current +version that should be applied. If there is any doubt, the maintainer +will reply and ask what should be done. + +Q: How can I tell what patches are queued up for backporting to the various stable releases? +-------------------------------------------------------------------------------------------- +A: Normally Greg Kroah-Hartman collects stable commits himself, but for +networking, Dave collects up patches he deems critical for the +networking subsystem, and then hands them off to Greg. + +There is a patchworks queue that you can see here: + + http://patchwork.ozlabs.org/bundle/davem/stable/?state=* + +It contains the patches which Dave has selected, but not yet handed off +to Greg. If Greg already has the patch, then it will be here: + + https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git + +A quick way to find whether the patch is in this stable-queue is to +simply clone the repo, and then git grep the mainline commit ID, e.g. +:: + + stable-queue$ git grep -l 284041ef21fdf2e + releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch + releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch + releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch + stable/stable-queue$ + +Q: I see a network patch and I think it should be backported to stable. +----------------------------------------------------------------------- +Q: Should I request it via stable@vger.kernel.org like the references in +the kernel's Documentation/process/stable-kernel-rules.rst file say? +A: No, not for networking. Check the stable queues as per above first +to see if it is already queued. If not, then send a mail to netdev, +listing the upstream commit ID and why you think it should be a stable +candidate. + +Before you jump to go do the above, do note that the normal stable rules +in :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>` +still apply. So you need to explicitly indicate why it is a critical +fix and exactly what users are impacted. In addition, you need to +convince yourself that you *really* think it has been overlooked, +vs. having been considered and rejected. + +Generally speaking, the longer it has had a chance to "soak" in +mainline, the better the odds that it is an OK candidate for stable. So +scrambling to request a commit be added the day after it appears should +be avoided. + +Q: I have created a network patch and I think it should be backported to stable. +-------------------------------------------------------------------------------- +Q: Should I add a Cc: stable@vger.kernel.org like the references in the +kernel's Documentation/ directory say? +A: No. See above answer. In short, if you think it really belongs in +stable, then ensure you write a decent commit log that describes who +gets impacted by the bug fix and how it manifests itself, and when the +bug was introduced. If you do that properly, then the commit will get +handled appropriately and most likely get put in the patchworks stable +queue if it really warrants it. + +If you think there is some valid information relating to it being in +stable that does *not* belong in the commit log, then use the three dash +marker line as described in +:ref:`Documentation/process/submitting-patches.rst <the_canonical_patch_format>` +to temporarily embed that information into the patch that you send. + +Q: Are all networking bug fixes backported to all stable releases? +------------------------------------------------------------------ +A: Due to capacity, Dave could only take care of the backports for the +last two stable releases. For earlier stable releases, each stable +branch maintainer is supposed to take care of them. If you find any +patch is missing from an earlier stable branch, please notify +stable@vger.kernel.org with either a commit ID or a formal patch +backported, and CC Dave and other relevant networking developers. + +Q: Is the comment style convention different for the networking content? +------------------------------------------------------------------------ +A: Yes, in a largely trivial way. Instead of this:: + + /* + * foobar blah blah blah + * another line of text + */ + +it is requested that you make it look like this:: + + /* foobar blah blah blah + * another line of text + */ + +Q: I am working in existing code that has the former comment style and not the latter. +-------------------------------------------------------------------------------------- +Q: Should I submit new code in the former style or the latter? +A: Make it the latter style, so that eventually all code in the domain +of netdev is of this format. + +Q: I found a bug that might have possible security implications or similar. +--------------------------------------------------------------------------- +Q: Should I mail the main netdev maintainer off-list?** +A: No. The current netdev maintainer has consistently requested that +people use the mailing lists and not reach out directly. If you aren't +OK with that, then perhaps consider mailing security@kernel.org or +reading about http://oss-security.openwall.org/wiki/mailing-lists/distros +as possible alternative mechanisms. + +Q: What level of testing is expected before I submit my change? +--------------------------------------------------------------- +A: If your changes are against ``net-next``, the expectation is that you +have tested by layering your changes on top of ``net-next``. Ideally +you will have done run-time testing specific to your change, but at a +minimum, your changes should survive an ``allyesconfig`` and an +``allmodconfig`` build without new warnings or failures. + +Q: Any other tips to help ensure my net/net-next patch gets OK'd? +----------------------------------------------------------------- +A: Attention to detail. Re-read your own work as if you were the +reviewer. You can start with using ``checkpatch.pl``, perhaps even with +the ``--strict`` flag. But do not be mindlessly robotic in doing so. +If your change is a bug fix, make sure your commit log indicates the +end-user visible symptom, the underlying reason as to why it happens, +and then if necessary, explain why the fix proposed is the best way to +get things done. Don't mangle whitespace, and as is common, don't +mis-indent function arguments that span multiple lines. If it is your +first patch, mail it to yourself so you can test apply it to an +unpatched tree to confirm infrastructure didn't mangle it. + +Finally, go back and read +:ref:`Documentation/process/submitting-patches.rst <submittingpatches>` +to be sure you are not repeating some common mistake documented there. diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt deleted file mode 100644 index fa951b820b25..000000000000 --- a/Documentation/networking/netdev-FAQ.txt +++ /dev/null @@ -1,244 +0,0 @@ - -Information you need to know about netdev ------------------------------------------ - -Q: What is netdev? - -A: It is a mailing list for all network-related Linux stuff. This includes - anything found under net/ (i.e. core code like IPv6) and drivers/net - (i.e. hardware specific drivers) in the Linux source tree. - - Note that some subsystems (e.g. wireless drivers) which have a high volume - of traffic have their own specific mailing lists. - - The netdev list is managed (like many other Linux mailing lists) through - VGER ( http://vger.kernel.org/ ) and archives can be found below: - - http://marc.info/?l=linux-netdev - http://www.spinics.net/lists/netdev/ - - Aside from subsystems like that mentioned above, all network-related Linux - development (i.e. RFC, review, comments, etc.) takes place on netdev. - -Q: How do the changes posted to netdev make their way into Linux? - -A: There are always two trees (git repositories) in play. Both are driven - by David Miller, the main network maintainer. There is the "net" tree, - and the "net-next" tree. As you can probably guess from the names, the - net tree is for fixes to existing code already in the mainline tree from - Linus, and net-next is where the new code goes for the future release. - You can find the trees here: - - https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git - https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git - -Q: How often do changes from these trees make it to the mainline Linus tree? - -A: To understand this, you need to know a bit of background information - on the cadence of Linux development. Each new release starts off with - a two week "merge window" where the main maintainers feed their new - stuff to Linus for merging into the mainline tree. After the two weeks, - the merge window is closed, and it is called/tagged "-rc1". No new - features get mainlined after this -- only fixes to the rc1 content - are expected. After roughly a week of collecting fixes to the rc1 - content, rc2 is released. This repeats on a roughly weekly basis - until rc7 (typically; sometimes rc6 if things are quiet, or rc8 if - things are in a state of churn), and a week after the last vX.Y-rcN - was done, the official "vX.Y" is released. - - Relating that to netdev: At the beginning of the 2-week merge window, - the net-next tree will be closed - no new changes/features. The - accumulated new content of the past ~10 weeks will be passed onto - mainline/Linus via a pull request for vX.Y -- at the same time, - the "net" tree will start accumulating fixes for this pulled content - relating to vX.Y - - An announcement indicating when net-next has been closed is usually - sent to netdev, but knowing the above, you can predict that in advance. - - IMPORTANT: Do not send new net-next content to netdev during the - period during which net-next tree is closed. - - Shortly after the two weeks have passed (and vX.Y-rc1 is released), the - tree for net-next reopens to collect content for the next (vX.Y+1) release. - - If you aren't subscribed to netdev and/or are simply unsure if net-next - has re-opened yet, simply check the net-next git repository link above for - any new networking-related commits. You may also check the following - website for the current status: - - http://vger.kernel.org/~davem/net-next.html - - The "net" tree continues to collect fixes for the vX.Y content, and - is fed back to Linus at regular (~weekly) intervals. Meaning that the - focus for "net" is on stabilization and bugfixes. - - Finally, the vX.Y gets released, and the whole cycle starts over. - -Q: So where are we now in this cycle? - -A: Load the mainline (Linus) page here: - - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git - - and note the top of the "tags" section. If it is rc1, it is early - in the dev cycle. If it was tagged rc7 a week ago, then a release - is probably imminent. - -Q: How do I indicate which tree (net vs. net-next) my patch should be in? - -A: Firstly, think whether you have a bug fix or new "next-like" content. - Then once decided, assuming that you use git, use the prefix flag, i.e. - - git format-patch --subject-prefix='PATCH net-next' start..finish - - Use "net" instead of "net-next" (always lower case) in the above for - bug-fix net content. If you don't use git, then note the only magic in - the above is just the subject text of the outgoing e-mail, and you can - manually change it yourself with whatever MUA you are comfortable with. - -Q: I sent a patch and I'm wondering what happened to it. How can I tell - whether it got merged? - -A: Start by looking at the main patchworks queue for netdev: - - http://patchwork.ozlabs.org/project/netdev/list/ - - The "State" field will tell you exactly where things are at with - your patch. - -Q: The above only says "Under Review". How can I find out more? - -A: Generally speaking, the patches get triaged quickly (in less than 48h). - So be patient. Asking the maintainer for status updates on your - patch is a good way to ensure your patch is ignored or pushed to - the bottom of the priority list. - -Q: I submitted multiple versions of the patch series, should I directly update - patchwork for the previous versions of these patch series? - -A: No, please don't interfere with the patch status on patchwork, leave it to - the maintainer to figure out what is the most recent and current version that - should be applied. If there is any doubt, the maintainer will reply and ask - what should be done. - -Q: How can I tell what patches are queued up for backporting to the - various stable releases? - -A: Normally Greg Kroah-Hartman collects stable commits himself, but - for networking, Dave collects up patches he deems critical for the - networking subsystem, and then hands them off to Greg. - - There is a patchworks queue that you can see here: - http://patchwork.ozlabs.org/bundle/davem/stable/?state=* - - It contains the patches which Dave has selected, but not yet handed - off to Greg. If Greg already has the patch, then it will be here: - https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git - - A quick way to find whether the patch is in this stable-queue is - to simply clone the repo, and then git grep the mainline commit ID, e.g. - - stable-queue$ git grep -l 284041ef21fdf2e - releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - stable/stable-queue$ - -Q: I see a network patch and I think it should be backported to stable. - Should I request it via "stable@vger.kernel.org" like the references in - the kernel's Documentation/process/stable-kernel-rules.rst file say? - -A: No, not for networking. Check the stable queues as per above 1st to see - if it is already queued. If not, then send a mail to netdev, listing - the upstream commit ID and why you think it should be a stable candidate. - - Before you jump to go do the above, do note that the normal stable rules - in Documentation/process/stable-kernel-rules.rst still apply. So you need to - explicitly indicate why it is a critical fix and exactly what users are - impacted. In addition, you need to convince yourself that you _really_ - think it has been overlooked, vs. having been considered and rejected. - - Generally speaking, the longer it has had a chance to "soak" in mainline, - the better the odds that it is an OK candidate for stable. So scrambling - to request a commit be added the day after it appears should be avoided. - -Q: I have created a network patch and I think it should be backported to - stable. Should I add a "Cc: stable@vger.kernel.org" like the references - in the kernel's Documentation/ directory say? - -A: No. See above answer. In short, if you think it really belongs in - stable, then ensure you write a decent commit log that describes who - gets impacted by the bugfix and how it manifests itself, and when the - bug was introduced. If you do that properly, then the commit will - get handled appropriately and most likely get put in the patchworks - stable queue if it really warrants it. - - If you think there is some valid information relating to it being in - stable that does _not_ belong in the commit log, then use the three - dash marker line as described in Documentation/process/submitting-patches.rst to - temporarily embed that information into the patch that you send. - -Q: Are all networking bug fixes backported to all stable releases? - -A: Due to capacity, Dave could only take care of the backports for the last - 2 stable releases. For earlier stable releases, each stable branch maintainer - is supposed to take care of them. If you find any patch is missing from an - earlier stable branch, please notify stable@vger.kernel.org with either a - commit ID or a formal patch backported, and CC Dave and other relevant - networking developers. - -Q: Someone said that the comment style and coding convention is different - for the networking content. Is this true? - -A: Yes, in a largely trivial way. Instead of this: - - /* - * foobar blah blah blah - * another line of text - */ - - it is requested that you make it look like this: - - /* foobar blah blah blah - * another line of text - */ - -Q: I am working in existing code that has the former comment style and not the - latter. Should I submit new code in the former style or the latter? - -A: Make it the latter style, so that eventually all code in the domain of - netdev is of this format. - -Q: I found a bug that might have possible security implications or similar. - Should I mail the main netdev maintainer off-list? - -A: No. The current netdev maintainer has consistently requested that people - use the mailing lists and not reach out directly. If you aren't OK with - that, then perhaps consider mailing "security@kernel.org" or reading about - http://oss-security.openwall.org/wiki/mailing-lists/distros - as possible alternative mechanisms. - -Q: What level of testing is expected before I submit my change? - -A: If your changes are against net-next, the expectation is that you - have tested by layering your changes on top of net-next. Ideally you - will have done run-time testing specific to your change, but at a - minimum, your changes should survive an "allyesconfig" and an - "allmodconfig" build without new warnings or failures. - -Q: Any other tips to help ensure my net/net-next patch gets OK'd? - -A: Attention to detail. Re-read your own work as if you were the - reviewer. You can start with using checkpatch.pl, perhaps even - with the "--strict" flag. But do not be mindlessly robotic in - doing so. If your change is a bug-fix, make sure your commit log - indicates the end-user visible symptom, the underlying reason as - to why it happens, and then if necessary, explain why the fix proposed - is the best way to get things done. Don't mangle whitespace, and as - is common, don't mis-indent function arguments that span multiple lines. - If it is your first patch, mail it to yourself so you can test apply - it to an unpatched tree to confirm infrastructure didn't mangle it. - - Finally, go back and read Documentation/process/submitting-patches.rst to be - sure you are not repeating some common mistake documented there. diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index f55639d71d35..b7056a8a0540 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -366,8 +366,13 @@ XPS: Transmit Packet Steering Transmit Packet Steering is a mechanism for intelligently selecting which transmit queue to use when transmitting a packet on a multi-queue -device. To accomplish this, a mapping from CPU to hardware queue(s) is -recorded. The goal of this mapping is usually to assign queues +device. This can be accomplished by recording two kinds of maps, either +a mapping of CPU to hardware queue(s) or a mapping of receive queue(s) +to hardware transmit queue(s). + +1. XPS using CPUs map + +The goal of this mapping is usually to assign queues exclusively to a subset of CPUs, where the transmit completions for these queues are processed on a CPU within this set. This choice provides two benefits. First, contention on the device queue lock is @@ -377,15 +382,40 @@ transmit queue). Secondly, cache miss rate on transmit completion is reduced, in particular for data cache lines that hold the sk_buff structures. -XPS is configured per transmit queue by setting a bitmap of CPUs that -may use that queue to transmit. The reverse mapping, from CPUs to -transmit queues, is computed and maintained for each network device. -When transmitting the first packet in a flow, the function -get_xps_queue() is called to select a queue. This function uses the ID -of the running CPU as a key into the CPU-to-queue lookup table. If the +2. XPS using receive queues map + +This mapping is used to pick transmit queue based on the receive +queue(s) map configuration set by the administrator. A set of receive +queues can be mapped to a set of transmit queues (many:many), although +the common use case is a 1:1 mapping. This will enable sending packets +on the same queue associations for transmit and receive. This is useful for +busy polling multi-threaded workloads where there are challenges in +associating a given CPU to a given application thread. The application +threads are not pinned to CPUs and each thread handles packets +received on a single queue. The receive queue number is cached in the +socket for the connection. In this model, sending the packets on the same +transmit queue corresponding to the associated receive queue has benefits +in keeping the CPU overhead low. Transmit completion work is locked into +the same queue-association that a given application is polling on. This +avoids the overhead of triggering an interrupt on another CPU. When the +application cleans up the packets during the busy poll, transmit completion +may be processed along with it in the same thread context and so result in +reduced latency. + +XPS is configured per transmit queue by setting a bitmap of +CPUs/receive-queues that may use that queue to transmit. The reverse +mapping, from CPUs to transmit queues or from receive-queues to transmit +queues, is computed and maintained for each network device. When +transmitting the first packet in a flow, the function get_xps_queue() is +called to select a queue. This function uses the ID of the receive queue +for the socket connection for a match in the receive queue-to-transmit queue +lookup table. Alternatively, this function can also use the ID of the +running CPU as a key into the CPU-to-queue lookup table. If the ID matches a single queue, that is used for transmission. If multiple queues match, one is selected by using the flow hash to compute an index -into the set. +into the set. When selecting the transmit queue based on receive queue(s) +map, the transmit device is not validated against the receive device as it +requires expensive lookup operation in the datapath. The queue chosen for transmitting a particular flow is saved in the corresponding socket structure for the flow (e.g. a TCP connection). @@ -404,11 +434,15 @@ acknowledged. XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by default for SMP). The functionality remains disabled until explicitly -configured. To enable XPS, the bitmap of CPUs that may use a transmit -queue is configured using the sysfs file entry: +configured. To enable XPS, the bitmap of CPUs/receive-queues that may +use a transmit queue is configured using the sysfs file entry: +For selection based on CPUs map: /sys/class/net/<dev>/queues/tx-<n>/xps_cpus +For selection based on receive-queues map: +/sys/class/net/<dev>/queues/tx-<n>/xps_rxqs + == Suggested Configuration For a network device with a single transmission queue, XPS configuration @@ -421,6 +455,11 @@ best CPUs to share a given queue are probably those that share the cache with the CPU that processes transmit completions for that queue (transmit interrupts). +For transmit queue selection based on receive queue(s), XPS has to be +explicitly configured mapping receive-queue(s) to transmit queue(s). If the +user configuration for receive-queue map does not apply, then the transmit +queue is selected based on the CPUs map. + Per TX Queue rate limitation: ============================= diff --git a/Documentation/networking/ti-cpsw.txt b/Documentation/networking/ti-cpsw.txt new file mode 100644 index 000000000000..67039205bd69 --- /dev/null +++ b/Documentation/networking/ti-cpsw.txt @@ -0,0 +1,540 @@ +* Texas Instruments CPSW ethernet driver + +Multiqueue & CBS & MQPRIO +===================================================================== +===================================================================== + +The cpsw has 3 CBS shapers for each external ports. This document +describes MQPRIO and CBS Qdisc offload configuration for cpsw driver +based on examples. It potentially can be used in audio video bridging +(AVB) and time sensitive networking (TSN). + +The following examples were tested on AM572x EVM and BBB boards. + +Test setup +========== + +Under consideration two examples with AM572x EVM running cpsw driver +in dual_emac mode. + +Several prerequisites: +- TX queues must be rated starting from txq0 that has highest priority +- Traffic classes are used starting from 0, that has highest priority +- CBS shapers should be used with rated queues +- The bandwidth for CBS shapers has to be set a little bit more then + potential incoming rate, thus, rate of all incoming tx queues has + to be a little less +- Real rates can differ, due to discreetness +- Map skb-priority to txq is not enough, also skb-priority to l2 prio + map has to be created with ip or vconfig tool +- Any l2/socket prio (0 - 7) for classes can be used, but for + simplicity default values are used: 3 and 2 +- only 2 classes tested: A and B, but checked and can work with more, + maximum allowed 4, but only for 3 rate can be set. + +Test setup for examples +======================= + +-------------------------------+ + |--+ | + | | Workstation0 | + |E | MAC 18:03:73:66:87:42 | ++-----------------------------+ +--|t | | +| | 1 | E | | |h |./tsn_listener -d \ | +| Target board: | 0 | t |--+ |0 | 18:03:73:66:87:42 -i eth0 \| +| AM572x EVM | 0 | h | | | -s 1500 | +| | 0 | 0 | |--+ | +| Only 2 classes: |Mb +---| +-------------------------------+ +| class A, class B | | +| | +---| +-------------------------------+ +| | 1 | E | |--+ | +| | 0 | t | | | Workstation1 | +| | 0 | h |--+ |E | MAC 20:cf:30:85:7d:fd | +| |Mb | 1 | +--|t | | ++-----------------------------+ |h |./tsn_listener -d \ | + |0 | 20:cf:30:85:7d:fd -i eth0 \| + | | -s 1500 | + |--+ | + +-------------------------------+ + +********************************************************************* +********************************************************************* +********************************************************************* +Example 1: One port tx AVB configuration scheme for target board +---------------------------------------------------------------------- +(prints and scheme for AM572x evm, applicable for single port boards) + +tc - traffic class +txq - transmit queue +p - priority +f - fifo (cpsw fifo) +S - shaper configured + ++------------------------------------------------------------------+ u +| +---------------+ +---------------+ +------+ +------+ | s +| | | | | | | | | | e +| | App 1 | | App 2 | | Apps | | Apps | | r +| | Class A | | Class B | | Rest | | Rest | | +| | Eth0 | | Eth0 | | Eth0 | | Eth1 | | s +| | VLAN100 | | VLAN100 | | | | | | | | p +| | 40 Mb/s | | 20 Mb/s | | | | | | | | a +| | SO_PRIORITY=3 | | SO_PRIORITY=2 | | | | | | | | c +| | | | | | | | | | | | | | e +| +---|-----------+ +---|-----------+ +---|--+ +---|--+ | ++-----|------------------|------------------|--------|-------------+ + +-+ +------------+ | | + | | +-----------------+ +--+ + | | | | ++---|-------|-------------|-----------------------|----------------+ +| +----+ +----+ +----+ +----+ +----+ | +| | p3 | | p2 | | p1 | | p0 | | p0 | | k +| \ / \ / \ / \ / \ / | e +| \ / \ / \ / \ / \ / | r +| \/ \/ \/ \/ \/ | n +| | | | | | e +| | | +-----+ | | l +| | | | | | +| +----+ +----+ +----+ +----+ | s +| |tc0 | |tc1 | |tc2 | |tc0 | | p +| \ / \ / \ / \ / | a +| \ / \ / \ / \ / | c +| \/ \/ \/ \/ | e +| | | +-----+ | | +| | | | | | | +| | | | | | | +| | | | | | | +| +----+ +----+ +----+ +----+ +----+ | +| |txq0| |txq1| |txq2| |txq3| |txq4| | +| \ / \ / \ / \ / \ / | +| \ / \ / \ / \ / \ / | +| \/ \/ \/ \/ \/ | +| +-|------|------|------|--+ +--|--------------+ | +| | | | | | | Eth0.100 | | Eth1 | | ++---|------|------|------|------------------------|----------------+ + | | | | | + p p p p | + 3 2 0-1, 4-7 <- L2 priority | + | | | | | + | | | | | ++---|------|------|------|------------------------|----------------+ +| | | | | |----------+ | +| +----+ +----+ +----+ +----+ +----+ | +| |dma7| |dma6| |dma5| |dma4| |dma3| | +| \ / \ / \ / \ / \ / | c +| \S / \S / \ / \ / \ / | p +| \/ \/ \/ \/ \/ | s +| | | | +----- | | w +| | | | | | | +| | | | | | | d +| +----+ +----+ +----+p p+----+ | r +| | | | | | |o o| | | i +| | f3 | | f2 | | f0 |r r| f0 | | v +| |tc0 | |tc1 | |tc2 |t t|tc0 | | e +| \CBS / \CBS / \CBS /1 2\CBS / | r +| \S / \S / \ / \ / | +| \/ \/ \/ \/ | ++------------------------------------------------------------------+ +========================================Eth==========================> + +1) +// Add 4 tx queues, for interface Eth0, and 1 tx queue for Eth1 +$ ethtool -L eth0 rx 1 tx 5 +rx unmodified, ignoring + +2) +// Check if num of queues is set correctly: +$ ethtool -l eth0 +Channel parameters for eth0: +Pre-set maximums: +RX: 8 +TX: 8 +Other: 0 +Combined: 0 +Current hardware settings: +RX: 1 +TX: 5 +Other: 0 +Combined: 0 + +3) +// TX queues must be rated starting from 0, so set bws for tx0 and tx1 +// Set rates 40 and 20 Mb/s appropriately. +// Pay attention, real speed can differ a bit due to discreetness. +// Leave last 2 tx queues not rated. +$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate +$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate + +4) +// Check maximum rate of tx (cpdma) queues: +$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate +40 +20 +0 +0 +0 + +5) +// Map skb->priority to traffic class: +// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2 +// Map traffic class to transmit queue: +// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq2, txq3) +$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \ +map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1 + +5a) +// As two interface sharing same set of tx queues, assign all traffic +// coming to interface Eth1 to separate queue in order to not mix it +// with traffic from interface Eth0, so use separate txq to send +// packets to Eth1, so all prio -> tc0 and tc0 -> txq4 +// Here hw 0, so here still default configuration for eth1 in hw +$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 1 \ +map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@4 hw 0 + +6) +// Check classes settings +$ tc -g class show dev eth0 ++---(100:ffe2) mqprio +| +---(100:3) mqprio +| +---(100:4) mqprio +| ++---(100:ffe1) mqprio +| +---(100:2) mqprio +| ++---(100:ffe0) mqprio + +---(100:1) mqprio + +$ tc -g class show dev eth1 ++---(100:ffe0) mqprio + +---(100:5) mqprio + +7) +// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc +// Set it +1 Mb for reserve (important!) +// here only idle slope is important, others arg are ignored +// Pay attention, real speed can differ a bit due to discreetness +$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1438 \ +hicredit 62 sendslope -959000 idleslope 41000 offload 1 +net eth0: set FIFO3 bw = 50 + +8) +// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc: +// Set it +1 Mb for reserve (important!) +$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1468 \ +hicredit 65 sendslope -979000 idleslope 21000 offload 1 +net eth0: set FIFO2 bw = 30 + +9) +// Create vlan 100 to map sk->priority to vlan qos +$ ip link add link eth0 name eth0.100 type vlan id 100 +8021q: 802.1Q VLAN Support v1.8 +8021q: adding VLAN 0 to HW filter on device eth0 +8021q: adding VLAN 0 to HW filter on device eth1 +net eth0: Adding vlanid 100 to vlan filter + +10) +// Map skb->priority to L2 prio, 1 to 1 +$ ip link set eth0.100 type vlan \ +egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +11) +// Check egress map for vlan 100 +$ cat /proc/net/vlan/eth0.100 +[...] +INGRESS priority mappings: 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 +EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +12) +// Run your appropriate tools with socket option "SO_PRIORITY" +// to 3 for class A and/or to 2 for class B +// (I took at https://www.spinics.net/lists/netdev/msg460869.html) +./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500& +./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500& + +13) +// run your listener on workstation (should be in same vlan) +// (I took at https://www.spinics.net/lists/netdev/msg460869.html) +./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500 +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39000 kbps + +14) +// Restore default configuration if needed +$ ip link del eth0.100 +$ tc qdisc del dev eth1 root +$ tc qdisc del dev eth0 root +net eth0: Prev FIFO2 is shaped +net eth0: set FIFO3 bw = 0 +net eth0: set FIFO2 bw = 0 +$ ethtool -L eth0 rx 1 tx 1 + +********************************************************************* +********************************************************************* +********************************************************************* +Example 2: Two port tx AVB configuration scheme for target board +---------------------------------------------------------------------- +(prints and scheme for AM572x evm, for dual emac boards only) + ++------------------------------------------------------------------+ u +| +----------+ +----------+ +------+ +----------+ +----------+ | s +| | | | | | | | | | | | e +| | App 1 | | App 2 | | Apps | | App 3 | | App 4 | | r +| | Class A | | Class B | | Rest | | Class B | | Class A | | +| | Eth0 | | Eth0 | | | | | Eth1 | | Eth1 | | s +| | VLAN100 | | VLAN100 | | | | | VLAN100 | | VLAN100 | | p +| | 40 Mb/s | | 20 Mb/s | | | | | 10 Mb/s | | 30 Mb/s | | a +| | SO_PRI=3 | | SO_PRI=2 | | | | | SO_PRI=3 | | SO_PRI=2 | | c +| | | | | | | | | | | | | | | | | e +| +---|------+ +---|------+ +---|--+ +---|------+ +---|------+ | ++-----|-------------|-------------|---------|-------------|--------+ + +-+ +-------+ | +----------+ +----+ + | | +-------+------+ | | + | | | | | | ++---|-------|-------------|--------------|-------------|-------|---+ +| +----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+ | +| | p3 | | p2 | | p1 | | p0 | | p0 | | p1 | | p2 | | p3 | | k +| \ / \ / \ / \ / \ / \ / \ / \ / | e +| \ / \ / \ / \ / \ / \ / \ / \ / | r +| \/ \/ \/ \/ \/ \/ \/ \/ | n +| | | | | | | | e +| | | +----+ +----+ | | | l +| | | | | | | | +| +----+ +----+ +----+ +----+ +----+ +----+ | s +| |tc0 | |tc1 | |tc2 | |tc2 | |tc1 | |tc0 | | p +| \ / \ / \ / \ / \ / \ / | a +| \ / \ / \ / \ / \ / \ / | c +| \/ \/ \/ \/ \/ \/ | e +| | | +-----+ +-----+ | | | +| | | | | | | | | | +| | | | | | | | | | +| | | | | E E | | | | | +| +----+ +----+ +----+ +----+ t t +----+ +----+ +----+ +----+ | +| |txq0| |txq1| |txq4| |txq5| h h |txq6| |txq7| |txq3| |txq2| | +| \ / \ / \ / \ / 0 1 \ / \ / \ / \ / | +| \ / \ / \ / \ / . . \ / \ / \ / \ / | +| \/ \/ \/ \/ 1 1 \/ \/ \/ \/ | +| +-|------|------|------|--+ 0 0 +-|------|------|------|--+ | +| | | | | | | 0 0 | | | | | | | ++---|------|------|------|---------------|------|------|------|----+ + | | | | | | | | + p p p p p p p p + 3 2 0-1, 4-7 <-L2 pri-> 0-1, 4-7 2 3 + | | | | | | | | + | | | | | | | | ++---|------|------|------|---------------|------|------|------|----+ +| | | | | | | | | | +| +----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+ | +| |dma7| |dma6| |dma3| |dma2| |dma1| |dma0| |dma4| |dma5| | +| \ / \ / \ / \ / \ / \ / \ / \ / | c +| \S / \S / \ / \ / \ / \ / \S / \S / | p +| \/ \/ \/ \/ \/ \/ \/ \/ | s +| | | | +----- | | | | | w +| | | | | +----+ | | | | +| | | | | | | | | | d +| +----+ +----+ +----+p p+----+ +----+ +----+ | r +| | | | | | |o o| | | | | | | i +| | f3 | | f2 | | f0 |r CPSW r| f3 | | f2 | | f0 | | v +| |tc0 | |tc1 | |tc2 |t t|tc0 | |tc1 | |tc2 | | e +| \CBS / \CBS / \CBS /1 2\CBS / \CBS / \CBS / | r +| \S / \S / \ / \S / \S / \ / | +| \/ \/ \/ \/ \/ \/ | ++------------------------------------------------------------------+ +========================================Eth==========================> + +1) +// Add 8 tx queues, for interface Eth0, but they are common, so are accessed +// by two interfaces Eth0 and Eth1. +$ ethtool -L eth1 rx 1 tx 8 +rx unmodified, ignoring + +2) +// Check if num of queues is set correctly: +$ ethtool -l eth0 +Channel parameters for eth0: +Pre-set maximums: +RX: 8 +TX: 8 +Other: 0 +Combined: 0 +Current hardware settings: +RX: 1 +TX: 8 +Other: 0 +Combined: 0 + +3) +// TX queues must be rated starting from 0, so set bws for tx0 and tx1 for Eth0 +// and for tx2 and tx3 for Eth1. That is, rates 40 and 20 Mb/s appropriately +// for Eth0 and 30 and 10 Mb/s for Eth1. +// Real speed can differ a bit due to discreetness +// Leave last 4 tx queues as not rated +$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate +$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate +$ echo 30 > /sys/class/net/eth1/queues/tx-2/tx_maxrate +$ echo 10 > /sys/class/net/eth1/queues/tx-3/tx_maxrate + +4) +// Check maximum rate of tx (cpdma) queues: +$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate +40 +20 +30 +10 +0 +0 +0 +0 + +5) +// Map skb->priority to traffic class for Eth0: +// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2 +// Map traffic class to transmit queue: +// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq4, txq5) +$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \ +map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@4 hw 1 + +6) +// Check classes settings +$ tc -g class show dev eth0 ++---(100:ffe2) mqprio +| +---(100:5) mqprio +| +---(100:6) mqprio +| ++---(100:ffe1) mqprio +| +---(100:2) mqprio +| ++---(100:ffe0) mqprio + +---(100:1) mqprio + +7) +// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc for Eth0 +// here only idle slope is important, others ignored +// Real speed can differ a bit due to discreetness +$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1470 \ +hicredit 62 sendslope -959000 idleslope 41000 offload 1 +net eth0: set FIFO3 bw = 50 + +8) +// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc for Eth0 +$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \ +hicredit 65 sendslope -979000 idleslope 21000 offload 1 +net eth0: set FIFO2 bw = 30 + +9) +// Create vlan 100 to map sk->priority to vlan qos for Eth0 +$ ip link add link eth0 name eth0.100 type vlan id 100 +net eth0: Adding vlanid 100 to vlan filter + +10) +// Map skb->priority to L2 prio for Eth0.100, one to one +$ ip link set eth0.100 type vlan \ +egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +11) +// Check egress map for vlan 100 +$ cat /proc/net/vlan/eth0.100 +[...] +INGRESS priority mappings: 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 +EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +12) +// Map skb->priority to traffic class for Eth1: +// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2 +// Map traffic class to transmit queue: +// tc0 -> txq2, tc1 -> txq3, tc2 -> (txq6, txq7) +$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 3 \ +map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@2 1@3 2@6 hw 1 + +13) +// Check classes settings +$ tc -g class show dev eth1 ++---(100:ffe2) mqprio +| +---(100:7) mqprio +| +---(100:8) mqprio +| ++---(100:ffe1) mqprio +| +---(100:4) mqprio +| ++---(100:ffe0) mqprio + +---(100:3) mqprio + +14) +// Set rate for class A - 31 Mbit (tc0, txq2) using CBS Qdisc for Eth1 +// here only idle slope is important, others ignored +// Set it +1 Mb for reserve (important!) +$ tc qdisc add dev eth1 parent 100:3 cbs locredit -1453 \ +hicredit 47 sendslope -969000 idleslope 31000 offload 1 +net eth1: set FIFO3 bw = 31 + +15) +// Set rate for class B - 11 Mbit (tc1, txq3) using CBS Qdisc for Eth1 +// Set it +1 Mb for reserve (important!) +$ tc qdisc add dev eth1 parent 100:4 cbs locredit -1483 \ +hicredit 34 sendslope -989000 idleslope 11000 offload 1 +net eth1: set FIFO2 bw = 11 + +16) +// Create vlan 100 to map sk->priority to vlan qos for Eth1 +$ ip link add link eth1 name eth1.100 type vlan id 100 +net eth1: Adding vlanid 100 to vlan filter + +17) +// Map skb->priority to L2 prio for Eth1.100, one to one +$ ip link set eth1.100 type vlan \ +egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +18) +// Check egress map for vlan 100 +$ cat /proc/net/vlan/eth1.100 +[...] +INGRESS priority mappings: 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 +EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +19) +// Run appropriate tools with socket option "SO_PRIORITY" to 3 +// for class A and to 2 for class B. For both interfaces +./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500& +./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500& +./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p2 -s 1500& +./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p3 -s 1500& + +20) +// run your listener on workstation (should be in same vlan) +// (I took at https://www.spinics.net/lists/netdev/msg460869.html) +./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500 +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39012 kbps +Receiving data rate: 39000 kbps + +21) +// Restore default configuration if needed +$ ip link del eth1.100 +$ ip link del eth0.100 +$ tc qdisc del dev eth1 root +net eth1: Prev FIFO2 is shaped +net eth1: set FIFO3 bw = 0 +net eth1: set FIFO2 bw = 0 +$ tc qdisc del dev eth0 root +net eth0: Prev FIFO2 is shaped +net eth0: set FIFO3 bw = 0 +net eth0: set FIFO2 bw = 0 +$ ethtool -L eth0 rx 1 tx 1 |